diff --git "a/data/merged.json" "b/data/merged.json" new file mode 100644--- /dev/null +++ "b/data/merged.json" @@ -0,0 +1,56463 @@ +[ + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_runtime_minutes", + "output": "max runtime minutes refers to If the experiment is not done after this many minutes, stop feature engineering and model tuning as soon as possible and proceed with building the final modeling pipeline and deployment artifacts, independent of model score convergence or pre-determined number of iterations. Only active if not in reproducible mode. Depending on the data and experiment settings, overall experiment runtime can differ significantly from this setting." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_runtime_minutes", + "output": "max runtime minutes refers to Max. runtime in minutes before triggering the 'Finish' button. Approximately enforced. (0 = disabled): If the experiment is not done after this many minutes, stop feature engineering and model tuning as soon as possible and proceed with building the final modeling pipeline and deployment artifacts, independent of model score convergence or pre-determined number of iterations. Only active if not in reproducible mode. Depending on the data and experiment settings, overall experiment runtime can differ significantly from this setting." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max runtime minutes", + "output": "max runtime minutes refers to Max. runtime in minutes before triggering the 'Finish' button. Approximately enforced. 
(0 = disabled): If the experiment is not done after this many minutes, stop feature engineering and model tuning as soon as possible and proceed with building the final modeling pipeline and deployment artifacts, independent of model score convergence or pre-determined number of iterations. Only active if not in reproducible mode. Depending on the data and experiment settings, overall experiment runtime can differ significantly from this setting." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. runtime in minutes before triggering the 'Finish' button. Approximately enforced. (0 = disabled): ", + "output": "max runtime minutes refers to Max. runtime in minutes before triggering the 'Finish' button. Approximately enforced. (0 = disabled): If the experiment is not done after this many minutes, stop feature engineering and model tuning as soon as possible and proceed with building the final modeling pipeline and deployment artifacts, independent of model score convergence or pre-determined number of iterations. Only active if not in reproducible mode. Depending on the data and experiment settings, overall experiment runtime can differ significantly from this setting." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_runtime_minutes", + "output": "max runtime minutes refers to If the experiment is not done after this many minutes, stop feature engineering and model tuning as soon as possible and proceed with building the final modeling pipeline and deployment artifacts, independent of model score convergence or pre-determined number of iterations. Only active if not in reproducible mode. Depending on the data and experiment settings, overall experiment runtime can differ significantly from this setting." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_runtime_minutes", + "output": "max runtime minutes refers to Max. runtime in minutes before triggering the 'Finish' button. Approximately enforced. (0 = disabled): If the experiment is not done after this many minutes, stop feature engineering and model tuning as soon as possible and proceed with building the final modeling pipeline and deployment artifacts, independent of model score convergence or pre-determined number of iterations. Only active if not in reproducible mode. Depending on the data and experiment settings, overall experiment runtime can differ significantly from this setting." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_auto_runtime_minutes", + "output": "min auto runtime minutes refers to if non-zero, then set max_runtime_minutes automatically to min(max_runtime_minutes, max(min_auto_runtime_minutes, runtime estimate)) when enable_preview_time_estimate is true, so that the preview performs a best estimate of the runtime. Set to zero to disable runtime estimate being used to constrain runtime of experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_auto_runtime_minutes", + "output": "min auto runtime minutes refers to Min. runtime in minutes for automatic runtime control (0 = disabled): if non-zero, then set max_runtime_minutes automatically to min(max_runtime_minutes, max(min_auto_runtime_minutes, runtime estimate)) when enable_preview_time_estimate is true, so that the preview performs a best estimate of the runtime. Set to zero to disable runtime estimate being used to constrain runtime of experiment." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min auto runtime minutes", + "output": "min auto runtime minutes refers to Min. runtime in minutes for automatic runtime control (0 = disabled): if non-zero, then set max_runtime_minutes automatically to min(max_runtime_minutes, max(min_auto_runtime_minutes, runtime estimate)) when enable_preview_time_estimate is true, so that the preview performs a best estimate of the runtime. Set to zero to disable runtime estimate being used to constrain runtime of experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Min. runtime in minutes for automatic runtime control (0 = disabled): ", + "output": "min auto runtime minutes refers to Min. runtime in minutes for automatic runtime control (0 = disabled): if non-zero, then set max_runtime_minutes automatically to min(max_runtime_minutes, max(min_auto_runtime_minutes, runtime estimate)) when enable_preview_time_estimate is true, so that the preview performs a best estimate of the runtime. Set to zero to disable runtime estimate being used to constrain runtime of experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_auto_runtime_minutes", + "output": "min auto runtime minutes refers to if non-zero, then set max_runtime_minutes automatically to min(max_runtime_minutes, max(min_auto_runtime_minutes, runtime estimate)) when enable_preview_time_estimate is true, so that the preview performs a best estimate of the runtime. Set to zero to disable runtime estimate being used to constrain runtime of experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_auto_runtime_minutes", + "output": "min auto runtime minutes refers to Min. 
runtime in minutes for automatic runtime control (0 = disabled): if non-zero, then set max_runtime_minutes automatically to min(max_runtime_minutes, max(min_auto_runtime_minutes, runtime estimate)) when enable_preview_time_estimate is true, so that the preview performs a best estimate of the runtime. Set to zero to disable runtime estimate being used to constrain runtime of experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_runtime_minutes_smart", + "output": "max runtime minutes smart refers to Whether to tune max_runtime_minutes based upon final number of base models, so try to trigger start of final model in order to better ensure stop entire experiment before max_runtime_minutes. Note: If the time given is short enough that tuning models are reduced below final model expectations, the final model may be shorter than expected leading to an overall shorter experiment time." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_runtime_minutes_smart", + "output": "max runtime minutes smart refers to Smart runtime mode: Whether to tune max_runtime_minutes based upon final number of base models, so try to trigger start of final model in order to better ensure stop entire experiment before max_runtime_minutes. Note: If the time given is short enough that tuning models are reduced below final model expectations, the final model may be shorter than expected leading to an overall shorter experiment time." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max runtime minutes smart", + "output": "max runtime minutes smart refers to Smart runtime mode: Whether to tune max_runtime_minutes based upon final number of base models, so try to trigger start of final model in order to better ensure stop entire experiment before max_runtime_minutes. Note: If the time given is short enough that tuning models are reduced below final model expectations, the final model may be shorter than expected leading to an overall shorter experiment time." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Smart runtime mode: ", + "output": "max runtime minutes smart refers to Smart runtime mode: Whether to tune max_runtime_minutes based upon final number of base models, so try to trigger start of final model in order to better ensure stop entire experiment before max_runtime_minutes. Note: If the time given is short enough that tuning models are reduced below final model expectations, the final model may be shorter than expected leading to an overall shorter experiment time." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_runtime_minutes_smart", + "output": "max runtime minutes smart refers to Whether to tune max_runtime_minutes based upon final number of base models, so try to trigger start of final model in order to better ensure stop entire experiment before max_runtime_minutes. Note: If the time given is short enough that tuning models are reduced below final model expectations, the final model may be shorter than expected leading to an overall shorter experiment time." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_runtime_minutes_smart", + "output": "max runtime minutes smart refers to Smart runtime mode: Whether to tune max_runtime_minutes based upon final number of base models, so try to trigger start of final model in order to better ensure stop entire experiment before max_runtime_minutes. Note: If the time given is short enough that tuning models are reduced below final model expectations, the final model may be shorter than expected leading to an overall shorter experiment time." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_runtime_minutes_until_abort", + "output": "max runtime minutes until abort refers to If the experiment is not done after this many minutes, push the abort button. Preserves experiment artifacts made so far for summary and log zip files, but no further artifacts are made." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_runtime_minutes_until_abort", + "output": "max runtime minutes until abort refers to Max. runtime in minutes before triggering the 'Abort' button.(0 = disabled): If the experiment is not done after this many minutes, push the abort button. Preserves experiment artifacts made so far for summary and log zip files, but no further artifacts are made." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max runtime minutes until abort", + "output": "max runtime minutes until abort refers to Max. runtime in minutes before triggering the 'Abort' button.(0 = disabled): If the experiment is not done after this many minutes, push the abort button. Preserves experiment artifacts made so far for summary and log zip files, but no further artifacts are made." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. runtime in minutes before triggering the 'Abort' button.(0 = disabled): ", + "output": "max runtime minutes until abort refers to Max. runtime in minutes before triggering the 'Abort' button.(0 = disabled): If the experiment is not done after this many minutes, push the abort button. Preserves experiment artifacts made so far for summary and log zip files, but no further artifacts are made." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_runtime_minutes_until_abort", + "output": "max runtime minutes until abort refers to If the experiment is not done after this many minutes, push the abort button. Preserves experiment artifacts made so far for summary and log zip files, but no further artifacts are made." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_runtime_minutes_until_abort", + "output": "max runtime minutes until abort refers to Max. runtime in minutes before triggering the 'Abort' button.(0 = disabled): If the experiment is not done after this many minutes, push the abort button. Preserves experiment artifacts made so far for summary and log zip files, but no further artifacts are made." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "strict_reproducible_for_max_runtime", + "output": "strict reproducible for max runtime refers to If reproducible is set, then experiment and all artifacts are reproducible, however then experiments may take arbitrarily long for a given choice of dials, features, and models. 
Setting this to False allows the experiment to complete after a fixed time, with all aspects of the model and feature building being reproducible and seeded, but the overall experiment behavior will not necessarily be reproducible if later iterations would have been used in final model building. This should be set to True if every seeded experiment of exact same setup needs to generate the exact same final model, regardless of duration." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "strict_reproducible_for_max_runtime", + "output": "strict reproducible for max runtime refers to Whether to disable time-based limits when reproducible is set: If reproducible is set, then experiment and all artifacts are reproducible, however then experiments may take arbitrarily long for a given choice of dials, features, and models. Setting this to False allows the experiment to complete after a fixed time, with all aspects of the model and feature building being reproducible and seeded, but the overall experiment behavior will not necessarily be reproducible if later iterations would have been used in final model building. This should be set to True if every seeded experiment of exact same setup needs to generate the exact same final model, regardless of duration." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "strict reproducible for max runtime", + "output": "strict reproducible for max runtime refers to Whether to disable time-based limits when reproducible is set: If reproducible is set, then experiment and all artifacts are reproducible, however then experiments may take arbitrarily long for a given choice of dials, features, and models. 
Setting this to False allows the experiment to complete after a fixed time, with all aspects of the model and feature building being reproducible and seeded, but the overall experiment behavior will not necessarily be reproducible if later iterations would have been used in final model building. This should be set to True if every seeded experiment of exact same setup needs to generate the exact same final model, regardless of duration." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to disable time-based limits when reproducible is set: ", + "output": "strict reproducible for max runtime refers to Whether to disable time-based limits when reproducible is set: If reproducible is set, then experiment and all artifacts are reproducible, however then experiments may take arbitrarily long for a given choice of dials, features, and models. Setting this to False allows the experiment to complete after a fixed time, with all aspects of the model and feature building being reproducible and seeded, but the overall experiment behavior will not necessarily be reproducible if later iterations would have been used in final model building. This should be set to True if every seeded experiment of exact same setup needs to generate the exact same final model, regardless of duration." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting strict_reproducible_for_max_runtime", + "output": "strict reproducible for max runtime refers to If reproducible is set, then experiment and all artifacts are reproducible, however then experiments may take arbitrarily long for a given choice of dials, features, and models. 
Setting this to False allows the experiment to complete after a fixed time, with all aspects of the model and feature building being reproducible and seeded, but the overall experiment behavior will not necessarily be reproducible if later iterations would have been used in final model building. This should be set to True if every seeded experiment of exact same setup needs to generate the exact same final model, regardless of duration." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting strict_reproducible_for_max_runtime", + "output": "strict reproducible for max runtime refers to Whether to disable time-based limits when reproducible is set: If reproducible is set, then experiment and all artifacts are reproducible, however then experiments may take arbitrarily long for a given choice of dials, features, and models. Setting this to False allows the experiment to complete after a fixed time, with all aspects of the model and feature building being reproducible and seeded, but the overall experiment behavior will not necessarily be reproducible if later iterations would have been used in final model building. This should be set to True if every seeded experiment of exact same setup needs to generate the exact same final model, regardless of duration." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_preview_time_estimate", + "output": "enable preview time estimate refers to Uses model built on large number of experiments to estimate runtime. It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_preview_time_estimate", + "output": "enable preview time estimate refers to Whether to have preview estimate runtime: Uses model built on large number of experiments to estimate runtime. 
It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable preview time estimate", + "output": "enable preview time estimate refers to Whether to have preview estimate runtime: Uses model built on large number of experiments to estimate runtime. It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to have preview estimate runtime: ", + "output": "enable preview time estimate refers to Whether to have preview estimate runtime: Uses model built on large number of experiments to estimate runtime. It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_preview_time_estimate", + "output": "enable preview time estimate refers to Uses model built on large number of experiments to estimate runtime. It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_preview_time_estimate", + "output": "enable preview time estimate refers to Whether to have preview estimate runtime: Uses model built on large number of experiments to estimate runtime. It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_preview_mojo_size_estimate", + "output": "enable preview mojo size estimate refers to Uses model built on large number of experiments to estimate mojo size. It can be inaccurate in cases that were not trained on." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_preview_mojo_size_estimate", + "output": "enable preview mojo size estimate refers to Whether to have preview estimate mojo size: Uses model built on large number of experiments to estimate mojo size. It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable preview mojo size estimate", + "output": "enable preview mojo size estimate refers to Whether to have preview estimate mojo size: Uses model built on large number of experiments to estimate mojo size. It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to have preview estimate mojo size: ", + "output": "enable preview mojo size estimate refers to Whether to have preview estimate mojo size: Uses model built on large number of experiments to estimate mojo size. It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_preview_mojo_size_estimate", + "output": "enable preview mojo size estimate refers to Uses model built on large number of experiments to estimate mojo size. It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_preview_mojo_size_estimate", + "output": "enable preview mojo size estimate refers to Whether to have preview estimate mojo size: Uses model built on large number of experiments to estimate mojo size. It can be inaccurate in cases that were not trained on." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_preview_cpu_memory_estimate", + "output": "enable preview cpu memory estimate refers to Uses model built on large number of experiments to estimate max cpu memory. It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_preview_cpu_memory_estimate", + "output": "enable preview cpu memory estimate refers to Whether to have preview estimate max cpu memory: Uses model built on large number of experiments to estimate max cpu memory. It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable preview cpu memory estimate", + "output": "enable preview cpu memory estimate refers to Whether to have preview estimate max cpu memory: Uses model built on large number of experiments to estimate max cpu memory. It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to have preview estimate max cpu memory: ", + "output": "enable preview cpu memory estimate refers to Whether to have preview estimate max cpu memory: Uses model built on large number of experiments to estimate max cpu memory. It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_preview_cpu_memory_estimate", + "output": "enable preview cpu memory estimate refers to Uses model built on large number of experiments to estimate max cpu memory. It can be inaccurate in cases that were not trained on." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_preview_cpu_memory_estimate", + "output": "enable preview cpu memory estimate refers to Whether to have preview estimate max cpu memory: Uses model built on large number of experiments to estimate max cpu memory. It can be inaccurate in cases that were not trained on." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_abort", + "output": "time abort refers to If the experiment is not done by this time, push the abort button. Accepts time in format given by time_abort_format (defaults to %Y-%m-%d %H:%M:%S) assuming a time zone set by time_abort_timezone (defaults to UTC). One can also give integer seconds since 1970-01-01 00:00:00 UTC. Applies to time on a DAI worker that runs experiments. Preserves experiment artifacts made so far for summary and log zip files, but no further artifacts are made. NOTE: If start new experiment with same parameters, restart, or refit, this absolute time will apply to such experiments or set of leaderboard experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_abort", + "output": "time abort refers to Time to trigger the 'Abort' button.: If the experiment is not done by this time, push the abort button. Accepts time in format given by time_abort_format (defaults to %Y-%m-%d %H:%M:%S) assuming a time zone set by time_abort_timezone (defaults to UTC). One can also give integer seconds since 1970-01-01 00:00:00 UTC. Applies to time on a DAI worker that runs experiments. Preserves experiment artifacts made so far for summary and log zip files, but no further artifacts are made. NOTE: If start new experiment with same parameters, restart, or refit, this absolute time will apply to such experiments or set of leaderboard experiments." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time abort", + "output": "time abort refers to Time to trigger the 'Abort' button.: If the experiment is not done by this time, push the abort button. Accepts time in format given by time_abort_format (defaults to %Y-%m-%d %H:%M:%S) assuming a time zone set by time_abort_timezone (defaults to UTC). One can also give integer seconds since 1970-01-01 00:00:00 UTC. Applies to time on a DAI worker that runs experiments. Preserves experiment artifacts made so far for summary and log zip files, but no further artifacts are made. NOTE: If start new experiment with same parameters, restart, or refit, this absolute time will apply to such experiments or set of leaderboard experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Time to trigger the 'Abort' button.: ", + "output": "time abort refers to Time to trigger the 'Abort' button.: If the experiment is not done by this time, push the abort button. Accepts time in format given by time_abort_format (defaults to %Y-%m-%d %H:%M:%S) assuming a time zone set by time_abort_timezone (defaults to UTC). One can also give integer seconds since 1970-01-01 00:00:00 UTC. Applies to time on a DAI worker that runs experiments. Preserves experiment artifacts made so far for summary and log zip files, but no further artifacts are made. NOTE: If start new experiment with same parameters, restart, or refit, this absolute time will apply to such experiments or set of leaderboard experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting time_abort", + "output": "time abort refers to If the experiment is not done by this time, push the abort button. 
Accepts time in format given by time_abort_format (defaults to %Y-%m-%d %H:%M:%S) assuming a time zone set by time_abort_timezone (defaults to UTC). One can also give integer seconds since 1970-01-01 00:00:00 UTC. Applies to time on a DAI worker that runs experiments. Preserves experiment artifacts made so far for summary and log zip files, but no further artifacts are made. NOTE: If start new experiment with same parameters, restart, or refit, this absolute time will apply to such experiments or set of leaderboard experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting time_abort", + "output": "time abort refers to Time to trigger the 'Abort' button.: If the experiment is not done by this time, push the abort button. Accepts time in format given by time_abort_format (defaults to %Y-%m-%d %H:%M:%S) assuming a time zone set by time_abort_timezone (defaults to UTC). One can also give integer seconds since 1970-01-01 00:00:00 UTC. Applies to time on a DAI worker that runs experiments. Preserves experiment artifacts made so far for summary and log zip files, but no further artifacts are made. NOTE: If start new experiment with same parameters, restart, or refit, this absolute time will apply to such experiments or set of leaderboard experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_abort_format", + "output": "time abort format refers to Any format is allowed as accepted by datetime.strptime." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_abort_format", + "output": "time abort format refers to Time string format for time_abort.: Any format is allowed as accepted by datetime.strptime." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time abort format", + "output": "time abort format refers to Time string format for time_abort.: Any format is allowed as accepted by datetime.strptime." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Time string format for time_abort.: ", + "output": "time abort format refers to Time string format for time_abort.: Any format is allowed as accepted by datetime.strptime." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting time_abort_format", + "output": "time abort format refers to Any format is allowed as accepted by datetime.strptime." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting time_abort_format", + "output": "time abort format refers to Time string format for time_abort.: Any format is allowed as accepted by datetime.strptime." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_abort_timezone", + "output": "time abort timezone refers to Any time zone in format accepted by datetime.strptime." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_abort_timezone", + "output": "time abort timezone refers to Time zone for time_abort.: Any time zone in format accepted by datetime.strptime." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time abort timezone", + "output": "time abort timezone refers to Time zone for time_abort.: Any time zone in format accepted by datetime.strptime." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Time zone for time_abort.: ", + "output": "time abort timezone refers to Time zone for time_abort.: Any time zone in format accepted by datetime.strptime." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting time_abort_timezone", + "output": "time abort timezone refers to Any time zone in format accepted by datetime.strptime." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting time_abort_timezone", + "output": "time abort timezone refers to Time zone for time_abort.: Any time zone in format accepted by datetime.strptime." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "delete_model_dirs_and_files", + "output": "delete model dirs and files refers to Whether to delete all directories and files matching experiment pattern when call do_delete_model (True), or whether to just delete directories (False). False can be used to preserve experiment logs that do not take up much space. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "delete_model_dirs_and_files", + "output": "delete model dirs and files refers to Whether to delete all directories and files matching experiment pattern when call do_delete_model (True), or whether to just delete directories (False). False can be used to preserve experiment logs that do not take up much space. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "delete model dirs and files", + "output": "delete model dirs and files refers to Whether to delete all directories and files matching experiment pattern when call do_delete_model (True), or whether to just delete directories (False). False can be used to preserve experiment logs that do not take up much space. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "delete model dirs and files refers to Whether to delete all directories and files matching experiment pattern when call do_delete_model (True), or whether to just delete directories (False). False can be used to preserve experiment logs that do not take up much space. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting delete_model_dirs_and_files", + "output": "delete model dirs and files refers to Whether to delete all directories and files matching experiment pattern when call do_delete_model (True), or whether to just delete directories (False). False can be used to preserve experiment logs that do not take up much space. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting delete_model_dirs_and_files", + "output": "delete model dirs and files refers to Whether to delete all directories and files matching experiment pattern when call do_delete_model (True), or whether to just delete directories (False). False can be used to preserve experiment logs that do not take up much space. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "delete_data_dirs_and_files", + "output": "delete data dirs and files refers to Whether to delete all directories and files matching dataset pattern when call do_delete_dataset (True), or whether to just delete directories (False). False can be used to preserve dataset logs that do not take up much space. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "delete_data_dirs_and_files", + "output": "delete data dirs and files refers to Whether to delete all directories and files matching dataset pattern when call do_delete_dataset (True), or whether to just delete directories (False). False can be used to preserve dataset logs that do not take up much space. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "delete data dirs and files", + "output": "delete data dirs and files refers to Whether to delete all directories and files matching dataset pattern when call do_delete_dataset (True), or whether to just delete directories (False). False can be used to preserve dataset logs that do not take up much space. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "delete data dirs and files refers to Whether to delete all directories and files matching dataset pattern when call do_delete_dataset (True), or whether to just delete directories (False). False can be used to preserve dataset logs that do not take up much space. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting delete_data_dirs_and_files", + "output": "delete data dirs and files refers to Whether to delete all directories and files matching dataset pattern when call do_delete_dataset (True), or whether to just delete directories (False). False can be used to preserve dataset logs that do not take up much space. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting delete_data_dirs_and_files", + "output": "delete data dirs and files refers to Whether to delete all directories and files matching dataset pattern when call do_delete_dataset (True), or whether to just delete directories (False). False can be used to preserve dataset logs that do not take up much space. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "recipe", + "output": "recipe refers to # Recipe type## Recipes override any GUI settings- **'auto'**: all models and features automatically determined by experiment settings, toml settings, and feature_engineering_effort- **'compliant'** : like 'auto' except: - *interpretability=10* (to avoid complexity, overrides GUI or python client chose for interpretability) - *enable_glm='on'* (rest 'off', to avoid complexity and be compatible with algorithms supported by MLI) - *fixed_ensemble_level=0*: Don't use any ensemble - *feature_brain_level=0*(: No feature brain used (to ensure every restart is identical) - *max_feature_interaction_depth=1*: interaction depth is set to 1 (no multi-feature interactions to avoid complexity) - *target_transformer='identity'*: for regression (to avoid complexity) - *check_distribution_shift_drop='off'*: Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning)- **'monotonic_gbm'** : like 'auto' except: - 
*monotonicity_constraints_interpretability_switch=1*: enable monotonicity constraints - *self.config.monotonicity_constraints_correlation_threshold = 0.01*: see below - *monotonicity_constraints_drop_low_correlation_features=true*: drop features that aren't correlated with target by at least 0.01 (specified by parameter above) - *fixed_ensemble_level=0*: Don't use any ensemble (to avoid complexity) - *included_models=['LightGBMModel']* - *included_transformers=['OriginalTransformer']*: only original (numeric) features will be used - *feature_brain_level=0*: No feature brain used (to ensure every restart is identical) - *monotonicity_constraints_log_level='high'* - *autodoc_pd_max_runtime=-1*: no timeout for PDP creation in AutoDoc- **'kaggle'** : like 'auto' except: - external validation set is concatenated with train set, with target marked as missing - test set is concatenated with train set, with target marked as missing - transformers that do not use the target are allowed to fit_transform across entire train + validation + test - several config toml expert options open-up limits (e.g. more numerics are treated as categoricals) - Note: If plentiful memory, can: - choose kaggle mode and then change fixed_feature_interaction_depth to large negative number, otherwise default number of features given to transformer is limited to 50 by default - choose mutation_mode = \"full\", so even more types are transformations are done at once per transformer- **'nlp_model'**: Only enables NLP models that process pure text- **'nlp_transformer'**: Only enables NLP transformers that process pure text, while any model type is allowed- **'image_model'**: Only enables Image models that process pure images- **'image_transformer'**: Only enables Image transformers that process pure images, while any model type is allowed- **'unsupervised'**: Only enables unsupervised transformers, models and scorers- **'gpus_max'**: Maximize use of GPUs (e.g. 
use XGBoost, rapids, Optuna hyperparameter search, etc.)- **'more_overfit_protection'**: Potentially improve overfit, esp. for small data, by disabling target encoding and making GA behave like final model for tree counts and learning rate- **'feature_store_mojo'**: Creates a MOJO to be used as transformer in the H2O Feature Store, to augment data on a row-by-row level based on Driverless AI's feature engineering. Only includes transformers that don't depend on the target, since features like target encoding need to be created at model fitting time to avoid data leakage. And features like lags need to be created from the raw data, they can't be computed with a row-by-row MOJO transformer. Each pipeline building recipe mode can be chosen, and then fine-tuned using each expert settings. Changing the pipeline building recipe will reset all pipeline building recipe options back to default and then re-apply the specific rules for the new mode, which will undo any fine-tuning of expert options that are part of pipeline building recipe rules. If choose to do new/continued/refitted/retrained experiment from parent experiment, the recipe rules are not re-applied and any fine-tuning is preserved. To reset recipe behavior, one can switch between 'auto' and the desired mode. This way the new child experiment will use the default settings for the chosen recipe."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "recipe", + "output": "recipe refers to Pipeline Building Recipe: # Recipe type## Recipes override any GUI settings- **'auto'**: all models and features automatically determined by experiment settings, toml settings, and feature_engineering_effort- **'compliant'** : like 'auto' except: - *interpretability=10* (to avoid complexity, overrides GUI or python client chose for interpretability) - *enable_glm='on'* (rest 'off', to avoid complexity and be compatible with algorithms supported by MLI) - *fixed_ensemble_level=0*: Don't use any ensemble - *feature_brain_level=0*(: No feature brain used (to ensure every restart is identical) - *max_feature_interaction_depth=1*: interaction depth is set to 1 (no multi-feature interactions to avoid complexity) - *target_transformer='identity'*: for regression (to avoid complexity) - *check_distribution_shift_drop='off'*: Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning)- **'monotonic_gbm'** : like 'auto' except: - *monotonicity_constraints_interpretability_switch=1*: enable monotonicity constraints - *self.config.monotonicity_constraints_correlation_threshold = 0.01*: see below - *monotonicity_constraints_drop_low_correlation_features=true*: drop features that aren't correlated with target by at least 0.01 (specified by parameter above) - *fixed_ensemble_level=0*: Don't use any ensemble (to avoid complexity) - *included_models=['LightGBMModel']* - *included_transformers=['OriginalTransformer']*: only original (numeric) features will be used - *feature_brain_level=0*: No feature brain used (to ensure every restart is identical) - *monotonicity_constraints_log_level='high'* - *autodoc_pd_max_runtime=-1*: no timeout for PDP creation in AutoDoc- **'kaggle'** : like 'auto' except: - external validation set is concatenated with train set, with target 
marked as missing - test set is concatenated with train set, with target marked as missing - transformers that do not use the target are allowed to fit_transform across entire train + validation + test - several config toml expert options open-up limits (e.g. more numerics are treated as categoricals) - Note: If plentiful memory, can: - choose kaggle mode and then change fixed_feature_interaction_depth to large negative number, otherwise default number of features given to transformer is limited to 50 by default - choose mutation_mode = \"full\", so even more types are transformations are done at once per transformer- **'nlp_model'**: Only enables NLP models that process pure text- **'nlp_transformer'**: Only enables NLP transformers that process pure text, while any model type is allowed- **'image_model'**: Only enables Image models that process pure images- **'image_transformer'**: Only enables Image transformers that process pure images, while any model type is allowed- **'unsupervised'**: Only enables unsupervised transformers, models and scorers- **'gpus_max'**: Maximize use of GPUs (e.g. use XGBoost, rapids, Optuna hyperparameter search, etc.)- **'more_overfit_protection'**: Potentially improve overfit, esp. for small data, by disabling target encoding and making GA behave like final model for tree counts and learning rate- **'feature_store_mojo'**: Creates a MOJO to be used as transformer in the H2O Feature Store, to augment data on a row-by-row level based on Driverless AI's feature engineering. Only includes transformers that don't depend on the target, since features like target encoding need to be created at model fitting time to avoid data leakage. And features like lags need to be created from the raw data, they can't be computed with a row-by-row MOJO transformer.Each pipeline building recipe mode can be chosen, and then fine-tuned using each expert settings. 
Changing the pipeline building recipe will reset all pipeline building recipe options back to default and then re-apply the specific rules for the new mode, which will undo any fine-tuning of expert options that are part of pipeline building recipe rules. If choose to do new/continued/refitted/retrained experiment from parent experiment, the recipe rules are not re-applied and any fine-tuning is preserved. To reset recipe behavior, one can switch between 'auto' and the desired mode. This way the new child experiment will use the default settings for the chosen recipe." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "recipe", + "output": "recipe refers to Pipeline Building Recipe: # Recipe type## Recipes override any GUI settings- **'auto'**: all models and features automatically determined by experiment settings, toml settings, and feature_engineering_effort- **'compliant'** : like 'auto' except: - *interpretability=10* (to avoid complexity, overrides GUI or python client chose for interpretability) - *enable_glm='on'* (rest 'off', to avoid complexity and be compatible with algorithms supported by MLI) - *fixed_ensemble_level=0*: Don't use any ensemble - *feature_brain_level=0*(: No feature brain used (to ensure every restart is identical) - *max_feature_interaction_depth=1*: interaction depth is set to 1 (no multi-feature interactions to avoid complexity) - *target_transformer='identity'*: for regression (to avoid complexity) - *check_distribution_shift_drop='off'*: Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning)- **'monotonic_gbm'** : like 'auto' except: - *monotonicity_constraints_interpretability_switch=1*: enable monotonicity constraints - *self.config.monotonicity_constraints_correlation_threshold = 0.01*: see below - *monotonicity_constraints_drop_low_correlation_features=true*: drop features that aren't correlated with target 
by at least 0.01 (specified by parameter above) - *fixed_ensemble_level=0*: Don't use any ensemble (to avoid complexity) - *included_models=['LightGBMModel']* - *included_transformers=['OriginalTransformer']*: only original (numeric) features will be used - *feature_brain_level=0*: No feature brain used (to ensure every restart is identical) - *monotonicity_constraints_log_level='high'* - *autodoc_pd_max_runtime=-1*: no timeout for PDP creation in AutoDoc- **'kaggle'** : like 'auto' except: - external validation set is concatenated with train set, with target marked as missing - test set is concatenated with train set, with target marked as missing - transformers that do not use the target are allowed to fit_transform across entire train + validation + test - several config toml expert options open-up limits (e.g. more numerics are treated as categoricals) - Note: If plentiful memory, can: - choose kaggle mode and then change fixed_feature_interaction_depth to large negative number, otherwise default number of features given to transformer is limited to 50 by default - choose mutation_mode = \"full\", so even more types are transformations are done at once per transformer- **'nlp_model'**: Only enables NLP models that process pure text- **'nlp_transformer'**: Only enables NLP transformers that process pure text, while any model type is allowed- **'image_model'**: Only enables Image models that process pure images- **'image_transformer'**: Only enables Image transformers that process pure images, while any model type is allowed- **'unsupervised'**: Only enables unsupervised transformers, models and scorers- **'gpus_max'**: Maximize use of GPUs (e.g. use XGBoost, rapids, Optuna hyperparameter search, etc.)- **'more_overfit_protection'**: Potentially improve overfit, esp. 
for small data, by disabling target encoding and making GA behave like final model for tree counts and learning rate- **'feature_store_mojo'**: Creates a MOJO to be used as transformer in the H2O Feature Store, to augment data on a row-by-row level based on Driverless AI's feature engineering. Only includes transformers that don't depend on the target, since features like target encoding need to be created at model fitting time to avoid data leakage. And features like lags need to be created from the raw data, they can't be computed with a row-by-row MOJO transformer. Each pipeline building recipe mode can be chosen, and then fine-tuned using each expert settings. Changing the pipeline building recipe will reset all pipeline building recipe options back to default and then re-apply the specific rules for the new mode, which will undo any fine-tuning of expert options that are part of pipeline building recipe rules. If choose to do new/continued/refitted/retrained experiment from parent experiment, the recipe rules are not re-applied and any fine-tuning is preserved. To reset recipe behavior, one can switch between 'auto' and the desired mode. This way the new child experiment will use the default settings for the chosen recipe."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Pipeline Building Recipe: ", + "output": "recipe refers to Pipeline Building Recipe: # Recipe type## Recipes override any GUI settings- **'auto'**: all models and features automatically determined by experiment settings, toml settings, and feature_engineering_effort- **'compliant'** : like 'auto' except: - *interpretability=10* (to avoid complexity, overrides GUI or python client chose for interpretability) - *enable_glm='on'* (rest 'off', to avoid complexity and be compatible with algorithms supported by MLI) - *fixed_ensemble_level=0*: Don't use any ensemble - *feature_brain_level=0*(: No feature brain used (to ensure every restart is identical) - *max_feature_interaction_depth=1*: interaction depth is set to 1 (no multi-feature interactions to avoid complexity) - *target_transformer='identity'*: for regression (to avoid complexity) - *check_distribution_shift_drop='off'*: Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning)- **'monotonic_gbm'** : like 'auto' except: - *monotonicity_constraints_interpretability_switch=1*: enable monotonicity constraints - *self.config.monotonicity_constraints_correlation_threshold = 0.01*: see below - *monotonicity_constraints_drop_low_correlation_features=true*: drop features that aren't correlated with target by at least 0.01 (specified by parameter above) - *fixed_ensemble_level=0*: Don't use any ensemble (to avoid complexity) - *included_models=['LightGBMModel']* - *included_transformers=['OriginalTransformer']*: only original (numeric) features will be used - *feature_brain_level=0*: No feature brain used (to ensure every restart is identical) - *monotonicity_constraints_log_level='high'* - *autodoc_pd_max_runtime=-1*: no timeout for PDP creation in AutoDoc- **'kaggle'** : like 'auto' except: - external validation set is concatenated with train 
set, with target marked as missing - test set is concatenated with train set, with target marked as missing - transformers that do not use the target are allowed to fit_transform across entire train + validation + test - several config toml expert options open-up limits (e.g. more numerics are treated as categoricals) - Note: If plentiful memory, can: - choose kaggle mode and then change fixed_feature_interaction_depth to large negative number, otherwise default number of features given to transformer is limited to 50 by default - choose mutation_mode = \"full\", so even more types are transformations are done at once per transformer- **'nlp_model'**: Only enables NLP models that process pure text- **'nlp_transformer'**: Only enables NLP transformers that process pure text, while any model type is allowed- **'image_model'**: Only enables Image models that process pure images- **'image_transformer'**: Only enables Image transformers that process pure images, while any model type is allowed- **'unsupervised'**: Only enables unsupervised transformers, models and scorers- **'gpus_max'**: Maximize use of GPUs (e.g. use XGBoost, rapids, Optuna hyperparameter search, etc.)- **'more_overfit_protection'**: Potentially improve overfit, esp. for small data, by disabling target encoding and making GA behave like final model for tree counts and learning rate- **'feature_store_mojo'**: Creates a MOJO to be used as transformer in the H2O Feature Store, to augment data on a row-by-row level based on Driverless AI's feature engineering. Only includes transformers that don't depend on the target, since features like target encoding need to be created at model fitting time to avoid data leakage. And features like lags need to be created from the raw data, they can't be computed with a row-by-row MOJO transformer.Each pipeline building recipe mode can be chosen, and then fine-tuned using each expert settings. 
Changing the pipeline building recipe will reset all pipeline building recipe options back to default and then re-apply the specific rules for the new mode, which will undo any fine-tuning of expert options that are part of pipeline building recipe rules. If choose to do new/continued/refitted/retrained experiment from parent experiment, the recipe rules are not re-applied and any fine-tuning is preserved. To reset recipe behavior, one can switch between 'auto' and the desired mode. This way the new child experiment will use the default settings for the chosen recipe." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting recipe", + "output": "recipe refers to # Recipe type## Recipes override any GUI settings- **'auto'**: all models and features automatically determined by experiment settings, toml settings, and feature_engineering_effort- **'compliant'** : like 'auto' except: - *interpretability=10* (to avoid complexity, overrides GUI or python client chose for interpretability) - *enable_glm='on'* (rest 'off', to avoid complexity and be compatible with algorithms supported by MLI) - *fixed_ensemble_level=0*: Don't use any ensemble - *feature_brain_level=0*(: No feature brain used (to ensure every restart is identical) - *max_feature_interaction_depth=1*: interaction depth is set to 1 (no multi-feature interactions to avoid complexity) - *target_transformer='identity'*: for regression (to avoid complexity) - *check_distribution_shift_drop='off'*: Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning)- **'monotonic_gbm'** : like 'auto' except: - *monotonicity_constraints_interpretability_switch=1*: enable monotonicity constraints - *self.config.monotonicity_constraints_correlation_threshold = 0.01*: see below - *monotonicity_constraints_drop_low_correlation_features=true*: drop features that aren't correlated with target by at least 0.01 (specified by parameter 
above) - *fixed_ensemble_level=0*: Don't use any ensemble (to avoid complexity) - *included_models=['LightGBMModel']* - *included_transformers=['OriginalTransformer']*: only original (numeric) features will be used - *feature_brain_level=0*: No feature brain used (to ensure every restart is identical) - *monotonicity_constraints_log_level='high'* - *autodoc_pd_max_runtime=-1*: no timeout for PDP creation in AutoDoc- **'kaggle'** : like 'auto' except: - external validation set is concatenated with train set, with target marked as missing - test set is concatenated with train set, with target marked as missing - transformers that do not use the target are allowed to fit_transform across entire train + validation + test - several config toml expert options open-up limits (e.g. more numerics are treated as categoricals) - Note: If plentiful memory, can: - choose kaggle mode and then change fixed_feature_interaction_depth to large negative number, otherwise default number of features given to transformer is limited to 50 by default - choose mutation_mode = \"full\", so even more types are transformations are done at once per transformer- **'nlp_model'**: Only enables NLP models that process pure text- **'nlp_transformer'**: Only enables NLP transformers that process pure text, while any model type is allowed- **'image_model'**: Only enables Image models that process pure images- **'image_transformer'**: Only enables Image transformers that process pure images, while any model type is allowed- **'unsupervised'**: Only enables unsupervised transformers, models and scorers- **'gpus_max'**: Maximize use of GPUs (e.g. use XGBoost, rapids, Optuna hyperparameter search, etc.)- **'more_overfit_protection'**: Potentially improve overfit, esp. 
for small data, by disabling target encoding and making GA behave like final model for tree counts and learning rate- **'feature_store_mojo'**: Creates a MOJO to be used as transformer in the H2O Feature Store, to augment data on a row-by-row level based on Driverless AI's feature engineering. Only includes transformers that don't depend on the target, since features like target encoding need to be created at model fitting time to avoid data leakage. And features like lags need to be created from the raw data, they can't be computed with a row-by-row MOJO transformer. Each pipeline building recipe mode can be chosen, and then fine-tuned using each expert settings. Changing the pipeline building recipe will reset all pipeline building recipe options back to default and then re-apply the specific rules for the new mode, which will undo any fine-tuning of expert options that are part of pipeline building recipe rules. If choose to do new/continued/refitted/retrained experiment from parent experiment, the recipe rules are not re-applied and any fine-tuning is preserved. To reset recipe behavior, one can switch between 'auto' and the desired mode. This way the new child experiment will use the default settings for the chosen recipe."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting recipe", + "output": "recipe refers to Pipeline Building Recipe: # Recipe type## Recipes override any GUI settings- **'auto'**: all models and features automatically determined by experiment settings, toml settings, and feature_engineering_effort- **'compliant'** : like 'auto' except: - *interpretability=10* (to avoid complexity, overrides GUI or python client chose for interpretability) - *enable_glm='on'* (rest 'off', to avoid complexity and be compatible with algorithms supported by MLI) - *fixed_ensemble_level=0*: Don't use any ensemble - *feature_brain_level=0*(: No feature brain used (to ensure every restart is identical) - *max_feature_interaction_depth=1*: interaction depth is set to 1 (no multi-feature interactions to avoid complexity) - *target_transformer='identity'*: for regression (to avoid complexity) - *check_distribution_shift_drop='off'*: Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning)- **'monotonic_gbm'** : like 'auto' except: - *monotonicity_constraints_interpretability_switch=1*: enable monotonicity constraints - *self.config.monotonicity_constraints_correlation_threshold = 0.01*: see below - *monotonicity_constraints_drop_low_correlation_features=true*: drop features that aren't correlated with target by at least 0.01 (specified by parameter above) - *fixed_ensemble_level=0*: Don't use any ensemble (to avoid complexity) - *included_models=['LightGBMModel']* - *included_transformers=['OriginalTransformer']*: only original (numeric) features will be used - *feature_brain_level=0*: No feature brain used (to ensure every restart is identical) - *monotonicity_constraints_log_level='high'* - *autodoc_pd_max_runtime=-1*: no timeout for PDP creation in AutoDoc- **'kaggle'** : like 'auto' except: - external validation set is concatenated with train set, with target marked as missing - 
test set is concatenated with train set, with target marked as missing - transformers that do not use the target are allowed to fit_transform across entire train + validation + test - several config toml expert options open-up limits (e.g. more numerics are treated as categoricals) - Note: If plentiful memory, can: - choose kaggle mode and then change fixed_feature_interaction_depth to large negative number, otherwise default number of features given to transformer is limited to 50 by default - choose mutation_mode = \"full\", so even more types are transformations are done at once per transformer- **'nlp_model'**: Only enables NLP models that process pure text- **'nlp_transformer'**: Only enables NLP transformers that process pure text, while any model type is allowed- **'image_model'**: Only enables Image models that process pure images- **'image_transformer'**: Only enables Image transformers that process pure images, while any model type is allowed- **'unsupervised'**: Only enables unsupervised transformers, models and scorers- **'gpus_max'**: Maximize use of GPUs (e.g. use XGBoost, rapids, Optuna hyperparameter search, etc.)- **'more_overfit_protection'**: Potentially improve overfit, esp. for small data, by disabling target encoding and making GA behave like final model for tree counts and learning rate- **'feature_store_mojo'**: Creates a MOJO to be used as transformer in the H2O Feature Store, to augment data on a row-by-row level based on Driverless AI's feature engineering. Only includes transformers that don't depend on the target, since features like target encoding need to be created at model fitting time to avoid data leakage. And features like lags need to be created from the raw data, they can't be computed with a row-by-row MOJO transformer.Each pipeline building recipe mode can be chosen, and then fine-tuned using each expert settings. 
Changing the pipeline building recipe will reset all pipeline building recipe options back to default and then re-apply the specific rules for the new mode, which will undo any fine-tuning of expert options that are part of pipeline building recipe rules. If you choose to do a new/continued/refitted/retrained experiment from a parent experiment, the recipe rules are not re-applied and any fine-tuning is preserved. To reset recipe behavior, one can switch between 'auto' and the desired mode. This way the new child experiment will use the default settings for the chosen recipe." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_unsupervised_expert_mode", + "output": "custom unsupervised expert mode refers to Whether to treat model like UnsupervisedModel, so that one specifies each scorer, pretransformer, and transformer in expert panel like one would do for supervised experiments. Otherwise (False), custom unsupervised models will assume the model itself specified these. If the unsupervised model chosen has _included_transformers, _included_pretransformers, and _included_scorers selected, this should be set to False (default) else should be set to True. Then if one wants the unsupervised model to only produce 1 gene-transformer, then the custom unsupervised model can have: _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] The 1000 for the pretransformer layer just means that layer can have any number of genes. Choose 1 if you expect single instance of the pretransformer to be all one needs, e.g. consumes input features fully and produces complete useful output features.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_unsupervised_expert_mode", + "output": "custom unsupervised expert mode refers to Whether to treat custom unsupervised model like UnsupervisedModel: Whether to treat model like UnsupervisedModel, so that one specifies each scorer, pretransformer, and transformer in expert panel like one would do for supervised experiments. Otherwise (False), custom unsupervised models will assume the model itself specified these. If the unsupervised model chosen has _included_transformers, _included_pretransformers, and _included_scorers selected, this should be set to False (default) else should be set to True. Then if one wants the unsupervised model to only produce 1 gene-transformer, then the custom unsupervised model can have: _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] The 1000 for the pretransformer layer just means that layer can have any number of genes. Choose 1 if you expect single instance of the pretransformer to be all one needs, e.g. consumes input features fully and produces complete useful output features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom unsupervised expert mode", + "output": "custom unsupervised expert mode refers to Whether to treat custom unsupervised model like UnsupervisedModel: Whether to treat model like UnsupervisedModel, so that one specifies each scorer, pretransformer, and transformer in expert panel like one would do for supervised experiments. Otherwise (False), custom unsupervised models will assume the model itself specified these. If the unsupervised model chosen has _included_transformers, _included_pretransformers, and _included_scorers selected, this should be set to False (default) else should be set to True. 
Then if one wants the unsupervised model to only produce 1 gene-transformer, then the custom unsupervised model can have: _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] The 1000 for the pretransformer layer just means that layer can have any number of genes. Choose 1 if you expect single instance of the pretransformer to be all one needs, e.g. consumes input features fully and produces complete useful output features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to treat custom unsupervised model like UnsupervisedModel: ", + "output": "custom unsupervised expert mode refers to Whether to treat custom unsupervised model like UnsupervisedModel: Whether to treat model like UnsupervisedModel, so that one specifies each scorer, pretransformer, and transformer in expert panel like one would do for supervised experiments. Otherwise (False), custom unsupervised models will assume the model itself specified these. If the unsupervised model chosen has _included_transformers, _included_pretransformers, and _included_scorers selected, this should be set to False (default) else should be set to True. Then if one wants the unsupervised model to only produce 1 gene-transformer, then the custom unsupervised model can have: _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] The 1000 for the pretransformer layer just means that layer can have any number of genes. Choose 1 if you expect single instance of the pretransformer to be all one needs, e.g. consumes input features fully and produces complete useful output features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting custom_unsupervised_expert_mode", + "output": "custom unsupervised expert mode refers to Whether to treat model like UnsupervisedModel, so that one specifies each scorer, pretransformer, and transformer in expert panel like one would do for supervised experiments. 
Otherwise (False), custom unsupervised models will assume the model itself specified these. If the unsupervised model chosen has _included_transformers, _included_pretransformers, and _included_scorers selected, this should be set to False (default) else should be set to True. Then if one wants the unsupervised model to only produce 1 gene-transformer, then the custom unsupervised model can have: _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] The 1000 for the pretransformer layer just means that layer can have any number of genes. Choose 1 if you expect single instance of the pretransformer to be all one needs, e.g. consumes input features fully and produces complete useful output features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting custom_unsupervised_expert_mode", + "output": "custom unsupervised expert mode refers to Whether to treat custom unsupervised model like UnsupervisedModel: Whether to treat model like UnsupervisedModel, so that one specifies each scorer, pretransformer, and transformer in expert panel like one would do for supervised experiments. Otherwise (False), custom unsupervised models will assume the model itself specified these. If the unsupervised model chosen has _included_transformers, _included_pretransformers, and _included_scorers selected, this should be set to False (default) else should be set to True. Then if one wants the unsupervised model to only produce 1 gene-transformer, then the custom unsupervised model can have: _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] The 1000 for the pretransformer layer just means that layer can have any number of genes. Choose 1 if you expect single instance of the pretransformer to be all one needs, e.g. consumes input features fully and produces complete useful output features. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_genetic_algorithm", + "output": "enable genetic algorithm refers to Whether to enable genetic algorithm for selection and hyper-parameter tuning of features and models.- If disabled ('off'), will go directly to final pipeline training (using default feature engineering and feature selection).- 'auto' is same as 'on' unless pure NLP or Image experiment.- \"Optuna\": Uses DAI genetic algorithm for feature engineering, but model hyperparameters are tuned with Optuna. - In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. - Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). - If Pruner is enabled, as is default, Optuna mode disables mutations of eval_metric so pruning uses same metric across trials to compare properly.Currently does not supported when pre_transformers or multi-layer pipeline used, which must go through at least one round of tuning or evolution. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_genetic_algorithm", + "output": "enable genetic algorithm refers to Enable genetic algorithm for selection and tuning of features and models: Whether to enable genetic algorithm for selection and hyper-parameter tuning of features and models.- If disabled ('off'), will go directly to final pipeline training (using default feature engineering and feature selection).- 'auto' is same as 'on' unless pure NLP or Image experiment.- \"Optuna\": Uses DAI genetic algorithm for feature engineering, but model hyperparameters are tuned with Optuna. - In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. - Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). 
- If Pruner is enabled, as is default, Optuna mode disables mutations of eval_metric so pruning uses same metric across trials to compare properly. Currently not supported when pre_transformers or a multi-layer pipeline is used, which must go through at least one round of tuning or evolution. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable genetic algorithm", + "output": "enable genetic algorithm refers to Enable genetic algorithm for selection and tuning of features and models: Whether to enable genetic algorithm for selection and hyper-parameter tuning of features and models. - If disabled ('off'), will go directly to final pipeline training (using default feature engineering and feature selection). - 'auto' is same as 'on' unless pure NLP or Image experiment. - \"Optuna\": Uses DAI genetic algorithm for feature engineering, but model hyperparameters are tuned with Optuna. - In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. - Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). - If Pruner is enabled, as is default, Optuna mode disables mutations of eval_metric so pruning uses same metric across trials to compare properly. Currently not supported when pre_transformers or a multi-layer pipeline is used, which must go through at least one round of tuning or evolution.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable genetic algorithm for selection and tuning of features and models: ", + "output": "enable genetic algorithm refers to Enable genetic algorithm for selection and tuning of features and models: Whether to enable genetic algorithm for selection and hyper-parameter tuning of features and models.- If disabled ('off'), will go directly to final pipeline training (using default feature engineering and feature selection).- 'auto' is same as 'on' unless pure NLP or Image experiment.- \"Optuna\": Uses DAI genetic algorithm for feature engineering, but model hyperparameters are tuned with Optuna. - In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. - Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). - If Pruner is enabled, as is default, Optuna mode disables mutations of eval_metric so pruning uses same metric across trials to compare properly.Currently does not supported when pre_transformers or multi-layer pipeline used, which must go through at least one round of tuning or evolution. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_genetic_algorithm", + "output": "enable genetic algorithm refers to Whether to enable genetic algorithm for selection and hyper-parameter tuning of features and models.- If disabled ('off'), will go directly to final pipeline training (using default feature engineering and feature selection).- 'auto' is same as 'on' unless pure NLP or Image experiment.- \"Optuna\": Uses DAI genetic algorithm for feature engineering, but model hyperparameters are tuned with Optuna. - In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. - Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). 
- If Pruner is enabled, as is default, Optuna mode disables mutations of eval_metric so pruning uses same metric across trials to compare properly. Currently not supported when pre_transformers or a multi-layer pipeline is used, which must go through at least one round of tuning or evolution. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_genetic_algorithm", + "output": "enable genetic algorithm refers to Enable genetic algorithm for selection and tuning of features and models: Whether to enable genetic algorithm for selection and hyper-parameter tuning of features and models. - If disabled ('off'), will go directly to final pipeline training (using default feature engineering and feature selection). - 'auto' is same as 'on' unless pure NLP or Image experiment. - \"Optuna\": Uses DAI genetic algorithm for feature engineering, but model hyperparameters are tuned with Optuna. - In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. - Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). - If Pruner is enabled, as is default, Optuna mode disables mutations of eval_metric so pruning uses same metric across trials to compare properly. Currently not supported when pre_transformers or a multi-layer pipeline is used, which must go through at least one round of tuning or evolution.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_engineering_effort", + "output": "feature engineering effort refers to How much effort to spend on feature engineering (-1...10)Heuristic combination of various developer-level toml parameters-1 : auto (5, except 1 for wide data in order to limit engineering)0 : keep only numeric features, only model tuning during evolution1 : keep only numeric features and frequency-encoded categoricals, only model tuning during evolution2 : Like #1 but instead just no Text features. Some feature tuning before evolution.3 : Like #5 but only tuning during evolution. Mixed tuning of features and model parameters.4 : Like #5, but slightly more focused on model tuning5 : Default. Balanced feature-model tuning6-7 : Like #5, but slightly more focused on feature engineering8 : Like #6-7, but even more focused on feature engineering with high feature generation rate, no feature dropping even if high interpretability9-10: Like #8, but no model tuning during feature evolution " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_engineering_effort", + "output": "feature engineering effort refers to Feature engineering effort (0..10): How much effort to spend on feature engineering (-1...10)Heuristic combination of various developer-level toml parameters-1 : auto (5, except 1 for wide data in order to limit engineering)0 : keep only numeric features, only model tuning during evolution1 : keep only numeric features and frequency-encoded categoricals, only model tuning during evolution2 : Like #1 but instead just no Text features. Some feature tuning before evolution.3 : Like #5 but only tuning during evolution. Mixed tuning of features and model parameters.4 : Like #5, but slightly more focused on model tuning5 : Default. 
Balanced feature-model tuning6-7 : Like #5, but slightly more focused on feature engineering8 : Like #6-7, but even more focused on feature engineering with high feature generation rate, no feature dropping even if high interpretability9-10: Like #8, but no model tuning during feature evolution " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature engineering effort", + "output": "feature engineering effort refers to Feature engineering effort (0..10): How much effort to spend on feature engineering (-1...10)Heuristic combination of various developer-level toml parameters-1 : auto (5, except 1 for wide data in order to limit engineering)0 : keep only numeric features, only model tuning during evolution1 : keep only numeric features and frequency-encoded categoricals, only model tuning during evolution2 : Like #1 but instead just no Text features. Some feature tuning before evolution.3 : Like #5 but only tuning during evolution. Mixed tuning of features and model parameters.4 : Like #5, but slightly more focused on model tuning5 : Default. 
Balanced feature-model tuning6-7 : Like #5, but slightly more focused on feature engineering8 : Like #6-7, but even more focused on feature engineering with high feature generation rate, no feature dropping even if high interpretability9-10: Like #8, but no model tuning during feature evolution " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Feature engineering effort (0..10): ", + "output": "feature engineering effort refers to Feature engineering effort (0..10): How much effort to spend on feature engineering (-1...10)Heuristic combination of various developer-level toml parameters-1 : auto (5, except 1 for wide data in order to limit engineering)0 : keep only numeric features, only model tuning during evolution1 : keep only numeric features and frequency-encoded categoricals, only model tuning during evolution2 : Like #1 but instead just no Text features. Some feature tuning before evolution.3 : Like #5 but only tuning during evolution. Mixed tuning of features and model parameters.4 : Like #5, but slightly more focused on model tuning5 : Default. 
Balanced feature-model tuning6-7 : Like #5, but slightly more focused on feature engineering8 : Like #6-7, but even more focused on feature engineering with high feature generation rate, no feature dropping even if high interpretability9-10: Like #8, but no model tuning during feature evolution " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting feature_engineering_effort", + "output": "feature engineering effort refers to How much effort to spend on feature engineering (-1...10)Heuristic combination of various developer-level toml parameters-1 : auto (5, except 1 for wide data in order to limit engineering)0 : keep only numeric features, only model tuning during evolution1 : keep only numeric features and frequency-encoded categoricals, only model tuning during evolution2 : Like #1 but instead just no Text features. Some feature tuning before evolution.3 : Like #5 but only tuning during evolution. Mixed tuning of features and model parameters.4 : Like #5, but slightly more focused on model tuning5 : Default. 
Balanced feature-model tuning6-7 : Like #5, but slightly more focused on feature engineering8 : Like #6-7, but even more focused on feature engineering with high feature generation rate, no feature dropping even if high interpretability9-10: Like #8, but no model tuning during feature evolution " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting feature_engineering_effort", + "output": "feature engineering effort refers to Feature engineering effort (0..10): How much effort to spend on feature engineering (-1...10)Heuristic combination of various developer-level toml parameters-1 : auto (5, except 1 for wide data in order to limit engineering)0 : keep only numeric features, only model tuning during evolution1 : keep only numeric features and frequency-encoded categoricals, only model tuning during evolution2 : Like #1 but instead just no Text features. Some feature tuning before evolution.3 : Like #5 but only tuning during evolution. Mixed tuning of features and model parameters.4 : Like #5, but slightly more focused on model tuning5 : Default. Balanced feature-model tuning6-7 : Like #5, but slightly more focused on feature engineering8 : Like #6-7, but even more focused on feature engineering with high feature generation rate, no feature dropping even if high interpretability9-10: Like #8, but no model tuning during feature evolution " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_distribution_shift", + "output": "check distribution shift refers to Whether to enable train/valid and train/test distribution shift detection ('auto'/'on'/'off').By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_distribution_shift", + "output": "check distribution shift refers to Data distribution shift detection: Whether to enable train/valid and train/test distribution shift detection ('auto'/'on'/'off').By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check distribution shift", + "output": "check distribution shift refers to Data distribution shift detection: Whether to enable train/valid and train/test distribution shift detection ('auto'/'on'/'off').By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Data distribution shift detection: ", + "output": "check distribution shift refers to Data distribution shift detection: Whether to enable train/valid and train/test distribution shift detection ('auto'/'on'/'off').By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting check_distribution_shift", + "output": "check distribution shift refers to Whether to enable train/valid and train/test distribution shift detection ('auto'/'on'/'off').By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting check_distribution_shift", + "output": "check distribution shift refers to Data distribution shift detection: Whether to enable train/valid and train/test distribution shift detection ('auto'/'on'/'off').By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_distribution_shift_transformed", + "output": "check distribution shift transformed refers to Whether to enable train/test distribution shift detection ('auto'/'on'/'off') for final model transformed features.By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_distribution_shift_transformed", + "output": "check distribution shift transformed refers to Data distribution shift detection on transformed features: Whether to enable train/test distribution shift detection ('auto'/'on'/'off') for final model transformed features.By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check distribution shift transformed", + "output": "check distribution shift transformed refers to Data distribution shift detection on transformed features: Whether to enable train/test distribution shift detection ('auto'/'on'/'off') for final model transformed features.By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Data distribution shift detection on transformed features: ", + "output": "check distribution shift transformed refers to Data distribution shift detection on transformed features: Whether to enable train/test distribution shift detection ('auto'/'on'/'off') for final model transformed features.By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting check_distribution_shift_transformed", + "output": "check distribution shift transformed refers to Whether to enable train/test distribution shift detection ('auto'/'on'/'off') for final model transformed features.By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting check_distribution_shift_transformed", + "output": "check distribution shift transformed refers to Data distribution shift detection on transformed features: Whether to enable train/test distribution shift detection ('auto'/'on'/'off') for final model transformed features.By default, LightGBMModel is used for shift detection if possible, unless it is turned off in modelexpert panel, and then only the models selected in recipe list will be used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_distribution_shift_drop", + "output": "check distribution shift drop refers to Whether to drop high-shift features ('auto'/'on'/'off'). Auto disables for time series." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_distribution_shift_drop", + "output": "check distribution shift drop refers to Data distribution shift detection drop of features: Whether to drop high-shift features ('auto'/'on'/'off'). Auto disables for time series." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check distribution shift drop", + "output": "check distribution shift drop refers to Data distribution shift detection drop of features: Whether to drop high-shift features ('auto'/'on'/'off'). Auto disables for time series." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Data distribution shift detection drop of features: ", + "output": "check distribution shift drop refers to Data distribution shift detection drop of features: Whether to drop high-shift features ('auto'/'on'/'off'). Auto disables for time series." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting check_distribution_shift_drop", + "output": "check distribution shift drop refers to Whether to drop high-shift features ('auto'/'on'/'off'). Auto disables for time series." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting check_distribution_shift_drop", + "output": "check distribution shift drop refers to Data distribution shift detection drop of features: Whether to drop high-shift features ('auto'/'on'/'off'). Auto disables for time series." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_features_distribution_shift_threshold_auc", + "output": "drop features distribution shift threshold auc refers to If distribution shift detection is enabled, drop features (except ID, text, date/datetime, time, weight) for which shift AUC, GINI, or Spearman correlation is above this value (e.g. AUC of a binary classifier that predicts whether given feature value belongs to train or test data) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_features_distribution_shift_threshold_auc", + "output": "drop features distribution shift threshold auc refers to Max allowed feature shift (AUC) before dropping feature: If distribution shift detection is enabled, drop features (except ID, text, date/datetime, time, weight) for which shift AUC, GINI, or Spearman correlation is above this value (e.g.
AUC of a binary classifier that predicts whether given feature value belongs to train or test data) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop features distribution shift threshold auc", + "output": "drop features distribution shift threshold auc refers to Max allowed feature shift (AUC) before dropping feature: If distribution shift detection is enabled, drop features (except ID, text, date/datetime, time, weight) for which shift AUC, GINI, or Spearman correlation is above this value (e.g. AUC of a binary classifier that predicts whether given feature value belongs to train or test data) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max allowed feature shift (AUC) before dropping feature: ", + "output": "drop features distribution shift threshold auc refers to Max allowed feature shift (AUC) before dropping feature: If distribution shift detection is enabled, drop features (except ID, text, date/datetime, time, weight) for which shift AUC, GINI, or Spearman correlation is above this value (e.g. AUC of a binary classifier that predicts whether given feature value belongs to train or test data) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting drop_features_distribution_shift_threshold_auc", + "output": "drop features distribution shift threshold auc refers to If distribution shift detection is enabled, drop features (except ID, text, date/datetime, time, weight) for which shift AUC, GINI, or Spearman correlation is above this value (e.g.
AUC of a binary classifier that predicts whether given feature value belongs to train or test data) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting drop_features_distribution_shift_threshold_auc", + "output": "drop features distribution shift threshold auc refers to Max allowed feature shift (AUC) before dropping feature: If distribution shift detection is enabled, drop features (except ID, text, date/datetime, time, weight) for which shift AUC, GINI, or Spearman correlation is above this value (e.g. AUC of a binary classifier that predicts whether given feature value belongs to train or test data) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_leakage", + "output": "check leakage refers to Specify whether to check leakage for each feature (``on`` or ``off``). If a fold column is used, this option checks leakage without using the fold column. By default, LightGBM Model is used for leakage detection when possible, unless it is turned off in the Model Expert Settings tab, in which case only the models selected with the ``included_models`` option are used. Note that this option is always disabled for time series experiments. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_leakage", + "output": "check leakage refers to Leakage detection: Specify whether to check leakage for each feature (``on`` or ``off``). If a fold column is used, this option checks leakage without using the fold column. By default, LightGBM Model is used for leakage detection when possible, unless it is turned off in the Model Expert Settings tab, in which case only the models selected with the ``included_models`` option are used. Note that this option is always disabled for time series experiments.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check leakage", + "output": "check leakage refers to Leakage detection: Specify whether to check leakage for each feature (``on`` or ``off``).If a fold column is used, this option checks leakage without using the fold column.By default, LightGBM Model is used for leakage detection when possible, unless it isturned off in the Model Expert Settings tab, in which case only the models selected withthe ``included_models`` option are used. Note that this option is always disabled for timeseries experiments. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Leakage detection: ", + "output": "check leakage refers to Leakage detection: Specify whether to check leakage for each feature (``on`` or ``off``).If a fold column is used, this option checks leakage without using the fold column.By default, LightGBM Model is used for leakage detection when possible, unless it isturned off in the Model Expert Settings tab, in which case only the models selected withthe ``included_models`` option are used. Note that this option is always disabled for timeseries experiments. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting check_leakage", + "output": "check leakage refers to Specify whether to check leakage for each feature (``on`` or ``off``).If a fold column is used, this option checks leakage without using the fold column.By default, LightGBM Model is used for leakage detection when possible, unless it isturned off in the Model Expert Settings tab, in which case only the models selected withthe ``included_models`` option are used. Note that this option is always disabled for timeseries experiments. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting check_leakage", + "output": "check leakage refers to Leakage detection: Specify whether to check leakage for each feature (``on`` or ``off``).If a fold column is used, this option checks leakage without using the fold column.By default, LightGBM Model is used for leakage detection when possible, unless it isturned off in the Model Expert Settings tab, in which case only the models selected withthe ``included_models`` option are used. Note that this option is always disabled for timeseries experiments. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_features_leakage_threshold_auc", + "output": "drop features leakage threshold auc refers to If leakage detection is enabled, drop features for which AUC (R2 for regression), GINI, or Spearman correlation is above this value. If fold column present, features are not dropped, because leakage test applies without fold column used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_features_leakage_threshold_auc", + "output": "drop features leakage threshold auc refers to Leakage detection dropping AUC/R2 threshold: If leakage detection is enabled, drop features for which AUC (R2 for regression), GINI, or Spearman correlation is above this value. If fold column present, features are not dropped, because leakage test applies without fold column used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop features leakage threshold auc", + "output": "drop features leakage threshold auc refers to Leakage detection dropping AUC/R2 threshold: If leakage detection is enabled, drop features for which AUC (R2 for regression), GINI, or Spearman correlation is above this value. 
If a fold column is present, features are not dropped, because the leakage test is applied without using the fold column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Leakage detection dropping AUC/R2 threshold: ", + "output": "drop features leakage threshold auc refers to Leakage detection dropping AUC/R2 threshold: If leakage detection is enabled, drop features for which AUC (R2 for regression), GINI, or Spearman correlation is above this value. If a fold column is present, features are not dropped, because the leakage test is applied without using the fold column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting drop_features_leakage_threshold_auc", + "output": "drop features leakage threshold auc refers to If leakage detection is enabled, drop features for which AUC (R2 for regression), GINI, or Spearman correlation is above this value. If a fold column is present, features are not dropped, because the leakage test is applied without using the fold column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting drop_features_leakage_threshold_auc", + "output": "drop features leakage threshold auc refers to Leakage detection dropping AUC/R2 threshold: If leakage detection is enabled, drop features for which AUC (R2 for regression), GINI, or Spearman correlation is above this value. If a fold column is present, features are not dropped, because the leakage test is applied without using the fold column.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_max_data_size", + "output": "leakage max data size refers to Max number of rows x number of columns to trigger (stratified) sampling for leakage checks " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_max_data_size", + "output": "leakage max data size refers to Max rows x columns for leakage: Max number of rows x number of columns to trigger (stratified) sampling for leakage checks " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage max data size", + "output": "leakage max data size refers to Max rows x columns for leakage: Max number of rows x number of columns to trigger (stratified) sampling for leakage checks " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max rows x columns for leakage: ", + "output": "leakage max data size refers to Max rows x columns for leakage: Max number of rows x number of columns to trigger (stratified) sampling for leakage checks " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting leakage_max_data_size", + "output": "leakage max data size refers to Max number of rows x number of columns to trigger (stratified) sampling for leakage checks " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting leakage_max_data_size", + "output": "leakage max data size refers to Max rows x columns for leakage: Max number of rows x number of columns to trigger (stratified) sampling for leakage checks " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_features_importance", + "output":
"max features importance refers to Specify the maximum number of features to use and show in importance tables. When Interpretability is set higher than 1, transformed or original features with lower importance than the top max_features_importance features are always removed. Feature importances of transformed or original features correspondingly will be pruned. Higher values can lead to lower performance and larger disk space used for datasets with more than 100k columns. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_features_importance", + "output": "max features importance refers to Max. num. features for variable importance: Specify the maximum number of features to use and show in importance tables. When Interpretability is set higher than 1, transformed or original features with lower importance than the top max_features_importance features are always removed. Feature importances of transformed or original features correspondingly will be pruned. Higher values can lead to lower performance and larger disk space used for datasets with more than 100k columns. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max features importance", + "output": "max features importance refers to Max. num. features for variable importance: Specify the maximum number of features to use and show in importance tables. When Interpretability is set higher than 1, transformed or original features with lower importance than the top max_features_importance features are always removed. Feature importances of transformed or original features correspondingly will be pruned. Higher values can lead to lower performance and larger disk space used for datasets with more than 100k columns. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. num.
features for variable importance: ", + "output": "max features importance refers to Max. num. features for variable importance: Specify the maximum number of features to use and show in importance tables. When Interpretability is set higher than 1, transformed or original features with lower importance than the top max_features_importance features are always removed. Feature importances of transformed or original features correspondingly will be pruned. Higher values can lead to lower performance and larger disk space used for datasets with more than 100k columns. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_features_importance", + "output": "max features importance refers to Specify the maximum number of features to use and show in importance tables. When Interpretability is set higher than 1, transformed or original features with lower importance than the top max_features_importance features are always removed. Feature importances of transformed or original features correspondingly will be pruned. Higher values can lead to lower performance and larger disk space used for datasets with more than 100k columns. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_features_importance", + "output": "max features importance refers to Max. num. features for variable importance: Specify the maximum number of features to use and show in importance tables. When Interpretability is set higher than 1, transformed or original features with lower importance than the top max_features_importance features are always removed. Feature importances of transformed or original features correspondingly will be pruned. Higher values can lead to lower performance and larger disk space used for datasets with more than 100k columns.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_python_scoring_pipeline", + "output": "make python scoring pipeline refers to Whether to create the Python scoring pipeline at the end of each experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_python_scoring_pipeline", + "output": "make python scoring pipeline refers to Make Python scoring pipeline: Whether to create the Python scoring pipeline at the end of each experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make python scoring pipeline", + "output": "make python scoring pipeline refers to Make Python scoring pipeline: Whether to create the Python scoring pipeline at the end of each experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Make Python scoring pipeline: ", + "output": "make python scoring pipeline refers to Make Python scoring pipeline: Whether to create the Python scoring pipeline at the end of each experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting make_python_scoring_pipeline", + "output": "make python scoring pipeline refers to Whether to create the Python scoring pipeline at the end of each experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting make_python_scoring_pipeline", + "output": "make python scoring pipeline refers to Make Python scoring pipeline: Whether to create the Python scoring pipeline at the end of each experiment." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_mojo_scoring_pipeline", + "output": "make mojo scoring pipeline refers to Whether to create the MOJO scoring pipeline at the end of each experiment. If set to \"auto\", will attempt to create it if possible (without dropping capabilities). If set to \"on\", might need to drop some models, transformers or custom recipes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_mojo_scoring_pipeline", + "output": "make mojo scoring pipeline refers to Make MOJO scoring pipeline: Whether to create the MOJO scoring pipeline at the end of each experiment. If set to \"auto\", will attempt to create it if possible (without dropping capabilities). If set to \"on\", might need to drop some models, transformers or custom recipes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make mojo scoring pipeline", + "output": "make mojo scoring pipeline refers to Make MOJO scoring pipeline: Whether to create the MOJO scoring pipeline at the end of each experiment. If set to \"auto\", will attempt to create it if possible (without dropping capabilities). If set to \"on\", might need to drop some models, transformers or custom recipes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Make MOJO scoring pipeline: ", + "output": "make mojo scoring pipeline refers to Make MOJO scoring pipeline: Whether to create the MOJO scoring pipeline at the end of each experiment. If set to \"auto\", will attempt to create it if possible (without dropping capabilities). If set to \"on\", might need to drop some models, transformers or custom recipes.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting make_mojo_scoring_pipeline", + "output": "make mojo scoring pipeline refers to Whether to create the MOJO scoring pipeline at the end of each experiment. If set to \"auto\", will attempt to create it if possible (without dropping capabilities). If set to \"on\", might need to drop some models, transformers or custom recipes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting make_mojo_scoring_pipeline", + "output": "make mojo scoring pipeline refers to Make MOJO scoring pipeline: Whether to create the MOJO scoring pipeline at the end of each experiment. If set to \"auto\", will attempt to create it if possible (without dropping capabilities). If set to \"on\", might need to drop some models, transformers or custom recipes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_triton_scoring_pipeline", + "output": "make triton scoring pipeline refers to Whether to create a C++ MOJO based Triton scoring pipeline at the end of each experiment. If set to \"auto\", will attempt to create it if possible (without dropping capabilities). If set to \"on\", might need to drop some models, transformers or custom recipes. Requires make_mojo_scoring_pipeline != \"off\". " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_triton_scoring_pipeline", + "output": "make triton scoring pipeline refers to Make Triton scoring pipeline: Whether to create a C++ MOJO based Triton scoring pipeline at the end of each experiment. If set to \"auto\", will attempt to create it if possible (without dropping capabilities). If set to \"on\", might need to drop some models, transformers or custom recipes. Requires make_mojo_scoring_pipeline != \"off\".
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make triton scoring pipeline", + "output": "make triton scoring pipeline refers to Make Triton scoring pipeline: Whether to create a C++ MOJO based Triton scoring pipeline at the end of each experiment. If set to \"auto\", will attempt to create it if possible (without dropping capabilities). If set to \"on\", might need to drop some models, transformers or custom recipes. Requires make_mojo_scoring_pipeline != \"off\". " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Make Triton scoring pipeline: ", + "output": "make triton scoring pipeline refers to Make Triton scoring pipeline: Whether to create a C++ MOJO based Triton scoring pipeline at the end of each experiment. If set to \"auto\", will attempt to create it if possible (without dropping capabilities). If set to \"on\", might need to drop some models, transformers or custom recipes. Requires make_mojo_scoring_pipeline != \"off\". " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting make_triton_scoring_pipeline", + "output": "make triton scoring pipeline refers to Whether to create a C++ MOJO based Triton scoring pipeline at the end of each experiment. If set to \"auto\", will attempt to create it if possible (without dropping capabilities). If set to \"on\", might need to drop some models, transformers or custom recipes. Requires make_mojo_scoring_pipeline != \"off\". " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting make_triton_scoring_pipeline", + "output": "make triton scoring pipeline refers to Make Triton scoring pipeline: Whether to create a C++ MOJO based Triton scoring pipeline at the end of each experiment.
If set to \"auto\", will attempt to create it if possible (without dropping capabilities). If set to \"on\", might need to drop some models, transformers or custom recipes. Requires make_mojo_scoring_pipeline != \"off\". " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auto_deploy_triton_scoring_pipeline", + "output": "auto deploy triton scoring pipeline refers to Whether to automatically deploy the model to the Triton inference server at the end of each experiment. \"local\" will deploy to the local (built-in) Triton inference server to location specified by triton_model_repository_dir_local. \"remote\" will deploy to the remote Triton inference server to location provided by triton_host_remote (and optionally, triton_model_repository_dir_remote). \"off\" requires manual action (Deploy wizard or Python client or manual transfer of exported Triton directory from Deploy wizard) to deploy the model to Triton. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auto_deploy_triton_scoring_pipeline", + "output": "auto deploy triton scoring pipeline refers to Whether to automatically deploy every model to built-in or remote Triton inference server.: Whether to automatically deploy the model to the Triton inference server at the end of each experiment. \"local\" will deploy to the local (built-in) Triton inference server to location specified by triton_model_repository_dir_local. \"remote\" will deploy to the remote Triton inference server to location provided by triton_host_remote (and optionally, triton_model_repository_dir_remote). \"off\" requires manual action (Deploy wizard or Python client or manual transfer of exported Triton directory from Deploy wizard) to deploy the model to Triton.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auto deploy triton scoring pipeline", + "output": "auto deploy triton scoring pipeline refers to Whether to automatically deploy every model to built-in or remote Triton inference server.: Whether to automatically deploy the model to the Triton inference server at the end of each experiment. \"local\" will deploy to the local (built-in) Triton inference server to location specified by triton_model_repository_dir_local. \"remote\" will deploy to the remote Triton inference server to location provided by triton_host_remote (and optionally, triton_model_repository_dir_remote). \"off\" requires manual action (Deploy wizard or Python client or manual transfer of exported Triton directory from Deploy wizard) to deploy the model to Triton. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to automatically deploy every model to built-in or remote Triton inference server.: ", + "output": "auto deploy triton scoring pipeline refers to Whether to automatically deploy every model to built-in or remote Triton inference server.: Whether to automatically deploy the model to the Triton inference server at the end of each experiment. \"local\" will deploy to the local (built-in) Triton inference server to location specified by triton_model_repository_dir_local. \"remote\" will deploy to the remote Triton inference server to location provided by triton_host_remote (and optionally, triton_model_repository_dir_remote). \"off\" requires manual action (Deploy wizard or Python client or manual transfer of exported Triton directory from Deploy wizard) to deploy the model to Triton.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auto_deploy_triton_scoring_pipeline", + "output": "auto deploy triton scoring pipeline refers to Whether to automatically deploy the model to the Triton inference server at the end of each experiment. \"local\" will deploy to the local (built-in) Triton inference server to location specified by triton_model_repository_dir_local. \"remote\" will deploy to the remote Triton inference server to location provided by triton_host_remote (and optionally, triton_model_repository_dir_remote). \"off\" requires manual action (Deploy wizard or Python client or manual transfer of exported Triton directory from Deploy wizard) to deploy the model to Triton. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auto_deploy_triton_scoring_pipeline", + "output": "auto deploy triton scoring pipeline refers to Whether to automatically deploy every model to built-in or remote Triton inference server.: Whether to automatically deploy the model to the Triton inference server at the end of each experiment. \"local\" will deploy to the local (built-in) Triton inference server to location specified by triton_model_repository_dir_local. \"remote\" will deploy to the remote Triton inference server to location provided by triton_host_remote (and optionally, triton_model_repository_dir_remote). \"off\" requires manual action (Deploy wizard or Python client or manual transfer of exported Triton directory from Deploy wizard) to deploy the model to Triton. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_dedup_local_tmp", + "output": "triton dedup local tmp refers to Replace duplicate files inside the Triton tmp directory with hard links, to significantly reduce the used disk space for local Triton deployments."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_dedup_local_tmp", + "output": "triton dedup local tmp refers to Replace duplicate files inside the Triton tmp directory with hard links, to significantly reduce the used disk space for local Triton deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton dedup local tmp", + "output": "triton dedup local tmp refers to Replace duplicate files inside the Triton tmp directory with hard links, to significantly reduce the used disk space for local Triton deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "triton dedup local tmp refers to Replace duplicate files inside the Triton tmp directory with hard links, to significantly reduce the used disk space for local Triton deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting triton_dedup_local_tmp", + "output": "triton dedup local tmp refers to Replace duplicate files inside the Triton tmp directory with hard links, to significantly reduce the used disk space for local Triton deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting triton_dedup_local_tmp", + "output": "triton dedup local tmp refers to Replace duplicate files inside the Triton tmp directory with hard links, to significantly reduce the used disk space for local Triton deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_mini_acceptance_test_local", + "output": "triton mini acceptance test local refers to Test local Triton deployments during creation of MOJO pipeline. 
Requires enable_triton_server_local and make_triton_scoring_pipeline to be enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_mini_acceptance_test_local", + "output": "triton mini acceptance test local refers to Test local Triton deployments during creation of MOJO pipeline.: Test local Triton deployments during creation of MOJO pipeline. Requires enable_triton_server_local and make_triton_scoring_pipeline to be enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton mini acceptance test local", + "output": "triton mini acceptance test local refers to Test local Triton deployments during creation of MOJO pipeline.: Test local Triton deployments during creation of MOJO pipeline. Requires enable_triton_server_local and make_triton_scoring_pipeline to be enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Test local Triton deployments during creation of MOJO pipeline.: ", + "output": "triton mini acceptance test local refers to Test local Triton deployments during creation of MOJO pipeline.: Test local Triton deployments during creation of MOJO pipeline. Requires enable_triton_server_local and make_triton_scoring_pipeline to be enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting triton_mini_acceptance_test_local", + "output": "triton mini acceptance test local refers to Test local Triton deployments during creation of MOJO pipeline. Requires enable_triton_server_local and make_triton_scoring_pipeline to be enabled." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting triton_mini_acceptance_test_local", + "output": "triton mini acceptance test local refers to Test local Triton deployments during creation of MOJO pipeline.: Test local Triton deployments during creation of MOJO pipeline. Requires enable_triton_server_local and make_triton_scoring_pipeline to be enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_mini_acceptance_test_remote", + "output": "triton mini acceptance test remote refers to Test remote Triton deployments during creation of MOJO pipeline. Requires triton_host_remote to be configured and make_triton_scoring_pipeline to be enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_mini_acceptance_test_remote", + "output": "triton mini acceptance test remote refers to Test remote Triton deployments during creation of MOJO pipeline.: Test remote Triton deployments during creation of MOJO pipeline. Requires triton_host_remote to be configured and make_triton_scoring_pipeline to be enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton mini acceptance test remote", + "output": "triton mini acceptance test remote refers to Test remote Triton deployments during creation of MOJO pipeline.: Test remote Triton deployments during creation of MOJO pipeline. Requires triton_host_remote to be configured and make_triton_scoring_pipeline to be enabled." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Test remote Triton deployments during creation of MOJO pipeline.: ", + "output": "triton mini acceptance test remote refers to Test remote Triton deployments during creation of MOJO pipeline.: Test remote Triton deployments during creation of MOJO pipeline. Requires triton_host_remote to be configured and make_triton_scoring_pipeline to be enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting triton_mini_acceptance_test_remote", + "output": "triton mini acceptance test remote refers to Test remote Triton deployments during creation of MOJO pipeline. Requires triton_host_remote to be configured and make_triton_scoring_pipeline to be enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting triton_mini_acceptance_test_remote", + "output": "triton mini acceptance test remote refers to Test remote Triton deployments during creation of MOJO pipeline.: Test remote Triton deployments during creation of MOJO pipeline. Requires triton_host_remote to be configured and make_triton_scoring_pipeline to be enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_for_predictions_benchmark", + "output": "mojo for predictions benchmark refers to Perform timing and accuracy benchmarks for Injected MOJO scoring vs Python scoring. This is for full scoring data, and can be slow. This also requires hard asserts. Doesn't force MOJO scoring by itself, so depends on mojo_for_predictions='on' if want full coverage." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_for_predictions_benchmark", + "output": "mojo for predictions benchmark refers to Perform timing and accuracy benchmarks for Injected MOJO scoring vs Python scoring. This is for full scoring data, and can be slow. This also requires hard asserts. Doesn't force MOJO scoring by itself, so depends on mojo_for_predictions='on' if want full coverage." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo for predictions benchmark", + "output": "mojo for predictions benchmark refers to Perform timing and accuracy benchmarks for Injected MOJO scoring vs Python scoring. This is for full scoring data, and can be slow. This also requires hard asserts. Doesn't force MOJO scoring by itself, so depends on mojo_for_predictions='on' if want full coverage." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mojo for predictions benchmark refers to Perform timing and accuracy benchmarks for Injected MOJO scoring vs Python scoring. This is for full scoring data, and can be slow. This also requires hard asserts. Doesn't force MOJO scoring by itself, so depends on mojo_for_predictions='on' if want full coverage." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_for_predictions_benchmark", + "output": "mojo for predictions benchmark refers to Perform timing and accuracy benchmarks for Injected MOJO scoring vs Python scoring. This is for full scoring data, and can be slow. This also requires hard asserts. Doesn't force MOJO scoring by itself, so depends on mojo_for_predictions='on' if want full coverage." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_for_predictions_benchmark", + "output": "mojo for predictions benchmark refers to Perform timing and accuracy benchmarks for Injected MOJO scoring vs Python scoring. This is for full scoring data, and can be slow. This also requires hard asserts. Doesn't force MOJO scoring by itself, so depends on mojo_for_predictions='on' if want full coverage." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_for_predictions_benchmark_slower_than_python_threshold", + "output": "mojo for predictions benchmark slower than python threshold refers to Fail hard if MOJO scoring is this many times slower than Python scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_for_predictions_benchmark_slower_than_python_threshold", + "output": "mojo for predictions benchmark slower than python threshold refers to Fail hard if MOJO scoring is this many times slower than Python scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo for predictions benchmark slower than python threshold", + "output": "mojo for predictions benchmark slower than python threshold refers to Fail hard if MOJO scoring is this many times slower than Python scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mojo for predictions benchmark slower than python threshold refers to Fail hard if MOJO scoring is this many times slower than Python scoring." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_for_predictions_benchmark_slower_than_python_threshold", + "output": "mojo for predictions benchmark slower than python threshold refers to Fail hard if MOJO scoring is this many times slower than Python scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_for_predictions_benchmark_slower_than_python_threshold", + "output": "mojo for predictions benchmark slower than python threshold refers to Fail hard if MOJO scoring is this many times slower than Python scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_for_predictions_benchmark_slower_than_python_min_rows", + "output": "mojo for predictions benchmark slower than python min rows refers to Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if have at least this many rows. To reduce false positives." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_for_predictions_benchmark_slower_than_python_min_rows", + "output": "mojo for predictions benchmark slower than python min rows refers to Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if have at least this many rows. To reduce false positives." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo for predictions benchmark slower than python min rows", + "output": "mojo for predictions benchmark slower than python min rows refers to Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if have at least this many rows. To reduce false positives." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mojo for predictions benchmark slower than python min rows refers to Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if have at least this many rows. To reduce false positives." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_for_predictions_benchmark_slower_than_python_min_rows", + "output": "mojo for predictions benchmark slower than python min rows refers to Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if have at least this many rows. To reduce false positives." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_for_predictions_benchmark_slower_than_python_min_rows", + "output": "mojo for predictions benchmark slower than python min rows refers to Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if have at least this many rows. To reduce false positives." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_for_predictions_benchmark_slower_than_python_min_seconds", + "output": "mojo for predictions benchmark slower than python min seconds refers to Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if takes at least this many seconds. To reduce false positives." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_for_predictions_benchmark_slower_than_python_min_seconds", + "output": "mojo for predictions benchmark slower than python min seconds refers to Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if takes at least this many seconds. To reduce false positives." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo for predictions benchmark slower than python min seconds", + "output": "mojo for predictions benchmark slower than python min seconds refers to Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if takes at least this many seconds. To reduce false positives." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mojo for predictions benchmark slower than python min seconds refers to Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if takes at least this many seconds. To reduce false positives." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_for_predictions_benchmark_slower_than_python_min_seconds", + "output": "mojo for predictions benchmark slower than python min seconds refers to Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if takes at least this many seconds. To reduce false positives." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_for_predictions_benchmark_slower_than_python_min_seconds", + "output": "mojo for predictions benchmark slower than python min seconds refers to Fail hard if MOJO scoring is slower than Python scoring by a factor specified by mojo_for_predictions_benchmark_slower_than_python_threshold, but only if takes at least this many seconds. To reduce false positives." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "inject_mojo_for_predictions", + "output": "inject mojo for predictions refers to Inject MOJO into fitted Python state if mini acceptance test passes, so can use C++ MOJO runtime when calling predict(enable_mojo=True, IS_SCORER=True, ...). Prerequisite for mojo_for_predictions='on' or 'auto'." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "inject_mojo_for_predictions", + "output": "inject mojo for predictions refers to Inject MOJO into fitted Python state if mini acceptance test passes, so can use C++ MOJO runtime when calling predict(enable_mojo=True, IS_SCORER=True, ...). Prerequisite for mojo_for_predictions='on' or 'auto'." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "inject mojo for predictions", + "output": "inject mojo for predictions refers to Inject MOJO into fitted Python state if mini acceptance test passes, so can use C++ MOJO runtime when calling predict(enable_mojo=True, IS_SCORER=True, ...). Prerequisite for mojo_for_predictions='on' or 'auto'." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "inject mojo for predictions refers to Inject MOJO into fitted Python state if mini acceptance test passes, so can use C++ MOJO runtime when calling predict(enable_mojo=True, IS_SCORER=True, ...). Prerequisite for mojo_for_predictions='on' or 'auto'." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting inject_mojo_for_predictions", + "output": "inject mojo for predictions refers to Inject MOJO into fitted Python state if mini acceptance test passes, so can use C++ MOJO runtime when calling predict(enable_mojo=True, IS_SCORER=True, ...). Prerequisite for mojo_for_predictions='on' or 'auto'." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting inject_mojo_for_predictions", + "output": "inject mojo for predictions refers to Inject MOJO into fitted Python state if mini acceptance test passes, so can use C++ MOJO runtime when calling predict(enable_mojo=True, IS_SCORER=True, ...). Prerequisite for mojo_for_predictions='on' or 'auto'." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_for_predictions", + "output": "mojo for predictions refers to Use MOJO for making fast low-latency predictions after experiment has finished (when applicable, for AutoDoc/Diagnostics/Predictions/MLI and standalone Python scoring via scorer.zip). 
For 'auto', only use MOJO if number of rows is equal or below mojo_for_predictions_max_rows. For larger frames, it can be faster to use the Python backend since used libraries are more likely already vectorized." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_for_predictions", + "output": "mojo for predictions refers to Allow use of MOJO for making predictions: Use MOJO for making fast low-latency predictions after experiment has finished (when applicable, for AutoDoc/Diagnostics/Predictions/MLI and standalone Python scoring via scorer.zip). For 'auto', only use MOJO if number of rows is equal or below mojo_for_predictions_max_rows. For larger frames, it can be faster to use the Python backend since used libraries are more likely already vectorized." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo for predictions", + "output": "mojo for predictions refers to Allow use of MOJO for making predictions: Use MOJO for making fast low-latency predictions after experiment has finished (when applicable, for AutoDoc/Diagnostics/Predictions/MLI and standalone Python scoring via scorer.zip). For 'auto', only use MOJO if number of rows is equal or below mojo_for_predictions_max_rows. For larger frames, it can be faster to use the Python backend since used libraries are more likely already vectorized." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Allow use of MOJO for making predictions: ", + "output": "mojo for predictions refers to Allow use of MOJO for making predictions: Use MOJO for making fast low-latency predictions after experiment has finished (when applicable, for AutoDoc/Diagnostics/Predictions/MLI and standalone Python scoring via scorer.zip). 
For 'auto', only use MOJO if number of rows is equal or below mojo_for_predictions_max_rows. For larger frames, it can be faster to use the Python backend since used libraries are more likely already vectorized." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_for_predictions", + "output": "mojo for predictions refers to Use MOJO for making fast low-latency predictions after experiment has finished (when applicable, for AutoDoc/Diagnostics/Predictions/MLI and standalone Python scoring via scorer.zip). For 'auto', only use MOJO if number of rows is equal or below mojo_for_predictions_max_rows. For larger frames, it can be faster to use the Python backend since used libraries are more likely already vectorized." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_for_predictions", + "output": "mojo for predictions refers to Allow use of MOJO for making predictions: Use MOJO for making fast low-latency predictions after experiment has finished (when applicable, for AutoDoc/Diagnostics/Predictions/MLI and standalone Python scoring via scorer.zip). For 'auto', only use MOJO if number of rows is equal or below mojo_for_predictions_max_rows. For larger frames, it can be faster to use the Python backend since used libraries are more likely already vectorized." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_for_predictions_max_rows", + "output": "mojo for predictions max rows refers to For smaller datasets, the single-threaded but low latency C++ MOJO runtime can lead to significantly faster scoring times than the regular in-Driverless AI Python scoring environment. If enable_mojo=True is passed to the predict API, and the MOJO exists and is applicable, then use the MOJO runtime for datasets that have fewer or equal number of rows than this threshold. 
MLI/AutoDoc set enable_mojo=True by default, so this setting applies. This setting is only used if mojo_for_predictions is 'auto'." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_for_predictions_max_rows", + "output": "mojo for predictions max rows refers to Max number of rows for C++ MOJO predictions: For smaller datasets, the single-threaded but low latency C++ MOJO runtime can lead to significantly faster scoring times than the regular in-Driverless AI Python scoring environment. If enable_mojo=True is passed to the predict API, and the MOJO exists and is applicable, then use the MOJO runtime for datasets that have fewer or equal number of rows than this threshold. MLI/AutoDoc set enable_mojo=True by default, so this setting applies. This setting is only used if mojo_for_predictions is 'auto'." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo for predictions max rows", + "output": "mojo for predictions max rows refers to Max number of rows for C++ MOJO predictions: For smaller datasets, the single-threaded but low latency C++ MOJO runtime can lead to significantly faster scoring times than the regular in-Driverless AI Python scoring environment. If enable_mojo=True is passed to the predict API, and the MOJO exists and is applicable, then use the MOJO runtime for datasets that have fewer or equal number of rows than this threshold. MLI/AutoDoc set enable_mojo=True by default, so this setting applies. This setting is only used if mojo_for_predictions is 'auto'." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max number of rows for C++ MOJO predictions: ", + "output": "mojo for predictions max rows refers to Max number of rows for C++ MOJO predictions: For smaller datasets, the single-threaded but low latency C++ MOJO runtime can lead to significantly faster scoring times than the regular in-Driverless AI Python scoring environment. If enable_mojo=True is passed to the predict API, and the MOJO exists and is applicable, then use the MOJO runtime for datasets that have fewer or equal number of rows than this threshold. MLI/AutoDoc set enable_mojo=True by default, so this setting applies. This setting is only used if mojo_for_predictions is 'auto'." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_for_predictions_max_rows", + "output": "mojo for predictions max rows refers to For smaller datasets, the single-threaded but low latency C++ MOJO runtime can lead to significantly faster scoring times than the regular in-Driverless AI Python scoring environment. If enable_mojo=True is passed to the predict API, and the MOJO exists and is applicable, then use the MOJO runtime for datasets that have fewer or equal number of rows than this threshold. MLI/AutoDoc set enable_mojo=True by default, so this setting applies. This setting is only used if mojo_for_predictions is 'auto'." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_for_predictions_max_rows", + "output": "mojo for predictions max rows refers to Max number of rows for C++ MOJO predictions: For smaller datasets, the single-threaded but low latency C++ MOJO runtime can lead to significantly faster scoring times than the regular in-Driverless AI Python scoring environment. 
If enable_mojo=True is passed to the predict API, and the MOJO exists and is applicable, then use the MOJO runtime for datasets that have fewer or equal number of rows than this threshold. MLI/AutoDoc set enable_mojo=True by default, so this setting applies. This setting is only used if mojo_for_predictions is 'auto'." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_for_predictions_batch_size", + "output": "mojo for predictions batch size refers to Batch size (in rows) for C++ MOJO predictions. Only when enable_mojo=True is passed to the predict API, and when the MOJO is applicable (e.g., fewer rows than mojo_for_predictions_max_rows). Larger values can lead to faster scoring, but use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_for_predictions_batch_size", + "output": "mojo for predictions batch size refers to Batch size for C++ MOJO predictions.: Batch size (in rows) for C++ MOJO predictions. Only when enable_mojo=True is passed to the predict API, and when the MOJO is applicable (e.g., fewer rows than mojo_for_predictions_max_rows). Larger values can lead to faster scoring, but use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo for predictions batch size", + "output": "mojo for predictions batch size refers to Batch size for C++ MOJO predictions.: Batch size (in rows) for C++ MOJO predictions. Only when enable_mojo=True is passed to the predict API, and when the MOJO is applicable (e.g., fewer rows than mojo_for_predictions_max_rows). Larger values can lead to faster scoring, but use more memory." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Batch size for C++ MOJO predictions.: ", + "output": "mojo for predictions batch size refers to Batch size for C++ MOJO predictions.: Batch size (in rows) for C++ MOJO predictions. Only when enable_mojo=True is passed to the predict API, and when the MOJO is applicable (e.g., fewer rows than mojo_for_predictions_max_rows). Larger values can lead to faster scoring, but use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_for_predictions_batch_size", + "output": "mojo for predictions batch size refers to Batch size (in rows) for C++ MOJO predictions. Only when enable_mojo=True is passed to the predict API, and when the MOJO is applicable (e.g., fewer rows than mojo_for_predictions_max_rows). Larger values can lead to faster scoring, but use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_for_predictions_batch_size", + "output": "mojo for predictions batch size refers to Batch size for C++ MOJO predictions.: Batch size (in rows) for C++ MOJO predictions. Only when enable_mojo=True is passed to the predict API, and when the MOJO is applicable (e.g., fewer rows than mojo_for_predictions_max_rows). Larger values can lead to faster scoring, but use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_acceptance_test_rtol", + "output": "mojo acceptance test rtol refers to Relative tolerance for mini MOJO acceptance test. If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_acceptance_test_rtol", + "output": "mojo acceptance test rtol refers to Relative tolerance for mini MOJO acceptance test.: Relative tolerance for mini MOJO acceptance test. If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo acceptance test rtol", + "output": "mojo acceptance test rtol refers to Relative tolerance for mini MOJO acceptance test.: Relative tolerance for mini MOJO acceptance test. If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Relative tolerance for mini MOJO acceptance test.: ", + "output": "mojo acceptance test rtol refers to Relative tolerance for mini MOJO acceptance test.: Relative tolerance for mini MOJO acceptance test. If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_acceptance_test_rtol", + "output": "mojo acceptance test rtol refers to Relative tolerance for mini MOJO acceptance test. If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_acceptance_test_rtol", + "output": "mojo acceptance test rtol refers to Relative tolerance for mini MOJO acceptance test.: Relative tolerance for mini MOJO acceptance test. If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_acceptance_test_atol", + "output": "mojo acceptance test atol refers to Absolute tolerance for mini MOJO acceptance test (for regression/Shapley, will be scaled by max(abs(preds)). If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_acceptance_test_atol", + "output": "mojo acceptance test atol refers to Absolute tolerance for mini MOJO acceptance test.: Absolute tolerance for mini MOJO acceptance test (for regression/Shapley, will be scaled by max(abs(preds)). If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo acceptance test atol", + "output": "mojo acceptance test atol refers to Absolute tolerance for mini MOJO acceptance test.: Absolute tolerance for mini MOJO acceptance test (for regression/Shapley, will be scaled by max(abs(preds)). If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Absolute tolerance for mini MOJO acceptance test.: ", + "output": "mojo acceptance test atol refers to Absolute tolerance for mini MOJO acceptance test.: Absolute tolerance for mini MOJO acceptance test (for regression/Shapley, will be scaled by max(abs(preds)). If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_acceptance_test_atol", + "output": "mojo acceptance test atol refers to Absolute tolerance for mini MOJO acceptance test (for regression/Shapley, will be scaled by max(abs(preds)). If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_acceptance_test_atol", + "output": "mojo acceptance test atol refers to Absolute tolerance for mini MOJO acceptance test.: Absolute tolerance for mini MOJO acceptance test (for regression/Shapley, will be scaled by max(abs(preds)). If Python/C++ MOJO differs more than this from Python, won't use MOJO inside Python for later scoring. Only applicable if mojo_for_predictions=True. Disabled if <= 0." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "reduce_mojo_size", + "output": "reduce mojo size refers to Whether to attempt to reduce the size of the MOJO scoring pipeline. A smaller MOJO will also lead to less memory footprint during scoring. It is achieved by reducing some other settings like interaction depth, and hence can affect the predictive accuracy of the model. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "reduce_mojo_size", + "output": "reduce mojo size refers to Attempt to reduce the size of the MOJO: Whether to attempt to reduce the size of the MOJO scoring pipeline. A smaller MOJO will also lead to less memory footprint during scoring. It is achieved by reducing some other settings like interaction depth, and hence can affect the predictive accuracy of the model. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "reduce mojo size", + "output": "reduce mojo size refers to Attempt to reduce the size of the MOJO: Whether to attempt to reduce the size of the MOJO scoring pipeline. A smaller MOJO will also lead to less memory footprint during scoring. It is achieved by reducing some other settings like interaction depth, and hence can affect the predictive accuracy of the model. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Attempt to reduce the size of the MOJO: ", + "output": "reduce mojo size refers to Attempt to reduce the size of the MOJO: Whether to attempt to reduce the size of the MOJO scoring pipeline. A smaller MOJO will also lead to less memory footprint during scoring. It is achieved by reducing some other settings like interaction depth, and hence can affect the predictive accuracy of the model. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting reduce_mojo_size", + "output": "reduce mojo size refers to Whether to attempt to reduce the size of the MOJO scoring pipeline. A smaller MOJO will also lead to less memory footprint during scoring. It is achieved by reducing some other settings like interaction depth, and hence can affect the predictive accuracy of the model. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting reduce_mojo_size", + "output": "reduce mojo size refers to Attempt to reduce the size of the MOJO: Whether to attempt to reduce the size of the MOJO scoring pipeline. A smaller MOJO will also lead to less memory footprint during scoring. It is achieved by reducing some other settings like interaction depth, and hence can affect the predictive accuracy of the model. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_pipeline_visualization", + "output": "make pipeline visualization refers to Whether to create the pipeline visualization at the end of each experiment.Uses MOJO to show pipeline, input features, transformers, model, and outputs of model. MOJO-capable tree models show first tree." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_pipeline_visualization", + "output": "make pipeline visualization refers to Make pipeline visualization: Whether to create the pipeline visualization at the end of each experiment.Uses MOJO to show pipeline, input features, transformers, model, and outputs of model. MOJO-capable tree models show first tree." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make pipeline visualization", + "output": "make pipeline visualization refers to Make pipeline visualization: Whether to create the pipeline visualization at the end of each experiment.Uses MOJO to show pipeline, input features, transformers, model, and outputs of model. MOJO-capable tree models show first tree."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Make pipeline visualization: ", + "output": "make pipeline visualization refers to Make pipeline visualization: Whether to create the pipeline visualization at the end of each experiment. Uses MOJO to show pipeline, input features, transformers, model, and outputs of model. MOJO-capable tree models show first tree." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting make_pipeline_visualization", + "output": "make pipeline visualization refers to Whether to create the pipeline visualization at the end of each experiment. Uses MOJO to show pipeline, input features, transformers, model, and outputs of model. MOJO-capable tree models show first tree." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting make_pipeline_visualization", + "output": "make pipeline visualization refers to Make pipeline visualization: Whether to create the pipeline visualization at the end of each experiment. Uses MOJO to show pipeline, input features, transformers, model, and outputs of model. MOJO-capable tree models show first tree." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_python_pipeline_visualization", + "output": "make python pipeline visualization refers to Whether to create the python pipeline visualization at the end of each experiment. Each feature and transformer includes a variable importance at end in brackets. Only done when forced on, and artifacts as png files will appear in summary zip. 
Each experiment has files per individual in final population: 1) preprune_False_0.0 : Before final pruning, without any additional variable importance threshold pruning 2) preprune_True_0.0 : Before final pruning, with additional variable importance <=0.0 pruning 3) postprune_False_0.0 : After final pruning, without any additional variable importance threshold pruning 4) postprune_True_0.0 : After final pruning, with additional variable importance <=0.0 pruning 5) posttournament_False_0.0 : After final pruning and tournament, without any additional variable importance threshold pruning 6) posttournament_True_0.0 : After final pruning and tournament, with additional variable importance <=0.0 pruning 1-5 are done with 'on' while 'auto' only does 6 corresponding to the final post-pruned individuals. Even post pruning, some features have zero importance, because only those genes that have value+variance in variable importance of value=0.0 get pruned. GA can have many folds with positive variance for a gene, and those are not removed in case they are useful features for final model. If small mojo option is chosen (reduce_mojo_size True), then the variance of feature gain is ignored for which genes and features are pruned as well as for what appears in the graph. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_python_pipeline_visualization", + "output": "make python pipeline visualization refers to Make python pipeline visualization: Whether to create the python pipeline visualization at the end of each experiment. Each feature and transformer includes a variable importance at end in brackets. Only done when forced on, and artifacts as png files will appear in summary zip. 
Each experiment has files per individual in final population: 1) preprune_False_0.0 : Before final pruning, without any additional variable importance threshold pruning 2) preprune_True_0.0 : Before final pruning, with additional variable importance <=0.0 pruning 3) postprune_False_0.0 : After final pruning, without any additional variable importance threshold pruning 4) postprune_True_0.0 : After final pruning, with additional variable importance <=0.0 pruning 5) posttournament_False_0.0 : After final pruning and tournament, without any additional variable importance threshold pruning 6) posttournament_True_0.0 : After final pruning and tournament, with additional variable importance <=0.0 pruning 1-5 are done with 'on' while 'auto' only does 6 corresponding to the final post-pruned individuals. Even post pruning, some features have zero importance, because only those genes that have value+variance in variable importance of value=0.0 get pruned. GA can have many folds with positive variance for a gene, and those are not removed in case they are useful features for final model. If small mojo option is chosen (reduce_mojo_size True), then the variance of feature gain is ignored for which genes and features are pruned as well as for what appears in the graph. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make python pipeline visualization", + "output": "make python pipeline visualization refers to Make python pipeline visualization: Whether to create the python pipeline visualization at the end of each experiment. Each feature and transformer includes a variable importance at end in brackets. Only done when forced on, and artifacts as png files will appear in summary zip. 
Each experiment has files per individual in final population: 1) preprune_False_0.0 : Before final pruning, without any additional variable importance threshold pruning 2) preprune_True_0.0 : Before final pruning, with additional variable importance <=0.0 pruning 3) postprune_False_0.0 : After final pruning, without any additional variable importance threshold pruning 4) postprune_True_0.0 : After final pruning, with additional variable importance <=0.0 pruning 5) posttournament_False_0.0 : After final pruning and tournament, without any additional variable importance threshold pruning 6) posttournament_True_0.0 : After final pruning and tournament, with additional variable importance <=0.0 pruning 1-5 are done with 'on' while 'auto' only does 6 corresponding to the final post-pruned individuals. Even post pruning, some features have zero importance, because only those genes that have value+variance in variable importance of value=0.0 get pruned. GA can have many folds with positive variance for a gene, and those are not removed in case they are useful features for final model. If small mojo option is chosen (reduce_mojo_size True), then the variance of feature gain is ignored for which genes and features are pruned as well as for what appears in the graph. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Make python pipeline visualization: ", + "output": "make python pipeline visualization refers to Make python pipeline visualization: Whether to create the python pipeline visualization at the end of each experiment. Each feature and transformer includes a variable importance at end in brackets. Only done when forced on, and artifacts as png files will appear in summary zip. 
Each experiment has files per individual in final population: 1) preprune_False_0.0 : Before final pruning, without any additional variable importance threshold pruning 2) preprune_True_0.0 : Before final pruning, with additional variable importance <=0.0 pruning 3) postprune_False_0.0 : After final pruning, without any additional variable importance threshold pruning 4) postprune_True_0.0 : After final pruning, with additional variable importance <=0.0 pruning 5) posttournament_False_0.0 : After final pruning and tournament, without any additional variable importance threshold pruning 6) posttournament_True_0.0 : After final pruning and tournament, with additional variable importance <=0.0 pruning 1-5 are done with 'on' while 'auto' only does 6 corresponding to the final post-pruned individuals. Even post pruning, some features have zero importance, because only those genes that have value+variance in variable importance of value=0.0 get pruned. GA can have many folds with positive variance for a gene, and those are not removed in case they are useful features for final model. If small mojo option is chosen (reduce_mojo_size True), then the variance of feature gain is ignored for which genes and features are pruned as well as for what appears in the graph. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting make_python_pipeline_visualization", + "output": "make python pipeline visualization refers to Whether to create the python pipeline visualization at the end of each experiment. Each feature and transformer includes a variable importance at end in brackets. Only done when forced on, and artifacts as png files will appear in summary zip. 
Each experiment has files per individual in final population: 1) preprune_False_0.0 : Before final pruning, without any additional variable importance threshold pruning 2) preprune_True_0.0 : Before final pruning, with additional variable importance <=0.0 pruning 3) postprune_False_0.0 : After final pruning, without any additional variable importance threshold pruning 4) postprune_True_0.0 : After final pruning, with additional variable importance <=0.0 pruning 5) posttournament_False_0.0 : After final pruning and tournament, without any additional variable importance threshold pruning 6) posttournament_True_0.0 : After final pruning and tournament, with additional variable importance <=0.0 pruning 1-5 are done with 'on' while 'auto' only does 6 corresponding to the final post-pruned individuals. Even post pruning, some features have zero importance, because only those genes that have value+variance in variable importance of value=0.0 get pruned. GA can have many folds with positive variance for a gene, and those are not removed in case they are useful features for final model. If small mojo option is chosen (reduce_mojo_size True), then the variance of feature gain is ignored for which genes and features are pruned as well as for what appears in the graph. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting make_python_pipeline_visualization", + "output": "make python pipeline visualization refers to Make python pipeline visualization: Whether to create the python pipeline visualization at the end of each experiment. Each feature and transformer includes a variable importance at end in brackets. Only done when forced on, and artifacts as png files will appear in summary zip. 
Each experiment has files per individual in final population: 1) preprune_False_0.0 : Before final pruning, without any additional variable importance threshold pruning 2) preprune_True_0.0 : Before final pruning, with additional variable importance <=0.0 pruning 3) postprune_False_0.0 : After final pruning, without any additional variable importance threshold pruning 4) postprune_True_0.0 : After final pruning, with additional variable importance <=0.0 pruning 5) posttournament_False_0.0 : After final pruning and tournament, without any additional variable importance threshold pruning 6) posttournament_True_0.0 : After final pruning and tournament, with additional variable importance <=0.0 pruning 1-5 are done with 'on' while 'auto' only does 6 corresponding to the final post-pruned individuals. Even post pruning, some features have zero importance, because only those genes that have value+variance in variable importance of value=0.0 get pruned. GA can have many folds with positive variance for a gene, and those are not removed in case they are useful features for final model. If small mojo option is chosen (reduce_mojo_size True), then the variance of feature gain is ignored for which genes and features are pruned as well as for what appears in the graph. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_autoreport", + "output": "make autoreport refers to Whether to create the experiment AutoDoc after end of experiment. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_autoreport", + "output": "make autoreport refers to Make AutoDoc: Whether to create the experiment AutoDoc after end of experiment. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make autoreport", + "output": "make autoreport refers to Make AutoDoc: Whether to create the experiment AutoDoc after end of experiment. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Make AutoDoc: ", + "output": "make autoreport refers to Make AutoDoc: Whether to create the experiment AutoDoc after end of experiment. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting make_autoreport", + "output": "make autoreport refers to Whether to create the experiment AutoDoc after end of experiment. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting make_autoreport", + "output": "make autoreport refers to Make AutoDoc: Whether to create the experiment AutoDoc after end of experiment. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cols_make_autoreport_automatically", + "output": "max cols make autoreport automatically refers to Number of columns beyond which will not automatically build autoreport at end of experiment.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cols_make_autoreport_automatically", + "output": "max cols make autoreport automatically refers to Number of columns beyond which will not automatically build autoreport at end of experiment.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max cols make autoreport automatically", + "output": "max cols make autoreport automatically refers to Number of columns beyond which will not automatically build autoreport at end of experiment.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of columns beyond which will not automatically build autoreport at end of experiment.: ", + "output": "max cols make autoreport automatically refers to Number of columns beyond which will not automatically build autoreport at end of experiment.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_cols_make_autoreport_automatically", + "output": "max cols make autoreport automatically refers to Number of columns beyond which will not automatically build autoreport at end of experiment.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_cols_make_autoreport_automatically", + "output": "max cols make autoreport automatically refers to Number of columns beyond which will not automatically build autoreport at end of experiment.: " + }, + { + "prompt_type": 
"human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cols_make_pipeline_visualization_automatically", + "output": "max cols make pipeline visualization automatically refers to Number of columns beyond which will not automatically build pipeline visualization at end of experiment.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cols_make_pipeline_visualization_automatically", + "output": "max cols make pipeline visualization automatically refers to Number of columns beyond which will not automatically build pipeline visualization at end of experiment.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max cols make pipeline visualization automatically", + "output": "max cols make pipeline visualization automatically refers to Number of columns beyond which will not automatically build pipeline visualization at end of experiment.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of columns beyond which will not automatically build pipeline visualization at end of experiment.: ", + "output": "max cols make pipeline visualization automatically refers to Number of columns beyond which will not automatically build pipeline visualization at end of experiment.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_cols_make_pipeline_visualization_automatically", + "output": "max cols make pipeline visualization automatically refers to Number of columns beyond which will not automatically build pipeline visualization at end of experiment.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_cols_make_pipeline_visualization_automatically", + "output": "max 
cols make pipeline visualization automatically refers to Number of columns beyond which will not automatically build pipeline visualization at end of experiment.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pass_env_to_deprecated_python_scoring", + "output": "pass env to deprecated python scoring refers to Pass environment variables from running Driverless AI instance to Python scoring pipeline for deprecated models, when they are used to make predictions. Use with caution. If config.toml overrides are set by env vars, and they differ from what the experiment's env looked like when it was trained, then unexpected consequences can occur. Enable this only to \" override certain well-controlled settings like the port for H2O-3 custom recipe server. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pass_env_to_deprecated_python_scoring", + "output": "pass env to deprecated python scoring refers to Pass environment variables to deprecated python scoring package: Pass environment variables from running Driverless AI instance to Python scoring pipeline for deprecated models, when they are used to make predictions. Use with caution. If config.toml overrides are set by env vars, and they differ from what the experiment's env looked like when it was trained, then unexpected consequences can occur. Enable this only to \" override certain well-controlled settings like the port for H2O-3 custom recipe server. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pass env to deprecated python scoring", + "output": "pass env to deprecated python scoring refers to Pass environment variables to deprecated python scoring package: Pass environment variables from running Driverless AI instance to Python scoring pipeline for deprecated models, when they are used to make predictions. Use with caution. If config.toml overrides are set by env vars, and they differ from what the experiment's env looked like when it was trained, then unexpected consequences can occur. Enable this only to \" override certain well-controlled settings like the port for H2O-3 custom recipe server. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Pass environment variables to deprecated python scoring package: ", + "output": "pass env to deprecated python scoring refers to Pass environment variables to deprecated python scoring package: Pass environment variables from running Driverless AI instance to Python scoring pipeline for deprecated models, when they are used to make predictions. Use with caution. If config.toml overrides are set by env vars, and they differ from what the experiment's env looked like when it was trained, then unexpected consequences can occur. Enable this only to \" override certain well-controlled settings like the port for H2O-3 custom recipe server. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pass_env_to_deprecated_python_scoring", + "output": "pass env to deprecated python scoring refers to Pass environment variables from running Driverless AI instance to Python scoring pipeline for deprecated models, when they are used to make predictions. Use with caution. 
If config.toml overrides are set by env vars, and they differ from what the experiment's env looked like when it was trained, then unexpected consequences can occur. Enable this only to \" override certain well-controlled settings like the port for H2O-3 custom recipe server. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pass_env_to_deprecated_python_scoring", + "output": "pass env to deprecated python scoring refers to Pass environment variables to deprecated python scoring package: Pass environment variables from running Driverless AI instance to Python scoring pipeline for deprecated models, when they are used to make predictions. Use with caution. If config.toml overrides are set by env vars, and they differ from what the experiment's env looked like when it was trained, then unexpected consequences can occur. Enable this only to \" override certain well-controlled settings like the port for H2O-3 custom recipe server. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "transformer_description_line_length", + "output": "transformer description line length refers to Line length for autoreport descriptions of transformers. -1 means use autodoc_keras_summary_line_length: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "transformer_description_line_length", + "output": "transformer description line length refers to Line length for autoreport descriptions of transformers. -1 means use autodoc_keras_summary_line_length: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "transformer description line length", + "output": "transformer description line length refers to Line length for autoreport descriptions of transformers. 
-1 means use autodoc_keras_summary_line_length: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Line length for autoreport descriptions of transformers. -1 means use autodoc_keras_summary_line_length: ", + "output": "transformer description line length refers to Line length for autoreport descriptions of transformers. -1 means use autodoc_keras_summary_line_length: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting transformer_description_line_length", + "output": "transformer description line length refers to Line length for autoreport descriptions of transformers. -1 means use autodoc_keras_summary_line_length: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting transformer_description_line_length", + "output": "transformer description line length refers to Line length for autoreport descriptions of transformers. -1 means use autodoc_keras_summary_line_length: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark_mojo_latency", + "output": "benchmark mojo latency refers to Whether to measure the MOJO scoring latency at the time of MOJO creation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark_mojo_latency", + "output": "benchmark mojo latency refers to Measure MOJO scoring latency: Whether to measure the MOJO scoring latency at the time of MOJO creation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark mojo latency", + "output": "benchmark mojo latency refers to Measure MOJO scoring latency: Whether to measure the MOJO scoring latency at the time of MOJO creation." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Measure MOJO scoring latency: ", + "output": "benchmark mojo latency refers to Measure MOJO scoring latency: Whether to measure the MOJO scoring latency at the time of MOJO creation." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting benchmark_mojo_latency", + "output": "benchmark mojo latency refers to Whether to measure the MOJO scoring latency at the time of MOJO creation." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting benchmark_mojo_latency", + "output": "benchmark mojo latency refers to Measure MOJO scoring latency: Whether to measure the MOJO scoring latency at the time of MOJO creation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark_mojo_latency_auto_size_limit", + "output": "benchmark mojo latency auto size limit refers to Max size of pipeline.mojo file (in MB) for automatic mode of MOJO scoring latency measurement" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark_mojo_latency_auto_size_limit", + "output": "benchmark mojo latency auto size limit refers to Max size of pipeline.mojo file (in MB) for when benchmark_mojo_latency is set to 'auto': Max size of pipeline.mojo file (in MB) for automatic mode of MOJO scoring latency measurement" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark mojo latency auto size limit", + "output": "benchmark mojo latency auto size limit refers to Max size of pipeline.mojo file (in MB) for when benchmark_mojo_latency is set to 'auto': Max size of pipeline.mojo file (in MB) for automatic mode of MOJO scoring latency measurement" 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max size of pipeline.mojo file (in MB) for when benchmark_mojo_latency is set to 'auto': ", + "output": "benchmark mojo latency auto size limit refers to Max size of pipeline.mojo file (in MB) for when benchmark_mojo_latency is set to 'auto': Max size of pipeline.mojo file (in MB) for automatic mode of MOJO scoring latency measurement" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting benchmark_mojo_latency_auto_size_limit", + "output": "benchmark mojo latency auto size limit refers to Max size of pipeline.mojo file (in MB) for automatic mode of MOJO scoring latency measurement" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting benchmark_mojo_latency_auto_size_limit", + "output": "benchmark mojo latency auto size limit refers to Max size of pipeline.mojo file (in MB) for when benchmark_mojo_latency is set to 'auto': Max size of pipeline.mojo file (in MB) for automatic mode of MOJO scoring latency measurement" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_building_timeout", + "output": "mojo building timeout refers to If MOJO creation times out at end of experiment, can still make MOJO from the GUI or from the R/Py clients (timeout doesn't apply there)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_building_timeout", + "output": "mojo building timeout refers to Timeout in seconds to wait for MOJO creation at end of experiment.: If MOJO creation times out at end of experiment, can still make MOJO from the GUI or from the R/Py clients (timeout doesn't apply there)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo building timeout", + "output": "mojo building timeout refers to Timeout in seconds to wait for MOJO creation at end of experiment.: If MOJO creation times out at end of experiment, can still make MOJO from the GUI or from the R/Py clients (timeout doesn't apply there)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Timeout in seconds to wait for MOJO creation at end of experiment.: ", + "output": "mojo building timeout refers to Timeout in seconds to wait for MOJO creation at end of experiment.: If MOJO creation times out at end of experiment, can still make MOJO from the GUI or from the R/Py clients (timeout doesn't apply there)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_building_timeout", + "output": "mojo building timeout refers to If MOJO creation times out at end of experiment, can still make MOJO from the GUI or from the R/Py clients (timeout doesn't apply there)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_building_timeout", + "output": "mojo building timeout refers to Timeout in seconds to wait for MOJO creation at end of experiment.: If MOJO creation times out at end of experiment, can still make MOJO from the GUI or from the R/Py clients (timeout doesn't apply there)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_vis_building_timeout", + "output": "mojo vis building timeout refers to If MOJO visualization creation times out at end of experiment, MOJO is still created if possible within the time limit specified by mojo_building_timeout." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_vis_building_timeout", + "output": "mojo vis building timeout refers to Timeout in seconds to wait for MOJO visualization creation at end of experiment.: If MOJO visualization creation times out at end of experiment, MOJO is still created if possible within the time limit specified by mojo_building_timeout." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo vis building timeout", + "output": "mojo vis building timeout refers to Timeout in seconds to wait for MOJO visualization creation at end of experiment.: If MOJO visualization creation times out at end of experiment, MOJO is still created if possible within the time limit specified by mojo_building_timeout." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Timeout in seconds to wait for MOJO visualization creation at end of experiment.: ", + "output": "mojo vis building timeout refers to Timeout in seconds to wait for MOJO visualization creation at end of experiment.: If MOJO visualization creation times out at end of experiment, MOJO is still created if possible within the time limit specified by mojo_building_timeout." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_vis_building_timeout", + "output": "mojo vis building timeout refers to If MOJO visualization creation times out at end of experiment, MOJO is still created if possible within the time limit specified by mojo_building_timeout." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_vis_building_timeout", + "output": "mojo vis building timeout refers to Timeout in seconds to wait for MOJO visualization creation at end of experiment.: If MOJO visualization creation times out at end of experiment, MOJO is still created if possible within the time limit specified by mojo_building_timeout." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_building_parallelism", + "output": "mojo building parallelism refers to If MOJO creation is too slow, increase this value. Higher values can finish faster, but use more memory. If MOJO creation fails due to an out-of-memory error, reduce this value to 1. Set to -1 for all physical cores. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_building_parallelism", + "output": "mojo building parallelism refers to Number of parallel workers to use during MOJO creation (-1 = all cores): If MOJO creation is too slow, increase this value. Higher values can finish faster, but use more memory. If MOJO creation fails due to an out-of-memory error, reduce this value to 1. Set to -1 for all physical cores. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo building parallelism", + "output": "mojo building parallelism refers to Number of parallel workers to use during MOJO creation (-1 = all cores): If MOJO creation is too slow, increase this value. Higher values can finish faster, but use more memory. If MOJO creation fails due to an out-of-memory error, reduce this value to 1. Set to -1 for all physical cores. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of parallel workers to use during MOJO creation (-1 = all cores): ", + "output": "mojo building parallelism refers to Number of parallel workers to use during MOJO creation (-1 = all cores): If MOJO creation is too slow, increase this value. Higher values can finish faster, but use more memory. If MOJO creation fails due to an out-of-memory error, reduce this value to 1. Set to -1 for all physical cores. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_building_parallelism", + "output": "mojo building parallelism refers to If MOJO creation is too slow, increase this value. Higher values can finish faster, but use more memory. If MOJO creation fails due to an out-of-memory error, reduce this value to 1. Set to -1 for all physical cores. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_building_parallelism", + "output": "mojo building parallelism refers to Number of parallel workers to use during MOJO creation (-1 = all cores): If MOJO creation is too slow, increase this value. Higher values can finish faster, but use more memory. If MOJO creation fails due to an out-of-memory error, reduce this value to 1. Set to -1 for all physical cores. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_building_parallelism_base_model_size_limit", + "output": "mojo building parallelism base model size limit refers to Size in bytes that all pickled and compressed base models have to satisfy to use parallel MOJO building. For large base models, parallel MOJO building can use too much memory. Only used if final_fitted_model_per_model_fold_files is true. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_building_parallelism_base_model_size_limit", + "output": "mojo building parallelism base model size limit refers to Size of base models to allow mojo_building_parallelism: Size in bytes that all pickled and compressed base models have to satisfy to use parallel MOJO building. For large base models, parallel MOJO building can use too much memory. Only used if final_fitted_model_per_model_fold_files is true. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo building parallelism base model size limit", + "output": "mojo building parallelism base model size limit refers to Size of base models to allow mojo_building_parallelism: Size in bytes that all pickled and compressed base models have to satisfy to use parallel MOJO building. For large base models, parallel MOJO building can use too much memory. Only used if final_fitted_model_per_model_fold_files is true. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Size of base models to allow mojo_building_parallelism: ", + "output": "mojo building parallelism base model size limit refers to Size of base models to allow mojo_building_parallelism: Size in bytes that all pickled and compressed base models have to satisfy to use parallel MOJO building. For large base models, parallel MOJO building can use too much memory. Only used if final_fitted_model_per_model_fold_files is true. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_building_parallelism_base_model_size_limit", + "output": "mojo building parallelism base model size limit refers to Size in bytes that all pickled and compressed base models have to satisfy to use parallel MOJO building. 
For large base models, parallel MOJO building can use too much memory. Only used if final_fitted_model_per_model_fold_files is true. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_building_parallelism_base_model_size_limit", + "output": "mojo building parallelism base model size limit refers to Size of base models to allow mojo_building_parallelism: Size in bytes that all pickled and compressed base models have to satisfy to use parallel MOJO building. For large base models, parallel MOJO building can use too much memory. Only used if final_fitted_model_per_model_fold_files is true. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show_pipeline_sizes", + "output": "show pipeline sizes refers to Whether to show model and pipeline sizes in logs. If 'auto', then not done if more than 10 base models+folds, because expect not concerned with size." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show_pipeline_sizes", + "output": "show pipeline sizes refers to Whether to show model and pipeline sizes in logs: Whether to show model and pipeline sizes in logs. If 'auto', then not done if more than 10 base models+folds, because expect not concerned with size." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show pipeline sizes", + "output": "show pipeline sizes refers to Whether to show model and pipeline sizes in logs: Whether to show model and pipeline sizes in logs. If 'auto', then not done if more than 10 base models+folds, because expect not concerned with size." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to show model and pipeline sizes in logs: ", + "output": "show pipeline sizes refers to Whether to show model and pipeline sizes in logs: Whether to show model and pipeline sizes in logs. If 'auto', then not done if more than 10 base models+folds, because expect not concerned with size." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting show_pipeline_sizes", + "output": "show pipeline sizes refers to Whether to show model and pipeline sizes in logs. If 'auto', then not done if more than 10 base models+folds, because expect not concerned with size." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting show_pipeline_sizes", + "output": "show pipeline sizes refers to Whether to show model and pipeline sizes in logs: Whether to show model and pipeline sizes in logs. If 'auto', then not done if more than 10 base models+folds, because expect not concerned with size." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "exclusive_mode", + "output": "exclusive mode refers to safe: assume might be running another experiment on same node; moderate: assume not running any other experiments or tasks on same node, but still only use physical core count; max: assume not running anything else on node at all except the experiment. If multinode is enabled, this option has no effect, unless worker_remote_processors=1 when it will still be applied. Each exclusive mode can be chosen, and then fine-tuned using each expert settings. 
Changing the exclusive mode will reset all exclusive mode related options back to default and then re-apply the specific rules for the new mode, which will undo any fine-tuning of expert options that are part of exclusive mode rules. If choose to do new/continued/refitted/retrained experiment from parent experiment, all the mode rules are not re-applied and any fine-tuning is preserved. To reset mode behavior, one can switch between 'safe' and the desired mode. This way the new child experiment will use the default system resources for the chosen mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "exclusive_mode", + "output": "exclusive mode refers to Exclusive level of access to node resources: safe: assume might be running another experiment on same node; moderate: assume not running any other experiments or tasks on same node, but still only use physical core count; max: assume not running anything else on node at all except the experiment. If multinode is enabled, this option has no effect, unless worker_remote_processors=1 when it will still be applied. Each exclusive mode can be chosen, and then fine-tuned using each expert settings. Changing the exclusive mode will reset all exclusive mode related options back to default and then re-apply the specific rules for the new mode, which will undo any fine-tuning of expert options that are part of exclusive mode rules. If choose to do new/continued/refitted/retrained experiment from parent experiment, all the mode rules are not re-applied and any fine-tuning is preserved. To reset mode behavior, one can switch between 'safe' and the desired mode. This way the new child experiment will use the default system resources for the chosen mode. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "exclusive mode", + "output": "exclusive mode refers to Exclusive level of access to node resources: safe: assume might be running another experiment on same node; moderate: assume not running any other experiments or tasks on same node, but still only use physical core count; max: assume not running anything else on node at all except the experiment. If multinode is enabled, this option has no effect, unless worker_remote_processors=1 when it will still be applied. Each exclusive mode can be chosen, and then fine-tuned using each expert settings. Changing the exclusive mode will reset all exclusive mode related options back to default and then re-apply the specific rules for the new mode, which will undo any fine-tuning of expert options that are part of exclusive mode rules. If choose to do new/continued/refitted/retrained experiment from parent experiment, all the mode rules are not re-applied and any fine-tuning is preserved. To reset mode behavior, one can switch between 'safe' and the desired mode. This way the new child experiment will use the default system resources for the chosen mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Exclusive level of access to node resources: ", + "output": "exclusive mode refers to Exclusive level of access to node resources: safe: assume might be running another experiment on same node; moderate: assume not running any other experiments or tasks on same node, but still only use physical core count; max: assume not running anything else on node at all except the experiment. If multinode is enabled, this option has no effect, unless worker_remote_processors=1 when it will still be applied. Each exclusive mode can be chosen, and then fine-tuned using each expert settings. 
Changing the exclusive mode will reset all exclusive mode related options back to default and then re-apply the specific rules for the new mode, which will undo any fine-tuning of expert options that are part of exclusive mode rules. If choose to do new/continued/refitted/retrained experiment from parent experiment, all the mode rules are not re-applied and any fine-tuning is preserved. To reset mode behavior, one can switch between 'safe' and the desired mode. This way the new child experiment will use the default system resources for the chosen mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting exclusive_mode", + "output": "exclusive mode refers to safe: assume might be running another experiment on same node; moderate: assume not running any other experiments or tasks on same node, but still only use physical core count; max: assume not running anything else on node at all except the experiment. If multinode is enabled, this option has no effect, unless worker_remote_processors=1 when it will still be applied. Each exclusive mode can be chosen, and then fine-tuned using each expert settings. Changing the exclusive mode will reset all exclusive mode related options back to default and then re-apply the specific rules for the new mode, which will undo any fine-tuning of expert options that are part of exclusive mode rules. If choose to do new/continued/refitted/retrained experiment from parent experiment, all the mode rules are not re-applied and any fine-tuning is preserved. To reset mode behavior, one can switch between 'safe' and the desired mode. This way the new child experiment will use the default system resources for the chosen mode. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting exclusive_mode", + "output": "exclusive mode refers to Exclusive level of access to node resources: safe: assume might be running another experiment on same node; moderate: assume not running any other experiments or tasks on same node, but still only use physical core count; max: assume not running anything else on node at all except the experiment. If multinode is enabled, this option has no effect, unless worker_remote_processors=1 when it will still be applied. Each exclusive mode can be chosen, and then fine-tuned using each expert settings. Changing the exclusive mode will reset all exclusive mode related options back to default and then re-apply the specific rules for the new mode, which will undo any fine-tuning of expert options that are part of exclusive mode rules. If choose to do new/continued/refitted/retrained experiment from parent experiment, all the mode rules are not re-applied and any fine-tuning is preserved. To reset mode behavior, one can switch between 'safe' and the desired mode. This way the new child experiment will use the default system resources for the chosen mode. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_workers", + "output": "max workers refers to Maximum number of workers for Driverless AI server pool (only 1 needed currently)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_workers", + "output": "max workers refers to Maximum number of workers for Driverless AI server pool (only 1 needed currently)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max workers", + "output": "max workers refers to Maximum number of workers for Driverless AI server pool (only 1 needed currently)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max workers refers to Maximum number of workers for Driverless AI server pool (only 1 needed currently)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_workers", + "output": "max workers refers to Maximum number of workers for Driverless AI server pool (only 1 needed currently)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_workers", + "output": "max workers refers to Maximum number of workers for Driverless AI server pool (only 1 needed currently)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cores", + "output": "max cores refers to Max number of CPU cores to use per experiment. Set to <= 0 to use all (physical) cores. One can also set environment variable 'OMP_NUM_THREADS' to number of cores to use for OpenMP (e.g., in bash: 'export OMP_NUM_THREADS=32' and 'export OPENBLAS_NUM_THREADS=32'). 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cores", + "output": "max cores refers to Number of cores to use (0 = all): Max number of CPU cores to use per experiment. Set to <= 0 to use all (physical) cores. One can also set environment variable 'OMP_NUM_THREADS' to number of cores to use for OpenMP (e.g., in bash: 'export OMP_NUM_THREADS=32' and 'export OPENBLAS_NUM_THREADS=32'). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max cores", + "output": "max cores refers to Number of cores to use (0 = all): Max number of CPU cores to use per experiment. Set to <= 0 to use all (physical) cores. One can also set environment variable 'OMP_NUM_THREADS' to number of cores to use for OpenMP (e.g., in bash: 'export OMP_NUM_THREADS=32' and 'export OPENBLAS_NUM_THREADS=32'). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of cores to use (0 = all): ", + "output": "max cores refers to Number of cores to use (0 = all): Max number of CPU cores to use per experiment. Set to <= 0 to use all (physical) cores. One can also set environment variable 'OMP_NUM_THREADS' to number of cores to use for OpenMP (e.g., in bash: 'export OMP_NUM_THREADS=32' and 'export OPENBLAS_NUM_THREADS=32'). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_cores", + "output": "max cores refers to Max number of CPU cores to use per experiment. Set to <= 0 to use all (physical) cores. One can also set environment variable 'OMP_NUM_THREADS' to number of cores to use for OpenMP (e.g., in bash: 'export OMP_NUM_THREADS=32' and 'export OPENBLAS_NUM_THREADS=32'). 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_cores", + "output": "max cores refers to Number of cores to use (0 = all): Max number of CPU cores to use per experiment. Set to <= 0 to use all (physical) cores. One can also set environment variable 'OMP_NUM_THREADS' to number of cores to use for OpenMP (e.g., in bash: 'export OMP_NUM_THREADS=32' and 'export OPENBLAS_NUM_THREADS=32'). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cores_dai", + "output": "max cores dai refers to Max number of CPU cores to use across all of DAI experiments and tasks. -1 is all available, with stall_subprocess_submission_dai_fork_threshold_count=0 means restricted to core count. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cores_dai", + "output": "max cores dai refers to Max number of CPU cores to use across all of DAI experiments and tasks. -1 is all available, with stall_subprocess_submission_dai_fork_threshold_count=0 means restricted to core count. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max cores dai", + "output": "max cores dai refers to Max number of CPU cores to use across all of DAI experiments and tasks. -1 is all available, with stall_subprocess_submission_dai_fork_threshold_count=0 means restricted to core count. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max cores dai refers to Max number of CPU cores to use across all of DAI experiments and tasks. -1 is all available, with stall_subprocess_submission_dai_fork_threshold_count=0 means restricted to core count. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_cores_dai", + "output": "max cores dai refers to Max number of CPU cores to use across all of DAI experiments and tasks. -1 is all available, with stall_subprocess_submission_dai_fork_threshold_count=0 means restricted to core count. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_cores_dai", + "output": "max cores dai refers to Max number of CPU cores to use across all of DAI experiments and tasks. -1 is all available, with stall_subprocess_submission_dai_fork_threshold_count=0 means restricted to core count. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "virtual_cores_per_physical_core", + "output": "virtual cores per physical core refers to Number of virtual cores per physical core (0: auto mode, >=1 use that integer value). If >=1, the reported physical cores in logs will match the virtual cores divided by this value." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "virtual_cores_per_physical_core", + "output": "virtual cores per physical core refers to Number of virtual cores per physical core (0: auto mode, >=1 use that integer value). If >=1, the reported physical cores in logs will match the virtual cores divided by this value." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "virtual cores per physical core", + "output": "virtual cores per physical core refers to Number of virtual cores per physical core (0: auto mode, >=1 use that integer value). If >=1, the reported physical cores in logs will match the virtual cores divided by this value."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "virtual cores per physical core refers to Number of virtual cores per physical core (0: auto mode, >=1 use that integer value). If >=1, the reported physical cores in logs will match the virtual cores divided by this value." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting virtual_cores_per_physical_core", + "output": "virtual cores per physical core refers to Number of virtual cores per physical core (0: auto mode, >=1 use that integer value). If >=1, the reported physical cores in logs will match the virtual cores divided by this value." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting virtual_cores_per_physical_core", + "output": "virtual cores per physical core refers to Number of virtual cores per physical core (0: auto mode, >=1 use that integer value). If >=1, the reported physical cores in logs will match the virtual cores divided by this value." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_virtual_cores_per_physical_core_if_unequal", + "output": "min virtual cores per physical core if unequal refers to Minimum number of virtual cores per physical core. Only applies if virtual cores != physical cores. Can help situations like Intel i9 13900 with 24 physical cores and only 32 virtual cores. So better to limit physical cores to 16." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_virtual_cores_per_physical_core_if_unequal", + "output": "min virtual cores per physical core if unequal refers to Minimum number of virtual cores per physical core. Only applies if virtual cores != physical cores. 
Can help situations like Intel i9 13900 with 24 physical cores and only 32 virtual cores. So better to limit physical cores to 16." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min virtual cores per physical core if unequal", + "output": "min virtual cores per physical core if unequal refers to Minimum number of virtual cores per physical core. Only applies if virtual cores != physical cores. Can help situations like Intel i9 13900 with 24 physical cores and only 32 virtual cores. So better to limit physical cores to 16." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "min virtual cores per physical core if unequal refers to Minimum number of virtual cores per physical core. Only applies if virtual cores != physical cores. Can help situations like Intel i9 13900 with 24 physical cores and only 32 virtual cores. So better to limit physical cores to 16." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_virtual_cores_per_physical_core_if_unequal", + "output": "min virtual cores per physical core if unequal refers to Minimum number of virtual cores per physical core. Only applies if virtual cores != physical cores. Can help situations like Intel i9 13900 with 24 physical cores and only 32 virtual cores. So better to limit physical cores to 16." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_virtual_cores_per_physical_core_if_unequal", + "output": "min virtual cores per physical core if unequal refers to Minimum number of virtual cores per physical core. Only applies if virtual cores != physical cores. Can help situations like Intel i9 13900 with 24 physical cores and only 32 virtual cores. So better to limit physical cores to 16."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override_physical_cores", + "output": "override physical cores refers to Number of physical cores to assume are present (0: auto, >=1 use that integer value). If for some reason DAI does not automatically figure out physical cores correctly, one can override with this value. Some systems, especially virtualized, do not always provide correct information about the virtual cores, physical cores, sockets, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override_physical_cores", + "output": "override physical cores refers to Number of physical cores to assume are present (0: auto, >=1 use that integer value). If for some reason DAI does not automatically figure out physical cores correctly, one can override with this value. Some systems, especially virtualized, do not always provide correct information about the virtual cores, physical cores, sockets, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override physical cores", + "output": "override physical cores refers to Number of physical cores to assume are present (0: auto, >=1 use that integer value). If for some reason DAI does not automatically figure out physical cores correctly, one can override with this value. Some systems, especially virtualized, do not always provide correct information about the virtual cores, physical cores, sockets, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "override physical cores refers to Number of physical cores to assume are present (0: auto, >=1 use that integer value). If for some reason DAI does not automatically figure out physical cores correctly, one can override with this value. 
Some systems, especially virtualized, do not always provide correct information about the virtual cores, physical cores, sockets, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting override_physical_cores", + "output": "override physical cores refers to Number of physical cores to assume are present (0: auto, >=1 use that integer value). If for some reason DAI does not automatically figure out physical cores correctly, one can override with this value. Some systems, especially virtualized, do not always provide correct information about the virtual cores, physical cores, sockets, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting override_physical_cores", + "output": "override physical cores refers to Number of physical cores to assume are present (0: auto, >=1 use that integer value). If for some reason DAI does not automatically figure out physical cores correctly, one can override with this value. Some systems, especially virtualized, do not always provide correct information about the virtual cores, physical cores, sockets, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override_virtual_cores", + "output": "override virtual cores refers to Number of virtual cores to assume are present (0: auto, >=1 use that integer value). If for some reason DAI does not automatically figure out virtual cores correctly, or only a portion of the system is to be used, one can override with this value. Some systems, especially virtualized, do not always provide correct information about the virtual cores, physical cores, sockets, etc." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override_virtual_cores", + "output": "override virtual cores refers to Number of virtual cores to assume are present (0: auto, >=1 use that integer value). If for some reason DAI does not automatically figure out virtual cores correctly, or only a portion of the system is to be used, one can override with this value. Some systems, especially virtualized, do not always provide correct information about the virtual cores, physical cores, sockets, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override virtual cores", + "output": "override virtual cores refers to Number of virtual cores to assume are present (0: auto, >=1 use that integer value). If for some reason DAI does not automatically figure out virtual cores correctly, or only a portion of the system is to be used, one can override with this value. Some systems, especially virtualized, do not always provide correct information about the virtual cores, physical cores, sockets, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "override virtual cores refers to Number of virtual cores to assume are present (0: auto, >=1 use that integer value). If for some reason DAI does not automatically figure out virtual cores correctly, or only a portion of the system is to be used, one can override with this value. Some systems, especially virtualized, do not always provide correct information about the virtual cores, physical cores, sockets, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting override_virtual_cores", + "output": "override virtual cores refers to Number of virtual cores to assume are present (0: auto, >=1 use that integer value). 
If for some reason DAI does not automatically figure out virtual cores correctly, or only a portion of the system is to be used, one can override with this value. Some systems, especially virtualized, do not always provide correct information about the virtual cores, physical cores, sockets, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting override_virtual_cores", + "output": "override virtual cores refers to Number of virtual cores to assume are present (0: auto, >=1 use that integer value). If for some reason DAI does not automatically figure out virtual cores correctly, or only a portion of the system is to be used, one can override with this value. Some systems, especially virtualized, do not always provide correct information about the virtual cores, physical cores, sockets, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "small_data_recipe_work", + "output": "small data recipe work refers to Whether to treat data as small recipe in terms of work, by spreading many small tasks across many cores instead of forcing GPUs, for models that support it via static var _use_single_core_if_many. 'auto' looks at _use_single_core_if_many for models and data size, 'on' forces, 'off' disables." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "small_data_recipe_work", + "output": "small data recipe work refers to Small data work: Whether to treat data as small recipe in terms of work, by spreading many small tasks across many cores instead of forcing GPUs, for models that support it via static var _use_single_core_if_many. 'auto' looks at _use_single_core_if_many for models and data size, 'on' forces, 'off' disables." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "small data recipe work", + "output": "small data recipe work refers to Small data work: Whether to treat data as small recipe in terms of work, by spreading many small tasks across many cores instead of forcing GPUs, for models that support it via static var _use_single_core_if_many. 'auto' looks at _use_single_core_if_many for models and data size, 'on' forces, 'off' disables." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Small data work: ", + "output": "small data recipe work refers to Small data work: Whether to treat data as small recipe in terms of work, by spreading many small tasks across many cores instead of forcing GPUs, for models that support it via static var _use_single_core_if_many. 'auto' looks at _use_single_core_if_many for models and data size, 'on' forces, 'off' disables." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting small_data_recipe_work", + "output": "small data recipe work refers to Whether to treat data as small recipe in terms of work, by spreading many small tasks across many cores instead of forcing GPUs, for models that support it via static var _use_single_core_if_many. 'auto' looks at _use_single_core_if_many for models and data size, 'on' forces, 'off' disables." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting small_data_recipe_work", + "output": "small data recipe work refers to Small data work: Whether to treat data as small recipe in terms of work, by spreading many small tasks across many cores instead of forcing GPUs, for models that support it via static var _use_single_core_if_many. 'auto' looks at _use_single_core_if_many for models and data size, 'on' forces, 'off' disables." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall_subprocess_submission_dai_fork_threshold_count", + "output": "stall subprocess submission dai fork threshold count refers to Stall submission of tasks if total DAI fork count exceeds count (-1 to disable, 0 for automatic of max_cores_dai)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall_subprocess_submission_dai_fork_threshold_count", + "output": "stall subprocess submission dai fork threshold count refers to Stall submission of tasks if total DAI fork count exceeds count (-1 to disable, 0 for automatic of max_cores_dai)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall subprocess submission dai fork threshold count", + "output": "stall subprocess submission dai fork threshold count refers to Stall submission of tasks if total DAI fork count exceeds count (-1 to disable, 0 for automatic of max_cores_dai)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "stall subprocess submission dai fork threshold count refers to Stall submission of tasks if total DAI fork count exceeds count (-1 to disable, 0 for automatic of max_cores_dai)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting stall_subprocess_submission_dai_fork_threshold_count", + "output": "stall subprocess submission dai fork threshold count refers to Stall submission of tasks if total DAI fork count exceeds count (-1 to disable, 0 for automatic of max_cores_dai)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting stall_subprocess_submission_dai_fork_threshold_count", + "output": "stall subprocess submission dai 
fork threshold count refers to Stall submission of tasks if total DAI fork count exceeds count (-1 to disable, 0 for automatic of max_cores_dai)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall_subprocess_submission_mem_threshold_pct", + "output": "stall subprocess submission mem threshold pct refers to Stall submission of tasks if system memory available is less than this threshold in percent (set to 0 to disable).Above this threshold, the number of workers in any pool of workers is linearly reduced down to 1 once hitting this threshold. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall_subprocess_submission_mem_threshold_pct", + "output": "stall subprocess submission mem threshold pct refers to Stall submission of tasks if system memory available is less than this threshold in percent (set to 0 to disable).Above this threshold, the number of workers in any pool of workers is linearly reduced down to 1 once hitting this threshold. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall subprocess submission mem threshold pct", + "output": "stall subprocess submission mem threshold pct refers to Stall submission of tasks if system memory available is less than this threshold in percent (set to 0 to disable).Above this threshold, the number of workers in any pool of workers is linearly reduced down to 1 once hitting this threshold. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "stall subprocess submission mem threshold pct refers to Stall submission of tasks if system memory available is less than this threshold in percent (set to 0 to disable).Above this threshold, the number of workers in any pool of workers is linearly reduced down to 1 once hitting this threshold. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting stall_subprocess_submission_mem_threshold_pct", + "output": "stall subprocess submission mem threshold pct refers to Stall submission of tasks if system memory available is less than this threshold in percent (set to 0 to disable).Above this threshold, the number of workers in any pool of workers is linearly reduced down to 1 once hitting this threshold. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting stall_subprocess_submission_mem_threshold_pct", + "output": "stall subprocess submission mem threshold pct refers to Stall submission of tasks if system memory available is less than this threshold in percent (set to 0 to disable).Above this threshold, the number of workers in any pool of workers is linearly reduced down to 1 once hitting this threshold. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cores_by_physical", + "output": "max cores by physical refers to Whether to set automatic number of cores by physical (True) or logical (False) count.Using all logical cores can lead to poor performance due to cache thrashing. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cores_by_physical", + "output": "max cores by physical refers to Whether to set automatic number of cores by physical (True) or logical (False) count.Using all logical cores can lead to poor performance due to cache thrashing. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max cores by physical", + "output": "max cores by physical refers to Whether to set automatic number of cores by physical (True) or logical (False) count.Using all logical cores can lead to poor performance due to cache thrashing. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max cores by physical refers to Whether to set automatic number of cores by physical (True) or logical (False) count.Using all logical cores can lead to poor performance due to cache thrashing. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_cores_by_physical", + "output": "max cores by physical refers to Whether to set automatic number of cores by physical (True) or logical (False) count.Using all logical cores can lead to poor performance due to cache thrashing. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_cores_by_physical", + "output": "max cores by physical refers to Whether to set automatic number of cores by physical (True) or logical (False) count.Using all logical cores can lead to poor performance due to cache thrashing. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cores_limit", + "output": "max cores limit refers to Absolute limit to core count" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cores_limit", + "output": "max cores limit refers to Absolute limit to core count" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max cores limit", + "output": "max cores limit refers to Absolute limit to core count" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max cores limit refers to Absolute limit to core count" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_cores_limit", + "output": "max cores limit refers to Absolute limit to core count" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_cores_limit", + "output": "max cores limit refers to Absolute limit to core count" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_fit_cores", + "output": "max fit cores refers to Control maximum number of cores to use for a model's fit call (0 = all physical cores >= 1 that count). See also tensorflow_model_max_cores to further limit TensorFlow main models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_fit_cores", + "output": "max fit cores refers to Maximum number of cores to use for model fit: Control maximum number of cores to use for a model's fit call (0 = all physical cores >= 1 that count). See also tensorflow_model_max_cores to further limit TensorFlow main models." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max fit cores", + "output": "max fit cores refers to Maximum number of cores to use for model fit: Control maximum number of cores to use for a model's fit call (0 = all physical cores >= 1 that count). See also tensorflow_model_max_cores to further limit TensorFlow main models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of cores to use for model fit: ", + "output": "max fit cores refers to Maximum number of cores to use for model fit: Control maximum number of cores to use for a model's fit call (0 = all physical cores >= 1 that count). See also tensorflow_model_max_cores to further limit TensorFlow main models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_fit_cores", + "output": "max fit cores refers to Control maximum number of cores to use for a model's fit call (0 = all physical cores >= 1 that count). See also tensorflow_model_max_cores to further limit TensorFlow main models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_fit_cores", + "output": "max fit cores refers to Maximum number of cores to use for model fit: Control maximum number of cores to use for a model's fit call (0 = all physical cores >= 1 that count). See also tensorflow_model_max_cores to further limit TensorFlow main models." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "parallel_score_max_workers", + "output": "parallel score max workers refers to Control maximum number of cores to use for a scoring across all chosen scorers (0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "parallel_score_max_workers", + "output": "parallel score max workers refers to Maximum number of cores to use for model parallel scoring: Control maximum number of cores to use for a scoring across all chosen scorers (0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "parallel score max workers", + "output": "parallel score max workers refers to Maximum number of cores to use for model parallel scoring: Control maximum number of cores to use for a scoring across all chosen scorers (0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of cores to use for model parallel scoring: ", + "output": "parallel score max workers refers to Maximum number of cores to use for model parallel scoring: Control maximum number of cores to use for a scoring across all chosen scorers (0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting parallel_score_max_workers", + "output": "parallel score max workers refers to Control maximum number of cores to use for a scoring across all chosen scorers (0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting parallel_score_max_workers", + "output": "parallel score max workers refers to Maximum number of cores to use for model parallel scoring: Control maximum number of cores to use for a scoring across all chosen scorers 
(0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_dask_cluster", + "output": "use dask cluster refers to Whether to use full multinode distributed cluster (True) or single-node dask (False).In some cases, using entire cluster can be inefficient. E.g. several DGX nodes can be more efficientif used one DGX at a time for medium-sized data. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_dask_cluster", + "output": "use dask cluster refers to If full dask cluster is enabled, use full cluster: Whether to use full multinode distributed cluster (True) or single-node dask (False).In some cases, using entire cluster can be inefficient. E.g. several DGX nodes can be more efficientif used one DGX at a time for medium-sized data. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use dask cluster", + "output": "use dask cluster refers to If full dask cluster is enabled, use full cluster: Whether to use full multinode distributed cluster (True) or single-node dask (False).In some cases, using entire cluster can be inefficient. E.g. several DGX nodes can be more efficientif used one DGX at a time for medium-sized data. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "If full dask cluster is enabled, use full cluster: ", + "output": "use dask cluster refers to If full dask cluster is enabled, use full cluster: Whether to use full multinode distributed cluster (True) or single-node dask (False).In some cases, using entire cluster can be inefficient. E.g. several DGX nodes can be more efficientif used one DGX at a time for medium-sized data. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting use_dask_cluster", + "output": "use dask cluster refers to Whether to use full multinode distributed cluster (True) or single-node dask (False).In some cases, using entire cluster can be inefficient. E.g. several DGX nodes can be more efficientif used one DGX at a time for medium-sized data. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting use_dask_cluster", + "output": "use dask cluster refers to If full dask cluster is enabled, use full cluster: Whether to use full multinode distributed cluster (True) or single-node dask (False).In some cases, using entire cluster can be inefficient. E.g. several DGX nodes can be more efficientif used one DGX at a time for medium-sized data. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_predict_cores", + "output": "max predict cores refers to Control maximum number of cores to use for a model's predict call (0 = all physical cores >= 1 that count)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_predict_cores", + "output": "max predict cores refers to Maximum number of cores to use for model predict: Control maximum number of cores to use for a model's predict call (0 = all physical cores >= 1 that count)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max predict cores", + "output": "max predict cores refers to Maximum number of cores to use for model predict: Control maximum number of cores to use for a model's predict call (0 = all physical cores >= 1 that count)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of cores to 
use for model predict: ", + "output": "max predict cores refers to Maximum number of cores to use for model predict: Control maximum number of cores to use for a model's predict call (0 = all physical cores >= 1 that count)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_predict_cores", + "output": "max predict cores refers to Control maximum number of cores to use for a model's predict call (0 = all physical cores >= 1 that count)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_predict_cores", + "output": "max predict cores refers to Maximum number of cores to use for model predict: Control maximum number of cores to use for a model's predict call (0 = all physical cores >= 1 that count)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_predict_cores_in_dai_reduce_factor", + "output": "max predict cores in dai reduce factor refers to Factor by which to reduce physical cores, to use for post-model experiment tasks like autoreport, MLI, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_predict_cores_in_dai_reduce_factor", + "output": "max predict cores in dai reduce factor refers to Factor by which to reduce physical cores, to use for post-model experiment tasks like autoreport, MLI, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max predict cores in dai reduce factor", + "output": "max predict cores in dai reduce factor refers to Factor by which to reduce physical cores, to use for post-model experiment tasks like autoreport, MLI, etc." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max predict cores in dai reduce factor refers to Factor by which to reduce physical cores, to use for post-model experiment tasks like autoreport, MLI, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_predict_cores_in_dai_reduce_factor", + "output": "max predict cores in dai reduce factor refers to Factor by which to reduce physical cores, to use for post-model experiment tasks like autoreport, MLI, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_predict_cores_in_dai_reduce_factor", + "output": "max predict cores in dai reduce factor refers to Factor by which to reduce physical cores, to use for post-model experiment tasks like autoreport, MLI, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_max_predict_cores_in_dai", + "output": "max max predict cores in dai refers to Maximum number of cores to use for post-model experiment tasks like autoreport, MLI, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_max_predict_cores_in_dai", + "output": "max max predict cores in dai refers to Maximum number of cores to use for post-model experiment tasks like autoreport, MLI, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max max predict cores in dai", + "output": "max max predict cores in dai refers to Maximum number of cores to use for post-model experiment tasks like autoreport, MLI, etc." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max max predict cores in dai refers to Maximum number of cores to use for post-model experiment tasks like autoreport, MLI, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_max_predict_cores_in_dai", + "output": "max max predict cores in dai refers to Maximum number of cores to use for post-model experiment tasks like autoreport, MLI, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_max_predict_cores_in_dai", + "output": "max max predict cores in dai refers to Maximum number of cores to use for post-model experiment tasks like autoreport, MLI, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_predict_cores_in_dai", + "output": "max predict cores in dai refers to Control maximum number of cores to use for a model's transform and predict call when doing operations inside DAI-MLI GUI and R/Py client. The main experiment and other tasks like MLI and autoreport have separate queues. The main experiments have run at most worker_remote_processors tasks (limited by cores if auto mode), while other tasks run at most worker_local_processors (limited by cores if auto mode) tasks at the same time, so many small tasks can add up. To prevent overloading the system, the defaults are conservative. However, if most of the activity involves autoreport or MLI, and no model experiments are running, it may be safe to increase this value to something larger than 4. -1 : Auto mode. Up to physical cores divided by 4, up to maximum of 10. 0 : all physical cores >= 1: that count). 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_predict_cores_in_dai", + "output": "max predict cores in dai refers to Maximum number of cores to use for model transform and predict when doing MLI and AutoDoc.: Control maximum number of cores to use for a model's transform and predict call when doing operations inside DAI-MLI GUI and R/Py client. The main experiment and other tasks like MLI and autoreport have separate queues. The main experiments have run at most worker_remote_processors tasks (limited by cores if auto mode), while other tasks run at most worker_local_processors (limited by cores if auto mode) tasks at the same time, so many small tasks can add up. To prevent overloading the system, the defaults are conservative. However, if most of the activity involves autoreport or MLI, and no model experiments are running, it may be safe to increase this value to something larger than 4. -1 : Auto mode. Up to physical cores divided by 4, up to maximum of 10. 0 : all physical cores >= 1: that count). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max predict cores in dai", + "output": "max predict cores in dai refers to Maximum number of cores to use for model transform and predict when doing MLI and AutoDoc.: Control maximum number of cores to use for a model's transform and predict call when doing operations inside DAI-MLI GUI and R/Py client. The main experiment and other tasks like MLI and autoreport have separate queues. The main experiments have run at most worker_remote_processors tasks (limited by cores if auto mode), while other tasks run at most worker_local_processors (limited by cores if auto mode) tasks at the same time, so many small tasks can add up. To prevent overloading the system, the defaults are conservative. 
However, if most of the activity involves autoreport or MLI, and no model experiments are running, it may be safe to increase this value to something larger than 4. -1 : Auto mode. Up to physical cores divided by 4, up to maximum of 10. 0 : all physical cores >= 1: that count). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of cores to use for model transform and predict when doing MLI and AutoDoc.: ", + "output": "max predict cores in dai refers to Maximum number of cores to use for model transform and predict when doing MLI and AutoDoc.: Control maximum number of cores to use for a model's transform and predict call when doing operations inside DAI-MLI GUI and R/Py client. The main experiment and other tasks like MLI and autoreport have separate queues. The main experiments have run at most worker_remote_processors tasks (limited by cores if auto mode), while other tasks run at most worker_local_processors (limited by cores if auto mode) tasks at the same time, so many small tasks can add up. To prevent overloading the system, the defaults are conservative. However, if most of the activity involves autoreport or MLI, and no model experiments are running, it may be safe to increase this value to something larger than 4. -1 : Auto mode. Up to physical cores divided by 4, up to maximum of 10. 0 : all physical cores >= 1: that count). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_predict_cores_in_dai", + "output": "max predict cores in dai refers to Control maximum number of cores to use for a model's transform and predict call when doing operations inside DAI-MLI GUI and R/Py client. The main experiment and other tasks like MLI and autoreport have separate queues. 
The main experiments have run at most worker_remote_processors tasks (limited by cores if auto mode), while other tasks run at most worker_local_processors (limited by cores if auto mode) tasks at the same time, so many small tasks can add up. To prevent overloading the system, the defaults are conservative. However, if most of the activity involves autoreport or MLI, and no model experiments are running, it may be safe to increase this value to something larger than 4. -1 : Auto mode. Up to physical cores divided by 4, up to maximum of 10. 0 : all physical cores >= 1: that count). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_predict_cores_in_dai", + "output": "max predict cores in dai refers to Maximum number of cores to use for model transform and predict when doing MLI and AutoDoc.: Control maximum number of cores to use for a model's transform and predict call when doing operations inside DAI-MLI GUI and R/Py client. The main experiment and other tasks like MLI and autoreport have separate queues. The main experiments have run at most worker_remote_processors tasks (limited by cores if auto mode), while other tasks run at most worker_local_processors (limited by cores if auto mode) tasks at the same time, so many small tasks can add up. To prevent overloading the system, the defaults are conservative. However, if most of the activity involves autoreport or MLI, and no model experiments are running, it may be safe to increase this value to something larger than 4. -1 : Auto mode. Up to physical cores divided by 4, up to maximum of 10. 0 : all physical cores >= 1: that count). 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "batch_cpu_tuning_max_workers", + "output": "batch cpu tuning max workers refers to Control number of workers used in CPU mode for tuning (0 = socket count -1 = all physical cores >= 1 that count). More workers will be more parallel but models learn less from each other." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "batch_cpu_tuning_max_workers", + "output": "batch cpu tuning max workers refers to Tuning workers per batch for CPU: Control number of workers used in CPU mode for tuning (0 = socket count -1 = all physical cores >= 1 that count). More workers will be more parallel but models learn less from each other." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "batch cpu tuning max workers", + "output": "batch cpu tuning max workers refers to Tuning workers per batch for CPU: Control number of workers used in CPU mode for tuning (0 = socket count -1 = all physical cores >= 1 that count). More workers will be more parallel but models learn less from each other." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Tuning workers per batch for CPU: ", + "output": "batch cpu tuning max workers refers to Tuning workers per batch for CPU: Control number of workers used in CPU mode for tuning (0 = socket count -1 = all physical cores >= 1 that count). More workers will be more parallel but models learn less from each other." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting batch_cpu_tuning_max_workers", + "output": "batch cpu tuning max workers refers to Control number of workers used in CPU mode for tuning (0 = socket count -1 = all physical cores >= 1 that count). More workers will be more parallel but models learn less from each other." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting batch_cpu_tuning_max_workers", + "output": "batch cpu tuning max workers refers to Tuning workers per batch for CPU: Control number of workers used in CPU mode for tuning (0 = socket count -1 = all physical cores >= 1 that count). More workers will be more parallel but models learn less from each other." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cpu_max_workers", + "output": "cpu max workers refers to Control number of workers used in CPU mode for training (0 = socket count -1 = all physical cores >= 1 that count)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cpu_max_workers", + "output": "cpu max workers refers to Num. workers for CPU training: Control number of workers used in CPU mode for training (0 = socket count -1 = all physical cores >= 1 that count)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cpu max workers", + "output": "cpu max workers refers to Num. workers for CPU training: Control number of workers used in CPU mode for training (0 = socket count -1 = all physical cores >= 1 that count)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Num. workers for CPU training: ", + "output": "cpu max workers refers to Num. 
workers for CPU training: Control number of workers used in CPU mode for training (0 = socket count -1 = all physical cores >= 1 that count)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting cpu_max_workers", + "output": "cpu max workers refers to Control number of workers used in CPU mode for training (0 = socket count -1 = all physical cores >= 1 that count)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting cpu_max_workers", + "output": "cpu max workers refers to Num. workers for CPU training: Control number of workers used in CPU mode for training (0 = socket count -1 = all physical cores >= 1 that count)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "assumed_simultaneous_dt_forks_munging", + "output": "assumed simultaneous dt forks munging refers to Expected maximum number of forks, used to ensure datatable doesn't overload system. For actual use beyond this value, system will start to have slow-down issues" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "assumed_simultaneous_dt_forks_munging", + "output": "assumed simultaneous dt forks munging refers to Assumed/Expected number of munging forks: Expected maximum number of forks, used to ensure datatable doesn't overload system. For actual use beyond this value, system will start to have slow-down issues" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "assumed simultaneous dt forks munging", + "output": "assumed simultaneous dt forks munging refers to Assumed/Expected number of munging forks: Expected maximum number of forks, used to ensure datatable doesn't overload system. 
For actual use beyond this value, system will start to have slow-down issues" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Assumed/Expected number of munging forks: ", + "output": "assumed simultaneous dt forks munging refers to Assumed/Expected number of munging forks: Expected maximum number of forks, used to ensure datatable doesn't overload system. For actual use beyond this value, system will start to have slow-down issues" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting assumed_simultaneous_dt_forks_munging", + "output": "assumed simultaneous dt forks munging refers to Expected maximum number of forks, used to ensure datatable doesn't overload system. For actual use beyond this value, system will start to have slow-down issues" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting assumed_simultaneous_dt_forks_munging", + "output": "assumed simultaneous dt forks munging refers to Assumed/Expected number of munging forks: Expected maximum number of forks, used to ensure datatable doesn't overload system. 
For actual use beyond this value, system will start to have slow-down issues" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "assumed_simultaneous_dt_forks_stats_openblas", + "output": "assumed simultaneous dt forks stats openblas refers to Expected maximum number of forks by computing statistics during ingestion, used to ensure datatable doesn't overload system" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "assumed_simultaneous_dt_forks_stats_openblas", + "output": "assumed simultaneous dt forks stats openblas refers to Expected maximum number of forks by computing statistics during ingestion, used to ensure datatable doesn't overload system" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "assumed simultaneous dt forks stats openblas", + "output": "assumed simultaneous dt forks stats openblas refers to Expected maximum number of forks by computing statistics during ingestion, used to ensure datatable doesn't overload system" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "assumed simultaneous dt forks stats openblas refers to Expected maximum number of forks by computing statistics during ingestion, used to ensure datatable doesn't overload system" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting assumed_simultaneous_dt_forks_stats_openblas", + "output": "assumed simultaneous dt forks stats openblas refers to Expected maximum number of forks by computing statistics during ingestion, used to ensure datatable doesn't overload system" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting 
assumed_simultaneous_dt_forks_stats_openblas", + "output": "assumed simultaneous dt forks stats openblas refers to Expected maximum number of forks by computing statistics during ingestion, used to ensure datatable doesn't overload system" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_max_dt_threads_munging", + "output": "max max dt threads munging refers to Maximum of threads for datatable for munging" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_max_dt_threads_munging", + "output": "max max dt threads munging refers to Max. threads for datatable munging: Maximum of threads for datatable for munging" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max max dt threads munging", + "output": "max max dt threads munging refers to Max. threads for datatable munging: Maximum of threads for datatable for munging" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. threads for datatable munging: ", + "output": "max max dt threads munging refers to Max. threads for datatable munging: Maximum of threads for datatable for munging" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_max_dt_threads_munging", + "output": "max max dt threads munging refers to Maximum of threads for datatable for munging" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_max_dt_threads_munging", + "output": "max max dt threads munging refers to Max. 
threads for datatable munging: Maximum of threads for datatable for munging" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_max_dt_threads_stats_openblas", + "output": "max max dt threads stats openblas refers to Expected maximum of threads for datatable no matter if many more cores" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_max_dt_threads_stats_openblas", + "output": "max max dt threads stats openblas refers to Expected maximum of threads for datatable no matter if many more cores" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max max dt threads stats openblas", + "output": "max max dt threads stats openblas refers to Expected maximum of threads for datatable no matter if many more cores" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max max dt threads stats openblas refers to Expected maximum of threads for datatable no matter if many more cores" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_max_dt_threads_stats_openblas", + "output": "max max dt threads stats openblas refers to Expected maximum of threads for datatable no matter if many more cores" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_max_dt_threads_stats_openblas", + "output": "max max dt threads stats openblas refers to Expected maximum of threads for datatable no matter if many more cores" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_max_dt_threads_readwrite", + "output": "max max dt threads readwrite refers to Maximum of threads for datatable 
for reading/writing files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_max_dt_threads_readwrite", + "output": "max max dt threads readwrite refers to Max. threads for datatable reading/writing: Maximum of threads for datatable for reading/writing files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max max dt threads readwrite", + "output": "max max dt threads readwrite refers to Max. threads for datatable reading/writing: Maximum of threads for datatable for reading/writing files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. threads for datatable reading/writing: ", + "output": "max max dt threads readwrite refers to Max. threads for datatable reading/writing: Maximum of threads for datatable for reading/writing files" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_max_dt_threads_readwrite", + "output": "max max dt threads readwrite refers to Maximum of threads for datatable for reading/writing files" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_max_dt_threads_readwrite", + "output": "max max dt threads readwrite refers to Max. 
threads for datatable reading/writing: Maximum of threads for datatable for reading/writing files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_workers_final_base_models", + "output": "max workers final base models refers to Maximum parallel workers for final model building.0 means automatic, >=1 means limit to no more than that number of parallel jobs.Can be required if some transformer or model uses more than the expected amount of memory.Ways to reduce final model building memory usage, e.g. set one or more of these and retrain final model:1) Increase munging_memory_overhead_factor to 102) Increase final_munging_memory_reduction_factor to 103) Lower max_workers_final_munging to 14) Lower max_workers_final_base_models to 15) Lower max_cores to, e.g., 1/2 or 1/4 of physical cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_workers_final_base_models", + "output": "max workers final base models refers to Max. workers for final model building: Maximum parallel workers for final model building.0 means automatic, >=1 means limit to no more than that number of parallel jobs.Can be required if some transformer or model uses more than the expected amount of memory.Ways to reduce final model building memory usage, e.g. set one or more of these and retrain final model:1) Increase munging_memory_overhead_factor to 102) Increase final_munging_memory_reduction_factor to 103) Lower max_workers_final_munging to 14) Lower max_workers_final_base_models to 15) Lower max_cores to, e.g., 1/2 or 1/4 of physical cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max workers final base models", + "output": "max workers final base models refers to Max. 
workers for final model building: Maximum parallel workers for final model building.0 means automatic, >=1 means limit to no more than that number of parallel jobs.Can be required if some transformer or model uses more than the expected amount of memory.Ways to reduce final model building memory usage, e.g. set one or more of these and retrain final model:1) Increase munging_memory_overhead_factor to 102) Increase final_munging_memory_reduction_factor to 103) Lower max_workers_final_munging to 14) Lower max_workers_final_base_models to 15) Lower max_cores to, e.g., 1/2 or 1/4 of physical cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. workers for final model building: ", + "output": "max workers final base models refers to Max. workers for final model building: Maximum parallel workers for final model building.0 means automatic, >=1 means limit to no more than that number of parallel jobs.Can be required if some transformer or model uses more than the expected amount of memory.Ways to reduce final model building memory usage, e.g. set one or more of these and retrain final model:1) Increase munging_memory_overhead_factor to 102) Increase final_munging_memory_reduction_factor to 103) Lower max_workers_final_munging to 14) Lower max_workers_final_base_models to 15) Lower max_cores to, e.g., 1/2 or 1/4 of physical cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_workers_final_base_models", + "output": "max workers final base models refers to Maximum parallel workers for final model building.0 means automatic, >=1 means limit to no more than that number of parallel jobs.Can be required if some transformer or model uses more than the expected amount of memory.Ways to reduce final model building memory usage, e.g. 
set one or more of these and retrain final model:1) Increase munging_memory_overhead_factor to 102) Increase final_munging_memory_reduction_factor to 103) Lower max_workers_final_munging to 14) Lower max_workers_final_base_models to 15) Lower max_cores to, e.g., 1/2 or 1/4 of physical cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_workers_final_base_models", + "output": "max workers final base models refers to Max. workers for final model building: Maximum parallel workers for final model building.0 means automatic, >=1 means limit to no more than that number of parallel jobs.Can be required if some transformer or model uses more than the expected amount of memory.Ways to reduce final model building memory usage, e.g. set one or more of these and retrain final model:1) Increase munging_memory_overhead_factor to 102) Increase final_munging_memory_reduction_factor to 103) Lower max_workers_final_munging to 14) Lower max_workers_final_base_models to 15) Lower max_cores to, e.g., 1/2 or 1/4 of physical cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_workers_final_munging", + "output": "max workers final munging refers to Maximum parallel workers for final per-model munging.0 means automatic, >=1 means limit to no more than that number of parallel jobs.Can be required if some transformer uses more than the expected amount of memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_workers_final_munging", + "output": "max workers final munging refers to Max. workers for final per-model munging: Maximum parallel workers for final per-model munging.0 means automatic, >=1 means limit to no more than that number of parallel jobs.Can be required if some transformer uses more than the expected amount of memory." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max workers final munging", + "output": "max workers final munging refers to Max. workers for final per-model munging: Maximum parallel workers for final per-model munging.0 means automatic, >=1 means limit to no more than that number of parallel jobs.Can be required if some transformer uses more than the expected amount of memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. workers for final per-model munging: ", + "output": "max workers final munging refers to Max. workers for final per-model munging: Maximum parallel workers for final per-model munging.0 means automatic, >=1 means limit to no more than that number of parallel jobs.Can be required if some transformer uses more than the expected amount of memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_workers_final_munging", + "output": "max workers final munging refers to Maximum parallel workers for final per-model munging.0 means automatic, >=1 means limit to no more than that number of parallel jobs.Can be required if some transformer uses more than the expected amount of memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_workers_final_munging", + "output": "max workers final munging refers to Max. workers for final per-model munging: Maximum parallel workers for final per-model munging.0 means automatic, >=1 means limit to no more than that number of parallel jobs.Can be required if some transformer uses more than the expected amount of memory." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_dt_threads_munging", + "output": "min dt threads munging refers to Minimum number of threads for datatable (and OpenMP) during data munging (per process).datatable is the main data munging tool used within Driverless ai (source :https://github.com/h2oai/datatable) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_dt_threads_munging", + "output": "min dt threads munging refers to Minimum number of threads for datatable (and OpenMP) during data munging (per process).datatable is the main data munging tool used within Driverless ai (source :https://github.com/h2oai/datatable) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min dt threads munging", + "output": "min dt threads munging refers to Minimum number of threads for datatable (and OpenMP) during data munging (per process).datatable is the main data munging tool used within Driverless ai (source :https://github.com/h2oai/datatable) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "min dt threads munging refers to Minimum number of threads for datatable (and OpenMP) during data munging (per process).datatable is the main data munging tool used within Driverless ai (source :https://github.com/h2oai/datatable) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_dt_threads_munging", + "output": "min dt threads munging refers to Minimum number of threads for datatable (and OpenMP) during data munging (per process).datatable is the main data munging tool used within Driverless ai (source :https://github.com/h2oai/datatable) " + }, + { + "prompt_type": "human_bot", + "instruction": 
"Provide a detailed explanation of the expert setting min_dt_threads_munging", + "output": "min dt threads munging refers to Minimum number of threads for datatable (and OpenMP) during data munging (per process).datatable is the main data munging tool used within Driverless ai (source :https://github.com/h2oai/datatable) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_dt_threads_final_munging", + "output": "min dt threads final munging refers to Like min_datatable (and OpenMP)_threads_munging but for final pipeline munging" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_dt_threads_final_munging", + "output": "min dt threads final munging refers to Like min_datatable (and OpenMP)_threads_munging but for final pipeline munging" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min dt threads final munging", + "output": "min dt threads final munging refers to Like min_datatable (and OpenMP)_threads_munging but for final pipeline munging" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "min dt threads final munging refers to Like min_datatable (and OpenMP)_threads_munging but for final pipeline munging" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_dt_threads_final_munging", + "output": "min dt threads final munging refers to Like min_datatable (and OpenMP)_threads_munging but for final pipeline munging" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_dt_threads_final_munging", + "output": "min dt threads final munging refers to Like min_datatable (and OpenMP)_threads_munging but for final pipeline munging" + 
}, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_dt_threads_munging", + "output": "max dt threads munging refers to Maximum number of threads for datatable during data munging (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_dt_threads_munging", + "output": "max dt threads munging refers to Max. Num. of threads to use for datatable and openblas for munging and model training (0 = all, -1 = auto): Maximum number of threads for datatable during data munging (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max dt threads munging", + "output": "max dt threads munging refers to Max. Num. of threads to use for datatable and openblas for munging and model training (0 = all, -1 = auto): Maximum number of threads for datatable during data munging (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. Num. of threads to use for datatable and openblas for munging and model training (0 = all, -1 = auto): ", + "output": "max dt threads munging refers to Max. Num. of threads to use for datatable and openblas for munging and model training (0 = all, -1 = auto): Maximum number of threads for datatable during data munging (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_dt_threads_munging", + "output": "max dt threads munging refers to Maximum number of threads for datatable during data munging (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_dt_threads_munging", + "output": "max dt threads munging refers to Max. Num. of threads to use for datatable and openblas for munging and model training (0 = all, -1 = auto): Maximum number of threads for datatable during data munging (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_dt_threads_readwrite", + "output": "max dt threads readwrite refers to Maximum number of threads for datatable during data reading and writing (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_dt_threads_readwrite", + "output": "max dt threads readwrite refers to Max. Num. of threads to use for datatable read and write of files (0 = all, -1 = auto): Maximum number of threads for datatable during data reading and writing (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max dt threads readwrite", + "output": "max dt threads readwrite refers to Max. Num. 
of threads to use for datatable read and write of files (0 = all, -1 = auto): Maximum number of threads for datatable during data reading and writing (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. Num. of threads to use for datatable read and write of files (0 = all, -1 = auto): ", + "output": "max dt threads readwrite refers to Max. Num. of threads to use for datatable read and write of files (0 = all, -1 = auto): Maximum number of threads for datatable during data reading and writing (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_dt_threads_readwrite", + "output": "max dt threads readwrite refers to Maximum number of threads for datatable during data reading and writing (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_dt_threads_readwrite", + "output": "max dt threads readwrite refers to Max. Num. of threads to use for datatable read and write of files (0 = all, -1 = auto): Maximum number of threads for datatable during data reading and writing (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_dt_threads_stats_openblas", + "output": "max dt threads stats openblas refers to Maximum number of threads for datatable stats and openblas (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_dt_threads_stats_openblas", + "output": "max dt threads stats openblas refers to Max. Num. of threads to use for datatable stats and openblas (0 = all, -1 = auto): Maximum number of threads for datatable stats and openblas (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max dt threads stats openblas", + "output": "max dt threads stats openblas refers to Max. Num. of threads to use for datatable stats and openblas (0 = all, -1 = auto): Maximum number of threads for datatable stats and openblas (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. Num. of threads to use for datatable stats and openblas (0 = all, -1 = auto): ", + "output": "max dt threads stats openblas refers to Max. Num. of threads to use for datatable stats and openblas (0 = all, -1 = auto): Maximum number of threads for datatable stats and openblas (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_dt_threads_stats_openblas", + "output": "max dt threads stats openblas refers to Maximum number of threads for datatable stats and openblas (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_dt_threads_stats_openblas", + "output": "max dt threads stats openblas refers to Max. Num. 
of threads to use for datatable stats and openblas (0 = all, -1 = auto): Maximum number of threads for datatable stats and openblas (per process) (0 = all, -1 = auto).If multiple forks, threads are distributed across forks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_dt_threads_do_timeseries_split_suggestion", + "output": "max dt threads do timeseries split suggestion refers to Maximum number of threads for datatable during TS properties preview panel computations)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_dt_threads_do_timeseries_split_suggestion", + "output": "max dt threads do timeseries split suggestion refers to Maximum number of threads for datatable during TS properties preview panel computations)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max dt threads do timeseries split suggestion", + "output": "max dt threads do timeseries split suggestion refers to Maximum number of threads for datatable during TS properties preview panel computations)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max dt threads do timeseries split suggestion refers to Maximum number of threads for datatable during TS properties preview panel computations)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_dt_threads_do_timeseries_split_suggestion", + "output": "max dt threads do timeseries split suggestion refers to Maximum number of threads for datatable during TS properties preview panel computations)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_dt_threads_do_timeseries_split_suggestion", + "output": "max dt threads do timeseries split suggestion refers to Maximum number of threads for datatable during TS properties preview panel computations." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_gpus_per_experiment", + "output": "num gpus per experiment refers to Number of GPUs to use per experiment for training task. Set to -1 for all GPUs. An experiment will generate many different models. Currently num_gpus_per_experiment!=-1 disables GPU locking, so is only recommended for single experiments and single users. Ignored if GPUs disabled or no GPUs on system. More info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolation In multinode context when using dask, this refers to the per-node value. For ImageAutoModel, this refers to the total number of GPUs used for that entire model type, since there is only one model type for the entire experiment. E.g. if have 4 GPUs and want 2 ImageAuto experiments to run on 2 GPUs each, can set num_gpus_per_experiment to 2 for each experiment, and each of the 4 GPUs will be used one at a time by the 2 experiments each using 2 GPUs only. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_gpus_per_experiment", + "output": "num gpus per experiment refers to #GPUs/Experiment (-1 = autodetect or all): Number of GPUs to use per experiment for training task. 
Set to -1 for all GPUs. An experiment will generate many different models. Currently num_gpus_per_experiment!=-1 disables GPU locking, so is only recommended for single experiments and single users. Ignored if GPUs disabled or no GPUs on system. More info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolation In multinode context when using dask, this refers to the per-node value. For ImageAutoModel, this refers to the total number of GPUs used for that entire model type, since there is only one model type for the entire experiment. E.g. if have 4 GPUs and want 2 ImageAuto experiments to run on 2 GPUs each, can set num_gpus_per_experiment to 2 for each experiment, and each of the 4 GPUs will be used one at a time by the 2 experiments each using 2 GPUs only. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num gpus per experiment", + "output": "num gpus per experiment refers to #GPUs/Experiment (-1 = autodetect or all): Number of GPUs to use per experiment for training task. Set to -1 for all GPUs. An experiment will generate many different models. Currently num_gpus_per_experiment!=-1 disables GPU locking, so is only recommended for single experiments and single users. Ignored if GPUs disabled or no GPUs on system. More info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolation In multinode context when using dask, this refers to the per-node value. For ImageAutoModel, this refers to the total number of GPUs used for that entire model type, since there is only one model type for the entire experiment. E.g. if have 4 GPUs and want 2 ImageAuto experiments to run on 2 GPUs each, can set num_gpus_per_experiment to 2 for each experiment, and each of the 4 GPUs will be used one at a time by the 2 experiments each using 2 GPUs only. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "#GPUs/Experiment (-1 = autodetect or all): ", + "output": "num gpus per experiment refers to #GPUs/Experiment (-1 = autodetect or all): Number of GPUs to use per experiment for training task. Set to -1 for all GPUs. An experiment will generate many different models. Currently num_gpus_per_experiment!=-1 disables GPU locking, so is only recommended for single experiments and single users. Ignored if GPUs disabled or no GPUs on system. More info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolation In multinode context when using dask, this refers to the per-node value. For ImageAutoModel, this refers to the total number of GPUs used for that entire model type, since there is only one model type for the entire experiment. E.g. if have 4 GPUs and want 2 ImageAuto experiments to run on 2 GPUs each, can set num_gpus_per_experiment to 2 for each experiment, and each of the 4 GPUs will be used one at a time by the 2 experiments each using 2 GPUs only. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_gpus_per_experiment", + "output": "num gpus per experiment refers to Number of GPUs to use per experiment for training task. Set to -1 for all GPUs. An experiment will generate many different models. Currently num_gpus_per_experiment!=-1 disables GPU locking, so is only recommended for single experiments and single users. Ignored if GPUs disabled or no GPUs on system. More info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolation In multinode context when using dask, this refers to the per-node value. For ImageAutoModel, this refers to the total number of GPUs used for that entire model type, since there is only one model type for the entire experiment. E.g. 
if have 4 GPUs and want 2 ImageAuto experiments to run on 2 GPUs each, can set num_gpus_per_experiment to 2 for each experiment, and each of the 4 GPUs will be used one at a time by the 2 experiments each using 2 GPUs only. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_gpus_per_experiment", + "output": "num gpus per experiment refers to #GPUs/Experiment (-1 = autodetect or all): Number of GPUs to use per experiment for training task. Set to -1 for all GPUs. An experiment will generate many different models. Currently num_gpus_per_experiment!=-1 disables GPU locking, so is only recommended for single experiments and single users. Ignored if GPUs disabled or no GPUs on system. More info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolation In multinode context when using dask, this refers to the per-node value. For ImageAutoModel, this refers to the total number of GPUs used for that entire model type, since there is only one model type for the entire experiment. E.g. if have 4 GPUs and want 2 ImageAuto experiments to run on 2 GPUs each, can set num_gpus_per_experiment to 2 for each experiment, and each of the 4 GPUs will be used one at a time by the 2 experiments each using 2 GPUs only. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_num_cores_per_gpu", + "output": "min num cores per gpu refers to Number of CPU cores per GPU. Limits number of GPUs in order to have sufficient cores per GPU. Set to -1 to disable." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_num_cores_per_gpu", + "output": "min num cores per gpu refers to Num Cores/GPU: Number of CPU cores per GPU. Limits number of GPUs in order to have sufficient cores per GPU. Set to -1 to disable."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min num cores per gpu", + "output": "min num cores per gpu refers to Num Cores/GPU: Number of CPU cores per GPU. Limits number of GPUs in order to have sufficient cores per GPU. Set to -1 to disable." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Num Cores/GPU: ", + "output": "min num cores per gpu refers to Num Cores/GPU: Number of CPU cores per GPU. Limits number of GPUs in order to have sufficient cores per GPU. Set to -1 to disable." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_num_cores_per_gpu", + "output": "min num cores per gpu refers to Number of CPU cores per GPU. Limits number of GPUs in order to have sufficient cores per GPU. Set to -1 to disable." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_num_cores_per_gpu", + "output": "min num cores per gpu refers to Num Cores/GPU: Number of CPU cores per GPU. Limits number of GPUs in order to have sufficient cores per GPU. Set to -1 to disable." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_gpus_per_model", + "output": "num gpus per model refers to Number of GPUs to use per model training task. 
Set to -1 for all GPUs.For example, when this is set to -1 and there are 4 GPUs available, all of them can be used for the training of a single model.Only applicable currently to image auto pipeline building recipe or Dask models with more than one GPU or more than one node.Ignored if GPUs disabled or no GPUs on system.For ImageAutoModel, the maximum of num_gpus_per_model and num_gpus_per_experiment (all GPUs if -1) is taken.More info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolationIn multinode context when using Dask, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_gpus_per_model", + "output": "num gpus per model refers to #GPUs/Model (-1 = all): Number of GPUs to use per model training task. Set to -1 for all GPUs.For example, when this is set to -1 and there are 4 GPUs available, all of them can be used for the training of a single model.Only applicable currently to image auto pipeline building recipe or Dask models with more than one GPU or more than one node.Ignored if GPUs disabled or no GPUs on system.For ImageAutoModel, the maximum of num_gpus_per_model and num_gpus_per_experiment (all GPUs if -1) is taken.More info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolationIn multinode context when using Dask, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num gpus per model", + "output": "num gpus per model refers to #GPUs/Model (-1 = all): Number of GPUs to use per model training task. 
Set to -1 for all GPUs.For example, when this is set to -1 and there are 4 GPUs available, all of them can be used for the training of a single model.Only applicable currently to image auto pipeline building recipe or Dask models with more than one GPU or more than one node.Ignored if GPUs disabled or no GPUs on system.For ImageAutoModel, the maximum of num_gpus_per_model and num_gpus_per_experiment (all GPUs if -1) is taken.More info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolationIn multinode context when using Dask, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "#GPUs/Model (-1 = all): ", + "output": "num gpus per model refers to #GPUs/Model (-1 = all): Number of GPUs to use per model training task. Set to -1 for all GPUs.For example, when this is set to -1 and there are 4 GPUs available, all of them can be used for the training of a single model.Only applicable currently to image auto pipeline building recipe or Dask models with more than one GPU or more than one node.Ignored if GPUs disabled or no GPUs on system.For ImageAutoModel, the maximum of num_gpus_per_model and num_gpus_per_experiment (all GPUs if -1) is taken.More info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolationIn multinode context when using Dask, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_gpus_per_model", + "output": "num gpus per model refers to Number of GPUs to use per model training task. 
Set to -1 for all GPUs.For example, when this is set to -1 and there are 4 GPUs available, all of them can be used for the training of a single model.Only applicable currently to image auto pipeline building recipe or Dask models with more than one GPU or more than one node.Ignored if GPUs disabled or no GPUs on system.For ImageAutoModel, the maximum of num_gpus_per_model and num_gpus_per_experiment (all GPUs if -1) is taken.More info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolationIn multinode context when using Dask, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_gpus_per_model", + "output": "num gpus per model refers to #GPUs/Model (-1 = all): Number of GPUs to use per model training task. Set to -1 for all GPUs.For example, when this is set to -1 and there are 4 GPUs available, all of them can be used for the training of a single model.Only applicable currently to image auto pipeline building recipe or Dask models with more than one GPU or more than one node.Ignored if GPUs disabled or no GPUs on system.For ImageAutoModel, the maximum of num_gpus_per_model and num_gpus_per_experiment (all GPUs if -1) is taken.More info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolationIn multinode context when using Dask, this refers to the per-node value. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_gpus_for_prediction", + "output": "num gpus for prediction refers to Number of GPUs to use for predict for models and transform for transformers when running outside of fit/fit_transform.-1 means all, 0 means no GPUs, >1 means that many GPUs up to visible limit.If predict/transform are called in same process as fit/fit_transform, number of GPUs will match,while new processes will use this count for number of GPUs for applicable models/transformers.Exception: TensorFlow, PyTorch models/transformers, and RAPIDS predict on GPU always if GPUs exist.RAPIDS requires python scoring package be used also on GPUs.In multinode context when using Dask, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_gpus_for_prediction", + "output": "num gpus for prediction refers to Num. of GPUs for isolated prediction/transform: Number of GPUs to use for predict for models and transform for transformers when running outside of fit/fit_transform.-1 means all, 0 means no GPUs, >1 means that many GPUs up to visible limit.If predict/transform are called in same process as fit/fit_transform, number of GPUs will match,while new processes will use this count for number of GPUs for applicable models/transformers.Exception: TensorFlow, PyTorch models/transformers, and RAPIDS predict on GPU always if GPUs exist.RAPIDS requires python scoring package be used also on GPUs.In multinode context when using Dask, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num gpus for prediction", + "output": "num gpus for prediction refers to Num. 
of GPUs for isolated prediction/transform: Number of GPUs to use for predict for models and transform for transformers when running outside of fit/fit_transform.-1 means all, 0 means no GPUs, >1 means that many GPUs up to visible limit.If predict/transform are called in same process as fit/fit_transform, number of GPUs will match,while new processes will use this count for number of GPUs for applicable models/transformers.Exception: TensorFlow, PyTorch models/transformers, and RAPIDS predict on GPU always if GPUs exist.RAPIDS requires python scoring package be used also on GPUs.In multinode context when using Dask, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Num. of GPUs for isolated prediction/transform: ", + "output": "num gpus for prediction refers to Num. of GPUs for isolated prediction/transform: Number of GPUs to use for predict for models and transform for transformers when running outside of fit/fit_transform.-1 means all, 0 means no GPUs, >1 means that many GPUs up to visible limit.If predict/transform are called in same process as fit/fit_transform, number of GPUs will match,while new processes will use this count for number of GPUs for applicable models/transformers.Exception: TensorFlow, PyTorch models/transformers, and RAPIDS predict on GPU always if GPUs exist.RAPIDS requires python scoring package be used also on GPUs.In multinode context when using Dask, this refers to the per-node value. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_gpus_for_prediction", + "output": "num gpus for prediction refers to Number of GPUs to use for predict for models and transform for transformers when running outside of fit/fit_transform.-1 means all, 0 means no GPUs, >1 means that many GPUs up to visible limit.If predict/transform are called in same process as fit/fit_transform, number of GPUs will match,while new processes will use this count for number of GPUs for applicable models/transformers.Exception: TensorFlow, PyTorch models/transformers, and RAPIDS predict on GPU always if GPUs exist.RAPIDS requires python scoring package be used also on GPUs.In multinode context when using Dask, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_gpus_for_prediction", + "output": "num gpus for prediction refers to Num. of GPUs for isolated prediction/transform: Number of GPUs to use for predict for models and transform for transformers when running outside of fit/fit_transform.-1 means all, 0 means no GPUs, >1 means that many GPUs up to visible limit.If predict/transform are called in same process as fit/fit_transform, number of GPUs will match,while new processes will use this count for number of GPUs for applicable models/transformers.Exception: TensorFlow, PyTorch models/transformers, and RAPIDS predict on GPU always if GPUs exist.RAPIDS requires python scoring package be used also on GPUs.In multinode context when using Dask, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu_id_start", + "output": "gpu id start refers to Which gpu_id to start with-1 : auto-mode. E.g. 2 experiments can each set num_gpus_per_experiment to 2 and use 4 GPUsIf using CUDA_VISIBLE_DEVICES=... 
to control GPUs (preferred method), gpu_id=0 is thefirst in that restricted list of devices.E.g. if CUDA_VISIBLE_DEVICES='4,5' then gpu_id_start=0 will refer to thedevice #4.E.g. from expert mode, to run 2 experiments, each on a distinct GPU out of 2 GPUs:Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=1, gpu_id_start=0Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=1, gpu_id_start=1E.g. from expert mode, to run 2 experiments, each on a distinct GPU out of 8 GPUs:Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=4, gpu_id_start=0Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=4, gpu_id_start=4E.g. Like just above, but now run on all 4 GPUs/modelExperiment#1: num_gpus_per_model=4, num_gpus_per_experiment=4, gpu_id_start=0Experiment#2: num_gpus_per_model=4, num_gpus_per_experiment=4, gpu_id_start=4If num_gpus_per_model!=1, global GPU locking is disabled(because underlying algorithms don't support arbitrary gpu ids, only sequential ids),so must setup above correctly to avoid overlap across all experiments by all usersMore info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolationNote that GPU selection does not wrap, so gpu_id_start + num_gpus_per_model must be less than number of visibile GPUs " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu_id_start", + "output": "gpu id start refers to GPU starting ID (0..visible #GPUs - 1): Which gpu_id to start with-1 : auto-mode. E.g. 2 experiments can each set num_gpus_per_experiment to 2 and use 4 GPUsIf using CUDA_VISIBLE_DEVICES=... to control GPUs (preferred method), gpu_id=0 is thefirst in that restricted list of devices.E.g. if CUDA_VISIBLE_DEVICES='4,5' then gpu_id_start=0 will refer to thedevice #4.E.g. 
from expert mode, to run 2 experiments, each on a distinct GPU out of 2 GPUs:Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=1, gpu_id_start=0Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=1, gpu_id_start=1E.g. from expert mode, to run 2 experiments, each on a distinct GPU out of 8 GPUs:Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=4, gpu_id_start=0Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=4, gpu_id_start=4E.g. Like just above, but now run on all 4 GPUs/modelExperiment#1: num_gpus_per_model=4, num_gpus_per_experiment=4, gpu_id_start=0Experiment#2: num_gpus_per_model=4, num_gpus_per_experiment=4, gpu_id_start=4If num_gpus_per_model!=1, global GPU locking is disabled(because underlying algorithms don't support arbitrary gpu ids, only sequential ids),so must setup above correctly to avoid overlap across all experiments by all usersMore info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolationNote that GPU selection does not wrap, so gpu_id_start + num_gpus_per_model must be less than number of visibile GPUs " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu id start", + "output": "gpu id start refers to GPU starting ID (0..visible #GPUs - 1): Which gpu_id to start with-1 : auto-mode. E.g. 2 experiments can each set num_gpus_per_experiment to 2 and use 4 GPUsIf using CUDA_VISIBLE_DEVICES=... to control GPUs (preferred method), gpu_id=0 is thefirst in that restricted list of devices.E.g. if CUDA_VISIBLE_DEVICES='4,5' then gpu_id_start=0 will refer to thedevice #4.E.g. from expert mode, to run 2 experiments, each on a distinct GPU out of 2 GPUs:Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=1, gpu_id_start=0Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=1, gpu_id_start=1E.g. 
from expert mode, to run 2 experiments, each on a distinct GPU out of 8 GPUs:Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=4, gpu_id_start=0Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=4, gpu_id_start=4E.g. Like just above, but now run on all 4 GPUs/modelExperiment#1: num_gpus_per_model=4, num_gpus_per_experiment=4, gpu_id_start=0Experiment#2: num_gpus_per_model=4, num_gpus_per_experiment=4, gpu_id_start=4If num_gpus_per_model!=1, global GPU locking is disabled(because underlying algorithms don't support arbitrary gpu ids, only sequential ids),so must setup above correctly to avoid overlap across all experiments by all usersMore info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolationNote that GPU selection does not wrap, so gpu_id_start + num_gpus_per_model must be less than number of visibile GPUs " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "GPU starting ID (0..visible #GPUs - 1): ", + "output": "gpu id start refers to GPU starting ID (0..visible #GPUs - 1): Which gpu_id to start with-1 : auto-mode. E.g. 2 experiments can each set num_gpus_per_experiment to 2 and use 4 GPUsIf using CUDA_VISIBLE_DEVICES=... to control GPUs (preferred method), gpu_id=0 is thefirst in that restricted list of devices.E.g. if CUDA_VISIBLE_DEVICES='4,5' then gpu_id_start=0 will refer to thedevice #4.E.g. from expert mode, to run 2 experiments, each on a distinct GPU out of 2 GPUs:Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=1, gpu_id_start=0Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=1, gpu_id_start=1E.g. from expert mode, to run 2 experiments, each on a distinct GPU out of 8 GPUs:Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=4, gpu_id_start=0Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=4, gpu_id_start=4E.g. 
Like just above, but now run on all 4 GPUs/modelExperiment#1: num_gpus_per_model=4, num_gpus_per_experiment=4, gpu_id_start=0Experiment#2: num_gpus_per_model=4, num_gpus_per_experiment=4, gpu_id_start=4If num_gpus_per_model!=1, global GPU locking is disabled(because underlying algorithms don't support arbitrary gpu ids, only sequential ids),so must setup above correctly to avoid overlap across all experiments by all usersMore info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolationNote that GPU selection does not wrap, so gpu_id_start + num_gpus_per_model must be less than number of visibile GPUs " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting gpu_id_start", + "output": "gpu id start refers to Which gpu_id to start with-1 : auto-mode. E.g. 2 experiments can each set num_gpus_per_experiment to 2 and use 4 GPUsIf using CUDA_VISIBLE_DEVICES=... to control GPUs (preferred method), gpu_id=0 is thefirst in that restricted list of devices.E.g. if CUDA_VISIBLE_DEVICES='4,5' then gpu_id_start=0 will refer to thedevice #4.E.g. from expert mode, to run 2 experiments, each on a distinct GPU out of 2 GPUs:Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=1, gpu_id_start=0Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=1, gpu_id_start=1E.g. from expert mode, to run 2 experiments, each on a distinct GPU out of 8 GPUs:Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=4, gpu_id_start=0Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=4, gpu_id_start=4E.g. 
Like just above, but now run on all 4 GPUs/modelExperiment#1: num_gpus_per_model=4, num_gpus_per_experiment=4, gpu_id_start=0Experiment#2: num_gpus_per_model=4, num_gpus_per_experiment=4, gpu_id_start=4If num_gpus_per_model!=1, global GPU locking is disabled(because underlying algorithms don't support arbitrary gpu ids, only sequential ids),so must setup above correctly to avoid overlap across all experiments by all usersMore info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolationNote that GPU selection does not wrap, so gpu_id_start + num_gpus_per_model must be less than number of visibile GPUs " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting gpu_id_start", + "output": "gpu id start refers to GPU starting ID (0..visible #GPUs - 1): Which gpu_id to start with-1 : auto-mode. E.g. 2 experiments can each set num_gpus_per_experiment to 2 and use 4 GPUsIf using CUDA_VISIBLE_DEVICES=... to control GPUs (preferred method), gpu_id=0 is thefirst in that restricted list of devices.E.g. if CUDA_VISIBLE_DEVICES='4,5' then gpu_id_start=0 will refer to thedevice #4.E.g. from expert mode, to run 2 experiments, each on a distinct GPU out of 2 GPUs:Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=1, gpu_id_start=0Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=1, gpu_id_start=1E.g. from expert mode, to run 2 experiments, each on a distinct GPU out of 8 GPUs:Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=4, gpu_id_start=0Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=4, gpu_id_start=4E.g. 
Like just above, but now run on all 4 GPUs/modelExperiment#1: num_gpus_per_model=4, num_gpus_per_experiment=4, gpu_id_start=0Experiment#2: num_gpus_per_model=4, num_gpus_per_experiment=4, gpu_id_start=4If num_gpus_per_model!=1, global GPU locking is disabled(because underlying algorithms don't support arbitrary gpu ids, only sequential ids),so must setup above correctly to avoid overlap across all experiments by all usersMore info at: https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker#gpu-isolationNote that GPU selection does not wrap, so gpu_id_start + num_gpus_per_model must be less than number of visibile GPUs " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_reduce_features_when_failure", + "output": "allow reduce features when failure refers to Whether to reduce features until model does not fail.Currently for non-dask XGBoost models (i.e. GLMModel, XGBoostGBMModel, XGBoostDartModel, XGBoostRFModel),during normal fit or when using Optuna.Primarily useful for GPU OOM.If XGBoost runs out of GPU memory, this is detected, and(regardless of setting of skip_model_failures),we perform feature selection using XGBoost on subsets of features.The dataset is progressively reduced by factor of 2 with more models to cover all features.This splitting continues until no failure occurs.Then all sub-models are used to estimate variable importance by absolute information gain,in order to decide which features to include.Finally, a single model with the most important featuresis built using the feature count that did not lead to OOM.For 'auto', this option is set to 'off' when reproducible experiment is enabled,because the condition of running OOM can change for same experiment seed.Reduction is only done on features and not on rows for the feature selection step. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_reduce_features_when_failure", + "output": "allow reduce features when failure refers to Whether to reduce features when model fails: Whether to reduce features until model does not fail.Currently for non-dask XGBoost models (i.e. GLMModel, XGBoostGBMModel, XGBoostDartModel, XGBoostRFModel),during normal fit or when using Optuna.Primarily useful for GPU OOM.If XGBoost runs out of GPU memory, this is detected, and(regardless of setting of skip_model_failures),we perform feature selection using XGBoost on subsets of features.The dataset is progressively reduced by factor of 2 with more models to cover all features.This splitting continues until no failure occurs.Then all sub-models are used to estimate variable importance by absolute information gain,in order to decide which features to include.Finally, a single model with the most important featuresis built using the feature count that did not lead to OOM.For 'auto', this option is set to 'off' when reproducible experiment is enabled,because the condition of running OOM can change for same experiment seed.Reduction is only done on features and not on rows for the feature selection step. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow reduce features when failure", + "output": "allow reduce features when failure refers to Whether to reduce features when model fails: Whether to reduce features until model does not fail.Currently for non-dask XGBoost models (i.e. 
GLMModel, XGBoostGBMModel, XGBoostDartModel, XGBoostRFModel),during normal fit or when using Optuna.Primarily useful for GPU OOM.If XGBoost runs out of GPU memory, this is detected, and(regardless of setting of skip_model_failures),we perform feature selection using XGBoost on subsets of features.The dataset is progressively reduced by factor of 2 with more models to cover all features.This splitting continues until no failure occurs.Then all sub-models are used to estimate variable importance by absolute information gain,in order to decide which features to include.Finally, a single model with the most important featuresis built using the feature count that did not lead to OOM.For 'auto', this option is set to 'off' when reproducible experiment is enabled,because the condition of running OOM can change for same experiment seed.Reduction is only done on features and not on rows for the feature selection step. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to reduce features when model fails: ", + "output": "allow reduce features when failure refers to Whether to reduce features when model fails: Whether to reduce features until model does not fail.Currently for non-dask XGBoost models (i.e. 
GLMModel, XGBoostGBMModel, XGBoostDartModel, XGBoostRFModel),during normal fit or when using Optuna.Primarily useful for GPU OOM.If XGBoost runs out of GPU memory, this is detected, and(regardless of setting of skip_model_failures),we perform feature selection using XGBoost on subsets of features.The dataset is progressively reduced by factor of 2 with more models to cover all features.This splitting continues until no failure occurs.Then all sub-models are used to estimate variable importance by absolute information gain,in order to decide which features to include.Finally, a single model with the most important featuresis built using the feature count that did not lead to OOM.For 'auto', this option is set to 'off' when reproducible experiment is enabled,because the condition of running OOM can change for same experiment seed.Reduction is only done on features and not on rows for the feature selection step. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_reduce_features_when_failure", + "output": "allow reduce features when failure refers to Whether to reduce features until model does not fail.Currently for non-dask XGBoost models (i.e. 
GLMModel, XGBoostGBMModel, XGBoostDartModel, XGBoostRFModel),during normal fit or when using Optuna.Primarily useful for GPU OOM.If XGBoost runs out of GPU memory, this is detected, and(regardless of setting of skip_model_failures),we perform feature selection using XGBoost on subsets of features.The dataset is progressively reduced by factor of 2 with more models to cover all features.This splitting continues until no failure occurs.Then all sub-models are used to estimate variable importance by absolute information gain,in order to decide which features to include.Finally, a single model with the most important featuresis built using the feature count that did not lead to OOM.For 'auto', this option is set to 'off' when reproducible experiment is enabled,because the condition of running OOM can change for same experiment seed.Reduction is only done on features and not on rows for the feature selection step. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_reduce_features_when_failure", + "output": "allow reduce features when failure refers to Whether to reduce features when model fails: Whether to reduce features until model does not fail.Currently for non-dask XGBoost models (i.e. 
GLMModel, XGBoostGBMModel, XGBoostDartModel, XGBoostRFModel),during normal fit or when using Optuna.Primarily useful for GPU OOM.If XGBoost runs out of GPU memory, this is detected, and(regardless of setting of skip_model_failures),we perform feature selection using XGBoost on subsets of features.The dataset is progressively reduced by factor of 2 with more models to cover all features.This splitting continues until no failure occurs.Then all sub-models are used to estimate variable importance by absolute information gain,in order to decide which features to include.Finally, a single model with the most important featuresis built using the feature count that did not lead to OOM.For 'auto', this option is set to 'off' when reproducible experiment is enabled,because the condition of running OOM can change for same experiment seed.Reduction is only done on features and not on rows for the feature selection step. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "reduce_repeats_when_failure", + "output": "reduce repeats when failure refers to With allow_reduce_features_when_failure, this controls how many repeats of sub-modelsused for feature selection. A single repeat only has each sub-modelconsider a single sub-set of features, while repeats shuffle whichfeatures are considered allowing more chance to find important interactions.More repeats can lead to higher accuracy.The cost of this option is proportional to the repeat count. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "reduce_repeats_when_failure", + "output": "reduce repeats when failure refers to Number of repeats for models used for feature selection during failure recovery.: With allow_reduce_features_when_failure, this controls how many repeats of sub-modelsused for feature selection. 
A single repeat only has each sub-modelconsider a single sub-set of features, while repeats shuffle whichfeatures are considered allowing more chance to find important interactions.More repeats can lead to higher accuracy.The cost of this option is proportional to the repeat count. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "reduce repeats when failure", + "output": "reduce repeats when failure refers to Number of repeats for models used for feature selection during failure recovery.: With allow_reduce_features_when_failure, this controls how many repeats of sub-modelsused for feature selection. A single repeat only has each sub-modelconsider a single sub-set of features, while repeats shuffle whichfeatures are considered allowing more chance to find important interactions.More repeats can lead to higher accuracy.The cost of this option is proportional to the repeat count. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of repeats for models used for feature selection during failure recovery.: ", + "output": "reduce repeats when failure refers to Number of repeats for models used for feature selection during failure recovery.: With allow_reduce_features_when_failure, this controls how many repeats of sub-modelsused for feature selection. A single repeat only has each sub-modelconsider a single sub-set of features, while repeats shuffle whichfeatures are considered allowing more chance to find important interactions.More repeats can lead to higher accuracy.The cost of this option is proportional to the repeat count. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting reduce_repeats_when_failure", + "output": "reduce repeats when failure refers to With allow_reduce_features_when_failure, this controls how many repeats of sub-modelsused for feature selection. A single repeat only has each sub-modelconsider a single sub-set of features, while repeats shuffle whichfeatures are considered allowing more chance to find important interactions.More repeats can lead to higher accuracy.The cost of this option is proportional to the repeat count. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting reduce_repeats_when_failure", + "output": "reduce repeats when failure refers to Number of repeats for models used for feature selection during failure recovery.: With allow_reduce_features_when_failure, this controls how many repeats of sub-modelsused for feature selection. A single repeat only has each sub-modelconsider a single sub-set of features, while repeats shuffle whichfeatures are considered allowing more chance to find important interactions.More repeats can lead to higher accuracy.The cost of this option is proportional to the repeat count. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fraction_anchor_reduce_features_when_failure", + "output": "fraction anchor reduce features when failure refers to With allow_reduce_features_when_failure, this controls the fraction of featurestreated as an anchor that are fixed for all sub-models.Each repeat gets new anchors.For tuning and evolution, the probability dependsupon any prior importance (if present) from other individuals,while final model uses uniform probability for anchor features. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fraction_anchor_reduce_features_when_failure", + "output": "fraction anchor reduce features when failure refers to Fraction of features treated as anchor for feature selection during failure recovery.: With allow_reduce_features_when_failure, this controls the fraction of featurestreated as an anchor that are fixed for all sub-models.Each repeat gets new anchors.For tuning and evolution, the probability dependsupon any prior importance (if present) from other individuals,while final model uses uniform probability for anchor features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fraction anchor reduce features when failure", + "output": "fraction anchor reduce features when failure refers to Fraction of features treated as anchor for feature selection during failure recovery.: With allow_reduce_features_when_failure, this controls the fraction of featurestreated as an anchor that are fixed for all sub-models.Each repeat gets new anchors.For tuning and evolution, the probability dependsupon any prior importance (if present) from other individuals,while final model uses uniform probability for anchor features. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Fraction of features treated as anchor for feature selection during failure recovery.: ", + "output": "fraction anchor reduce features when failure refers to Fraction of features treated as anchor for feature selection during failure recovery.: With allow_reduce_features_when_failure, this controls the fraction of featurestreated as an anchor that are fixed for all sub-models.Each repeat gets new anchors.For tuning and evolution, the probability dependsupon any prior importance (if present) from other individuals,while final model uses uniform probability for anchor features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fraction_anchor_reduce_features_when_failure", + "output": "fraction anchor reduce features when failure refers to With allow_reduce_features_when_failure, this controls the fraction of featurestreated as an anchor that are fixed for all sub-models.Each repeat gets new anchors.For tuning and evolution, the probability dependsupon any prior importance (if present) from other individuals,while final model uses uniform probability for anchor features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fraction_anchor_reduce_features_when_failure", + "output": "fraction anchor reduce features when failure refers to Fraction of features treated as anchor for feature selection during failure recovery.: With allow_reduce_features_when_failure, this controls the fraction of featurestreated as an anchor that are fixed for all sub-models.Each repeat gets new anchors.For tuning and evolution, the probability dependsupon any prior importance (if present) from other individuals,while final model uses uniform probability for anchor features. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "xgboost_reduce_on_errors_list", + "output": "xgboost reduce on errors list refers to Error strings from XGBoost that are used to trigger re-fit on reduced sub-models.See allow_reduce_features_when_failure. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "xgboost_reduce_on_errors_list", + "output": "xgboost reduce on errors list refers to Errors from XGBoost that trigger reduction of features: Error strings from XGBoost that are used to trigger re-fit on reduced sub-models.See allow_reduce_features_when_failure. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "xgboost reduce on errors list", + "output": "xgboost reduce on errors list refers to Errors from XGBoost that trigger reduction of features: Error strings from XGBoost that are used to trigger re-fit on reduced sub-models.See allow_reduce_features_when_failure. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Errors from XGBoost that trigger reduction of features: ", + "output": "xgboost reduce on errors list refers to Errors from XGBoost that trigger reduction of features: Error strings from XGBoost that are used to trigger re-fit on reduced sub-models.See allow_reduce_features_when_failure. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting xgboost_reduce_on_errors_list", + "output": "xgboost reduce on errors list refers to Error strings from XGBoost that are used to trigger re-fit on reduced sub-models.See allow_reduce_features_when_failure. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting xgboost_reduce_on_errors_list", + "output": "xgboost reduce on errors list refers to Errors from XGBoost that trigger reduction of features: Error strings from XGBoost that are used to trigger re-fit on reduced sub-models.See allow_reduce_features_when_failure. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_reduce_on_errors_list", + "output": "lightgbm reduce on errors list refers to Error strings from LightGBM that are used to trigger re-fit on reduced sub-models.See allow_reduce_features_when_failure. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_reduce_on_errors_list", + "output": "lightgbm reduce on errors list refers to Errors from LightGBM that trigger reduction of features: Error strings from LightGBM that are used to trigger re-fit on reduced sub-models.See allow_reduce_features_when_failure. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm reduce on errors list", + "output": "lightgbm reduce on errors list refers to Errors from LightGBM that trigger reduction of features: Error strings from LightGBM that are used to trigger re-fit on reduced sub-models.See allow_reduce_features_when_failure. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Errors from LightGBM that trigger reduction of features: ", + "output": "lightgbm reduce on errors list refers to Errors from LightGBM that trigger reduction of features: Error strings from LightGBM that are used to trigger re-fit on reduced sub-models.See allow_reduce_features_when_failure. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting lightgbm_reduce_on_errors_list", + "output": "lightgbm reduce on errors list refers to Error strings from LightGBM that are used to trigger re-fit on reduced sub-models.See allow_reduce_features_when_failure. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting lightgbm_reduce_on_errors_list", + "output": "lightgbm reduce on errors list refers to Errors from LightGBM that trigger reduction of features: Error strings from LightGBM that are used to trigger re-fit on reduced sub-models.See allow_reduce_features_when_failure. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_use_gpu", + "output": "lightgbm use gpu refers to LightGBM does not significantly benefit from GPUs, unlike other tools like XGBoost or Bert/Image Models. Each experiment will try to use all GPUs, and on systems with many cores and GPUs, this leads to many experiments running at once, all trying to lock the GPU for use, leaving the cores heavily under-utilized. So by default, DAI always uses CPU for LightGBM, unless 'on' is specified." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_use_gpu", + "output": "lightgbm use gpu refers to Whether to use GPUs for LightGBM: LightGBM does not significantly benefit from GPUs, unlike other tools like XGBoost or Bert/Image Models. Each experiment will try to use all GPUs, and on systems with many cores and GPUs, this leads to many experiments running at once, all trying to lock the GPU for use, leaving the cores heavily under-utilized. So by default, DAI always uses CPU for LightGBM, unless 'on' is specified." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm use gpu", + "output": "lightgbm use gpu refers to Whether to use GPUs for LightGBM: LightGBM does not significantly benefit from GPUs, unlike other tools like XGBoost or Bert/Image Models. Each experiment will try to use all GPUs, and on systems with many cores and GPUs, this leads to many experiments running at once, all trying to lock the GPU for use, leaving the cores heavily under-utilized. So by default, DAI always uses CPU for LightGBM, unless 'on' is specified." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to use GPUs for LightGBM: ", + "output": "lightgbm use gpu refers to Whether to use GPUs for LightGBM: LightGBM does not significantly benefit from GPUs, unlike other tools like XGBoost or Bert/Image Models. Each experiment will try to use all GPUs, and on systems with many cores and GPUs, this leads to many experiments running at once, all trying to lock the GPU for use, leaving the cores heavily under-utilized. So by default, DAI always uses CPU for LightGBM, unless 'on' is specified." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting lightgbm_use_gpu", + "output": "lightgbm use gpu refers to LightGBM does not significantly benefit from GPUs, unlike other tools like XGBoost or Bert/Image Models. Each experiment will try to use all GPUs, and on systems with many cores and GPUs, this leads to many experiments running at once, all trying to lock the GPU for use, leaving the cores heavily under-utilized. So by default, DAI always uses CPU for LightGBM, unless 'on' is specified." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting lightgbm_use_gpu", + "output": "lightgbm use gpu refers to Whether to use GPUs for LightGBM: LightGBM does not significantly benefit from GPUs, unlike other tools like XGBoost or Bert/Image Models. Each experiment will try to use all GPUs, and on systems with many cores and GPUs, this leads to many experiments running at once, all trying to lock the GPU for use, leaving the cores heavily under-utilized. So by default, DAI always uses CPU for LightGBM, unless 'on' is specified." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle_username", + "output": "kaggle username refers to Kaggle username for automatic submission and scoring of test set predictions.See https://github.com/Kaggle/kaggle-api#api-credentials for details on how to obtain Kaggle API credentials\", " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle_username", + "output": "kaggle username refers to Kaggle username: Kaggle username for automatic submission and scoring of test set predictions.See https://github.com/Kaggle/kaggle-api#api-credentials for details on how to obtain Kaggle API credentials\", " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle username", + "output": "kaggle username refers to Kaggle username: Kaggle username for automatic submission and scoring of test set predictions.See https://github.com/Kaggle/kaggle-api#api-credentials for details on how to obtain Kaggle API credentials\", " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Kaggle username: ", + "output": "kaggle username refers to Kaggle username: Kaggle username for automatic submission and 
scoring of test set predictions.See https://github.com/Kaggle/kaggle-api#api-credentials for details on how to obtain Kaggle API credentials\", " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting kaggle_username", + "output": "kaggle username refers to Kaggle username for automatic submission and scoring of test set predictions.See https://github.com/Kaggle/kaggle-api#api-credentials for details on how to obtain Kaggle API credentials\", " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting kaggle_username", + "output": "kaggle username refers to Kaggle username: Kaggle username for automatic submission and scoring of test set predictions.See https://github.com/Kaggle/kaggle-api#api-credentials for details on how to obtain Kaggle API credentials\", " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle_key", + "output": "kaggle key refers to Kaggle key for automatic submission and scoring of test set predictions.See https://github.com/Kaggle/kaggle-api#api-credentials for details on how to obtain Kaggle API credentials\", " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle_key", + "output": "kaggle key refers to Kaggle key: Kaggle key for automatic submission and scoring of test set predictions.See https://github.com/Kaggle/kaggle-api#api-credentials for details on how to obtain Kaggle API credentials\", " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle key", + "output": "kaggle key refers to Kaggle key: Kaggle key for automatic submission and scoring of test set predictions.See https://github.com/Kaggle/kaggle-api#api-credentials for details on how to obtain Kaggle API credentials\", " + }, + { + 
"prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Kaggle key: ", + "output": "kaggle key refers to Kaggle key: Kaggle key for automatic submission and scoring of test set predictions.See https://github.com/Kaggle/kaggle-api#api-credentials for details on how to obtain Kaggle API credentials\", " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting kaggle_key", + "output": "kaggle key refers to Kaggle key for automatic submission and scoring of test set predictions.See https://github.com/Kaggle/kaggle-api#api-credentials for details on how to obtain Kaggle API credentials\", " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting kaggle_key", + "output": "kaggle key refers to Kaggle key: Kaggle key for automatic submission and scoring of test set predictions.See https://github.com/Kaggle/kaggle-api#api-credentials for details on how to obtain Kaggle API credentials\", " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle_timeout", + "output": "kaggle timeout refers to Max. number of seconds to wait for Kaggle API call to return scores for given predictions" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle_timeout", + "output": "kaggle timeout refers to Kaggle submission timeout in seconds: Max. number of seconds to wait for Kaggle API call to return scores for given predictions" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle timeout", + "output": "kaggle timeout refers to Kaggle submission timeout in seconds: Max. 
number of seconds to wait for Kaggle API call to return scores for given predictions" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Kaggle submission timeout in seconds: ", + "output": "kaggle timeout refers to Kaggle submission timeout in seconds: Max. number of seconds to wait for Kaggle API call to return scores for given predictions" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting kaggle_timeout", + "output": "kaggle timeout refers to Max. number of seconds to wait for Kaggle API call to return scores for given predictions" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting kaggle_timeout", + "output": "kaggle timeout refers to Kaggle submission timeout in seconds: Max. number of seconds to wait for Kaggle API call to return scores for given predictions" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle_keep_submission", + "output": "kaggle keep submission refers to Whether to keep Kaggle submission file in experiment directory: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle_keep_submission", + "output": "kaggle keep submission refers to Whether to keep Kaggle submission file in experiment directory: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle keep submission", + "output": "kaggle keep submission refers to Whether to keep Kaggle submission file in experiment directory: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to keep Kaggle submission file in experiment directory: ", + "output": "kaggle keep submission refers to 
Whether to keep Kaggle submission file in experiment directory: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting kaggle_keep_submission", + "output": "kaggle keep submission refers to Whether to keep Kaggle submission file in experiment directory: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting kaggle_keep_submission", + "output": "kaggle keep submission refers to Whether to keep Kaggle submission file in experiment directory: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle_competitions", + "output": "kaggle competitions refers to If provided, can extend the list to arbitrary and potentially future Kaggle competitions to make submissions for. Only used if kaggle_key and kaggle_username are provided. Provide a quoted comma-separated list of tuples (target column name, number of test rows, competition, metric) like this: kaggle_competitions='(\"target\", 200000, \"santander-customer-transaction-prediction\", \"AUC\"), (\"TARGET\", 75818, \"santander-customer-satisfaction\", \"AUC\")' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle_competitions", + "output": "kaggle competitions refers to Custom Kaggle competitions to make automatic test set submissions for.: If provided, can extend the list to arbitrary and potentially future Kaggle competitions to make submissions for. Only used if kaggle_key and kaggle_username are provided. 
Provide a quoted comma-separated list of tuples (target column name, number of test rows, competition, metric) like this: kaggle_competitions='(\"target\", 200000, \"santander-customer-transaction-prediction\", \"AUC\"), (\"TARGET\", 75818, \"santander-customer-satisfaction\", \"AUC\")' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kaggle competitions", + "output": "kaggle competitions refers to Custom Kaggle competitions to make automatic test set submissions for.: If provided, can extend the list to arbitrary and potentially future Kaggle competitions to make submissions for. Only used if kaggle_key and kaggle_username are provided. Provide a quoted comma-separated list of tuples (target column name, number of test rows, competition, metric) like this: kaggle_competitions='(\"target\", 200000, \"santander-customer-transaction-prediction\", \"AUC\"), (\"TARGET\", 75818, \"santander-customer-satisfaction\", \"AUC\")' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Custom Kaggle competitions to make automatic test set submissions for.: ", + "output": "kaggle competitions refers to Custom Kaggle competitions to make automatic test set submissions for.: If provided, can extend the list to arbitrary and potentially future Kaggle competitions to make submissions for. Only used if kaggle_key and kaggle_username are provided. 
Provide a quoted comma-separated list of tuples (target column name, number of test rows, competition, metric) like this: kaggle_competitions='(\"target\", 200000, \"santander-customer-transaction-prediction\", \"AUC\"), (\"TARGET\", 75818, \"santander-customer-satisfaction\", \"AUC\")' " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting kaggle_competitions", + "output": "kaggle competitions refers to If provided, can extend the list to arbitrary and potentially future Kaggle competitions to make submissions for. Only used if kaggle_key and kaggle_username are provided. Provide a quoted comma-separated list of tuples (target column name, number of test rows, competition, metric) like this: kaggle_competitions='(\"target\", 200000, \"santander-customer-transaction-prediction\", \"AUC\"), (\"TARGET\", 75818, \"santander-customer-satisfaction\", \"AUC\")' " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting kaggle_competitions", + "output": "kaggle competitions refers to Custom Kaggle competitions to make automatic test set submissions for.: If provided, can extend the list to arbitrary and potentially future Kaggle competitions to make submissions for. Only used if kaggle_key and kaggle_username are provided. Provide a quoted comma-separated list of tuples (target column name, number of test rows, competition, metric) like this: kaggle_competitions='(\"target\", 200000, \"santander-customer-transaction-prediction\", \"AUC\"), (\"TARGET\", 75818, \"santander-customer-satisfaction\", \"AUC\")' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ping_period", + "output": "ping period refers to Period (in seconds) of ping by Driverless AI server to each experiment (in order to get logger info like disk space and memory usage). 0 means don't print anything." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ping_period", + "output": "ping period refers to Period (in seconds) of ping by Driverless AI server to each experiment (in order to get logger info like disk space and memory usage). 0 means don't print anything." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ping period", + "output": "ping period refers to Period (in seconds) of ping by Driverless AI server to each experiment (in order to get logger info like disk space and memory usage). 0 means don't print anything." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ping period refers to Period (in seconds) of ping by Driverless AI server to each experiment (in order to get logger info like disk space and memory usage). 0 means don't print anything." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ping_period", + "output": "ping period refers to Period (in seconds) of ping by Driverless AI server to each experiment (in order to get logger info like disk space and memory usage). 0 means don't print anything." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ping_period", + "output": "ping period refers to Period (in seconds) of ping by Driverless AI server to each experiment (in order to get logger info like disk space and memory usage). 0 means don't print anything." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ping_autodl", + "output": "ping autodl refers to Whether to enable ping of system status during DAI experiments." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ping_autodl", + "output": "ping autodl refers to Whether to enable ping of system status during DAI experiments.: Whether to enable ping of system status during DAI experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ping autodl", + "output": "ping autodl refers to Whether to enable ping of system status during DAI experiments.: Whether to enable ping of system status during DAI experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to enable ping of system status during DAI experiments.: ", + "output": "ping autodl refers to Whether to enable ping of system status during DAI experiments.: Whether to enable ping of system status during DAI experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ping_autodl", + "output": "ping autodl refers to Whether to enable ping of system status during DAI experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ping_autodl", + "output": "ping autodl refers to Whether to enable ping of system status during DAI experiments.: Whether to enable ping of system status during DAI experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "disk_limit_gb", + "output": "disk limit gb refers to Minimum amount of disk space in GB needed to run experiments. Experiments will fail if this limit is crossed. This limit exists because Driverless AI needs to generate data for model training feature engineering, documentation and other such processes." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "disk_limit_gb", + "output": "disk limit gb refers to Minimum amount of disk space in GB needed to run experiments. Experiments will fail if this limit is crossed. This limit exists because Driverless AI needs to generate data for model training feature engineering, documentation and other such processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "disk limit gb", + "output": "disk limit gb refers to Minimum amount of disk space in GB needed to run experiments. Experiments will fail if this limit is crossed. This limit exists because Driverless AI needs to generate data for model training feature engineering, documentation and other such processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "disk limit gb refers to Minimum amount of disk space in GB needed to run experiments. Experiments will fail if this limit is crossed. This limit exists because Driverless AI needs to generate data for model training feature engineering, documentation and other such processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting disk_limit_gb", + "output": "disk limit gb refers to Minimum amount of disk space in GB needed to run experiments. Experiments will fail if this limit is crossed. This limit exists because Driverless AI needs to generate data for model training feature engineering, documentation and other such processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting disk_limit_gb", + "output": "disk limit gb refers to Minimum amount of disk space in GB needed to run experiments. Experiments will fail if this limit is crossed. 
This limit exists because Driverless AI needs to generate data for model training feature engineering, documentation and other such processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall_disk_limit_gb", + "output": "stall disk limit gb refers to Minimum amount of disk space in GB needed to before stall forking of new processes during an experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall_disk_limit_gb", + "output": "stall disk limit gb refers to Minimum amount of disk space in GB needed to before stall forking of new processes during an experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall disk limit gb", + "output": "stall disk limit gb refers to Minimum amount of disk space in GB needed to before stall forking of new processes during an experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "stall disk limit gb refers to Minimum amount of disk space in GB needed to before stall forking of new processes during an experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting stall_disk_limit_gb", + "output": "stall disk limit gb refers to Minimum amount of disk space in GB needed to before stall forking of new processes during an experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting stall_disk_limit_gb", + "output": "stall disk limit gb refers to Minimum amount of disk space in GB needed to before stall forking of new processes during an experiment." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "memory_limit_gb", + "output": "memory limit gb refers to Minimum amount of system memory in GB needed to start experiments. Similarly with disk space, a certain amount of system memory is needed to run some basic operations." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "memory_limit_gb", + "output": "memory limit gb refers to Minimum amount of system memory in GB needed to start experiments. Similarly with disk space, a certain amount of system memory is needed to run some basic operations." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "memory limit gb", + "output": "memory limit gb refers to Minimum amount of system memory in GB needed to start experiments. Similarly with disk space, a certain amount of system memory is needed to run some basic operations." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "memory limit gb refers to Minimum amount of system memory in GB needed to start experiments. Similarly with disk space, a certain amount of system memory is needed to run some basic operations." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting memory_limit_gb", + "output": "memory limit gb refers to Minimum amount of system memory in GB needed to start experiments. Similarly with disk space, a certain amount of system memory is needed to run some basic operations." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting memory_limit_gb", + "output": "memory limit gb refers to Minimum amount of system memory in GB needed to start experiments. 
Similarly with disk space, a certain amount of system memory is needed to run some basic operations." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_num_rows", + "output": "min num rows refers to Minimum number of rows needed to run experiments (values lower than 100 might not work).A minimum threshold is set to ensure there is enough data to create a statisticallyreliable model and avoid other small-data related failures. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_num_rows", + "output": "min num rows refers to Min. number of rows needed to run experiment: Minimum number of rows needed to run experiments (values lower than 100 might not work).A minimum threshold is set to ensure there is enough data to create a statisticallyreliable model and avoid other small-data related failures. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min num rows", + "output": "min num rows refers to Min. number of rows needed to run experiment: Minimum number of rows needed to run experiments (values lower than 100 might not work).A minimum threshold is set to ensure there is enough data to create a statisticallyreliable model and avoid other small-data related failures. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Min. number of rows needed to run experiment: ", + "output": "min num rows refers to Min. number of rows needed to run experiment: Minimum number of rows needed to run experiments (values lower than 100 might not work).A minimum threshold is set to ensure there is enough data to create a statisticallyreliable model and avoid other small-data related failures. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_num_rows", + "output": "min num rows refers to Minimum number of rows needed to run experiments (values lower than 100 might not work).A minimum threshold is set to ensure there is enough data to create a statisticallyreliable model and avoid other small-data related failures. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_num_rows", + "output": "min num rows refers to Min. number of rows needed to run experiment: Minimum number of rows needed to run experiments (values lower than 100 might not work).A minimum threshold is set to ensure there is enough data to create a statisticallyreliable model and avoid other small-data related failures. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_rows_per_class", + "output": "min rows per class refers to Minimum required number of rows (in the training data) for each class label for classification problems." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_rows_per_class", + "output": "min rows per class refers to Minimum required number of rows (in the training data) for each class label for classification problems." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min rows per class", + "output": "min rows per class refers to Minimum required number of rows (in the training data) for each class label for classification problems." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "min rows per class refers to Minimum required number of rows (in the training data) for each class label for classification problems." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_rows_per_class", + "output": "min rows per class refers to Minimum required number of rows (in the training data) for each class label for classification problems." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_rows_per_class", + "output": "min rows per class refers to Minimum required number of rows (in the training data) for each class label for classification problems." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_rows_per_split", + "output": "min rows per split refers to Minimum required number of rows for each split when generating validation samples." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_rows_per_split", + "output": "min rows per split refers to Minimum required number of rows for each split when generating validation samples." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min rows per split", + "output": "min rows per split refers to Minimum required number of rows for each split when generating validation samples." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "min rows per split refers to Minimum required number of rows for each split when generating validation samples." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_rows_per_split", + "output": "min rows per split refers to Minimum required number of rows for each split when generating validation samples." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_rows_per_split", + "output": "min rows per split refers to Minimum required number of rows for each split when generating validation samples." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "reproducibility_level", + "output": "reproducibility level refers to Level of reproducibility desired (for same data and same inputs).Only active if 'reproducible' mode is enabled (GUI button enabled or a seed is set from the client API).Supported levels are: reproducibility_level = 1 for same experiment results as long as same O/S, same CPU(s) and same GPU(s) reproducibility_level = 2 for same experiment results as long as same O/S, same CPU architecture and same GPU architecture reproducibility_level = 3 for same experiment results as long as same O/S, same CPU architecture, not using GPUs reproducibility_level = 4 for same experiment results as long as same O/S, (best effort) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "reproducibility_level", + "output": "reproducibility level refers to Reproducibility Level: Level of reproducibility desired (for same data and same inputs).Only active if 'reproducible' mode is enabled (GUI button enabled or a seed is set from the client API).Supported levels are: reproducibility_level = 1 for same experiment results as long as same O/S, same CPU(s) and same GPU(s) reproducibility_level = 2 for same experiment results as long as same O/S, same CPU architecture and same GPU architecture reproducibility_level = 3 for same experiment results as long as same O/S, same CPU architecture, not using GPUs reproducibility_level = 4 for same experiment results as long as same O/S, (best effort) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert 
setting for Driverless AI", + "input": "reproducibility level", + "output": "reproducibility level refers to Reproducibility Level: Level of reproducibility desired (for same data and same inputs).Only active if 'reproducible' mode is enabled (GUI button enabled or a seed is set from the client API).Supported levels are: reproducibility_level = 1 for same experiment results as long as same O/S, same CPU(s) and same GPU(s) reproducibility_level = 2 for same experiment results as long as same O/S, same CPU architecture and same GPU architecture reproducibility_level = 3 for same experiment results as long as same O/S, same CPU architecture, not using GPUs reproducibility_level = 4 for same experiment results as long as same O/S, (best effort) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Reproducibility Level: ", + "output": "reproducibility level refers to Reproducibility Level: Level of reproducibility desired (for same data and same inputs).Only active if 'reproducible' mode is enabled (GUI button enabled or a seed is set from the client API).Supported levels are: reproducibility_level = 1 for same experiment results as long as same O/S, same CPU(s) and same GPU(s) reproducibility_level = 2 for same experiment results as long as same O/S, same CPU architecture and same GPU architecture reproducibility_level = 3 for same experiment results as long as same O/S, same CPU architecture, not using GPUs reproducibility_level = 4 for same experiment results as long as same O/S, (best effort) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting reproducibility_level", + "output": "reproducibility level refers to Level of reproducibility desired (for same data and same inputs).Only active if 'reproducible' mode is enabled (GUI button enabled or a seed is set from the client API).Supported levels are: reproducibility_level = 1 for same 
experiment results as long as same O/S, same CPU(s) and same GPU(s) reproducibility_level = 2 for same experiment results as long as same O/S, same CPU architecture and same GPU architecture reproducibility_level = 3 for same experiment results as long as same O/S, same CPU architecture, not using GPUs reproducibility_level = 4 for same experiment results as long as same O/S, (best effort) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting reproducibility_level", + "output": "reproducibility level refers to Reproducibility Level: Level of reproducibility desired (for same data and same inputs).Only active if 'reproducible' mode is enabled (GUI button enabled or a seed is set from the client API).Supported levels are: reproducibility_level = 1 for same experiment results as long as same O/S, same CPU(s) and same GPU(s) reproducibility_level = 2 for same experiment results as long as same O/S, same CPU architecture and same GPU architecture reproducibility_level = 3 for same experiment results as long as same O/S, same CPU architecture, not using GPUs reproducibility_level = 4 for same experiment results as long as same O/S, (best effort) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "seed", + "output": "seed refers to Seed for random number generator to make experiments reproducible, to a certain reproducibility level (see above).Only active if 'reproducible' mode is enabled (GUI button enabled or a seed is set from the client API). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "seed", + "output": "seed refers to Random seed: Seed for random number generator to make experiments reproducible, to a certain reproducibility level (see above).Only active if 'reproducible' mode is enabled (GUI button enabled or a seed is set from the client API). 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "seed", + "output": "seed refers to Random seed: Seed for random number generator to make experiments reproducible, to a certain reproducibility level (see above).Only active if 'reproducible' mode is enabled (GUI button enabled or a seed is set from the client API). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Random seed: ", + "output": "seed refers to Random seed: Seed for random number generator to make experiments reproducible, to a certain reproducibility level (see above).Only active if 'reproducible' mode is enabled (GUI button enabled or a seed is set from the client API). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting seed", + "output": "seed refers to Seed for random number generator to make experiments reproducible, to a certain reproducibility level (see above).Only active if 'reproducible' mode is enabled (GUI button enabled or a seed is set from the client API). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting seed", + "output": "seed refers to Random seed: Seed for random number generator to make experiments reproducible, to a certain reproducibility level (see above).Only active if 'reproducible' mode is enabled (GUI button enabled or a seed is set from the client API). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "missing_values", + "output": "missing values refers to The list of values that should be interpreted as missing values during data import. This applies to both numeric and string columns. Note that the dataset must be reloaded after applying changes to this config via the expert settings. 
Also note that 'nan' is always interpreted as a missing value for numeric columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "missing_values", + "output": "missing values refers to The list of values that should be interpreted as missing values during data import. This applies to both numeric and string columns. Note that the dataset must be reloaded after applying changes to this config via the expert settings. Also note that 'nan' is always interpreted as a missing value for numeric columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "missing values", + "output": "missing values refers to The list of values that should be interpreted as missing values during data import. This applies to both numeric and string columns. Note that the dataset must be reloaded after applying changes to this config via the expert settings. Also note that 'nan' is always interpreted as a missing value for numeric columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "missing values refers to The list of values that should be interpreted as missing values during data import. This applies to both numeric and string columns. Note that the dataset must be reloaded after applying changes to this config via the expert settings. Also note that 'nan' is always interpreted as a missing value for numeric columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting missing_values", + "output": "missing values refers to The list of values that should be interpreted as missing values during data import. This applies to both numeric and string columns. Note that the dataset must be reloaded after applying changes to this config via the expert settings. 
Also note that 'nan' is always interpreted as a missing value for numeric columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting missing_values", + "output": "missing values refers to The list of values that should be interpreted as missing values during data import. This applies to both numeric and string columns. Note that the dataset must be reloaded after applying changes to this config via the expert settings. Also note that 'nan' is always interpreted as a missing value for numeric columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm_nan_impute_training_data", + "output": "glm nan impute training data refers to Whether to impute (to mean) for GLM on training data." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm_nan_impute_training_data", + "output": "glm nan impute training data refers to Whether to impute (to mean) for GLM on training data." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm nan impute training data", + "output": "glm nan impute training data refers to Whether to impute (to mean) for GLM on training data." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "glm nan impute training data refers to Whether to impute (to mean) for GLM on training data." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting glm_nan_impute_training_data", + "output": "glm nan impute training data refers to Whether to impute (to mean) for GLM on training data." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting glm_nan_impute_training_data", + "output": "glm nan impute training data refers to Whether to impute (to mean) for GLM on training data." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm_nan_impute_validation_data", + "output": "glm nan impute validation data refers to Whether to impute (to mean) for GLM on validation data." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm_nan_impute_validation_data", + "output": "glm nan impute validation data refers to Whether to impute (to mean) for GLM on validation data." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm nan impute validation data", + "output": "glm nan impute validation data refers to Whether to impute (to mean) for GLM on validation data." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "glm nan impute validation data refers to Whether to impute (to mean) for GLM on validation data." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting glm_nan_impute_validation_data", + "output": "glm nan impute validation data refers to Whether to impute (to mean) for GLM on validation data." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting glm_nan_impute_validation_data", + "output": "glm nan impute validation data refers to Whether to impute (to mean) for GLM on validation data." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm_nan_impute_prediction_data", + "output": "glm nan impute prediction data refers to Whether to impute (to mean) for GLM on prediction data (required for consistency with MOJO)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm_nan_impute_prediction_data", + "output": "glm nan impute prediction data refers to Whether to impute (to mean) for GLM on prediction data (required for consistency with MOJO)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm nan impute prediction data", + "output": "glm nan impute prediction data refers to Whether to impute (to mean) for GLM on prediction data (required for consistency with MOJO)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "glm nan impute prediction data refers to Whether to impute (to mean) for GLM on prediction data (required for consistency with MOJO)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting glm_nan_impute_prediction_data", + "output": "glm nan impute prediction data refers to Whether to impute (to mean) for GLM on prediction data (required for consistency with MOJO)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting glm_nan_impute_prediction_data", + "output": "glm nan impute prediction data refers to Whether to impute (to mean) for GLM on prediction data (required for consistency with MOJO)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tf_nan_impute_value", + "output": "tf nan impute value refers to For tensorflow, what numerical value to give to missing values, where numeric values are standardized. So 0 is center of distribution, and if Normal distribution then +-5 is 5 standard deviations away from the center. In many cases, an out of bounds value is a good way to represent missings, but in some cases the mean (0) may be better." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tf_nan_impute_value", + "output": "tf nan impute value refers to For tensorflow, what numerical value to give to missing values, where numeric values are standardized. So 0 is center of distribution, and if Normal distribution then +-5 is 5 standard deviations away from the center. In many cases, an out of bounds value is a good way to represent missings, but in some cases the mean (0) may be better." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tf nan impute value", + "output": "tf nan impute value refers to For tensorflow, what numerical value to give to missing values, where numeric values are standardized. So 0 is center of distribution, and if Normal distribution then +-5 is 5 standard deviations away from the center. In many cases, an out of bounds value is a good way to represent missings, but in some cases the mean (0) may be better." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tf nan impute value refers to For tensorflow, what numerical value to give to missing values, where numeric values are standardized. So 0 is center of distribution, and if Normal distribution then +-5 is 5 standard deviations away from the center. 
In many cases, an out of bounds value is a good way to represent missings, but in some cases the mean (0) may be better." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tf_nan_impute_value", + "output": "tf nan impute value refers to For tensorflow, what numerical value to give to missing values, where numeric values are standardized. So 0 is center of distribution, and if Normal distribution then +-5 is 5 standard deviations away from the center. In many cases, an out of bounds value is a good way to represent missings, but in some cases the mean (0) may be better." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tf_nan_impute_value", + "output": "tf nan impute value refers to For tensorflow, what numerical value to give to missing values, where numeric values are standardized. So 0 is center of distribution, and if Normal distribution then +-5 is 5 standard deviations away from the center. In many cases, an out of bounds value is a good way to represent missings, but in some cases the mean (0) may be better." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "statistical_threshold_data_size_small", + "output": "statistical threshold data size small refers to Internal threshold for number of rows x number of columns to trigger certain statistical techniques (small data recipe like including one hot encoding for all model types, and smaller learning rate) to increase model accuracy" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "statistical_threshold_data_size_small", + "output": "statistical threshold data size small refers to Internal threshold for number of rows x number of columns to trigger certain statistical techniques (small data recipe like including one hot encoding for all model types, and smaller learning rate) to increase model accuracy" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "statistical threshold data size small", + "output": "statistical threshold data size small refers to Internal threshold for number of rows x number of columns to trigger certain statistical techniques (small data recipe like including one hot encoding for all model types, and smaller learning rate) to increase model accuracy" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "statistical threshold data size small refers to Internal threshold for number of rows x number of columns to trigger certain statistical techniques (small data recipe like including one hot encoding for all model types, and smaller learning rate) to increase model accuracy" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting statistical_threshold_data_size_small", + "output": "statistical threshold data size small refers to Internal 
threshold for number of rows x number of columns to trigger certain statistical techniques (small data recipe like including one hot encoding for all model types, and smaller learning rate) to increase model accuracy" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting statistical_threshold_data_size_small", + "output": "statistical threshold data size small refers to Internal threshold for number of rows x number of columns to trigger certain statistical techniques (small data recipe like including one hot encoding for all model types, and smaller learning rate) to increase model accuracy" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "statistical_threshold_data_size_large", + "output": "statistical threshold data size large refers to Internal threshold for number of rows x number of columns to trigger certain statistical techniques (fewer genes created, removal of high max_depth for tree models, etc.) that can speed up modeling. Also controls maximum rows used in training final model, by sampling statistical_threshold_data_size_large / columns number of rows" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "statistical_threshold_data_size_large", + "output": "statistical threshold data size large refers to Internal threshold for number of rows x number of columns to trigger certain statistical techniques (fewer genes created, removal of high max_depth for tree models, etc.) that can speed up modeling. 
Also controls maximum rows used in training final model, by sampling statistical_threshold_data_size_large / columns number of rows" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "statistical threshold data size large", + "output": "statistical threshold data size large refers to Internal threshold for number of rows x number of columns to trigger certain statistical techniques (fewer genes created, removal of high max_depth for tree models, etc.) that can speed up modeling. Also controls maximum rows used in training final model, by sampling statistical_threshold_data_size_large / columns number of rows" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "statistical threshold data size large refers to Internal threshold for number of rows x number of columns to trigger certain statistical techniques (fewer genes created, removal of high max_depth for tree models, etc.) that can speed up modeling. Also controls maximum rows used in training final model, by sampling statistical_threshold_data_size_large / columns number of rows" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting statistical_threshold_data_size_large", + "output": "statistical threshold data size large refers to Internal threshold for number of rows x number of columns to trigger certain statistical techniques (fewer genes created, removal of high max_depth for tree models, etc.) that can speed up modeling. 
Also controls maximum rows used in training final model, by sampling statistical_threshold_data_size_large / columns number of rows" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting statistical_threshold_data_size_large", + "output": "statistical threshold data size large refers to Internal threshold for number of rows x number of columns to trigger certain statistical techniques (fewer genes created, removal of high max_depth for tree models, etc.) that can speed up modeling. Also controls maximum rows used in training final model, by sampling statistical_threshold_data_size_large / columns number of rows" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aux_threshold_data_size_large", + "output": "aux threshold data size large refers to Internal threshold for number of rows x number of columns to trigger sampling for auxiliary data uses, like imbalanced data set detection and bootstrap scoring sample size and iterations" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aux_threshold_data_size_large", + "output": "aux threshold data size large refers to Internal threshold for number of rows x number of columns to trigger sampling for auxiliary data uses, like imbalanced data set detection and bootstrap scoring sample size and iterations" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aux threshold data size large", + "output": "aux threshold data size large refers to Internal threshold for number of rows x number of columns to trigger sampling for auxiliary data uses, like imbalanced data set detection and bootstrap scoring sample size and iterations" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": 
"", + "output": "aux threshold data size large refers to Internal threshold for number of rows x number of columns to trigger sampling for auxiliary data uses, like imbalanced data set detection and bootstrap scoring sample size and iterations" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting aux_threshold_data_size_large", + "output": "aux threshold data size large refers to Internal threshold for number of rows x number of columns to trigger sampling for auxiliary data uses, like imbalanced data set detection and bootstrap scoring sample size and iterations" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting aux_threshold_data_size_large", + "output": "aux threshold data size large refers to Internal threshold for number of rows x number of columns to trigger sampling for auxiliary data uses, like imbalanced data set detection and bootstrap scoring sample size and iterations" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "set_method_sampling_row_limit", + "output": "set method sampling row limit refers to Internal threshold for set-based method for sampling without replacement. Can be 10x faster than np_random_choice internal optimized method, and up to 30x faster than np.random.choice to sample 250k rows from 1B rows etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "set_method_sampling_row_limit", + "output": "set method sampling row limit refers to Internal threshold for set-based method for sampling without replacement. Can be 10x faster than np_random_choice internal optimized method, and up to 30x faster than np.random.choice to sample 250k rows from 1B rows etc." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "set method sampling row limit", + "output": "set method sampling row limit refers to Internal threshold for set-based method for sampling without replacement. Can be 10x faster than np_random_choice internal optimized method, and up to 30x faster than np.random.choice to sample 250k rows from 1B rows etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "set method sampling row limit refers to Internal threshold for set-based method for sampling without replacement. Can be 10x faster than np_random_choice internal optimized method, and up to 30x faster than np.random.choice to sample 250k rows from 1B rows etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting set_method_sampling_row_limit", + "output": "set method sampling row limit refers to Internal threshold for set-based method for sampling without replacement. Can be 10x faster than np_random_choice internal optimized method, and up to 30x faster than np.random.choice to sample 250k rows from 1B rows etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting set_method_sampling_row_limit", + "output": "set method sampling row limit refers to Internal threshold for set-based method for sampling without replacement. Can be 10x faster than np_random_choice internal optimized method, and up to 30x faster than np.random.choice to sample 250k rows from 1B rows etc." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "performance_threshold_data_size_small", + "output": "performance threshold data size small refers to Internal threshold for number of rows x number of columns to trigger certain changes in performance (fewer threads if beyond large value) to help avoid OOM or unnecessary slowdowns (fewer threads if lower than small value) to avoid excess forking of tasks" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "performance_threshold_data_size_small", + "output": "performance threshold data size small refers to Internal threshold for number of rows x number of columns to trigger certain changes in performance (fewer threads if beyond large value) to help avoid OOM or unnecessary slowdowns (fewer threads if lower than small value) to avoid excess forking of tasks" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "performance threshold data size small", + "output": "performance threshold data size small refers to Internal threshold for number of rows x number of columns to trigger certain changes in performance (fewer threads if beyond large value) to help avoid OOM or unnecessary slowdowns (fewer threads if lower than small value) to avoid excess forking of tasks" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "performance threshold data size small refers to Internal threshold for number of rows x number of columns to trigger certain changes in performance (fewer threads if beyond large value) to help avoid OOM or unnecessary slowdowns (fewer threads if lower than small value) to avoid excess forking of tasks" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert 
setting performance_threshold_data_size_small", + "output": "performance threshold data size small refers to Internal threshold for number of rows x number of columns to trigger certain changes in performance (fewer threads if beyond large value) to help avoid OOM or unnecessary slowdowns (fewer threads if lower than small value) to avoid excess forking of tasks" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting performance_threshold_data_size_small", + "output": "performance threshold data size small refers to Internal threshold for number of rows x number of columns to trigger certain changes in performance (fewer threads if beyond large value) to help avoid OOM or unnecessary slowdowns (fewer threads if lower than small value) to avoid excess forking of tasks" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "performance_threshold_data_size_large", + "output": "performance threshold data size large refers to Internal threshold for number of rows x number of columns to trigger certain changes in performance (fewer threads if beyond large value) to help avoid OOM or unnecessary slowdowns (fewer threads if lower than small value) to avoid excess forking of tasks" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "performance_threshold_data_size_large", + "output": "performance threshold data size large refers to Internal threshold for number of rows x number of columns to trigger certain changes in performance (fewer threads if beyond large value) to help avoid OOM or unnecessary slowdowns (fewer threads if lower than small value) to avoid excess forking of tasks" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "performance threshold data size large", + "output": "performance 
threshold data size large refers to Internal threshold for number of rows x number of columns to trigger certain changes in performance (fewer threads if beyond large value) to help avoid OOM or unnecessary slowdowns (fewer threads if lower than small value) to avoid excess forking of tasks" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "performance threshold data size large refers to Internal threshold for number of rows x number of columns to trigger certain changes in performance (fewer threads if beyond large value) to help avoid OOM or unnecessary slowdowns (fewer threads if lower than small value) to avoid excess forking of tasks" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting performance_threshold_data_size_large", + "output": "performance threshold data size large refers to Internal threshold for number of rows x number of columns to trigger certain changes in performance (fewer threads if beyond large value) to help avoid OOM or unnecessary slowdowns (fewer threads if lower than small value) to avoid excess forking of tasks" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting performance_threshold_data_size_large", + "output": "performance threshold data size large refers to Internal threshold for number of rows x number of columns to trigger certain changes in performance (fewer threads if beyond large value) to help avoid OOM or unnecessary slowdowns (fewer threads if lower than small value) to avoid excess forking of tasks" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu_default_threshold_data_size_large", + "output": "gpu default threshold data size large refers to Threshold for number of rows x number of columns to trigger GPU to be default for models like 
XGBoost GBM." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu_default_threshold_data_size_large", + "output": "gpu default threshold data size large refers to Threshold for number of rows x number of columns to trigger GPU to be default for models like XGBoost GBM." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu default threshold data size large", + "output": "gpu default threshold data size large refers to Threshold for number of rows x number of columns to trigger GPU to be default for models like XGBoost GBM." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "gpu default threshold data size large refers to Threshold for number of rows x number of columns to trigger GPU to be default for models like XGBoost GBM." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting gpu_default_threshold_data_size_large", + "output": "gpu default threshold data size large refers to Threshold for number of rows x number of columns to trigger GPU to be default for models like XGBoost GBM." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting gpu_default_threshold_data_size_large", + "output": "gpu default threshold data size large refers to Threshold for number of rows x number of columns to trigger GPU to be default for models like XGBoost GBM." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_relative_cols_mismatch_allowed", + "output": "max relative cols mismatch allowed refers to Maximum fraction of mismatched columns to allow between train and either valid or test. Beyond this value the experiment will fail with invalid data error." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_relative_cols_mismatch_allowed", + "output": "max relative cols mismatch allowed refers to Maximum fraction of mismatched columns to allow between train and either valid or test. Beyond this value the experiment will fail with invalid data error." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max relative cols mismatch allowed", + "output": "max relative cols mismatch allowed refers to Maximum fraction of mismatched columns to allow between train and either valid or test. Beyond this value the experiment will fail with invalid data error." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max relative cols mismatch allowed refers to Maximum fraction of mismatched columns to allow between train and either valid or test. Beyond this value the experiment will fail with invalid data error." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_relative_cols_mismatch_allowed", + "output": "max relative cols mismatch allowed refers to Maximum fraction of mismatched columns to allow between train and either valid or test. Beyond this value the experiment will fail with invalid data error." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_relative_cols_mismatch_allowed", + "output": "max relative cols mismatch allowed refers to Maximum fraction of mismatched columns to allow between train and either valid or test. Beyond this value the experiment will fail with invalid data error." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_wide_rules", + "output": "enable wide rules refers to Enable various rules to handle wide (Num. columns > Num. rows) datasets ('auto'/'on'/'off'). Setting on forces rules to be enabled regardless of columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_wide_rules", + "output": "enable wide rules refers to Enable Wide Rules: Enable various rules to handle wide (Num. columns > Num. rows) datasets ('auto'/'on'/'off'). Setting on forces rules to be enabled regardless of columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable wide rules", + "output": "enable wide rules refers to Enable Wide Rules: Enable various rules to handle wide (Num. columns > Num. rows) datasets ('auto'/'on'/'off'). Setting on forces rules to be enabled regardless of columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable Wide Rules: ", + "output": "enable wide rules refers to Enable Wide Rules: Enable various rules to handle wide (Num. columns > Num. rows) datasets ('auto'/'on'/'off'). Setting on forces rules to be enabled regardless of columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_wide_rules", + "output": "enable wide rules refers to Enable various rules to handle wide (Num. columns > Num. rows) datasets ('auto'/'on'/'off'). Setting on forces rules to be enabled regardless of columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_wide_rules", + "output": "enable wide rules refers to Enable Wide Rules: Enable various rules to handle wide (Num. columns > Num. 
rows) datasets ('auto'/'on'/'off'). Setting on forces rules to be enabled regardless of columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wide_factor", + "output": "wide factor refers to If columns > wide_factor * rows, then enable wide rules if auto. For columns > rows, random forest is always enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wide_factor", + "output": "wide factor refers to Wide rules factor: If columns > wide_factor * rows, then enable wide rules if auto. For columns > rows, random forest is always enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wide factor", + "output": "wide factor refers to Wide rules factor: If columns > wide_factor * rows, then enable wide rules if auto. For columns > rows, random forest is always enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Wide rules factor: ", + "output": "wide factor refers to Wide rules factor: If columns > wide_factor * rows, then enable wide rules if auto. For columns > rows, random forest is always enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting wide_factor", + "output": "wide factor refers to If columns > wide_factor * rows, then enable wide rules if auto. For columns > rows, random forest is always enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting wide_factor", + "output": "wide factor refers to Wide rules factor: If columns > wide_factor * rows, then enable wide rules if auto. For columns > rows, random forest is always enabled." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cols", + "output": "max cols refers to Maximum number of columns to start an experiment. This threshold exists to constraint the # complexity and the length of the Driverless AI's processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cols", + "output": "max cols refers to Maximum number of columns to start an experiment. This threshold exists to constraint the # complexity and the length of the Driverless AI's processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max cols", + "output": "max cols refers to Maximum number of columns to start an experiment. This threshold exists to constraint the # complexity and the length of the Driverless AI's processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max cols refers to Maximum number of columns to start an experiment. This threshold exists to constraint the # complexity and the length of the Driverless AI's processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_cols", + "output": "max cols refers to Maximum number of columns to start an experiment. This threshold exists to constraint the # complexity and the length of the Driverless AI's processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_cols", + "output": "max cols refers to Maximum number of columns to start an experiment. This threshold exists to constraint the # complexity and the length of the Driverless AI's processes." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_col_stats", + "output": "max rows col stats refers to Largest number of rows to use for column stats, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_col_stats", + "output": "max rows col stats refers to Largest number of rows to use for column stats, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max rows col stats", + "output": "max rows col stats refers to Largest number of rows to use for column stats, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max rows col stats refers to Largest number of rows to use for column stats, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_rows_col_stats", + "output": "max rows col stats refers to Largest number of rows to use for column stats, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_rows_col_stats", + "output": "max rows col stats refers to Largest number of rows to use for column stats, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_cv_in_cv_gini", + "output": "max rows cv in cv gini refers to Largest number of rows to use for cv in cv for target encoding when doing gini scoring test" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_cv_in_cv_gini", + "output": "max rows cv in 
cv gini refers to Largest number of rows to use for cv in cv for target encoding when doing gini scoring test" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max rows cv in cv gini", + "output": "max rows cv in cv gini refers to Largest number of rows to use for cv in cv for target encoding when doing gini scoring test" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max rows cv in cv gini refers to Largest number of rows to use for cv in cv for target encoding when doing gini scoring test" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_rows_cv_in_cv_gini", + "output": "max rows cv in cv gini refers to Largest number of rows to use for cv in cv for target encoding when doing gini scoring test" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_rows_cv_in_cv_gini", + "output": "max rows cv in cv gini refers to Largest number of rows to use for cv in cv for target encoding when doing gini scoring test" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_constant_model", + "output": "max rows constant model refers to Largest number of rows to use for constant model fit, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_constant_model", + "output": "max rows constant model refers to Largest number of rows to use for constant model fit, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max rows constant model", + "output": "max rows constant model refers to Largest number of rows to 
use for constant model fit, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max rows constant model refers to Largest number of rows to use for constant model fit, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_rows_constant_model", + "output": "max rows constant model refers to Largest number of rows to use for constant model fit, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_rows_constant_model", + "output": "max rows constant model refers to Largest number of rows to use for constant model fit, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_final_ensemble_base_model_fold_scores", + "output": "max rows final ensemble base model fold scores refers to Largest number of rows to use for final ensemble base model fold scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_final_ensemble_base_model_fold_scores", + "output": "max rows final ensemble base model fold scores refers to Largest number of rows to use for final ensemble base model fold scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max rows final ensemble base model fold scores", + "output": "max rows final ensemble base model fold scores refers to Largest number of rows to use for final ensemble base model fold scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", +
"output": "max rows final ensemble base model fold scores refers to Largest number of rows to use for final ensemble base model fold scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_rows_final_ensemble_base_model_fold_scores", + "output": "max rows final ensemble base model fold scores refers to Largest number of rows to use for final ensemble base model fold scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_rows_final_ensemble_base_model_fold_scores", + "output": "max rows final ensemble base model fold scores refers to Largest number of rows to use for final ensemble base model fold scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_final_blender", + "output": "max rows final blender refers to Largest number of rows to use for final ensemble blender for regression and binary (scaled down linearly by number of classes for multiclass for >= 10 classes), otherwise sample randomly." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_final_blender", + "output": "max rows final blender refers to Largest number of rows to use for final ensemble blender for regression and binary (scaled down linearly by number of classes for multiclass for >= 10 classes), otherwise sample randomly." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max rows final blender", + "output": "max rows final blender refers to Largest number of rows to use for final ensemble blender for regression and binary (scaled down linearly by number of classes for multiclass for >= 10 classes), otherwise sample randomly."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max rows final blender refers to Largest number of rows to use for final ensemble blender for regression and binary (scaled down linearly by number of classes for multiclass for >= 10 classes), otherwise sample randomly." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_rows_final_blender", + "output": "max rows final blender refers to Largest number of rows to use for final ensemble blender for regression and binary (scaled down linearly by number of classes for multiclass for >= 10 classes), otherwise sample randomly." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_rows_final_blender", + "output": "max rows final blender refers to Largest number of rows to use for final ensemble blender for regression and binary (scaled down linearly by number of classes for multiclass for >= 10 classes), otherwise sample randomly." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_rows_final_blender", + "output": "min rows final blender refers to Smallest number of rows (or number of rows if less than this) to use for final ensemble blender." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_rows_final_blender", + "output": "min rows final blender refers to Smallest number of rows (or number of rows if less than this) to use for final ensemble blender." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min rows final blender", + "output": "min rows final blender refers to Smallest number of rows (or number of rows if less than this) to use for final ensemble blender." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "min rows final blender refers to Smallest number of rows (or number of rows if less than this) to use for final ensemble blender." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_rows_final_blender", + "output": "min rows final blender refers to Smallest number of rows (or number of rows if less than this) to use for final ensemble blender." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_rows_final_blender", + "output": "min rows final blender refers to Smallest number of rows (or number of rows if less than this) to use for final ensemble blender." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_final_train_score", + "output": "max rows final train score refers to Largest number of rows to use for final training score (no holdout), otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_final_train_score", + "output": "max rows final train score refers to Largest number of rows to use for final training score (no holdout), otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max rows final train score", + "output": "max rows final train score refers to Largest number of rows to use for final training score (no holdout), otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max rows final train score refers to Largest number of rows to use for final training score (no holdout), otherwise sample 
randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_rows_final_train_score", + "output": "max rows final train score refers to Largest number of rows to use for final training score (no holdout), otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_rows_final_train_score", + "output": "max rows final train score refers to Largest number of rows to use for final training score (no holdout), otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_final_roccmconf", + "output": "max rows final roccmconf refers to Largest number of rows to use for final ROC, lift-gains, confusion matrix, residual, and actual vs. predicted. Otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_final_roccmconf", + "output": "max rows final roccmconf refers to Largest number of rows to use for final ROC, lift-gains, confusion matrix, residual, and actual vs. predicted. Otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max rows final roccmconf", + "output": "max rows final roccmconf refers to Largest number of rows to use for final ROC, lift-gains, confusion matrix, residual, and actual vs. predicted. Otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max rows final roccmconf refers to Largest number of rows to use for final ROC, lift-gains, confusion matrix, residual, and actual vs. predicted. 
Otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_rows_final_roccmconf", + "output": "max rows final roccmconf refers to Largest number of rows to use for final ROC, lift-gains, confusion matrix, residual, and actual vs. predicted. Otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_rows_final_roccmconf", + "output": "max rows final roccmconf refers to Largest number of rows to use for final ROC, lift-gains, confusion matrix, residual, and actual vs. predicted. Otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_final_holdout_score", + "output": "max rows final holdout score refers to Largest number of rows to use for final holdout scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_final_holdout_score", + "output": "max rows final holdout score refers to Largest number of rows to use for final holdout scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max rows final holdout score", + "output": "max rows final holdout score refers to Largest number of rows to use for final holdout scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max rows final holdout score refers to Largest number of rows to use for final holdout scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_rows_final_holdout_score", + "output": "max rows final holdout score refers to 
Largest number of rows to use for final holdout scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_rows_final_holdout_score", + "output": "max rows final holdout score refers to Largest number of rows to use for final holdout scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_final_holdout_bootstrap_score", + "output": "max rows final holdout bootstrap score refers to Largest number of rows to use for final holdout bootstrap scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_final_holdout_bootstrap_score", + "output": "max rows final holdout bootstrap score refers to Largest number of rows to use for final holdout bootstrap scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max rows final holdout bootstrap score", + "output": "max rows final holdout bootstrap score refers to Largest number of rows to use for final holdout bootstrap scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max rows final holdout bootstrap score refers to Largest number of rows to use for final holdout bootstrap scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_rows_final_holdout_bootstrap_score", + "output": "max rows final holdout bootstrap score refers to Largest number of rows to use for final holdout bootstrap scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation 
of the expert setting max_rows_final_holdout_bootstrap_score", + "output": "max rows final holdout bootstrap score refers to Largest number of rows to use for final holdout bootstrap scores, otherwise sample randomly" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "orig_features_fs_report", + "output": "orig features fs report refers to Whether to obtain permutation feature importance on original features for reporting in logs and summary zip file (as files with pattern fs_*.json or fs_*.tab.txt). This computes feature importance on a single un-tuned model (typically LightGBM with pre-defined un-tuned hyperparameters) and simple set of features (encoding typically is frequency encoding or target encoding). Features with low importance are automatically dropped if there are many original features, or a model with feature selection by permutation importance is created if interpretability is high enough in order to see if it gives a better score. One can manually drop low importance features, but this can be risky as transformers or hyperparameters might recover their usefulness. Permutation importance is obtained by: 1) Transforming categoricals to frequency or target encoding features. 2) Fitting that model on many folds, different data sizes, and slightly varying hyperparameters. 3) Predicting on that model for each feature where each feature has its data shuffled. 4) Computing the score on each shuffled prediction. 5) Computing the difference between the unshuffled score and the shuffled score to arrive at a delta score. 6) The delta score becomes the variable importance once normalized by the maximum. Positive delta scores indicate the feature helped the model score, while negative delta scores indicate the feature hurt the model score. The normalized scores are stored in the fs_normalized_* files in the summary zip.
The unnormalized scores (actual delta scores) are stored in the fs_unnormalized_* files in the summary zip. AutoDoc has a similar functionality of providing permutation importance on original features, where that takes the specific final model of an experiment and runs training data set through permutation importance to get original importance, so shuffling of original features is performed and the full pipeline is computed in each shuffled set of original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "orig_features_fs_report", + "output": "orig features fs report refers to Report permutation importance on original features: Whether to obtain permutation feature importance on original features for reporting in logs and summary zip file (as files with pattern fs_*.json or fs_*.tab.txt). This computes feature importance on a single un-tuned model (typically LightGBM with pre-defined un-tuned hyperparameters) and simple set of features (encoding typically is frequency encoding or target encoding). Features with low importance are automatically dropped if there are many original features, or a model with feature selection by permutation importance is created if interpretability is high enough in order to see if it gives a better score. One can manually drop low importance features, but this can be risky as transformers or hyperparameters might recover their usefulness. Permutation importance is obtained by: 1) Transforming categoricals to frequency or target encoding features. 2) Fitting that model on many folds, different data sizes, and slightly varying hyperparameters. 3) Predicting on that model for each feature where each feature has its data shuffled. 4) Computing the score on each shuffled prediction. 5) Computing the difference between the unshuffled score and the shuffled score to arrive at a delta score. 6) The delta score becomes the variable importance once normalized by the maximum.
Positive delta scores indicate the feature helped the model score, while negative delta scores indicate the feature hurt the model score. The normalized scores are stored in the fs_normalized_* files in the summary zip. The unnormalized scores (actual delta scores) are stored in the fs_unnormalized_* files in the summary zip. AutoDoc has a similar functionality of providing permutation importance on original features, where that takes the specific final model of an experiment and runs training data set through permutation importance to get original importance, so shuffling of original features is performed and the full pipeline is computed in each shuffled set of original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "orig features fs report", + "output": "orig features fs report refers to Report permutation importance on original features: Whether to obtain permutation feature importance on original features for reporting in logs and summary zip file (as files with pattern fs_*.json or fs_*.tab.txt). This computes feature importance on a single un-tuned model (typically LightGBM with pre-defined un-tuned hyperparameters) and simple set of features (encoding typically is frequency encoding or target encoding). Features with low importance are automatically dropped if there are many original features, or a model with feature selection by permutation importance is created if interpretability is high enough in order to see if it gives a better score. One can manually drop low importance features, but this can be risky as transformers or hyperparameters might recover their usefulness. Permutation importance is obtained by: 1) Transforming categoricals to frequency or target encoding features. 2) Fitting that model on many folds, different data sizes, and slightly varying hyperparameters. 3) Predicting on that model for each feature where each feature has its data shuffled. 4) Computing the score
on each shuffled prediction. 5) Computing the difference between the unshuffled score and the shuffled score to arrive at a delta score. 6) The delta score becomes the variable importance once normalized by the maximum. Positive delta scores indicate the feature helped the model score, while negative delta scores indicate the feature hurt the model score. The normalized scores are stored in the fs_normalized_* files in the summary zip. The unnormalized scores (actual delta scores) are stored in the fs_unnormalized_* files in the summary zip. AutoDoc has a similar functionality of providing permutation importance on original features, where that takes the specific final model of an experiment and runs training data set through permutation importance to get original importance, so shuffling of original features is performed and the full pipeline is computed in each shuffled set of original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Report permutation importance on original features: ", + "output": "orig features fs report refers to Report permutation importance on original features: Whether to obtain permutation feature importance on original features for reporting in logs and summary zip file (as files with pattern fs_*.json or fs_*.tab.txt). This computes feature importance on a single un-tuned model (typically LightGBM with pre-defined un-tuned hyperparameters) and simple set of features (encoding typically is frequency encoding or target encoding). Features with low importance are automatically dropped if there are many original features, or a model with feature selection by permutation importance is created if interpretability is high enough in order to see if it gives a better score. One can manually drop low importance features, but this can be risky as transformers or hyperparameters might recover their usefulness. Permutation importance is obtained by: 1) Transforming categoricals
to frequency or target encoding features. 2) Fitting that model on many folds, different data sizes, and slightly varying hyperparameters. 3) Predicting on that model for each feature where each feature has its data shuffled. 4) Computing the score on each shuffled prediction. 5) Computing the difference between the unshuffled score and the shuffled score to arrive at a delta score. 6) The delta score becomes the variable importance once normalized by the maximum. Positive delta scores indicate the feature helped the model score, while negative delta scores indicate the feature hurt the model score. The normalized scores are stored in the fs_normalized_* files in the summary zip. The unnormalized scores (actual delta scores) are stored in the fs_unnormalized_* files in the summary zip. AutoDoc has a similar functionality of providing permutation importance on original features, where that takes the specific final model of an experiment and runs training data set through permutation importance to get original importance, so shuffling of original features is performed and the full pipeline is computed in each shuffled set of original features. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting orig_features_fs_report", + "output": "orig features fs report refers to Whether to obtain permutation feature importance on original features for reporting in logs and summary zip file (as files with pattern fs_*.json or fs_*.tab.txt). This computes feature importance on a single un-tuned model (typically LightGBM with pre-defined un-tuned hyperparameters) and simple set of features (encoding typically is frequency encoding or target encoding). Features with low importance are automatically dropped if there are many original features, or a model with feature selection by permutation importance is created if interpretability is high enough in order to see if it gives a better score. One can manually drop low importance features, but this can be risky as transformers or hyperparameters might recover their usefulness. Permutation importance is obtained by: 1) Transforming categoricals to frequency or target encoding features. 2) Fitting that model on many folds, different data sizes, and slightly varying hyperparameters. 3) Predicting on that model for each feature where each feature has its data shuffled. 4) Computing the score on each shuffled prediction. 5) Computing the difference between the unshuffled score and the shuffled score to arrive at a delta score. 6) The delta score becomes the variable importance once normalized by the maximum. Positive delta scores indicate the feature helped the model score, while negative delta scores indicate the feature hurt the model score. The normalized scores are stored in the fs_normalized_* files in the summary zip.
The unnormalized scores (actual delta scores) are stored in the fs_unnormalized_* files in the summary zip. AutoDoc has a similar functionality of providing permutation importance on original features, where that takes the specific final model of an experiment and runs training data set through permutation importance to get original importance, so shuffling of original features is performed and the full pipeline is computed in each shuffled set of original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting orig_features_fs_report", + "output": "orig features fs report refers to Report permutation importance on original features: Whether to obtain permutation feature importance on original features for reporting in logs and summary zip file (as files with pattern fs_*.json or fs_*.tab.txt). This computes feature importance on a single un-tuned model (typically LightGBM with pre-defined un-tuned hyperparameters) and simple set of features (encoding typically is frequency encoding or target encoding). Features with low importance are automatically dropped if there are many original features, or a model with feature selection by permutation importance is created if interpretability is high enough in order to see if it gives a better score. One can manually drop low importance features, but this can be risky as transformers or hyperparameters might recover their usefulness. Permutation importance is obtained by: 1) Transforming categoricals to frequency or target encoding features. 2) Fitting that model on many folds, different data sizes, and slightly varying hyperparameters. 3) Predicting on that model for each feature where each feature has its data shuffled. 4) Computing the score on each shuffled prediction. 5) Computing the difference between the unshuffled score and the shuffled score to arrive at a delta score. 6) The delta score becomes the variable importance once normalized by the maximum.
Positive delta scores indicate the feature helped the model score, while negative delta scores indicate the feature hurt the model score. The normalized scores are stored in the fs_normalized_* files in the summary zip. The unnormalized scores (actual delta scores) are stored in the fs_unnormalized_* files in the summary zip. AutoDoc has a similar functionality of providing permutation importance on original features, where that takes the specific final model of an experiment and runs training data set through permutation importance to get original importance, so shuffling of original features is performed and the full pipeline is computed in each shuffled set of original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_fs", + "output": "max rows fs refers to Maximum number of rows when doing permutation feature importance, reduced by (stratified) random sampling. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_fs", + "output": "max rows fs refers to Maximum number of rows to perform permutation-based feature selection: Maximum number of rows when doing permutation feature importance, reduced by (stratified) random sampling. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max rows fs", + "output": "max rows fs refers to Maximum number of rows to perform permutation-based feature selection: Maximum number of rows when doing permutation feature importance, reduced by (stratified) random sampling. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of rows to perform permutation-based feature selection: ", + "output": "max rows fs refers to Maximum number of rows to perform permutation-based feature selection: Maximum number of rows when doing permutation feature importance, reduced by (stratified) random sampling. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_rows_fs", + "output": "max rows fs refers to Maximum number of rows when doing permutation feature importance, reduced by (stratified) random sampling. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_rows_fs", + "output": "max rows fs refers to Maximum number of rows to perform permutation-based feature selection: Maximum number of rows when doing permutation feature importance, reduced by (stratified) random sampling. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_leak", + "output": "max rows leak refers to Max. rows for leakage detection if wide rules used on wide data: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_leak", + "output": "max rows leak refers to Max. rows for leakage detection if wide rules used on wide data: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max rows leak", + "output": "max rows leak refers to Max. rows for leakage detection if wide rules used on wide data: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. rows for leakage detection if wide rules used on wide data: ", + "output": "max rows leak refers to Max. 
rows for leakage detection if wide rules used on wide data: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_rows_leak", + "output": "max rows leak refers to Max. rows for leakage detection if wide rules used on wide data: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_rows_leak", + "output": "max rows leak refers to Max. rows for leakage detection if wide rules used on wide data: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_workers_fs", + "output": "max workers fs refers to How many workers to use for feature selection by permutation for predict phase. (0 = auto, > 0: min of DAI value and this value, < 0: exactly negative of this value) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_workers_fs", + "output": "max workers fs refers to Num. simultaneous predictions for feature selection (0 = auto): How many workers to use for feature selection by permutation for predict phase. (0 = auto, > 0: min of DAI value and this value, < 0: exactly negative of this value) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max workers fs", + "output": "max workers fs refers to Num. simultaneous predictions for feature selection (0 = auto): How many workers to use for feature selection by permutation for predict phase. (0 = auto, > 0: min of DAI value and this value, < 0: exactly negative of this value) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Num. simultaneous predictions for feature selection (0 = auto): ", + "output": "max workers fs refers to Num. 
simultaneous predictions for feature selection (0 = auto): How many workers to use for feature selection by permutation for predict phase. (0 = auto, > 0: min of DAI value and this value, < 0: exactly negative of this value) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_workers_fs", + "output": "max workers fs refers to How many workers to use for feature selection by permutation for predict phase. (0 = auto, > 0: min of DAI value and this value, < 0: exactly negative of this value) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_workers_fs", + "output": "max workers fs refers to Num. simultaneous predictions for feature selection (0 = auto): How many workers to use for feature selection by permutation for predict phase. (0 = auto, > 0: min of DAI value and this value, < 0: exactly negative of this value) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_workers_shift_leak", + "output": "max workers shift leak refers to How many workers to use for shift and leakage checks if using LightGBM on CPU. (0 = auto, > 0: min of DAI value and this value, < 0: exactly negative of this value) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_workers_shift_leak", + "output": "max workers shift leak refers to Num. simultaneous fits for shift and leak checks if using LightGBM on CPU (0 = auto): How many workers to use for shift and leakage checks if using LightGBM on CPU. (0 = auto, > 0: min of DAI value and this value, < 0: exactly negative of this value) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max workers shift leak", + "output": "max workers shift leak refers to Num. 
simultaneous fits for shift and leak checks if using LightGBM on CPU (0 = auto): How many workers to use for shift and leakage checks if using LightGBM on CPU. (0 = auto, > 0: min of DAI value and this value, < 0: exactly negative of this value) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Num. simultaneous fits for shift and leak checks if using LightGBM on CPU (0 = auto): ", + "output": "max workers shift leak refers to Num. simultaneous fits for shift and leak checks if using LightGBM on CPU (0 = auto): How many workers to use for shift and leakage checks if using LightGBM on CPU. (0 = auto, > 0: min of DAI value and this value, < 0: exactly negative of this value) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_workers_shift_leak", + "output": "max workers shift leak refers to How many workers to use for shift and leakage checks if using LightGBM on CPU. (0 = auto, > 0: min of DAI value and this value, < 0: exactly negative of this value) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_workers_shift_leak", + "output": "max workers shift leak refers to Num. simultaneous fits for shift and leak checks if using LightGBM on CPU (0 = auto): How many workers to use for shift and leakage checks if using LightGBM on CPU. 
(0 = auto, > 0: min of DAI value and this value, < 0: exactly negative of this value) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_orig_cols_selected", + "output": "max orig cols selected refers to Maximum number of columns selected out of original set of original columns, using feature selection. The selection is based upon how well target encoding (or frequency encoding if not available) on categoricals and numerics treated as categoricals. This is useful to reduce the final model complexity. First the best [max_orig_cols_selected] are found through feature selection methods and then these features are used in feature evolution (to derive other features) and in modelling. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_orig_cols_selected", + "output": "max orig cols selected refers to Max. number of original features used: Maximum number of columns selected out of original set of original columns, using feature selection. The selection is based upon how well target encoding (or frequency encoding if not available) on categoricals and numerics treated as categoricals. This is useful to reduce the final model complexity. First the best [max_orig_cols_selected] are found through feature selection methods and then these features are used in feature evolution (to derive other features) and in modelling. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max orig cols selected", + "output": "max orig cols selected refers to Max. 
number of original features used: Maximum number of columns selected out of original set of original columns, using feature selection. The selection is based upon how well target encoding (or frequency encoding if not available) on categoricals and numerics treated as categoricals. This is useful to reduce the final model complexity. First the best [max_orig_cols_selected] are found through feature selection methods and then these features are used in feature evolution (to derive other features) and in modelling. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of original features used: ", + "output": "max orig cols selected refers to Max. number of original features used: Maximum number of columns selected out of original set of original columns, using feature selection. The selection is based upon how well target encoding (or frequency encoding if not available) on categoricals and numerics treated as categoricals. This is useful to reduce the final model complexity. First the best [max_orig_cols_selected] are found through feature selection methods and then these features are used in feature evolution (to derive other features) and in modelling. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_orig_cols_selected", + "output": "max orig cols selected refers to Maximum number of columns selected out of original set of original columns, using feature selection. The selection is based upon how well target encoding (or frequency encoding if not available) on categoricals and numerics treated as categoricals. This is useful to reduce the final model complexity. First the best [max_orig_cols_selected] are found through feature selection methods and then these features are used in feature evolution (to derive other features) and in modelling. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_orig_cols_selected", + "output": "max orig cols selected refers to Max. number of original features used: Maximum number of columns selected out of original set of original columns, using feature selection. The selection is based upon how well target encoding (or frequency encoding if not available) on categoricals and numerics treated as categoricals. This is useful to reduce the final model complexity. First the best [max_orig_cols_selected] are found through feature selection methods and then these features are used in feature evolution (to derive other features) and in modelling. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_orig_numeric_cols_selected", + "output": "max orig numeric cols selected refers to Maximum number of numeric columns selected, above which will do feature selection, same as max_orig_cols_selected but for numeric columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_orig_numeric_cols_selected", + "output": "max orig numeric cols selected refers to Maximum number of numeric columns selected, above which will do feature selection, same as max_orig_cols_selected but for numeric columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max orig numeric cols selected", + "output": "max orig numeric cols selected refers to Maximum number of numeric columns selected, above which will do feature selection, same as max_orig_cols_selected but for numeric columns." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max orig numeric cols selected refers to Maximum number of numeric columns selected, above which will do feature selection, same as max_orig_cols_selected but for numeric columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_orig_numeric_cols_selected", + "output": "max orig numeric cols selected refers to Maximum number of numeric columns selected, above which will do feature selection, same as max_orig_cols_selected but for numeric columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_orig_numeric_cols_selected", + "output": "max orig numeric cols selected refers to Maximum number of numeric columns selected, above which will do feature selection, same as max_orig_cols_selected but for numeric columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_orig_nonnumeric_cols_selected", + "output": "max orig nonnumeric cols selected refers to Maximum number of non-numeric columns selected, above which will do feature selection on all features. Same as max_orig_numeric_cols_selected but for categorical columns. If set to -1, then auto mode which uses max_orig_nonnumeric_cols_selected_default, but then for small data can be increased up to 10x larger. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_orig_nonnumeric_cols_selected", + "output": "max orig nonnumeric cols selected refers to Max. number of original non-numeric features: Maximum number of non-numeric columns selected, above which will do feature selection on all features. 
Same as max_orig_numeric_cols_selected but for categorical columns. If set to -1, then auto mode which uses max_orig_nonnumeric_cols_selected_default, but then for small data can be increased up to 10x larger. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max orig nonnumeric cols selected", + "output": "max orig nonnumeric cols selected refers to Max. number of original non-numeric features: Maximum number of non-numeric columns selected, above which will do feature selection on all features. Same as max_orig_numeric_cols_selected but for categorical columns. If set to -1, then auto mode which uses max_orig_nonnumeric_cols_selected_default, but then for small data can be increased up to 10x larger. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of original non-numeric features: ", + "output": "max orig nonnumeric cols selected refers to Max. number of original non-numeric features: Maximum number of non-numeric columns selected, above which will do feature selection on all features. Same as max_orig_numeric_cols_selected but for categorical columns. If set to -1, then auto mode which uses max_orig_nonnumeric_cols_selected_default, but then for small data can be increased up to 10x larger. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_orig_nonnumeric_cols_selected", + "output": "max orig nonnumeric cols selected refers to Maximum number of non-numeric columns selected, above which will do feature selection on all features. Same as max_orig_numeric_cols_selected but for categorical columns. If set to -1, then auto mode which uses max_orig_nonnumeric_cols_selected_default, but then for small data can be increased up to 10x larger. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_orig_nonnumeric_cols_selected", + "output": "max orig nonnumeric cols selected refers to Max. number of original non-numeric features: Maximum number of non-numeric columns selected, above which will do feature selection on all features. Same as max_orig_numeric_cols_selected but for categorical columns. If set to -1, then auto mode which uses max_orig_nonnumeric_cols_selected_default, but then for small data can be increased up to 10x larger. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_orig_cols_selected_simple_factor", + "output": "max orig cols selected simple factor refers to The factor times max_orig_cols_selected, by which column selection is based upon no target encoding and no treating numerical as categorical in order to limit performance cost of feature engineering" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_orig_cols_selected_simple_factor", + "output": "max orig cols selected simple factor refers to The factor times max_orig_cols_selected, by which column selection is based upon no target encoding and no treating numerical as categorical in order to limit performance cost of feature engineering" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max orig cols selected simple factor", + "output": "max orig cols selected simple factor refers to The factor times max_orig_cols_selected, by which column selection is based upon no target encoding and no treating numerical as categorical in order to limit performance cost of feature engineering" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max orig cols 
selected simple factor refers to The factor times max_orig_cols_selected, by which column selection is based upon no target encoding and no treating numerical as categorical in order to limit performance cost of feature engineering" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_orig_cols_selected_simple_factor", + "output": "max orig cols selected simple factor refers to The factor times max_orig_cols_selected, by which column selection is based upon no target encoding and no treating numerical as categorical in order to limit performance cost of feature engineering" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_orig_cols_selected_simple_factor", + "output": "max orig cols selected simple factor refers to The factor times max_orig_cols_selected, by which column selection is based upon no target encoding and no treating numerical as categorical in order to limit performance cost of feature engineering" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fs_orig_cols_selected", + "output": "fs orig cols selected refers to Like max_orig_cols_selected, but columns above which add special individual with original columns reduced. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fs_orig_cols_selected", + "output": "fs orig cols selected refers to Max. number of original features used for FS individual: Like max_orig_cols_selected, but columns above which add special individual with original columns reduced. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fs orig cols selected", + "output": "fs orig cols selected refers to Max. 
number of original features used for FS individual: Like max_orig_cols_selected, but columns above which add special individual with original columns reduced. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of original features used for FS individual: ", + "output": "fs orig cols selected refers to Max. number of original features used for FS individual: Like max_orig_cols_selected, but columns above which add special individual with original columns reduced. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fs_orig_cols_selected", + "output": "fs orig cols selected refers to Like max_orig_cols_selected, but columns above which add special individual with original columns reduced. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fs_orig_cols_selected", + "output": "fs orig cols selected refers to Max. number of original features used for FS individual: Like max_orig_cols_selected, but columns above which add special individual with original columns reduced. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fs_orig_numeric_cols_selected", + "output": "fs orig numeric cols selected refers to Like max_orig_numeric_cols_selected, but applicable to special individual with original columns reduced. A separate individual in the genetic algorithm is created by doing feature selection by permutation importance on original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fs_orig_numeric_cols_selected", + "output": "fs orig numeric cols selected refers to Num. 
of original numeric features to trigger feature selection model type: Like max_orig_numeric_cols_selected, but applicable to special individual with original columns reduced. A separate individual in the genetic algorithm is created by doing feature selection by permutation importance on original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fs orig numeric cols selected", + "output": "fs orig numeric cols selected refers to Num. of original numeric features to trigger feature selection model type: Like max_orig_numeric_cols_selected, but applicable to special individual with original columns reduced. A separate individual in the genetic algorithm is created by doing feature selection by permutation importance on original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Num. of original numeric features to trigger feature selection model type: ", + "output": "fs orig numeric cols selected refers to Num. of original numeric features to trigger feature selection model type: Like max_orig_numeric_cols_selected, but applicable to special individual with original columns reduced. A separate individual in the genetic algorithm is created by doing feature selection by permutation importance on original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fs_orig_numeric_cols_selected", + "output": "fs orig numeric cols selected refers to Like max_orig_numeric_cols_selected, but applicable to special individual with original columns reduced. A separate individual in the genetic algorithm is created by doing feature selection by permutation importance on original features. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fs_orig_numeric_cols_selected", + "output": "fs orig numeric cols selected refers to Num. of original numeric features to trigger feature selection model type: Like max_orig_numeric_cols_selected, but applicable to special individual with original columns reduced. A separate individual in the genetic algorithm is created by doing feature selection by permutation importance on original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fs_orig_nonnumeric_cols_selected", + "output": "fs orig nonnumeric cols selected refers to Like max_orig_nonnumeric_cols_selected, but applicable to special individual with original columns reduced. A separate individual in the genetic algorithm is created by doing feature selection by permutation importance on original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fs_orig_nonnumeric_cols_selected", + "output": "fs orig nonnumeric cols selected refers to Num. of original non-numeric features to trigger feature selection model type: Like max_orig_nonnumeric_cols_selected, but applicable to special individual with original columns reduced. A separate individual in the genetic algorithm is created by doing feature selection by permutation importance on original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fs orig nonnumeric cols selected", + "output": "fs orig nonnumeric cols selected refers to Num. of original non-numeric features to trigger feature selection model type: Like max_orig_nonnumeric_cols_selected, but applicable to special individual with original columns reduced. 
A separate individual in the genetic algorithm is created by doing feature selection by permutation importance on original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Num. of original non-numeric features to trigger feature selection model type: ", + "output": "fs orig nonnumeric cols selected refers to Num. of original non-numeric features to trigger feature selection model type: Like max_orig_nonnumeric_cols_selected, but applicable to special individual with original columns reduced. A separate individual in the genetic algorithm is created by doing feature selection by permutation importance on original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fs_orig_nonnumeric_cols_selected", + "output": "fs orig nonnumeric cols selected refers to Like max_orig_nonnumeric_cols_selected, but applicable to special individual with original columns reduced. A separate individual in the genetic algorithm is created by doing feature selection by permutation importance on original features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fs_orig_nonnumeric_cols_selected", + "output": "fs orig nonnumeric cols selected refers to Num. of original non-numeric features to trigger feature selection model type: Like max_orig_nonnumeric_cols_selected, but applicable to special individual with original columns reduced. A separate individual in the genetic algorithm is created by doing feature selection by permutation importance on original features. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fs_orig_cols_selected_simple_factor", + "output": "fs orig cols selected simple factor refers to Like max_orig_cols_selected_simple_factor, but applicable to special individual with original columns reduced." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fs_orig_cols_selected_simple_factor", + "output": "fs orig cols selected simple factor refers to Like max_orig_cols_selected_simple_factor, but applicable to special individual with original columns reduced." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fs orig cols selected simple factor", + "output": "fs orig cols selected simple factor refers to Like max_orig_cols_selected_simple_factor, but applicable to special individual with original columns reduced." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "fs orig cols selected simple factor refers to Like max_orig_cols_selected_simple_factor, but applicable to special individual with original columns reduced." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fs_orig_cols_selected_simple_factor", + "output": "fs orig cols selected simple factor refers to Like max_orig_cols_selected_simple_factor, but applicable to special individual with original columns reduced." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fs_orig_cols_selected_simple_factor", + "output": "fs orig cols selected simple factor refers to Like max_orig_cols_selected_simple_factor, but applicable to special individual with original columns reduced." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "predict_shuffle_inside_model", + "output": "predict shuffle inside model refers to Allow supported models to do feature selection by permutation importance within model itself: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "predict_shuffle_inside_model", + "output": "predict shuffle inside model refers to Allow supported models to do feature selection by permutation importance within model itself: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "predict shuffle inside model", + "output": "predict shuffle inside model refers to Allow supported models to do feature selection by permutation importance within model itself: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Allow supported models to do feature selection by permutation importance within model itself: ", + "output": "predict shuffle inside model refers to Allow supported models to do feature selection by permutation importance within model itself: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting predict_shuffle_inside_model", + "output": "predict shuffle inside model refers to Allow supported models to do feature selection by permutation importance within model itself: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting predict_shuffle_inside_model", + "output": "predict shuffle inside model refers to Allow supported models to do feature selection by permutation importance within model itself: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": 
"use_native_cats_for_lgbm_fs", + "output": "use native cats for lgbm fs refers to Whether to use native categorical handling (CPU only) for LightGBM when doing feature selection by permutation: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_native_cats_for_lgbm_fs", + "output": "use native cats for lgbm fs refers to Whether to use native categorical handling (CPU only) for LightGBM when doing feature selection by permutation: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use native cats for lgbm fs", + "output": "use native cats for lgbm fs refers to Whether to use native categorical handling (CPU only) for LightGBM when doing feature selection by permutation: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to use native categorical handling (CPU only) for LightGBM when doing feature selection by permutation: ", + "output": "use native cats for lgbm fs refers to Whether to use native categorical handling (CPU only) for LightGBM when doing feature selection by permutation: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting use_native_cats_for_lgbm_fs", + "output": "use native cats for lgbm fs refers to Whether to use native categorical handling (CPU only) for LightGBM when doing feature selection by permutation: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting use_native_cats_for_lgbm_fs", + "output": "use native cats for lgbm fs refers to Whether to use native categorical handling (CPU only) for LightGBM when doing feature selection by permutation: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "orig_stddev_max_cols", 
+ "output": "orig stddev max cols refers to Maximum number of original columns up to which will compute standard deviation of original feature importance. Can be expensive if many features.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "orig_stddev_max_cols", + "output": "orig stddev max cols refers to Maximum number of original columns up to which will compute standard deviation of original feature importance. Can be expensive if many features.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "orig stddev max cols", + "output": "orig stddev max cols refers to Maximum number of original columns up to which will compute standard deviation of original feature importance. Can be expensive if many features.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of original columns up to which will compute standard deviation of original feature importance. Can be expensive if many features.: ", + "output": "orig stddev max cols refers to Maximum number of original columns up to which will compute standard deviation of original feature importance. Can be expensive if many features.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting orig_stddev_max_cols", + "output": "orig stddev max cols refers to Maximum number of original columns up to which will compute standard deviation of original feature importance. Can be expensive if many features.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting orig_stddev_max_cols", + "output": "orig stddev max cols refers to Maximum number of original columns up to which will compute standard deviation of original feature importance. 
Can be expensive if many features.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_relative_cardinality", + "output": "max relative cardinality refers to Maximum allowed fraction of unique values for integer and categorical columns (otherwise will treat column as ID and drop)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_relative_cardinality", + "output": "max relative cardinality refers to Max. allowed fraction of uniques for integer and categorical cols: Maximum allowed fraction of unique values for integer and categorical columns (otherwise will treat column as ID and drop)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max relative cardinality", + "output": "max relative cardinality refers to Max. allowed fraction of uniques for integer and categorical cols: Maximum allowed fraction of unique values for integer and categorical columns (otherwise will treat column as ID and drop)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. allowed fraction of uniques for integer and categorical cols: ", + "output": "max relative cardinality refers to Max. 
allowed fraction of uniques for integer and categorical cols: Maximum allowed fraction of unique values for integer and categorical columns (otherwise will treat column as ID and drop)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_relative_cardinality", + "output": "max relative cardinality refers to Maximum allowed fraction of unique values for integer and categorical columns (otherwise will treat column as ID and drop)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_relative_cardinality", + "output": "max relative cardinality refers to Max. allowed fraction of uniques for integer and categorical cols: Maximum allowed fraction of unique values for integer and categorical columns (otherwise will treat column as ID and drop)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_absolute_cardinality", + "output": "max absolute cardinality refers to Maximum allowed number of unique values for integer and categorical columns (otherwise will treat column as ID and drop)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_absolute_cardinality", + "output": "max absolute cardinality refers to Maximum allowed number of unique values for integer and categorical columns (otherwise will treat column as ID and drop)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max absolute cardinality", + "output": "max absolute cardinality refers to Maximum allowed number of unique values for integer and categorical columns (otherwise will treat column as ID and drop)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max absolute 
cardinality refers to Maximum allowed number of unique values for integer and categorical columns (otherwise will treat column as ID and drop)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_absolute_cardinality", + "output": "max absolute cardinality refers to Maximum allowed number of unique values for integer and categorical columns (otherwise will treat column as ID and drop)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_absolute_cardinality", + "output": "max absolute cardinality refers to Maximum allowed number of unique values for integer and categorical columns (otherwise will treat column as ID and drop)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_as_cat", + "output": "num as cat refers to Whether to treat some numerical features as categorical. For instance, sometimes an integer column may not represent a numerical feature but represent different numerical codes instead. Very restrictive to disable, since then even columns with few categorical levels that happen to be numerical in value will not be encoded like a categorical. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_as_cat", + "output": "num as cat refers to Allow treating numerical as categorical: Whether to treat some numerical features as categorical. For instance, sometimes an integer column may not represent a numerical feature but represent different numerical codes instead. Very restrictive to disable, since then even columns with few categorical levels that happen to be numerical in value will not be encoded like a categorical. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num as cat", + "output": "num as cat refers to Allow treating numerical as categorical: Whether to treat some numerical features as categorical. For instance, sometimes an integer column may not represent a numerical feature but represent different numerical codes instead. Very restrictive to disable, since then even columns with few categorical levels that happen to be numerical in value will not be encoded like a categorical. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Allow treating numerical as categorical: ", + "output": "num as cat refers to Allow treating numerical as categorical: Whether to treat some numerical features as categorical. For instance, sometimes an integer column may not represent a numerical feature but represent different numerical codes instead. Very restrictive to disable, since then even columns with few categorical levels that happen to be numerical in value will not be encoded like a categorical. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_as_cat", + "output": "num as cat refers to Whether to treat some numerical features as categorical. For instance, sometimes an integer column may not represent a numerical feature but represent different numerical codes instead. Very restrictive to disable, since then even columns with few categorical levels that happen to be numerical in value will not be encoded like a categorical. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_as_cat", + "output": "num as cat refers to Allow treating numerical as categorical: Whether to treat some numerical features as categorical. For instance, sometimes an integer column may not represent a numerical feature but represent different numerical codes instead. Very restrictive to disable, since then even columns with few categorical levels that happen to be numerical in value will not be encoded like a categorical. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_int_as_cat_uniques", + "output": "max int as cat uniques refers to Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_int_as_cat_uniques", + "output": "max int as cat uniques refers to Max. number of unique values for int/float to be categoricals: Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max int as cat uniques", + "output": "max int as cat uniques refers to Max. number of unique values for int/float to be categoricals: Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of unique values for int/float to be categoricals: ", + "output": "max int as cat uniques refers to Max. 
number of unique values for int/float to be categoricals: Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_int_as_cat_uniques", + "output": "max int as cat uniques refers to Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_int_as_cat_uniques", + "output": "max int as cat uniques refers to Max. number of unique values for int/float to be categoricals: Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_int_as_cat_uniques_if_not_benford", + "output": "max int as cat uniques if not benford refers to Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only). Applies to integer or real numerical feature that violates Benford's law, and so is ID-like but not entirely an ID." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_int_as_cat_uniques_if_not_benford", + "output": "max int as cat uniques if not benford refers to Max. number of unique values for int/float to be categoricals if violates Benford's Law: Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only). 
Applies to integer or real numerical feature that violates Benford's law, and so is ID-like but not entirely an ID." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max int as cat uniques if not benford", + "output": "max int as cat uniques if not benford refers to Max. number of unique values for int/float to be categoricals if violates Benford's Law: Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only). Applies to integer or real numerical feature that violates Benford's law, and so is ID-like but not entirely an ID." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of unique values for int/float to be categoricals if violates Benford's Law: ", + "output": "max int as cat uniques if not benford refers to Max. number of unique values for int/float to be categoricals if violates Benford's Law: Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only). Applies to integer or real numerical feature that violates Benford's law, and so is ID-like but not entirely an ID." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_int_as_cat_uniques_if_not_benford", + "output": "max int as cat uniques if not benford refers to Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only). Applies to integer or real numerical feature that violates Benford's law, and so is ID-like but not entirely an ID." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_int_as_cat_uniques_if_not_benford", + "output": "max int as cat uniques if not benford refers to Max. number of unique values for int/float to be categoricals if violates Benford's Law: Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only). Applies to integer or real numerical feature that violates Benford's law, and so is ID-like but not entirely an ID." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_fraction_invalid_numeric", + "output": "max fraction invalid numeric refers to When the fraction of non-numeric (and non-missing) values is less than or equal to this value, consider the column numeric. Can help with minor data quality issues for experimentation, > 0 is not recommended for production, since type inconsistencies can occur. Note: Replaces non-numeric values with missing values at start of experiment, so some information is lost, but column is now treated as numeric, which can help. If < 0, then disabled. If == 0, then if number of rows <= max_rows_col_stats, then convert any column of strings of numbers to numeric type. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_fraction_invalid_numeric", + "output": "max fraction invalid numeric refers to Max. fraction of numeric values to be non-numeric (and not missing) for a column to still be considered numeric: When the fraction of non-numeric (and non-missing) values is less than or equal to this value, consider the column numeric. Can help with minor data quality issues for experimentation, > 0 is not recommended for production, since type inconsistencies can occur. 
Note: Replaces non-numeric values with missing values at start of experiment, so some information is lost, but column is now treated as numeric, which can help. If < 0, then disabled. If == 0, then if number of rows <= max_rows_col_stats, then convert any column of strings of numbers to numeric type. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max fraction invalid numeric", + "output": "max fraction invalid numeric refers to Max. fraction of numeric values to be non-numeric (and not missing) for a column to still be considered numeric: When the fraction of non-numeric (and non-missing) values is less than or equal to this value, consider the column numeric. Can help with minor data quality issues for experimentation, > 0 is not recommended for production, since type inconsistencies can occur. Note: Replaces non-numeric values with missing values at start of experiment, so some information is lost, but column is now treated as numeric, which can help. If < 0, then disabled. If == 0, then if number of rows <= max_rows_col_stats, then convert any column of strings of numbers to numeric type. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. fraction of numeric values to be non-numeric (and not missing) for a column to still be considered numeric: ", + "output": "max fraction invalid numeric refers to Max. fraction of numeric values to be non-numeric (and not missing) for a column to still be considered numeric: When the fraction of non-numeric (and non-missing) values is less than or equal to this value, consider the column numeric. Can help with minor data quality issues for experimentation, > 0 is not recommended for production, since type inconsistencies can occur. 
Note: Replaces non-numeric values with missing values at start of experiment, so some information is lost, but column is now treated as numeric, which can help. If < 0, then disabled. If == 0, then if number of rows <= max_rows_col_stats, then convert any column of strings of numbers to numeric type. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_fraction_invalid_numeric", + "output": "max fraction invalid numeric refers to When the fraction of non-numeric (and non-missing) values is less than or equal to this value, consider the column numeric. Can help with minor data quality issues for experimentation, > 0 is not recommended for production, since type inconsistencies can occur. Note: Replaces non-numeric values with missing values at start of experiment, so some information is lost, but column is now treated as numeric, which can help. If < 0, then disabled. If == 0, then if number of rows <= max_rows_col_stats, then convert any column of strings of numbers to numeric type. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_fraction_invalid_numeric", + "output": "max fraction invalid numeric refers to Max. fraction of numeric values to be non-numeric (and not missing) for a column to still be considered numeric: When the fraction of non-numeric (and non-missing) values is less than or equal to this value, consider the column numeric. Can help with minor data quality issues for experimentation, > 0 is not recommended for production, since type inconsistencies can occur. Note: Replaces non-numeric values with missing values at start of experiment, so some information is lost, but column is now treated as numeric, which can help. If < 0, then disabled. If == 0, then if number of rows <= max_rows_col_stats, then convert any column of strings of numbers to numeric type. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_folds", + "output": "num folds refers to Number of folds for models used during the feature engineering process. Increasing this will put a lower fraction of data into validation and more into training (e.g., num_folds=3 means 67%/33% training/validation splits). Actual value will vary for small or big data cases. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_folds", + "output": "num folds refers to Number of folds for models used during the feature engineering process. Increasing this will put a lower fraction of data into validation and more into training (e.g., num_folds=3 means 67%/33% training/validation splits). Actual value will vary for small or big data cases. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num folds", + "output": "num folds refers to Number of folds for models used during the feature engineering process. Increasing this will put a lower fraction of data into validation and more into training (e.g., num_folds=3 means 67%/33% training/validation splits). Actual value will vary for small or big data cases. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "num folds refers to Number of folds for models used during the feature engineering process. Increasing this will put a lower fraction of data into validation and more into training (e.g., num_folds=3 means 67%/33% training/validation splits). Actual value will vary for small or big data cases. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_folds", + "output": "num folds refers to Number of folds for models used during the feature engineering process. Increasing this will put a lower fraction of data into validation and more into training (e.g., num_folds=3 means 67%/33% training/validation splits). Actual value will vary for small or big data cases. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_folds", + "output": "num folds refers to Number of folds for models used during the feature engineering process. Increasing this will put a lower fraction of data into validation and more into training (e.g., num_folds=3 means 67%/33% training/validation splits). Actual value will vary for small or big data cases. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_different_classes_across_fold_splits", + "output": "allow different classes across fold splits refers to For multiclass problems only. Whether to allow different sets of target classes across (cross-)validation fold splits. Especially important when passing a fold column that isn't balanced w.r.t class distribution. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_different_classes_across_fold_splits", + "output": "allow different classes across fold splits refers to Allow different sets of classes across all train/validation fold splits: For multiclass problems only. Whether to allow different sets of target classes across (cross-)validation fold splits. Especially important when passing a fold column that isn't balanced w.r.t class distribution. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow different classes across fold splits", + "output": "allow different classes across fold splits refers to Allow different sets of classes across all train/validation fold splits: For multiclass problems only. Whether to allow different sets of target classes across (cross-)validation fold splits. Especially important when passing a fold column that isn't balanced w.r.t class distribution. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Allow different sets of classes across all train/validation fold splits: ", + "output": "allow different classes across fold splits refers to Allow different sets of classes across all train/validation fold splits: For multiclass problems only. Whether to allow different sets of target classes across (cross-)validation fold splits. Especially important when passing a fold column that isn't balanced w.r.t class distribution. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_different_classes_across_fold_splits", + "output": "allow different classes across fold splits refers to For multiclass problems only. Whether to allow different sets of target classes across (cross-)validation fold splits. Especially important when passing a fold column that isn't balanced w.r.t class distribution. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_different_classes_across_fold_splits", + "output": "allow different classes across fold splits refers to Allow different sets of classes across all train/validation fold splits: For multiclass problems only. Whether to allow different sets of target classes across (cross-)validation fold splits. 
Especially important when passing a fold column that isn't balanced w.r.t class distribution. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "full_cv_accuracy_switch", + "output": "full cv accuracy switch refers to Accuracy setting equal and above which enables full cross-validation (multiple folds) during feature evolution as opposed to only a single holdout split (e.g. 2/3 train and 1/3 validation holdout) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "full_cv_accuracy_switch", + "output": "full cv accuracy switch refers to Accuracy setting equal and above which enables full cross-validation (multiple folds) during feature evolution as opposed to only a single holdout split (e.g. 2/3 train and 1/3 validation holdout) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "full cv accuracy switch", + "output": "full cv accuracy switch refers to Accuracy setting equal and above which enables full cross-validation (multiple folds) during feature evolution as opposed to only a single holdout split (e.g. 2/3 train and 1/3 validation holdout) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "full cv accuracy switch refers to Accuracy setting equal and above which enables full cross-validation (multiple folds) during feature evolution as opposed to only a single holdout split (e.g. 2/3 train and 1/3 validation holdout) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting full_cv_accuracy_switch", + "output": "full cv accuracy switch refers to Accuracy setting equal and above which enables full cross-validation (multiple folds) during feature evolution as opposed to only a single holdout split (e.g. 
2/3 train and 1/3 validation holdout) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting full_cv_accuracy_switch", + "output": "full cv accuracy switch refers to Accuracy setting equal and above which enables full cross-validation (multiple folds) during feature evolution as opposed to only a single holdout split (e.g. 2/3 train and 1/3 validation holdout) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ensemble_accuracy_switch", + "output": "ensemble accuracy switch refers to Accuracy setting equal and above which enables stacked ensemble as final model. Stacking commences at the end of the feature evolution process. It quite often leads to better model performance, but it does increase the complexity and execution time of the final model. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ensemble_accuracy_switch", + "output": "ensemble accuracy switch refers to Accuracy setting equal and above which enables stacked ensemble as final model. Stacking commences at the end of the feature evolution process. It quite often leads to better model performance, but it does increase the complexity and execution time of the final model. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ensemble accuracy switch", + "output": "ensemble accuracy switch refers to Accuracy setting equal and above which enables stacked ensemble as final model. Stacking commences at the end of the feature evolution process. It quite often leads to better model performance, but it does increase the complexity and execution time of the final model. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ensemble accuracy switch refers to Accuracy setting equal and above which enables stacked ensemble as final model. Stacking commences at the end of the feature evolution process. It quite often leads to better model performance, but it does increase the complexity and execution time of the final model. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ensemble_accuracy_switch", + "output": "ensemble accuracy switch refers to Accuracy setting equal and above which enables stacked ensemble as final model. Stacking commences at the end of the feature evolution process. It quite often leads to better model performance, but it does increase the complexity and execution time of the final model. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ensemble_accuracy_switch", + "output": "ensemble accuracy switch refers to Accuracy setting equal and above which enables stacked ensemble as final model. Stacking commences at the end of the feature evolution process. It quite often leads to better model performance, but it does increase the complexity and execution time of the final model. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_ensemble_folds", + "output": "num ensemble folds refers to Number of fold splits to use for ensemble_level >= 2. The ensemble modelling may require predictions to be made on out-of-fold samples, hence the data needs to be split on different folds to generate these predictions. Fewer folds (like 2 or 3) normally create more stable models, but may be less accurate. More folds can get to higher accuracy at the expense of more time, but the performance may be less stable when the training data is not enough (i.e. 
higher chance of overfitting). Actual value will vary for small or big data cases. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_ensemble_folds", + "output": "num ensemble folds refers to Number of fold splits to use for ensemble_level >= 2. The ensemble modelling may require predictions to be made on out-of-fold samples, hence the data needs to be split on different folds to generate these predictions. Fewer folds (like 2 or 3) normally create more stable models, but may be less accurate. More folds can get to higher accuracy at the expense of more time, but the performance may be less stable when the training data is not enough (i.e. higher chance of overfitting). Actual value will vary for small or big data cases. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num ensemble folds", + "output": "num ensemble folds refers to Number of fold splits to use for ensemble_level >= 2. The ensemble modelling may require predictions to be made on out-of-fold samples, hence the data needs to be split on different folds to generate these predictions. Fewer folds (like 2 or 3) normally create more stable models, but may be less accurate. More folds can get to higher accuracy at the expense of more time, but the performance may be less stable when the training data is not enough (i.e. higher chance of overfitting). Actual value will vary for small or big data cases. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "num ensemble folds refers to Number of fold splits to use for ensemble_level >= 2. The ensemble modelling may require predictions to be made on out-of-fold samples, hence the data needs to be split on different folds to generate these predictions. Fewer folds (like 2 or 3) normally create more stable models, but may be less accurate. More folds can get to higher accuracy at the expense of more time, but the performance may be less stable when the training data is not enough (i.e. higher chance of overfitting). Actual value will vary for small or big data cases. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_ensemble_folds", + "output": "num ensemble folds refers to Number of fold splits to use for ensemble_level >= 2. The ensemble modelling may require predictions to be made on out-of-fold samples, hence the data needs to be split on different folds to generate these predictions. Fewer folds (like 2 or 3) normally create more stable models, but may be less accurate. More folds can get to higher accuracy at the expense of more time, but the performance may be less stable when the training data is not enough (i.e. higher chance of overfitting). Actual value will vary for small or big data cases. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_ensemble_folds", + "output": "num ensemble folds refers to Number of fold splits to use for ensemble_level >= 2. The ensemble modelling may require predictions to be made on out-of-fold samples, hence the data needs to be split on different folds to generate these predictions. Fewer folds (like 2 or 3) normally create more stable models, but may be less accurate. More folds can get to higher accuracy at the expense of more time, but the performance may be less stable when the training data is not enough (i.e. higher chance of overfitting). Actual value will vary for small or big data cases. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "save_validation_splits", + "output": "save validation splits refers to Includes pickles of (train_idx, valid_idx) tuples (numpy row indices for original training data) for all internal validation folds in the experiment summary zip. For debugging. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "save_validation_splits", + "output": "save validation splits refers to Store internal validation split row indices: Includes pickles of (train_idx, valid_idx) tuples (numpy row indices for original training data) for all internal validation folds in the experiment summary zip. For debugging. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "save validation splits", + "output": "save validation splits refers to Store internal validation split row indices: Includes pickles of (train_idx, valid_idx) tuples (numpy row indices for original training data) for all internal validation folds in the experiment summary zip. For debugging. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Store internal validation split row indices: ", + "output": "save validation splits refers to Store internal validation split row indices: Includes pickles of (train_idx, valid_idx) tuples (numpy row indices for original training data) for all internal validation folds in the experiment summary zip. For debugging. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting save_validation_splits", + "output": "save validation splits refers to Includes pickles of (train_idx, valid_idx) tuples (numpy row indices for original training data) for all internal validation folds in the experiment summary zip. For debugging. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting save_validation_splits", + "output": "save validation splits refers to Store internal validation split row indices: Includes pickles of (train_idx, valid_idx) tuples (numpy row indices for original training data) for all internal validation folds in the experiment summary zip. For debugging. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fold_reps", + "output": "fold reps refers to Number of repeats for each fold for all validation (modified slightly for small or big data cases) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fold_reps", + "output": "fold reps refers to Number of repeats for each fold for all validation (modified slightly for small or big data cases) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fold reps", + "output": "fold reps refers to Number of repeats for each fold for all validation (modified slightly for small or big data cases) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "fold reps refers to Number of repeats for each fold for all validation (modified slightly for small or big data cases) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fold_reps", + "output": "fold reps refers to Number of repeats for each fold for all validation (modified slightly for small or big data cases) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fold_reps", + "output": "fold reps refers to Number of repeats for each fold for all validation (modified slightly for small or big data cases) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_num_classes", + "output": "max num classes refers to Maximum number of classes to allow for a classification problem. High number of classes may make certain processes of Driverless AI time-consuming. Memory requirements also increase with higher number of classes " + }, + { + 
"prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_num_classes", + "output": "max num classes refers to Max. number of classes for classification problems: Maximum number of classes to allow for a classification problem.High number of classes may make certain processes of Driverless AI time-consuming.Memory requirements also increase with higher number of classes " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max num classes", + "output": "max num classes refers to Max. number of classes for classification problems: Maximum number of classes to allow for a classification problem.High number of classes may make certain processes of Driverless AI time-consuming.Memory requirements also increase with higher number of classes " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of classes for classification problems: ", + "output": "max num classes refers to Max. number of classes for classification problems: Maximum number of classes to allow for a classification problem.High number of classes may make certain processes of Driverless AI time-consuming.Memory requirements also increase with higher number of classes " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_num_classes", + "output": "max num classes refers to Maximum number of classes to allow for a classification problem.High number of classes may make certain processes of Driverless AI time-consuming.Memory requirements also increase with higher number of classes " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_num_classes", + "output": "max num classes refers to Max. 
number of classes for classification problems: Maximum number of classes to allow for a classification problem. High number of classes may make certain processes of Driverless AI time-consuming. Memory requirements also increase with higher number of classes " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_num_classes_compute_roc", + "output": "max num classes compute roc refers to Maximum number of classes to compute ROC and CM for, beyond which roc_reduce_type choice for reduction is applied. Too many classes can take much longer than model building time. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_num_classes_compute_roc", + "output": "max num classes compute roc refers to Max. number of classes to compute ROC and confusion matrix for classification problems: Maximum number of classes to compute ROC and CM for, beyond which roc_reduce_type choice for reduction is applied. Too many classes can take much longer than model building time. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max num classes compute roc", + "output": "max num classes compute roc refers to Max. number of classes to compute ROC and confusion matrix for classification problems: Maximum number of classes to compute ROC and CM for, beyond which roc_reduce_type choice for reduction is applied. Too many classes can take much longer than model building time. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of classes to compute ROC and confusion matrix for classification problems: ", + "output": "max num classes compute roc refers to Max.
number of classes to compute ROC and confusion matrix for classification problems: Maximum number of classes to compute ROC and CM for, beyond which roc_reduce_type choice for reduction is applied. Too many classes can take much longer than model building time. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_num_classes_compute_roc", + "output": "max num classes compute roc refers to Maximum number of classes to compute ROC and CM for, beyond which roc_reduce_type choice for reduction is applied. Too many classes can take much longer than model building time. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_num_classes_compute_roc", + "output": "max num classes compute roc refers to Max. number of classes to compute ROC and confusion matrix for classification problems: Maximum number of classes to compute ROC and CM for, beyond which roc_reduce_type choice for reduction is applied. Too many classes can take much longer than model building time. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_num_classes_client_and_gui", + "output": "max num classes client and gui refers to Maximum number of classes to show in GUI for confusion matrix, showing first max_num_classes_client_and_gui labels. Beyond 6 classes the diagnostics launched from GUI are visually truncated. This will only modify client-GUI launched diagnostics if changed in config.toml and server is restarted, while this value can be changed in expert settings to control experiment plots. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_num_classes_client_and_gui", + "output": "max num classes client and gui refers to Max.
number of classes to show in GUI for confusion matrix: Maximum number of classes to show in GUI for confusion matrix, showing first max_num_classes_client_and_gui labels. Beyond 6 classes the diagnostics launched from GUI are visually truncated. This will only modify client-GUI launched diagnostics if changed in config.toml and server is restarted, while this value can be changed in expert settings to control experiment plots. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max num classes client and gui", + "output": "max num classes client and gui refers to Max. number of classes to show in GUI for confusion matrix: Maximum number of classes to show in GUI for confusion matrix, showing first max_num_classes_client_and_gui labels. Beyond 6 classes the diagnostics launched from GUI are visually truncated. This will only modify client-GUI launched diagnostics if changed in config.toml and server is restarted, while this value can be changed in expert settings to control experiment plots. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of classes to show in GUI for confusion matrix: ", + "output": "max num classes client and gui refers to Max. number of classes to show in GUI for confusion matrix: Maximum number of classes to show in GUI for confusion matrix, showing first max_num_classes_client_and_gui labels. Beyond 6 classes the diagnostics launched from GUI are visually truncated. This will only modify client-GUI launched diagnostics if changed in config.toml and server is restarted, while this value can be changed in expert settings to control experiment plots. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_num_classes_client_and_gui", + "output": "max num classes client and gui refers to Maximum number of classes to show in GUI for confusion matrix, showing first max_num_classes_client_and_gui labels.Beyond 6 classes the diagnostics launched from GUI are visually truncated.This will only modify client-GUI launched diagnostics if changed in config.toml and server is restarted,while this value can be changed in expert settings to control experiment plots. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_num_classes_client_and_gui", + "output": "max num classes client and gui refers to Max. number of classes to show in GUI for confusion matrix: Maximum number of classes to show in GUI for confusion matrix, showing first max_num_classes_client_and_gui labels.Beyond 6 classes the diagnostics launched from GUI are visually truncated.This will only modify client-GUI launched diagnostics if changed in config.toml and server is restarted,while this value can be changed in expert settings to control experiment plots. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "roc_reduce_type", + "output": "roc reduce type refers to If too many classes when computing roc,reduce by \"rows\" by randomly sampling rows,or reduce by truncating classes to no more than max_num_classes_compute_roc.If have sufficient rows for class count, can reduce by rows. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "roc_reduce_type", + "output": "roc reduce type refers to ROC/CM reduction technique for large class counts: If too many classes when computing roc,reduce by \"rows\" by randomly sampling rows,or reduce by truncating classes to no more than max_num_classes_compute_roc.If have sufficient rows for class count, can reduce by rows. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "roc reduce type", + "output": "roc reduce type refers to ROC/CM reduction technique for large class counts: If too many classes when computing roc,reduce by \"rows\" by randomly sampling rows,or reduce by truncating classes to no more than max_num_classes_compute_roc.If have sufficient rows for class count, can reduce by rows. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ROC/CM reduction technique for large class counts: ", + "output": "roc reduce type refers to ROC/CM reduction technique for large class counts: If too many classes when computing roc,reduce by \"rows\" by randomly sampling rows,or reduce by truncating classes to no more than max_num_classes_compute_roc.If have sufficient rows for class count, can reduce by rows. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting roc_reduce_type", + "output": "roc reduce type refers to If too many classes when computing roc,reduce by \"rows\" by randomly sampling rows,or reduce by truncating classes to no more than max_num_classes_compute_roc.If have sufficient rows for class count, can reduce by rows. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting roc_reduce_type", + "output": "roc reduce type refers to ROC/CM reduction technique for large class counts: If too many classes when computing roc,reduce by \"rows\" by randomly sampling rows,or reduce by truncating classes to no more than max_num_classes_compute_roc.If have sufficient rows for class count, can reduce by rows. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_cm_ga", + "output": "max rows cm ga refers to Maximum number of rows to obtain confusion matrix related plots during feature evolution.Does not limit final model calculation. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_cm_ga", + "output": "max rows cm ga refers to Maximum number of rows to obtain confusion matrix related plots during feature evolution: Maximum number of rows to obtain confusion matrix related plots during feature evolution.Does not limit final model calculation. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max rows cm ga", + "output": "max rows cm ga refers to Maximum number of rows to obtain confusion matrix related plots during feature evolution: Maximum number of rows to obtain confusion matrix related plots during feature evolution.Does not limit final model calculation. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of rows to obtain confusion matrix related plots during feature evolution: ", + "output": "max rows cm ga refers to Maximum number of rows to obtain confusion matrix related plots during feature evolution: Maximum number of rows to obtain confusion matrix related plots during feature evolution.Does not limit final model calculation. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_rows_cm_ga", + "output": "max rows cm ga refers to Maximum number of rows to obtain confusion matrix related plots during feature evolution.Does not limit final model calculation. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_rows_cm_ga", + "output": "max rows cm ga refers to Maximum number of rows to obtain confusion matrix related plots during feature evolution: Maximum number of rows to obtain confusion matrix related plots during feature evolution.Does not limit final model calculation. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_actuals_vs_predicted", + "output": "num actuals vs predicted refers to Number of actuals vs. predicted data points to use in order to generate in the relevant plot/graph which is shown at the right part of the screen within an experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_actuals_vs_predicted", + "output": "num actuals vs predicted refers to Number of actuals vs. predicted data points to use in order to generate in the relevant plot/graph which is shown at the right part of the screen within an experiment." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num actuals vs predicted", + "output": "num actuals vs predicted refers to Number of actuals vs. predicted data points to use in order to generate the relevant plot/graph which is shown at the right part of the screen within an experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "num actuals vs predicted refers to Number of actuals vs. predicted data points to use in order to generate the relevant plot/graph which is shown at the right part of the screen within an experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_actuals_vs_predicted", + "output": "num actuals vs predicted refers to Number of actuals vs. predicted data points to use in order to generate the relevant plot/graph which is shown at the right part of the screen within an experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_actuals_vs_predicted", + "output": "num actuals vs predicted refers to Number of actuals vs. predicted data points to use in order to generate the relevant plot/graph which is shown at the right part of the screen within an experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_feature_brain_new_experiments", + "output": "use feature brain new experiments refers to Whether to use feature_brain results even if running new experiments. Feature brain can be risky with some types of changes to experiment setup. Even rescoring may be insufficient, so by default this is False.
For example, one experiment may have training=external validation by accident, and get high score, and while feature_brain_reset_score='on' means we will rescore, it will have already seen during training the external validation and leak that data as part of what it learned from. If this is False, feature_brain_level just sets possible models to use and logs/notifies, but does not use these feature brain cached models. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_feature_brain_new_experiments", + "output": "use feature brain new experiments refers to Whether to use Feature Brain for new experiments.: Whether to use feature_brain results even if running new experiments. Feature brain can be risky with some types of changes to experiment setup. Even rescoring may be insufficient, so by default this is False. For example, one experiment may have training=external validation by accident, and get high score, and while feature_brain_reset_score='on' means we will rescore, it will have already seen during training the external validation and leak that data as part of what it learned from. If this is False, feature_brain_level just sets possible models to use and logs/notifies, but does not use these feature brain cached models. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use feature brain new experiments", + "output": "use feature brain new experiments refers to Whether to use Feature Brain for new experiments.: Whether to use feature_brain results even if running new experiments. Feature brain can be risky with some types of changes to experiment setup. Even rescoring may be insufficient, so by default this is False. 
For example, one experiment may have training=external validation by accident, and get high score, and while feature_brain_reset_score='on' means we will rescore, it will have already seen during training the external validation and leak that data as part of what it learned from. If this is False, feature_brain_level just sets possible models to use and logs/notifies, but does not use these feature brain cached models. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to use Feature Brain for new experiments.: ", + "output": "use feature brain new experiments refers to Whether to use Feature Brain for new experiments.: Whether to use feature_brain results even if running new experiments. Feature brain can be risky with some types of changes to experiment setup. Even rescoring may be insufficient, so by default this is False. For example, one experiment may have training=external validation by accident, and get high score, and while feature_brain_reset_score='on' means we will rescore, it will have already seen during training the external validation and leak that data as part of what it learned from. If this is False, feature_brain_level just sets possible models to use and logs/notifies, but does not use these feature brain cached models. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting use_feature_brain_new_experiments", + "output": "use feature brain new experiments refers to Whether to use feature_brain results even if running new experiments. Feature brain can be risky with some types of changes to experiment setup. Even rescoring may be insufficient, so by default this is False. 
For example, one experiment may have training=external validation by accident, and get high score, and while feature_brain_reset_score='on' means we will rescore, it will have already seen during training the external validation and leak that data as part of what it learned from. If this is False, feature_brain_level just sets possible models to use and logs/notifies, but does not use these feature brain cached models. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting use_feature_brain_new_experiments", + "output": "use feature brain new experiments refers to Whether to use Feature Brain for new experiments.: Whether to use feature_brain results even if running new experiments. Feature brain can be risky with some types of changes to experiment setup. Even rescoring may be insufficient, so by default this is False. For example, one experiment may have training=external validation by accident, and get high score, and while feature_brain_reset_score='on' means we will rescore, it will have already seen during training the external validation and leak that data as part of what it learned from. If this is False, feature_brain_level just sets possible models to use and logs/notifies, but does not use these feature brain cached models. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "resume_data_schema", + "output": "resume data schema refers to Whether reuse dataset schema, such as data types set in UI for each column, from parent experiment ('on') or to ignore original dataset schema and only use new schema ('off').resume_data_schema=True is a basic form of data lineage, but it may not be desirable if data colunn names changed to incompatible data types like int to string.'auto': for restart, retrain final pipeline, or refit best models, default is to resume data schema, but new experiments would not by default reuse old schema.'on': force reuse of data schema from parent experiment if possible'off': don't reuse data schema under any case.The reuse of the column schema can also be disabled by:in UI: selecting Parent Experiment as Nonein client: setting resume_experiment_id to None" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "resume_data_schema", + "output": "resume data schema refers to Whether to reuse dataset schema.: Whether reuse dataset schema, such as data types set in UI for each column, from parent experiment ('on') or to ignore original dataset schema and only use new schema ('off').resume_data_schema=True is a basic form of data lineage, but it may not be desirable if data colunn names changed to incompatible data types like int to string.'auto': for restart, retrain final pipeline, or refit best models, default is to resume data schema, but new experiments would not by default reuse old schema.'on': force reuse of data schema from parent experiment if possible'off': don't reuse data schema under any case.The reuse of the column schema can also be disabled by:in UI: selecting Parent Experiment as Nonein client: setting resume_experiment_id to None" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following 
expert setting for Driverless AI", + "input": "resume data schema", + "output": "resume data schema refers to Whether to reuse dataset schema.: Whether to reuse dataset schema, such as data types set in UI for each column, from parent experiment ('on') or to ignore original dataset schema and only use new schema ('off'). resume_data_schema=True is a basic form of data lineage, but it may not be desirable if data column names changed to incompatible data types like int to string. 'auto': for restart, retrain final pipeline, or refit best models, default is to resume data schema, but new experiments would not by default reuse old schema. 'on': force reuse of data schema from parent experiment if possible. 'off': don't reuse data schema under any case. The reuse of the column schema can also be disabled by: in UI: selecting Parent Experiment as None; in client: setting resume_experiment_id to None" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to reuse dataset schema.: ", + "output": "resume data schema refers to Whether to reuse dataset schema.: Whether to reuse dataset schema, such as data types set in UI for each column, from parent experiment ('on') or to ignore original dataset schema and only use new schema ('off'). resume_data_schema=True is a basic form of data lineage, but it may not be desirable if data column names changed to incompatible data types like int to string. 'auto': for restart, retrain final pipeline, or refit best models, default is to resume data schema, but new experiments would not by default reuse old schema. 'on': force reuse of data schema from parent experiment if possible. 'off': don't reuse data schema under any case. The reuse of the column schema can also be disabled by: in UI: selecting Parent Experiment as None; in client: setting resume_experiment_id to None" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting
resume_data_schema", + "output": "resume data schema refers to Whether to reuse dataset schema, such as data types set in UI for each column, from parent experiment ('on') or to ignore original dataset schema and only use new schema ('off'). resume_data_schema=True is a basic form of data lineage, but it may not be desirable if data column names changed to incompatible data types like int to string. 'auto': for restart, retrain final pipeline, or refit best models, default is to resume data schema, but new experiments would not by default reuse old schema. 'on': force reuse of data schema from parent experiment if possible. 'off': don't reuse data schema under any case. The reuse of the column schema can also be disabled by: in UI: selecting Parent Experiment as None; in client: setting resume_experiment_id to None" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting resume_data_schema", + "output": "resume data schema refers to Whether to reuse dataset schema.: Whether to reuse dataset schema, such as data types set in UI for each column, from parent experiment ('on') or to ignore original dataset schema and only use new schema ('off'). resume_data_schema=True is a basic form of data lineage, but it may not be desirable if data column names changed to incompatible data types like int to string. 'auto': for restart, retrain final pipeline, or refit best models, default is to resume data schema, but new experiments would not by default reuse old schema. 'on': force reuse of data schema from parent experiment if possible. 'off': don't reuse data schema under any case. The reuse of the column schema can also be disabled by: in UI: selecting Parent Experiment as None; in client: setting resume_experiment_id to None" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_brain_level", + "output": "feature brain level refers to Whether to show (or use) results
from H2O.ai brain: the local caching and smart re-use of prior experiments,in order to generate more useful features and models for new experiments.See use_feature_brain_new_experiments for how new experiments by default do not use brain cache.It can also be used to control checkpointing for experiments that have been paused or interrupted.DAI will use H2O.ai brain cache if cache file hasa) any matching column names and types for a similar experiment typeb) exactly matches classesc) exactly matches class labelsd) matches basic time series choicese) interpretability of cache is equal or lowerf) main model (booster) is allowed by new experiment.Level of brain to use (for chosen level, where higher levels will also do all lower level operations automatically)-1 = Don't use any brain cache and don't write any cache0 = Don't use any brain cache but still write cache Use case: Want to save model for later use, but want current model to be built without any brain models1 = smart checkpoint from latest best individual model Use case: Want to use latest matching model, but match can be loose, so needs caution2 = smart checkpoint from H2O.ai brain cache of individual best models Use case: DAI scans through H2O.ai brain cache for best models to restart from3 = smart checkpoint like level #1, but for entire population. Tune only if brain population insufficient size (will re-score entire population in single iteration, so appears to take longer to complete first iteration)4 = smart checkpoint like level #2, but for entire population. 
Tune only if brain population insufficient size (will re-score entire population in single iteration, so appears to take longer to complete first iteration)5 = like #4, but will scan over entire brain cache of populations to get best scored individuals (can be slower due to brain cache scanning if big cache)1000 + feature_brain_level (above positive values) = use resumed_experiment_id and actual feature_brain_level, to use other specific experiment as base for individuals or population, instead of sampling from any old experimentsGUI has 3 options and corresponding settings:1) New Experiment: Uses feature brain level default of 22) New Experiment With Same Settings: Re-uses the same feature brain level as parent experiment3) Restart From Last Checkpoint: Resets feature brain level to 1003 and sets experiment ID to resume from (continued genetic algorithm iterations)4) Retrain Final Pipeline: Like Restart but also time=0 so skips any tuning and heads straight to final model (assumes had at least one tuning iteration in parent experiment)Other use cases:a) Restart on different data: Use same column names and fewer or more rows (applicable to 1 - 5)b) Re-fit only final pipeline: Like (a), but choose time=1 and feature_brain_level=3 - 5c) Restart with more columns: Add columns, so model builds upon old model built from old column names (1 - 5)d) Restart with focus on model tuning: Restart, then select feature_engineering_effort = 3 in expert settingse) can retrain final model but ignore any original features except those in final pipeline (normal retrain but set brain_add_features_for_new_columns=false)Notes:1) In all cases, we first check the resumed experiment id if given, and then the brain cache2) For Restart cases, may want to set min_dai_iterations to non-zero to force delayed early stopping, else may not be enough iterations to find better model.3) A \"New experiment with Same Settings\" of a Restart will use feature_brain_level=1003 for default Restart mode 
(revert to 2, or even 0 if want to start a fresh experiment otherwise)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_brain_level", + "output": "feature brain level refers to Model/Feature Brain Level (0..10): Whether to show (or use) results from H2O.ai brain: the local caching and smart re-use of prior experiments,in order to generate more useful features and models for new experiments.See use_feature_brain_new_experiments for how new experiments by default do not use brain cache.It can also be used to control checkpointing for experiments that have been paused or interrupted.DAI will use H2O.ai brain cache if cache file hasa) any matching column names and types for a similar experiment typeb) exactly matches classesc) exactly matches class labelsd) matches basic time series choicese) interpretability of cache is equal or lowerf) main model (booster) is allowed by new experiment.Level of brain to use (for chosen level, where higher levels will also do all lower level operations automatically)-1 = Don't use any brain cache and don't write any cache0 = Don't use any brain cache but still write cache Use case: Want to save model for later use, but want current model to be built without any brain models1 = smart checkpoint from latest best individual model Use case: Want to use latest matching model, but match can be loose, so needs caution2 = smart checkpoint from H2O.ai brain cache of individual best models Use case: DAI scans through H2O.ai brain cache for best models to restart from3 = smart checkpoint like level #1, but for entire population. Tune only if brain population insufficient size (will re-score entire population in single iteration, so appears to take longer to complete first iteration)4 = smart checkpoint like level #2, but for entire population. 
Tune only if brain population insufficient size (will re-score entire population in single iteration, so appears to take longer to complete first iteration)5 = like #4, but will scan over entire brain cache of populations to get best scored individuals (can be slower due to brain cache scanning if big cache)1000 + feature_brain_level (above positive values) = use resumed_experiment_id and actual feature_brain_level, to use other specific experiment as base for individuals or population, instead of sampling from any old experimentsGUI has 3 options and corresponding settings:1) New Experiment: Uses feature brain level default of 22) New Experiment With Same Settings: Re-uses the same feature brain level as parent experiment3) Restart From Last Checkpoint: Resets feature brain level to 1003 and sets experiment ID to resume from (continued genetic algorithm iterations)4) Retrain Final Pipeline: Like Restart but also time=0 so skips any tuning and heads straight to final model (assumes had at least one tuning iteration in parent experiment)Other use cases:a) Restart on different data: Use same column names and fewer or more rows (applicable to 1 - 5)b) Re-fit only final pipeline: Like (a), but choose time=1 and feature_brain_level=3 - 5c) Restart with more columns: Add columns, so model builds upon old model built from old column names (1 - 5)d) Restart with focus on model tuning: Restart, then select feature_engineering_effort = 3 in expert settingse) can retrain final model but ignore any original features except those in final pipeline (normal retrain but set brain_add_features_for_new_columns=false)Notes:1) In all cases, we first check the resumed experiment id if given, and then the brain cache2) For Restart cases, may want to set min_dai_iterations to non-zero to force delayed early stopping, else may not be enough iterations to find better model.3) A \"New experiment with Same Settings\" of a Restart will use feature_brain_level=1003 for default Restart mode 
(revert to 2, or even 0 if want to start a fresh experiment otherwise)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature brain level", + "output": "feature brain level refers to Model/Feature Brain Level (0..10): Whether to show (or use) results from H2O.ai brain: the local caching and smart re-use of prior experiments,in order to generate more useful features and models for new experiments.See use_feature_brain_new_experiments for how new experiments by default do not use brain cache.It can also be used to control checkpointing for experiments that have been paused or interrupted.DAI will use H2O.ai brain cache if cache file hasa) any matching column names and types for a similar experiment typeb) exactly matches classesc) exactly matches class labelsd) matches basic time series choicese) interpretability of cache is equal or lowerf) main model (booster) is allowed by new experiment.Level of brain to use (for chosen level, where higher levels will also do all lower level operations automatically)-1 = Don't use any brain cache and don't write any cache0 = Don't use any brain cache but still write cache Use case: Want to save model for later use, but want current model to be built without any brain models1 = smart checkpoint from latest best individual model Use case: Want to use latest matching model, but match can be loose, so needs caution2 = smart checkpoint from H2O.ai brain cache of individual best models Use case: DAI scans through H2O.ai brain cache for best models to restart from3 = smart checkpoint like level #1, but for entire population. Tune only if brain population insufficient size (will re-score entire population in single iteration, so appears to take longer to complete first iteration)4 = smart checkpoint like level #2, but for entire population. 
Tune only if brain population insufficient size (will re-score entire population in single iteration, so appears to take longer to complete first iteration)5 = like #4, but will scan over entire brain cache of populations to get best scored individuals (can be slower due to brain cache scanning if big cache)1000 + feature_brain_level (above positive values) = use resumed_experiment_id and actual feature_brain_level, to use other specific experiment as base for individuals or population, instead of sampling from any old experimentsGUI has 3 options and corresponding settings:1) New Experiment: Uses feature brain level default of 22) New Experiment With Same Settings: Re-uses the same feature brain level as parent experiment3) Restart From Last Checkpoint: Resets feature brain level to 1003 and sets experiment ID to resume from (continued genetic algorithm iterations)4) Retrain Final Pipeline: Like Restart but also time=0 so skips any tuning and heads straight to final model (assumes had at least one tuning iteration in parent experiment)Other use cases:a) Restart on different data: Use same column names and fewer or more rows (applicable to 1 - 5)b) Re-fit only final pipeline: Like (a), but choose time=1 and feature_brain_level=3 - 5c) Restart with more columns: Add columns, so model builds upon old model built from old column names (1 - 5)d) Restart with focus on model tuning: Restart, then select feature_engineering_effort = 3 in expert settingse) can retrain final model but ignore any original features except those in final pipeline (normal retrain but set brain_add_features_for_new_columns=false)Notes:1) In all cases, we first check the resumed experiment id if given, and then the brain cache2) For Restart cases, may want to set min_dai_iterations to non-zero to force delayed early stopping, else may not be enough iterations to find better model.3) A \"New experiment with Same Settings\" of a Restart will use feature_brain_level=1003 for default Restart mode 
(revert to 2, or even 0 if want to start a fresh experiment otherwise)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Model/Feature Brain Level (0..10): ", + "output": "feature brain level refers to Model/Feature Brain Level (0..10): Whether to show (or use) results from H2O.ai brain: the local caching and smart re-use of prior experiments,in order to generate more useful features and models for new experiments.See use_feature_brain_new_experiments for how new experiments by default do not use brain cache.It can also be used to control checkpointing for experiments that have been paused or interrupted.DAI will use H2O.ai brain cache if cache file hasa) any matching column names and types for a similar experiment typeb) exactly matches classesc) exactly matches class labelsd) matches basic time series choicese) interpretability of cache is equal or lowerf) main model (booster) is allowed by new experiment.Level of brain to use (for chosen level, where higher levels will also do all lower level operations automatically)-1 = Don't use any brain cache and don't write any cache0 = Don't use any brain cache but still write cache Use case: Want to save model for later use, but want current model to be built without any brain models1 = smart checkpoint from latest best individual model Use case: Want to use latest matching model, but match can be loose, so needs caution2 = smart checkpoint from H2O.ai brain cache of individual best models Use case: DAI scans through H2O.ai brain cache for best models to restart from3 = smart checkpoint like level #1, but for entire population. Tune only if brain population insufficient size (will re-score entire population in single iteration, so appears to take longer to complete first iteration)4 = smart checkpoint like level #2, but for entire population. 
Tune only if brain population insufficient size (will re-score entire population in single iteration, so appears to take longer to complete first iteration)5 = like #4, but will scan over entire brain cache of populations to get best scored individuals (can be slower due to brain cache scanning if big cache)1000 + feature_brain_level (above positive values) = use resumed_experiment_id and actual feature_brain_level, to use other specific experiment as base for individuals or population, instead of sampling from any old experimentsGUI has 3 options and corresponding settings:1) New Experiment: Uses feature brain level default of 22) New Experiment With Same Settings: Re-uses the same feature brain level as parent experiment3) Restart From Last Checkpoint: Resets feature brain level to 1003 and sets experiment ID to resume from (continued genetic algorithm iterations)4) Retrain Final Pipeline: Like Restart but also time=0 so skips any tuning and heads straight to final model (assumes had at least one tuning iteration in parent experiment)Other use cases:a) Restart on different data: Use same column names and fewer or more rows (applicable to 1 - 5)b) Re-fit only final pipeline: Like (a), but choose time=1 and feature_brain_level=3 - 5c) Restart with more columns: Add columns, so model builds upon old model built from old column names (1 - 5)d) Restart with focus on model tuning: Restart, then select feature_engineering_effort = 3 in expert settingse) can retrain final model but ignore any original features except those in final pipeline (normal retrain but set brain_add_features_for_new_columns=false)Notes:1) In all cases, we first check the resumed experiment id if given, and then the brain cache2) For Restart cases, may want to set min_dai_iterations to non-zero to force delayed early stopping, else may not be enough iterations to find better model.3) A \"New experiment with Same Settings\" of a Restart will use feature_brain_level=1003 for default Restart mode 
(revert to 2, or even 0 if want to start a fresh experiment otherwise)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting feature_brain_level", + "output": "feature brain level refers to Whether to show (or use) results from H2O.ai brain: the local caching and smart re-use of prior experiments,in order to generate more useful features and models for new experiments.See use_feature_brain_new_experiments for how new experiments by default do not use brain cache.It can also be used to control checkpointing for experiments that have been paused or interrupted.DAI will use H2O.ai brain cache if cache file hasa) any matching column names and types for a similar experiment typeb) exactly matches classesc) exactly matches class labelsd) matches basic time series choicese) interpretability of cache is equal or lowerf) main model (booster) is allowed by new experiment.Level of brain to use (for chosen level, where higher levels will also do all lower level operations automatically)-1 = Don't use any brain cache and don't write any cache0 = Don't use any brain cache but still write cache Use case: Want to save model for later use, but want current model to be built without any brain models1 = smart checkpoint from latest best individual model Use case: Want to use latest matching model, but match can be loose, so needs caution2 = smart checkpoint from H2O.ai brain cache of individual best models Use case: DAI scans through H2O.ai brain cache for best models to restart from3 = smart checkpoint like level #1, but for entire population. Tune only if brain population insufficient size (will re-score entire population in single iteration, so appears to take longer to complete first iteration)4 = smart checkpoint like level #2, but for entire population. 
Tune only if brain population insufficient size (will re-score entire population in single iteration, so appears to take longer to complete first iteration)5 = like #4, but will scan over entire brain cache of populations to get best scored individuals (can be slower due to brain cache scanning if big cache)1000 + feature_brain_level (above positive values) = use resumed_experiment_id and actual feature_brain_level, to use other specific experiment as base for individuals or population, instead of sampling from any old experimentsGUI has 3 options and corresponding settings:1) New Experiment: Uses feature brain level default of 22) New Experiment With Same Settings: Re-uses the same feature brain level as parent experiment3) Restart From Last Checkpoint: Resets feature brain level to 1003 and sets experiment ID to resume from (continued genetic algorithm iterations)4) Retrain Final Pipeline: Like Restart but also time=0 so skips any tuning and heads straight to final model (assumes had at least one tuning iteration in parent experiment)Other use cases:a) Restart on different data: Use same column names and fewer or more rows (applicable to 1 - 5)b) Re-fit only final pipeline: Like (a), but choose time=1 and feature_brain_level=3 - 5c) Restart with more columns: Add columns, so model builds upon old model built from old column names (1 - 5)d) Restart with focus on model tuning: Restart, then select feature_engineering_effort = 3 in expert settingse) can retrain final model but ignore any original features except those in final pipeline (normal retrain but set brain_add_features_for_new_columns=false)Notes:1) In all cases, we first check the resumed experiment id if given, and then the brain cache2) For Restart cases, may want to set min_dai_iterations to non-zero to force delayed early stopping, else may not be enough iterations to find better model.3) A \"New experiment with Same Settings\" of a Restart will use feature_brain_level=1003 for default Restart mode 
(revert to 2, or even 0 if want to start a fresh experiment otherwise)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting feature_brain_level", + "output": "feature brain level refers to Model/Feature Brain Level (0..10): Whether to show (or use) results from H2O.ai brain: the local caching and smart re-use of prior experiments,in order to generate more useful features and models for new experiments.See use_feature_brain_new_experiments for how new experiments by default do not use brain cache.It can also be used to control checkpointing for experiments that have been paused or interrupted.DAI will use H2O.ai brain cache if cache file hasa) any matching column names and types for a similar experiment typeb) exactly matches classesc) exactly matches class labelsd) matches basic time series choicese) interpretability of cache is equal or lowerf) main model (booster) is allowed by new experiment.Level of brain to use (for chosen level, where higher levels will also do all lower level operations automatically)-1 = Don't use any brain cache and don't write any cache0 = Don't use any brain cache but still write cache Use case: Want to save model for later use, but want current model to be built without any brain models1 = smart checkpoint from latest best individual model Use case: Want to use latest matching model, but match can be loose, so needs caution2 = smart checkpoint from H2O.ai brain cache of individual best models Use case: DAI scans through H2O.ai brain cache for best models to restart from3 = smart checkpoint like level #1, but for entire population. Tune only if brain population insufficient size (will re-score entire population in single iteration, so appears to take longer to complete first iteration)4 = smart checkpoint like level #2, but for entire population. 
Tune only if brain population insufficient size (will re-score entire population in single iteration, so appears to take longer to complete first iteration)5 = like #4, but will scan over entire brain cache of populations to get best scored individuals (can be slower due to brain cache scanning if big cache)1000 + feature_brain_level (above positive values) = use resumed_experiment_id and actual feature_brain_level, to use other specific experiment as base for individuals or population, instead of sampling from any old experimentsGUI has 3 options and corresponding settings:1) New Experiment: Uses feature brain level default of 22) New Experiment With Same Settings: Re-uses the same feature brain level as parent experiment3) Restart From Last Checkpoint: Resets feature brain level to 1003 and sets experiment ID to resume from (continued genetic algorithm iterations)4) Retrain Final Pipeline: Like Restart but also time=0 so skips any tuning and heads straight to final model (assumes had at least one tuning iteration in parent experiment)Other use cases:a) Restart on different data: Use same column names and fewer or more rows (applicable to 1 - 5)b) Re-fit only final pipeline: Like (a), but choose time=1 and feature_brain_level=3 - 5c) Restart with more columns: Add columns, so model builds upon old model built from old column names (1 - 5)d) Restart with focus on model tuning: Restart, then select feature_engineering_effort = 3 in expert settingse) can retrain final model but ignore any original features except those in final pipeline (normal retrain but set brain_add_features_for_new_columns=false)Notes:1) In all cases, we first check the resumed experiment id if given, and then the brain cache2) For Restart cases, may want to set min_dai_iterations to non-zero to force delayed early stopping, else may not be enough iterations to find better model.3) A \"New experiment with Same Settings\" of a Restart will use feature_brain_level=1003 for default Restart mode 
(revert to 2, or even 0 if want to start a fresh experiment otherwise)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_brain_reset_score", + "output": "feature brain reset score refers to Whether to smartly keep score to avoid re-munging/re-training/re-scoring steps brain models ('auto'), always force all steps for all brain imports ('on'), or never rescore ('off'). 'auto' only re-scores if a difference in current and prior experiment warrants re-scoring, like column changes, metric changes, etc. 'on' is useful when smart similarity checking is not reliable enough. 'off' is useful when know want to keep exact same features and model for final model refit, despite changes in seed or other behaviors in features that might change the outcome if re-scored before reaching final model. If set off, then no limits are applied to features during brain ingestion, while can set brain_add_features_for_new_columns to false if want to ignore any new columns in data. In addition, any unscored individuals loaded from parent experiment are not rescored when doing refit or retrain. Can also set refit_same_best_individual True if want exact same best individual (highest scored model+features) to be used regardless of any scoring changes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_brain_reset_score", + "output": "feature brain reset score refers to Whether to re-score models from brain cache: Whether to smartly keep score to avoid re-munging/re-training/re-scoring steps brain models ('auto'), always force all steps for all brain imports ('on'), or never rescore ('off'). 'auto' only re-scores if a difference in current and prior experiment warrants re-scoring, like column changes, metric changes, etc. 'on' is useful when smart similarity checking is not reliable enough. 
'off' is useful when know want to keep exact same features and model for final model refit, despite changes in seed or other behaviors in features that might change the outcome if re-scored before reaching final model. If set off, then no limits are applied to features during brain ingestion, while can set brain_add_features_for_new_columns to false if want to ignore any new columns in data. In addition, any unscored individuals loaded from parent experiment are not rescored when doing refit or retrain. Can also set refit_same_best_individual True if want exact same best individual (highest scored model+features) to be used regardless of any scoring changes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature brain reset score", + "output": "feature brain reset score refers to Whether to re-score models from brain cache: Whether to smartly keep score to avoid re-munging/re-training/re-scoring steps brain models ('auto'), always force all steps for all brain imports ('on'), or never rescore ('off'). 'auto' only re-scores if a difference in current and prior experiment warrants re-scoring, like column changes, metric changes, etc. 'on' is useful when smart similarity checking is not reliable enough. 'off' is useful when know want to keep exact same features and model for final model refit, despite changes in seed or other behaviors in features that might change the outcome if re-scored before reaching final model. If set off, then no limits are applied to features during brain ingestion, while can set brain_add_features_for_new_columns to false if want to ignore any new columns in data. In addition, any unscored individuals loaded from parent experiment are not rescored when doing refit or retrain. Can also set refit_same_best_individual True if want exact same best individual (highest scored model+features) to be used regardless of any scoring changes. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to re-score models from brain cache: ", + "output": "feature brain reset score refers to Whether to re-score models from brain cache: Whether to smartly keep score to avoid re-munging/re-training/re-scoring steps brain models ('auto'), always force all steps for all brain imports ('on'), or never rescore ('off'). 'auto' only re-scores if a difference in current and prior experiment warrants re-scoring, like column changes, metric changes, etc. 'on' is useful when smart similarity checking is not reliable enough. 'off' is useful when know want to keep exact same features and model for final model refit, despite changes in seed or other behaviors in features that might change the outcome if re-scored before reaching final model. If set off, then no limits are applied to features during brain ingestion, while can set brain_add_features_for_new_columns to false if want to ignore any new columns in data. In addition, any unscored individuals loaded from parent experiment are not rescored when doing refit or retrain. Can also set refit_same_best_individual True if want exact same best individual (highest scored model+features) to be used regardless of any scoring changes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting feature_brain_reset_score", + "output": "feature brain reset score refers to Whether to smartly keep score to avoid re-munging/re-training/re-scoring steps brain models ('auto'), always force all steps for all brain imports ('on'), or never rescore ('off'). 'auto' only re-scores if a difference in current and prior experiment warrants re-scoring, like column changes, metric changes, etc. 'on' is useful when smart similarity checking is not reliable enough. 
'off' is useful when know want to keep exact same features and model for final model refit, despite changes in seed or other behaviors in features that might change the outcome if re-scored before reaching final model. If set off, then no limits are applied to features during brain ingestion, while can set brain_add_features_for_new_columns to false if want to ignore any new columns in data. In addition, any unscored individuals loaded from parent experiment are not rescored when doing refit or retrain. Can also set refit_same_best_individual True if want exact same best individual (highest scored model+features) to be used regardless of any scoring changes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting feature_brain_reset_score", + "output": "feature brain reset score refers to Whether to re-score models from brain cache: Whether to smartly keep score to avoid re-munging/re-training/re-scoring steps brain models ('auto'), always force all steps for all brain imports ('on'), or never rescore ('off'). 'auto' only re-scores if a difference in current and prior experiment warrants re-scoring, like column changes, metric changes, etc. 'on' is useful when smart similarity checking is not reliable enough. 'off' is useful when know want to keep exact same features and model for final model refit, despite changes in seed or other behaviors in features that might change the outcome if re-scored before reaching final model. If set off, then no limits are applied to features during brain ingestion, while can set brain_add_features_for_new_columns to false if want to ignore any new columns in data. In addition, any unscored individuals loaded from parent experiment are not rescored when doing refit or retrain. Can also set refit_same_best_individual True if want exact same best individual (highest scored model+features) to be used regardless of any scoring changes. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_change_layer_count_brain", + "output": "allow change layer count brain refers to For feature brain or restart/refit, whether to allow brain ingest to use different feature engineering layer count.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_change_layer_count_brain", + "output": "allow change layer count brain refers to For feature brain or restart/refit, whether to allow brain ingest to use different feature engineering layer count.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow change layer count brain", + "output": "allow change layer count brain refers to For feature brain or restart/refit, whether to allow brain ingest to use different feature engineering layer count.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "For feature brain or restart/refit, whether to allow brain ingest to use different feature engineering layer count.: ", + "output": "allow change layer count brain refers to For feature brain or restart/refit, whether to allow brain ingest to use different feature engineering layer count.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_change_layer_count_brain", + "output": "allow change layer count brain refers to For feature brain or restart/refit, whether to allow brain ingest to use different feature engineering layer count.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_change_layer_count_brain", + "output": "allow change layer count brain refers to For feature brain or restart/refit, whether to allow brain ingest to use 
different feature engineering layer count.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "brain_maximum_diff_score", + "output": "brain maximum diff score refers to Relative number of columns that must match between current reference individual and brain individual. 0.0: perfect match 1.0: All columns are different, worst match e.g. 0.1 implies no more than 10% of columns mismatch between reference set of columns and brain individual. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "brain_maximum_diff_score", + "output": "brain maximum diff score refers to Relative number of columns that must match between current reference individual and brain individual. 0.0: perfect match 1.0: All columns are different, worst match e.g. 0.1 implies no more than 10% of columns mismatch between reference set of columns and brain individual. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "brain maximum diff score", + "output": "brain maximum diff score refers to Relative number of columns that must match between current reference individual and brain individual. 0.0: perfect match 1.0: All columns are different, worst match e.g. 0.1 implies no more than 10% of columns mismatch between reference set of columns and brain individual. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "brain maximum diff score refers to Relative number of columns that must match between current reference individual and brain individual. 0.0: perfect match 1.0: All columns are different, worst match e.g. 0.1 implies no more than 10% of columns mismatch between reference set of columns and brain individual. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting brain_maximum_diff_score", + "output": "brain maximum diff score refers to Relative number of columns that must match between current reference individual and brain individual. 0.0: perfect match 1.0: All columns are different, worst match e.g. 0.1 implies no more than 10% of columns mismatch between reference set of columns and brain individual. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting brain_maximum_diff_score", + "output": "brain maximum diff score refers to Relative number of columns that must match between current reference individual and brain individual. 0.0: perfect match 1.0: All columns are different, worst match e.g. 0.1 implies no more than 10% of columns mismatch between reference set of columns and brain individual. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_num_brain_indivs", + "output": "max num brain indivs refers to Maximum number of brain individuals pulled from H2O.ai brain cache for feature_brain_level=1, 2" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_num_brain_indivs", + "output": "max num brain indivs refers to Maximum number of brain individuals pulled from H2O.ai brain cache for feature_brain_level=1, 2" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max num brain indivs", + "output": "max num brain indivs refers to Maximum number of brain individuals pulled from H2O.ai brain cache for feature_brain_level=1, 2" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max num brain indivs refers to Maximum number of brain individuals 
pulled from H2O.ai brain cache for feature_brain_level=1, 2" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_num_brain_indivs", + "output": "max num brain indivs refers to Maximum number of brain individuals pulled from H2O.ai brain cache for feature_brain_level=1, 2" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_num_brain_indivs", + "output": "max num brain indivs refers to Maximum number of brain individuals pulled from H2O.ai brain cache for feature_brain_level=1, 2" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_brain_save_every_iteration", + "output": "feature brain save every iteration refers to Save feature brain iterations every iter_num % feature_brain_iterations_save_every_iteration == 0, to be able to restart/refit with which_iteration_brain >= 0. 0 means disable " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_brain_save_every_iteration", + "output": "feature brain save every iteration refers to Feature Brain Save every which iteration (0 = disable): Save feature brain iterations every iter_num % feature_brain_iterations_save_every_iteration == 0, to be able to restart/refit with which_iteration_brain >= 0. 0 means disable " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature brain save every iteration", + "output": "feature brain save every iteration refers to Feature Brain Save every which iteration (0 = disable): Save feature brain iterations every iter_num % feature_brain_iterations_save_every_iteration == 0, to be able to restart/refit with which_iteration_brain >= 0. 0 means disable " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert 
setting for Driverless AI", + "input": "Feature Brain Save every which iteration (0 = disable): ", + "output": "feature brain save every iteration refers to Feature Brain Save every which iteration (0 = disable): Save feature brain iterations every iter_num % feature_brain_iterations_save_every_iteration == 0, to be able to restart/refit with which_iteration_brain >= 0. 0 means disable " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting feature_brain_save_every_iteration", + "output": "feature brain save every iteration refers to Save feature brain iterations every iter_num % feature_brain_iterations_save_every_iteration == 0, to be able to restart/refit with which_iteration_brain >= 0. 0 means disable " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting feature_brain_save_every_iteration", + "output": "feature brain save every iteration refers to Feature Brain Save every which iteration (0 = disable): Save feature brain iterations every iter_num % feature_brain_iterations_save_every_iteration == 0, to be able to restart/refit with which_iteration_brain >= 0. 0 means disable " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "which_iteration_brain", + "output": "which iteration brain refers to When doing restart or re-fit type feature_brain_level with resumed_experiment_id, choose which iteration to start from, instead of only last best. -1 means just use last best. Usage: 1) Run one experiment with feature_brain_iterations_save_every_iteration=1 or some other number. 2) Identify which iteration brain dump one wants to restart/refit from. 3) Restart/Refit from original experiment, setting which_iteration_brain to that number in expert settings. Note: If restart from a tuning iteration, this will pull in entire scored tuning population and use that for feature evolution " + }, + { + 
"prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "which_iteration_brain", + "output": "which iteration brain refers to Feature Brain Restart from which iteration (-1 = auto): When doing restart or re-fit type feature_brain_level with resumed_experiment_id, choose which iteration to start from, instead of only last best. -1 means just use last best. Usage: 1) Run one experiment with feature_brain_iterations_save_every_iteration=1 or some other number. 2) Identify which iteration brain dump one wants to restart/refit from. 3) Restart/Refit from original experiment, setting which_iteration_brain to that number in expert settings. Note: If restart from a tuning iteration, this will pull in entire scored tuning population and use that for feature evolution " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "which iteration brain", + "output": "which iteration brain refers to Feature Brain Restart from which iteration (-1 = auto): When doing restart or re-fit type feature_brain_level with resumed_experiment_id, choose which iteration to start from, instead of only last best. -1 means just use last best. Usage: 1) Run one experiment with feature_brain_iterations_save_every_iteration=1 or some other number. 2) Identify which iteration brain dump one wants to restart/refit from. 3) Restart/Refit from original experiment, setting which_iteration_brain to that number in expert settings. Note: If restart from a tuning iteration, this will pull in entire scored tuning population and use that for feature evolution " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Feature Brain Restart from which iteration (-1 = auto): ", + "output": "which iteration brain refers to Feature Brain Restart from which iteration (-1 = auto): When doing restart or re-fit type feature_brain_level with 
resumed_experiment_id, choose which iteration to start from, instead of only last best. -1 means just use last best. Usage: 1) Run one experiment with feature_brain_iterations_save_every_iteration=1 or some other number. 2) Identify which iteration brain dump one wants to restart/refit from. 3) Restart/Refit from original experiment, setting which_iteration_brain to that number in expert settings. Note: If restart from a tuning iteration, this will pull in entire scored tuning population and use that for feature evolution " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting which_iteration_brain", + "output": "which iteration brain refers to When doing restart or re-fit type feature_brain_level with resumed_experiment_id, choose which iteration to start from, instead of only last best. -1 means just use last best. Usage: 1) Run one experiment with feature_brain_iterations_save_every_iteration=1 or some other number. 2) Identify which iteration brain dump one wants to restart/refit from. 3) Restart/Refit from original experiment, setting which_iteration_brain to that number in expert settings. Note: If restart from a tuning iteration, this will pull in entire scored tuning population and use that for feature evolution " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting which_iteration_brain", + "output": "which iteration brain refers to Feature Brain Restart from which iteration (-1 = auto): When doing restart or re-fit type feature_brain_level with resumed_experiment_id, choose which iteration to start from, instead of only last best. -1 means just use last best. Usage: 1) Run one experiment with feature_brain_iterations_save_every_iteration=1 or some other number. 2) Identify which iteration brain dump one wants to restart/refit from. 3) Restart/Refit from original experiment, setting which_iteration_brain to that number in expert settings. Note: If restart from a tuning iteration, 
this will pull in entire scored tuning population and use that for feature evolution " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "refit_same_best_individual", + "output": "refit same best individual refers to When doing re-fit from feature brain, if change columns or features, population of individuals used to refit from may change order of which was best,leading to better result chosen (False case). But sometimes want to see exact same model/features with only one feature added,and then would need to set this to True case.E.g. if refit with just 1 extra column and have interpretability=1, then final model will be same features,with one more engineered feature applied to that new original feature. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "refit_same_best_individual", + "output": "refit same best individual refers to Feature Brain refit uses same best individual: When doing re-fit from feature brain, if change columns or features, population of individuals used to refit from may change order of which was best,leading to better result chosen (False case). But sometimes want to see exact same model/features with only one feature added,and then would need to set this to True case.E.g. if refit with just 1 extra column and have interpretability=1, then final model will be same features,with one more engineered feature applied to that new original feature. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "refit same best individual", + "output": "refit same best individual refers to Feature Brain refit uses same best individual: When doing re-fit from feature brain, if change columns or features, population of individuals used to refit from may change order of which was best,leading to better result chosen (False case). 
But sometimes want to see exact same model/features with only one feature added,and then would need to set this to True case.E.g. if refit with just 1 extra column and have interpretability=1, then final model will be same features,with one more engineered feature applied to that new original feature. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Feature Brain refit uses same best individual: ", + "output": "refit same best individual refers to Feature Brain refit uses same best individual: When doing re-fit from feature brain, if change columns or features, population of individuals used to refit from may change order of which was best,leading to better result chosen (False case). But sometimes want to see exact same model/features with only one feature added,and then would need to set this to True case.E.g. if refit with just 1 extra column and have interpretability=1, then final model will be same features,with one more engineered feature applied to that new original feature. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting refit_same_best_individual", + "output": "refit same best individual refers to When doing re-fit from feature brain, if change columns or features, population of individuals used to refit from may change order of which was best,leading to better result chosen (False case). But sometimes want to see exact same model/features with only one feature added,and then would need to set this to True case.E.g. if refit with just 1 extra column and have interpretability=1, then final model will be same features,with one more engineered feature applied to that new original feature. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting refit_same_best_individual", + "output": "refit same best individual refers to Feature Brain refit uses same best individual: When doing re-fit from feature brain, if change columns or features, population of individuals used to refit from may change order of which was best,leading to better result chosen (False case). But sometimes want to see exact same model/features with only one feature added,and then would need to set this to True case.E.g. if refit with just 1 extra column and have interpretability=1, then final model will be same features,with one more engineered feature applied to that new original feature. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "restart_refit_redo_origfs_shift_leak", + "output": "restart refit redo origfs shift leak refers to When doing restart or re-fit of experiment from feature brain,sometimes user might change data significantly and then warrantredoing reduction of original features by feature selection, shift detection, and leakage detection.However, in other cases, if data and all options are nearly (or exactly) identical, then thesesteps might change the features slightly (e.g. due to random seed if not setting reproducible mode),leading to changes in features and model that is refitted. 
By default, restart and refit avoidthese steps assuming data and experiment setup have no changed significantly.If check_distribution_shift is forced to on (instead of auto), then this option is ignored.In order to ensure exact same final pipeline is fitted, one should also set:1) brain_add_features_for_new_columns false2) refit_same_best_individual true3) feature_brain_reset_score 'off'4) force_model_restart_to_defaults falseThe score will still be reset if the experiment metric chosen changes,but changes to the scored model and features will be more frozen in place. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "restart_refit_redo_origfs_shift_leak", + "output": "restart refit redo origfs shift leak refers to For restart-refit, select which steps to do: When doing restart or re-fit of experiment from feature brain,sometimes user might change data significantly and then warrantredoing reduction of original features by feature selection, shift detection, and leakage detection.However, in other cases, if data and all options are nearly (or exactly) identical, then thesesteps might change the features slightly (e.g. due to random seed if not setting reproducible mode),leading to changes in features and model that is refitted. By default, restart and refit avoidthese steps assuming data and experiment setup have no changed significantly.If check_distribution_shift is forced to on (instead of auto), then this option is ignored.In order to ensure exact same final pipeline is fitted, one should also set:1) brain_add_features_for_new_columns false2) refit_same_best_individual true3) feature_brain_reset_score 'off'4) force_model_restart_to_defaults falseThe score will still be reset if the experiment metric chosen changes,but changes to the scored model and features will be more frozen in place. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "restart refit redo origfs shift leak", + "output": "restart refit redo origfs shift leak refers to For restart-refit, select which steps to do: When doing restart or re-fit of experiment from feature brain,sometimes user might change data significantly and then warrantredoing reduction of original features by feature selection, shift detection, and leakage detection.However, in other cases, if data and all options are nearly (or exactly) identical, then thesesteps might change the features slightly (e.g. due to random seed if not setting reproducible mode),leading to changes in features and model that is refitted. By default, restart and refit avoidthese steps assuming data and experiment setup have no changed significantly.If check_distribution_shift is forced to on (instead of auto), then this option is ignored.In order to ensure exact same final pipeline is fitted, one should also set:1) brain_add_features_for_new_columns false2) refit_same_best_individual true3) feature_brain_reset_score 'off'4) force_model_restart_to_defaults falseThe score will still be reset if the experiment metric chosen changes,but changes to the scored model and features will be more frozen in place. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "For restart-refit, select which steps to do: ", + "output": "restart refit redo origfs shift leak refers to For restart-refit, select which steps to do: When doing restart or re-fit of experiment from feature brain,sometimes user might change data significantly and then warrantredoing reduction of original features by feature selection, shift detection, and leakage detection.However, in other cases, if data and all options are nearly (or exactly) identical, then thesesteps might change the features slightly (e.g. 
due to random seed if not setting reproducible mode),leading to changes in features and model that is refitted. By default, restart and refit avoidthese steps assuming data and experiment setup have no changed significantly.If check_distribution_shift is forced to on (instead of auto), then this option is ignored.In order to ensure exact same final pipeline is fitted, one should also set:1) brain_add_features_for_new_columns false2) refit_same_best_individual true3) feature_brain_reset_score 'off'4) force_model_restart_to_defaults falseThe score will still be reset if the experiment metric chosen changes,but changes to the scored model and features will be more frozen in place. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting restart_refit_redo_origfs_shift_leak", + "output": "restart refit redo origfs shift leak refers to When doing restart or re-fit of experiment from feature brain,sometimes user might change data significantly and then warrantredoing reduction of original features by feature selection, shift detection, and leakage detection.However, in other cases, if data and all options are nearly (or exactly) identical, then thesesteps might change the features slightly (e.g. due to random seed if not setting reproducible mode),leading to changes in features and model that is refitted. By default, restart and refit avoidthese steps assuming data and experiment setup have no changed significantly.If check_distribution_shift is forced to on (instead of auto), then this option is ignored.In order to ensure exact same final pipeline is fitted, one should also set:1) brain_add_features_for_new_columns false2) refit_same_best_individual true3) feature_brain_reset_score 'off'4) force_model_restart_to_defaults falseThe score will still be reset if the experiment metric chosen changes,but changes to the scored model and features will be more frozen in place. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting restart_refit_redo_origfs_shift_leak", + "output": "restart refit redo origfs shift leak refers to For restart-refit, select which steps to do: When doing restart or re-fit of experiment from feature brain,sometimes user might change data significantly and then warrantredoing reduction of original features by feature selection, shift detection, and leakage detection.However, in other cases, if data and all options are nearly (or exactly) identical, then thesesteps might change the features slightly (e.g. due to random seed if not setting reproducible mode),leading to changes in features and model that is refitted. By default, restart and refit avoidthese steps assuming data and experiment setup have no changed significantly.If check_distribution_shift is forced to on (instead of auto), then this option is ignored.In order to ensure exact same final pipeline is fitted, one should also set:1) brain_add_features_for_new_columns false2) refit_same_best_individual true3) feature_brain_reset_score 'off'4) force_model_restart_to_defaults falseThe score will still be reset if the experiment metric chosen changes,but changes to the scored model and features will be more frozen in place. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "brain_rel_dir", + "output": "brain rel dir refers to Directory, relative to data_directory, to store H2O.ai brain meta model files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "brain_rel_dir", + "output": "brain rel dir refers to Directory, relative to data_directory, to store H2O.ai brain meta model files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "brain rel dir", + "output": "brain rel dir refers to Directory, relative to data_directory, to store H2O.ai brain meta model files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "brain rel dir refers to Directory, relative to data_directory, to store H2O.ai brain meta model files" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting brain_rel_dir", + "output": "brain rel dir refers to Directory, relative to data_directory, to store H2O.ai brain meta model files" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting brain_rel_dir", + "output": "brain rel dir refers to Directory, relative to data_directory, to store H2O.ai brain meta model files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "brain_max_size_GB", + "output": "brain max size GB refers to Maximum size in bytes the brain will store We reserve this memory to save data in order to ensure we can retrieve an experiment if for any reason it gets interrupted. 
-1: unlimited >=0 number of GB to limit brain to" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "brain_max_size_GB", + "output": "brain max size GB refers to Maximum size in bytes the brain will store We reserve this memory to save data in order to ensure we can retrieve an experiment if for any reason it gets interrupted. -1: unlimited >=0 number of GB to limit brain to" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "brain max size GB", + "output": "brain max size GB refers to Maximum size in bytes the brain will store We reserve this memory to save data in order to ensure we can retrieve an experiment if for any reason it gets interrupted. -1: unlimited >=0 number of GB to limit brain to" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "brain max size GB refers to Maximum size in bytes the brain will store We reserve this memory to save data in order to ensure we can retrieve an experiment if for any reason it gets interrupted. -1: unlimited >=0 number of GB to limit brain to" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting brain_max_size_GB", + "output": "brain max size GB refers to Maximum size in bytes the brain will store We reserve this memory to save data in order to ensure we can retrieve an experiment if for any reason it gets interrupted. -1: unlimited >=0 number of GB to limit brain to" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting brain_max_size_GB", + "output": "brain max size GB refers to Maximum size in bytes the brain will store We reserve this memory to save data in order to ensure we can retrieve an experiment if for any reason it gets interrupted. 
-1: unlimited >=0 number of GB to limit brain to" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "brain_add_features_for_new_columns", + "output": "brain add features for new columns refers to Whether to take any new columns and add additional features to pipeline, even if doing retrain final model.In some cases, one might have a new dataset but only want to keep same pipeline regardless of new columns,in which case one sets this to False. For example, new data might lead to new dropped features,due to shift or leak detection. To avoid change of feature set, one can disable all dropping of columns,but set this to False to avoid adding any columns as new features,so pipeline is perfectly preserved when changing data. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "brain_add_features_for_new_columns", + "output": "brain add features for new columns refers to Feature Brain adds features with new columns even during retraining final model: Whether to take any new columns and add additional features to pipeline, even if doing retrain final model.In some cases, one might have a new dataset but only want to keep same pipeline regardless of new columns,in which case one sets this to False. For example, new data might lead to new dropped features,due to shift or leak detection. To avoid change of feature set, one can disable all dropping of columns,but set this to False to avoid adding any columns as new features,so pipeline is perfectly preserved when changing data. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "brain add features for new columns", + "output": "brain add features for new columns refers to Feature Brain adds features with new columns even during retraining final model: Whether to take any new columns and add additional features to pipeline, even if doing retrain final model.In some cases, one might have a new dataset but only want to keep same pipeline regardless of new columns,in which case one sets this to False. For example, new data might lead to new dropped features,due to shift or leak detection. To avoid change of feature set, one can disable all dropping of columns,but set this to False to avoid adding any columns as new features,so pipeline is perfectly preserved when changing data. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Feature Brain adds features with new columns even during retraining final model: ", + "output": "brain add features for new columns refers to Feature Brain adds features with new columns even during retraining final model: Whether to take any new columns and add additional features to pipeline, even if doing retrain final model.In some cases, one might have a new dataset but only want to keep same pipeline regardless of new columns,in which case one sets this to False. For example, new data might lead to new dropped features,due to shift or leak detection. To avoid change of feature set, one can disable all dropping of columns,but set this to False to avoid adding any columns as new features,so pipeline is perfectly preserved when changing data. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting brain_add_features_for_new_columns", + "output": "brain add features for new columns refers to Whether to take any new columns and add additional features to pipeline, even if doing retrain final model.In some cases, one might have a new dataset but only want to keep same pipeline regardless of new columns,in which case one sets this to False. For example, new data might lead to new dropped features,due to shift or leak detection. To avoid change of feature set, one can disable all dropping of columns,but set this to False to avoid adding any columns as new features,so pipeline is perfectly preserved when changing data. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting brain_add_features_for_new_columns", + "output": "brain add features for new columns refers to Feature Brain adds features with new columns even during retraining final model: Whether to take any new columns and add additional features to pipeline, even if doing retrain final model.In some cases, one might have a new dataset but only want to keep same pipeline regardless of new columns,in which case one sets this to False. For example, new data might lead to new dropped features,due to shift or leak detection. To avoid change of feature set, one can disable all dropping of columns,but set this to False to avoid adding any columns as new features,so pipeline is perfectly preserved when changing data. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "force_model_restart_to_defaults", + "output": "force model restart to defaults refers to If restart/refit and no longer have the original model class available, be conservativeand go back to defaults for that model class. If False, then try to keep original hyperparameters,which can fail to work in general. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "force_model_restart_to_defaults", + "output": "force model restart to defaults refers to Restart-refit use default model settings if model switches: If restart/refit and no longer have the original model class available, be conservativeand go back to defaults for that model class. If False, then try to keep original hyperparameters,which can fail to work in general. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "force model restart to defaults", + "output": "force model restart to defaults refers to Restart-refit use default model settings if model switches: If restart/refit and no longer have the original model class available, be conservativeand go back to defaults for that model class. If False, then try to keep original hyperparameters,which can fail to work in general. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Restart-refit use default model settings if model switches: ", + "output": "force model restart to defaults refers to Restart-refit use default model settings if model switches: If restart/refit and no longer have the original model class available, be conservativeand go back to defaults for that model class. If False, then try to keep original hyperparameters,which can fail to work in general. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting force_model_restart_to_defaults", + "output": "force model restart to defaults refers to If restart/refit and no longer have the original model class available, be conservativeand go back to defaults for that model class. If False, then try to keep original hyperparameters,which can fail to work in general. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting force_model_restart_to_defaults", + "output": "force model restart to defaults refers to Restart-refit use default model settings if model switches: If restart/refit and no longer have the original model class available, be conservativeand go back to defaults for that model class. If False, then try to keep original hyperparameters,which can fail to work in general. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "early_stopping", + "output": "early stopping refers to Whether to enable early stopping Early stopping refers to stopping the feature evolution/engineering process when there is no performance uplift after a certain number of iterations. After early stopping has been triggered, Driverless AI will initiate the ensemble process if selected." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "early_stopping", + "output": "early stopping refers to Whether to enable early stopping Early stopping refers to stopping the feature evolution/engineering process when there is no performance uplift after a certain number of iterations. After early stopping has been triggered, Driverless AI will initiate the ensemble process if selected." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "early stopping", + "output": "early stopping refers to Whether to enable early stopping Early stopping refers to stopping the feature evolution/engineering process when there is no performance uplift after a certain number of iterations. After early stopping has been triggered, Driverless AI will initiate the ensemble process if selected." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "early stopping refers to Whether to enable early stopping Early stopping refers to stopping the feature evolution/engineering process when there is no performance uplift after a certain number of iterations. After early stopping has been triggered, Driverless AI will initiate the ensemble process if selected." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting early_stopping", + "output": "early stopping refers to Whether to enable early stopping Early stopping refers to stopping the feature evolution/engineering process when there is no performance uplift after a certain number of iterations. After early stopping has been triggered, Driverless AI will initiate the ensemble process if selected." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting early_stopping", + "output": "early stopping refers to Whether to enable early stopping Early stopping refers to stopping the feature evolution/engineering process when there is no performance uplift after a certain number of iterations. After early stopping has been triggered, Driverless AI will initiate the ensemble process if selected." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "early_stopping_per_individual", + "output": "early stopping per individual refers to Whether to enable early stopping per individual. Each individual in the genetic algorithm will stop early if no improvement, and it will no longer be mutated. Instead, the best individual will be additionally mutated."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "early_stopping_per_individual", + "output": "early stopping per individual refers to Whether to enable early stopping per individual. Each individual in the genetic algorithm will stop early if no improvement, and it will no longer be mutated. Instead, the best individual will be additionally mutated." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "early stopping per individual", + "output": "early stopping per individual refers to Whether to enable early stopping per individual. Each individual in the genetic algorithm will stop early if no improvement, and it will no longer be mutated. Instead, the best individual will be additionally mutated." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "early stopping per individual refers to Whether to enable early stopping per individual. Each individual in the genetic algorithm will stop early if no improvement, and it will no longer be mutated. Instead, the best individual will be additionally mutated." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting early_stopping_per_individual", + "output": "early stopping per individual refers to Whether to enable early stopping per individual. Each individual in the genetic algorithm will stop early if no improvement, and it will no longer be mutated. Instead, the best individual will be additionally mutated."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting early_stopping_per_individual", + "output": "early stopping per individual refers to Whether to enable early stopping per individual. Each individual in the genetic algorithm will stop early if no improvement, and it will no longer be mutated. Instead, the best individual will be additionally mutated." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_dai_iterations", + "output": "min dai iterations refers to Minimum number of Driverless AI iterations to stop the feature evolution/engineering process even if score is not improving. Driverless AI needs to run for at least that many iterations before deciding to stop. It can be seen as a safeguard against suboptimal (early) convergence. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_dai_iterations", + "output": "min dai iterations refers to Min. DAI iterations: Minimum number of Driverless AI iterations to stop the feature evolution/engineering process even if score is not improving. Driverless AI needs to run for at least that many iterations before deciding to stop. It can be seen as a safeguard against suboptimal (early) convergence. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min dai iterations", + "output": "min dai iterations refers to Min. DAI iterations: Minimum number of Driverless AI iterations to stop the feature evolution/engineering process even if score is not improving. Driverless AI needs to run for at least that many iterations before deciding to stop. It can be seen as a safeguard against suboptimal (early) convergence. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Min. 
DAI iterations: ", + "output": "min dai iterations refers to Min. DAI iterations: Minimum number of Driverless AI iterations to stop the feature evolution/engineering process even if score is not improving. Driverless AI needs to run for at least that many iterations before deciding to stop. It can be seen as a safeguard against suboptimal (early) convergence. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_dai_iterations", + "output": "min dai iterations refers to Minimum number of Driverless AI iterations to stop the feature evolution/engineering process even if score is not improving. Driverless AI needs to run for at least that many iterations before deciding to stop. It can be seen as a safeguard against suboptimal (early) convergence. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_dai_iterations", + "output": "min dai iterations refers to Min. DAI iterations: Minimum number of Driverless AI iterations to stop the feature evolution/engineering process even if score is not improving. Driverless AI needs to run for at least that many iterations before deciding to stop. It can be seen as a safeguard against suboptimal (early) convergence. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "nfeatures_max", + "output": "nfeatures max refers to Maximum features per model (and each model within the final model if ensemble) kept.Keeps top variable importance features, prunes rest away, after each scoring.Final ensemble will exclude any pruned-away features and only train on kept features,but may contain a few new features due to fitting on different data view (e.g. 
new clusters)-1 means no restrictions except internally-determined memory and interpretability restrictions.Notes:* If interpretability > remove_scored_0gain_genes_in_postprocessing_above_interpretability, thenevery GA iteration post-processes features down to this value just after scoring them. Otherwise,only mutations of scored individuals will be pruned (until the final model where limits are strictly applied).* If ngenes_max is not also limited, then some individuals will have more genes and features untilpruned by mutation or by preparation for final model.* E.g. to generally limit every iteration to exactly 1 features, one must set nfeatures_max=ngenes_max=1and remove_scored_0gain_genes_in_postprocessing_above_interpretability=0, but the genetic algorithmwill have a harder time finding good features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "nfeatures_max", + "output": "nfeatures max refers to Max. number of engineered features (-1 = auto): Maximum features per model (and each model within the final model if ensemble) kept.Keeps top variable importance features, prunes rest away, after each scoring.Final ensemble will exclude any pruned-away features and only train on kept features,but may contain a few new features due to fitting on different data view (e.g. new clusters)Final scoring pipeline will exclude any pruned-away features,but may contain a few new features due to fitting on different data view (e.g. new clusters)-1 means no restrictions except internally-determined memory and interpretability restrictions.Notes:* If interpretability > remove_scored_0gain_genes_in_postprocessing_above_interpretability, thenevery GA iteration post-processes features down to this value just after scoring them. 
Otherwise,only mutations of scored individuals will be pruned (until the final model where limits are strictly applied).* If ngenes_max is not also limited, then some individuals will have more genes and features untilpruned by mutation or by preparation for final model.* E.g. to generally limit every iteration to exactly 1 features, one must set nfeatures_max=ngenes_max=1and remove_scored_0gain_genes_in_postprocessing_above_interpretability=0, but the genetic algorithmwill have a harder time finding good features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "nfeatures max", + "output": "nfeatures max refers to Max. number of engineered features (-1 = auto): Maximum features per model (and each model within the final model if ensemble) kept.Keeps top variable importance features, prunes rest away, after each scoring.Final ensemble will exclude any pruned-away features and only train on kept features,but may contain a few new features due to fitting on different data view (e.g. new clusters)Final scoring pipeline will exclude any pruned-away features,but may contain a few new features due to fitting on different data view (e.g. new clusters)-1 means no restrictions except internally-determined memory and interpretability restrictions.Notes:* If interpretability > remove_scored_0gain_genes_in_postprocessing_above_interpretability, thenevery GA iteration post-processes features down to this value just after scoring them. Otherwise,only mutations of scored individuals will be pruned (until the final model where limits are strictly applied).* If ngenes_max is not also limited, then some individuals will have more genes and features untilpruned by mutation or by preparation for final model.* E.g. 
to generally limit every iteration to exactly 1 features, one must set nfeatures_max=ngenes_max=1and remove_scored_0gain_genes_in_postprocessing_above_interpretability=0, but the genetic algorithmwill have a harder time finding good features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of engineered features (-1 = auto): ", + "output": "nfeatures max refers to Max. number of engineered features (-1 = auto): Maximum features per model (and each model within the final model if ensemble) kept.Keeps top variable importance features, prunes rest away, after each scoring.Final ensemble will exclude any pruned-away features and only train on kept features,but may contain a few new features due to fitting on different data view (e.g. new clusters)Final scoring pipeline will exclude any pruned-away features,but may contain a few new features due to fitting on different data view (e.g. new clusters)-1 means no restrictions except internally-determined memory and interpretability restrictions.Notes:* If interpretability > remove_scored_0gain_genes_in_postprocessing_above_interpretability, thenevery GA iteration post-processes features down to this value just after scoring them. Otherwise,only mutations of scored individuals will be pruned (until the final model where limits are strictly applied).* If ngenes_max is not also limited, then some individuals will have more genes and features untilpruned by mutation or by preparation for final model.* E.g. to generally limit every iteration to exactly 1 features, one must set nfeatures_max=ngenes_max=1and remove_scored_0gain_genes_in_postprocessing_above_interpretability=0, but the genetic algorithmwill have a harder time finding good features. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting nfeatures_max", + "output": "nfeatures max refers to Maximum features per model (and each model within the final model if ensemble) kept.Keeps top variable importance features, prunes rest away, after each scoring.Final ensemble will exclude any pruned-away features and only train on kept features,but may contain a few new features due to fitting on different data view (e.g. new clusters)Final scoring pipeline will exclude any pruned-away features,but may contain a few new features due to fitting on different data view (e.g. new clusters)-1 means no restrictions except internally-determined memory and interpretability restrictions.Notes:* If interpretability > remove_scored_0gain_genes_in_postprocessing_above_interpretability, thenevery GA iteration post-processes features down to this value just after scoring them. Otherwise,only mutations of scored individuals will be pruned (until the final model where limits are strictly applied).* If ngenes_max is not also limited, then some individuals will have more genes and features untilpruned by mutation or by preparation for final model.* E.g. to generally limit every iteration to exactly 1 features, one must set nfeatures_max=ngenes_max=1and remove_scored_0gain_genes_in_postprocessing_above_interpretability=0, but the genetic algorithmwill have a harder time finding good features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting nfeatures_max", + "output": "nfeatures max refers to Max. number of engineered features (-1 = auto): Maximum features per model (and each model within the final model if ensemble) kept.Keeps top variable importance features, prunes rest away, after each scoring.Final ensemble will exclude any pruned-away features and only train on kept features,but may contain a few new features due to fitting on different data view (e.g. 
new clusters)Final scoring pipeline will exclude any pruned-away features,but may contain a few new features due to fitting on different data view (e.g. new clusters)-1 means no restrictions except internally-determined memory and interpretability restrictions.Notes:* If interpretability > remove_scored_0gain_genes_in_postprocessing_above_interpretability, thenevery GA iteration post-processes features down to this value just after scoring them. Otherwise,only mutations of scored individuals will be pruned (until the final model where limits are strictly applied).* If ngenes_max is not also limited, then some individuals will have more genes and features untilpruned by mutation or by preparation for final model.* E.g. to generally limit every iteration to exactly 1 features, one must set nfeatures_max=ngenes_max=1and remove_scored_0gain_genes_in_postprocessing_above_interpretability=0, but the genetic algorithmwill have a harder time finding good features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ngenes_max", + "output": "ngenes max refers to Maximum genes (transformer instances) per model (and each model within the final model if ensemble) kept.Controls number of genes before features are scored, so just randomly samples genes if pruning occurs.If restriction occurs after scoring features, then aggregated gene importances are used for pruning genes.Instances includes all possible transformers, including original transformer for numeric features.-1 means no restrictions except internally-determined memory and interpretability restrictions " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ngenes_max", + "output": "ngenes max refers to Max. 
number of genes (transformer instances) (-1 = auto): Maximum genes (transformer instances) per model (and each model within the final model if ensemble) kept.Controls number of genes before features are scored, so just randomly samples genes if pruning occurs.If restriction occurs after scoring features, then aggregated gene importances are used for pruning genes.Instances includes all possible transformers, including original transformer for numeric features.-1 means no restrictions except internally-determined memory and interpretability restrictions " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ngenes max", + "output": "ngenes max refers to Max. number of genes (transformer instances) (-1 = auto): Maximum genes (transformer instances) per model (and each model within the final model if ensemble) kept.Controls number of genes before features are scored, so just randomly samples genes if pruning occurs.If restriction occurs after scoring features, then aggregated gene importances are used for pruning genes.Instances includes all possible transformers, including original transformer for numeric features.-1 means no restrictions except internally-determined memory and interpretability restrictions " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of genes (transformer instances) (-1 = auto): ", + "output": "ngenes max refers to Max. 
number of genes (transformer instances) (-1 = auto): Maximum genes (transformer instances) per model (and each model within the final model if ensemble) kept.Controls number of genes before features are scored, so just randomly samples genes if pruning occurs.If restriction occurs after scoring features, then aggregated gene importances are used for pruning genes.Instances includes all possible transformers, including original transformer for numeric features.-1 means no restrictions except internally-determined memory and interpretability restrictions " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ngenes_max", + "output": "ngenes max refers to Maximum genes (transformer instances) per model (and each model within the final model if ensemble) kept.Controls number of genes before features are scored, so just randomly samples genes if pruning occurs.If restriction occurs after scoring features, then aggregated gene importances are used for pruning genes.Instances includes all possible transformers, including original transformer for numeric features.-1 means no restrictions except internally-determined memory and interpretability restrictions " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ngenes_max", + "output": "ngenes max refers to Max. 
number of genes (transformer instances) (-1 = auto): Maximum genes (transformer instances) per model (and each model within the final model if ensemble) kept.Controls number of genes before features are scored, so just randomly samples genes if pruning occurs.If restriction occurs after scoring features, then aggregated gene importances are used for pruning genes.Instances includes all possible transformers, including original transformer for numeric features.-1 means no restrictions except internally-determined memory and interpretability restrictions " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ngenes_min", + "output": "ngenes min refers to Like ngenes_max but controls minimum number of genes.Useful when DAI by default is making too few genes but want many more.This can be useful when one has few input features, so DAI may remain conservative and not make many transformed features. But user knows that some transformed features may be useful.E.g. only target encoding transformer might have been chosen, and one wants DAI to explore many more possible input features at once." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ngenes_min", + "output": "ngenes min refers to Min. number of genes (transformer instances) (-1 = auto): Like ngenes_max but controls minimum number of genes.Useful when DAI by default is making too few genes but want many more.This can be useful when one has few input features, so DAI may remain conservative and not make many transformed features. But user knows that some transformed features may be useful.E.g. only target encoding transformer might have been chosen, and one wants DAI to explore many more possible input features at once." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ngenes min", + "output": "ngenes min refers to Min. number of genes (transformer instances) (-1 = auto): Like ngenes_max but controls minimum number of genes.Useful when DAI by default is making too few genes but want many more.This can be useful when one has few input features, so DAI may remain conservative and not make many transformed features. But user knows that some transformed features may be useful.E.g. only target encoding transformer might have been chosen, and one wants DAI to explore many more possible input features at once." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Min. number of genes (transformer instances) (-1 = auto): ", + "output": "ngenes min refers to Min. number of genes (transformer instances) (-1 = auto): Like ngenes_max but controls minimum number of genes.Useful when DAI by default is making too few genes but want many more.This can be useful when one has few input features, so DAI may remain conservative and not make many transformed features. But user knows that some transformed features may be useful.E.g. only target encoding transformer might have been chosen, and one wants DAI to explore many more possible input features at once." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ngenes_min", + "output": "ngenes min refers to Like ngenes_max but controls minimum number of genes.Useful when DAI by default is making too few genes but want many more.This can be useful when one has few input features, so DAI may remain conservative and not make many transformed features. But user knows that some transformed features may be useful.E.g. only target encoding transformer might have been chosen, and one wants DAI to explore many more possible input features at once." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ngenes_min", + "output": "ngenes min refers to Min. number of genes (transformer instances) (-1 = auto): Like ngenes_max but controls minimum number of genes.Useful when DAI by default is making too few genes but want many more.This can be useful when one has few input features, so DAI may remain conservative and not make many transformed features. But user knows that some transformed features may be useful.E.g. only target encoding transformer might have been chosen, and one wants DAI to explore many more possible input features at once." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "nfeatures_min", + "output": "nfeatures min refers to Minimum genes (transformer instances) per model (and each model within the final model if ensemble) kept.Instances includes all possible transformers, including original transformer for numeric features.-1 means no restrictions except internally-determined memory and interpretability restrictions " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "nfeatures_min", + "output": "nfeatures min refers to Min. number of genes (transformer instances) (-1 = auto): Minimum genes (transformer instances) per model (and each model within the final model if ensemble) kept.Instances includes all possible transformers, including original transformer for numeric features.-1 means no restrictions except internally-determined memory and interpretability restrictions " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "nfeatures min", + "output": "nfeatures min refers to Min. 
number of genes (transformer instances) (-1 = auto): Minimum genes (transformer instances) per model (and each model within the final model if ensemble) kept.Instances includes all possible transformers, including original transformer for numeric features.-1 means no restrictions except internally-determined memory and interpretability restrictions " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Min. number of genes (transformer instances) (-1 = auto): ", + "output": "nfeatures min refers to Min. number of genes (transformer instances) (-1 = auto): Minimum genes (transformer instances) per model (and each model within the final model if ensemble) kept.Instances includes all possible transformers, including original transformer for numeric features.-1 means no restrictions except internally-determined memory and interpretability restrictions " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting nfeatures_min", + "output": "nfeatures min refers to Minimum genes (transformer instances) per model (and each model within the final model if ensemble) kept.Instances includes all possible transformers, including original transformer for numeric features.-1 means no restrictions except internally-determined memory and interpretability restrictions " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting nfeatures_min", + "output": "nfeatures min refers to Min. 
number of genes (transformer instances) (-1 = auto): Minimum genes (transformer instances) per model (and each model within the final model if ensemble) kept.Instances includes all possible transformers, including original transformer for numeric features.-1 means no restrictions except internally-determined memory and interpretability restrictions " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "limit_features_by_interpretability", + "output": "limit features by interpretability refers to Whether to limit feature counts by interpretability setting via features_allowed_by_interpretability" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "limit_features_by_interpretability", + "output": "limit features by interpretability refers to Limit features by interpretability: Whether to limit feature counts by interpretability setting via features_allowed_by_interpretability" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "limit features by interpretability", + "output": "limit features by interpretability refers to Limit features by interpretability: Whether to limit feature counts by interpretability setting via features_allowed_by_interpretability" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Limit features by interpretability: ", + "output": "limit features by interpretability refers to Limit features by interpretability: Whether to limit feature counts by interpretability setting via features_allowed_by_interpretability" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting limit_features_by_interpretability", + "output": "limit features by interpretability refers to Whether to limit feature counts by 
interpretability setting via features_allowed_by_interpretability" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting limit_features_by_interpretability", + "output": "limit features by interpretability refers to Limit features by interpretability: Whether to limit feature counts by interpretability setting via features_allowed_by_interpretability" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_tensorflow_textcnn", + "output": "enable tensorflow textcnn refers to Whether to use out-of-fold predictions of Word-based CNN TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_tensorflow_textcnn", + "output": "enable tensorflow textcnn refers to Enable word-based CNN TensorFlow transformers for NLP: Whether to use out-of-fold predictions of Word-based CNN TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable tensorflow textcnn", + "output": "enable tensorflow textcnn refers to Enable word-based CNN TensorFlow transformers for NLP: Whether to use out-of-fold predictions of Word-based CNN TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable word-based CNN TensorFlow transformers for NLP: ", + "output": "enable tensorflow textcnn refers to Enable word-based CNN TensorFlow transformers for NLP: Whether to use out-of-fold predictions of Word-based CNN TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the 
expert setting enable_tensorflow_textcnn", + "output": "enable tensorflow textcnn refers to Whether to use out-of-fold predictions of Word-based CNN TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_tensorflow_textcnn", + "output": "enable tensorflow textcnn refers to Enable word-based CNN TensorFlow transformers for NLP: Whether to use out-of-fold predictions of Word-based CNN TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_tensorflow_textbigru", + "output": "enable tensorflow textbigru refers to Whether to use out-of-fold predictions of Word-based Bi-GRU TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_tensorflow_textbigru", + "output": "enable tensorflow textbigru refers to Enable word-based BiGRU TensorFlow transformers for NLP: Whether to use out-of-fold predictions of Word-based Bi-GRU TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable tensorflow textbigru", + "output": "enable tensorflow textbigru refers to Enable word-based BiGRU TensorFlow transformers for NLP: Whether to use out-of-fold predictions of Word-based Bi-GRU TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable word-based BiGRU TensorFlow transformers for NLP: ", + "output": "enable tensorflow textbigru refers to Enable word-based BiGRU TensorFlow transformers for NLP: Whether to use 
out-of-fold predictions of Word-based Bi-GRU TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_tensorflow_textbigru", + "output": "enable tensorflow textbigru refers to Whether to use out-of-fold predictions of Word-based Bi-GRU TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_tensorflow_textbigru", + "output": "enable tensorflow textbigru refers to Enable word-based BiGRU TensorFlow transformers for NLP: Whether to use out-of-fold predictions of Word-based Bi-GRU TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_tensorflow_charcnn", + "output": "enable tensorflow charcnn refers to Whether to use out-of-fold predictions of Character-level CNN TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_tensorflow_charcnn", + "output": "enable tensorflow charcnn refers to Enable character-based CNN TensorFlow transformers for NLP: Whether to use out-of-fold predictions of Character-level CNN TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable tensorflow charcnn", + "output": "enable tensorflow charcnn refers to Enable character-based CNN TensorFlow transformers for NLP: Whether to use out-of-fold predictions of Character-level CNN TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting 
for Driverless AI", + "input": "Enable character-based CNN TensorFlow transformers for NLP: ", + "output": "enable tensorflow charcnn refers to Enable character-based CNN TensorFlow transformers for NLP: Whether to use out-of-fold predictions of Character-level CNN TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_tensorflow_charcnn", + "output": "enable tensorflow charcnn refers to Whether to use out-of-fold predictions of Character-level CNN TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_tensorflow_charcnn", + "output": "enable tensorflow charcnn refers to Enable character-based CNN TensorFlow transformers for NLP: Whether to use out-of-fold predictions of Character-level CNN TensorFlow models as transformers for NLP if TensorFlow enabled" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_pytorch_nlp_transformer", + "output": "enable pytorch nlp transformer refers to Whether to use pretrained PyTorch models as transformers for NLP tasks. Fits a linear model on top of pretrained embeddings. Requires internet connection. Default of 'auto' means disabled. To enable, set to 'on'. GPU(s) are highly recommended.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_pytorch_nlp_transformer", + "output": "enable pytorch nlp transformer refers to Enable PyTorch transformers for NLP: Whether to use pretrained PyTorch models as transformers for NLP tasks. Fits a linear model on top of pretrained embeddings. 
Requires internet connection. Default of 'auto' means disabled. To enable, set to 'on'. GPU(s) are highly recommended.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable pytorch nlp transformer", + "output": "enable pytorch nlp transformer refers to Enable PyTorch transformers for NLP: Whether to use pretrained PyTorch models as transformers for NLP tasks. Fits a linear model on top of pretrained embeddings. Requires internet connection. Default of 'auto' means disabled. To enable, set to 'on'. GPU(s) are highly recommended.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable PyTorch transformers for NLP: ", + "output": "enable pytorch nlp transformer refers to Enable PyTorch transformers for NLP: Whether to use pretrained PyTorch models as transformers for NLP tasks. Fits a linear model on top of pretrained embeddings. Requires internet connection. Default of 'auto' means disabled. To enable, set to 'on'. GPU(s) are highly recommended.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_pytorch_nlp_transformer", + "output": "enable pytorch nlp transformer refers to Whether to use pretrained PyTorch models as transformers for NLP tasks. Fits a linear model on top of pretrained embeddings. Requires internet connection. Default of 'auto' means disabled. To enable, set to 'on'. 
GPU(s) are highly recommended.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_pytorch_nlp_transformer", + "output": "enable pytorch nlp transformer refers to Enable PyTorch transformers for NLP: Whether to use pretrained PyTorch models as transformers for NLP tasks. Fits a linear model on top of pretrained embeddings. Requires internet connection. Default of 'auto' means disabled. To enable, set to 'on'. GPU(s) are highly recommended.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_transformer_max_rows_linear_model", + "output": "pytorch nlp transformer max rows linear model refers to More rows can slow down the fitting process. Recommended values are less than 100000." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_transformer_max_rows_linear_model", + "output": "pytorch nlp transformer max rows linear model refers to Max number of rows to use for fitting the linear model on top of the pretrained embeddings.: More rows can slow down the fitting process. Recommended values are less than 100000." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch nlp transformer max rows linear model", + "output": "pytorch nlp transformer max rows linear model refers to Max number of rows to use for fitting the linear model on top of the pretrained embeddings.: More rows can slow down the fitting process. Recommended values are less than 100000." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max number of rows to use for fitting the linear model on top of the pretrained embeddings.: ", + "output": "pytorch nlp transformer max rows linear model refers to Max number of rows to use for fitting the linear model on top of the pretrained embeddings.: More rows can slow down the fitting process. Recommended values are less than 100000." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pytorch_nlp_transformer_max_rows_linear_model", + "output": "pytorch nlp transformer max rows linear model refers to More rows can slow down the fitting process. Recommended values are less than 100000." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pytorch_nlp_transformer_max_rows_linear_model", + "output": "pytorch nlp transformer max rows linear model refers to Max number of rows to use for fitting the linear model on top of the pretrained embeddings.: More rows can slow down the fitting process. Recommended values are less than 100000." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_pytorch_nlp_model", + "output": "enable pytorch nlp model refers to Whether to use pretrained PyTorch models and fine-tune them for NLP tasks. Requires internet connection. Default of 'auto' means disabled. To enable, set to 'on'. These models are only using the first text column, and can be slow to train. GPU(s) are highly recommended.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_pytorch_nlp_model", + "output": "enable pytorch nlp model refers to Enable PyTorch models for NLP: Whether to use pretrained PyTorch models and fine-tune them for NLP tasks. Requires internet connection. Default of 'auto' means disabled. To enable, set to 'on'. These models are only using the first text column, and can be slow to train. GPU(s) are highly recommended.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable pytorch nlp model", + "output": "enable pytorch nlp model refers to Enable PyTorch models for NLP: Whether to use pretrained PyTorch models and fine-tune them for NLP tasks. Requires internet connection. Default of 'auto' means disabled. To enable, set to 'on'. These models are only using the first text column, and can be slow to train. GPU(s) are highly recommended.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable PyTorch models for NLP: ", + "output": "enable pytorch nlp model refers to Enable PyTorch models for NLP: Whether to use pretrained PyTorch models and fine-tune them for NLP tasks. Requires internet connection. Default of 'auto' means disabled. To enable, set to 'on'. These models are only using the first text column, and can be slow to train. GPU(s) are highly recommended.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_pytorch_nlp_model", + "output": "enable pytorch nlp model refers to Whether to use pretrained PyTorch models and fine-tune them for NLP tasks. Requires internet connection. Default of 'auto' means disabled. To enable, set to 'on'. These models are only using the first text column, and can be slow to train. GPU(s) are highly recommended.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_pytorch_nlp_model", + "output": "enable pytorch nlp model refers to Enable PyTorch models for NLP: Whether to use pretrained PyTorch models and fine-tune them for NLP tasks. Requires internet connection. Default of 'auto' means disabled. To enable, set to 'on'. These models are only using the first text column, and can be slow to train. GPU(s) are highly recommended.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_pretrained_models", + "output": "pytorch nlp pretrained models refers to Select which pretrained PyTorch NLP model(s) to use. Non-default ones might have no MOJO support. Requires internet connection. Only if PyTorch models or transformers for NLP are set to 'on'." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_pretrained_models", + "output": "pytorch nlp pretrained models refers to Select which pretrained PyTorch NLP model(s) to use.: Select which pretrained PyTorch NLP model(s) to use. Non-default ones might have no MOJO support. Requires internet connection. 
Only if PyTorch models or transformers for NLP are set to 'on'." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch nlp pretrained models", + "output": "pytorch nlp pretrained models refers to Select which pretrained PyTorch NLP model(s) to use.: Select which pretrained PyTorch NLP model(s) to use. Non-default ones might have no MOJO support. Requires internet connection. Only if PyTorch models or transformers for NLP are set to 'on'." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Select which pretrained PyTorch NLP model(s) to use.: ", + "output": "pytorch nlp pretrained models refers to Select which pretrained PyTorch NLP model(s) to use.: Select which pretrained PyTorch NLP model(s) to use. Non-default ones might have no MOJO support. Requires internet connection. Only if PyTorch models or transformers for NLP are set to 'on'." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pytorch_nlp_pretrained_models", + "output": "pytorch nlp pretrained models refers to Select which pretrained PyTorch NLP model(s) to use. Non-default ones might have no MOJO support. Requires internet connection. Only if PyTorch models or transformers for NLP are set to 'on'." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pytorch_nlp_pretrained_models", + "output": "pytorch nlp pretrained models refers to Select which pretrained PyTorch NLP model(s) to use.: Select which pretrained PyTorch NLP model(s) to use. Non-default ones might have no MOJO support. Requires internet connection. Only if PyTorch models or transformers for NLP are set to 'on'." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_max_epochs_nlp", + "output": "tensorflow max epochs nlp refers to Max. number of epochs for TensorFlow models for making NLP features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_max_epochs_nlp", + "output": "tensorflow max epochs nlp refers to Max. TensorFlow epochs for NLP: Max. number of epochs for TensorFlow models for making NLP features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow max epochs nlp", + "output": "tensorflow max epochs nlp refers to Max. TensorFlow epochs for NLP: Max. number of epochs for TensorFlow models for making NLP features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. TensorFlow epochs for NLP: ", + "output": "tensorflow max epochs nlp refers to Max. TensorFlow epochs for NLP: Max. number of epochs for TensorFlow models for making NLP features" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_max_epochs_nlp", + "output": "tensorflow max epochs nlp refers to Max. number of epochs for TensorFlow models for making NLP features" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_max_epochs_nlp", + "output": "tensorflow max epochs nlp refers to Max. TensorFlow epochs for NLP: Max. 
number of epochs for TensorFlow models for making NLP features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_tensorflow_nlp_accuracy_switch", + "output": "enable tensorflow nlp accuracy switch refers to Accuracy setting equal and above which will add all enabled TensorFlow NLP models below at start of experiment for text dominated problemswhen TensorFlow NLP transformers are set to auto. If set to on, this parameter is ignored.Otherwise, at lower accuracy, TensorFlow NLP transformations will only be created as a mutation. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_tensorflow_nlp_accuracy_switch", + "output": "enable tensorflow nlp accuracy switch refers to Accuracy above enable TensorFlow NLP by default for all models: Accuracy setting equal and above which will add all enabled TensorFlow NLP models below at start of experiment for text dominated problemswhen TensorFlow NLP transformers are set to auto. If set to on, this parameter is ignored.Otherwise, at lower accuracy, TensorFlow NLP transformations will only be created as a mutation. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable tensorflow nlp accuracy switch", + "output": "enable tensorflow nlp accuracy switch refers to Accuracy above enable TensorFlow NLP by default for all models: Accuracy setting equal and above which will add all enabled TensorFlow NLP models below at start of experiment for text dominated problemswhen TensorFlow NLP transformers are set to auto. If set to on, this parameter is ignored.Otherwise, at lower accuracy, TensorFlow NLP transformations will only be created as a mutation. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Accuracy above enable TensorFlow NLP by default for all models: ", + "output": "enable tensorflow nlp accuracy switch refers to Accuracy above enable TensorFlow NLP by default for all models: Accuracy setting equal and above which will add all enabled TensorFlow NLP models below at start of experiment for text dominated problemswhen TensorFlow NLP transformers are set to auto. If set to on, this parameter is ignored.Otherwise, at lower accuracy, TensorFlow NLP transformations will only be created as a mutation. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_tensorflow_nlp_accuracy_switch", + "output": "enable tensorflow nlp accuracy switch refers to Accuracy setting equal and above which will add all enabled TensorFlow NLP models below at start of experiment for text dominated problemswhen TensorFlow NLP transformers are set to auto. If set to on, this parameter is ignored.Otherwise, at lower accuracy, TensorFlow NLP transformations will only be created as a mutation. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_tensorflow_nlp_accuracy_switch", + "output": "enable tensorflow nlp accuracy switch refers to Accuracy above enable TensorFlow NLP by default for all models: Accuracy setting equal and above which will add all enabled TensorFlow NLP models below at start of experiment for text dominated problemswhen TensorFlow NLP transformers are set to auto. If set to on, this parameter is ignored.Otherwise, at lower accuracy, TensorFlow NLP transformations will only be created as a mutation. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_nlp_pretrained_embeddings_file_path", + "output": "tensorflow nlp pretrained embeddings file path refers to Path to pretrained embeddings for TensorFlow NLP models, can be a path in local file system or an S3 location (s3://).For example, download and unzip https://nlp.stanford.edu/data/glove.6B.ziptensorflow_nlp_pretrained_embeddings_file_path = /path/on/server/to/glove.6B.300d.txt " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_nlp_pretrained_embeddings_file_path", + "output": "tensorflow nlp pretrained embeddings file path refers to Path to pretrained embeddings for TensorFlow NLP models. If empty, will train from scratch.: Path to pretrained embeddings for TensorFlow NLP models, can be a path in local file system or an S3 location (s3://).For example, download and unzip https://nlp.stanford.edu/data/glove.6B.ziptensorflow_nlp_pretrained_embeddings_file_path = /path/on/server/to/glove.6B.300d.txt " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow nlp pretrained embeddings file path", + "output": "tensorflow nlp pretrained embeddings file path refers to Path to pretrained embeddings for TensorFlow NLP models. If empty, will train from scratch.: Path to pretrained embeddings for TensorFlow NLP models, can be a path in local file system or an S3 location (s3://).For example, download and unzip https://nlp.stanford.edu/data/glove.6B.ziptensorflow_nlp_pretrained_embeddings_file_path = /path/on/server/to/glove.6B.300d.txt " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Path to pretrained embeddings for TensorFlow NLP models. 
If empty, will train from scratch.: ", + "output": "tensorflow nlp pretrained embeddings file path refers to Path to pretrained embeddings for TensorFlow NLP models. If empty, will train from scratch.: Path to pretrained embeddings for TensorFlow NLP models, can be a path in local file system or an S3 location (s3://).For example, download and unzip https://nlp.stanford.edu/data/glove.6B.ziptensorflow_nlp_pretrained_embeddings_file_path = /path/on/server/to/glove.6B.300d.txt " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_nlp_pretrained_embeddings_file_path", + "output": "tensorflow nlp pretrained embeddings file path refers to Path to pretrained embeddings for TensorFlow NLP models, can be a path in local file system or an S3 location (s3://).For example, download and unzip https://nlp.stanford.edu/data/glove.6B.ziptensorflow_nlp_pretrained_embeddings_file_path = /path/on/server/to/glove.6B.300d.txt " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_nlp_pretrained_embeddings_file_path", + "output": "tensorflow nlp pretrained embeddings file path refers to Path to pretrained embeddings for TensorFlow NLP models. 
If empty, will train from scratch.: Path to pretrained embeddings for TensorFlow NLP models, can be a path in local file system or an S3 location (s3://).For example, download and unzip https://nlp.stanford.edu/data/glove.6B.ziptensorflow_nlp_pretrained_embeddings_file_path = /path/on/server/to/glove.6B.300d.txt " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_nlp_pretrained_s3_access_key_id", + "output": "tensorflow nlp pretrained s3 access key id refers to S3 access key Id to use when tensorflow_nlp_pretrained_embeddings_file_path is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_nlp_pretrained_s3_access_key_id", + "output": "tensorflow nlp pretrained s3 access key id refers to S3 access key Id to use when tensorflow_nlp_pretrained_embeddings_file_path is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow nlp pretrained s3 access key id", + "output": "tensorflow nlp pretrained s3 access key id refers to S3 access key Id to use when tensorflow_nlp_pretrained_embeddings_file_path is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "S3 access key Id to use when tensorflow_nlp_pretrained_embeddings_file_path is set to an S3 location.: ", + "output": "tensorflow nlp pretrained s3 access key id refers to S3 access key Id to use when tensorflow_nlp_pretrained_embeddings_file_path is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_nlp_pretrained_s3_access_key_id", + "output": "tensorflow nlp pretrained s3 access key id refers to S3 access key Id to use when 
tensorflow_nlp_pretrained_embeddings_file_path is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_nlp_pretrained_s3_access_key_id", + "output": "tensorflow nlp pretrained s3 access key id refers to S3 access key Id to use when tensorflow_nlp_pretrained_embeddings_file_path is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_nlp_pretrained_s3_secret_access_key", + "output": "tensorflow nlp pretrained s3 secret access key refers to S3 secret access key to use when tensorflow_nlp_pretrained_embeddings_file_path is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_nlp_pretrained_s3_secret_access_key", + "output": "tensorflow nlp pretrained s3 secret access key refers to S3 secret access key to use when tensorflow_nlp_pretrained_embeddings_file_path is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow nlp pretrained s3 secret access key", + "output": "tensorflow nlp pretrained s3 secret access key refers to S3 secret access key to use when tensorflow_nlp_pretrained_embeddings_file_path is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "S3 secret access key to use when tensorflow_nlp_pretrained_embeddings_file_path is set to an S3 location.: ", + "output": "tensorflow nlp pretrained s3 secret access key refers to S3 secret access key to use when tensorflow_nlp_pretrained_embeddings_file_path is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting 
tensorflow_nlp_pretrained_s3_secret_access_key", + "output": "tensorflow nlp pretrained s3 secret access key refers to S3 secret access key to use when tensorflow_nlp_pretrained_embeddings_file_path is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_nlp_pretrained_s3_secret_access_key", + "output": "tensorflow nlp pretrained s3 secret access key refers to S3 secret access key to use when tensorflow_nlp_pretrained_embeddings_file_path is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_nlp_pretrained_embeddings_trainable", + "output": "tensorflow nlp pretrained embeddings trainable refers to Allow training of all weights of the neural network graph, including the pretrained embedding layer weights. If disabled, then the embedding layer is frozen, but all other weights are still fine-tuned." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_nlp_pretrained_embeddings_trainable", + "output": "tensorflow nlp pretrained embeddings trainable refers to For TensorFlow NLP, allow training of unfrozen pretrained embeddings (in addition to fine-tuning of the rest of the graph): Allow training of all weights of the neural network graph, including the pretrained embedding layer weights. If disabled, then the embedding layer is frozen, but all other weights are still fine-tuned." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow nlp pretrained embeddings trainable", + "output": "tensorflow nlp pretrained embeddings trainable refers to For TensorFlow NLP, allow training of unfrozen pretrained embeddings (in addition to fine-tuning of the rest of the graph): Allow training of all weights of the neural network graph, including the pretrained embedding layer weights. If disabled, then the embedding layer is frozen, but all other weights are still fine-tuned." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "For TensorFlow NLP, allow training of unfrozen pretrained embeddings (in addition to fine-tuning of the rest of the graph): ", + "output": "tensorflow nlp pretrained embeddings trainable refers to For TensorFlow NLP, allow training of unfrozen pretrained embeddings (in addition to fine-tuning of the rest of the graph): Allow training of all weights of the neural network graph, including the pretrained embedding layer weights. If disabled, then the embedding layer is frozen, but all other weights are still fine-tuned." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_nlp_pretrained_embeddings_trainable", + "output": "tensorflow nlp pretrained embeddings trainable refers to Allow training of all weights of the neural network graph, including the pretrained embedding layer weights. If disabled, then the embedding layer is frozen, but all other weights are still fine-tuned." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_nlp_pretrained_embeddings_trainable", + "output": "tensorflow nlp pretrained embeddings trainable refers to For TensorFlow NLP, allow training of unfrozen pretrained embeddings (in addition to fine-tuning of the rest of the graph): Allow training of all weights of the neural network graph, including the pretrained embedding layer weights. If disabled, then the embedding layer is frozen, but all other weights are still fine-tuned." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_tokenizer_parallel", + "output": "pytorch tokenizer parallel refers to Whether to parallelize tokenization for BERT Models/Transformers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_tokenizer_parallel", + "output": "pytorch tokenizer parallel refers to Whether to parallelize tokenization for BERT Models/Transformers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch tokenizer parallel", + "output": "pytorch tokenizer parallel refers to Whether to parallelize tokenization for BERT Models/Transformers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "pytorch tokenizer parallel refers to Whether to parallelize tokenization for BERT Models/Transformers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pytorch_tokenizer_parallel", + "output": "pytorch tokenizer parallel refers to Whether to parallelize tokenization for BERT Models/Transformers." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pytorch_tokenizer_parallel", + "output": "pytorch tokenizer parallel refers to Whether to parallelize tokenization for BERT Models/Transformers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_fine_tuning_num_epochs", + "output": "pytorch nlp fine tuning num epochs refers to Number of epochs for fine-tuning of PyTorch NLP models. Larger values can increase accuracy but take longer to train." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_fine_tuning_num_epochs", + "output": "pytorch nlp fine tuning num epochs refers to Number of epochs for fine-tuning of PyTorch NLP models.: Number of epochs for fine-tuning of PyTorch NLP models. Larger values can increase accuracy but take longer to train." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch nlp fine tuning num epochs", + "output": "pytorch nlp fine tuning num epochs refers to Number of epochs for fine-tuning of PyTorch NLP models.: Number of epochs for fine-tuning of PyTorch NLP models. Larger values can increase accuracy but take longer to train." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of epochs for fine-tuning of PyTorch NLP models.: ", + "output": "pytorch nlp fine tuning num epochs refers to Number of epochs for fine-tuning of PyTorch NLP models.: Number of epochs for fine-tuning of PyTorch NLP models. Larger values can increase accuracy but take longer to train." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pytorch_nlp_fine_tuning_num_epochs", + "output": "pytorch nlp fine tuning num epochs refers to Number of epochs for fine-tuning of PyTorch NLP models. Larger values can increase accuracy but take longer to train." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pytorch_nlp_fine_tuning_num_epochs", + "output": "pytorch nlp fine tuning num epochs refers to Number of epochs for fine-tuning of PyTorch NLP models.: Number of epochs for fine-tuning of PyTorch NLP models. Larger values can increase accuracy but take longer to train." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_fine_tuning_batch_size", + "output": "pytorch nlp fine tuning batch size refers to Batch size for PyTorch NLP models. Larger models and larger batch sizes will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_fine_tuning_batch_size", + "output": "pytorch nlp fine tuning batch size refers to Batch size for PyTorch NLP models. -1 for automatic.: Batch size for PyTorch NLP models. Larger models and larger batch sizes will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch nlp fine tuning batch size", + "output": "pytorch nlp fine tuning batch size refers to Batch size for PyTorch NLP models. -1 for automatic.: Batch size for PyTorch NLP models. Larger models and larger batch sizes will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Batch size for PyTorch NLP models. 
-1 for automatic.: ", + "output": "pytorch nlp fine tuning batch size refers to Batch size for PyTorch NLP models. -1 for automatic.: Batch size for PyTorch NLP models. Larger models and larger batch sizes will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pytorch_nlp_fine_tuning_batch_size", + "output": "pytorch nlp fine tuning batch size refers to Batch size for PyTorch NLP models. Larger models and larger batch sizes will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pytorch_nlp_fine_tuning_batch_size", + "output": "pytorch nlp fine tuning batch size refers to Batch size for PyTorch NLP models. -1 for automatic.: Batch size for PyTorch NLP models. Larger models and larger batch sizes will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_fine_tuning_padding_length", + "output": "pytorch nlp fine tuning padding length refers to Maximum sequence length (padding length) for PyTorch NLP models. Larger models and larger padding lengths will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_fine_tuning_padding_length", + "output": "pytorch nlp fine tuning padding length refers to Maximum sequence length (padding length) for PyTorch NLP models. -1 for automatic.: Maximum sequence length (padding length) for PyTorch NLP models. Larger models and larger padding lengths will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch nlp fine tuning padding length", + "output": "pytorch nlp fine tuning padding length refers to Maximum sequence length (padding length) for PyTorch NLP models. 
-1 for automatic.: Maximum sequence length (padding length) for PyTorch NLP models. Larger models and larger padding lengths will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum sequence length (padding length) for PyTorch NLP models. -1 for automatic.: ", + "output": "pytorch nlp fine tuning padding length refers to Maximum sequence length (padding length) for PyTorch NLP models. -1 for automatic.: Maximum sequence length (padding length) for PyTorch NLP models. Larger models and larger padding lengths will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pytorch_nlp_fine_tuning_padding_length", + "output": "pytorch nlp fine tuning padding length refers to Maximum sequence length (padding length) for PyTorch NLP models. Larger models and larger padding lengths will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pytorch_nlp_fine_tuning_padding_length", + "output": "pytorch nlp fine tuning padding length refers to Maximum sequence length (padding length) for PyTorch NLP models. -1 for automatic.: Maximum sequence length (padding length) for PyTorch NLP models. Larger models and larger padding lengths will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_pretrained_models_dir", + "output": "pytorch nlp pretrained models dir refers to Path to pretrained PyTorch NLP models. 
Note that this can be either a path in the local file system(/path/on/server/to/bert_models_folder), an URL or a S3 location (s3://).To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/bert_models.zipand unzip and store it in a directory on the instance where DAI is installed.``pytorch_nlp_pretrained_models_dir=/path/on/server/to/bert_models_folder`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_pretrained_models_dir", + "output": "pytorch nlp pretrained models dir refers to Path to pretrained PyTorch NLP models. If empty, will get models from S3: Path to pretrained PyTorch NLP models. Note that this can be either a path in the local file system(/path/on/server/to/bert_models_folder), an URL or a S3 location (s3://).To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/bert_models.zipand unzip and store it in a directory on the instance where DAI is installed.``pytorch_nlp_pretrained_models_dir=/path/on/server/to/bert_models_folder`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch nlp pretrained models dir", + "output": "pytorch nlp pretrained models dir refers to Path to pretrained PyTorch NLP models. If empty, will get models from S3: Path to pretrained PyTorch NLP models. 
Note that this can be either a path in the local file system(/path/on/server/to/bert_models_folder), an URL or a S3 location (s3://).To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/bert_models.zipand unzip and store it in a directory on the instance where DAI is installed.``pytorch_nlp_pretrained_models_dir=/path/on/server/to/bert_models_folder`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Path to pretrained PyTorch NLP models. If empty, will get models from S3: ", + "output": "pytorch nlp pretrained models dir refers to Path to pretrained PyTorch NLP models. If empty, will get models from S3: Path to pretrained PyTorch NLP models. Note that this can be either a path in the local file system(/path/on/server/to/bert_models_folder), an URL or a S3 location (s3://).To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/bert_models.zipand unzip and store it in a directory on the instance where DAI is installed.``pytorch_nlp_pretrained_models_dir=/path/on/server/to/bert_models_folder`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pytorch_nlp_pretrained_models_dir", + "output": "pytorch nlp pretrained models dir refers to Path to pretrained PyTorch NLP models. 
Note that this can be either a path in the local file system(/path/on/server/to/bert_models_folder), an URL or a S3 location (s3://).To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/bert_models.zipand unzip and store it in a directory on the instance where DAI is installed.``pytorch_nlp_pretrained_models_dir=/path/on/server/to/bert_models_folder`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pytorch_nlp_pretrained_models_dir", + "output": "pytorch nlp pretrained models dir refers to Path to pretrained PyTorch NLP models. If empty, will get models from S3: Path to pretrained PyTorch NLP models. Note that this can be either a path in the local file system(/path/on/server/to/bert_models_folder), an URL or a S3 location (s3://).To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/bert_models.zipand unzip and store it in a directory on the instance where DAI is installed.``pytorch_nlp_pretrained_models_dir=/path/on/server/to/bert_models_folder`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_pretrained_s3_access_key_id", + "output": "pytorch nlp pretrained s3 access key id refers to S3 access key Id to use when pytorch_nlp_pretrained_models_dir is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_pretrained_s3_access_key_id", + "output": "pytorch nlp pretrained s3 access key id refers to S3 access key Id to use when pytorch_nlp_pretrained_models_dir is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch nlp pretrained s3 access key id", + "output": "pytorch nlp pretrained s3 access key id refers to S3 
access key Id to use when pytorch_nlp_pretrained_models_dir is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "S3 access key Id to use when pytorch_nlp_pretrained_models_dir is set to an S3 location.: ", + "output": "pytorch nlp pretrained s3 access key id refers to S3 access key Id to use when pytorch_nlp_pretrained_models_dir is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pytorch_nlp_pretrained_s3_access_key_id", + "output": "pytorch nlp pretrained s3 access key id refers to S3 access key Id to use when pytorch_nlp_pretrained_models_dir is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pytorch_nlp_pretrained_s3_access_key_id", + "output": "pytorch nlp pretrained s3 access key id refers to S3 access key Id to use when pytorch_nlp_pretrained_models_dir is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_pretrained_s3_secret_access_key", + "output": "pytorch nlp pretrained s3 secret access key refers to S3 secret access key to use when pytorch_nlp_pretrained_models_dir is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch_nlp_pretrained_s3_secret_access_key", + "output": "pytorch nlp pretrained s3 secret access key refers to S3 secret access key to use when pytorch_nlp_pretrained_models_dir is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pytorch nlp pretrained s3 secret access key", + "output": "pytorch nlp pretrained s3 secret access key refers to S3 secret access 
key to use when pytorch_nlp_pretrained_models_dir is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "S3 secret access key to use when pytorch_nlp_pretrained_models_dir is set to an S3 location.: ", + "output": "pytorch nlp pretrained s3 secret access key refers to S3 secret access key to use when pytorch_nlp_pretrained_models_dir is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pytorch_nlp_pretrained_s3_secret_access_key", + "output": "pytorch nlp pretrained s3 secret access key refers to S3 secret access key to use when pytorch_nlp_pretrained_models_dir is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pytorch_nlp_pretrained_s3_secret_access_key", + "output": "pytorch nlp pretrained s3 secret access key refers to S3 secret access key to use when pytorch_nlp_pretrained_models_dir is set to an S3 location.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text_fraction_for_text_dominated_problem", + "output": "text fraction for text dominated problem refers to Fraction of text columns out of all features to be considered a text-dominated problem" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text_fraction_for_text_dominated_problem", + "output": "text fraction for text dominated problem refers to Fraction of text columns out of all features to be considered a text-dominated problem: Fraction of text columns out of all features to be considered a text-dominated problem" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text fraction for text dominated problem", 
+ "output": "text fraction for text dominated problem refers to Fraction of text columns out of all features to be considered a text-dominated problem: Fraction of text columns out of all features to be considered a text-dominated problem" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Fraction of text columns out of all features to be considered a text-dominated problem: ", + "output": "text fraction for text dominated problem refers to Fraction of text columns out of all features to be considered a text-dominated problem: Fraction of text columns out of all features to be considered a text-dominated problem" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting text_fraction_for_text_dominated_problem", + "output": "text fraction for text dominated problem refers to Fraction of text columns out of all features to be considered a text-dominated problem" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting text_fraction_for_text_dominated_problem", + "output": "text fraction for text dominated problem refers to Fraction of text columns out of all features to be considered a text-dominated problem: Fraction of text columns out of all features to be considered a text-dominated problem" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text_transformer_fraction_for_text_dominated_problem", + "output": "text transformer fraction for text dominated problem refers to Fraction of text transformers to all transformers above which to trigger that text dominated problem" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text_transformer_fraction_for_text_dominated_problem", + "output": "text transformer fraction for text dominated problem 
refers to Fraction of text per all transformers to trigger that text dominated: Fraction of text transformers to all transformers above which to trigger that text dominated problem" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text transformer fraction for text dominated problem", + "output": "text transformer fraction for text dominated problem refers to Fraction of text per all transformers to trigger that text dominated: Fraction of text transformers to all transformers above which to trigger that text dominated problem" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Fraction of text per all transformers to trigger that text dominated: ", + "output": "text transformer fraction for text dominated problem refers to Fraction of text per all transformers to trigger that text dominated: Fraction of text transformers to all transformers above which to trigger that text dominated problem" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting text_transformer_fraction_for_text_dominated_problem", + "output": "text transformer fraction for text dominated problem refers to Fraction of text transformers to all transformers above which to trigger that text dominated problem" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting text_transformer_fraction_for_text_dominated_problem", + "output": "text transformer fraction for text dominated problem refers to Fraction of text per all transformers to trigger that text dominated: Fraction of text transformers to all transformers above which to trigger that text dominated problem" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text_dominated_limit_tuning", + "output": "text dominated 
limit tuning refers to Whether to reduce options for text-dominated models to reduce expense, e.g. disable ensemble, disable genetic algorithm, single identity target encoder for classification, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text_dominated_limit_tuning", + "output": "text dominated limit tuning refers to Whether to reduce options for text-dominated models to reduce expense, e.g. disable ensemble, disable genetic algorithm, single identity target encoder for classification, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text dominated limit tuning", + "output": "text dominated limit tuning refers to Whether to reduce options for text-dominated models to reduce expense, e.g. disable ensemble, disable genetic algorithm, single identity target encoder for classification, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "text dominated limit tuning refers to Whether to reduce options for text-dominated models to reduce expense, e.g. disable ensemble, disable genetic algorithm, single identity target encoder for classification, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting text_dominated_limit_tuning", + "output": "text dominated limit tuning refers to Whether to reduce options for text-dominated models to reduce expense, e.g. disable ensemble, disable genetic algorithm, single identity target encoder for classification, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting text_dominated_limit_tuning", + "output": "text dominated limit tuning refers to Whether to reduce options for text-dominated models to reduce expense, e.g. 
disable ensemble, disable genetic algorithm, single identity target encoder for classification, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_dominated_limit_tuning", + "output": "image dominated limit tuning refers to Whether to reduce options for image-dominated models to reduce expense, e.g. disable ensemble, disable genetic algorithm, single identity target encoder for classification, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_dominated_limit_tuning", + "output": "image dominated limit tuning refers to Whether to reduce options for image-dominated models to reduce expense, e.g. disable ensemble, disable genetic algorithm, single identity target encoder for classification, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image dominated limit tuning", + "output": "image dominated limit tuning refers to Whether to reduce options for image-dominated models to reduce expense, e.g. disable ensemble, disable genetic algorithm, single identity target encoder for classification, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "image dominated limit tuning refers to Whether to reduce options for image-dominated models to reduce expense, e.g. disable ensemble, disable genetic algorithm, single identity target encoder for classification, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting image_dominated_limit_tuning", + "output": "image dominated limit tuning refers to Whether to reduce options for image-dominated models to reduce expense, e.g. disable ensemble, disable genetic algorithm, single identity target encoder for classification, etc." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting image_dominated_limit_tuning", + "output": "image dominated limit tuning refers to Whether to reduce options for image-dominated models to reduce expense, e.g. disable ensemble, disable genetic algorithm, single identity target encoder for classification, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string_col_as_text_threshold", + "output": "string col as text threshold refers to Threshold for average string-is-text score as determined by internal heuristicsIt decides when a string column will be treated as text (for an NLP problem) or just asa standard categorical variable.Higher values will favor string columns as categoricals, lower values will favor string columns as text.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string_col_as_text_threshold", + "output": "string col as text threshold refers to Threshold for string columns to be treated as text (0.0 - text, 1.0 - string): Threshold for average string-is-text score as determined by internal heuristicsIt decides when a string column will be treated as text (for an NLP problem) or just asa standard categorical variable.Higher values will favor string columns as categoricals, lower values will favor string columns as text.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string col as text threshold", + "output": "string col as text threshold refers to Threshold for string columns to be treated as text (0.0 - text, 1.0 - string): Threshold for average string-is-text score as determined by internal heuristicsIt decides when a string column will be treated as text (for an NLP problem) or just asa standard categorical variable.Higher values will favor string columns as categoricals, lower values will favor string columns as text.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Threshold for string columns to be treated as text (0.0 - text, 1.0 - string): ", + "output": "string col as text threshold refers to Threshold for string columns to be treated as text (0.0 - text, 1.0 - string): Threshold for average string-is-text score as determined by internal heuristicsIt decides when a string column will be treated as text (for an NLP problem) or just asa standard categorical variable.Higher values will favor string columns as categoricals, lower values will favor string columns as text.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting string_col_as_text_threshold", + "output": "string col as text threshold refers to Threshold for average string-is-text score as determined by internal heuristicsIt decides when a string column will be treated as text (for an NLP problem) or just asa standard categorical variable.Higher values will favor string columns as categoricals, lower values will favor string columns as text.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting string_col_as_text_threshold", + "output": "string col as text threshold refers to Threshold for string columns to be treated as text (0.0 - text, 1.0 - string): Threshold for average string-is-text score as determined by internal heuristicsIt decides when a string column will be treated as text (for an NLP problem) or just asa standard categorical variable.Higher values will favor string columns as categoricals, lower values will favor string columns as text.Set string_col_as_text_min_relative_cardinality=0.0 to force string column to be treated as text despite low number of uniques." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string_col_as_text_threshold_preview", + "output": "string col as text threshold preview refers to Threshold for string columns to be treated as text during preview - should be less than string_col_as_text_threshold to allow data with first 20 rows that don't look like text to still work for Text-only transformers (0.0 - text, 1.0 - string)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string_col_as_text_threshold_preview", + "output": "string col as text threshold preview refers to Threshold for string columns to be treated as text during preview - should be less than string_col_as_text_threshold to allow data with first 20 rows that don't look like text to still work for Text-only transformers (0.0 - text, 1.0 - string)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string col as text threshold preview", + "output": "string col as text threshold preview refers to Threshold for string columns to be treated as text during preview - should be less than string_col_as_text_threshold to allow data with first 20 rows that don't look like text to still work for Text-only transformers (0.0 - text, 1.0 - string)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "string col as text threshold preview refers to Threshold for string columns to be treated as text during preview - should be less than string_col_as_text_threshold to allow data with first 20 rows that don't look like text to still work for Text-only transformers (0.0 - text, 1.0 - string)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting string_col_as_text_threshold_preview", + 
"output": "string col as text threshold preview refers to Threshold for string columns to be treated as text during preview - should be less than string_col_as_text_threshold to allow data with first 20 rows that don't look like text to still work for Text-only transformers (0.0 - text, 1.0 - string)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting string_col_as_text_threshold_preview", + "output": "string col as text threshold preview refers to Threshold for string columns to be treated as text during preview - should be less than string_col_as_text_threshold to allow data with first 20 rows that don't look like text to still work for Text-only transformers (0.0 - text, 1.0 - string)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string_col_as_text_min_relative_cardinality", + "output": "string col as text min relative cardinality refers to Minimum fraction of unique values for string columns to be considered as possible text (otherwise categorical)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string_col_as_text_min_relative_cardinality", + "output": "string col as text min relative cardinality refers to Minimum fraction of unique values for string columns to be considered as possible text (otherwise categorical)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string col as text min relative cardinality", + "output": "string col as text min relative cardinality refers to Minimum fraction of unique values for string columns to be considered as possible text (otherwise categorical)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "string col as text min relative cardinality 
refers to Minimum fraction of unique values for string columns to be considered as possible text (otherwise categorical)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting string_col_as_text_min_relative_cardinality", + "output": "string col as text min relative cardinality refers to Minimum fraction of unique values for string columns to be considered as possible text (otherwise categorical)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting string_col_as_text_min_relative_cardinality", + "output": "string col as text min relative cardinality refers to Minimum fraction of unique values for string columns to be considered as possible text (otherwise categorical)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string_col_as_text_min_absolute_cardinality", + "output": "string col as text min absolute cardinality refers to Minimum number of uniques for string columns to be considered as possible text (if not already)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string_col_as_text_min_absolute_cardinality", + "output": "string col as text min absolute cardinality refers to Minimum number of uniques for string columns to be considered as possible text (if not already)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string col as text min absolute cardinality", + "output": "string col as text min absolute cardinality refers to Minimum number of uniques for string columns to be considered as possible text (if not already)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "string col as text min absolute cardinality refers to 
Minimum number of uniques for string columns to be considered as possible text (if not already)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting string_col_as_text_min_absolute_cardinality", + "output": "string col as text min absolute cardinality refers to Minimum number of uniques for string columns to be considered as possible text (if not already)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting string_col_as_text_min_absolute_cardinality", + "output": "string col as text min absolute cardinality refers to Minimum number of uniques for string columns to be considered as possible text (if not already)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tokenize_single_chars", + "output": "tokenize single chars refers to If disabled, require 2 or more alphanumeric characters for a token in Text (Count and TF/IDF) transformers, otherwise create tokens out of single alphanumeric characters. True means that 'Street 3' is tokenized into 'Street' and '3', while False means that it's tokenized into 'Street'." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tokenize_single_chars", + "output": "tokenize single chars refers to Tokenize single characters.: If disabled, require 2 or more alphanumeric characters for a token in Text (Count and TF/IDF) transformers, otherwise create tokens out of single alphanumeric characters. True means that 'Street 3' is tokenized into 'Street' and '3', while False means that it's tokenized into 'Street'."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tokenize single chars", + "output": "tokenize single chars refers to Tokenize single characters.: If disabled, require 2 or more alphanumeric characters for a token in Text (Count and TF/IDF) transformers, otherwise create tokens out of single alphanumeric characters. True means that 'Street 3' is tokenized into 'Street' and '3', while False means that it's tokenized into 'Street'." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Tokenize single characters.: ", + "output": "tokenize single chars refers to Tokenize single characters.: If disabled, require 2 or more alphanumeric characters for a token in Text (Count and TF/IDF) transformers, otherwise create tokens out of single alphanumeric characters. True means that 'Street 3' is tokenized into 'Street' and '3', while False means that it's tokenized into 'Street'." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tokenize_single_chars", + "output": "tokenize single chars refers to If disabled, require 2 or more alphanumeric characters for a token in Text (Count and TF/IDF) transformers, otherwise create tokens out of single alphanumeric characters. True means that 'Street 3' is tokenized into 'Street' and '3', while False means that it's tokenized into 'Street'." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tokenize_single_chars", + "output": "tokenize single chars refers to Tokenize single characters.: If disabled, require 2 or more alphanumeric characters for a token in Text (Count and TF/IDF) transformers, otherwise create tokens out of single alphanumeric characters. True means that 'Street 3' is tokenized into 'Street' and '3', while False means that it's tokenized into 'Street'." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "supported_image_types", + "output": "supported image types refers to Supported image types. URIs with these endings will be considered as image paths (local or remote)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "supported_image_types", + "output": "supported image types refers to Supported image types. URIs with these endings will be considered as image paths (local or remote)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "supported image types", + "output": "supported image types refers to Supported image types. URIs with these endings will be considered as image paths (local or remote)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "supported image types refers to Supported image types. URIs with these endings will be considered as image paths (local or remote)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting supported_image_types", + "output": "supported image types refers to Supported image types. URIs with these endings will be considered as image paths (local or remote)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting supported_image_types", + "output": "supported image types refers to Supported image types. URIs with these endings will be considered as image paths (local or remote)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_paths_absolute", + "output": "image paths absolute refers to Whether to create absolute paths for images when importing datasets containing images. 
Can facilitate testing or re-use of frames for scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_paths_absolute", + "output": "image paths absolute refers to Whether to create absolute paths for images when importing datasets containing images. Can facilitate testing or re-use of frames for scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image paths absolute", + "output": "image paths absolute refers to Whether to create absolute paths for images when importing datasets containing images. Can facilitate testing or re-use of frames for scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "image paths absolute refers to Whether to create absolute paths for images when importing datasets containing images. Can facilitate testing or re-use of frames for scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting image_paths_absolute", + "output": "image paths absolute refers to Whether to create absolute paths for images when importing datasets containing images. Can facilitate testing or re-use of frames for scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting image_paths_absolute", + "output": "image paths absolute refers to Whether to create absolute paths for images when importing datasets containing images. Can facilitate testing or re-use of frames for scoring."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_tensorflow_image", + "output": "enable tensorflow image refers to Whether to use pretrained deep learning models for processing of image data as part of the feature engineering pipeline. A column of URIs to images (jpg, png, etc.) will be converted to a numeric representation using ImageNet-pretrained deep learning models. If no GPUs are found, then must be set to 'on' to enable." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_tensorflow_image", + "output": "enable tensorflow image refers to Enable Image Transformer for processing of image data: Whether to use pretrained deep learning models for processing of image data as part of the feature engineering pipeline. A column of URIs to images (jpg, png, etc.) will be converted to a numeric representation using ImageNet-pretrained deep learning models. If no GPUs are found, then must be set to 'on' to enable." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable tensorflow image", + "output": "enable tensorflow image refers to Enable Image Transformer for processing of image data: Whether to use pretrained deep learning models for processing of image data as part of the feature engineering pipeline. A column of URIs to images (jpg, png, etc.) will be converted to a numeric representation using ImageNet-pretrained deep learning models. If no GPUs are found, then must be set to 'on' to enable." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable Image Transformer for processing of image data: ", + "output": "enable tensorflow image refers to Enable Image Transformer for processing of image data: Whether to use pretrained deep learning models for processing of image data as part of the feature engineering pipeline. A column of URIs to images (jpg, png, etc.) will be converted to a numeric representation using ImageNet-pretrained deep learning models. If no GPUs are found, then must be set to 'on' to enable." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_tensorflow_image", + "output": "enable tensorflow image refers to Whether to use pretrained deep learning models for processing of image data as part of the feature engineering pipeline. A column of URIs to images (jpg, png, etc.) will be converted to a numeric representation using ImageNet-pretrained deep learning models. If no GPUs are found, then must be set to 'on' to enable." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_tensorflow_image", + "output": "enable tensorflow image refers to Enable Image Transformer for processing of image data: Whether to use pretrained deep learning models for processing of image data as part of the feature engineering pipeline. A column of URIs to images (jpg, png, etc.) will be converted to a numeric representation using ImageNet-pretrained deep learning models. If no GPUs are found, then must be set to 'on' to enable." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_pretrained_models", + "output": "tensorflow image pretrained models refers to Supported ImageNet pretrained architectures for Image Transformer. 
Non-default ones will require internet access to download pretrained models from H2O S3 buckets (To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip and unzip inside tensorflow_image_pretrained_models_dir)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_pretrained_models", + "output": "tensorflow image pretrained models refers to Supported ImageNet pretrained architectures for Image Transformer: Supported ImageNet pretrained architectures for Image Transformer. Non-default ones will require internet access to download pretrained models from H2O S3 buckets (To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip and unzip inside tensorflow_image_pretrained_models_dir)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow image pretrained models", + "output": "tensorflow image pretrained models refers to Supported ImageNet pretrained architectures for Image Transformer: Supported ImageNet pretrained architectures for Image Transformer. Non-default ones will require internet access to download pretrained models from H2O S3 buckets (To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip and unzip inside tensorflow_image_pretrained_models_dir)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Supported ImageNet pretrained architectures for Image Transformer: ", + "output": "tensorflow image pretrained models refers to Supported ImageNet pretrained architectures for Image Transformer: Supported ImageNet pretrained architectures for Image Transformer. 
Non-default ones will require internet access to download pretrained models from H2O S3 buckets (To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip and unzip inside tensorflow_image_pretrained_models_dir)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_image_pretrained_models", + "output": "tensorflow image pretrained models refers to Supported ImageNet pretrained architectures for Image Transformer. Non-default ones will require internet access to download pretrained models from H2O S3 buckets (To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip and unzip inside tensorflow_image_pretrained_models_dir)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_image_pretrained_models", + "output": "tensorflow image pretrained models refers to Supported ImageNet pretrained architectures for Image Transformer: Supported ImageNet pretrained architectures for Image Transformer. Non-default ones will require internet access to download pretrained models from H2O S3 buckets (To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip and unzip inside tensorflow_image_pretrained_models_dir)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_vectorization_output_dimension", + "output": "tensorflow image vectorization output dimension refers to Dimensionality of feature (embedding) space created by Image Transformer. If more than one is selected, multiple transformers can be active at the same time." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_vectorization_output_dimension", + "output": "tensorflow image vectorization output dimension refers to Dimensionality of feature space created by Image Transformer: Dimensionality of feature (embedding) space created by Image Transformer. If more than one is selected, multiple transformers can be active at the same time." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow image vectorization output dimension", + "output": "tensorflow image vectorization output dimension refers to Dimensionality of feature space created by Image Transformer: Dimensionality of feature (embedding) space created by Image Transformer. If more than one is selected, multiple transformers can be active at the same time." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Dimensionality of feature space created by Image Transformer: ", + "output": "tensorflow image vectorization output dimension refers to Dimensionality of feature space created by Image Transformer: Dimensionality of feature (embedding) space created by Image Transformer. If more than one is selected, multiple transformers can be active at the same time." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_image_vectorization_output_dimension", + "output": "tensorflow image vectorization output dimension refers to Dimensionality of feature (embedding) space created by Image Transformer. If more than one is selected, multiple transformers can be active at the same time." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_image_vectorization_output_dimension", + "output": "tensorflow image vectorization output dimension refers to Dimensionality of feature space created by Image Transformer: Dimensionality of feature (embedding) space created by Image Transformer. If more than one is selected, multiple transformers can be active at the same time." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_fine_tune", + "output": "tensorflow image fine tune refers to Enable fine-tuning of the ImageNet pretrained models used for the Image Transformer. Enabling this will slow down training, but should increase accuracy." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_fine_tune", + "output": "tensorflow image fine tune refers to Enable fine-tuning of pretrained models used for Image Transformer: Enable fine-tuning of the ImageNet pretrained models used for the Image Transformer. Enabling this will slow down training, but should increase accuracy." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow image fine tune", + "output": "tensorflow image fine tune refers to Enable fine-tuning of pretrained models used for Image Transformer: Enable fine-tuning of the ImageNet pretrained models used for the Image Transformer. Enabling this will slow down training, but should increase accuracy." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable fine-tuning of pretrained models used for Image Transformer: ", + "output": "tensorflow image fine tune refers to Enable fine-tuning of pretrained models used for Image Transformer: Enable fine-tuning of the ImageNet pretrained models used for the Image Transformer. Enabling this will slow down training, but should increase accuracy." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_image_fine_tune", + "output": "tensorflow image fine tune refers to Enable fine-tuning of the ImageNet pretrained models used for the Image Transformer. Enabling this will slow down training, but should increase accuracy." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_image_fine_tune", + "output": "tensorflow image fine tune refers to Enable fine-tuning of pretrained models used for Image Transformer: Enable fine-tuning of the ImageNet pretrained models used for the Image Transformer. Enabling this will slow down training, but should increase accuracy." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_fine_tuning_num_epochs", + "output": "tensorflow image fine tuning num epochs refers to Number of epochs for fine-tuning of ImageNet pretrained models used for the Image Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_fine_tuning_num_epochs", + "output": "tensorflow image fine tuning num epochs refers to Number of epochs for fine-tuning used for Image Transformer: Number of epochs for fine-tuning of ImageNet pretrained models used for the Image Transformer." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow image fine tuning num epochs", + "output": "tensorflow image fine tuning num epochs refers to Number of epochs for fine-tuning used for Image Transformer: Number of epochs for fine-tuning of ImageNet pretrained models used for the Image Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of epochs for fine-tuning used for Image Transformer: ", + "output": "tensorflow image fine tuning num epochs refers to Number of epochs for fine-tuning used for Image Transformer: Number of epochs for fine-tuning of ImageNet pretrained models used for the Image Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_image_fine_tuning_num_epochs", + "output": "tensorflow image fine tuning num epochs refers to Number of epochs for fine-tuning of ImageNet pretrained models used for the Image Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_image_fine_tuning_num_epochs", + "output": "tensorflow image fine tuning num epochs refers to Number of epochs for fine-tuning used for Image Transformer: Number of epochs for fine-tuning of ImageNet pretrained models used for the Image Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_augmentations", + "output": "tensorflow image augmentations refers to The list of possible image augmentations to apply while fine-tuning the ImageNet pretrained models used for the Image Transformer. Details about individual augmentations could be found here: https://albumentations.ai/docs/." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_augmentations", + "output": "tensorflow image augmentations refers to List of augmentations for fine-tuning used for Image Transformer: The list of possible image augmentations to apply while fine-tuning the ImageNet pretrained models used for the Image Transformer. Details about individual augmentations could be found here: https://albumentations.ai/docs/." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow image augmentations", + "output": "tensorflow image augmentations refers to List of augmentations for fine-tuning used for Image Transformer: The list of possible image augmentations to apply while fine-tuning the ImageNet pretrained models used for the Image Transformer. Details about individual augmentations could be found here: https://albumentations.ai/docs/." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "List of augmentations for fine-tuning used for Image Transformer: ", + "output": "tensorflow image augmentations refers to List of augmentations for fine-tuning used for Image Transformer: The list of possible image augmentations to apply while fine-tuning the ImageNet pretrained models used for the Image Transformer. Details about individual augmentations could be found here: https://albumentations.ai/docs/." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_image_augmentations", + "output": "tensorflow image augmentations refers to The list of possible image augmentations to apply while fine-tuning the ImageNet pretrained models used for the Image Transformer. Details about individual augmentations could be found here: https://albumentations.ai/docs/." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_image_augmentations", + "output": "tensorflow image augmentations refers to List of augmentations for fine-tuning used for Image Transformer: The list of possible image augmentations to apply while fine-tuning the ImageNet pretrained models used for the Image Transformer. Details about individual augmentations could be found here: https://albumentations.ai/docs/." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_batch_size", + "output": "tensorflow image batch size refers to Batch size for Image Transformer. Larger architectures and larger batch sizes will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_batch_size", + "output": "tensorflow image batch size refers to Batch size for Image Transformer. Automatic: -1: Batch size for Image Transformer. Larger architectures and larger batch sizes will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow image batch size", + "output": "tensorflow image batch size refers to Batch size for Image Transformer. Automatic: -1: Batch size for Image Transformer. Larger architectures and larger batch sizes will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Batch size for Image Transformer. Automatic: -1: ", + "output": "tensorflow image batch size refers to Batch size for Image Transformer. Automatic: -1: Batch size for Image Transformer. Larger architectures and larger batch sizes will use more memory." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_image_batch_size", + "output": "tensorflow image batch size refers to Batch size for Image Transformer. Larger architectures and larger batch sizes will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_image_batch_size", + "output": "tensorflow image batch size refers to Batch size for Image Transformer. Automatic: -1: Batch size for Image Transformer. Larger architectures and larger batch sizes will use more memory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_pretrained_models_dir", + "output": "tensorflow image pretrained models dir refers to Path to pretrained Image models. To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip, then extract it in a directory on the instance where Driverless AI is installed. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_pretrained_models_dir", + "output": "tensorflow image pretrained models dir refers to Path to pretrained Image models. It is used to load the pretrained models if there is no Internet access.: Path to pretrained Image models. To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip, then extract it in a directory on the instance where Driverless AI is installed. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow image pretrained models dir", + "output": "tensorflow image pretrained models dir refers to Path to pretrained Image models. 
It is used to load the pretrained models if there is no Internet access.: Path to pretrained Image models. To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip, then extract it in a directory on the instance where Driverless AI is installed. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Path to pretrained Image models. It is used to load the pretrained models if there is no Internet access.: ", + "output": "tensorflow image pretrained models dir refers to Path to pretrained Image models. It is used to load the pretrained models if there is no Internet access.: Path to pretrained Image models. To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip, then extract it in a directory on the instance where Driverless AI is installed. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_image_pretrained_models_dir", + "output": "tensorflow image pretrained models dir refers to Path to pretrained Image models. To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip, then extract it in a directory on the instance where Driverless AI is installed. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_image_pretrained_models_dir", + "output": "tensorflow image pretrained models dir refers to Path to pretrained Image models. It is used to load the pretrained models if there is no Internet access.: Path to pretrained Image models. To get all models, download http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip, then extract it in a directory on the instance where Driverless AI is installed. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_download_timeout", + "output": "image download timeout refers to Max. number of seconds to wait for image download if images are provided by URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_download_timeout", + "output": "image download timeout refers to Image download timeout in seconds: Max. number of seconds to wait for image download if images are provided by URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image download timeout", + "output": "image download timeout refers to Image download timeout in seconds: Max. number of seconds to wait for image download if images are provided by URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Image download timeout in seconds: ", + "output": "image download timeout refers to Image download timeout in seconds: Max. number of seconds to wait for image download if images are provided by URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting image_download_timeout", + "output": "image download timeout refers to Max. number of seconds to wait for image download if images are provided by URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting image_download_timeout", + "output": "image download timeout refers to Image download timeout in seconds: Max. 
number of seconds to wait for image download if images are provided by URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string_col_as_image_max_missing_fraction", + "output": "string col as image max missing fraction refers to Maximum fraction of missing elements in a string column for it to be considered as possible image paths (URIs)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string_col_as_image_max_missing_fraction", + "output": "string col as image max missing fraction refers to Max allowed fraction of missing values for image column: Maximum fraction of missing elements in a string column for it to be considered as possible image paths (URIs)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string col as image max missing fraction", + "output": "string col as image max missing fraction refers to Max allowed fraction of missing values for image column: Maximum fraction of missing elements in a string column for it to be considered as possible image paths (URIs)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max allowed fraction of missing values for image column: ", + "output": "string col as image max missing fraction refers to Max allowed fraction of missing values for image column: Maximum fraction of missing elements in a string column for it to be considered as possible image paths (URIs)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting string_col_as_image_max_missing_fraction", + "output": "string col as image max missing fraction refers to Maximum fraction of missing elements in a string column for it to be considered as possible image paths (URIs)" + }, + { + "prompt_type": 
"human_bot", + "instruction": "Provide a detailed explanation of the expert setting string_col_as_image_max_missing_fraction", + "output": "string col as image max missing fraction refers to Max allowed fraction of missing values for image column: Maximum fraction of missing elements in a string column for it to be considered as possible image paths (URIs)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string_col_as_image_min_valid_types_fraction", + "output": "string col as image min valid types fraction refers to Fraction of (unique) image URIs that need to have valid endings (as defined by string_col_as_image_valid_types) for a string column to be considered as image data" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string_col_as_image_min_valid_types_fraction", + "output": "string col as image min valid types fraction refers to Min. fraction of images that need to be of valid types for image column to be used: Fraction of (unique) image URIs that need to have valid endings (as defined by string_col_as_image_valid_types) for a string column to be considered as image data" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "string col as image min valid types fraction", + "output": "string col as image min valid types fraction refers to Min. fraction of images that need to be of valid types for image column to be used: Fraction of (unique) image URIs that need to have valid endings (as defined by string_col_as_image_valid_types) for a string column to be considered as image data" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Min. 
fraction of images that need to be of valid types for image column to be used: ", + "output": "string col as image min valid types fraction refers to Min. fraction of images that need to be of valid types for image column to be used: Fraction of (unique) image URIs that need to have valid endings (as defined by string_col_as_image_valid_types) for a string column to be considered as image data" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting string_col_as_image_min_valid_types_fraction", + "output": "string col as image min valid types fraction refers to Fraction of (unique) image URIs that need to have valid endings (as defined by string_col_as_image_valid_types) for a string column to be considered as image data" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting string_col_as_image_min_valid_types_fraction", + "output": "string col as image min valid types fraction refers to Min. fraction of images that need to be of valid types for image column to be used: Fraction of (unique) image URIs that need to have valid endings (as defined by string_col_as_image_valid_types) for a string column to be considered as image data" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_use_gpu", + "output": "tensorflow image use gpu refers to Whether to use GPU(s), if available, to transform images into embeddings with Image Transformer. Can lead to significant speedups." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_image_use_gpu", + "output": "tensorflow image use gpu refers to Enable GPU(s) for faster transformations of Image Transformer.: Whether to use GPU(s), if available, to transform images into embeddings with Image Transformer. Can lead to significant speedups." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow image use gpu", + "output": "tensorflow image use gpu refers to Enable GPU(s) for faster transformations of Image Transformer.: Whether to use GPU(s), if available, to transform images into embeddings with Image Transformer. Can lead to significant speedups." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable GPU(s) for faster transformations of Image Transformer.: ", + "output": "tensorflow image use gpu refers to Enable GPU(s) for faster transformations of Image Transformer.: Whether to use GPU(s), if available, to transform images into embeddings with Image Transformer. Can lead to significant speedups." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_image_use_gpu", + "output": "tensorflow image use gpu refers to Whether to use GPU(s), if available, to transform images into embeddings with Image Transformer. Can lead to significant speedups." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_image_use_gpu", + "output": "tensorflow image use gpu refers to Enable GPU(s) for faster transformations of Image Transformer.: Whether to use GPU(s), if available, to transform images into embeddings with Image Transformer. Can lead to significant speedups." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_image_auto_search_space", + "output": "params image auto search space refers to Nominally, the time dial controls the search space, with higher time trying more options, but any keys present in this dictionary will override the automatic choices.e.g. 
``params_image_auto_search_space=\"{'augmentation': ['safe'], 'crop_strategy': ['Resize'], 'optimizer': ['AdamW'], 'dropout': [0.1], 'epochs_per_stage': [5], 'warmup_epochs': [0], 'mixup': [0.0], 'cutmix': [0.0], 'global_pool': ['avg'], 'learning_rate': [3e-4]}\"``Options, e.g. used for time>=8# Overfit Protection Options:'augmentation': ``[\"safe\", \"semi_safe\", \"hard\"]``'crop_strategy': ``[\"Resize\", \"RandomResizedCropSoft\", \"RandomResizedCropHard\"]``'dropout': ``[0.1, 0.3, 0.5]``# Global Pool Options: avgmax -- sum of AVG and MAX poolings catavgmax -- concatenation of AVG and MAX poolings https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/adaptive_avgmax_pool.py ``'global_pool': ['avg', 'avgmax', 'catavgmax']``# Regression: No MixUp and CutMix: ``'mixup': [0.0]`` ``'cutmix': [0.0]``# Classification: Beta distribution coeff to generate weights for MixUp: ``'mixup': [0.0, 0.4, 1.0, 3.0]`` ``'cutmix': [0.0, 0.4, 1.0, 3.0]``# Optimization Options:``'epochs_per_stage': [5, 10, 15]`` # from 40 to 135 epochs``'warmup_epochs': [0, 0.5, 1]````'optimizer': [\"AdamW\", \"SGD\"]````'learning_rate': [1e-3, 3e-4, 1e-4]``" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_image_auto_search_space", + "output": "params image auto search space refers to Search parameter overrides for image auto: Nominally, the time dial controls the search space, with higher time trying more options, but any keys present in this dictionary will override the automatic choices.e.g. ``params_image_auto_search_space=\"{'augmentation': ['safe'], 'crop_strategy': ['Resize'], 'optimizer': ['AdamW'], 'dropout': [0.1], 'epochs_per_stage': [5], 'warmup_epochs': [0], 'mixup': [0.0], 'cutmix': [0.0], 'global_pool': ['avg'], 'learning_rate': [3e-4]}\"``Options, e.g. 
used for time>=8# Overfit Protection Options:'augmentation': ``[\"safe\", \"semi_safe\", \"hard\"]``'crop_strategy': ``[\"Resize\", \"RandomResizedCropSoft\", \"RandomResizedCropHard\"]``'dropout': ``[0.1, 0.3, 0.5]``# Global Pool Options: avgmax -- sum of AVG and MAX poolings catavgmax -- concatenation of AVG and MAX poolings https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/adaptive_avgmax_pool.py ``'global_pool': ['avg', 'avgmax', 'catavgmax']``# Regression: No MixUp and CutMix: ``'mixup': [0.0]`` ``'cutmix': [0.0]``# Classification: Beta distribution coeff to generate weights for MixUp: ``'mixup': [0.0, 0.4, 1.0, 3.0]`` ``'cutmix': [0.0, 0.4, 1.0, 3.0]``# Optimization Options:``'epochs_per_stage': [5, 10, 15]`` # from 40 to 135 epochs``'warmup_epochs': [0, 0.5, 1]````'optimizer': [\"AdamW\", \"SGD\"]````'learning_rate': [1e-3, 3e-4, 1e-4]``" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params image auto search space", + "output": "params image auto search space refers to Search parameter overrides for image auto: Nominally, the time dial controls the search space, with higher time trying more options, but any keys present in this dictionary will override the automatic choices.e.g. ``params_image_auto_search_space=\"{'augmentation': ['safe'], 'crop_strategy': ['Resize'], 'optimizer': ['AdamW'], 'dropout': [0.1], 'epochs_per_stage': [5], 'warmup_epochs': [0], 'mixup': [0.0], 'cutmix': [0.0], 'global_pool': ['avg'], 'learning_rate': [3e-4]}\"``Options, e.g. 
used for time>=8# Overfit Protection Options:'augmentation': ``[\"safe\", \"semi_safe\", \"hard\"]``'crop_strategy': ``[\"Resize\", \"RandomResizedCropSoft\", \"RandomResizedCropHard\"]``'dropout': ``[0.1, 0.3, 0.5]``# Global Pool Options: avgmax -- sum of AVG and MAX poolings catavgmax -- concatenation of AVG and MAX poolings https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/adaptive_avgmax_pool.py ``'global_pool': ['avg', 'avgmax', 'catavgmax']``# Regression: No MixUp and CutMix: ``'mixup': [0.0]`` ``'cutmix': [0.0]``# Classification: Beta distribution coeff to generate weights for MixUp: ``'mixup': [0.0, 0.4, 1.0, 3.0]`` ``'cutmix': [0.0, 0.4, 1.0, 3.0]``# Optimization Options:``'epochs_per_stage': [5, 10, 15]`` # from 40 to 135 epochs``'warmup_epochs': [0, 0.5, 1]````'optimizer': [\"AdamW\", \"SGD\"]````'learning_rate': [1e-3, 3e-4, 1e-4]``" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Search parameter overrides for image auto: ", + "output": "params image auto search space refers to Search parameter overrides for image auto: Nominally, the time dial controls the search space, with higher time trying more options, but any keys present in this dictionary will override the automatic choices.e.g. ``params_image_auto_search_space=\"{'augmentation': ['safe'], 'crop_strategy': ['Resize'], 'optimizer': ['AdamW'], 'dropout': [0.1], 'epochs_per_stage': [5], 'warmup_epochs': [0], 'mixup': [0.0], 'cutmix': [0.0], 'global_pool': ['avg'], 'learning_rate': [3e-4]}\"``Options, e.g. 
used for time>=8# Overfit Protection Options:'augmentation': ``[\"safe\", \"semi_safe\", \"hard\"]``'crop_strategy': ``[\"Resize\", \"RandomResizedCropSoft\", \"RandomResizedCropHard\"]``'dropout': ``[0.1, 0.3, 0.5]``# Global Pool Options: avgmax -- sum of AVG and MAX poolings catavgmax -- concatenation of AVG and MAX poolings https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/adaptive_avgmax_pool.py ``'global_pool': ['avg', 'avgmax', 'catavgmax']``# Regression: No MixUp and CutMix: ``'mixup': [0.0]`` ``'cutmix': [0.0]``# Classification: Beta distribution coeff to generate weights for MixUp: ``'mixup': [0.0, 0.4, 1.0, 3.0]`` ``'cutmix': [0.0, 0.4, 1.0, 3.0]``# Optimization Options:``'epochs_per_stage': [5, 10, 15]`` # from 40 to 135 epochs``'warmup_epochs': [0, 0.5, 1]````'optimizer': [\"AdamW\", \"SGD\"]````'learning_rate': [1e-3, 3e-4, 1e-4]``" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_image_auto_search_space", + "output": "params image auto search space refers to Nominally, the time dial controls the search space, with higher time trying more options, but any keys present in this dictionary will override the automatic choices.e.g. ``params_image_auto_search_space=\"{'augmentation': ['safe'], 'crop_strategy': ['Resize'], 'optimizer': ['AdamW'], 'dropout': [0.1], 'epochs_per_stage': [5], 'warmup_epochs': [0], 'mixup': [0.0], 'cutmix': [0.0], 'global_pool': ['avg'], 'learning_rate': [3e-4]}\"``Options, e.g. 
used for time>=8# Overfit Protection Options:'augmentation': ``[\"safe\", \"semi_safe\", \"hard\"]``'crop_strategy': ``[\"Resize\", \"RandomResizedCropSoft\", \"RandomResizedCropHard\"]``'dropout': ``[0.1, 0.3, 0.5]``# Global Pool Options: avgmax -- sum of AVG and MAX poolings catavgmax -- concatenation of AVG and MAX poolings https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/adaptive_avgmax_pool.py ``'global_pool': ['avg', 'avgmax', 'catavgmax']``# Regression: No MixUp and CutMix: ``'mixup': [0.0]`` ``'cutmix': [0.0]``# Classification: Beta distribution coeff to generate weights for MixUp: ``'mixup': [0.0, 0.4, 1.0, 3.0]`` ``'cutmix': [0.0, 0.4, 1.0, 3.0]``# Optimization Options:``'epochs_per_stage': [5, 10, 15]`` # from 40 to 135 epochs``'warmup_epochs': [0, 0.5, 1]````'optimizer': [\"AdamW\", \"SGD\"]````'learning_rate': [1e-3, 3e-4, 1e-4]``" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_image_auto_search_space", + "output": "params image auto search space refers to Search parameter overrides for image auto: Nominally, the time dial controls the search space, with higher time trying more options, but any keys present in this dictionary will override the automatic choices.e.g. ``params_image_auto_search_space=\"{'augmentation': ['safe'], 'crop_strategy': ['Resize'], 'optimizer': ['AdamW'], 'dropout': [0.1], 'epochs_per_stage': [5], 'warmup_epochs': [0], 'mixup': [0.0], 'cutmix': [0.0], 'global_pool': ['avg'], 'learning_rate': [3e-4]}\"``Options, e.g. 
used for time>=8# Overfit Protection Options:'augmentation': ``[\"safe\", \"semi_safe\", \"hard\"]``'crop_strategy': ``[\"Resize\", \"RandomResizedCropSoft\", \"RandomResizedCropHard\"]``'dropout': ``[0.1, 0.3, 0.5]``# Global Pool Options: avgmax -- sum of AVG and MAX poolings catavgmax -- concatenation of AVG and MAX poolings https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/adaptive_avgmax_pool.py ``'global_pool': ['avg', 'avgmax', 'catavgmax']``# Regression: No MixUp and CutMix: ``'mixup': [0.0]`` ``'cutmix': [0.0]``# Classification: Beta distribution coeff to generate weights for MixUp: ``'mixup': [0.0, 0.4, 1.0, 3.0]`` ``'cutmix': [0.0, 0.4, 1.0, 3.0]``# Optimization Options:``'epochs_per_stage': [5, 10, 15]`` # from 40 to 135 epochs``'warmup_epochs': [0, 0.5, 1]````'optimizer': [\"AdamW\", \"SGD\"]````'learning_rate': [1e-3, 3e-4, 1e-4]``" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_auto_arch", + "output": "image auto arch refers to Nominally, the accuracy dial controls the architectures considered if this is left empty, but one can choose specific ones. The options in the list are ordered by complexity." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_auto_arch", + "output": "image auto arch refers to Architectures for image auto: Nominally, the accuracy dial controls the architectures considered if this is left empty, but one can choose specific ones. The options in the list are ordered by complexity." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image auto arch", + "output": "image auto arch refers to Architectures for image auto: Nominally, the accuracy dial controls the architectures considered if this is left empty, but one can choose specific ones. 
The options in the list are ordered by complexity." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Architectures for image auto: ", + "output": "image auto arch refers to Architectures for image auto: Nominally, the accuracy dial controls the architectures considered if this is left empty, but one can choose specific ones. The options in the list are ordered by complexity." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting image_auto_arch", + "output": "image auto arch refers to Nominally, the accuracy dial controls the architectures considered if this is left empty, but one can choose specific ones. The options in the list are ordered by complexity." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting image_auto_arch", + "output": "image auto arch refers to Architectures for image auto: Nominally, the accuracy dial controls the architectures considered if this is left empty, but one can choose specific ones. The options in the list are ordered by complexity." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_auto_min_shape", + "output": "image auto min shape refers to Any images smaller are upscaled to the minimum. Default is 64, but can be as small as 32 given the pooling layers used." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_auto_min_shape", + "output": "image auto min shape refers to Minimum image size: Any images smaller are upscaled to the minimum. Default is 64, but can be as small as 32 given the pooling layers used." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image auto min shape", + "output": "image auto min shape refers to Minimum image size: Any images smaller are upscaled to the minimum. Default is 64, but can be as small as 32 given the pooling layers used." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Minimum image size: ", + "output": "image auto min shape refers to Minimum image size: Any images smaller are upscaled to the minimum. Default is 64, but can be as small as 32 given the pooling layers used." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting image_auto_min_shape", + "output": "image auto min shape refers to Any images smaller are upscaled to the minimum. Default is 64, but can be as small as 32 given the pooling layers used." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting image_auto_min_shape", + "output": "image auto min shape refers to Minimum image size: Any images smaller are upscaled to the minimum. Default is 64, but can be as small as 32 given the pooling layers used." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_auto_num_final_models", + "output": "image auto num final models refers to 0 means automatic based upon time dial of min(1, time//2)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_auto_num_final_models", + "output": "image auto num final models refers to Number of models in final ensemble: 0 means automatic based upon time dial of min(1, time//2)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image auto num final models", + "output": "image auto num final models refers to Number of models in final ensemble: 0 means automatic based upon time dial of min(1, time//2)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of models in final ensemble: ", + "output": "image auto num final models refers to Number of models in final ensemble: 0 means automatic based upon time dial of min(1, time//2)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting image_auto_num_final_models", + "output": "image auto num final models refers to 0 means automatic based upon time dial of min(1, time//2)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting image_auto_num_final_models", + "output": "image auto num final models refers to Number of models in final ensemble: 0 means automatic based upon time dial of min(1, time//2)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_auto_num_models", + "output": "image auto num models refers to 0 means automatic based upon time dial of max(4 * (time - 1), 2)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_auto_num_models", + "output": "image auto num models refers to Number of models in search space: 0 means automatic based upon time dial of max(4 * (time - 1), 2)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image auto num models", + "output": "image auto num models refers to Number of models in search space: 0 means automatic based upon time dial of max(4 * (time - 1), 2)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of models in search space: ", + "output": "image auto num models refers to Number of models in search space: 0 means automatic based upon time dial of max(4 * (time - 1), 2)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting image_auto_num_models", + "output": "image auto num models refers to 0 means automatic based upon time dial of max(4 * (time - 1), 2)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting image_auto_num_models", + "output": "image auto num models refers to Number of models in search space: 0 means automatic based upon time dial of max(4 * (time - 1), 2)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_auto_num_stages", + "output": "image auto num stages refers to 0 means automatic based upon time dial of time + 1 if time < 6 else time - 1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_auto_num_stages", + "output": "image auto num stages refers to Number of stages for hyperparameter search: 0 means automatic based upon time dial of time + 1 if time < 6 else time - 1." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image auto num stages", + "output": "image auto num stages refers to Number of stages for hyperparameter search: 0 means automatic based upon time dial of time + 1 if time < 6 else time - 1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of stages for hyperparameter search: ", + "output": "image auto num stages refers to Number of stages for hyperparameter search: 0 means automatic based upon time dial of time + 1 if time < 6 else time - 1." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting image_auto_num_stages", + "output": "image auto num stages refers to 0 means automatic based upon time dial of time + 1 if time < 6 else time - 1." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting image_auto_num_stages", + "output": "image auto num stages refers to Number of stages for hyperparameter search: 0 means automatic based upon time dial of time + 1 if time < 6 else time - 1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_auto_iterations", + "output": "image auto iterations refers to 0 means automatic based upon time dial or number of models and stages set by image_auto_num_models and image_auto_num_stages." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_auto_iterations", + "output": "image auto iterations refers to Number of iterations for successive halving: 0 means automatic based upon time dial or number of models and stages set by image_auto_num_models and image_auto_num_stages." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image auto iterations", + "output": "image auto iterations refers to Number of iterations for successive halving: 0 means automatic based upon time dial or number of models and stages set by image_auto_num_models and image_auto_num_stages." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of iterations for successive halving: ", + "output": "image auto iterations refers to Number of iterations for successive halving: 0 means automatic based upon time dial or number of models and stages set by image_auto_num_models and image_auto_num_stages." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting image_auto_iterations", + "output": "image auto iterations refers to 0 means automatic based upon time dial or number of models and stages set by image_auto_num_models and image_auto_num_stages." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting image_auto_iterations", + "output": "image auto iterations refers to Number of iterations for successive halving: 0 means automatic based upon time dial or number of models and stages set by image_auto_num_models and image_auto_num_stages." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_auto_shape_factor", + "output": "image auto shape factor refers to 0.0 means automatic based upon the current stage, where stage 0 uses half, stage 1 uses 3/4, and stage 2 uses full image. One can pass 1.0 to override and always use full image. 0.5 would mean use half." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image_auto_shape_factor", + "output": "image auto shape factor refers to Image downscale ratio to use for training: 0.0 means automatic based upon the current stage, where stage 0 uses half, stage 1 uses 3/4, and stage 2 uses full image. One can pass 1.0 to override and always use full image. 0.5 would mean use half." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "image auto shape factor", + "output": "image auto shape factor refers to Image downscale ratio to use for training: 0.0 means automatic based upon the current stage, where stage 0 uses half, stage 1 uses 3/4, and stage 2 uses full image. One can pass 1.0 to override and always use full image. 0.5 would mean use half." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Image downscale ratio to use for training: ", + "output": "image auto shape factor refers to Image downscale ratio to use for training: 0.0 means automatic based upon the current stage, where stage 0 uses half, stage 1 uses 3/4, and stage 2 uses full image. One can pass 1.0 to override and always use full image. 0.5 would mean use half." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting image_auto_shape_factor", + "output": "image auto shape factor refers to 0.0 means automatic based upon the current stage, where stage 0 uses half, stage 1 uses 3/4, and stage 2 uses full image. One can pass 1.0 to override and always use full image. 0.5 would mean use half." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting image_auto_shape_factor", + "output": "image auto shape factor refers to Image downscale ratio to use for training: 0.0 means automatic based upon the current stage, where stage 0 uses half, stage 1 uses 3/4, and stage 2 uses full image. One can pass 1.0 to override and always use full image. 0.5 would mean use half." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_image_auto_ddp_cores", + "output": "max image auto ddp cores refers to Control maximum number of cores to use for image auto model parallel data management. 0 will disable mp: https://pytorch-lightning.readthedocs.io/en/latest/guides/speed.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_image_auto_ddp_cores", + "output": "max image auto ddp cores refers to Maximum number of cores to use for image auto model parallel data management: Control maximum number of cores to use for image auto model parallel data management. 0 will disable mp: https://pytorch-lightning.readthedocs.io/en/latest/guides/speed.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max image auto ddp cores", + "output": "max image auto ddp cores refers to Maximum number of cores to use for image auto model parallel data management: Control maximum number of cores to use for image auto model parallel data management. 
0 will disable mp: https://pytorch-lightning.readthedocs.io/en/latest/guides/speed.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of cores to use for image auto model parallel data management: ", + "output": "max image auto ddp cores refers to Maximum number of cores to use for image auto model parallel data management: Control maximum number of cores to use for image auto model parallel data management. 0 will disable mp: https://pytorch-lightning.readthedocs.io/en/latest/guides/speed.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_image_auto_ddp_cores", + "output": "max image auto ddp cores refers to Control maximum number of cores to use for image auto model parallel data management. 0 will disable mp: https://pytorch-lightning.readthedocs.io/en/latest/guides/speed.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_image_auto_ddp_cores", + "output": "max image auto ddp cores refers to Maximum number of cores to use for image auto model parallel data management: Control maximum number of cores to use for image auto model parallel data management. 
0 will disable mp: https://pytorch-lightning.readthedocs.io/en/latest/guides/speed.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text_dl_token_pad_percentile", + "output": "text dl token pad percentile refers to Percentile value cutoff of input text token lengths for nlp deep learning models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text_dl_token_pad_percentile", + "output": "text dl token pad percentile refers to Percentile value cutoff of input text token lengths for nlp deep learning models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text dl token pad percentile", + "output": "text dl token pad percentile refers to Percentile value cutoff of input text token lengths for nlp deep learning models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "text dl token pad percentile refers to Percentile value cutoff of input text token lengths for nlp deep learning models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting text_dl_token_pad_percentile", + "output": "text dl token pad percentile refers to Percentile value cutoff of input text token lengths for nlp deep learning models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting text_dl_token_pad_percentile", + "output": "text dl token pad percentile refers to Percentile value cutoff of input text token lengths for nlp deep learning models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text_dl_token_pad_max", + "output": "text dl token pad max refers to Maximum token length of input 
text to be used in nlp deep learning models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text_dl_token_pad_max", + "output": "text dl token pad max refers to Maximum token length of input text to be used in nlp deep learning models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text dl token pad max", + "output": "text dl token pad max refers to Maximum token length of input text to be used in nlp deep learning models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "text dl token pad max refers to Maximum token length of input text to be used in nlp deep learning models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting text_dl_token_pad_max", + "output": "text dl token pad max refers to Maximum token length of input text to be used in nlp deep learning models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting text_dl_token_pad_max", + "output": "text dl token pad max refers to Maximum token length of input text to be used in nlp deep learning models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity_constraints_interpretability_switch", + "output": "monotonicity constraints interpretability switch refers to Interpretability setting equal and above which will use automatic monotonicity constraints inXGBoostGBM/LightGBM/DecisionTree models. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity_constraints_interpretability_switch", + "output": "monotonicity constraints interpretability switch refers to Threshold for interpretability above which to enable automatic monotonicity constraints for tree models: Interpretability setting equal and above which will use automatic monotonicity constraints inXGBoostGBM/LightGBM/DecisionTree models. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity constraints interpretability switch", + "output": "monotonicity constraints interpretability switch refers to Threshold for interpretability above which to enable automatic monotonicity constraints for tree models: Interpretability setting equal and above which will use automatic monotonicity constraints inXGBoostGBM/LightGBM/DecisionTree models. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Threshold for interpretability above which to enable automatic monotonicity constraints for tree models: ", + "output": "monotonicity constraints interpretability switch refers to Threshold for interpretability above which to enable automatic monotonicity constraints for tree models: Interpretability setting equal and above which will use automatic monotonicity constraints inXGBoostGBM/LightGBM/DecisionTree models. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting monotonicity_constraints_interpretability_switch", + "output": "monotonicity constraints interpretability switch refers to Interpretability setting equal and above which will use automatic monotonicity constraints inXGBoostGBM/LightGBM/DecisionTree models. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting monotonicity_constraints_interpretability_switch", + "output": "monotonicity constraints interpretability switch refers to Threshold for interpretability above which to enable automatic monotonicity constraints for tree models: Interpretability setting equal and above which will use automatic monotonicity constraints inXGBoostGBM/LightGBM/DecisionTree models. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity_constraints_log_level", + "output": "monotonicity constraints log level refers to For models that support monotonicity constraints, and if enabled, show automatically determined monotonicity constraints for each feature going into the model based on its correlation with the target. 'low' shows only monotonicity constraint direction. 'medium' shows correlation of positively and negatively constraint features. 'high' shows all correlation values." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity_constraints_log_level", + "output": "monotonicity constraints log level refers to Control amount of logging when calculating automatic monotonicity constraints (if enabled): For models that support monotonicity constraints, and if enabled, show automatically determined monotonicity constraints for each feature going into the model based on its correlation with the target. 'low' shows only monotonicity constraint direction. 'medium' shows correlation of positively and negatively constraint features. 'high' shows all correlation values." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity constraints log level", + "output": "monotonicity constraints log level refers to Control amount of logging when calculating automatic monotonicity constraints (if enabled): For models that support monotonicity constraints, and if enabled, show automatically determined monotonicity constraints for each feature going into the model based on its correlation with the target. 'low' shows only monotonicity constraint direction. 'medium' shows correlation of positively and negatively constraint features. 'high' shows all correlation values." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Control amount of logging when calculating automatic monotonicity constraints (if enabled): ", + "output": "monotonicity constraints log level refers to Control amount of logging when calculating automatic monotonicity constraints (if enabled): For models that support monotonicity constraints, and if enabled, show automatically determined monotonicity constraints for each feature going into the model based on its correlation with the target. 'low' shows only monotonicity constraint direction. 'medium' shows correlation of positively and negatively constraint features. 'high' shows all correlation values." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting monotonicity_constraints_log_level", + "output": "monotonicity constraints log level refers to For models that support monotonicity constraints, and if enabled, show automatically determined monotonicity constraints for each feature going into the model based on its correlation with the target. 'low' shows only monotonicity constraint direction. 'medium' shows correlation of positively and negatively constraint features. 'high' shows all correlation values." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting monotonicity_constraints_log_level", + "output": "monotonicity constraints log level refers to Control amount of logging when calculating automatic monotonicity constraints (if enabled): For models that support monotonicity constraints, and if enabled, show automatically determined monotonicity constraints for each feature going into the model based on its correlation with the target. 'low' shows only monotonicity constraint direction. 'medium' shows correlation of positively and negatively constraint features. 'high' shows all correlation values." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity_constraints_correlation_threshold", + "output": "monotonicity constraints correlation threshold refers to Threshold, of Pearson product-moment correlation coefficient between numerical or encoded transformedfeature and target, above (below negative for) which will enforce positive (negative) monotonicityfor XGBoostGBM, LightGBM and DecisionTree models.Enabled when interpretability >= monotonicity_constraints_interpretability_switch config toml value.Only if monotonicity_constraints_dict is not provided. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity_constraints_correlation_threshold", + "output": "monotonicity constraints correlation threshold refers to Correlation beyond which triggers monotonicity constraints (if enabled): Threshold, of Pearson product-moment correlation coefficient between numerical or encoded transformedfeature and target, above (below negative for) which will enforce positive (negative) monotonicityfor XGBoostGBM, LightGBM and DecisionTree models.Enabled when interpretability >= monotonicity_constraints_interpretability_switch config toml value.Only if monotonicity_constraints_dict is not provided. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity constraints correlation threshold", + "output": "monotonicity constraints correlation threshold refers to Correlation beyond which triggers monotonicity constraints (if enabled): Threshold, of Pearson product-moment correlation coefficient between numerical or encoded transformedfeature and target, above (below negative for) which will enforce positive (negative) monotonicityfor XGBoostGBM, LightGBM and DecisionTree models.Enabled when interpretability >= monotonicity_constraints_interpretability_switch config toml value.Only if monotonicity_constraints_dict is not provided. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Correlation beyond which triggers monotonicity constraints (if enabled): ", + "output": "monotonicity constraints correlation threshold refers to Correlation beyond which triggers monotonicity constraints (if enabled): Threshold, of Pearson product-moment correlation coefficient between numerical or encoded transformedfeature and target, above (below negative for) which will enforce positive (negative) monotonicityfor XGBoostGBM, LightGBM and DecisionTree models.Enabled when interpretability >= monotonicity_constraints_interpretability_switch config toml value.Only if monotonicity_constraints_dict is not provided. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting monotonicity_constraints_correlation_threshold", + "output": "monotonicity constraints correlation threshold refers to Threshold, of Pearson product-moment correlation coefficient between numerical or encoded transformedfeature and target, above (below negative for) which will enforce positive (negative) monotonicityfor XGBoostGBM, LightGBM and DecisionTree models.Enabled when interpretability >= monotonicity_constraints_interpretability_switch config toml value.Only if monotonicity_constraints_dict is not provided. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting monotonicity_constraints_correlation_threshold", + "output": "monotonicity constraints correlation threshold refers to Correlation beyond which triggers monotonicity constraints (if enabled): Threshold, of Pearson product-moment correlation coefficient between numerical or encoded transformedfeature and target, above (below negative for) which will enforce positive (negative) monotonicityfor XGBoostGBM, LightGBM and DecisionTree models.Enabled when interpretability >= monotonicity_constraints_interpretability_switch config toml value.Only if monotonicity_constraints_dict is not provided. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity_constraints_drop_low_correlation_features", + "output": "monotonicity constraints drop low correlation features refers to If enabled, only monotonic features with +1/-1 constraints will be passed to the model(s), and featureswithout monotonicity constraints (0, as set by monotonicity_constraints_dict or determined automatically)will be dropped. Otherwise all features will be in the model.Only active when interpretability >= monotonicity_constraints_interpretability_switch ormonotonicity_constraints_dict is provided. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity_constraints_drop_low_correlation_features", + "output": "monotonicity constraints drop low correlation features refers to Whether to drop features that have no monotonicity constraint applied (e.g., due to low correlation with target).: If enabled, only monotonic features with +1/-1 constraints will be passed to the model(s), and featureswithout monotonicity constraints (0, as set by monotonicity_constraints_dict or determined automatically)will be dropped. 
Otherwise all features will be in the model.Only active when interpretability >= monotonicity_constraints_interpretability_switch ormonotonicity_constraints_dict is provided. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity constraints drop low correlation features", + "output": "monotonicity constraints drop low correlation features refers to Whether to drop features that have no monotonicity constraint applied (e.g., due to low correlation with target).: If enabled, only monotonic features with +1/-1 constraints will be passed to the model(s), and featureswithout monotonicity constraints (0, as set by monotonicity_constraints_dict or determined automatically)will be dropped. Otherwise all features will be in the model.Only active when interpretability >= monotonicity_constraints_interpretability_switch ormonotonicity_constraints_dict is provided. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to drop features that have no monotonicity constraint applied (e.g., due to low correlation with target).: ", + "output": "monotonicity constraints drop low correlation features refers to Whether to drop features that have no monotonicity constraint applied (e.g., due to low correlation with target).: If enabled, only monotonic features with +1/-1 constraints will be passed to the model(s), and featureswithout monotonicity constraints (0, as set by monotonicity_constraints_dict or determined automatically)will be dropped. Otherwise all features will be in the model.Only active when interpretability >= monotonicity_constraints_interpretability_switch ormonotonicity_constraints_dict is provided. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting monotonicity_constraints_drop_low_correlation_features", + "output": "monotonicity constraints drop low correlation features refers to If enabled, only monotonic features with +1/-1 constraints will be passed to the model(s), and featureswithout monotonicity constraints (0, as set by monotonicity_constraints_dict or determined automatically)will be dropped. Otherwise all features will be in the model.Only active when interpretability >= monotonicity_constraints_interpretability_switch ormonotonicity_constraints_dict is provided. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting monotonicity_constraints_drop_low_correlation_features", + "output": "monotonicity constraints drop low correlation features refers to Whether to drop features that have no monotonicity constraint applied (e.g., due to low correlation with target).: If enabled, only monotonic features with +1/-1 constraints will be passed to the model(s), and featureswithout monotonicity constraints (0, as set by monotonicity_constraints_dict or determined automatically)will be dropped. Otherwise all features will be in the model.Only active when interpretability >= monotonicity_constraints_interpretability_switch ormonotonicity_constraints_dict is provided. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity_constraints_dict", + "output": "monotonicity constraints dict refers to Manual override for monotonicity constraints. Mapping of original numeric features to desired constraint(1 for pos, -1 for neg, or 0 to disable. 
True can be set for automatic handling, False is same as 0).Features that are not listed here will be treated automatically,and so get no constraint (i.e., 0) if interpretability < monotonicity_constraints_interpretability_switchand otherwise the constraint is automatically determined from the correlation between each feature and the target.Example: {'PAY_0': -1, 'PAY_2': -1, 'AGE': -1, 'BILL_AMT1': 1, 'PAY_AMT1': -1} " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity_constraints_dict", + "output": "monotonicity constraints dict refers to Manual override for monotonicity constraints: Manual override for monotonicity constraints. Mapping of original numeric features to desired constraint(1 for pos, -1 for neg, or 0 to disable. True can be set for automatic handling, False is same as 0).Features that are not listed here will be treated automatically,and so get no constraint (i.e., 0) if interpretability < monotonicity_constraints_interpretability_switchand otherwise the constraint is automatically determined from the correlation between each feature and the target.Example: {'PAY_0': -1, 'PAY_2': -1, 'AGE': -1, 'BILL_AMT1': 1, 'PAY_AMT1': -1} " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "monotonicity constraints dict", + "output": "monotonicity constraints dict refers to Manual override for monotonicity constraints: Manual override for monotonicity constraints. Mapping of original numeric features to desired constraint(1 for pos, -1 for neg, or 0 to disable. 
True can be set for automatic handling, False is same as 0).Features that are not listed here will be treated automatically,and so get no constraint (i.e., 0) if interpretability < monotonicity_constraints_interpretability_switchand otherwise the constraint is automatically determined from the correlation between each feature and the target.Example: {'PAY_0': -1, 'PAY_2': -1, 'AGE': -1, 'BILL_AMT1': 1, 'PAY_AMT1': -1} " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Manual override for monotonicity constraints: ", + "output": "monotonicity constraints dict refers to Manual override for monotonicity constraints: Manual override for monotonicity constraints. Mapping of original numeric features to desired constraint(1 for pos, -1 for neg, or 0 to disable. True can be set for automatic handling, False is same as 0).Features that are not listed here will be treated automatically,and so get no constraint (i.e., 0) if interpretability < monotonicity_constraints_interpretability_switchand otherwise the constraint is automatically determined from the correlation between each feature and the target.Example: {'PAY_0': -1, 'PAY_2': -1, 'AGE': -1, 'BILL_AMT1': 1, 'PAY_AMT1': -1} " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting monotonicity_constraints_dict", + "output": "monotonicity constraints dict refers to Manual override for monotonicity constraints. Mapping of original numeric features to desired constraint(1 for pos, -1 for neg, or 0 to disable. 
True can be set for automatic handling, False is same as 0).Features that are not listed here will be treated automatically,and so get no constraint (i.e., 0) if interpretability < monotonicity_constraints_interpretability_switchand otherwise the constraint is automatically determined from the correlation between each feature and the target.Example: {'PAY_0': -1, 'PAY_2': -1, 'AGE': -1, 'BILL_AMT1': 1, 'PAY_AMT1': -1} " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting monotonicity_constraints_dict", + "output": "monotonicity constraints dict refers to Manual override for monotonicity constraints: Manual override for monotonicity constraints. Mapping of original numeric features to desired constraint(1 for pos, -1 for neg, or 0 to disable. True can be set for automatic handling, False is same as 0).Features that are not listed here will be treated automatically,and so get no constraint (i.e., 0) if interpretability < monotonicity_constraints_interpretability_switchand otherwise the constraint is automatically determined from the correlation between each feature and the target.Example: {'PAY_0': -1, 'PAY_2': -1, 'AGE': -1, 'BILL_AMT1': 1, 'PAY_AMT1': -1} " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_feature_interaction_depth", + "output": "max feature interaction depth refers to Exploring feature interactions can be important in gaining better predictive performance.The interaction can take multiple forms (i.e. feature1 + feature2 or feature1 * feature2 + ... 
featureN)Although certain machine learning algorithms (like tree-based methods) can do well incapturing these interactions as part of their training process, still generating them mayhelp them (or other algorithms) yield better performance.The depth of the interaction level (as in \"up to\" how many features may be combined atonce to create one single feature) can be specified to control the complexity of thefeature engineering process. For transformers that use both numeric and categorical features, this constrainsthe number of each type, not the total number. Higher values might be able to make more predictive modelsat the expense of time (-1 means automatic). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_feature_interaction_depth", + "output": "max feature interaction depth refers to Max. feature interaction depth: Exploring feature interactions can be important in gaining better predictive performance.The interaction can take multiple forms (i.e. feature1 + feature2 or feature1 * feature2 + ... featureN)Although certain machine learning algorithms (like tree-based methods) can do well incapturing these interactions as part of their training process, still generating them mayhelp them (or other algorithms) yield better performance.The depth of the interaction level (as in \"up to\" how many features may be combined atonce to create one single feature) can be specified to control the complexity of thefeature engineering process. For transformers that use both numeric and categorical features, this constrainsthe number of each type, not the total number. Higher values might be able to make more predictive modelsat the expense of time (-1 means automatic). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max feature interaction depth", + "output": "max feature interaction depth refers to Max. 
feature interaction depth: Exploring feature interactions can be important in gaining better predictive performance.The interaction can take multiple forms (i.e. feature1 + feature2 or feature1 * feature2 + ... featureN)Although certain machine learning algorithms (like tree-based methods) can do well incapturing these interactions as part of their training process, still generating them mayhelp them (or other algorithms) yield better performance.The depth of the interaction level (as in \"up to\" how many features may be combined atonce to create one single feature) can be specified to control the complexity of thefeature engineering process. For transformers that use both numeric and categorical features, this constrainsthe number of each type, not the total number. Higher values might be able to make more predictive modelsat the expense of time (-1 means automatic). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. feature interaction depth: ", + "output": "max feature interaction depth refers to Max. feature interaction depth: Exploring feature interactions can be important in gaining better predictive performance.The interaction can take multiple forms (i.e. feature1 + feature2 or feature1 * feature2 + ... featureN)Although certain machine learning algorithms (like tree-based methods) can do well incapturing these interactions as part of their training process, still generating them mayhelp them (or other algorithms) yield better performance.The depth of the interaction level (as in \"up to\" how many features may be combined atonce to create one single feature) can be specified to control the complexity of thefeature engineering process. For transformers that use both numeric and categorical features, this constrainsthe number of each type, not the total number. Higher values might be able to make more predictive modelsat the expense of time (-1 means automatic). 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_feature_interaction_depth", + "output": "max feature interaction depth refers to Exploring feature interactions can be important in gaining better predictive performance.The interaction can take multiple forms (i.e. feature1 + feature2 or feature1 * feature2 + ... featureN)Although certain machine learning algorithms (like tree-based methods) can do well incapturing these interactions as part of their training process, still generating them mayhelp them (or other algorithms) yield better performance.The depth of the interaction level (as in \"up to\" how many features may be combined atonce to create one single feature) can be specified to control the complexity of thefeature engineering process. For transformers that use both numeric and categorical features, this constrainsthe number of each type, not the total number. Higher values might be able to make more predictive modelsat the expense of time (-1 means automatic). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_feature_interaction_depth", + "output": "max feature interaction depth refers to Max. feature interaction depth: Exploring feature interactions can be important in gaining better predictive performance.The interaction can take multiple forms (i.e. feature1 + feature2 or feature1 * feature2 + ... featureN)Although certain machine learning algorithms (like tree-based methods) can do well incapturing these interactions as part of their training process, still generating them mayhelp them (or other algorithms) yield better performance.The depth of the interaction level (as in \"up to\" how many features may be combined atonce to create one single feature) can be specified to control the complexity of thefeature engineering process. 
For transformers that use both numeric and categorical features, this constrainsthe number of each type, not the total number. Higher values might be able to make more predictive modelsat the expense of time (-1 means automatic). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_feature_interaction_depth", + "output": "fixed feature interaction depth refers to Instead of sampling from min to max (up to max_feature_interaction_depth unless all specified)columns allowed for each transformer (0), choose fixed non-zero number of columns to use.Can make same as number of columns to use all columns for each transformers if allowed by each transformer.-n can be chosen to do 50/50 sample and fixed of n features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_feature_interaction_depth", + "output": "fixed feature interaction depth refers to Fixed feature interaction depth: Instead of sampling from min to max (up to max_feature_interaction_depth unless all specified)columns allowed for each transformer (0), choose fixed non-zero number of columns to use.Can make same as number of columns to use all columns for each transformers if allowed by each transformer.-n can be chosen to do 50/50 sample and fixed of n features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed feature interaction depth", + "output": "fixed feature interaction depth refers to Fixed feature interaction depth: Instead of sampling from min to max (up to max_feature_interaction_depth unless all specified)columns allowed for each transformer (0), choose fixed non-zero number of columns to use.Can make same as number of columns to use all columns for each transformers if allowed by each transformer.-n can be chosen to do 50/50 sample and fixed of n features. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Fixed feature interaction depth: ", + "output": "fixed feature interaction depth refers to Fixed feature interaction depth: Instead of sampling from min to max (up to max_feature_interaction_depth unless all specified)columns allowed for each transformer (0), choose fixed non-zero number of columns to use.Can make same as number of columns to use all columns for each transformers if allowed by each transformer.-n can be chosen to do 50/50 sample and fixed of n features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fixed_feature_interaction_depth", + "output": "fixed feature interaction depth refers to Instead of sampling from min to max (up to max_feature_interaction_depth unless all specified)columns allowed for each transformer (0), choose fixed non-zero number of columns to use.Can make same as number of columns to use all columns for each transformers if allowed by each transformer.-n can be chosen to do 50/50 sample and fixed of n features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fixed_feature_interaction_depth", + "output": "fixed feature interaction depth refers to Fixed feature interaction depth: Instead of sampling from min to max (up to max_feature_interaction_depth unless all specified)columns allowed for each transformer (0), choose fixed non-zero number of columns to use.Can make same as number of columns to use all columns for each transformers if allowed by each transformer.-n can be chosen to do 50/50 sample and fixed of n features. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tune_parameters_accuracy_switch", + "output": "tune parameters accuracy switch refers to Accuracy setting equal and above which enables tuning of model parameters Only applicable if parameter_tuning_num_models=-1 (auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tune_parameters_accuracy_switch", + "output": "tune parameters accuracy switch refers to Accuracy setting equal and above which enables tuning of model parameters Only applicable if parameter_tuning_num_models=-1 (auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tune parameters accuracy switch", + "output": "tune parameters accuracy switch refers to Accuracy setting equal and above which enables tuning of model parameters Only applicable if parameter_tuning_num_models=-1 (auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tune parameters accuracy switch refers to Accuracy setting equal and above which enables tuning of model parameters Only applicable if parameter_tuning_num_models=-1 (auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tune_parameters_accuracy_switch", + "output": "tune parameters accuracy switch refers to Accuracy setting equal and above which enables tuning of model parameters Only applicable if parameter_tuning_num_models=-1 (auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tune_parameters_accuracy_switch", + "output": "tune parameters accuracy switch refers to Accuracy setting equal and above which enables tuning of model parameters Only applicable if 
parameter_tuning_num_models=-1 (auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tune_target_transform_accuracy_switch", + "output": "tune target transform accuracy switch refers to Accuracy setting equal and above which enables tuning of target transform for regression. This is useful for time series when instead of predicting the actual target value, it might be better to predict a transformed target variable like sqrt(target) or log(target) as a means to control for outliers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tune_target_transform_accuracy_switch", + "output": "tune target transform accuracy switch refers to Accuracy setting equal and above which enables tuning of target transform for regression. This is useful for time series when instead of predicting the actual target value, it might be better to predict a transformed target variable like sqrt(target) or log(target) as a means to control for outliers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tune target transform accuracy switch", + "output": "tune target transform accuracy switch refers to Accuracy setting equal and above which enables tuning of target transform for regression. This is useful for time series when instead of predicting the actual target value, it might be better to predict a transformed target variable like sqrt(target) or log(target) as a means to control for outliers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tune target transform accuracy switch refers to Accuracy setting equal and above which enables tuning of target transform for regression. 
This is useful for time series when instead of predicting the actual target value, it might be better to predict a transformed target variable like sqrt(target) or log(target) as a means to control for outliers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tune_target_transform_accuracy_switch", + "output": "tune target transform accuracy switch refers to Accuracy setting equal and above which enables tuning of target transform for regression. This is useful for time series when instead of predicting the actual target value, it might be better to predict a transformed target variable like sqrt(target) or log(target) as a means to control for outliers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tune_target_transform_accuracy_switch", + "output": "tune target transform accuracy switch refers to Accuracy setting equal and above which enables tuning of target transform for regression. This is useful for time series when instead of predicting the actual target value, it might be better to predict a transformed target variable like sqrt(target) or log(target) as a means to control for outliers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "target_transformer", + "output": "target transformer refers to Select a target transformation for regression problems. 
Must be one of: ['auto','identity', 'identity_noclip', 'center', 'standardize', 'unit_box', 'log', 'log_noclip', 'square','sqrt', 'double_sqrt', 'inverse', 'anscombe', 'logit', 'sigmoid'].If set to 'auto', will automatically pick the best target transformer (if accuracy is set totune_target_transform_accuracy_switch or larger, considering interpretability level of each target transformer),otherwise will fall back to 'identity_noclip' (easiest to interpret, Shapley values are in original space, etc.).All transformers except for 'center', 'standardize', 'identity_noclip' and 'log_noclip' perform clippingto constrain the predictions to the domain of the target in the training data. Use 'center', 'standardize','identity_noclip' or 'log_noclip' to disable clipping and to allow predictions outside of the target domain observed inthe training data (for parametric models or custom models that support extrapolation). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "target_transformer", + "output": "target transformer refers to Select target transformation of the target for regression problems: Select a target transformation for regression problems. Must be one of: ['auto','identity', 'identity_noclip', 'center', 'standardize', 'unit_box', 'log', 'log_noclip', 'square','sqrt', 'double_sqrt', 'inverse', 'anscombe', 'logit', 'sigmoid'].If set to 'auto', will automatically pick the best target transformer (if accuracy is set totune_target_transform_accuracy_switch or larger, considering interpretability level of each target transformer),otherwise will fall back to 'identity_noclip' (easiest to interpret, Shapley values are in original space, etc.).All transformers except for 'center', 'standardize', 'identity_noclip' and 'log_noclip' perform clippingto constrain the predictions to the domain of the target in the training data. 
Use 'center', 'standardize','identity_noclip' or 'log_noclip' to disable clipping and to allow predictions outside of the target domain observed inthe training data (for parametric models or custom models that support extrapolation). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "target transformer", + "output": "target transformer refers to Select target transformation of the target for regression problems: Select a target transformation for regression problems. Must be one of: ['auto','identity', 'identity_noclip', 'center', 'standardize', 'unit_box', 'log', 'log_noclip', 'square','sqrt', 'double_sqrt', 'inverse', 'anscombe', 'logit', 'sigmoid'].If set to 'auto', will automatically pick the best target transformer (if accuracy is set totune_target_transform_accuracy_switch or larger, considering interpretability level of each target transformer),otherwise will fall back to 'identity_noclip' (easiest to interpret, Shapley values are in original space, etc.).All transformers except for 'center', 'standardize', 'identity_noclip' and 'log_noclip' perform clippingto constrain the predictions to the domain of the target in the training data. Use 'center', 'standardize','identity_noclip' or 'log_noclip' to disable clipping and to allow predictions outside of the target domain observed inthe training data (for parametric models or custom models that support extrapolation). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Select target transformation of the target for regression problems: ", + "output": "target transformer refers to Select target transformation of the target for regression problems: Select a target transformation for regression problems. 
Must be one of: ['auto','identity', 'identity_noclip', 'center', 'standardize', 'unit_box', 'log', 'log_noclip', 'square','sqrt', 'double_sqrt', 'inverse', 'anscombe', 'logit', 'sigmoid'].If set to 'auto', will automatically pick the best target transformer (if accuracy is set totune_target_transform_accuracy_switch or larger, considering interpretability level of each target transformer),otherwise will fall back to 'identity_noclip' (easiest to interpret, Shapley values are in original space, etc.).All transformers except for 'center', 'standardize', 'identity_noclip' and 'log_noclip' perform clippingto constrain the predictions to the domain of the target in the training data. Use 'center', 'standardize','identity_noclip' or 'log_noclip' to disable clipping and to allow predictions outside of the target domain observed inthe training data (for parametric models or custom models that support extrapolation). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting target_transformer", + "output": "target transformer refers to Select a target transformation for regression problems. Must be one of: ['auto','identity', 'identity_noclip', 'center', 'standardize', 'unit_box', 'log', 'log_noclip', 'square','sqrt', 'double_sqrt', 'inverse', 'anscombe', 'logit', 'sigmoid'].If set to 'auto', will automatically pick the best target transformer (if accuracy is set totune_target_transform_accuracy_switch or larger, considering interpretability level of each target transformer),otherwise will fall back to 'identity_noclip' (easiest to interpret, Shapley values are in original space, etc.).All transformers except for 'center', 'standardize', 'identity_noclip' and 'log_noclip' perform clippingto constrain the predictions to the domain of the target in the training data. 
Use 'center', 'standardize','identity_noclip' or 'log_noclip' to disable clipping and to allow predictions outside of the target domain observed inthe training data (for parametric models or custom models that support extrapolation). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting target_transformer", + "output": "target transformer refers to Select target transformation of the target for regression problems: Select a target transformation for regression problems. Must be one of: ['auto','identity', 'identity_noclip', 'center', 'standardize', 'unit_box', 'log', 'log_noclip', 'square','sqrt', 'double_sqrt', 'inverse', 'anscombe', 'logit', 'sigmoid'].If set to 'auto', will automatically pick the best target transformer (if accuracy is set totune_target_transform_accuracy_switch or larger, considering interpretability level of each target transformer),otherwise will fall back to 'identity_noclip' (easiest to interpret, Shapley values are in original space, etc.).All transformers except for 'center', 'standardize', 'identity_noclip' and 'log_noclip' perform clippingto constrain the predictions to the domain of the target in the training data. Use 'center', 'standardize','identity_noclip' or 'log_noclip' to disable clipping and to allow predictions outside of the target domain observed inthe training data (for parametric models or custom models that support extrapolation). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "target_transformer_tuning_choices", + "output": "target transformer tuning choices refers to Select list of target transformers to use for tuning. Only for target_transformer='auto' and accuracy >= tune_target_transform_accuracy_switch. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "target_transformer_tuning_choices", + "output": "target transformer tuning choices refers to Select all allowed target transformations of the target for regression problems when doing target transformer tuning: Select list of target transformers to use for tuning. Only for target_transformer='auto' and accuracy >= tune_target_transform_accuracy_switch. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "target transformer tuning choices", + "output": "target transformer tuning choices refers to Select all allowed target transformations of the target for regression problems when doing target transformer tuning: Select list of target transformers to use for tuning. Only for target_transformer='auto' and accuracy >= tune_target_transform_accuracy_switch. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Select all allowed target transformations of the target for regression problems when doing target transformer tuning: ", + "output": "target transformer tuning choices refers to Select all allowed target transformations of the target for regression problems when doing target transformer tuning: Select list of target transformers to use for tuning. Only for target_transformer='auto' and accuracy >= tune_target_transform_accuracy_switch. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting target_transformer_tuning_choices", + "output": "target transformer tuning choices refers to Select list of target transformers to use for tuning. Only for target_transformer='auto' and accuracy >= tune_target_transform_accuracy_switch. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting target_transformer_tuning_choices", + "output": "target transformer tuning choices refers to Select all allowed target transformations of the target for regression problems when doing target transformer tuning: Select list of target transformers to use for tuning. Only for target_transformer='auto' and accuracy >= tune_target_transform_accuracy_switch. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_style", + "output": "tournament style refers to Tournament style (method to decide which models are best at each iteration)'auto' : Choose based upon accuracy and interpretability'uniform' : all individuals in population compete to win as best (can lead to all, e.g. LightGBM models in final ensemble, which may not improve ensemble performance due to lack of diversity)'model' : individuals with same model type compete (good if multiple models do well but some models that do not do as well still contribute to improving ensemble)'feature' : individuals with similar feature types compete (good if target encoding, frequency encoding, and other feature sets lead to good results)'fullstack' : Choose among optimal model and feature types'model' and 'feature' styles preserve at least one winner for each type (and so 2 total indivs of each type after mutation)For each case, a round robin approach is used to choose best scores among type of models to choose from.If enable_genetic_algorithm=='Optuna', then every individual is self-mutated without any tournamentduring the genetic algorithm. The tournament is only used to prune-down individuals for, e.g.,tuning -> evolution and evolution -> final model. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_style", + "output": "tournament style refers to Tournament model for genetic algorithm: Tournament style (method to decide which models are best at each iteration)'auto' : Choose based upon accuracy and interpretability'uniform' : all individuals in population compete to win as best (can lead to all, e.g. LightGBM models in final ensemble, which may not improve ensemble performance due to lack of diversity)'model' : individuals with same model type compete (good if multiple models do well but some models that do not do as well still contribute to improving ensemble)'feature' : individuals with similar feature types compete (good if target encoding, frequency encoding, and other feature sets lead to good results)'fullstack' : Choose among optimal model and feature types'model' and 'feature' styles preserve at least one winner for each type (and so 2 total indivs of each type after mutation)For each case, a round robin approach is used to choose best scores among type of models to choose from.If enable_genetic_algorithm=='Optuna', then every individual is self-mutated without any tournamentduring the genetic algorithm. The tournament is only used to prune-down individuals for, e.g.,tuning -> evolution and evolution -> final model. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament style", + "output": "tournament style refers to Tournament model for genetic algorithm: Tournament style (method to decide which models are best at each iteration)'auto' : Choose based upon accuracy and interpretability'uniform' : all individuals in population compete to win as best (can lead to all, e.g. 
LightGBM models in final ensemble, which may not improve ensemble performance due to lack of diversity)'model' : individuals with same model type compete (good if multiple models do well but some models that do not do as well still contribute to improving ensemble)'feature' : individuals with similar feature types compete (good if target encoding, frequency encoding, and other feature sets lead to good results)'fullstack' : Choose among optimal model and feature types'model' and 'feature' styles preserve at least one winner for each type (and so 2 total indivs of each type after mutation)For each case, a round robin approach is used to choose best scores among type of models to choose from.If enable_genetic_algorithm=='Optuna', then every individual is self-mutated without any tournamentduring the genetic algorithm. The tournament is only used to prune-down individuals for, e.g.,tuning -> evolution and evolution -> final model. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Tournament model for genetic algorithm: ", + "output": "tournament style refers to Tournament model for genetic algorithm: Tournament style (method to decide which models are best at each iteration)'auto' : Choose based upon accuracy and interpretability'uniform' : all individuals in population compete to win as best (can lead to all, e.g. 
LightGBM models in final ensemble, which may not improve ensemble performance due to lack of diversity)'model' : individuals with same model type compete (good if multiple models do well but some models that do not do as well still contribute to improving ensemble)'feature' : individuals with similar feature types compete (good if target encoding, frequency encoding, and other feature sets lead to good results)'fullstack' : Choose among optimal model and feature types'model' and 'feature' styles preserve at least one winner for each type (and so 2 total indivs of each type after mutation)For each case, a round robin approach is used to choose best scores among type of models to choose from.If enable_genetic_algorithm=='Optuna', then every individual is self-mutated without any tournamentduring the genetic algorithm. The tournament is only used to prune-down individuals for, e.g.,tuning -> evolution and evolution -> final model. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tournament_style", + "output": "tournament style refers to Tournament style (method to decide which models are best at each iteration)'auto' : Choose based upon accuracy and interpretability'uniform' : all individuals in population compete to win as best (can lead to all, e.g. 
LightGBM models in final ensemble, which may not improve ensemble performance due to lack of diversity)'model' : individuals with same model type compete (good if multiple models do well but some models that do not do as well still contribute to improving ensemble)'feature' : individuals with similar feature types compete (good if target encoding, frequency encoding, and other feature sets lead to good results)'fullstack' : Choose among optimal model and feature types'model' and 'feature' styles preserve at least one winner for each type (and so 2 total indivs of each type after mutation)For each case, a round robin approach is used to choose best scores among type of models to choose from.If enable_genetic_algorithm=='Optuna', then every individual is self-mutated without any tournamentduring the genetic algorithm. The tournament is only used to prune-down individuals for, e.g.,tuning -> evolution and evolution -> final model. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tournament_style", + "output": "tournament style refers to Tournament model for genetic algorithm: Tournament style (method to decide which models are best at each iteration)'auto' : Choose based upon accuracy and interpretability'uniform' : all individuals in population compete to win as best (can lead to all, e.g. 
LightGBM models in final ensemble, which may not improve ensemble performance due to lack of diversity)'model' : individuals with same model type compete (good if multiple models do well but some models that do not do as well still contribute to improving ensemble)'feature' : individuals with similar feature types compete (good if target encoding, frequency encoding, and other feature sets lead to good results)'fullstack' : Choose among optimal model and feature types'model' and 'feature' styles preserve at least one winner for each type (and so 2 total indivs of each type after mutation)For each case, a round robin approach is used to choose best scores among type of models to choose from.If enable_genetic_algorithm=='Optuna', then every individual is self-mutated without any tournamentduring the genetic algorithm. The tournament is only used to prune-down individuals for, e.g.,tuning -> evolution and evolution -> final model. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_uniform_style_interpretability_switch", + "output": "tournament uniform style interpretability switch refers to Interpretability above which will use 'uniform' tournament style" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_uniform_style_interpretability_switch", + "output": "tournament uniform style interpretability switch refers to Interpretability above which will use 'uniform' tournament style" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament uniform style interpretability switch", + "output": "tournament uniform style interpretability switch refers to Interpretability above which will use 'uniform' tournament style" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless 
AI", + "input": "", + "output": "tournament uniform style interpretability switch refers to Interpretability above which will use 'uniform' tournament style" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tournament_uniform_style_interpretability_switch", + "output": "tournament uniform style interpretability switch refers to Interpretability above which will use 'uniform' tournament style" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tournament_uniform_style_interpretability_switch", + "output": "tournament uniform style interpretability switch refers to Interpretability above which will use 'uniform' tournament style" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_uniform_style_accuracy_switch", + "output": "tournament uniform style accuracy switch refers to Accuracy below which will use uniform style if tournament_style = 'auto' (regardless of other accuracy tournament style switch values)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_uniform_style_accuracy_switch", + "output": "tournament uniform style accuracy switch refers to Accuracy below which will use uniform style if tournament_style = 'auto' (regardless of other accuracy tournament style switch values)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament uniform style accuracy switch", + "output": "tournament uniform style accuracy switch refers to Accuracy below which will use uniform style if tournament_style = 'auto' (regardless of other accuracy tournament style switch values)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + 
"output": "tournament uniform style accuracy switch refers to Accuracy below which will use uniform style if tournament_style = 'auto' (regardless of other accuracy tournament style switch values)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tournament_uniform_style_accuracy_switch", + "output": "tournament uniform style accuracy switch refers to Accuracy below which will use uniform style if tournament_style = 'auto' (regardless of other accuracy tournament style switch values)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tournament_uniform_style_accuracy_switch", + "output": "tournament uniform style accuracy switch refers to Accuracy below which will use uniform style if tournament_style = 'auto' (regardless of other accuracy tournament style switch values)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_model_style_accuracy_switch", + "output": "tournament model style accuracy switch refers to Accuracy equal and above which uses model style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_model_style_accuracy_switch", + "output": "tournament model style accuracy switch refers to Accuracy equal and above which uses model style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament model style accuracy switch", + "output": "tournament model style accuracy switch refers to Accuracy equal and above which uses model style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tournament model style accuracy 
switch refers to Accuracy equal and above which uses model style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tournament_model_style_accuracy_switch", + "output": "tournament model style accuracy switch refers to Accuracy equal and above which uses model style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tournament_model_style_accuracy_switch", + "output": "tournament model style accuracy switch refers to Accuracy equal and above which uses model style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_feature_style_accuracy_switch", + "output": "tournament feature style accuracy switch refers to Accuracy equal and above which uses feature style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_feature_style_accuracy_switch", + "output": "tournament feature style accuracy switch refers to Accuracy equal and above which uses feature style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament feature style accuracy switch", + "output": "tournament feature style accuracy switch refers to Accuracy equal and above which uses feature style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tournament feature style accuracy switch refers to Accuracy equal and above which uses feature style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting 
tournament_feature_style_accuracy_switch", + "output": "tournament feature style accuracy switch refers to Accuracy equal and above which uses feature style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tournament_feature_style_accuracy_switch", + "output": "tournament feature style accuracy switch refers to Accuracy equal and above which uses feature style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_fullstack_style_accuracy_switch", + "output": "tournament fullstack style accuracy switch refers to Accuracy equal and above which uses fullstack style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_fullstack_style_accuracy_switch", + "output": "tournament fullstack style accuracy switch refers to Accuracy equal and above which uses fullstack style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament fullstack style accuracy switch", + "output": "tournament fullstack style accuracy switch refers to Accuracy equal and above which uses fullstack style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tournament fullstack style accuracy switch refers to Accuracy equal and above which uses fullstack style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tournament_fullstack_style_accuracy_switch", + "output": "tournament fullstack style accuracy switch refers to Accuracy equal and above which uses fullstack style if 
tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tournament_fullstack_style_accuracy_switch", + "output": "tournament fullstack style accuracy switch refers to Accuracy equal and above which uses fullstack style if tournament_style = 'auto' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_use_feature_penalized_score", + "output": "tournament use feature penalized score refers to Whether to use penalized score for GA tournament or actual score" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_use_feature_penalized_score", + "output": "tournament use feature penalized score refers to Whether to use penalized score for GA tournament or actual score" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament use feature penalized score", + "output": "tournament use feature penalized score refers to Whether to use penalized score for GA tournament or actual score" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tournament use feature penalized score refers to Whether to use penalized score for GA tournament or actual score" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tournament_use_feature_penalized_score", + "output": "tournament use feature penalized score refers to Whether to use penalized score for GA tournament or actual score" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tournament_use_feature_penalized_score", + "output": "tournament use feature penalized score refers to Whether to use penalized score for 
GA tournament or actual score" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_keep_poor_scores_for_small_data", + "output": "tournament keep poor scores for small data refers to Whether to keep poor scores for small data (<10k rows) in case exploration will find good model. sets tournament_remove_poor_scores_before_evolution_model_factor=1.1 tournament_remove_worse_than_constant_before_evolution=false tournament_keep_absolute_ok_scores_before_evolution_model_factor=1.1 tournament_remove_poor_scores_before_final_model_factor=1.1 tournament_remove_worse_than_constant_before_final_model=true" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_keep_poor_scores_for_small_data", + "output": "tournament keep poor scores for small data refers to Whether to keep poor scores for small data (<10k rows) in case exploration will find good model. sets tournament_remove_poor_scores_before_evolution_model_factor=1.1 tournament_remove_worse_than_constant_before_evolution=false tournament_keep_absolute_ok_scores_before_evolution_model_factor=1.1 tournament_remove_poor_scores_before_final_model_factor=1.1 tournament_remove_worse_than_constant_before_final_model=true" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament keep poor scores for small data", + "output": "tournament keep poor scores for small data refers to Whether to keep poor scores for small data (<10k rows) in case exploration will find good model. 
sets tournament_remove_poor_scores_before_evolution_model_factor=1.1 tournament_remove_worse_than_constant_before_evolution=false tournament_keep_absolute_ok_scores_before_evolution_model_factor=1.1 tournament_remove_poor_scores_before_final_model_factor=1.1 tournament_remove_worse_than_constant_before_final_model=true" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tournament keep poor scores for small data refers to Whether to keep poor scores for small data (<10k rows) in case exploration will find good model. sets tournament_remove_poor_scores_before_evolution_model_factor=1.1 tournament_remove_worse_than_constant_before_evolution=false tournament_keep_absolute_ok_scores_before_evolution_model_factor=1.1 tournament_remove_poor_scores_before_final_model_factor=1.1 tournament_remove_worse_than_constant_before_final_model=true" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tournament_keep_poor_scores_for_small_data", + "output": "tournament keep poor scores for small data refers to Whether to keep poor scores for small data (<10k rows) in case exploration will find good model. sets tournament_remove_poor_scores_before_evolution_model_factor=1.1 tournament_remove_worse_than_constant_before_evolution=false tournament_keep_absolute_ok_scores_before_evolution_model_factor=1.1 tournament_remove_poor_scores_before_final_model_factor=1.1 tournament_remove_worse_than_constant_before_final_model=true" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tournament_keep_poor_scores_for_small_data", + "output": "tournament keep poor scores for small data refers to Whether to keep poor scores for small data (<10k rows) in case exploration will find good model. 
sets tournament_remove_poor_scores_before_evolution_model_factor=1.1 tournament_remove_worse_than_constant_before_evolution=false tournament_keep_absolute_ok_scores_before_evolution_model_factor=1.1 tournament_remove_poor_scores_before_final_model_factor=1.1 tournament_remove_worse_than_constant_before_final_model=true" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_remove_poor_scores_before_evolution_model_factor", + "output": "tournament remove poor scores before evolution model factor refers to Factor (compared to best score plus each score) beyond which to drop poorly scoring models before evolution. This is useful in cases when poorly scoring models take a long time to train." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_remove_poor_scores_before_evolution_model_factor", + "output": "tournament remove poor scores before evolution model factor refers to Factor (compared to best score plus each score) beyond which to drop poorly scoring models before evolution. This is useful in cases when poorly scoring models take a long time to train." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament remove poor scores before evolution model factor", + "output": "tournament remove poor scores before evolution model factor refers to Factor (compared to best score plus each score) beyond which to drop poorly scoring models before evolution. This is useful in cases when poorly scoring models take a long time to train." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tournament remove poor scores before evolution model factor refers to Factor (compared to best score plus each score) beyond which to drop poorly scoring models before evolution. This is useful in cases when poorly scoring models take a long time to train." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tournament_remove_poor_scores_before_evolution_model_factor", + "output": "tournament remove poor scores before evolution model factor refers to Factor (compared to best score plus each score) beyond which to drop poorly scoring models before evolution. This is useful in cases when poorly scoring models take a long time to train." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tournament_remove_poor_scores_before_evolution_model_factor", + "output": "tournament remove poor scores before evolution model factor refers to Factor (compared to best score plus each score) beyond which to drop poorly scoring models before evolution. This is useful in cases when poorly scoring models take a long time to train." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_remove_worse_than_constant_before_evolution", + "output": "tournament remove worse than constant before evolution refers to For before evolution after tuning, whether to remove models that are worse than (optimized to scorer) constant prediction model" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_remove_worse_than_constant_before_evolution", + "output": "tournament remove worse than constant before evolution refers to For before evolution after tuning, whether to remove models that are worse than (optimized to scorer) constant prediction model" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament remove worse than constant before evolution", + "output": "tournament remove worse than constant before evolution refers to For before evolution after tuning, whether to remove models that are worse than (optimized to scorer) constant prediction model" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tournament remove worse than constant before evolution refers to For before evolution after tuning, whether to remove models that are worse than (optimized to scorer) constant prediction model" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tournament_remove_worse_than_constant_before_evolution", + "output": "tournament remove worse than constant before evolution refers to For before evolution after tuning, whether to remove models that are worse than (optimized to scorer) constant prediction model" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting 
tournament_remove_worse_than_constant_before_evolution", + "output": "tournament remove worse than constant before evolution refers to For before evolution after tuning, whether to remove models that are worse than (optimized to scorer) constant prediction model" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_keep_absolute_ok_scores_before_evolution_model_factor", + "output": "tournament keep absolute ok scores before evolution model factor refers to For before evolution after tuning, where on scale of 0 (perfect) to 1 (constant model) to keep ok scores by absolute value." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_keep_absolute_ok_scores_before_evolution_model_factor", + "output": "tournament keep absolute ok scores before evolution model factor refers to For before evolution after tuning, where on scale of 0 (perfect) to 1 (constant model) to keep ok scores by absolute value." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament keep absolute ok scores before evolution model factor", + "output": "tournament keep absolute ok scores before evolution model factor refers to For before evolution after tuning, where on scale of 0 (perfect) to 1 (constant model) to keep ok scores by absolute value." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tournament keep absolute ok scores before evolution model factor refers to For before evolution after tuning, where on scale of 0 (perfect) to 1 (constant model) to keep ok scores by absolute value." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tournament_keep_absolute_ok_scores_before_evolution_model_factor", + "output": "tournament keep absolute ok scores before evolution model factor refers to For before evolution after tuning, where on scale of 0 (perfect) to 1 (constant model) to keep ok scores by absolute value." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tournament_keep_absolute_ok_scores_before_evolution_model_factor", + "output": "tournament keep absolute ok scores before evolution model factor refers to For before evolution after tuning, where on scale of 0 (perfect) to 1 (constant model) to keep ok scores by absolute value." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_remove_poor_scores_before_final_model_factor", + "output": "tournament remove poor scores before final model factor refers to Factor (compared to best score) beyond which to drop poorly scoring models before building final ensemble. This is useful in cases when poorly scoring models take a long time to train." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_remove_poor_scores_before_final_model_factor", + "output": "tournament remove poor scores before final model factor refers to Factor (compared to best score) beyond which to drop poorly scoring models before building final ensemble. This is useful in cases when poorly scoring models take a long time to train." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament remove poor scores before final model factor", + "output": "tournament remove poor scores before final model factor refers to Factor (compared to best score) beyond which to drop poorly scoring models before building final ensemble. This is useful in cases when poorly scoring models take a long time to train." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tournament remove poor scores before final model factor refers to Factor (compared to best score) beyond which to drop poorly scoring models before building final ensemble. This is useful in cases when poorly scoring models take a long time to train." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tournament_remove_poor_scores_before_final_model_factor", + "output": "tournament remove poor scores before final model factor refers to Factor (compared to best score) beyond which to drop poorly scoring models before building final ensemble. This is useful in cases when poorly scoring models take a long time to train." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tournament_remove_poor_scores_before_final_model_factor", + "output": "tournament remove poor scores before final model factor refers to Factor (compared to best score) beyond which to drop poorly scoring models before building final ensemble. This is useful in cases when poorly scoring models take a long time to train." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_remove_worse_than_constant_before_final_model", + "output": "tournament remove worse than constant before final model refers to For before final model after evolution, whether to remove models that are worse than (optimized to scorer) constant prediction model" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament_remove_worse_than_constant_before_final_model", + "output": "tournament remove worse than constant before final model refers to For before final model after evolution, whether to remove models that are worse than (optimized to scorer) constant prediction model" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tournament remove worse than constant before final model", + "output": "tournament remove worse than constant before final model refers to For before final model after evolution, whether to remove models that are worse than (optimized to scorer) constant prediction model" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tournament remove worse than constant before final model refers to For before final model after evolution, whether to remove models that are worse than (optimized to scorer) constant prediction model" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tournament_remove_worse_than_constant_before_final_model", + "output": "tournament remove worse than constant before final model refers to For before final model after evolution, whether to remove models that are worse than (optimized to scorer) constant prediction model" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed 
explanation of the expert setting tournament_remove_worse_than_constant_before_final_model", + "output": "tournament remove worse than constant before final model refers to For before final model after evolution, whether to remove models that are worse than (optimized to scorer) constant prediction model" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_individuals", + "output": "num individuals refers to Driverless AI uses a genetic algorithm (GA) to find the best features, best models and best hyper parameters for these models. The GA facilitates getting good results while not requiring torun/try every possible model/feature/parameter. This version of GA has reinforcement learning elements - it uses a form of exploration-exploitation to reach optimum solutions. This means it will capitalise on models/features/parameters that seem # to be working well and continue to exploit them even more, while allowing some room for trying new (and semi-random) models/features/parameters to avoid settling on a local minimum. These models/features/parameters tried are what-we-call individuals of a population. More # individuals connote more models/features/parameters to be tried and compete to find the best # ones." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_individuals", + "output": "num individuals refers to Driverless AI uses a genetic algorithm (GA) to find the best features, best models and best hyper parameters for these models. The GA facilitates getting good results while not requiring torun/try every possible model/feature/parameter. This version of GA has reinforcement learning elements - it uses a form of exploration-exploitation to reach optimum solutions. 
This means it will capitalise on models/features/parameters that seem # to be working well and continue to exploit them even more, while allowing some room for trying new (and semi-random) models/features/parameters to avoid settling on a local minimum. These models/features/parameters tried are what-we-call individuals of a population. More # individuals connote more models/features/parameters to be tried and compete to find the best # ones." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num individuals", + "output": "num individuals refers to Driverless AI uses a genetic algorithm (GA) to find the best features, best models and best hyper parameters for these models. The GA facilitates getting good results while not requiring torun/try every possible model/feature/parameter. This version of GA has reinforcement learning elements - it uses a form of exploration-exploitation to reach optimum solutions. This means it will capitalise on models/features/parameters that seem # to be working well and continue to exploit them even more, while allowing some room for trying new (and semi-random) models/features/parameters to avoid settling on a local minimum. These models/features/parameters tried are what-we-call individuals of a population. More # individuals connote more models/features/parameters to be tried and compete to find the best # ones." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "num individuals refers to Driverless AI uses a genetic algorithm (GA) to find the best features, best models and best hyper parameters for these models. The GA facilitates getting good results while not requiring torun/try every possible model/feature/parameter. This version of GA has reinforcement learning elements - it uses a form of exploration-exploitation to reach optimum solutions. 
This means it will capitalise on models/features/parameters that seem # to be working well and continue to exploit them even more, while allowing some room for trying new (and semi-random) models/features/parameters to avoid settling on a local minimum. These models/features/parameters tried are what-we-call individuals of a population. More # individuals connote more models/features/parameters to be tried and compete to find the best # ones." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_individuals", + "output": "num individuals refers to Driverless AI uses a genetic algorithm (GA) to find the best features, best models and best hyper parameters for these models. The GA facilitates getting good results while not requiring torun/try every possible model/feature/parameter. This version of GA has reinforcement learning elements - it uses a form of exploration-exploitation to reach optimum solutions. This means it will capitalise on models/features/parameters that seem # to be working well and continue to exploit them even more, while allowing some room for trying new (and semi-random) models/features/parameters to avoid settling on a local minimum. These models/features/parameters tried are what-we-call individuals of a population. More # individuals connote more models/features/parameters to be tried and compete to find the best # ones." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_individuals", + "output": "num individuals refers to Driverless AI uses a genetic algorithm (GA) to find the best features, best models and best hyper parameters for these models. The GA facilitates getting good results while not requiring torun/try every possible model/feature/parameter. This version of GA has reinforcement learning elements - it uses a form of exploration-exploitation to reach optimum solutions. 
This means it will capitalise on models/features/parameters that seem # to be working well and continue to exploit them even more, while allowing some room for trying new (and semi-random) models/features/parameters to avoid settling on a local minimum. These models/features/parameters tried are what-we-call individuals of a population. More # individuals connote more models/features/parameters to be tried and compete to find the best # ones." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_num_individuals", + "output": "fixed num individuals refers to set fixed number of individuals (if > 0) - useful to compare different hardware configurations. If want 3 individuals in GA race to be preserved, choose 6, since need 1 mutatable loser per surviving individual." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_num_individuals", + "output": "fixed num individuals refers to set fixed number of individuals (if > 0) - useful to compare different hardware configurations. If want 3 individuals in GA race to be preserved, choose 6, since need 1 mutatable loser per surviving individual." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed num individuals", + "output": "fixed num individuals refers to set fixed number of individuals (if > 0) - useful to compare different hardware configurations. If want 3 individuals in GA race to be preserved, choose 6, since need 1 mutatable loser per surviving individual." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "fixed num individuals refers to set fixed number of individuals (if > 0) - useful to compare different hardware configurations. 
If want 3 individuals in GA race to be preserved, choose 6, since need 1 mutatable loser per surviving individual." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fixed_num_individuals", + "output": "fixed num individuals refers to set fixed number of individuals (if > 0) - useful to compare different hardware configurations. If want 3 individuals in GA race to be preserved, choose 6, since need 1 mutatable loser per surviving individual." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fixed_num_individuals", + "output": "fixed num individuals refers to set fixed number of individuals (if > 0) - useful to compare different hardware configurations. If want 3 individuals in GA race to be preserved, choose 6, since need 1 mutatable loser per surviving individual." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "sanitize_natural_sort_limit", + "output": "sanitize natural sort limit refers to number of unique targets or folds counts after which switch to faster/simpler non-natural sorting and print outs" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "sanitize_natural_sort_limit", + "output": "sanitize natural sort limit refers to number of unique targets or folds counts after which switch to faster/simpler non-natural sorting and print outs" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "sanitize natural sort limit", + "output": "sanitize natural sort limit refers to number of unique targets or folds counts after which switch to faster/simpler non-natural sorting and print outs" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": 
"sanitize natural sort limit refers to number of unique targets or folds counts after which switch to faster/simpler non-natural sorting and print outs" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting sanitize_natural_sort_limit", + "output": "sanitize natural sort limit refers to number of unique targets or folds counts after which switch to faster/simpler non-natural sorting and print outs" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting sanitize_natural_sort_limit", + "output": "sanitize natural sort limit refers to number of unique targets or folds counts after which switch to faster/simpler non-natural sorting and print outs" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "head_tail_fold_id_report_length", + "output": "head tail fold id report length refers to number of fold ids to report cardinality for, both most common (head) and least common (tail)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "head_tail_fold_id_report_length", + "output": "head tail fold id report length refers to number of fold ids to report cardinality for, both most common (head) and least common (tail)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "head tail fold id report length", + "output": "head tail fold id report length refers to number of fold ids to report cardinality for, both most common (head) and least common (tail)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "head tail fold id report length refers to number of fold ids to report cardinality for, both most common (head) and least common (tail)" + }, + { + "prompt_type": 
"human_bot", + "instruction": "Provide a short explanation of the expert setting head_tail_fold_id_report_length", + "output": "head tail fold id report length refers to number of fold ids to report cardinality for, both most common (head) and least common (tail)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting head_tail_fold_id_report_length", + "output": "head tail fold id report length refers to number of fold ids to report cardinality for, both most common (head) and least common (tail)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_target_encoding", + "output": "enable target encoding refers to Whether target encoding (CV target encoding, weight of evidence, etc.) could be enabledTarget encoding refers to several different feature transformations (primarily focused oncategorical data) that aim to represent the feature using information of the actualtarget variable. A simple example can be to use the mean of the target to replace eachunique category of a categorical feature. This type of features can be very predictive,but are prone to overfitting and require more memory as they need to store mappings ofthe unique categories and the target values. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_target_encoding", + "output": "enable target encoding refers to Enable Target Encoding (auto disables for time series): Whether target encoding (CV target encoding, weight of evidence, etc.) could be enabledTarget encoding refers to several different feature transformations (primarily focused oncategorical data) that aim to represent the feature using information of the actualtarget variable. A simple example can be to use the mean of the target to replace eachunique category of a categorical feature. 
This type of features can be very predictive,but are prone to overfitting and require more memory as they need to store mappings ofthe unique categories and the target values. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable target encoding", + "output": "enable target encoding refers to Enable Target Encoding (auto disables for time series): Whether target encoding (CV target encoding, weight of evidence, etc.) could be enabledTarget encoding refers to several different feature transformations (primarily focused oncategorical data) that aim to represent the feature using information of the actualtarget variable. A simple example can be to use the mean of the target to replace eachunique category of a categorical feature. This type of features can be very predictive,but are prone to overfitting and require more memory as they need to store mappings ofthe unique categories and the target values. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable Target Encoding (auto disables for time series): ", + "output": "enable target encoding refers to Enable Target Encoding (auto disables for time series): Whether target encoding (CV target encoding, weight of evidence, etc.) could be enabledTarget encoding refers to several different feature transformations (primarily focused oncategorical data) that aim to represent the feature using information of the actualtarget variable. A simple example can be to use the mean of the target to replace eachunique category of a categorical feature. This type of features can be very predictive,but are prone to overfitting and require more memory as they need to store mappings ofthe unique categories and the target values. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_target_encoding", + "output": "enable target encoding refers to Whether target encoding (CV target encoding, weight of evidence, etc.) could be enabledTarget encoding refers to several different feature transformations (primarily focused oncategorical data) that aim to represent the feature using information of the actualtarget variable. A simple example can be to use the mean of the target to replace eachunique category of a categorical feature. This type of features can be very predictive,but are prone to overfitting and require more memory as they need to store mappings ofthe unique categories and the target values. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_target_encoding", + "output": "enable target encoding refers to Enable Target Encoding (auto disables for time series): Whether target encoding (CV target encoding, weight of evidence, etc.) could be enabledTarget encoding refers to several different feature transformations (primarily focused oncategorical data) that aim to represent the feature using information of the actualtarget variable. A simple example can be to use the mean of the target to replace eachunique category of a categorical feature. This type of features can be very predictive,but are prone to overfitting and require more memory as they need to store mappings ofthe unique categories and the target values. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cvte_cv_in_cv_use_model", + "output": "cvte cv in cv use model refers to For target encoding, whether a model is used to compute Ginis for checking sanity of transformer. Requires cvte_cv_in_cv to be enabled. If enabled, CV-in-CV isn't done in case the check fails." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cvte_cv_in_cv_use_model", + "output": "cvte cv in cv use model refers to For target encoding, whether a model is used to compute Ginis for checking sanity of transformer. Requires cvte_cv_in_cv to be enabled. If enabled, CV-in-CV isn't done in case the check fails." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cvte cv in cv use model", + "output": "cvte cv in cv use model refers to For target encoding, whether a model is used to compute Ginis for checking sanity of transformer. Requires cvte_cv_in_cv to be enabled. If enabled, CV-in-CV isn't done in case the check fails." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "cvte cv in cv use model refers to For target encoding, whether a model is used to compute Ginis for checking sanity of transformer. Requires cvte_cv_in_cv to be enabled. If enabled, CV-in-CV isn't done in case the check fails." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting cvte_cv_in_cv_use_model", + "output": "cvte cv in cv use model refers to For target encoding, whether a model is used to compute Ginis for checking sanity of transformer. Requires cvte_cv_in_cv to be enabled. If enabled, CV-in-CV isn't done in case the check fails." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting cvte_cv_in_cv_use_model", + "output": "cvte cv in cv use model refers to For target encoding, whether a model is used to compute Ginis for checking sanity of transformer. Requires cvte_cv_in_cv to be enabled. If enabled, CV-in-CV isn't done in case the check fails." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cvte_cv_in_cv", + "output": "cvte cv in cv refers to For target encoding,whether an outer level of cross-fold validation is performed,in cases when GINI is detected to flip sign (or have inconsistent sign for weight of evidence)between fit_transform on training, transform on training, and transform on validation data.The degree to which GINI is poor is also used to perform fold-averaging of look-up tables insteadof using global look-up tables. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cvte_cv_in_cv", + "output": "cvte cv in cv refers to Enable outer CV for Target Encoding: For target encoding,whether an outer level of cross-fold validation is performed,in cases when GINI is detected to flip sign (or have inconsistent sign for weight of evidence)between fit_transform on training, transform on training, and transform on validation data.The degree to which GINI is poor is also used to perform fold-averaging of look-up tables insteadof using global look-up tables. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cvte cv in cv", + "output": "cvte cv in cv refers to Enable outer CV for Target Encoding: For target encoding,whether an outer level of cross-fold validation is performed,in cases when GINI is detected to flip sign (or have inconsistent sign for weight of evidence)between fit_transform on training, transform on training, and transform on validation data.The degree to which GINI is poor is also used to perform fold-averaging of look-up tables insteadof using global look-up tables. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable outer CV for Target Encoding: ", + "output": "cvte cv in cv refers to Enable outer CV for Target Encoding: For target encoding,whether an outer level of cross-fold validation is performed,in cases when GINI is detected to flip sign (or have inconsistent sign for weight of evidence)between fit_transform on training, transform on training, and transform on validation data.The degree to which GINI is poor is also used to perform fold-averaging of look-up tables insteadof using global look-up tables. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting cvte_cv_in_cv", + "output": "cvte cv in cv refers to For target encoding,whether an outer level of cross-fold validation is performed,in cases when GINI is detected to flip sign (or have inconsistent sign for weight of evidence)between fit_transform on training, transform on training, and transform on validation data.The degree to which GINI is poor is also used to perform fold-averaging of look-up tables insteadof using global look-up tables. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting cvte_cv_in_cv", + "output": "cvte cv in cv refers to Enable outer CV for Target Encoding: For target encoding,whether an outer level of cross-fold validation is performed,in cases when GINI is detected to flip sign (or have inconsistent sign for weight of evidence)between fit_transform on training, transform on training, and transform on validation data.The degree to which GINI is poor is also used to perform fold-averaging of look-up tables insteadof using global look-up tables. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cv_in_cv_overconfidence_protection", + "output": "cv in cv overconfidence protection refers to For target encoding,when an outer level of cross-fold validation is performed,increase number of outer folds or abort target encoding when GINI between feature and targetare not close between fit_transform on training, transform on training, and transform on validation data. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cv_in_cv_overconfidence_protection", + "output": "cv in cv overconfidence protection refers to Enable outer CV for Target Encoding with overconfidence protection: For target encoding,when an outer level of cross-fold validation is performed,increase number of outer folds or abort target encoding when GINI between feature and targetare not close between fit_transform on training, transform on training, and transform on validation data. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cv in cv overconfidence protection", + "output": "cv in cv overconfidence protection refers to Enable outer CV for Target Encoding with overconfidence protection: For target encoding,when an outer level of cross-fold validation is performed,increase number of outer folds or abort target encoding when GINI between feature and targetare not close between fit_transform on training, transform on training, and transform on validation data. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable outer CV for Target Encoding with overconfidence protection: ", + "output": "cv in cv overconfidence protection refers to Enable outer CV for Target Encoding with overconfidence protection: For target encoding,when an outer level of cross-fold validation is performed,increase number of outer folds or abort target encoding when GINI between feature and targetare not close between fit_transform on training, transform on training, and transform on validation data. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting cv_in_cv_overconfidence_protection", + "output": "cv in cv overconfidence protection refers to For target encoding,when an outer level of cross-fold validation is performed,increase number of outer folds or abort target encoding when GINI between feature and targetare not close between fit_transform on training, transform on training, and transform on validation data. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting cv_in_cv_overconfidence_protection", + "output": "cv in cv overconfidence protection refers to Enable outer CV for Target Encoding with overconfidence protection: For target encoding,when an outer level of cross-fold validation is performed,increase number of outer folds or abort target encoding when GINI between feature and targetare not close between fit_transform on training, transform on training, and transform on validation data. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lexilabel_encoding", + "output": "enable lexilabel encoding refers to Enable Lexicographical Label Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lexilabel_encoding", + "output": "enable lexilabel encoding refers to Enable Lexicographical Label Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable lexilabel encoding", + "output": "enable lexilabel encoding refers to Enable Lexicographical Label Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable Lexicographical Label Encoding: ", + "output": "enable lexilabel encoding refers to Enable Lexicographical Label Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_lexilabel_encoding", + "output": "enable lexilabel encoding refers to Enable Lexicographical Label Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_lexilabel_encoding", + "output": "enable lexilabel encoding refers to Enable Lexicographical Label Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_isolation_forest", + "output": "enable isolation forest refers to Enable Isolation Forest Anomaly Score Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_isolation_forest", + "output": "enable isolation forest refers to Enable Isolation Forest Anomaly Score Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": 
"Explain the following expert setting for Driverless AI", + "input": "enable isolation forest", + "output": "enable isolation forest refers to Enable Isolation Forest Anomaly Score Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable Isolation Forest Anomaly Score Encoding: ", + "output": "enable isolation forest refers to Enable Isolation Forest Anomaly Score Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_isolation_forest", + "output": "enable isolation forest refers to Enable Isolation Forest Anomaly Score Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_isolation_forest", + "output": "enable isolation forest refers to Enable Isolation Forest Anomaly Score Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_one_hot_encoding", + "output": "enable one hot encoding refers to Whether one hot encoding could be enabled. If auto, then only applied for small data and GLM." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_one_hot_encoding", + "output": "enable one hot encoding refers to Enable One HotEncoding (auto enables only for GLM): Whether one hot encoding could be enabled. If auto, then only applied for small data and GLM." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable one hot encoding", + "output": "enable one hot encoding refers to Enable One HotEncoding (auto enables only for GLM): Whether one hot encoding could be enabled. If auto, then only applied for small data and GLM." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable One HotEncoding (auto enables only for GLM): ", + "output": "enable one hot encoding refers to Enable One HotEncoding (auto enables only for GLM): Whether one hot encoding could be enabled. If auto, then only applied for small data and GLM." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_one_hot_encoding", + "output": "enable one hot encoding refers to Whether one hot encoding could be enabled. If auto, then only applied for small data and GLM." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_one_hot_encoding", + "output": "enable one hot encoding refers to Enable One HotEncoding (auto enables only for GLM): Whether one hot encoding could be enabled. If auto, then only applied for small data and GLM." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner_cardinality_limiter", + "output": "binner cardinality limiter refers to Limit number of output features (total number of bins) created by all BinnerTransformers based on this value, scaled by accuracy, interpretability and dataset size. 0 means unlimited." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner_cardinality_limiter", + "output": "binner cardinality limiter refers to Limit number of output features (total number of bins) created by all BinnerTransformers based on this value, scaled by accuracy, interpretability and dataset size. 0 means unlimited." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner cardinality limiter", + "output": "binner cardinality limiter refers to Limit number of output features (total number of bins) created by all BinnerTransformers based on this value, scaled by accuracy, interpretability and dataset size. 0 means unlimited." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "binner cardinality limiter refers to Limit number of output features (total number of bins) created by all BinnerTransformers based on this value, scaled by accuracy, interpretability and dataset size. 0 means unlimited." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting binner_cardinality_limiter", + "output": "binner cardinality limiter refers to Limit number of output features (total number of bins) created by all BinnerTransformers based on this value, scaled by accuracy, interpretability and dataset size. 0 means unlimited." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting binner_cardinality_limiter", + "output": "binner cardinality limiter refers to Limit number of output features (total number of bins) created by all BinnerTransformers based on this value, scaled by accuracy, interpretability and dataset size. 0 means unlimited." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_binning", + "output": "enable binning refers to Whether simple binning of numeric features should be enabled by default. If auto, then only for GLM/FTRL/TensorFlow/GrowNet for time-series or for interpretability >= 6. Binning can help linear (or simple) models by exposing more signal for features that are not linearly correlated with the target. 
Note that NumCatTransformer and NumToCatTransformer already do binning, but also perform target encoding, which makes them less interpretable. The BinnerTransformer is more interpretable, and also works for time series." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_binning", + "output": "enable binning refers to Enable BinnerTransformer for simple numeric binning (auto enables only for GLM/FTRL/TensorFlow/GrowNet): Whether simple binning of numeric features should be enabled by default. If auto, then only for GLM/FTRL/TensorFlow/GrowNet for time-series or for interpretability >= 6. Binning can help linear (or simple) models by exposing more signal for features that are not linearly correlated with the target. Note that NumCatTransformer and NumToCatTransformer already do binning, but also perform target encoding, which makes them less interpretable. The BinnerTransformer is more interpretable, and also works for time series." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable binning", + "output": "enable binning refers to Enable BinnerTransformer for simple numeric binning (auto enables only for GLM/FTRL/TensorFlow/GrowNet): Whether simple binning of numeric features should be enabled by default. If auto, then only for GLM/FTRL/TensorFlow/GrowNet for time-series or for interpretability >= 6. Binning can help linear (or simple) models by exposing more signal for features that are not linearly correlated with the target. Note that NumCatTransformer and NumToCatTransformer already do binning, but also perform target encoding, which makes them less interpretable. The BinnerTransformer is more interpretable, and also works for time series." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable BinnerTransformer for simple numeric binning (auto enables only for GLM/FTRL/TensorFlow/GrowNet): ", + "output": "enable binning refers to Enable BinnerTransformer for simple numeric binning (auto enables only for GLM/FTRL/TensorFlow/GrowNet): Whether simple binning of numeric features should be enabled by default. If auto, then only for GLM/FTRL/TensorFlow/GrowNet for time-series or for interpretability >= 6. Binning can help linear (or simple) models by exposing more signal for features that are not linearly correlated with the target. Note that NumCatTransformer and NumToCatTransformer already do binning, but also perform target encoding, which makes them less interpretable. The BinnerTransformer is more interpretable, and also works for time series." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_binning", + "output": "enable binning refers to Whether simple binning of numeric features should be enabled by default. If auto, then only for GLM/FTRL/TensorFlow/GrowNet for time-series or for interpretability >= 6. Binning can help linear (or simple) models by exposing more signal for features that are not linearly correlated with the target. Note that NumCatTransformer and NumToCatTransformer already do binning, but also perform target encoding, which makes them less interpretable. The BinnerTransformer is more interpretable, and also works for time series." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_binning", + "output": "enable binning refers to Enable BinnerTransformer for simple numeric binning (auto enables only for GLM/FTRL/TensorFlow/GrowNet): Whether simple binning of numeric features should be enabled by default. 
If auto, then only for GLM/FTRL/TensorFlow/GrowNet for time-series or for interpretability >= 6. Binning can help linear (or simple) models by exposing more signal for features that are not linearly correlated with the target. Note that NumCatTransformer and NumToCatTransformer already do binning, but also perform target encoding, which makes them less interpretable. The BinnerTransformer is more interpretable, and also works for time series." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner_bin_method", + "output": "binner bin method refers to Tree uses XGBoost to find optimal split points for binning of numeric features. Quantile use quantile-based binning. Might fall back to quantile-based if too many classes or not enough unique values." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner_bin_method", + "output": "binner bin method refers to Select methods used to find bins for Binner Transformer: Tree uses XGBoost to find optimal split points for binning of numeric features. Quantile use quantile-based binning. Might fall back to quantile-based if too many classes or not enough unique values." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner bin method", + "output": "binner bin method refers to Select methods used to find bins for Binner Transformer: Tree uses XGBoost to find optimal split points for binning of numeric features. Quantile use quantile-based binning. Might fall back to quantile-based if too many classes or not enough unique values." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Select methods used to find bins for Binner Transformer: ", + "output": "binner bin method refers to Select methods used to find bins for Binner Transformer: Tree uses XGBoost to find optimal split points for binning of numeric features. Quantile use quantile-based binning. Might fall back to quantile-based if too many classes or not enough unique values." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting binner_bin_method", + "output": "binner bin method refers to Tree uses XGBoost to find optimal split points for binning of numeric features. Quantile use quantile-based binning. Might fall back to quantile-based if too many classes or not enough unique values." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting binner_bin_method", + "output": "binner bin method refers to Select methods used to find bins for Binner Transformer: Tree uses XGBoost to find optimal split points for binning of numeric features. Quantile use quantile-based binning. Might fall back to quantile-based if too many classes or not enough unique values." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner_minimize_bins", + "output": "binner minimize bins refers to If enabled, will attempt to reduce the number of bins during binning of numeric features. Applies to both tree-based and quantile-based bins." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner_minimize_bins", + "output": "binner minimize bins refers to Enable automatic reduction of number of bins for Binner Transformer: If enabled, will attempt to reduce the number of bins during binning of numeric features. 
Applies to both tree-based and quantile-based bins." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner minimize bins", + "output": "binner minimize bins refers to Enable automatic reduction of number of bins for Binner Transformer: If enabled, will attempt to reduce the number of bins during binning of numeric features. Applies to both tree-based and quantile-based bins." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable automatic reduction of number of bins for Binner Transformer: ", + "output": "binner minimize bins refers to Enable automatic reduction of number of bins for Binner Transformer: If enabled, will attempt to reduce the number of bins during binning of numeric features. Applies to both tree-based and quantile-based bins." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting binner_minimize_bins", + "output": "binner minimize bins refers to If enabled, will attempt to reduce the number of bins during binning of numeric features. Applies to both tree-based and quantile-based bins." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting binner_minimize_bins", + "output": "binner minimize bins refers to Enable automatic reduction of number of bins for Binner Transformer: If enabled, will attempt to reduce the number of bins during binning of numeric features. Applies to both tree-based and quantile-based bins." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner_encoding", + "output": "binner encoding refers to Given a set of bins (cut points along min...max), the encoding scheme converts the original numeric feature values into the values of the output columns (one column per bin, and one extra bin for missing values if any). Piecewise linear is 0 left of the bin, and 1 right of the bin, and grows linearly from 0 to 1 inside the bin. Binary is 1 inside the bin and 0 outside the bin. Missing value bin encoding is always binary, either 0 or 1. If no missing values in the data, then there is no missing value bin. Piecewise linear helps to encode growing values and keeps smooth transitions across the bin boundaries, while binary is best suited for detecting specific values in the data. Both are great at providing features to models that otherwise lack non-linear pattern detection." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner_encoding", + "output": "binner encoding refers to Select encoding schemes for Binner Transformer: Given a set of bins (cut points along min...max), the encoding scheme converts the original numeric feature values into the values of the output columns (one column per bin, and one extra bin for missing values if any). Piecewise linear is 0 left of the bin, and 1 right of the bin, and grows linearly from 0 to 1 inside the bin. Binary is 1 inside the bin and 0 outside the bin. Missing value bin encoding is always binary, either 0 or 1. If no missing values in the data, then there is no missing value bin. Piecewise linear helps to encode growing values and keeps smooth transitions across the bin boundaries, while binary is best suited for detecting specific values in the data. Both are great at providing features to models that otherwise lack non-linear pattern detection." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner encoding", + "output": "binner encoding refers to Select encoding schemes for Binner Transformer: Given a set of bins (cut points along min...max), the encoding scheme converts the original numeric feature values into the values of the output columns (one column per bin, and one extra bin for missing values if any). Piecewise linear is 0 left of the bin, and 1 right of the bin, and grows linearly from 0 to 1 inside the bin. Binary is 1 inside the bin and 0 outside the bin. Missing value bin encoding is always binary, either 0 or 1. If no missing values in the data, then there is no missing value bin. Piecewise linear helps to encode growing values and keeps smooth transitions across the bin boundaries, while binary is best suited for detecting specific values in the data. Both are great at providing features to models that otherwise lack non-linear pattern detection." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Select encoding schemes for Binner Transformer: ", + "output": "binner encoding refers to Select encoding schemes for Binner Transformer: Given a set of bins (cut points along min...max), the encoding scheme converts the original numeric feature values into the values of the output columns (one column per bin, and one extra bin for missing values if any). Piecewise linear is 0 left of the bin, and 1 right of the bin, and grows linearly from 0 to 1 inside the bin. Binary is 1 inside the bin and 0 outside the bin. Missing value bin encoding is always binary, either 0 or 1. If no missing values in the data, then there is no missing value bin. Piecewise linear helps to encode growing values and keeps smooth transitions across the bin boundaries, while binary is best suited for detecting specific values in the data. 
Both are great at providing features to models that otherwise lack non-linear pattern detection." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting binner_encoding", + "output": "binner encoding refers to Given a set of bins (cut points along min...max), the encoding scheme converts the original numeric feature values into the values of the output columns (one column per bin, and one extra bin for missing values if any). Piecewise linear is 0 left of the bin, and 1 right of the bin, and grows linearly from 0 to 1 inside the bin. Binary is 1 inside the bin and 0 outside the bin. Missing value bin encoding is always binary, either 0 or 1. If no missing values in the data, then there is no missing value bin. Piecewise linear helps to encode growing values and keeps smooth transitions across the bin boundaries, while binary is best suited for detecting specific values in the data. Both are great at providing features to models that otherwise lack non-linear pattern detection." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting binner_encoding", + "output": "binner encoding refers to Select encoding schemes for Binner Transformer: Given a set of bins (cut points along min...max), the encoding scheme converts the original numeric feature values into the values of the output columns (one column per bin, and one extra bin for missing values if any). Piecewise linear is 0 left of the bin, and 1 right of the bin, and grows linearly from 0 to 1 inside the bin. Binary is 1 inside the bin and 0 outside the bin. Missing value bin encoding is always binary, either 0 or 1. If no missing values in the data, then there is no missing value bin. Piecewise linear helps to encode growing values and keeps smooth transitions across the bin boundaries, while binary is best suited for detecting specific values in the data. 
Both are great at providing features to models that otherwise lack non-linear pattern detection." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner_include_original", + "output": "binner include original refers to If enabled (default), include the original feature value as an output feature for the BinnerTransformer. This ensures that the BinnerTransformer never has less signal than the OriginalTransformer, since they can be chosen exclusively. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner_include_original", + "output": "binner include original refers to Include Original feature value as part of output of Binner Transformer: If enabled (default), include the original feature value as an output feature for the BinnerTransformer. This ensures that the BinnerTransformer never has less signal than the OriginalTransformer, since they can be chosen exclusively. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner include original", + "output": "binner include original refers to Include Original feature value as part of output of Binner Transformer: If enabled (default), include the original feature value as an output feature for the BinnerTransformer. This ensures that the BinnerTransformer never has less signal than the OriginalTransformer, since they can be chosen exclusively. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Include Original feature value as part of output of Binner Transformer: ", + "output": "binner include original refers to Include Original feature value as part of output of Binner Transformer: If enabled (default), include the original feature value as an output feature for the BinnerTransformer. 
This ensures that the BinnerTransformer never has less signal than the OriginalTransformer, since they can be chosen exclusively. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting binner_include_original", + "output": "binner include original refers to If enabled (default), include the original feature value as an output feature for the BinnerTransformer. This ensures that the BinnerTransformer never has less signal than the OriginalTransformer, since they can be chosen exclusively. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting binner_include_original", + "output": "binner include original refers to Include Original feature value as part of output of Binner Transformer: If enabled (default), include the original feature value as an output feature for the BinnerTransformer. This ensures that the BinnerTransformer never has less signal than the OriginalTransformer, since they can be chosen exclusively. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "isolation_forest_nestimators", + "output": "isolation forest nestimators refers to Num. Estimators for Isolation Forest Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "isolation_forest_nestimators", + "output": "isolation forest nestimators refers to Num. Estimators for Isolation Forest Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "isolation forest nestimators", + "output": "isolation forest nestimators refers to Num. Estimators for Isolation Forest Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Num. 
Estimators for Isolation Forest Encoding: ", + "output": "isolation forest nestimators refers to Num. Estimators for Isolation Forest Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting isolation_forest_nestimators", + "output": "isolation forest nestimators refers to Num. Estimators for Isolation Forest Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting isolation_forest_nestimators", + "output": "isolation forest nestimators refers to Num. Estimators for Isolation Forest Encoding: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included_transformers", + "output": "included transformers refers to Transformer display names to indicate which transformers to use in experiment.More information for these transformers can be viewed here:http://docs.h2o.ai/driverless-ai/latest-stable/docs/userguide/transformations.htmlThis section allows including/excluding these transformations and may be useful whensimpler (more interpretable) models are sought at the expense of accuracy.the interpretability setting)for multi-class: '['NumCatTETransformer', 'TextLinModelTransformer','FrequentTransformer', 'CVTargetEncodeTransformer', 'ClusterDistTransformer','WeightOfEvidenceTransformer', 'TruncSVDNumTransformer', 'CVCatNumEncodeTransformer','DatesTransformer', 'TextTransformer', 'OriginalTransformer','NumToCatWoETransformer', 'NumToCatTETransformer', 'ClusterTETransformer','InteractionsTransformer']'for regression/binary: '['TextTransformer', 'ClusterDistTransformer','OriginalTransformer', 'TextLinModelTransformer', 'NumToCatTETransformer','DatesTransformer', 'WeightOfEvidenceTransformer', 'InteractionsTransformer','FrequentTransformer', 'CVTargetEncodeTransformer', 'NumCatTETransformer','NumToCatWoETransformer', 'TruncSVDNumTransformer', 
'ClusterTETransformer','CVCatNumEncodeTransformer']'This list appears in the experiment logs (search for 'Transformers used') " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included_transformers", + "output": "included transformers refers to Include specific transformers: Transformer display names to indicate which transformers to use in experiment.More information for these transformers can be viewed here:http://docs.h2o.ai/driverless-ai/latest-stable/docs/userguide/transformations.htmlThis section allows including/excluding these transformations and may be useful whensimpler (more interpretable) models are sought at the expense of accuracy.the interpretability setting)for multi-class: '['NumCatTETransformer', 'TextLinModelTransformer','FrequentTransformer', 'CVTargetEncodeTransformer', 'ClusterDistTransformer','WeightOfEvidenceTransformer', 'TruncSVDNumTransformer', 'CVCatNumEncodeTransformer','DatesTransformer', 'TextTransformer', 'OriginalTransformer','NumToCatWoETransformer', 'NumToCatTETransformer', 'ClusterTETransformer','InteractionsTransformer']'for regression/binary: '['TextTransformer', 'ClusterDistTransformer','OriginalTransformer', 'TextLinModelTransformer', 'NumToCatTETransformer','DatesTransformer', 'WeightOfEvidenceTransformer', 'InteractionsTransformer','FrequentTransformer', 'CVTargetEncodeTransformer', 'NumCatTETransformer','NumToCatWoETransformer', 'TruncSVDNumTransformer', 'ClusterTETransformer','CVCatNumEncodeTransformer']'This list appears in the experiment logs (search for 'Transformers used') " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included transformers", + "output": "included transformers refers to Include specific transformers: Transformer display names to indicate which transformers to use in experiment.More information for these transformers can be viewed 
here:http://docs.h2o.ai/driverless-ai/latest-stable/docs/userguide/transformations.htmlThis section allows including/excluding these transformations and may be useful whensimpler (more interpretable) models are sought at the expense of accuracy.the interpretability setting)for multi-class: '['NumCatTETransformer', 'TextLinModelTransformer','FrequentTransformer', 'CVTargetEncodeTransformer', 'ClusterDistTransformer','WeightOfEvidenceTransformer', 'TruncSVDNumTransformer', 'CVCatNumEncodeTransformer','DatesTransformer', 'TextTransformer', 'OriginalTransformer','NumToCatWoETransformer', 'NumToCatTETransformer', 'ClusterTETransformer','InteractionsTransformer']'for regression/binary: '['TextTransformer', 'ClusterDistTransformer','OriginalTransformer', 'TextLinModelTransformer', 'NumToCatTETransformer','DatesTransformer', 'WeightOfEvidenceTransformer', 'InteractionsTransformer','FrequentTransformer', 'CVTargetEncodeTransformer', 'NumCatTETransformer','NumToCatWoETransformer', 'TruncSVDNumTransformer', 'ClusterTETransformer','CVCatNumEncodeTransformer']'This list appears in the experiment logs (search for 'Transformers used') " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Include specific transformers: ", + "output": "included transformers refers to Include specific transformers: Transformer display names to indicate which transformers to use in experiment.More information for these transformers can be viewed here:http://docs.h2o.ai/driverless-ai/latest-stable/docs/userguide/transformations.htmlThis section allows including/excluding these transformations and may be useful whensimpler (more interpretable) models are sought at the expense of accuracy.the interpretability setting)for multi-class: '['NumCatTETransformer', 'TextLinModelTransformer','FrequentTransformer', 'CVTargetEncodeTransformer', 'ClusterDistTransformer','WeightOfEvidenceTransformer', 'TruncSVDNumTransformer', 
'CVCatNumEncodeTransformer','DatesTransformer', 'TextTransformer', 'OriginalTransformer','NumToCatWoETransformer', 'NumToCatTETransformer', 'ClusterTETransformer','InteractionsTransformer']'for regression/binary: '['TextTransformer', 'ClusterDistTransformer','OriginalTransformer', 'TextLinModelTransformer', 'NumToCatTETransformer','DatesTransformer', 'WeightOfEvidenceTransformer', 'InteractionsTransformer','FrequentTransformer', 'CVTargetEncodeTransformer', 'NumCatTETransformer','NumToCatWoETransformer', 'TruncSVDNumTransformer', 'ClusterTETransformer','CVCatNumEncodeTransformer']'This list appears in the experiment logs (search for 'Transformers used') " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting included_transformers", + "output": "included transformers refers to Transformer display names to indicate which transformers to use in experiment.More information for these transformers can be viewed here:http://docs.h2o.ai/driverless-ai/latest-stable/docs/userguide/transformations.htmlThis section allows including/excluding these transformations and may be useful whensimpler (more interpretable) models are sought at the expense of accuracy.the interpretability setting)for multi-class: '['NumCatTETransformer', 'TextLinModelTransformer','FrequentTransformer', 'CVTargetEncodeTransformer', 'ClusterDistTransformer','WeightOfEvidenceTransformer', 'TruncSVDNumTransformer', 'CVCatNumEncodeTransformer','DatesTransformer', 'TextTransformer', 'OriginalTransformer','NumToCatWoETransformer', 'NumToCatTETransformer', 'ClusterTETransformer','InteractionsTransformer']'for regression/binary: '['TextTransformer', 'ClusterDistTransformer','OriginalTransformer', 'TextLinModelTransformer', 'NumToCatTETransformer','DatesTransformer', 'WeightOfEvidenceTransformer', 'InteractionsTransformer','FrequentTransformer', 'CVTargetEncodeTransformer', 'NumCatTETransformer','NumToCatWoETransformer', 'TruncSVDNumTransformer', 
'ClusterTETransformer','CVCatNumEncodeTransformer']'This list appears in the experiment logs (search for 'Transformers used') " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting included_transformers", + "output": "included transformers refers to Include specific transformers: Transformer display names to indicate which transformers to use in experiment.More information for these transformers can be viewed here:http://docs.h2o.ai/driverless-ai/latest-stable/docs/userguide/transformations.htmlThis section allows including/excluding these transformations and may be useful whensimpler (more interpretable) models are sought at the expense of accuracy.the interpretability setting)for multi-class: '['NumCatTETransformer', 'TextLinModelTransformer','FrequentTransformer', 'CVTargetEncodeTransformer', 'ClusterDistTransformer','WeightOfEvidenceTransformer', 'TruncSVDNumTransformer', 'CVCatNumEncodeTransformer','DatesTransformer', 'TextTransformer', 'OriginalTransformer','NumToCatWoETransformer', 'NumToCatTETransformer', 'ClusterTETransformer','InteractionsTransformer']'for regression/binary: '['TextTransformer', 'ClusterDistTransformer','OriginalTransformer', 'TextLinModelTransformer', 'NumToCatTETransformer','DatesTransformer', 'WeightOfEvidenceTransformer', 'InteractionsTransformer','FrequentTransformer', 'CVTargetEncodeTransformer', 'NumCatTETransformer','NumToCatWoETransformer', 'TruncSVDNumTransformer', 'ClusterTETransformer','CVCatNumEncodeTransformer']'This list appears in the experiment logs (search for 'Transformers used') " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_transformers", + "output": "excluded transformers refers to Auxiliary to included_transformers e.g. to disable all Target Encoding: excluded_transformers = '['NumCatTETransformer', 'CVTargetEncodeF', 'NumToCatTETransformer', 'ClusterTETransformer']'. 
Does not affect transformers used for preprocessing with included_pretransformers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_transformers", + "output": "excluded transformers refers to Exclude specific transformers: Auxiliary to included_transformers e.g. to disable all Target Encoding: excluded_transformers = '['NumCatTETransformer', 'CVTargetEncodeF', 'NumToCatTETransformer', 'ClusterTETransformer']'. Does not affect transformers used for preprocessing with included_pretransformers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded transformers", + "output": "excluded transformers refers to Exclude specific transformers: Auxiliary to included_transformers e.g. to disable all Target Encoding: excluded_transformers = '['NumCatTETransformer', 'CVTargetEncodeF', 'NumToCatTETransformer', 'ClusterTETransformer']'. Does not affect transformers used for preprocessing with included_pretransformers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Exclude specific transformers: ", + "output": "excluded transformers refers to Exclude specific transformers: Auxiliary to included_transformers e.g. to disable all Target Encoding: excluded_transformers = '['NumCatTETransformer', 'CVTargetEncodeF', 'NumToCatTETransformer', 'ClusterTETransformer']'. Does not affect transformers used for preprocessing with included_pretransformers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting excluded_transformers", + "output": "excluded transformers refers to Auxiliary to included_transformers e.g. to disable all Target Encoding: excluded_transformers = '['NumCatTETransformer', 'CVTargetEncodeF', 'NumToCatTETransformer', 'ClusterTETransformer']'. 
Does not affect transformers used for preprocessing with included_pretransformers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting excluded_transformers", + "output": "excluded transformers refers to Exclude specific transformers: Auxiliary to included_transformers e.g. to disable all Target Encoding: excluded_transformers = '['NumCatTETransformer', 'CVTargetEncodeF', 'NumToCatTETransformer', 'ClusterTETransformer']'. Does not affect transformers used for preprocessing with included_pretransformers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_genes", + "output": "excluded genes refers to Exclude list of genes (i.e. genes (built on top of transformers) to not use,independent of the interpretability setting)Some transformers are used by multiple genes, so this allows different control over feature engineeringfor multi-class: '['InteractionsGene', 'WeightOfEvidenceGene','NumToCatTargetEncodeSingleGene', 'OriginalGene', 'TextGene', 'FrequentGene','NumToCatWeightOfEvidenceGene', 'NumToCatWeightOfEvidenceMonotonicGene', 'CvTargetEncodeSingleGene', 'DateGene', 'NumToCatTargetEncodeMultiGene', 'DateTimeGene', 'TextLinRegressorGene', 'ClusterIDTargetEncodeSingleGene','CvCatNumEncodeGene', 'TruncSvdNumGene', 'ClusterIDTargetEncodeMultiGene','NumCatTargetEncodeMultiGene', 'CvTargetEncodeMultiGene', 'TextLinClassifierGene','NumCatTargetEncodeSingleGene', 'ClusterDistGene']'for regression/binary: '['CvTargetEncodeSingleGene', 'NumToCatTargetEncodeSingleGene','CvCatNumEncodeGene', 'ClusterIDTargetEncodeSingleGene', 'TextLinRegressorGene','CvTargetEncodeMultiGene', 'ClusterDistGene', 'OriginalGene', 'DateGene','ClusterIDTargetEncodeMultiGene', 'NumToCatTargetEncodeMultiGene','NumCatTargetEncodeMultiGene', 'TextLinClassifierGene', 'WeightOfEvidenceGene','FrequentGene', 'TruncSvdNumGene', 'InteractionsGene', 
'TextGene','DateTimeGene', 'NumToCatWeightOfEvidenceGene','NumToCatWeightOfEvidenceMonotonicGene', 'NumCatTargetEncodeSingleGene']'. This list appears in the experiment logs (search for 'Genes used'). E.g. to disable interaction gene, use: excluded_genes = '['InteractionsGene']'. Does not affect transformers used for preprocessing with included_pretransformers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_genes", + "output": "excluded genes refers to Exclude specific genes: Exclude list of genes (i.e. genes (built on top of transformers) to not use, independent of the interpretability setting). Some transformers are used by multiple genes, so this allows different control over feature engineering. For multi-class: '['InteractionsGene', 'WeightOfEvidenceGene','NumToCatTargetEncodeSingleGene', 'OriginalGene', 'TextGene', 'FrequentGene','NumToCatWeightOfEvidenceGene', 'NumToCatWeightOfEvidenceMonotonicGene', 'CvTargetEncodeSingleGene', 'DateGene', 'NumToCatTargetEncodeMultiGene', 'DateTimeGene', 'TextLinRegressorGene', 'ClusterIDTargetEncodeSingleGene','CvCatNumEncodeGene', 'TruncSvdNumGene', 'ClusterIDTargetEncodeMultiGene','NumCatTargetEncodeMultiGene', 'CvTargetEncodeMultiGene', 'TextLinClassifierGene','NumCatTargetEncodeSingleGene', 'ClusterDistGene']'. For regression/binary: '['CvTargetEncodeSingleGene', 'NumToCatTargetEncodeSingleGene','CvCatNumEncodeGene', 'ClusterIDTargetEncodeSingleGene', 'TextLinRegressorGene','CvTargetEncodeMultiGene', 'ClusterDistGene', 'OriginalGene', 'DateGene','ClusterIDTargetEncodeMultiGene', 'NumToCatTargetEncodeMultiGene','NumCatTargetEncodeMultiGene', 'TextLinClassifierGene', 'WeightOfEvidenceGene','FrequentGene', 'TruncSvdNumGene', 'InteractionsGene', 'TextGene','DateTimeGene', 'NumToCatWeightOfEvidenceGene','NumToCatWeightOfEvidenceMonotonicGene', 'NumCatTargetEncodeSingleGene']'. This list appears in the experiment logs (search for 'Genes used'). E.g. 
to disable interaction gene, use: excluded_genes ='['InteractionsGene']'.Does not affect transformers used for preprocessing with included_pretransformers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded genes", + "output": "excluded genes refers to Exclude specific genes: Exclude list of genes (i.e. genes (built on top of transformers) to not use,independent of the interpretability setting)Some transformers are used by multiple genes, so this allows different control over feature engineeringfor multi-class: '['InteractionsGene', 'WeightOfEvidenceGene','NumToCatTargetEncodeSingleGene', 'OriginalGene', 'TextGene', 'FrequentGene','NumToCatWeightOfEvidenceGene', 'NumToCatWeightOfEvidenceMonotonicGene', 'CvTargetEncodeSingleGene', 'DateGene', 'NumToCatTargetEncodeMultiGene', 'DateTimeGene', 'TextLinRegressorGene', 'ClusterIDTargetEncodeSingleGene','CvCatNumEncodeGene', 'TruncSvdNumGene', 'ClusterIDTargetEncodeMultiGene','NumCatTargetEncodeMultiGene', 'CvTargetEncodeMultiGene', 'TextLinClassifierGene','NumCatTargetEncodeSingleGene', 'ClusterDistGene']'for regression/binary: '['CvTargetEncodeSingleGene', 'NumToCatTargetEncodeSingleGene','CvCatNumEncodeGene', 'ClusterIDTargetEncodeSingleGene', 'TextLinRegressorGene','CvTargetEncodeMultiGene', 'ClusterDistGene', 'OriginalGene', 'DateGene','ClusterIDTargetEncodeMultiGene', 'NumToCatTargetEncodeMultiGene','NumCatTargetEncodeMultiGene', 'TextLinClassifierGene', 'WeightOfEvidenceGene','FrequentGene', 'TruncSvdNumGene', 'InteractionsGene', 'TextGene','DateTimeGene', 'NumToCatWeightOfEvidenceGene','NumToCatWeightOfEvidenceMonotonicGene', ''NumCatTargetEncodeSingleGene']'This list appears in the experiment logs (search for 'Genes used')e.g. to disable interaction gene, use: excluded_genes ='['InteractionsGene']'.Does not affect transformers used for preprocessing with included_pretransformers. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Exclude specific genes: ", + "output": "excluded genes refers to Exclude specific genes: Exclude list of genes (i.e. genes (built on top of transformers) to not use,independent of the interpretability setting)Some transformers are used by multiple genes, so this allows different control over feature engineeringfor multi-class: '['InteractionsGene', 'WeightOfEvidenceGene','NumToCatTargetEncodeSingleGene', 'OriginalGene', 'TextGene', 'FrequentGene','NumToCatWeightOfEvidenceGene', 'NumToCatWeightOfEvidenceMonotonicGene', 'CvTargetEncodeSingleGene', 'DateGene', 'NumToCatTargetEncodeMultiGene', 'DateTimeGene', 'TextLinRegressorGene', 'ClusterIDTargetEncodeSingleGene','CvCatNumEncodeGene', 'TruncSvdNumGene', 'ClusterIDTargetEncodeMultiGene','NumCatTargetEncodeMultiGene', 'CvTargetEncodeMultiGene', 'TextLinClassifierGene','NumCatTargetEncodeSingleGene', 'ClusterDistGene']'for regression/binary: '['CvTargetEncodeSingleGene', 'NumToCatTargetEncodeSingleGene','CvCatNumEncodeGene', 'ClusterIDTargetEncodeSingleGene', 'TextLinRegressorGene','CvTargetEncodeMultiGene', 'ClusterDistGene', 'OriginalGene', 'DateGene','ClusterIDTargetEncodeMultiGene', 'NumToCatTargetEncodeMultiGene','NumCatTargetEncodeMultiGene', 'TextLinClassifierGene', 'WeightOfEvidenceGene','FrequentGene', 'TruncSvdNumGene', 'InteractionsGene', 'TextGene','DateTimeGene', 'NumToCatWeightOfEvidenceGene','NumToCatWeightOfEvidenceMonotonicGene', ''NumCatTargetEncodeSingleGene']'This list appears in the experiment logs (search for 'Genes used')e.g. to disable interaction gene, use: excluded_genes ='['InteractionsGene']'.Does not affect transformers used for preprocessing with included_pretransformers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting excluded_genes", + "output": "excluded genes refers to Exclude list of genes (i.e. 
genes (built on top of transformers) to not use,independent of the interpretability setting)Some transformers are used by multiple genes, so this allows different control over feature engineeringfor multi-class: '['InteractionsGene', 'WeightOfEvidenceGene','NumToCatTargetEncodeSingleGene', 'OriginalGene', 'TextGene', 'FrequentGene','NumToCatWeightOfEvidenceGene', 'NumToCatWeightOfEvidenceMonotonicGene', 'CvTargetEncodeSingleGene', 'DateGene', 'NumToCatTargetEncodeMultiGene', 'DateTimeGene', 'TextLinRegressorGene', 'ClusterIDTargetEncodeSingleGene','CvCatNumEncodeGene', 'TruncSvdNumGene', 'ClusterIDTargetEncodeMultiGene','NumCatTargetEncodeMultiGene', 'CvTargetEncodeMultiGene', 'TextLinClassifierGene','NumCatTargetEncodeSingleGene', 'ClusterDistGene']'for regression/binary: '['CvTargetEncodeSingleGene', 'NumToCatTargetEncodeSingleGene','CvCatNumEncodeGene', 'ClusterIDTargetEncodeSingleGene', 'TextLinRegressorGene','CvTargetEncodeMultiGene', 'ClusterDistGene', 'OriginalGene', 'DateGene','ClusterIDTargetEncodeMultiGene', 'NumToCatTargetEncodeMultiGene','NumCatTargetEncodeMultiGene', 'TextLinClassifierGene', 'WeightOfEvidenceGene','FrequentGene', 'TruncSvdNumGene', 'InteractionsGene', 'TextGene','DateTimeGene', 'NumToCatWeightOfEvidenceGene','NumToCatWeightOfEvidenceMonotonicGene', ''NumCatTargetEncodeSingleGene']'This list appears in the experiment logs (search for 'Genes used')e.g. to disable interaction gene, use: excluded_genes ='['InteractionsGene']'.Does not affect transformers used for preprocessing with included_pretransformers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting excluded_genes", + "output": "excluded genes refers to Exclude specific genes: Exclude list of genes (i.e. 
genes (built on top of transformers) to not use,independent of the interpretability setting)Some transformers are used by multiple genes, so this allows different control over feature engineeringfor multi-class: '['InteractionsGene', 'WeightOfEvidenceGene','NumToCatTargetEncodeSingleGene', 'OriginalGene', 'TextGene', 'FrequentGene','NumToCatWeightOfEvidenceGene', 'NumToCatWeightOfEvidenceMonotonicGene', 'CvTargetEncodeSingleGene', 'DateGene', 'NumToCatTargetEncodeMultiGene', 'DateTimeGene', 'TextLinRegressorGene', 'ClusterIDTargetEncodeSingleGene','CvCatNumEncodeGene', 'TruncSvdNumGene', 'ClusterIDTargetEncodeMultiGene','NumCatTargetEncodeMultiGene', 'CvTargetEncodeMultiGene', 'TextLinClassifierGene','NumCatTargetEncodeSingleGene', 'ClusterDistGene']'for regression/binary: '['CvTargetEncodeSingleGene', 'NumToCatTargetEncodeSingleGene','CvCatNumEncodeGene', 'ClusterIDTargetEncodeSingleGene', 'TextLinRegressorGene','CvTargetEncodeMultiGene', 'ClusterDistGene', 'OriginalGene', 'DateGene','ClusterIDTargetEncodeMultiGene', 'NumToCatTargetEncodeMultiGene','NumCatTargetEncodeMultiGene', 'TextLinClassifierGene', 'WeightOfEvidenceGene','FrequentGene', 'TruncSvdNumGene', 'InteractionsGene', 'TextGene','DateTimeGene', 'NumToCatWeightOfEvidenceGene','NumToCatWeightOfEvidenceMonotonicGene', ''NumCatTargetEncodeSingleGene']'This list appears in the experiment logs (search for 'Genes used')e.g. to disable interaction gene, use: excluded_genes ='['InteractionsGene']'.Does not affect transformers used for preprocessing with included_pretransformers. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included_models", + "output": "included models refers to Include specific models: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included_models", + "output": "included models refers to Include specific models: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included models", + "output": "included models refers to Include specific models: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Include specific models: ", + "output": "included models refers to Include specific models: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting included_models", + "output": "included models refers to Include specific models: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting included_models", + "output": "included models refers to Include specific models: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_models", + "output": "excluded models refers to Auxiliary to included_models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_models", + "output": "excluded models refers to Exclude specific models: Auxiliary to included_models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded models", + "output": "excluded models refers to Exclude specific models: Auxiliary to included_models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the 
following expert setting for Driverless AI", + "input": "Exclude specific models: ", + "output": "excluded models refers to Exclude specific models: Auxiliary to included_models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting excluded_models", + "output": "excluded models refers to Auxiliary to included_models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting excluded_models", + "output": "excluded models refers to Exclude specific models: Auxiliary to included_models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included_scorers", + "output": "included scorers refers to Include specific scorers: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included_scorers", + "output": "included scorers refers to Include specific scorers: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included scorers", + "output": "included scorers refers to Include specific scorers: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Include specific scorers: ", + "output": "included scorers refers to Include specific scorers: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting included_scorers", + "output": "included scorers refers to Include specific scorers: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting included_scorers", + "output": "included scorers refers to Include specific scorers: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": 
"included_pretransformers", + "output": "included pretransformers refers to Select transformers to be used for preprocessing before other transformers operate.Pre-processing transformers can potentially take any original features and outputarbitrary features, which will then be used by the normal layer of transformerswhose selection is controlled by toml included_transformers or via the GUI\"Include specific transformers\".Notes:1) preprocessing transformers (and all other layers of transformers) are part of the python and (if applicable) mojo scoring packages.2) any BYOR transformer recipe or native DAI transformer can be used as a preprocessing transformer.So, e.g., a preprocessing transformer can do interactions, string concatenations, date extractions as a preprocessing step, and next layer of Date and DateTime transformers will use that as input data.Caveats:1) one cannot currently do a time-series experiment on a time_column that hasn't yet been made (setup of experiment only knows about original data, not transformed) However, one can use a run-time data recipe to (e.g.) convert a float date-time into string date-time, and this will be used by DAIs Date and DateTime transformers as well as auto-detection of time series.2) in order to do a time series experiment with the GUI/client auto-selecting groups, periods, etc. the dataset must have time column and groups prepared ahead of experiment by user or via a one-time data recipe. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included_pretransformers", + "output": "included pretransformers refers to Include specific preprocessing transformers: Select transformers to be used for preprocessing before other transformers operate.Pre-processing transformers can potentially take any original features and outputarbitrary features, which will then be used by the normal layer of transformerswhose selection is controlled by toml included_transformers or via the GUI\"Include specific transformers\".Notes:1) preprocessing transformers (and all other layers of transformers) are part of the python and (if applicable) mojo scoring packages.2) any BYOR transformer recipe or native DAI transformer can be used as a preprocessing transformer.So, e.g., a preprocessing transformer can do interactions, string concatenations, date extractions as a preprocessing step, and next layer of Date and DateTime transformers will use that as input data.Caveats:1) one cannot currently do a time-series experiment on a time_column that hasn't yet been made (setup of experiment only knows about original data, not transformed) However, one can use a run-time data recipe to (e.g.) convert a float date-time into string date-time, and this will be used by DAIs Date and DateTime transformers as well as auto-detection of time series.2) in order to do a time series experiment with the GUI/client auto-selecting groups, periods, etc. the dataset must have time column and groups prepared ahead of experiment by user or via a one-time data recipe. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included pretransformers", + "output": "included pretransformers refers to Include specific preprocessing transformers: Select transformers to be used for preprocessing before other transformers operate.Pre-processing transformers can potentially take any original features and outputarbitrary features, which will then be used by the normal layer of transformerswhose selection is controlled by toml included_transformers or via the GUI\"Include specific transformers\".Notes:1) preprocessing transformers (and all other layers of transformers) are part of the python and (if applicable) mojo scoring packages.2) any BYOR transformer recipe or native DAI transformer can be used as a preprocessing transformer.So, e.g., a preprocessing transformer can do interactions, string concatenations, date extractions as a preprocessing step, and next layer of Date and DateTime transformers will use that as input data.Caveats:1) one cannot currently do a time-series experiment on a time_column that hasn't yet been made (setup of experiment only knows about original data, not transformed) However, one can use a run-time data recipe to (e.g.) convert a float date-time into string date-time, and this will be used by DAIs Date and DateTime transformers as well as auto-detection of time series.2) in order to do a time series experiment with the GUI/client auto-selecting groups, periods, etc. the dataset must have time column and groups prepared ahead of experiment by user or via a one-time data recipe. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Include specific preprocessing transformers: ", + "output": "included pretransformers refers to Include specific preprocessing transformers: Select transformers to be used for preprocessing before other transformers operate.Pre-processing transformers can potentially take any original features and outputarbitrary features, which will then be used by the normal layer of transformerswhose selection is controlled by toml included_transformers or via the GUI\"Include specific transformers\".Notes:1) preprocessing transformers (and all other layers of transformers) are part of the python and (if applicable) mojo scoring packages.2) any BYOR transformer recipe or native DAI transformer can be used as a preprocessing transformer.So, e.g., a preprocessing transformer can do interactions, string concatenations, date extractions as a preprocessing step, and next layer of Date and DateTime transformers will use that as input data.Caveats:1) one cannot currently do a time-series experiment on a time_column that hasn't yet been made (setup of experiment only knows about original data, not transformed) However, one can use a run-time data recipe to (e.g.) convert a float date-time into string date-time, and this will be used by DAIs Date and DateTime transformers as well as auto-detection of time series.2) in order to do a time series experiment with the GUI/client auto-selecting groups, periods, etc. the dataset must have time column and groups prepared ahead of experiment by user or via a one-time data recipe. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting included_pretransformers", + "output": "included pretransformers refers to Select transformers to be used for preprocessing before other transformers operate.Pre-processing transformers can potentially take any original features and outputarbitrary features, which will then be used by the normal layer of transformerswhose selection is controlled by toml included_transformers or via the GUI\"Include specific transformers\".Notes:1) preprocessing transformers (and all other layers of transformers) are part of the python and (if applicable) mojo scoring packages.2) any BYOR transformer recipe or native DAI transformer can be used as a preprocessing transformer.So, e.g., a preprocessing transformer can do interactions, string concatenations, date extractions as a preprocessing step, and next layer of Date and DateTime transformers will use that as input data.Caveats:1) one cannot currently do a time-series experiment on a time_column that hasn't yet been made (setup of experiment only knows about original data, not transformed) However, one can use a run-time data recipe to (e.g.) convert a float date-time into string date-time, and this will be used by DAIs Date and DateTime transformers as well as auto-detection of time series.2) in order to do a time series experiment with the GUI/client auto-selecting groups, periods, etc. the dataset must have time column and groups prepared ahead of experiment by user or via a one-time data recipe. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting included_pretransformers", + "output": "included pretransformers refers to Include specific preprocessing transformers: Select transformers to be used for preprocessing before other transformers operate.Pre-processing transformers can potentially take any original features and outputarbitrary features, which will then be used by the normal layer of transformerswhose selection is controlled by toml included_transformers or via the GUI\"Include specific transformers\".Notes:1) preprocessing transformers (and all other layers of transformers) are part of the python and (if applicable) mojo scoring packages.2) any BYOR transformer recipe or native DAI transformer can be used as a preprocessing transformer.So, e.g., a preprocessing transformer can do interactions, string concatenations, date extractions as a preprocessing step, and next layer of Date and DateTime transformers will use that as input data.Caveats:1) one cannot currently do a time-series experiment on a time_column that hasn't yet been made (setup of experiment only knows about original data, not transformed) However, one can use a run-time data recipe to (e.g.) convert a float date-time into string date-time, and this will be used by DAIs Date and DateTime transformers as well as auto-detection of time series.2) in order to do a time series experiment with the GUI/client auto-selecting groups, periods, etc. the dataset must have time column and groups prepared ahead of experiment by user or via a one-time data recipe. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_pretransformers", + "output": "excluded pretransformers refers to Auxiliary to included_pretransformers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_pretransformers", + "output": "excluded pretransformers refers to Exclude specific pretransformers: Auxiliary to included_pretransformers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded pretransformers", + "output": "excluded pretransformers refers to Exclude specific pretransformers: Auxiliary to included_pretransformers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Exclude specific pretransformers: ", + "output": "excluded pretransformers refers to Exclude specific pretransformers: Auxiliary to included_pretransformers" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting excluded_pretransformers", + "output": "excluded pretransformers refers to Auxiliary to included_pretransformers" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting excluded_pretransformers", + "output": "excluded pretransformers refers to Exclude specific pretransformers: Auxiliary to included_pretransformers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_pipeline_layers", + "output": "num pipeline layers refers to Number of full pipeline layers (not including preprocessing layer when included_pretransformers is not empty). 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_pipeline_layers", + "output": "num pipeline layers refers to Number of pipeline layers: Number of full pipeline layers (not including preprocessing layer when included_pretransformers is not empty). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num pipeline layers", + "output": "num pipeline layers refers to Number of pipeline layers: Number of full pipeline layers (not including preprocessing layer when included_pretransformers is not empty). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of pipeline layers: ", + "output": "num pipeline layers refers to Number of pipeline layers: Number of full pipeline layers (not including preprocessing layer when included_pretransformers is not empty). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_pipeline_layers", + "output": "num pipeline layers refers to Number of full pipeline layers (not including preprocessing layer when included_pretransformers is not empty). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_pipeline_layers", + "output": "num pipeline layers refers to Number of pipeline layers: Number of full pipeline layers (not including preprocessing layer when included_pretransformers is not empty). 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included_datas", + "output": "included datas refers to There are 2 data recipes:1) that adds new dataset or modifies dataset outside experiment by file/url (pre-experiment data recipe)2) that modifies dataset during experiment and python scoring (run-time data recipe)This list applies to the 2nd case. One can use the same data recipe code for either case, but note:A) the 1st case can make any new data, but is not part of scoring package.B) the 2nd case modifies data during the experiment, so needs some original dataset. The recipe can still create all new features, as long as it has same *name* for: target, weight_column, fold_column, time_column, time group columns. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included_datas", + "output": "included datas refers to Include specific data recipes during experiment: There are 2 data recipes:1) that adds new dataset or modifies dataset outside experiment by file/url (pre-experiment data recipe)2) that modifies dataset during experiment and python scoring (run-time data recipe)This list applies to the 2nd case. One can use the same data recipe code for either case, but note:A) the 1st case can make any new data, but is not part of scoring package.B) the 2nd case modifies data during the experiment, so needs some original dataset. The recipe can still create all new features, as long as it has same *name* for: target, weight_column, fold_column, time_column, time group columns. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included datas", + "output": "included datas refers to Include specific data recipes during experiment: There are 2 data recipes:1) that adds new dataset or modifies dataset outside experiment by file/url (pre-experiment data recipe)2) that modifies dataset during experiment and python scoring (run-time data recipe)This list applies to the 2nd case. One can use the same data recipe code for either case, but note:A) the 1st case can make any new data, but is not part of scoring package.B) the 2nd case modifies data during the experiment, so needs some original dataset. The recipe can still create all new features, as long as it has same *name* for: target, weight_column, fold_column, time_column, time group columns. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Include specific data recipes during experiment: ", + "output": "included datas refers to Include specific data recipes during experiment: There are 2 data recipes:1) that adds new dataset or modifies dataset outside experiment by file/url (pre-experiment data recipe)2) that modifies dataset during experiment and python scoring (run-time data recipe)This list applies to the 2nd case. One can use the same data recipe code for either case, but note:A) the 1st case can make any new data, but is not part of scoring package.B) the 2nd case modifies data during the experiment, so needs some original dataset. The recipe can still create all new features, as long as it has same *name* for: target, weight_column, fold_column, time_column, time group columns. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting included_datas", + "output": "included datas refers to There are 2 data recipes:1) that adds new dataset or modifies dataset outside experiment by file/url (pre-experiment data recipe)2) that modifies dataset during experiment and python scoring (run-time data recipe)This list applies to the 2nd case. One can use the same data recipe code for either case, but note:A) the 1st case can make any new data, but is not part of scoring package.B) the 2nd case modifies data during the experiment, so needs some original dataset. The recipe can still create all new features, as long as it has same *name* for: target, weight_column, fold_column, time_column, time group columns. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting included_datas", + "output": "included datas refers to Include specific data recipes during experiment: There are 2 data recipes:1) that adds new dataset or modifies dataset outside experiment by file/url (pre-experiment data recipe)2) that modifies dataset during experiment and python scoring (run-time data recipe)This list applies to the 2nd case. One can use the same data recipe code for either case, but note:A) the 1st case can make any new data, but is not part of scoring package.B) the 2nd case modifies data during the experiment, so needs some original dataset. The recipe can still create all new features, as long as it has same *name* for: target, weight_column, fold_column, time_column, time group columns. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_datas", + "output": "excluded datas refers to Auxiliary to included_datas" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_datas", + "output": "excluded datas refers to Exclude specific data recipes: Auxiliary to included_datas" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded datas", + "output": "excluded datas refers to Exclude specific data recipes: Auxiliary to included_datas" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Exclude specific data recipes: ", + "output": "excluded datas refers to Exclude specific data recipes: Auxiliary to included_datas" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting excluded_datas", + "output": "excluded datas refers to Auxiliary to included_datas" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting excluded_datas", + "output": "excluded datas refers to Exclude specific data recipes: Auxiliary to included_datas" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included_individuals", + "output": "included individuals refers to Custom individuals to use in experiment.DAI contains most information about model type, model hyperparameters, data science types for input features, transformers used, and transformer parameters an Individual Recipe (an object that is evolved by mutation within the context of DAI's genetic algorithm).Every completed experiment auto-generates python code for the experiment that corresponds to the individual(s) used to build the final 
model. This auto-generated python code can be edited offline and uploaded as a recipe, or it can be edited within the custom recipe management editor and saved. This allows one code-first access to a significant portion of DAI's internal transformer and model generation. Choices are: * Empty means all individuals are freshly generated and treated by DAI's AutoML as a container of model and transformer choices. * Recipe display names of custom individuals, usually chosen via the UI. If the number of included custom individuals is less than DAI would need, then the remaining individuals are freshly generated. The expert experiment-level option fixed_num_individuals can be used to enforce how many individuals to use in evolution stage. The expert experiment-level option fixed_ensemble_level can be used to enforce how many individuals (each with one base model) will be used in the final model. These individuals act in a similar way as the feature brain acts for restart and retrain/refit, and one can retrain/refit custom individuals (i.e. skip the tuning and evolution stages) to use them in building a final model. See toml make_python_code for more details." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included_individuals", + "output": "included individuals refers to Include specific individuals: Custom individuals to use in experiment. DAI contains most information about model type, model hyperparameters, data science types for input features, transformers used, and transformer parameters in an Individual Recipe (an object that is evolved by mutation within the context of DAI's genetic algorithm). Every completed experiment auto-generates python code for the experiment that corresponds to the individual(s) used to build the final model. This auto-generated python code can be edited offline and uploaded as a recipe, or it can be edited within the custom recipe management editor and saved. 
This allows one code-first access to a significant portion of DAI's internal transformer and model generation. Choices are: * Empty means all individuals are freshly generated and treated by DAI's AutoML as a container of model and transformer choices. * Recipe display names of custom individuals, usually chosen via the UI. If the number of included custom individuals is less than DAI would need, then the remaining individuals are freshly generated. The expert experiment-level option fixed_num_individuals can be used to enforce how many individuals to use in evolution stage. The expert experiment-level option fixed_ensemble_level can be used to enforce how many individuals (each with one base model) will be used in the final model. These individuals act in a similar way as the feature brain acts for restart and retrain/refit, and one can retrain/refit custom individuals (i.e. skip the tuning and evolution stages) to use them in building a final model. See toml make_python_code for more details." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "included individuals", + "output": "included individuals refers to Include specific individuals: Custom individuals to use in experiment. DAI contains most information about model type, model hyperparameters, data science types for input features, transformers used, and transformer parameters in an Individual Recipe (an object that is evolved by mutation within the context of DAI's genetic algorithm). Every completed experiment auto-generates python code for the experiment that corresponds to the individual(s) used to build the final model. This auto-generated python code can be edited offline and uploaded as a recipe, or it can be edited within the custom recipe management editor and saved. 
This allows one code-first access to a significant portion of DAI's internal transformer and model generation. Choices are: * Empty means all individuals are freshly generated and treated by DAI's AutoML as a container of model and transformer choices. * Recipe display names of custom individuals, usually chosen via the UI. If the number of included custom individuals is less than DAI would need, then the remaining individuals are freshly generated. The expert experiment-level option fixed_num_individuals can be used to enforce how many individuals to use in evolution stage. The expert experiment-level option fixed_ensemble_level can be used to enforce how many individuals (each with one base model) will be used in the final model. These individuals act in a similar way as the feature brain acts for restart and retrain/refit, and one can retrain/refit custom individuals (i.e. skip the tuning and evolution stages) to use them in building a final model. See toml make_python_code for more details." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Include specific individuals: ", + "output": "included individuals refers to Include specific individuals: Custom individuals to use in experiment. DAI contains most information about model type, model hyperparameters, data science types for input features, transformers used, and transformer parameters in an Individual Recipe (an object that is evolved by mutation within the context of DAI's genetic algorithm). Every completed experiment auto-generates python code for the experiment that corresponds to the individual(s) used to build the final model. This auto-generated python code can be edited offline and uploaded as a recipe, or it can be edited within the custom recipe management editor and saved. 
This allows one code-first access to a significant portion of DAI's internal transformer and model generation. Choices are: * Empty means all individuals are freshly generated and treated by DAI's AutoML as a container of model and transformer choices. * Recipe display names of custom individuals, usually chosen via the UI. If the number of included custom individuals is less than DAI would need, then the remaining individuals are freshly generated. The expert experiment-level option fixed_num_individuals can be used to enforce how many individuals to use in evolution stage. The expert experiment-level option fixed_ensemble_level can be used to enforce how many individuals (each with one base model) will be used in the final model. These individuals act in a similar way as the feature brain acts for restart and retrain/refit, and one can retrain/refit custom individuals (i.e. skip the tuning and evolution stages) to use them in building a final model. See toml make_python_code for more details." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting included_individuals", + "output": "included individuals refers to Custom individuals to use in experiment. DAI contains most information about model type, model hyperparameters, data science types for input features, transformers used, and transformer parameters in an Individual Recipe (an object that is evolved by mutation within the context of DAI's genetic algorithm). Every completed experiment auto-generates python code for the experiment that corresponds to the individual(s) used to build the final model. This auto-generated python code can be edited offline and uploaded as a recipe, or it can be edited within the custom recipe management editor and saved. 
This allows one code-first access to a significant portion of DAI's internal transformer and model generation. Choices are: * Empty means all individuals are freshly generated and treated by DAI's AutoML as a container of model and transformer choices. * Recipe display names of custom individuals, usually chosen via the UI. If the number of included custom individuals is less than DAI would need, then the remaining individuals are freshly generated. The expert experiment-level option fixed_num_individuals can be used to enforce how many individuals to use in evolution stage. The expert experiment-level option fixed_ensemble_level can be used to enforce how many individuals (each with one base model) will be used in the final model. These individuals act in a similar way as the feature brain acts for restart and retrain/refit, and one can retrain/refit custom individuals (i.e. skip the tuning and evolution stages) to use them in building a final model. See toml make_python_code for more details." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting included_individuals", + "output": "included individuals refers to Include specific individuals: Custom individuals to use in experiment. DAI contains most information about model type, model hyperparameters, data science types for input features, transformers used, and transformer parameters in an Individual Recipe (an object that is evolved by mutation within the context of DAI's genetic algorithm). Every completed experiment auto-generates python code for the experiment that corresponds to the individual(s) used to build the final model. This auto-generated python code can be edited offline and uploaded as a recipe, or it can be edited within the custom recipe management editor and saved. 
This allowed one a code-first access to a significant portion of DAI's internal transformer and model generation.Choices are:* Empty means all individuals are freshly generated and treated by DAI's AutoML as a container of model and transformer choices.* Recipe display names of custom individuals, usually chosen via the UI. If the number of included custom individuals is less than DAI would need, then the remaining individuals are freshly generated.The expert experiment-level option fixed_num_individuals can be used to enforce how many individuals to use in evolution stage.The expert experiment-level option fixed_ensemble_level can be used to enforce how many individuals (each with one base model) will be used in the final model.These individuals act in similar way as the feature brain acts for restart and retrain/refit, and one can retrain/refit custom individuals (i.e. skip the tuning and evolution stages) to use them in building a final model.See toml make_python_code for more details." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_individuals", + "output": "excluded individuals refers to Auxiliary to included_individuals" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_individuals", + "output": "excluded individuals refers to Exclude specific individual recipes: Auxiliary to included_individuals" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded individuals", + "output": "excluded individuals refers to Exclude specific individual recipes: Auxiliary to included_individuals" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Exclude specific individual recipes: ", + "output": "excluded individuals refers to Exclude specific individual recipes: Auxiliary to included_individuals" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting excluded_individuals", + "output": "excluded individuals refers to Auxiliary to included_individuals" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting excluded_individuals", + "output": "excluded individuals refers to Exclude specific individual recipes: Auxiliary to included_individuals" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_python_code", + "output": "make python code refers to Whether to generate python code for the best individuals for the experiment.This python code contains a CustomIndividual class that is a recipe that can be edited and customized. 
The CustomIndividual class itself can also be customized for expert use.By default, 'auto' means on.At the end of an experiment, the summary zip contains auto-generated python code for the individuals used in the experiment, including the last best population (best_population_indivXX.py where XX iterates the population), last best individual (best_individual.py), final base models (final_indivYY.py where YY iterates the final base models).The summary zip also contains an example_indiv.py file that generates other transformers that may be useful that did not happen to be used in the experiment.In addition, the GUI and python client allow one to generate custom individuals from an aborted or finished experiment.For finished experiments, this will provide a zip file containing the final_indivYY.py files, and for aborted experiments this will contain the best population and best individual files.See included_individuals for more details." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_python_code", + "output": "make python code refers to Generate python code for individual: Whether to generate python code for the best individuals for the experiment.This python code contains a CustomIndividual class that is a recipe that can be edited and customized. 
The CustomIndividual class itself can also be customized for expert use.By default, 'auto' means on.At the end of an experiment, the summary zip contains auto-generated python code for the individuals used in the experiment, including the last best population (best_population_indivXX.py where XX iterates the population), last best individual (best_individual.py), final base models (final_indivYY.py where YY iterates the final base models).The summary zip also contains an example_indiv.py file that generates other transformers that may be useful that did not happen to be used in the experiment.In addition, the GUI and python client allow one to generate custom individuals from an aborted or finished experiment.For finished experiments, this will provide a zip file containing the final_indivYY.py files, and for aborted experiments this will contain the best population and best individual files.See included_individuals for more details." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make python code", + "output": "make python code refers to Generate python code for individual: Whether to generate python code for the best individuals for the experiment.This python code contains a CustomIndividual class that is a recipe that can be edited and customized. 
The CustomIndividual class itself can also be customized for expert use.By default, 'auto' means on.At the end of an experiment, the summary zip contains auto-generated python code for the individuals used in the experiment, including the last best population (best_population_indivXX.py where XX iterates the population), last best individual (best_individual.py), final base models (final_indivYY.py where YY iterates the final base models).The summary zip also contains an example_indiv.py file that generates other transformers that may be useful that did not happen to be used in the experiment.In addition, the GUI and python client allow one to generate custom individuals from an aborted or finished experiment.For finished experiments, this will provide a zip file containing the final_indivYY.py files, and for aborted experiments this will contain the best population and best individual files.See included_individuals for more details." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Generate python code for individual: ", + "output": "make python code refers to Generate python code for individual: Whether to generate python code for the best individuals for the experiment.This python code contains a CustomIndividual class that is a recipe that can be edited and customized. 
The CustomIndividual class itself can also be customized for expert use.By default, 'auto' means on.At the end of an experiment, the summary zip contains auto-generated python code for the individuals used in the experiment, including the last best population (best_population_indivXX.py where XX iterates the population), last best individual (best_individual.py), final base models (final_indivYY.py where YY iterates the final base models).The summary zip also contains an example_indiv.py file that generates other transformers that may be useful that did not happen to be used in the experiment.In addition, the GUI and python client allow one to generate custom individuals from an aborted or finished experiment.For finished experiments, this will provide a zip file containing the final_indivYY.py files, and for aborted experiments this will contain the best population and best individual files.See included_individuals for more details." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting make_python_code", + "output": "make python code refers to Whether to generate python code for the best individuals for the experiment.This python code contains a CustomIndividual class that is a recipe that can be edited and customized. 
The CustomIndividual class itself can also be customized for expert use.By default, 'auto' means on.At the end of an experiment, the summary zip contains auto-generated python code for the individuals used in the experiment, including the last best population (best_population_indivXX.py where XX iterates the population), last best individual (best_individual.py), final base models (final_indivYY.py where YY iterates the final base models).The summary zip also contains an example_indiv.py file that generates other transformers that may be useful that did not happen to be used in the experiment.In addition, the GUI and python client allow one to generate custom individuals from an aborted or finished experiment.For finished experiments, this will provide a zip file containing the final_indivYY.py files, and for aborted experiments this will contain the best population and best individual files.See included_individuals for more details." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting make_python_code", + "output": "make python code refers to Generate python code for individual: Whether to generate python code for the best individuals for the experiment.This python code contains a CustomIndividual class that is a recipe that can be edited and customized. 
The CustomIndividual class itself can also be customized for expert use.By default, 'auto' means on.At the end of an experiment, the summary zip contains auto-generated python code for the individuals used in the experiment, including the last best population (best_population_indivXX.py where XX iterates the population), last best individual (best_individual.py), final base models (final_indivYY.py where YY iterates the final base models).The summary zip also contains an example_indiv.py file that generates other transformers that may be useful that did not happen to be used in the experiment.In addition, the GUI and python client allow one to generate custom individuals from an aborted or finished experiment.For finished experiments, this will provide a zip file containing the final_indivYY.py files, and for aborted experiments this will contain the best population and best individual files.See included_individuals for more details." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_json_code", + "output": "make json code refers to Whether to generate json code for the best individuals for the experiment. This python code contains the essential attributes from the internal DAI individual class. Reading the json code as a recipe is not supported. By default, 'auto' means off. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_json_code", + "output": "make json code refers to Generate json code for individual: Whether to generate json code for the best individuals for the experiment. This python code contains the essential attributes from the internal DAI individual class. Reading the json code as a recipe is not supported. By default, 'auto' means off. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make json code", + "output": "make json code refers to Generate json code for individual: Whether to generate json code for the best individuals for the experiment. This python code contains the essential attributes from the internal DAI individual class. Reading the json code as a recipe is not supported. By default, 'auto' means off. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Generate json code for individual: ", + "output": "make json code refers to Generate json code for individual: Whether to generate json code for the best individuals for the experiment. This python code contains the essential attributes from the internal DAI individual class. Reading the json code as a recipe is not supported. By default, 'auto' means off. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting make_json_code", + "output": "make json code refers to Whether to generate json code for the best individuals for the experiment. This python code contains the essential attributes from the internal DAI individual class. Reading the json code as a recipe is not supported. By default, 'auto' means off. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting make_json_code", + "output": "make json code refers to Generate json code for individual: Whether to generate json code for the best individuals for the experiment. This python code contains the essential attributes from the internal DAI individual class. Reading the json code as a recipe is not supported. By default, 'auto' means off. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python_code_ngenes_max", + "output": "python code ngenes max refers to Maximum number of genes to make for example auto-generated custom individual, called example_indiv.py in the summary zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python_code_ngenes_max", + "output": "python code ngenes max refers to Max. Num. genes for example auto-generated individual: Maximum number of genes to make for example auto-generated custom individual, called example_indiv.py in the summary zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python code ngenes max", + "output": "python code ngenes max refers to Max. Num. genes for example auto-generated individual: Maximum number of genes to make for example auto-generated custom individual, called example_indiv.py in the summary zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. Num. genes for example auto-generated individual: ", + "output": "python code ngenes max refers to Max. Num. genes for example auto-generated individual: Maximum number of genes to make for example auto-generated custom individual, called example_indiv.py in the summary zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting python_code_ngenes_max", + "output": "python code ngenes max refers to Maximum number of genes to make for example auto-generated custom individual, called example_indiv.py in the summary zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting python_code_ngenes_max", + "output": "python code ngenes max refers to Max. 
Num. genes for example auto-generated individual: Maximum number of genes to make for example auto-generated custom individual, called example_indiv.py in the summary zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python_code_ngenes_min", + "output": "python code ngenes min refers to Minimum number of genes to make for example auto-generated custom individual, called example_indiv.py in the summary zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python_code_ngenes_min", + "output": "python code ngenes min refers to Min. Num. genes for example auto-generated individual: Minimum number of genes to make for example auto-generated custom individual, called example_indiv.py in the summary zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python code ngenes min", + "output": "python code ngenes min refers to Min. Num. genes for example auto-generated individual: Minimum number of genes to make for example auto-generated custom individual, called example_indiv.py in the summary zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Min. Num. genes for example auto-generated individual: ", + "output": "python code ngenes min refers to Min. Num. genes for example auto-generated individual: Minimum number of genes to make for example auto-generated custom individual, called example_indiv.py in the summary zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting python_code_ngenes_min", + "output": "python code ngenes min refers to Minimum number of genes to make for example auto-generated custom individual, called example_indiv.py in the summary zip file. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting python_code_ngenes_min", + "output": "python code ngenes min refers to Min. Num. genes for example auto-generated individual: Minimum number of genes to make for example auto-generated custom individual, called example_indiv.py in the summary zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "threshold_scorer", + "output": "threshold scorer refers to Select the scorer to optimize the binary probability threshold that is being used in related Confusion Matrix based scorers that are trivial to optimize otherwise: Precision, Recall, FalsePositiveRate, FalseDiscoveryRate, FalseOmissionRate, TrueNegativeRate, FalseNegativeRate, NegativePredictiveValue. Use F1 if the target class matters more, and MCC if all classes are equally important. AUTO will try to sync the threshold scorer with the scorer used for the experiment, otherwise falls back to F1. The optimized threshold is also used for creating labels in addition to probabilities in MOJO/Python scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "threshold_scorer", + "output": "threshold scorer refers to For binary classification only: Scorer to optimize threshold to be used in confusion-matrix based scorers that are trivial to optimize and for label creation in MOJO/Python scorers.: Select the scorer to optimize the binary probability threshold that is being used in related Confusion Matrix based scorers that are trivial to optimize otherwise: Precision, Recall, FalsePositiveRate, FalseDiscoveryRate, FalseOmissionRate, TrueNegativeRate, FalseNegativeRate, NegativePredictiveValue. Use F1 if the target class matters more, and MCC if all classes are equally important. 
AUTO will try to sync the threshold scorer with the scorer used for the experiment, otherwise falls back to F1. The optimized threshold is also used for creating labels in addition to probabilities in MOJO/Python scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "threshold scorer", + "output": "threshold scorer refers to For binary classification only: Scorer to optimize threshold to be used in confusion-matrix based scorers that are trivial to optimize and for label creation in MOJO/Python scorers.: Select the scorer to optimize the binary probability threshold that is being used in related Confusion Matrix based scorers that are trivial to optimize otherwise: Precision, Recall, FalsePositiveRate, FalseDiscoveryRate, FalseOmissionRate, TrueNegativeRate, FalseNegativeRate, NegativePredictiveValue. Use F1 if the target class matters more, and MCC if all classes are equally important. AUTO will try to sync the threshold scorer with the scorer used for the experiment, otherwise falls back to F1. The optimized threshold is also used for creating labels in addition to probabilities in MOJO/Python scorers." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "For binary classification only: Scorer to optimize threshold to be used in confusion-matrix based scorers that are trivial to optimize and for label creation in MOJO/Python scorers.: ", + "output": "threshold scorer refers to For binary classification only: Scorer to optimize threshold to be used in confusion-matrix based scorers that are trivial to optimize and for label creation in MOJO/Python scorers.: Select the scorer to optimize the binary probability threshold that is being used in related Confusion Matrix based scorers that are trivial to optimize otherwise: Precision, Recall, FalsePositiveRate, FalseDiscoveryRate, FalseOmissionRate, TrueNegativeRate, FalseNegativeRate, NegativePredictiveValue. Use F1 if the target class matters more, and MCC if all classes are equally important. AUTO will try to sync the threshold scorer with the scorer used for the experiment, otherwise falls back to F1. The optimized threshold is also used for creating labels in addition to probabilities in MOJO/Python scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting threshold_scorer", + "output": "threshold scorer refers to Select the scorer to optimize the binary probability threshold that is being used in related Confusion Matrix based scorers that are trivial to optimize otherwise: Precision, Recall, FalsePositiveRate, FalseDiscoveryRate, FalseOmissionRate, TrueNegativeRate, FalseNegativeRate, NegativePredictiveValue. Use F1 if the target class matters more, and MCC if all classes are equally important. AUTO will try to sync the threshold scorer with the scorer used for the experiment, otherwise falls back to F1. The optimized threshold is also used for creating labels in addition to probabilities in MOJO/Python scorers." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting threshold_scorer", + "output": "threshold scorer refers to For binary classification only: Scorer to optimize threshold to be used in confusion-matrix based scorers that are trivial to optimize and for label creation in MOJO/Python scorers.: Select the scorer to optimize the binary probability threshold that is being used in related Confusion Matrix based scorers that are trivial to optimize otherwise: Precision, Recall, FalsePositiveRate, FalseDiscoveryRate, FalseOmissionRate, TrueNegativeRate, FalseNegativeRate, NegativePredictiveValue. Use F1 if the target class matters more, and MCC if all classes are equally important. AUTO will try to sync the threshold scorer with the scorer used for the experiment, otherwise falls back to F1. The optimized threshold is also used for creating labels in addition to probabilities in MOJO/Python scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_scorers", + "output": "excluded scorers refers to Auxiliary to included_scorers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_scorers", + "output": "excluded scorers refers to Exclude specific scorers: Auxiliary to included_scorers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded scorers", + "output": "excluded scorers refers to Exclude specific scorers: Auxiliary to included_scorers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Exclude specific scorers: ", + "output": "excluded scorers refers to Exclude specific scorers: Auxiliary to included_scorers" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short 
explanation of the expert setting excluded_scorers", + "output": "excluded scorers refers to Auxiliary to included_scorers" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting excluded_scorers", + "output": "excluded scorers refers to Exclude specific scorers: Auxiliary to included_scorers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_constant_model", + "output": "enable constant model refers to Whether to enable constant models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_constant_model", + "output": "enable constant model refers to Constant models: Whether to enable constant models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable constant model", + "output": "enable constant model refers to Constant models: Whether to enable constant models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Constant models: ", + "output": "enable constant model refers to Constant models: Whether to enable constant models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_constant_model", + "output": "enable constant model refers to Whether to enable constant models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_constant_model", + "output": "enable constant model refers to Constant models: Whether to enable constant models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + 
"input": "enable_decision_tree", + "output": "enable decision tree refers to Whether to enable Decision Tree models ('auto'/'on'/'off'). 'auto' disables decision tree unless only non-constant model chosen." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_decision_tree", + "output": "enable decision tree refers to Decision Tree models: Whether to enable Decision Tree models ('auto'/'on'/'off'). 'auto' disables decision tree unless only non-constant model chosen." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable decision tree", + "output": "enable decision tree refers to Decision Tree models: Whether to enable Decision Tree models ('auto'/'on'/'off'). 'auto' disables decision tree unless only non-constant model chosen." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Decision Tree models: ", + "output": "enable decision tree refers to Decision Tree models: Whether to enable Decision Tree models ('auto'/'on'/'off'). 'auto' disables decision tree unless only non-constant model chosen." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_decision_tree", + "output": "enable decision tree refers to Whether to enable Decision Tree models ('auto'/'on'/'off'). 'auto' disables decision tree unless only non-constant model chosen." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_decision_tree", + "output": "enable decision tree refers to Decision Tree models: Whether to enable Decision Tree models ('auto'/'on'/'off'). 'auto' disables decision tree unless only non-constant model chosen." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_glm", + "output": "enable glm refers to Whether to enable GLM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_glm", + "output": "enable glm refers to GLM models: Whether to enable GLM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable glm", + "output": "enable glm refers to GLM models: Whether to enable GLM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "GLM models: ", + "output": "enable glm refers to GLM models: Whether to enable GLM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_glm", + "output": "enable glm refers to Whether to enable GLM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_glm", + "output": "enable glm refers to GLM models: Whether to enable GLM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_glm_rapids", + "output": "enable glm rapids refers to Whether to enable RAPIDS extensions to GLM models (not available until fixes are in xgboost 1.3.0)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_glm_rapids", + "output": "enable glm rapids refers to Enable RAPIDS-cudf extensions to GLM: Whether to enable RAPIDS extensions to GLM models (not available until fixes are in xgboost 1.3.0)" + }, + { + "prompt_type": 
"human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable glm rapids", + "output": "enable glm rapids refers to Enable RAPIDS-cudf extensions to GLM: Whether to enable RAPIDS extensions to GLM models (not available until fixes are in xgboost 1.3.0)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable RAPIDS-cudf extensions to GLM: ", + "output": "enable glm rapids refers to Enable RAPIDS-cudf extensions to GLM: Whether to enable RAPIDS extensions to GLM models (not available until fixes are in xgboost 1.3.0)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_glm_rapids", + "output": "enable glm rapids refers to Whether to enable RAPIDS extensions to GLM models (not available until fixes are in xgboost 1.3.0)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_glm_rapids", + "output": "enable glm rapids refers to Enable RAPIDS-cudf extensions to GLM: Whether to enable RAPIDS extensions to GLM models (not available until fixes are in xgboost 1.3.0)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_gbm", + "output": "enable xgboost gbm refers to Whether to enable XGBoost GBM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_gbm", + "output": "enable xgboost gbm refers to XGBoost GBM models: Whether to enable XGBoost GBM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable xgboost gbm", + "output": "enable xgboost gbm refers to XGBoost GBM models: Whether to enable XGBoost GBM models 
('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "XGBoost GBM models: ", + "output": "enable xgboost gbm refers to XGBoost GBM models: Whether to enable XGBoost GBM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_xgboost_gbm", + "output": "enable xgboost gbm refers to Whether to enable XGBoost GBM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_xgboost_gbm", + "output": "enable xgboost gbm refers to XGBoost GBM models: Whether to enable XGBoost GBM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm", + "output": "enable lightgbm refers to Whether to enable LightGBM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm", + "output": "enable lightgbm refers to LightGBM models: Whether to enable LightGBM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable lightgbm", + "output": "enable lightgbm refers to LightGBM models: Whether to enable LightGBM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "LightGBM models: ", + "output": "enable lightgbm refers to LightGBM models: Whether to enable LightGBM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_lightgbm", + "output": "enable lightgbm refers to Whether to enable LightGBM models ('auto'/'on'/'off')" + }, + 
{ + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_lightgbm", + "output": "enable lightgbm refers to LightGBM models: Whether to enable LightGBM models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_tensorflow", + "output": "enable tensorflow refers to Whether to enable TensorFlow models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_tensorflow", + "output": "enable tensorflow refers to TensorFlow models: Whether to enable TensorFlow models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable tensorflow", + "output": "enable tensorflow refers to TensorFlow models: Whether to enable TensorFlow models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "TensorFlow models: ", + "output": "enable tensorflow refers to TensorFlow models: Whether to enable TensorFlow models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_tensorflow", + "output": "enable tensorflow refers to Whether to enable TensorFlow models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_tensorflow", + "output": "enable tensorflow refers to TensorFlow models: Whether to enable TensorFlow models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_grownet", + "output": "enable grownet refers to Whether to enable PyTorch-based GrowNet models ('auto'/'on'/'off')" + }, + { + 
"prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_grownet", + "output": "enable grownet refers to PyTorch GrowNet models: Whether to enable PyTorch-based GrowNet models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable grownet", + "output": "enable grownet refers to PyTorch GrowNet models: Whether to enable PyTorch-based GrowNet models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "PyTorch GrowNet models: ", + "output": "enable grownet refers to PyTorch GrowNet models: Whether to enable PyTorch-based GrowNet models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_grownet", + "output": "enable grownet refers to Whether to enable PyTorch-based GrowNet models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_grownet", + "output": "enable grownet refers to PyTorch GrowNet models: Whether to enable PyTorch-based GrowNet models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_ftrl", + "output": "enable ftrl refers to Whether to enable FTRL support (follow the regularized leader) model ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_ftrl", + "output": "enable ftrl refers to FTRL models: Whether to enable FTRL support (follow the regularized leader) model ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable ftrl", + "output": 
"enable ftrl refers to FTRL models: Whether to enable FTRL support (follow the regularized leader) model ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "FTRL models: ", + "output": "enable ftrl refers to FTRL models: Whether to enable FTRL support (follow the regularized leader) model ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_ftrl", + "output": "enable ftrl refers to Whether to enable FTRL support (follow the regularized leader) model ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_ftrl", + "output": "enable ftrl refers to FTRL models: Whether to enable FTRL support (follow the regularized leader) model ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_rulefit", + "output": "enable rulefit refers to Whether to enable RuleFit support (beta version, no mojo) ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_rulefit", + "output": "enable rulefit refers to RuleFit models: Whether to enable RuleFit support (beta version, no mojo) ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable rulefit", + "output": "enable rulefit refers to RuleFit models: Whether to enable RuleFit support (beta version, no mojo) ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "RuleFit models: ", + "output": "enable rulefit refers to RuleFit models: Whether to enable RuleFit support (beta version, no mojo) 
('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_rulefit", + "output": "enable rulefit refers to Whether to enable RuleFit support (beta version, no mojo) ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_rulefit", + "output": "enable rulefit refers to RuleFit models: Whether to enable RuleFit support (beta version, no mojo) ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_zero_inflated_models", + "output": "enable zero inflated models refers to Whether to enable automatic addition of zero-inflated models for regression problems with zero-inflated target values that meet certain conditions: y >= 0, y.std() > y.mean()" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_zero_inflated_models", + "output": "enable zero inflated models refers to Zero-Inflated models: Whether to enable automatic addition of zero-inflated models for regression problems with zero-inflated target values that meet certain conditions: y >= 0, y.std() > y.mean()" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable zero inflated models", + "output": "enable zero inflated models refers to Zero-Inflated models: Whether to enable automatic addition of zero-inflated models for regression problems with zero-inflated target values that meet certain conditions: y >= 0, y.std() > y.mean()" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Zero-Inflated models: ", + "output": "enable zero inflated models refers to Zero-Inflated models: Whether to enable automatic addition of zero-inflated models 
for regression problems with zero-inflated target values that meet certain conditions: y >= 0, y.std() > y.mean()" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_zero_inflated_models", + "output": "enable zero inflated models refers to Whether to enable automatic addition of zero-inflated models for regression problems with zero-inflated target values that meet certain conditions: y >= 0, y.std() > y.mean()" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_zero_inflated_models", + "output": "enable zero inflated models refers to Zero-Inflated models: Whether to enable automatic addition of zero-inflated models for regression problems with zero-inflated target values that meet certain conditions: y >= 0, y.std() > y.mean()" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_rapids", + "output": "enable xgboost rapids refers to Whether to enable RAPIDS extensions to XGBoost GBM/Dart. If selected, python scoring package can only be used on GPU system." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_rapids", + "output": "enable xgboost rapids refers to Enable RAPIDS-cudf extensions to XGBoost GBM/Dart: Whether to enable RAPIDS extensions to XGBoost GBM/Dart. If selected, python scoring package can only be used on GPU system." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable xgboost rapids", + "output": "enable xgboost rapids refers to Enable RAPIDS-cudf extensions to XGBoost GBM/Dart: Whether to enable RAPIDS extensions to XGBoost GBM/Dart. If selected, python scoring package can only be used on GPU system." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable RAPIDS-cudf extensions to XGBoost GBM/Dart: ", + "output": "enable xgboost rapids refers to Enable RAPIDS-cudf extensions to XGBoost GBM/Dart: Whether to enable RAPIDS extensions to XGBoost GBM/Dart. If selected, python scoring package can only be used on GPU system." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_xgboost_rapids", + "output": "enable xgboost rapids refers to Whether to enable RAPIDS extensions to XGBoost GBM/Dart. If selected, python scoring package can only be used on GPU system." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_xgboost_rapids", + "output": "enable xgboost rapids refers to Enable RAPIDS-cudf extensions to XGBoost GBM/Dart: Whether to enable RAPIDS extensions to XGBoost GBM/Dart. If selected, python scoring package can only be used on GPU system." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_rapids_cuml_models", + "output": "enable rapids cuml models refers to Whether to enable GPU-based RAPIDS CUML models.No mojo support, but python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_rapids_cuml_models", + "output": "enable rapids cuml models refers to Whether to enable RAPIDS CUML GPU models (no mojo): Whether to enable GPU-based RAPIDS CUML models.No mojo support, but python scoring is supported.In alpha testing status. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable rapids cuml models", + "output": "enable rapids cuml models refers to Whether to enable RAPIDS CUML GPU models (no mojo): Whether to enable GPU-based RAPIDS CUML models.No mojo support, but python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to enable RAPIDS CUML GPU models (no mojo): ", + "output": "enable rapids cuml models refers to Whether to enable RAPIDS CUML GPU models (no mojo): Whether to enable GPU-based RAPIDS CUML models.No mojo support, but python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_rapids_cuml_models", + "output": "enable rapids cuml models refers to Whether to enable GPU-based RAPIDS CUML models.No mojo support, but python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_rapids_cuml_models", + "output": "enable rapids cuml models refers to Whether to enable RAPIDS CUML GPU models (no mojo): Whether to enable GPU-based RAPIDS CUML models.No mojo support, but python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_rapids_models_dask", + "output": "enable rapids models dask refers to Whether to enable Multi-GPU mode for capable RAPIDS CUML models.No mojo support, but python scoring is supported.In alpha testing status. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_rapids_models_dask", + "output": "enable rapids models dask refers to Whether to enable RAPIDS CUML GPU models to use Dask (no mojo): Whether to enable Multi-GPU mode for capable RAPIDS CUML models.No mojo support, but python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable rapids models dask", + "output": "enable rapids models dask refers to Whether to enable RAPIDS CUML GPU models to use Dask (no mojo): Whether to enable Multi-GPU mode for capable RAPIDS CUML models.No mojo support, but python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to enable RAPIDS CUML GPU models to use Dask (no mojo): ", + "output": "enable rapids models dask refers to Whether to enable RAPIDS CUML GPU models to use Dask (no mojo): Whether to enable Multi-GPU mode for capable RAPIDS CUML models.No mojo support, but python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_rapids_models_dask", + "output": "enable rapids models dask refers to Whether to enable Multi-GPU mode for capable RAPIDS CUML models.No mojo support, but python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_rapids_models_dask", + "output": "enable rapids models dask refers to Whether to enable RAPIDS CUML GPU models to use Dask (no mojo): Whether to enable Multi-GPU mode for capable RAPIDS CUML models.No mojo support, but python scoring is supported.In alpha testing status. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_dask_for_1_gpu", + "output": "use dask for 1 gpu refers to Whether to use dask_cudf even for 1 GPU. If False, will use plain cudf." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_dask_for_1_gpu", + "output": "use dask for 1 gpu refers to Whether to use dask_cudf even for 1 GPU. If False, will use plain cudf." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use dask for 1 gpu", + "output": "use dask for 1 gpu refers to Whether to use dask_cudf even for 1 GPU. If False, will use plain cudf." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "use dask for 1 gpu refers to Whether to use dask_cudf even for 1 GPU. If False, will use plain cudf." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting use_dask_for_1_gpu", + "output": "use dask for 1 gpu refers to Whether to use dask_cudf even for 1 GPU. If False, will use plain cudf." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting use_dask_for_1_gpu", + "output": "use dask for 1 gpu refers to Whether to use dask_cudf even for 1 GPU. If False, will use plain cudf." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_retrials_allreduce_empty_issue", + "output": "dask retrials allreduce empty issue refers to Number of retrials for dask fit to protect against known xgboost issues https://github.com/dmlc/xgboost/issues/6272 https://github.com/dmlc/xgboost/issues/6551" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_retrials_allreduce_empty_issue", + "output": "dask retrials allreduce empty issue refers to Number of retrials for dask fit to protect against known xgboost issues https://github.com/dmlc/xgboost/issues/6272 https://github.com/dmlc/xgboost/issues/6551" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask retrials allreduce empty issue", + "output": "dask retrials allreduce empty issue refers to Number of retrials for dask fit to protect against known xgboost issues https://github.com/dmlc/xgboost/issues/6272 https://github.com/dmlc/xgboost/issues/6551" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "dask retrials allreduce empty issue refers to Number of retrials for dask fit to protect against known xgboost issues https://github.com/dmlc/xgboost/issues/6272 https://github.com/dmlc/xgboost/issues/6551" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_retrials_allreduce_empty_issue", + "output": "dask retrials allreduce empty issue refers to Number of retrials for dask fit to protect against known xgboost issues https://github.com/dmlc/xgboost/issues/6272 https://github.com/dmlc/xgboost/issues/6551" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting 
dask_retrials_allreduce_empty_issue", + "output": "dask retrials allreduce empty issue refers to Number of retrials for dask fit to protect against known xgboost issues https://github.com/dmlc/xgboost/issues/6272 https://github.com/dmlc/xgboost/issues/6551" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_rf", + "output": "enable xgboost rf refers to Whether to enable XGBoost RF mode without early stopping. Disabled unless switched on. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_rf", + "output": "enable xgboost rf refers to Enable XGBoost RF mode: Whether to enable XGBoost RF mode without early stopping. Disabled unless switched on. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable xgboost rf", + "output": "enable xgboost rf refers to Enable XGBoost RF mode: Whether to enable XGBoost RF mode without early stopping. Disabled unless switched on. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable XGBoost RF mode: ", + "output": "enable xgboost rf refers to Enable XGBoost RF mode: Whether to enable XGBoost RF mode without early stopping. Disabled unless switched on. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_xgboost_rf", + "output": "enable xgboost rf refers to Whether to enable XGBoost RF mode without early stopping. Disabled unless switched on. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_xgboost_rf", + "output": "enable xgboost rf refers to Enable XGBoost RF mode: Whether to enable XGBoost RF mode without early stopping. Disabled unless switched on. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_gbm_dask", + "output": "enable xgboost gbm dask refers to Whether to enable dask_cudf (multi-GPU) version of XGBoost GBM/RF. Disabled unless switched on. Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_gbm_dask", + "output": "enable xgboost gbm dask refers to Enable dask_cudf (multi-GPU) XGBoost GBM/RF: Whether to enable dask_cudf (multi-GPU) version of XGBoost GBM/RF. Disabled unless switched on. Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable xgboost gbm dask", + "output": "enable xgboost gbm dask refers to Enable dask_cudf (multi-GPU) XGBoost GBM/RF: Whether to enable dask_cudf (multi-GPU) version of XGBoost GBM/RF. Disabled unless switched on. Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable dask_cudf (multi-GPU) XGBoost GBM/RF: ", + "output": "enable xgboost gbm dask refers to Enable dask_cudf (multi-GPU) XGBoost GBM/RF: Whether to enable dask_cudf (multi-GPU) version of XGBoost GBM/RF. Disabled unless switched on. Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_xgboost_gbm_dask", + "output": "enable xgboost gbm dask refers to Whether to enable dask_cudf (multi-GPU) version of XGBoost GBM/RF. Disabled unless switched on. 
Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_xgboost_gbm_dask", + "output": "enable xgboost gbm dask refers to Enable dask_cudf (multi-GPU) XGBoost GBM/RF: Whether to enable dask_cudf (multi-GPU) version of XGBoost GBM/RF. Disabled unless switched on. Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_dask", + "output": "enable lightgbm dask refers to Whether to enable multi-node LightGBM. Disabled unless switched on. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_dask", + "output": "enable lightgbm dask refers to Enable dask (multi-node) LightGBM: Whether to enable multi-node LightGBM. Disabled unless switched on. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable lightgbm dask", + "output": "enable lightgbm dask refers to Enable dask (multi-node) LightGBM: Whether to enable multi-node LightGBM. Disabled unless switched on. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable dask (multi-node) LightGBM: ", + "output": "enable lightgbm dask refers to Enable dask (multi-node) LightGBM: Whether to enable multi-node LightGBM. Disabled unless switched on. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_lightgbm_dask", + "output": "enable lightgbm dask refers to Whether to enable multi-node LightGBM. Disabled unless switched on. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_lightgbm_dask", + "output": "enable lightgbm dask refers to Enable dask (multi-node) LightGBM: Whether to enable multi-node LightGBM. Disabled unless switched on. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hyperopt_shift_leak", + "output": "hyperopt shift leak refers to If num_inner_hyperopt_trials_prefinal > 0, then whether to do hyper parameter tuning during leakage/shift detection. Might be useful to find non-trivial leakage/shift, but usually not necessary. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hyperopt_shift_leak", + "output": "hyperopt shift leak refers to Whether to do hyperopt for leakage/shift: If num_inner_hyperopt_trials_prefinal > 0, then whether to do hyper parameter tuning during leakage/shift detection. Might be useful to find non-trivial leakage/shift, but usually not necessary. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hyperopt shift leak", + "output": "hyperopt shift leak refers to Whether to do hyperopt for leakage/shift: If num_inner_hyperopt_trials_prefinal > 0, then whether to do hyper parameter tuning during leakage/shift detection. Might be useful to find non-trivial leakage/shift, but usually not necessary. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to do hyperopt for leakage/shift: ", + "output": "hyperopt shift leak refers to Whether to do hyperopt for leakage/shift: If num_inner_hyperopt_trials_prefinal > 0, then whether to do hyper parameter tuning during leakage/shift detection. Might be useful to find non-trivial leakage/shift, but usually not necessary. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hyperopt_shift_leak", + "output": "hyperopt shift leak refers to If num_inner_hyperopt_trials_prefinal > 0, then whether to do hyper parameter tuning during leakage/shift detection. Might be useful to find non-trivial leakage/shift, but usually not necessary. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hyperopt_shift_leak", + "output": "hyperopt shift leak refers to Whether to do hyperopt for leakage/shift: If num_inner_hyperopt_trials_prefinal > 0, then whether to do hyper parameter tuning during leakage/shift detection. Might be useful to find non-trivial leakage/shift, but usually not necessary. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hyperopt_shift_leak_per_column", + "output": "hyperopt shift leak per column refers to If num_inner_hyperopt_trials_prefinal > 0, then whether to do hyper parameter tuning during leakage/shift detection, when checking each column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hyperopt_shift_leak_per_column", + "output": "hyperopt shift leak per column refers to Whether to do hyperopt for leakage/shift for each column: If num_inner_hyperopt_trials_prefinal > 0, then whether to do hyper parameter tuning during leakage/shift detection, when checking each column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hyperopt shift leak per column", + "output": "hyperopt shift leak per column refers to Whether to do hyperopt for leakage/shift for each column: If num_inner_hyperopt_trials_prefinal > 0, then whether to do hyper parameter tuning during leakage/shift detection, when checking each column. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to do hyperopt for leakage/shift for each column: ", + "output": "hyperopt shift leak per column refers to Whether to do hyperopt for leakage/shift for each column: If num_inner_hyperopt_trials_prefinal > 0, then whether to do hyper parameter tuning during leakage/shift detection, when checking each column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hyperopt_shift_leak_per_column", + "output": "hyperopt shift leak per column refers to If num_inner_hyperopt_trials_prefinal > 0, then whether to do hyper parameter tuning during leakage/shift detection, when checking each column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hyperopt_shift_leak_per_column", + "output": "hyperopt shift leak per column refers to Whether to do hyperopt for leakage/shift for each column: If num_inner_hyperopt_trials_prefinal > 0, then whether to do hyper parameter tuning during leakage/shift detection, when checking each column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_inner_hyperopt_trials_prefinal", + "output": "num inner hyperopt trials prefinal refers to Number of trials for Optuna hyperparameter optimization for tuning and evolution models. 0 means no trials. For small data, 100 is ok choice, while for larger data smaller values are reasonable if need results quickly. If using RAPIDS or DASK, hyperparameter optimization keeps data on GPU entire time. Currently applies to XGBoost GBM/Dart and LightGBM. Useful when there is high overhead of DAI outside inner model fit/predict, so this tunes without that overhead. 
However, can overfit on a single fold when doing tuning or evolution, and if using CV then averaging the fold hyperparameters can lead to unexpected results. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_inner_hyperopt_trials_prefinal", + "output": "num inner hyperopt trials prefinal refers to Number of trials for hyperparameter optimization during model tuning only: Number of trials for Optuna hyperparameter optimization for tuning and evolution models. 0 means no trials. For small data, 100 is ok choice, while for larger data smaller values are reasonable if need results quickly. If using RAPIDS or DASK, hyperparameter optimization keeps data on GPU entire time. Currently applies to XGBoost GBM/Dart and LightGBM. Useful when there is high overhead of DAI outside inner model fit/predict, so this tunes without that overhead. However, can overfit on a single fold when doing tuning or evolution, and if using CV then averaging the fold hyperparameters can lead to unexpected results. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num inner hyperopt trials prefinal", + "output": "num inner hyperopt trials prefinal refers to Number of trials for hyperparameter optimization during model tuning only: Number of trials for Optuna hyperparameter optimization for tuning and evolution models. 0 means no trials. For small data, 100 is ok choice, while for larger data smaller values are reasonable if need results quickly. If using RAPIDS or DASK, hyperparameter optimization keeps data on GPU entire time. Currently applies to XGBoost GBM/Dart and LightGBM. Useful when there is high overhead of DAI outside inner model fit/predict, so this tunes without that overhead. 
However, can overfit on a single fold when doing tuning or evolution, and if using CV then averaging the fold hyperparameters can lead to unexpected results. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of trials for hyperparameter optimization during model tuning only: ", + "output": "num inner hyperopt trials prefinal refers to Number of trials for hyperparameter optimization during model tuning only: Number of trials for Optuna hyperparameter optimization for tuning and evolution models. 0 means no trials. For small data, 100 is ok choice, while for larger data smaller values are reasonable if need results quickly. If using RAPIDS or DASK, hyperparameter optimization keeps data on GPU entire time. Currently applies to XGBoost GBM/Dart and LightGBM. Useful when there is high overhead of DAI outside inner model fit/predict, so this tunes without that overhead. However, can overfit on a single fold when doing tuning or evolution, and if using CV then averaging the fold hyperparameters can lead to unexpected results. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_inner_hyperopt_trials_prefinal", + "output": "num inner hyperopt trials prefinal refers to Number of trials for Optuna hyperparameter optimization for tuning and evolution models. 0 means no trials. For small data, 100 is ok choice, while for larger data smaller values are reasonable if need results quickly. If using RAPIDS or DASK, hyperparameter optimization keeps data on GPU entire time. Currently applies to XGBoost GBM/Dart and LightGBM. Useful when there is high overhead of DAI outside inner model fit/predict, so this tunes without that overhead. However, can overfit on a single fold when doing tuning or evolution, and if using CV then averaging the fold hyperparameters can lead to unexpected results. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_inner_hyperopt_trials_prefinal", + "output": "num inner hyperopt trials prefinal refers to Number of trials for hyperparameter optimization during model tuning only: Number of trials for Optuna hyperparameter optimization for tuning and evolution models. 0 means no trials. For small data, 100 is ok choice, while for larger data smaller values are reasonable if need results quickly. If using RAPIDS or DASK, hyperparameter optimization keeps data on GPU entire time. Currently applies to XGBoost GBM/Dart and LightGBM. Useful when there is high overhead of DAI outside inner model fit/predict, so this tunes without that overhead. However, can overfit on a single fold when doing tuning or evolution, and if using CV then averaging the fold hyperparameters can lead to unexpected results. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_inner_hyperopt_trials_final", + "output": "num inner hyperopt trials final refers to Number of trials for Optuna hyperparameter optimization for final models. 0 means no trials. For small data, 100 is ok choice, while for larger data smaller values are reasonable if need results quickly. Applies to final model only even if num_inner_hyperopt_trials=0. If using RAPIDS or DASK, hyperparameter optimization keeps data on GPU entire time. Currently applies to XGBoost GBM/Dart and LightGBM. Useful when there is high overhead of DAI outside inner model fit/predict, so this tunes without that overhead. However, for final model each fold is independently optimized and can overfit on each fold, after which predictions are averaged (so no issue with averaging hyperparameters when doing CV with tuning or evolution). 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_inner_hyperopt_trials_final", + "output": "num inner hyperopt trials final refers to Number of trials for hyperparameter optimization for final model only: Number of trials for Optuna hyperparameter optimization for final models. 0 means no trials. For small data, 100 is ok choice, while for larger data smaller values are reasonable if need results quickly. Applies to final model only even if num_inner_hyperopt_trials=0. If using RAPIDS or DASK, hyperparameter optimization keeps data on GPU entire time. Currently applies to XGBoost GBM/Dart and LightGBM. Useful when there is high overhead of DAI outside inner model fit/predict, so this tunes without that overhead. However, for final model each fold is independently optimized and can overfit on each fold, after which predictions are averaged (so no issue with averaging hyperparameters when doing CV with tuning or evolution). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num inner hyperopt trials final", + "output": "num inner hyperopt trials final refers to Number of trials for hyperparameter optimization for final model only: Number of trials for Optuna hyperparameter optimization for final models. 0 means no trials. For small data, 100 is ok choice, while for larger data smaller values are reasonable if need results quickly. Applies to final model only even if num_inner_hyperopt_trials=0. If using RAPIDS or DASK, hyperparameter optimization keeps data on GPU entire time. Currently applies to XGBoost GBM/Dart and LightGBM. Useful when there is high overhead of DAI outside inner model fit/predict, so this tunes without that overhead. 
However, for final model each fold is independently optimized and can overfit on each fold, after which predictions are averaged (so no issue with averaging hyperparameters when doing CV with tuning or evolution). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of trials for hyperparameter optimization for final model only: ", + "output": "num inner hyperopt trials final refers to Number of trials for hyperparameter optimization for final model only: Number of trials for Optuna hyperparameter optimization for final models. 0 means no trials. For small data, 100 is ok choice, while for larger data smaller values are reasonable if need results quickly. Applies to final model only even if num_inner_hyperopt_trials=0. If using RAPIDS or DASK, hyperparameter optimization keeps data on GPU entire time. Currently applies to XGBoost GBM/Dart and LightGBM. Useful when there is high overhead of DAI outside inner model fit/predict, so this tunes without that overhead. However, for final model each fold is independently optimized and can overfit on each fold, after which predictions are averaged (so no issue with averaging hyperparameters when doing CV with tuning or evolution). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_inner_hyperopt_trials_final", + "output": "num inner hyperopt trials final refers to Number of trials for Optuna hyperparameter optimization for final models. 0 means no trials. For small data, 100 is ok choice, while for larger data smaller values are reasonable if need results quickly. Applies to final model only even if num_inner_hyperopt_trials=0. If using RAPIDS or DASK, hyperparameter optimization keeps data on GPU entire time. Currently applies to XGBoost GBM/Dart and LightGBM. Useful when there is high overhead of DAI outside inner model fit/predict, so this tunes without that overhead. 
However, for final model each fold is independently optimized and can overfit on each fold, after which predictions are averaged (so no issue with averaging hyperparameters when doing CV with tuning or evolution). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_inner_hyperopt_trials_final", + "output": "num inner hyperopt trials final refers to Number of trials for hyperparameter optimization for final model only: Number of trials for Optuna hyperparameter optimization for final models. 0 means no trials. For small data, 100 is ok choice, while for larger data smaller values are reasonable if need results quickly. Applies to final model only even if num_inner_hyperopt_trials=0. If using RAPIDS or DASK, hyperparameter optimization keeps data on GPU entire time. Currently applies to XGBoost GBM/Dart and LightGBM. Useful when there is high overhead of DAI outside inner model fit/predict, so this tunes without that overhead. However, for final model each fold is independently optimized and can overfit on each fold, after which predictions are averaged (so no issue with averaging hyperparameters when doing CV with tuning or evolution). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_hyperopt_individuals_final", + "output": "num hyperopt individuals final refers to Number of individuals in final model (all folds/repeats for given base model) to optimize with Optuna hyperparameter tuning. -1 means all. 0 is same as choosing no Optuna trials. Might be only beneficial to optimize hyperparameters of best individual (i.e. value of 1) in ensemble. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_hyperopt_individuals_final", + "output": "num hyperopt individuals final refers to Number of individuals in final ensemble to use Optuna on: Number of individuals in final model (all folds/repeats for given base model) to optimize with Optuna hyperparameter tuning. -1 means all. 0 is same as choosing no Optuna trials. Might be only beneficial to optimize hyperparameters of best individual (i.e. value of 1) in ensemble. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num hyperopt individuals final", + "output": "num hyperopt individuals final refers to Number of individuals in final ensemble to use Optuna on: Number of individuals in final model (all folds/repeats for given base model) to optimize with Optuna hyperparameter tuning. -1 means all. 0 is same as choosing no Optuna trials. Might be only beneficial to optimize hyperparameters of best individual (i.e. value of 1) in ensemble. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of individuals in final ensemble to use Optuna on: ", + "output": "num hyperopt individuals final refers to Number of individuals in final ensemble to use Optuna on: Number of individuals in final model (all folds/repeats for given base model) to optimize with Optuna hyperparameter tuning. -1 means all. 0 is same as choosing no Optuna trials. Might be only beneficial to optimize hyperparameters of best individual (i.e. value of 1) in ensemble. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_hyperopt_individuals_final", + "output": "num hyperopt individuals final refers to Number of individuals in final model (all folds/repeats for given base model) to optimize with Optuna hyperparameter tuning. -1 means all. 0 is same as choosing no Optuna trials. Might be only beneficial to optimize hyperparameters of best individual (i.e. value of 1) in ensemble. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_hyperopt_individuals_final", + "output": "num hyperopt individuals final refers to Number of individuals in final ensemble to use Optuna on: Number of individuals in final model (all folds/repeats for given base model) to optimize with Optuna hyperparameter tuning. -1 means all. 0 is same as choosing no Optuna trials. Might be only beneficial to optimize hyperparameters of best individual (i.e. value of 1) in ensemble. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "optuna_pruner", + "output": "optuna pruner refers to Optuna Pruner to use (applicable to XGBoost and LightGBM that support Optuna callbacks). To disable choose None." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "optuna_pruner", + "output": "optuna pruner refers to Optuna Pruners: Optuna Pruner to use (applicable to XGBoost and LightGBM that support Optuna callbacks). To disable choose None." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "optuna pruner", + "output": "optuna pruner refers to Optuna Pruners: Optuna Pruner to use (applicable to XGBoost and LightGBM that support Optuna callbacks). To disable choose None."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Optuna Pruners: ", + "output": "optuna pruner refers to Optuna Pruners: Optuna Pruner to use (applicable to XGBoost and LightGBM that support Optuna callbacks). To disable choose None." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting optuna_pruner", + "output": "optuna pruner refers to Optuna Pruner to use (applicable to XGBoost and LightGBM that support Optuna callbacks). To disable choose None." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting optuna_pruner", + "output": "optuna pruner refers to Optuna Pruners: Optuna Pruner to use (applicable to XGBoost and LightGBM that support Optuna callbacks). To disable choose None." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "optuna_pruner_kwargs", + "output": "optuna pruner kwargs refers to Set Optuna constructor arguments for particular applicable pruners. https://optuna.readthedocs.io/en/stable/reference/pruners.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "optuna_pruner_kwargs", + "output": "optuna pruner kwargs refers to Set Optuna pruner constructor args.: Set Optuna constructor arguments for particular applicable pruners. https://optuna.readthedocs.io/en/stable/reference/pruners.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "optuna pruner kwargs", + "output": "optuna pruner kwargs refers to Set Optuna pruner constructor args.: Set Optuna constructor arguments for particular applicable pruners. 
https://optuna.readthedocs.io/en/stable/reference/pruners.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Set Optuna pruner constructor args.: ", + "output": "optuna pruner kwargs refers to Set Optuna pruner constructor args.: Set Optuna constructor arguments for particular applicable pruners. https://optuna.readthedocs.io/en/stable/reference/pruners.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting optuna_pruner_kwargs", + "output": "optuna pruner kwargs refers to Set Optuna constructor arguments for particular applicable pruners. https://optuna.readthedocs.io/en/stable/reference/pruners.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting optuna_pruner_kwargs", + "output": "optuna pruner kwargs refers to Set Optuna pruner constructor args.: Set Optuna constructor arguments for particular applicable pruners. https://optuna.readthedocs.io/en/stable/reference/pruners.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "optuna_sampler", + "output": "optuna sampler refers to Optuna Sampler to use (applicable to XGBoost and LightGBM that support Optuna callbacks)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "optuna_sampler", + "output": "optuna sampler refers to Optuna Samplers: Optuna Sampler to use (applicable to XGBoost and LightGBM that support Optuna callbacks)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "optuna sampler", + "output": "optuna sampler refers to Optuna Samplers: Optuna Sampler to use (applicable to XGBoost and LightGBM that support Optuna callbacks)."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Optuna Samplers: ", + "output": "optuna sampler refers to Optuna Samplers: Optuna Sampler to use (applicable to XGBoost and LightGBM that support Optuna callbacks)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting optuna_sampler", + "output": "optuna sampler refers to Optuna Sampler to use (applicable to XGBoost and LightGBM that support Optuna callbacks)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting optuna_sampler", + "output": "optuna sampler refers to Optuna Samplers: Optuna Sampler to use (applicable to XGBoost and LightGBM that support Optuna callbacks)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "optuna_sampler_kwargs", + "output": "optuna sampler kwargs refers to Set Optuna constructor arguments for particular applicable samplers. https://optuna.readthedocs.io/en/stable/reference/samplers.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "optuna_sampler_kwargs", + "output": "optuna sampler kwargs refers to Set Optuna sampler constructor args.: Set Optuna constructor arguments for particular applicable samplers. https://optuna.readthedocs.io/en/stable/reference/samplers.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "optuna sampler kwargs", + "output": "optuna sampler kwargs refers to Set Optuna sampler constructor args.: Set Optuna constructor arguments for particular applicable samplers. 
https://optuna.readthedocs.io/en/stable/reference/samplers.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Set Optuna sampler constructor args.: ", + "output": "optuna sampler kwargs refers to Set Optuna sampler constructor args.: Set Optuna constructor arguments for particular applicable samplers. https://optuna.readthedocs.io/en/stable/reference/samplers.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting optuna_sampler_kwargs", + "output": "optuna sampler kwargs refers to Set Optuna constructor arguments for particular applicable samplers. https://optuna.readthedocs.io/en/stable/reference/samplers.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting optuna_sampler_kwargs", + "output": "optuna sampler kwargs refers to Set Optuna sampler constructor args.: Set Optuna constructor arguments for particular applicable samplers. https://optuna.readthedocs.io/en/stable/reference/samplers.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_hyperopt_callback", + "output": "enable xgboost hyperopt callback refers to Whether to enable Optuna's XGBoost Pruning callback to abort unpromising runs. Not done if tuning learning rate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_hyperopt_callback", + "output": "enable xgboost hyperopt callback refers to Enable Optuna XGBoost Pruning callback: Whether to enable Optuna's XGBoost Pruning callback to abort unpromising runs. Not done if tuning learning rate." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable xgboost hyperopt callback", + "output": "enable xgboost hyperopt callback refers to Enable Optuna XGBoost Pruning callback: Whether to enable Optuna's XGBoost Pruning callback to abort unpromising runs. Not done if tuning learning rate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable Optuna XGBoost Pruning callback: ", + "output": "enable xgboost hyperopt callback refers to Enable Optuna XGBoost Pruning callback: Whether to enable Optuna's XGBoost Pruning callback to abort unpromising runs. Not done if tuning learning rate." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_xgboost_hyperopt_callback", + "output": "enable xgboost hyperopt callback refers to Whether to enable Optuna's XGBoost Pruning callback to abort unpromising runs. Not done if tuning learning rate." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_xgboost_hyperopt_callback", + "output": "enable xgboost hyperopt callback refers to Enable Optuna XGBoost Pruning callback: Whether to enable Optuna's XGBoost Pruning callback to abort unpromising runs. Not done if tuning learning rate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_hyperopt_callback", + "output": "enable lightgbm hyperopt callback refers to Whether to enable Optuna's LightGBM Pruning callback to abort unpromising runs. Not done if tuning learning rate." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_hyperopt_callback", + "output": "enable lightgbm hyperopt callback refers to Enable Optuna LightGBM Pruning callback: Whether to enable Optuna's LightGBM Pruning callback to abort unpromising runs. Not done if tuning learning rate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable lightgbm hyperopt callback", + "output": "enable lightgbm hyperopt callback refers to Enable Optuna LightGBM Pruning callback: Whether to enable Optuna's LightGBM Pruning callback to abort unpromising runs. Not done if tuning learning rate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable Optuna LightGBM Pruning callback: ", + "output": "enable lightgbm hyperopt callback refers to Enable Optuna LightGBM Pruning callback: Whether to enable Optuna's LightGBM Pruning callback to abort unpromising runs. Not done if tuning learning rate." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_lightgbm_hyperopt_callback", + "output": "enable lightgbm hyperopt callback refers to Whether to enable Optuna's LightGBM Pruning callback to abort unpromising runs. Not done if tuning learning rate." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_lightgbm_hyperopt_callback", + "output": "enable lightgbm hyperopt callback refers to Enable Optuna LightGBM Pruning callback: Whether to enable Optuna's LightGBM Pruning callback to abort unpromising runs. Not done if tuning learning rate." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_dart", + "output": "enable xgboost dart refers to Whether to enable XGBoost Dart models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_dart", + "output": "enable xgboost dart refers to XGBoost Dart models: Whether to enable XGBoost Dart models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable xgboost dart", + "output": "enable xgboost dart refers to XGBoost Dart models: Whether to enable XGBoost Dart models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "XGBoost Dart models: ", + "output": "enable xgboost dart refers to XGBoost Dart models: Whether to enable XGBoost Dart models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_xgboost_dart", + "output": "enable xgboost dart refers to Whether to enable XGBoost Dart models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_xgboost_dart", + "output": "enable xgboost dart refers to XGBoost Dart models: Whether to enable XGBoost Dart models ('auto'/'on'/'off')" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_dart_dask", + "output": "enable xgboost dart dask refers to Whether to enable dask_cudf (multi-GPU) version of XGBoost Dart. Disabled unless switched on. If have only 1 GPU, then only uses dask_cudf if use_dask_for_1_gpu is True. Only applicable for single final model without early stopping. 
No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_dart_dask", + "output": "enable xgboost dart dask refers to Enable dask_cudf (multi-GPU) XGBoost Dart: Whether to enable dask_cudf (multi-GPU) version of XGBoost Dart. Disabled unless switched on. If have only 1 GPU, then only uses dask_cudf if use_dask_for_1_gpu is True. Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable xgboost dart dask", + "output": "enable xgboost dart dask refers to Enable dask_cudf (multi-GPU) XGBoost Dart: Whether to enable dask_cudf (multi-GPU) version of XGBoost Dart. Disabled unless switched on. If have only 1 GPU, then only uses dask_cudf if use_dask_for_1_gpu is True. Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable dask_cudf (multi-GPU) XGBoost Dart: ", + "output": "enable xgboost dart dask refers to Enable dask_cudf (multi-GPU) XGBoost Dart: Whether to enable dask_cudf (multi-GPU) version of XGBoost Dart. Disabled unless switched on. If have only 1 GPU, then only uses dask_cudf if use_dask_for_1_gpu is True. Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_xgboost_dart_dask", + "output": "enable xgboost dart dask refers to Whether to enable dask_cudf (multi-GPU) version of XGBoost Dart. Disabled unless switched on. If have only 1 GPU, then only uses dask_cudf if use_dask_for_1_gpu is True. Only applicable for single final model without early stopping. No Shapley possible. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_xgboost_dart_dask", + "output": "enable xgboost dart dask refers to Enable dask_cudf (multi-GPU) XGBoost Dart: Whether to enable dask_cudf (multi-GPU) version of XGBoost Dart. Disabled unless switched on. If have only 1 GPU, then only uses dask_cudf if use_dask_for_1_gpu is True. Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_rf_dask", + "output": "enable xgboost rf dask refers to Whether to enable dask_cudf (multi-GPU) version of XGBoost RF. Disabled unless switched on. If have only 1 GPU, then only uses dask_cudf if use_dask_for_1_gpu is True. Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xgboost_rf_dask", + "output": "enable xgboost rf dask refers to Enable dask_cudf (multi-GPU) XGBoost RF: Whether to enable dask_cudf (multi-GPU) version of XGBoost RF. Disabled unless switched on. If have only 1 GPU, then only uses dask_cudf if use_dask_for_1_gpu is True. Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable xgboost rf dask", + "output": "enable xgboost rf dask refers to Enable dask_cudf (multi-GPU) XGBoost RF: Whether to enable dask_cudf (multi-GPU) version of XGBoost RF. Disabled unless switched on. If have only 1 GPU, then only uses dask_cudf if use_dask_for_1_gpu is True. Only applicable for single final model without early stopping. No Shapley possible. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable dask_cudf (multi-GPU) XGBoost RF: ", + "output": "enable xgboost rf dask refers to Enable dask_cudf (multi-GPU) XGBoost RF: Whether to enable dask_cudf (multi-GPU) version of XGBoost RF. Disabled unless switched on. If have only 1 GPU, then only uses dask_cudf if use_dask_for_1_gpu is True. Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_xgboost_rf_dask", + "output": "enable xgboost rf dask refers to Whether to enable dask_cudf (multi-GPU) version of XGBoost RF. Disabled unless switched on. If have only 1 GPU, then only uses dask_cudf if use_dask_for_1_gpu is True. Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_xgboost_rf_dask", + "output": "enable xgboost rf dask refers to Enable dask_cudf (multi-GPU) XGBoost RF: Whether to enable dask_cudf (multi-GPU) version of XGBoost RF. Disabled unless switched on. If have only 1 GPU, then only uses dask_cudf if use_dask_for_1_gpu is True. Only applicable for single final model without early stopping. No Shapley possible. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_gpus_per_hyperopt_dask", + "output": "num gpus per hyperopt dask refers to Number of GPUs to use per model hyperopt training task. Set to -1 for all GPUs. For example, when this is set to -1 and there are 4 GPUs available, all of them can be used for the training of a single model across a Dask cluster. Ignored if GPUs disabled or no GPUs on system. In multinode context, this refers to the per-node value. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_gpus_per_hyperopt_dask", + "output": "num gpus per hyperopt dask refers to #GPUs/HyperOptDask (-1 = all): Number of GPUs to use per model hyperopt training task. Set to -1 for all GPUs.For example, when this is set to -1 and there are 4 GPUs available, all of them can be used for the training of a single model across a Dask cluster.Ignored if GPUs disabled or no GPUs on system.In multinode context, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num gpus per hyperopt dask", + "output": "num gpus per hyperopt dask refers to #GPUs/HyperOptDask (-1 = all): Number of GPUs to use per model hyperopt training task. Set to -1 for all GPUs.For example, when this is set to -1 and there are 4 GPUs available, all of them can be used for the training of a single model across a Dask cluster.Ignored if GPUs disabled or no GPUs on system.In multinode context, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "#GPUs/HyperOptDask (-1 = all): ", + "output": "num gpus per hyperopt dask refers to #GPUs/HyperOptDask (-1 = all): Number of GPUs to use per model hyperopt training task. Set to -1 for all GPUs.For example, when this is set to -1 and there are 4 GPUs available, all of them can be used for the training of a single model across a Dask cluster.Ignored if GPUs disabled or no GPUs on system.In multinode context, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_gpus_per_hyperopt_dask", + "output": "num gpus per hyperopt dask refers to Number of GPUs to use per model hyperopt training task. 
Set to -1 for all GPUs.For example, when this is set to -1 and there are 4 GPUs available, all of them can be used for the training of a single model across a Dask cluster.Ignored if GPUs disabled or no GPUs on system.In multinode context, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_gpus_per_hyperopt_dask", + "output": "num gpus per hyperopt dask refers to #GPUs/HyperOptDask (-1 = all): Number of GPUs to use per model hyperopt training task. Set to -1 for all GPUs.For example, when this is set to -1 and there are 4 GPUs available, all of them can be used for the training of a single model across a Dask cluster.Ignored if GPUs disabled or no GPUs on system.In multinode context, this refers to the per-node value. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_xgboost_xgbfi", + "output": "use xgboost xgbfi refers to Whether to use (and expect exists) xgbfi feature interactions for xgboost." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_xgboost_xgbfi", + "output": "use xgboost xgbfi refers to Whether to use (and expect exists) xgbfi feature interactions for xgboost." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use xgboost xgbfi", + "output": "use xgboost xgbfi refers to Whether to use (and expect exists) xgbfi feature interactions for xgboost." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "use xgboost xgbfi refers to Whether to use (and expect exists) xgbfi feature interactions for xgboost." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting use_xgboost_xgbfi", + "output": "use xgboost xgbfi refers to Whether to use (and expect exists) xgbfi feature interactions for xgboost." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting use_xgboost_xgbfi", + "output": "use xgboost xgbfi refers to Whether to use (and expect exists) xgbfi feature interactions for xgboost." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_boosting_types", + "output": "enable lightgbm boosting types refers to Which boosting types to enable for LightGBM (gbdt = boosted trees, rf_early_stopping = random forest with early stopping rf = random forest (no early stopping), dart = drop-out boosted trees with no early stopping" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_boosting_types", + "output": "enable lightgbm boosting types refers to LightGBM Boosting types: Which boosting types to enable for LightGBM (gbdt = boosted trees, rf_early_stopping = random forest with early stopping rf = random forest (no early stopping), dart = drop-out boosted trees with no early stopping" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable lightgbm boosting types", + "output": "enable lightgbm boosting types refers to LightGBM Boosting types: Which boosting types to enable for LightGBM (gbdt = boosted trees, rf_early_stopping = random forest with early stopping rf = random forest (no early stopping), dart = drop-out boosted trees with no early stopping" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "LightGBM Boosting types: ", + "output": "enable 
lightgbm boosting types refers to LightGBM Boosting types: Which boosting types to enable for LightGBM (gbdt = boosted trees, rf_early_stopping = random forest with early stopping rf = random forest (no early stopping), dart = drop-out boosted trees with no early stopping" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_lightgbm_boosting_types", + "output": "enable lightgbm boosting types refers to Which boosting types to enable for LightGBM (gbdt = boosted trees, rf_early_stopping = random forest with early stopping rf = random forest (no early stopping), dart = drop-out boosted trees with no early stopping" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_lightgbm_boosting_types", + "output": "enable lightgbm boosting types refers to LightGBM Boosting types: Which boosting types to enable for LightGBM (gbdt = boosted trees, rf_early_stopping = random forest with early stopping rf = random forest (no early stopping), dart = drop-out boosted trees with no early stopping" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_multiclass_balancing", + "output": "enable lightgbm multiclass balancing refers to Whether to enable automatic class weighting for imbalanced multiclass problems. Can make worse probabilities, but improve confusion-matrix based scorers for rare classes without the need to manually calibrate probabilities or fine-tune the label creation process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_multiclass_balancing", + "output": "enable lightgbm multiclass balancing refers to LightGBM multiclass balancing: Whether to enable automatic class weighting for imbalanced multiclass problems. 
Can make worse probabilities, but improve confusion-matrix based scorers for rare classes without the need to manually calibrate probabilities or fine-tune the label creation process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable lightgbm multiclass balancing", + "output": "enable lightgbm multiclass balancing refers to LightGBM multiclass balancing: Whether to enable automatic class weighting for imbalanced multiclass problems. Can make worse probabilities, but improve confusion-matrix based scorers for rare classes without the need to manually calibrate probabilities or fine-tune the label creation process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "LightGBM multiclass balancing: ", + "output": "enable lightgbm multiclass balancing refers to LightGBM multiclass balancing: Whether to enable automatic class weighting for imbalanced multiclass problems. Can make worse probabilities, but improve confusion-matrix based scorers for rare classes without the need to manually calibrate probabilities or fine-tune the label creation process." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_lightgbm_multiclass_balancing", + "output": "enable lightgbm multiclass balancing refers to Whether to enable automatic class weighting for imbalanced multiclass problems. Can make worse probabilities, but improve confusion-matrix based scorers for rare classes without the need to manually calibrate probabilities or fine-tune the label creation process." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_lightgbm_multiclass_balancing", + "output": "enable lightgbm multiclass balancing refers to LightGBM multiclass balancing: Whether to enable automatic class weighting for imbalanced multiclass problems. Can make worse probabilities, but improve confusion-matrix based scorers for rare classes without the need to manually calibrate probabilities or fine-tune the label creation process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_cat_support", + "output": "enable lightgbm cat support refers to Whether to enable LightGBM categorical feature support (runs in CPU mode even if GPUs enabled, and no MOJO built)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_cat_support", + "output": "enable lightgbm cat support refers to LightGBM categorical support: Whether to enable LightGBM categorical feature support (runs in CPU mode even if GPUs enabled, and no MOJO built)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable lightgbm cat support", + "output": "enable lightgbm cat support refers to LightGBM categorical support: Whether to enable LightGBM categorical feature support (runs in CPU mode even if GPUs enabled, and no MOJO built)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "LightGBM categorical support: ", + "output": "enable lightgbm cat support refers to LightGBM categorical support: Whether to enable LightGBM categorical feature support (runs in CPU mode even if GPUs enabled, and no MOJO built)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting 
enable_lightgbm_cat_support", + "output": "enable lightgbm cat support refers to Whether to enable LightGBM categorical feature support (runs in CPU mode even if GPUs enabled, and no MOJO built)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_lightgbm_cat_support", + "output": "enable lightgbm cat support refers to LightGBM categorical support: Whether to enable LightGBM categorical feature support (runs in CPU mode even if GPUs enabled, and no MOJO built)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_linear_tree", + "output": "enable lightgbm linear tree refers to Whether to enable LightGBM linear_tree handling(only CPU mode currently, no L1 regularization -- mae objective, and no MOJO build). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_linear_tree", + "output": "enable lightgbm linear tree refers to LightGBM linear_tree mode: Whether to enable LightGBM linear_tree handling(only CPU mode currently, no L1 regularization -- mae objective, and no MOJO build). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable lightgbm linear tree", + "output": "enable lightgbm linear tree refers to LightGBM linear_tree mode: Whether to enable LightGBM linear_tree handling(only CPU mode currently, no L1 regularization -- mae objective, and no MOJO build). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "LightGBM linear_tree mode: ", + "output": "enable lightgbm linear tree refers to LightGBM linear_tree mode: Whether to enable LightGBM linear_tree handling(only CPU mode currently, no L1 regularization -- mae objective, and no MOJO build). 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_lightgbm_linear_tree", + "output": "enable lightgbm linear tree refers to Whether to enable LightGBM linear_tree handling(only CPU mode currently, no L1 regularization -- mae objective, and no MOJO build). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_lightgbm_linear_tree", + "output": "enable lightgbm linear tree refers to LightGBM linear_tree mode: Whether to enable LightGBM linear_tree handling(only CPU mode currently, no L1 regularization -- mae objective, and no MOJO build). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_extra_trees", + "output": "enable lightgbm extra trees refers to Whether to enable LightGBM extra trees mode to help avoid overfitting" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_extra_trees", + "output": "enable lightgbm extra trees refers to LightGBM extra trees mode: Whether to enable LightGBM extra trees mode to help avoid overfitting" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable lightgbm extra trees", + "output": "enable lightgbm extra trees refers to LightGBM extra trees mode: Whether to enable LightGBM extra trees mode to help avoid overfitting" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "LightGBM extra trees mode: ", + "output": "enable lightgbm extra trees refers to LightGBM extra trees mode: Whether to enable LightGBM extra trees mode to help avoid overfitting" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting 
enable_lightgbm_extra_trees", + "output": "enable lightgbm extra trees refers to Whether to enable LightGBM extra trees mode to help avoid overfitting" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_lightgbm_extra_trees", + "output": "enable lightgbm extra trees refers to LightGBM extra trees mode: Whether to enable LightGBM extra trees mode to help avoid overfitting" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_monotone_constraints_method", + "output": "lightgbm monotone constraints method refers to basic: as fast as when no constraints applied, but over-constrains the predictions.intermediate: very slightly slower, but much less constraining while still holding monotonicity and should be more accurate than basic.advanced: slower, but even more accurate than intermediate. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_monotone_constraints_method", + "output": "lightgbm monotone constraints method refers to Method to use for monotonicity constraints for LightGBM: basic: as fast as when no constraints applied, but over-constrains the predictions.intermediate: very slightly slower, but much less constraining while still holding monotonicity and should be more accurate than basic.advanced: slower, but even more accurate than intermediate. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm monotone constraints method", + "output": "lightgbm monotone constraints method refers to Method to use for monotonicity constraints for LightGBM: basic: as fast as when no constraints applied, but over-constrains the predictions.intermediate: very slightly slower, but much less constraining while still holding monotonicity and should be more accurate than basic.advanced: slower, but even more accurate than intermediate. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Method to use for monotonicity constraints for LightGBM: ", + "output": "lightgbm monotone constraints method refers to Method to use for monotonicity constraints for LightGBM: basic: as fast as when no constraints applied, but over-constrains the predictions.intermediate: very slightly slower, but much less constraining while still holding monotonicity and should be more accurate than basic.advanced: slower, but even more accurate than intermediate. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting lightgbm_monotone_constraints_method", + "output": "lightgbm monotone constraints method refers to basic: as fast as when no constraints applied, but over-constrains the predictions.intermediate: very slightly slower, but much less constraining while still holding monotonicity and should be more accurate than basic.advanced: slower, but even more accurate than intermediate. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting lightgbm_monotone_constraints_method", + "output": "lightgbm monotone constraints method refers to Method to use for monotonicity constraints for LightGBM: basic: as fast as when no constraints applied, but over-constrains the predictions.intermediate: very slightly slower, but much less constraining while still holding monotonicity and should be more accurate than basic.advanced: slower, but even more accurate than intermediate. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_monotone_penalty", + "output": "lightgbm monotone penalty refers to Forbids any monotone splits on the first x (rounded down) level(s) of the tree.The penalty applied to monotone splits on a given depth is a continuous,increasing function the penalization parameter.https://lightgbm.readthedocs.io/en/latest/Parameters.html#monotone_penalty " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_monotone_penalty", + "output": "lightgbm monotone penalty refers to LightGBM Monotone Penalty: Forbids any monotone splits on the first x (rounded down) level(s) of the tree.The penalty applied to monotone splits on a given depth is a continuous,increasing function the penalization parameter.https://lightgbm.readthedocs.io/en/latest/Parameters.html#monotone_penalty " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm monotone penalty", + "output": "lightgbm monotone penalty refers to LightGBM Monotone Penalty: Forbids any monotone splits on the first x (rounded down) level(s) of the tree.The penalty applied to monotone splits on a given depth is a continuous,increasing function the penalization 
parameter.https://lightgbm.readthedocs.io/en/latest/Parameters.html#monotone_penalty " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "LightGBM Monotone Penalty: ", + "output": "lightgbm monotone penalty refers to LightGBM Monotone Penalty: Forbids any monotone splits on the first x (rounded down) level(s) of the tree.The penalty applied to monotone splits on a given depth is a continuous,increasing function the penalization parameter.https://lightgbm.readthedocs.io/en/latest/Parameters.html#monotone_penalty " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting lightgbm_monotone_penalty", + "output": "lightgbm monotone penalty refers to Forbids any monotone splits on the first x (rounded down) level(s) of the tree.The penalty applied to monotone splits on a given depth is a continuous,increasing function the penalization parameter.https://lightgbm.readthedocs.io/en/latest/Parameters.html#monotone_penalty " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting lightgbm_monotone_penalty", + "output": "lightgbm monotone penalty refers to LightGBM Monotone Penalty: Forbids any monotone splits on the first x (rounded down) level(s) of the tree.The penalty applied to monotone splits on a given depth is a continuous,increasing function the penalization parameter.https://lightgbm.readthedocs.io/en/latest/Parameters.html#monotone_penalty " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_cuda_support", + "output": "enable lightgbm cuda support refers to Whether to enable LightGBM CUDA implementation instead of OpenCL. 
CUDA with LightGBM only supported for Pascal+ (compute capability >=6.0)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_lightgbm_cuda_support", + "output": "enable lightgbm cuda support refers to LightGBM CUDA support: Whether to enable LightGBM CUDA implementation instead of OpenCL. CUDA with LightGBM only supported for Pascal+ (compute capability >=6.0)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable lightgbm cuda support", + "output": "enable lightgbm cuda support refers to LightGBM CUDA support: Whether to enable LightGBM CUDA implementation instead of OpenCL. CUDA with LightGBM only supported for Pascal+ (compute capability >=6.0)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "LightGBM CUDA support: ", + "output": "enable lightgbm cuda support refers to LightGBM CUDA support: Whether to enable LightGBM CUDA implementation instead of OpenCL. CUDA with LightGBM only supported for Pascal+ (compute capability >=6.0)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_lightgbm_cuda_support", + "output": "enable lightgbm cuda support refers to Whether to enable LightGBM CUDA implementation instead of OpenCL. CUDA with LightGBM only supported for Pascal+ (compute capability >=6.0)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_lightgbm_cuda_support", + "output": "enable lightgbm cuda support refers to LightGBM CUDA support: Whether to enable LightGBM CUDA implementation instead of OpenCL. 
CUDA with LightGBM only supported for Pascal+ (compute capability >=6.0)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show_constant_model", + "output": "show constant model refers to Whether to show constant models in iteration panel even when not best model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show_constant_model", + "output": "show constant model refers to Whether to show constant models in iteration panel even when not best model: Whether to show constant models in iteration panel even when not best model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show constant model", + "output": "show constant model refers to Whether to show constant models in iteration panel even when not best model: Whether to show constant models in iteration panel even when not best model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to show constant models in iteration panel even when not best model: ", + "output": "show constant model refers to Whether to show constant models in iteration panel even when not best model: Whether to show constant models in iteration panel even when not best model." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting show_constant_model", + "output": "show constant model refers to Whether to show constant models in iteration panel even when not best model." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting show_constant_model", + "output": "show constant model refers to Whether to show constant models in iteration panel even when not best model: Whether to show constant models in iteration panel even when not best model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "xgboost_reg_objectives", + "output": "xgboost reg objectives refers to Select objectives allowed for XGBoost. Added to allowed mutations (the default reg:squarederror is in sample list 3 times) Note: logistic, tweedie, gamma, poisson are only valid for targets with positive values. Note: The objective relates to the form of the (regularized) loss function, used to determine the split with maximum information gain, while the metric is the non-regularized metric measured on the validation set (external or internally generated by DAI). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "xgboost_reg_objectives", + "output": "xgboost reg objectives refers to Select XGBoost regression objectives.: Select objectives allowed for XGBoost. Added to allowed mutations (the default reg:squarederror is in sample list 3 times) Note: logistic, tweedie, gamma, poisson are only valid for targets with positive values. Note: The objective relates to the form of the (regularized) loss function, used to determine the split with maximum information gain, while the metric is the non-regularized metric measured on the validation set (external or internally generated by DAI). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "xgboost reg objectives", + "output": "xgboost reg objectives refers to Select XGBoost regression objectives.: Select objectives allowed for XGBoost. 
Added to allowed mutations (the default reg:squarederror is in sample list 3 times) Note: logistic, tweedie, gamma, poisson are only valid for targets with positive values. Note: The objective relates to the form of the (regularized) loss function, used to determine the split with maximum information gain, while the metric is the non-regularized metric measured on the validation set (external or internally generated by DAI). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Select XGBoost regression objectives.: ", + "output": "xgboost reg objectives refers to Select XGBoost regression objectives.: Select objectives allowed for XGBoost. Added to allowed mutations (the default reg:squarederror is in sample list 3 times) Note: logistic, tweedie, gamma, poisson are only valid for targets with positive values. Note: The objective relates to the form of the (regularized) loss function, used to determine the split with maximum information gain, while the metric is the non-regularized metric measured on the validation set (external or internally generated by DAI). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting xgboost_reg_objectives", + "output": "xgboost reg objectives refers to Select objectives allowed for XGBoost. Added to allowed mutations (the default reg:squarederror is in sample list 3 times) Note: logistic, tweedie, gamma, poisson are only valid for targets with positive values. Note: The objective relates to the form of the (regularized) loss function, used to determine the split with maximum information gain, while the metric is the non-regularized metric measured on the validation set (external or internally generated by DAI). 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting xgboost_reg_objectives", + "output": "xgboost reg objectives refers to Select XGBoost regression objectives.: Select objectives allowed for XGBoost. Added to allowed mutations (the default reg:squarederror is in sample list 3 times) Note: logistic, tweedie, gamma, poisson are only valid for targets with positive values. Note: The objective relates to the form of the (regularized) loss function, used to determine the split with maximum information gain, while the metric is the non-regularized metric measured on the validation set (external or internally generated by DAI). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "xgboost_reg_metrics", + "output": "xgboost reg metrics refers to Select metrics allowed for XGBoost. Added to allowed mutations (the default rmse and mae are in sample list twice). Note: tweedie, gamma, poisson are only valid for targets with positive values. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "xgboost_reg_metrics", + "output": "xgboost reg metrics refers to Select XGBoost regression metrics.: Select metrics allowed for XGBoost. Added to allowed mutations (the default rmse and mae are in sample list twice). Note: tweedie, gamma, poisson are only valid for targets with positive values. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "xgboost reg metrics", + "output": "xgboost reg metrics refers to Select XGBoost regression metrics.: Select metrics allowed for XGBoost. Added to allowed mutations (the default rmse and mae are in sample list twice). Note: tweedie, gamma, poisson are only valid for targets with positive values. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Select XGBoost regression metrics.: ", + "output": "xgboost reg metrics refers to Select XGBoost regression metrics.: Select metrics allowed for XGBoost. Added to allowed mutations (the default rmse and mae are in sample list twice). Note: tweedie, gamma, poisson are only valid for targets with positive values. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting xgboost_reg_metrics", + "output": "xgboost reg metrics refers to Select metrics allowed for XGBoost. Added to allowed mutations (the default rmse and mae are in sample list twice). Note: tweedie, gamma, poisson are only valid for targets with positive values. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting xgboost_reg_metrics", + "output": "xgboost reg metrics refers to Select XGBoost regression metrics.: Select metrics allowed for XGBoost. Added to allowed mutations (the default rmse and mae are in sample list twice). Note: tweedie, gamma, poisson are only valid for targets with positive values. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "xgboost_binary_metrics", + "output": "xgboost binary metrics refers to Select which objectives allowed for XGBoost. Added to allowed mutations (all evenly sampled)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "xgboost_binary_metrics", + "output": "xgboost binary metrics refers to Select XGBoost binary metrics.: Select which objectives allowed for XGBoost. Added to allowed mutations (all evenly sampled)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "xgboost binary metrics", + "output": "xgboost binary metrics refers to Select XGBoost binary metrics.: Select which objectives allowed for XGBoost. Added to allowed mutations (all evenly sampled)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Select XGBoost binary metrics.: ", + "output": "xgboost binary metrics refers to Select XGBoost binary metrics.: Select which objectives allowed for XGBoost. Added to allowed mutations (all evenly sampled)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting xgboost_binary_metrics", + "output": "xgboost binary metrics refers to Select which objectives allowed for XGBoost. Added to allowed mutations (all evenly sampled)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting xgboost_binary_metrics", + "output": "xgboost binary metrics refers to Select XGBoost binary metrics.: Select which objectives allowed for XGBoost. Added to allowed mutations (all evenly sampled)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_reg_objectives", + "output": "lightgbm reg objectives refers to Select objectives allowed for LightGBM. Added to allowed mutations (the default mse is in sample list 2 times if selected). \"binary\" refers to logistic regression. Note: If choose quantile/huber or fair and data is not normalized, recommendation is to use params_lightgbm to specify reasonable value of alpha (for quantile or huber) or fairc (for fair) to LightGBM. Note: mse is same as rmse correponding to L2 loss. mae is L1 loss. Note: tweedie, gamma, poisson are only valid for targets with positive values. 
Note: The objective relates to the form of the (regularized) loss function, used to determine the split with maximum information gain, while the metric is the non-regularized metric measured on the validation set (external or internally generated by DAI). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_reg_objectives", + "output": "lightgbm reg objectives refers to Select LightGBM regression objectives.: Select objectives allowed for LightGBM. Added to allowed mutations (the default mse is in sample list 2 times if selected). \"binary\" refers to logistic regression. Note: If choose quantile/huber or fair and data is not normalized, recommendation is to use params_lightgbm to specify reasonable value of alpha (for quantile or huber) or fairc (for fair) to LightGBM. Note: mse is same as rmse correponding to L2 loss. mae is L1 loss. Note: tweedie, gamma, poisson are only valid for targets with positive values. Note: The objective relates to the form of the (regularized) loss function, used to determine the split with maximum information gain, while the metric is the non-regularized metric measured on the validation set (external or internally generated by DAI). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm reg objectives", + "output": "lightgbm reg objectives refers to Select LightGBM regression objectives.: Select objectives allowed for LightGBM. Added to allowed mutations (the default mse is in sample list 2 times if selected). \"binary\" refers to logistic regression. Note: If choose quantile/huber or fair and data is not normalized, recommendation is to use params_lightgbm to specify reasonable value of alpha (for quantile or huber) or fairc (for fair) to LightGBM. Note: mse is same as rmse correponding to L2 loss. mae is L1 loss. 
Note: tweedie, gamma, poisson are only valid for targets with positive values. Note: The objective relates to the form of the (regularized) loss function, used to determine the split with maximum information gain, while the metric is the non-regularized metric measured on the validation set (external or internally generated by DAI). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Select LightGBM regression objectives.: ", + "output": "lightgbm reg objectives refers to Select LightGBM regression objectives.: Select objectives allowed for LightGBM. Added to allowed mutations (the default mse is in sample list 2 times if selected). \"binary\" refers to logistic regression. Note: If choose quantile/huber or fair and data is not normalized, recommendation is to use params_lightgbm to specify reasonable value of alpha (for quantile or huber) or fairc (for fair) to LightGBM. Note: mse is same as rmse correponding to L2 loss. mae is L1 loss. Note: tweedie, gamma, poisson are only valid for targets with positive values. Note: The objective relates to the form of the (regularized) loss function, used to determine the split with maximum information gain, while the metric is the non-regularized metric measured on the validation set (external or internally generated by DAI). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting lightgbm_reg_objectives", + "output": "lightgbm reg objectives refers to Select objectives allowed for LightGBM. Added to allowed mutations (the default mse is in sample list 2 times if selected). \"binary\" refers to logistic regression. Note: If choose quantile/huber or fair and data is not normalized, recommendation is to use params_lightgbm to specify reasonable value of alpha (for quantile or huber) or fairc (for fair) to LightGBM. Note: mse is same as rmse correponding to L2 loss. mae is L1 loss. 
Note: tweedie, gamma, poisson are only valid for targets with positive values. Note: The objective relates to the form of the (regularized) loss function, used to determine the split with maximum information gain, while the metric is the non-regularized metric measured on the validation set (external or internally generated by DAI). " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting lightgbm_reg_objectives", + "output": "lightgbm reg objectives refers to Select LightGBM regression objectives.: Select objectives allowed for LightGBM. Added to allowed mutations (the default mse is in sample list 2 times if selected). \"binary\" refers to logistic regression. Note: If choose quantile/huber or fair and data is not normalized, recommendation is to use params_lightgbm to specify reasonable value of alpha (for quantile or huber) or fairc (for fair) to LightGBM. Note: mse is same as rmse correponding to L2 loss. mae is L1 loss. Note: tweedie, gamma, poisson are only valid for targets with positive values. Note: The objective relates to the form of the (regularized) loss function, used to determine the split with maximum information gain, while the metric is the non-regularized metric measured on the validation set (external or internally generated by DAI). " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_reg_metrics", + "output": "lightgbm reg metrics refers to Select metrics allowed for LightGBM. Added to allowed mutations (the default rmse is in sample list three times if selected). Note: If choose huber or fair and data is not normalized, recommendation is to use params_lightgbm to specify reasonable value of alpha (for huber or quantile) or fairc (for fair) to LightGBM. Note: tweedie, gamma, poisson are only valid for targets with positive values. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_reg_metrics", + "output": "lightgbm reg metrics refers to Select LightGBM regression metrics.: Select metrics allowed for LightGBM. Added to allowed mutations (the default rmse is in sample list three times if selected). Note: If choose huber or fair and data is not normalized, recommendation is to use params_lightgbm to specify reasonable value of alpha (for huber or quantile) or fairc (for fair) to LightGBM. Note: tweedie, gamma, poisson are only valid for targets with positive values. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm reg metrics", + "output": "lightgbm reg metrics refers to Select LightGBM regression metrics.: Select metrics allowed for LightGBM. Added to allowed mutations (the default rmse is in sample list three times if selected). Note: If choose huber or fair and data is not normalized, recommendation is to use params_lightgbm to specify reasonable value of alpha (for huber or quantile) or fairc (for fair) to LightGBM. Note: tweedie, gamma, poisson are only valid for targets with positive values. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Select LightGBM regression metrics.: ", + "output": "lightgbm reg metrics refers to Select LightGBM regression metrics.: Select metrics allowed for LightGBM. Added to allowed mutations (the default rmse is in sample list three times if selected). Note: If choose huber or fair and data is not normalized, recommendation is to use params_lightgbm to specify reasonable value of alpha (for huber or quantile) or fairc (for fair) to LightGBM. Note: tweedie, gamma, poisson are only valid for targets with positive values. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting lightgbm_reg_metrics", + "output": "lightgbm reg metrics refers to Select metrics allowed for LightGBM. Added to allowed mutations (the default rmse is in sample list three times if selected). Note: If choose huber or fair and data is not normalized, recommendation is to use params_lightgbm to specify reasonable value of alpha (for huber or quantile) or fairc (for fair) to LightGBM. Note: tweedie, gamma, poisson are only valid for targets with positive values. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting lightgbm_reg_metrics", + "output": "lightgbm reg metrics refers to Select LightGBM regression metrics.: Select metrics allowed for LightGBM. Added to allowed mutations (the default rmse is in sample list three times if selected). Note: If choose huber or fair and data is not normalized, recommendation is to use params_lightgbm to specify reasonable value of alpha (for huber or quantile) or fairc (for fair) to LightGBM. Note: tweedie, gamma, poisson are only valid for targets with positive values. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_binary_objectives", + "output": "lightgbm binary objectives refers to Select objectives allowed for LightGBM. Added to allowed mutations (the default binary is in sample list 2 times if selected)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_binary_objectives", + "output": "lightgbm binary objectives refers to Select LightGBM binary objectives.: Select objectives allowed for LightGBM. 
Added to allowed mutations (the default binary is in sample list 2 times if selected)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm binary objectives", + "output": "lightgbm binary objectives refers to Select LightGBM binary objectives.: Select objectives allowed for LightGBM. Added to allowed mutations (the default binary is in sample list 2 times if selected)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Select LightGBM binary objectives.: ", + "output": "lightgbm binary objectives refers to Select LightGBM binary objectives.: Select objectives allowed for LightGBM. Added to allowed mutations (the default binary is in sample list 2 times if selected)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting lightgbm_binary_objectives", + "output": "lightgbm binary objectives refers to Select objectives allowed for LightGBM. Added to allowed mutations (the default binary is in sample list 2 times if selected)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting lightgbm_binary_objectives", + "output": "lightgbm binary objectives refers to Select LightGBM binary objectives.: Select objectives allowed for LightGBM. Added to allowed mutations (the default binary is in sample list 2 times if selected)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_binary_metrics", + "output": "lightgbm binary metrics refers to Select which binary metrics allowed for LightGBM. Added to allowed mutations (all evenly sampled)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_binary_metrics", + "output": "lightgbm binary metrics refers to Select LightGBM binary metrics.: Select which binary metrics allowed for LightGBM. Added to allowed mutations (all evenly sampled)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm binary metrics", + "output": "lightgbm binary metrics refers to Select LightGBM binary metrics.: Select which binary metrics allowed for LightGBM. Added to allowed mutations (all evenly sampled)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Select LightGBM binary metrics.: ", + "output": "lightgbm binary metrics refers to Select LightGBM binary metrics.: Select which binary metrics allowed for LightGBM. Added to allowed mutations (all evenly sampled)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting lightgbm_binary_metrics", + "output": "lightgbm binary metrics refers to Select which binary metrics allowed for LightGBM. Added to allowed mutations (all evenly sampled)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting lightgbm_binary_metrics", + "output": "lightgbm binary metrics refers to Select LightGBM binary metrics.: Select which binary metrics allowed for LightGBM. Added to allowed mutations (all evenly sampled)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_multi_metrics", + "output": "lightgbm multi metrics refers to Select which metrics allowed for multiclass LightGBM. Added to allowed mutations (evenly sampled if selected)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_multi_metrics", + "output": "lightgbm multi metrics refers to Select LightGBM multiclass metrics.: Select which metrics allowed for multiclass LightGBM. Added to allowed mutations (evenly sampled if selected)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm multi metrics", + "output": "lightgbm multi metrics refers to Select LightGBM multiclass metrics.: Select which metrics allowed for multiclass LightGBM. Added to allowed mutations (evenly sampled if selected)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Select LightGBM multiclass metrics.: ", + "output": "lightgbm multi metrics refers to Select LightGBM multiclass metrics.: Select which metrics allowed for multiclass LightGBM. Added to allowed mutations (evenly sampled if selected)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting lightgbm_multi_metrics", + "output": "lightgbm multi metrics refers to Select which metrics allowed for multiclass LightGBM. Added to allowed mutations (evenly sampled if selected)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting lightgbm_multi_metrics", + "output": "lightgbm multi metrics refers to Select LightGBM multiclass metrics.: Select which metrics allowed for multiclass LightGBM. Added to allowed mutations (evenly sampled if selected)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tweedie_variance_power_list", + "output": "tweedie variance power list refers to tweedie_variance_power parameters to try for XGBoostModel and LightGBMModel if tweedie is used. 
First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tweedie_variance_power_list", + "output": "tweedie variance power list refers to tweedie_variance_power parameters: tweedie_variance_power parameters to try for XGBoostModel and LightGBMModel if tweedie is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tweedie variance power list", + "output": "tweedie variance power list refers to tweedie_variance_power parameters: tweedie_variance_power parameters to try for XGBoostModel and LightGBMModel if tweedie is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tweedie_variance_power parameters: ", + "output": "tweedie variance power list refers to tweedie_variance_power parameters: tweedie_variance_power parameters to try for XGBoostModel and LightGBMModel if tweedie is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tweedie_variance_power_list", + "output": "tweedie variance power list refers to tweedie_variance_power parameters to try for XGBoostModel and LightGBMModel if tweedie is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tweedie_variance_power_list", + "output": "tweedie variance power list refers to tweedie_variance_power parameters: tweedie_variance_power parameters to try for XGBoostModel and LightGBMModel if tweedie is used. First value is default." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "huber_alpha_list", + "output": "huber alpha list refers to huber parameters to try for LightGBMModel if huber is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "huber_alpha_list", + "output": "huber alpha list refers to huber parameters: huber parameters to try for LightGBMModel if huber is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "huber alpha list", + "output": "huber alpha list refers to huber parameters: huber parameters to try for LightGBMModel if huber is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "huber parameters: ", + "output": "huber alpha list refers to huber parameters: huber parameters to try for LightGBMModel if huber is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting huber_alpha_list", + "output": "huber alpha list refers to huber parameters to try for LightGBMModel if huber is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting huber_alpha_list", + "output": "huber alpha list refers to huber parameters: huber parameters to try for LightGBMModel if huber is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fair_c_list", + "output": "fair c list refers to fair c parameters to try for LightGBMModel if fair is used. First value is default." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fair_c_list", + "output": "fair c list refers to fair c parameters: fair c parameters to try for LightGBMModel if fair is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fair c list", + "output": "fair c list refers to fair c parameters: fair c parameters to try for LightGBMModel if fair is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fair c parameters: ", + "output": "fair c list refers to fair c parameters: fair c parameters to try for LightGBMModel if fair is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fair_c_list", + "output": "fair c list refers to fair c parameters to try for LightGBMModel if fair is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fair_c_list", + "output": "fair c list refers to fair c parameters: fair c parameters to try for LightGBMModel if fair is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "poisson_max_delta_step_list", + "output": "poisson max delta step list refers to poisson max_delta_step parameters to try for LightGBMModel if poisson is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "poisson_max_delta_step_list", + "output": "poisson max delta step list refers to poisson_max_delta_step parameters: poisson max_delta_step parameters to try for LightGBMModel if poisson is used. 
First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "poisson max delta step list", + "output": "poisson max delta step list refers to poisson_max_delta_step parameters: poisson max_delta_step parameters to try for LightGBMModel if poisson is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "poisson_max_delta_step parameters: ", + "output": "poisson max delta step list refers to poisson_max_delta_step parameters: poisson max_delta_step parameters to try for LightGBMModel if poisson is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting poisson_max_delta_step_list", + "output": "poisson max delta step list refers to poisson max_delta_step parameters to try for LightGBMModel if poisson is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting poisson_max_delta_step_list", + "output": "poisson max delta step list refers to poisson_max_delta_step parameters: poisson max_delta_step parameters to try for LightGBMModel if poisson is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "quantile_alpha", + "output": "quantile alpha refers to quantile alpha parameters to try for LightGBMModel if quantile is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "quantile_alpha", + "output": "quantile alpha refers to quantile alpha parameters: quantile alpha parameters to try for LightGBMModel if quantile is used. First value is default." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "quantile alpha", + "output": "quantile alpha refers to quantile alpha parameters: quantile alpha parameters to try for LightGBMModel if quantile is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "quantile alpha parameters: ", + "output": "quantile alpha refers to quantile alpha parameters: quantile alpha parameters to try for LightGBMModel if quantile is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting quantile_alpha", + "output": "quantile alpha refers to quantile alpha parameters to try for LightGBMModel if quantile is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting quantile_alpha", + "output": "quantile alpha refers to quantile alpha parameters: quantile alpha parameters to try for LightGBMModel if quantile is used. First value is default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "reg_lambda_glm_default", + "output": "reg lambda glm default refers to Default reg_lambda regularization for GLM." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "reg_lambda_glm_default", + "output": "reg lambda glm default refers to default reg_lambda regularization parameter: Default reg_lambda regularization for GLM." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "reg lambda glm default", + "output": "reg lambda glm default refers to default reg_lambda regularization parameter: Default reg_lambda regularization for GLM." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default reg_lambda regularization parameter: ", + "output": "reg lambda glm default refers to default reg_lambda regularization parameter: Default reg_lambda regularization for GLM." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting reg_lambda_glm_default", + "output": "reg lambda glm default refers to Default reg_lambda regularization for GLM." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting reg_lambda_glm_default", + "output": "reg lambda glm default refers to default reg_lambda regularization parameter: Default reg_lambda regularization for GLM." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lossguide_drop_factor", + "output": "lossguide drop factor refers to Factor by which to drop max_leaves from effective max_depth value when doing loss_guide. E.g. if max_depth is normally 12, this makes leaves 2**11 not 2**12: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lossguide_drop_factor", + "output": "lossguide drop factor refers to Factor by which to drop max_leaves from effective max_depth value when doing loss_guide. E.g. if max_depth is normally 12, this makes leaves 2**11 not 2**12: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lossguide drop factor", + "output": "lossguide drop factor refers to Factor by which to drop max_leaves from effective max_depth value when doing loss_guide. E.g. 
if max_depth is normally 12, this makes leaves 2**11 not 2**12: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Factor by which to drop max_leaves from effective max_depth value when doing loss_guide. E.g. if max_depth is normally 12, this makes leaves 2**11 not 2**12: ", + "output": "lossguide drop factor refers to Factor by which to drop max_leaves from effective max_depth value when doing loss_guide. E.g. if max_depth is normally 12, this makes leaves 2**11 not 2**12: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting lossguide_drop_factor", + "output": "lossguide drop factor refers to Factor by which to drop max_leaves from effective max_depth value when doing loss_guide. E.g. if max_depth is normally 12, this makes leaves 2**11 not 2**12: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting lossguide_drop_factor", + "output": "lossguide drop factor refers to Factor by which to drop max_leaves from effective max_depth value when doing loss_guide. E.g. if max_depth is normally 12, this makes leaves 2**11 not 2**12: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lossguide_max_depth_extend_factor", + "output": "lossguide max depth extend factor refers to Factor by which to extend max_depth mutations when doing loss_guide. E.g. if max_leaves ends up as x let max_depth be factor * x.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lossguide_max_depth_extend_factor", + "output": "lossguide max depth extend factor refers to Factor by which to extend max_depth mutations when doing loss_guide. E.g. 
if max_leaves ends up as x let max_depth be factor * x.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lossguide max depth extend factor", + "output": "lossguide max depth extend factor refers to Factor by which to extend max_depth mutations when doing loss_guide. E.g. if max_leaves ends up as x let max_depth be factor * x.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Factor by which to extend max_depth mutations when doing loss_guide. E.g. if max_leaves ends up as x let max_depth be factor * x.: ", + "output": "lossguide max depth extend factor refers to Factor by which to extend max_depth mutations when doing loss_guide. E.g. if max_leaves ends up as x let max_depth be factor * x.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting lossguide_max_depth_extend_factor", + "output": "lossguide max depth extend factor refers to Factor by which to extend max_depth mutations when doing loss_guide. E.g. if max_leaves ends up as x let max_depth be factor * x.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting lossguide_max_depth_extend_factor", + "output": "lossguide max depth extend factor refers to Factor by which to extend max_depth mutations when doing loss_guide. E.g. if max_leaves ends up as x let max_depth be factor * x.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_lightgbm", + "output": "params lightgbm refers to Parameters for LightGBM to override DAI parameters e.g. ``'eval_metric'`` instead of ``'metric'`` should be used e.g. ``params_lightgbm=\"{'objective': 'binary', 'n_estimators': 100, 'max_leaves': 64, 'random_state': 1234}\"`` e.g. 
``params_lightgbm=\"{'n_estimators': 600, 'learning_rate': 0.1, 'reg_alpha': 0.0, 'reg_lambda': 0.5, 'gamma': 0, 'max_depth': 0, 'max_bin': 128, 'max_leaves': 256, 'scale_pos_weight': 1.0, 'max_delta_step': 3.469919910597877, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.3, 'tree_method': 'gpu_hist', 'grow_policy': 'lossguide', 'min_data_in_bin': 3, 'min_child_samples': 5, 'early_stopping_rounds': 20, 'num_classes': 2, 'objective': 'binary', 'eval_metric': 'binary', 'random_state': 987654, 'early_stopping_threshold': 0.01, 'monotonicity_constraints': False, 'silent': True, 'debug_verbose': 0, 'subsample_freq': 1}\"`` avoid including \"system\"-level parameters like ``'n_gpus': 1, 'gpu_id': 0, , 'n_jobs': 1, 'booster': 'lightgbm'`` also likely should avoid parameters like: 'objective': 'binary', unless one really knows what one is doing (e.g. alternative objectives) See: https://xgboost.readthedocs.io/en/latest/parameter.html And see: https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.rst Can also pass objective parameters if choose (or in case automatically chosen) certain objectives https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric-parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_lightgbm", + "output": "params lightgbm refers to Parameters for LightGBM to override DAI parameters e.g. ``'eval_metric'`` instead of ``'metric'`` should be used e.g. ``params_lightgbm=\"{'objective': 'binary', 'n_estimators': 100, 'max_leaves': 64, 'random_state': 1234}\"`` e.g. 
``params_lightgbm=\"{'n_estimators': 600, 'learning_rate': 0.1, 'reg_alpha': 0.0, 'reg_lambda': 0.5, 'gamma': 0, 'max_depth': 0, 'max_bin': 128, 'max_leaves': 256, 'scale_pos_weight': 1.0, 'max_delta_step': 3.469919910597877, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.3, 'tree_method': 'gpu_hist', 'grow_policy': 'lossguide', 'min_data_in_bin': 3, 'min_child_samples': 5, 'early_stopping_rounds': 20, 'num_classes': 2, 'objective': 'binary', 'eval_metric': 'binary', 'random_state': 987654, 'early_stopping_threshold': 0.01, 'monotonicity_constraints': False, 'silent': True, 'debug_verbose': 0, 'subsample_freq': 1}\"`` avoid including \"system\"-level parameters like ``'n_gpus': 1, 'gpu_id': 0, , 'n_jobs': 1, 'booster': 'lightgbm'`` also likely should avoid parameters like: 'objective': 'binary', unless one really knows what one is doing (e.g. alternative objectives) See: https://xgboost.readthedocs.io/en/latest/parameter.html And see: https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.rst Can also pass objective parameters if choose (or in case automatically chosen) certain objectives https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric-parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params lightgbm", + "output": "params lightgbm refers to Parameters for LightGBM to override DAI parameters e.g. ``'eval_metric'`` instead of ``'metric'`` should be used e.g. ``params_lightgbm=\"{'objective': 'binary', 'n_estimators': 100, 'max_leaves': 64, 'random_state': 1234}\"`` e.g. 
``params_lightgbm=\"{'n_estimators': 600, 'learning_rate': 0.1, 'reg_alpha': 0.0, 'reg_lambda': 0.5, 'gamma': 0, 'max_depth': 0, 'max_bin': 128, 'max_leaves': 256, 'scale_pos_weight': 1.0, 'max_delta_step': 3.469919910597877, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.3, 'tree_method': 'gpu_hist', 'grow_policy': 'lossguide', 'min_data_in_bin': 3, 'min_child_samples': 5, 'early_stopping_rounds': 20, 'num_classes': 2, 'objective': 'binary', 'eval_metric': 'binary', 'random_state': 987654, 'early_stopping_threshold': 0.01, 'monotonicity_constraints': False, 'silent': True, 'debug_verbose': 0, 'subsample_freq': 1}\"`` avoid including \"system\"-level parameters like ``'n_gpus': 1, 'gpu_id': 0, , 'n_jobs': 1, 'booster': 'lightgbm'`` also likely should avoid parameters like: 'objective': 'binary', unless one really knows what one is doing (e.g. alternative objectives) See: https://xgboost.readthedocs.io/en/latest/parameter.html And see: https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.rst Can also pass objective parameters if choose (or in case automatically chosen) certain objectives https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric-parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params lightgbm refers to Parameters for LightGBM to override DAI parameters e.g. ``'eval_metric'`` instead of ``'metric'`` should be used e.g. ``params_lightgbm=\"{'objective': 'binary', 'n_estimators': 100, 'max_leaves': 64, 'random_state': 1234}\"`` e.g. 
``params_lightgbm=\"{'n_estimators': 600, 'learning_rate': 0.1, 'reg_alpha': 0.0, 'reg_lambda': 0.5, 'gamma': 0, 'max_depth': 0, 'max_bin': 128, 'max_leaves': 256, 'scale_pos_weight': 1.0, 'max_delta_step': 3.469919910597877, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.3, 'tree_method': 'gpu_hist', 'grow_policy': 'lossguide', 'min_data_in_bin': 3, 'min_child_samples': 5, 'early_stopping_rounds': 20, 'num_classes': 2, 'objective': 'binary', 'eval_metric': 'binary', 'random_state': 987654, 'early_stopping_threshold': 0.01, 'monotonicity_constraints': False, 'silent': True, 'debug_verbose': 0, 'subsample_freq': 1}\"`` avoid including \"system\"-level parameters like ``'n_gpus': 1, 'gpu_id': 0, , 'n_jobs': 1, 'booster': 'lightgbm'`` also likely should avoid parameters like: 'objective': 'binary', unless one really knows what one is doing (e.g. alternative objectives) See: https://xgboost.readthedocs.io/en/latest/parameter.html And see: https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.rst Can also pass objective parameters if choose (or in case automatically chosen) certain objectives https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric-parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_lightgbm", + "output": "params lightgbm refers to Parameters for LightGBM to override DAI parameters e.g. ``'eval_metric'`` instead of ``'metric'`` should be used e.g. ``params_lightgbm=\"{'objective': 'binary', 'n_estimators': 100, 'max_leaves': 64, 'random_state': 1234}\"`` e.g. 
``params_lightgbm=\"{'n_estimators': 600, 'learning_rate': 0.1, 'reg_alpha': 0.0, 'reg_lambda': 0.5, 'gamma': 0, 'max_depth': 0, 'max_bin': 128, 'max_leaves': 256, 'scale_pos_weight': 1.0, 'max_delta_step': 3.469919910597877, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.3, 'tree_method': 'gpu_hist', 'grow_policy': 'lossguide', 'min_data_in_bin': 3, 'min_child_samples': 5, 'early_stopping_rounds': 20, 'num_classes': 2, 'objective': 'binary', 'eval_metric': 'binary', 'random_state': 987654, 'early_stopping_threshold': 0.01, 'monotonicity_constraints': False, 'silent': True, 'debug_verbose': 0, 'subsample_freq': 1}\"`` avoid including \"system\"-level parameters like ``'n_gpus': 1, 'gpu_id': 0, , 'n_jobs': 1, 'booster': 'lightgbm'`` also likely should avoid parameters like: 'objective': 'binary', unless one really knows what one is doing (e.g. alternative objectives) See: https://xgboost.readthedocs.io/en/latest/parameter.html And see: https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.rst Can also pass objective parameters if choose (or in case automatically chosen) certain objectives https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric-parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_lightgbm", + "output": "params lightgbm refers to Parameters for LightGBM to override DAI parameters e.g. ``'eval_metric'`` instead of ``'metric'`` should be used e.g. ``params_lightgbm=\"{'objective': 'binary', 'n_estimators': 100, 'max_leaves': 64, 'random_state': 1234}\"`` e.g. 
``params_lightgbm=\"{'n_estimators': 600, 'learning_rate': 0.1, 'reg_alpha': 0.0, 'reg_lambda': 0.5, 'gamma': 0, 'max_depth': 0, 'max_bin': 128, 'max_leaves': 256, 'scale_pos_weight': 1.0, 'max_delta_step': 3.469919910597877, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.3, 'tree_method': 'gpu_hist', 'grow_policy': 'lossguide', 'min_data_in_bin': 3, 'min_child_samples': 5, 'early_stopping_rounds': 20, 'num_classes': 2, 'objective': 'binary', 'eval_metric': 'binary', 'random_state': 987654, 'early_stopping_threshold': 0.01, 'monotonicity_constraints': False, 'silent': True, 'debug_verbose': 0, 'subsample_freq': 1}\"`` avoid including \"system\"-level parameters like ``'n_gpus': 1, 'gpu_id': 0, , 'n_jobs': 1, 'booster': 'lightgbm'`` also likely should avoid parameters like: 'objective': 'binary', unless one really knows what one is doing (e.g. alternative objectives) See: https://xgboost.readthedocs.io/en/latest/parameter.html And see: https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.rst Can also pass objective parameters if choose (or in case automatically chosen) certain objectives https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric-parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_xgboost", + "output": "params xgboost refers to Parameters for XGBoost to override DAI parameters similar parameters as LightGBM since LightGBM parameters are transcribed from XGBoost equivalent versions e.g. 
``params_xgboost=\"{'n_estimators': 100, 'max_leaves': 64, 'max_depth': 0, 'random_state': 1234}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_xgboost", + "output": "params xgboost refers to Parameters for XGBoost to override DAI parameters similar parameters as LightGBM since LightGBM parameters are transcribed from XGBoost equivalent versions e.g. ``params_xgboost=\"{'n_estimators': 100, 'max_leaves': 64, 'max_depth': 0, 'random_state': 1234}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params xgboost", + "output": "params xgboost refers to Parameters for XGBoost to override DAI parameters similar parameters as LightGBM since LightGBM parameters are transcribed from XGBoost equivalent versions e.g. ``params_xgboost=\"{'n_estimators': 100, 'max_leaves': 64, 'max_depth': 0, 'random_state': 1234}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params xgboost refers to Parameters for XGBoost to override DAI parameters similar parameters as LightGBM since LightGBM parameters are transcribed from XGBoost equivalent versions e.g. ``params_xgboost=\"{'n_estimators': 100, 'max_leaves': 64, 'max_depth': 0, 'random_state': 1234}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_xgboost", + "output": "params xgboost refers to Parameters for XGBoost to override DAI parameters similar parameters as LightGBM since LightGBM parameters are transcribed from XGBoost equivalent versions e.g. 
``params_xgboost=\"{'n_estimators': 100, 'max_leaves': 64, 'max_depth': 0, 'random_state': 1234}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_xgboost", + "output": "params xgboost refers to Parameters for XGBoost to override DAI parameters similar parameters as LightGBM since LightGBM parameters are transcribed from XGBoost equivalent versions e.g. ``params_xgboost=\"{'n_estimators': 100, 'max_leaves': 64, 'max_depth': 0, 'random_state': 1234}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_xgboost_rf", + "output": "params xgboost rf refers to Like params_xgboost but for XGBoost random forest." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_xgboost_rf", + "output": "params xgboost rf refers to Like params_xgboost but for XGBoost random forest." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params xgboost rf", + "output": "params xgboost rf refers to Like params_xgboost but for XGBoost random forest." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params xgboost rf refers to Like params_xgboost but for XGBoost random forest." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_xgboost_rf", + "output": "params xgboost rf refers to Like params_xgboost but for XGBoost random forest." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_xgboost_rf", + "output": "params xgboost rf refers to Like params_xgboost but for XGBoost random forest." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_dart", + "output": "params dart refers to Like params_xgboost but for XGBoost's dart method" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_dart", + "output": "params dart refers to Like params_xgboost but for XGBoost's dart method" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params dart", + "output": "params dart refers to Like params_xgboost but for XGBoost's dart method" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params dart refers to Like params_xgboost but for XGBoost's dart method" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_dart", + "output": "params dart refers to Like params_xgboost but for XGBoost's dart method" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_dart", + "output": "params dart refers to Like params_xgboost but for XGBoost's dart method" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tensorflow", + "output": "params tensorflow refers to Parameters for TensorFlow to override DAI parameterse.g. 
``params_tensorflow=\"{'lr': 0.01, 'add_wide': False, 'add_attention': True, 'epochs': 30, 'layers': (100, 100), 'activation': 'selu', 'batch_size': 64, 'chunk_size': 1000, 'dropout': 0.3, 'strategy': '1cycle', 'l1': 0.0, 'l2': 0.0, 'ort_loss': 0.5, 'ort_loss_tau': 0.01, 'normalize_type': 'streaming'}\"``See: https://keras.io/ , e.g. for activations: https://keras.io/activations/Example layers: ``(500, 500, 500), (100, 100, 100), (100, 100), (50, 50)``Strategies: ``'1cycle'`` or ``'one_shot'``, See: https://github.com/fastai/fastai'one_shot\" is not allowed for ensembles.normalize_type: 'streaming' or 'global' (using sklearn StandardScaler) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tensorflow", + "output": "params tensorflow refers to Parameters for TensorFlow: Parameters for TensorFlow to override DAI parameterse.g. ``params_tensorflow=\"{'lr': 0.01, 'add_wide': False, 'add_attention': True, 'epochs': 30, 'layers': (100, 100), 'activation': 'selu', 'batch_size': 64, 'chunk_size': 1000, 'dropout': 0.3, 'strategy': '1cycle', 'l1': 0.0, 'l2': 0.0, 'ort_loss': 0.5, 'ort_loss_tau': 0.01, 'normalize_type': 'streaming'}\"``See: https://keras.io/ , e.g. for activations: https://keras.io/activations/Example layers: ``(500, 500, 500), (100, 100, 100), (100, 100), (50, 50)``Strategies: ``'1cycle'`` or ``'one_shot'``, See: https://github.com/fastai/fastai'one_shot\" is not allowed for ensembles.normalize_type: 'streaming' or 'global' (using sklearn StandardScaler) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params tensorflow", + "output": "params tensorflow refers to Parameters for TensorFlow: Parameters for TensorFlow to override DAI parameterse.g. 
``params_tensorflow=\"{'lr': 0.01, 'add_wide': False, 'add_attention': True, 'epochs': 30, 'layers': (100, 100), 'activation': 'selu', 'batch_size': 64, 'chunk_size': 1000, 'dropout': 0.3, 'strategy': '1cycle', 'l1': 0.0, 'l2': 0.0, 'ort_loss': 0.5, 'ort_loss_tau': 0.01, 'normalize_type': 'streaming'}\"``See: https://keras.io/ , e.g. for activations: https://keras.io/activations/Example layers: ``(500, 500, 500), (100, 100, 100), (100, 100), (50, 50)``Strategies: ``'1cycle'`` or ``'one_shot'``, See: https://github.com/fastai/fastai'one_shot\" is not allowed for ensembles.normalize_type: 'streaming' or 'global' (using sklearn StandardScaler) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Parameters for TensorFlow: ", + "output": "params tensorflow refers to Parameters for TensorFlow: Parameters for TensorFlow to override DAI parameterse.g. ``params_tensorflow=\"{'lr': 0.01, 'add_wide': False, 'add_attention': True, 'epochs': 30, 'layers': (100, 100), 'activation': 'selu', 'batch_size': 64, 'chunk_size': 1000, 'dropout': 0.3, 'strategy': '1cycle', 'l1': 0.0, 'l2': 0.0, 'ort_loss': 0.5, 'ort_loss_tau': 0.01, 'normalize_type': 'streaming'}\"``See: https://keras.io/ , e.g. for activations: https://keras.io/activations/Example layers: ``(500, 500, 500), (100, 100, 100), (100, 100), (50, 50)``Strategies: ``'1cycle'`` or ``'one_shot'``, See: https://github.com/fastai/fastai'one_shot\" is not allowed for ensembles.normalize_type: 'streaming' or 'global' (using sklearn StandardScaler) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_tensorflow", + "output": "params tensorflow refers to Parameters for TensorFlow to override DAI parameterse.g. 
``params_tensorflow=\"{'lr': 0.01, 'add_wide': False, 'add_attention': True, 'epochs': 30, 'layers': (100, 100), 'activation': 'selu', 'batch_size': 64, 'chunk_size': 1000, 'dropout': 0.3, 'strategy': '1cycle', 'l1': 0.0, 'l2': 0.0, 'ort_loss': 0.5, 'ort_loss_tau': 0.01, 'normalize_type': 'streaming'}\"``See: https://keras.io/ , e.g. for activations: https://keras.io/activations/Example layers: ``(500, 500, 500), (100, 100, 100), (100, 100), (50, 50)``Strategies: ``'1cycle'`` or ``'one_shot'``, See: https://github.com/fastai/fastai'one_shot\" is not allowed for ensembles.normalize_type: 'streaming' or 'global' (using sklearn StandardScaler) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_tensorflow", + "output": "params tensorflow refers to Parameters for TensorFlow: Parameters for TensorFlow to override DAI parameterse.g. ``params_tensorflow=\"{'lr': 0.01, 'add_wide': False, 'add_attention': True, 'epochs': 30, 'layers': (100, 100), 'activation': 'selu', 'batch_size': 64, 'chunk_size': 1000, 'dropout': 0.3, 'strategy': '1cycle', 'l1': 0.0, 'l2': 0.0, 'ort_loss': 0.5, 'ort_loss_tau': 0.01, 'normalize_type': 'streaming'}\"``See: https://keras.io/ , e.g. for activations: https://keras.io/activations/Example layers: ``(500, 500, 500), (100, 100, 100), (100, 100), (50, 50)``Strategies: ``'1cycle'`` or ``'one_shot'``, See: https://github.com/fastai/fastai'one_shot\" is not allowed for ensembles.normalize_type: 'streaming' or 'global' (using sklearn StandardScaler) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_gblinear", + "output": "params gblinear refers to Parameters for XGBoost's gblinear to override DAI parameters e.g. 
``params_gblinear=\"{'n_estimators': 100}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_gblinear", + "output": "params gblinear refers to Parameters for XGBoost's gblinear to override DAI parameters e.g. ``params_gblinear=\"{'n_estimators': 100}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params gblinear", + "output": "params gblinear refers to Parameters for XGBoost's gblinear to override DAI parameters e.g. ``params_gblinear=\"{'n_estimators': 100}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params gblinear refers to Parameters for XGBoost's gblinear to override DAI parameters e.g. ``params_gblinear=\"{'n_estimators': 100}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_gblinear", + "output": "params gblinear refers to Parameters for XGBoost's gblinear to override DAI parameters e.g. ``params_gblinear=\"{'n_estimators': 100}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_gblinear", + "output": "params gblinear refers to Parameters for XGBoost's gblinear to override DAI parameters e.g. 
``params_gblinear=\"{'n_estimators': 100}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_decision_tree", + "output": "params decision tree refers to Parameters for Decision Tree to override DAI parameters parameters should be given as XGBoost equivalent unless unique LightGBM parameter e.g. ``'eval_metric'`` instead of ``'metric'`` should be used e.g. ``params_decision_tree=\"{'objective': 'binary', 'n_estimators': 100, 'max_leaves': 64, 'random_state': 1234}\"`` e.g. ``params_decision_tree=\"{'n_estimators': 1, 'learning_rate': 1, 'reg_alpha': 0.0, 'reg_lambda': 0.5, 'gamma': 0, 'max_depth': 0, 'max_bin': 128, 'max_leaves': 256, 'scale_pos_weight': 1.0, 'max_delta_step': 3.469919910597877, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.3, 'tree_method': 'gpu_hist', 'grow_policy': 'lossguide', 'min_data_in_bin': 3, 'min_child_samples': 5, 'early_stopping_rounds': 20, 'num_classes': 2, 'objective': 'binary', 'eval_metric': 'logloss', 'random_state': 987654, 'early_stopping_threshold': 0.01, 'monotonicity_constraints': False, 'silent': True, 'debug_verbose': 0, 'subsample_freq': 1}\"`` avoid including \"system\"-level parameters like ``'n_gpus': 1, 'gpu_id': 0, , 'n_jobs': 1, 'booster': 'lightgbm'`` also likely should avoid parameters like: ``'objective': 'binary:logistic'``, unless one really knows what one is doing (e.g. 
alternative objectives) See: https://xgboost.readthedocs.io/en/latest/parameter.html And see: https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.rst Can also pass objective parameters if choose (or in case automatically chosen) certain objectives https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric-parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_decision_tree", + "output": "params decision tree refers to Parameters for Decision Tree to override DAI parameters parameters should be given as XGBoost equivalent unless unique LightGBM parameter e.g. ``'eval_metric'`` instead of ``'metric'`` should be used e.g. ``params_decision_tree=\"{'objective': 'binary', 'n_estimators': 100, 'max_leaves': 64, 'random_state': 1234}\"`` e.g. ``params_decision_tree=\"{'n_estimators': 1, 'learning_rate': 1, 'reg_alpha': 0.0, 'reg_lambda': 0.5, 'gamma': 0, 'max_depth': 0, 'max_bin': 128, 'max_leaves': 256, 'scale_pos_weight': 1.0, 'max_delta_step': 3.469919910597877, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.3, 'tree_method': 'gpu_hist', 'grow_policy': 'lossguide', 'min_data_in_bin': 3, 'min_child_samples': 5, 'early_stopping_rounds': 20, 'num_classes': 2, 'objective': 'binary', 'eval_metric': 'logloss', 'random_state': 987654, 'early_stopping_threshold': 0.01, 'monotonicity_constraints': False, 'silent': True, 'debug_verbose': 0, 'subsample_freq': 1}\"`` avoid including \"system\"-level parameters like ``'n_gpus': 1, 'gpu_id': 0, , 'n_jobs': 1, 'booster': 'lightgbm'`` also likely should avoid parameters like: ``'objective': 'binary:logistic'``, unless one really knows what one is doing (e.g. 
alternative objectives) See: https://xgboost.readthedocs.io/en/latest/parameter.html And see: https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.rst Can also pass objective parameters if choose (or in case automatically chosen) certain objectives https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric-parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params decision tree", + "output": "params decision tree refers to Parameters for Decision Tree to override DAI parameters parameters should be given as XGBoost equivalent unless unique LightGBM parameter e.g. ``'eval_metric'`` instead of ``'metric'`` should be used e.g. ``params_decision_tree=\"{'objective': 'binary', 'n_estimators': 100, 'max_leaves': 64, 'random_state': 1234}\"`` e.g. ``params_decision_tree=\"{'n_estimators': 1, 'learning_rate': 1, 'reg_alpha': 0.0, 'reg_lambda': 0.5, 'gamma': 0, 'max_depth': 0, 'max_bin': 128, 'max_leaves': 256, 'scale_pos_weight': 1.0, 'max_delta_step': 3.469919910597877, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.3, 'tree_method': 'gpu_hist', 'grow_policy': 'lossguide', 'min_data_in_bin': 3, 'min_child_samples': 5, 'early_stopping_rounds': 20, 'num_classes': 2, 'objective': 'binary', 'eval_metric': 'logloss', 'random_state': 987654, 'early_stopping_threshold': 0.01, 'monotonicity_constraints': False, 'silent': True, 'debug_verbose': 0, 'subsample_freq': 1}\"`` avoid including \"system\"-level parameters like ``'n_gpus': 1, 'gpu_id': 0, , 'n_jobs': 1, 'booster': 'lightgbm'`` also likely should avoid parameters like: ``'objective': 'binary:logistic'``, unless one really knows what one is doing (e.g. 
alternative objectives) See: https://xgboost.readthedocs.io/en/latest/parameter.html And see: https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.rst Can also pass objective parameters if choose (or in case automatically chosen) certain objectives https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric-parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params decision tree refers to Parameters for Decision Tree to override DAI parameters parameters should be given as XGBoost equivalent unless unique LightGBM parameter e.g. ``'eval_metric'`` instead of ``'metric'`` should be used e.g. ``params_decision_tree=\"{'objective': 'binary', 'n_estimators': 100, 'max_leaves': 64, 'random_state': 1234}\"`` e.g. ``params_decision_tree=\"{'n_estimators': 1, 'learning_rate': 1, 'reg_alpha': 0.0, 'reg_lambda': 0.5, 'gamma': 0, 'max_depth': 0, 'max_bin': 128, 'max_leaves': 256, 'scale_pos_weight': 1.0, 'max_delta_step': 3.469919910597877, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.3, 'tree_method': 'gpu_hist', 'grow_policy': 'lossguide', 'min_data_in_bin': 3, 'min_child_samples': 5, 'early_stopping_rounds': 20, 'num_classes': 2, 'objective': 'binary', 'eval_metric': 'logloss', 'random_state': 987654, 'early_stopping_threshold': 0.01, 'monotonicity_constraints': False, 'silent': True, 'debug_verbose': 0, 'subsample_freq': 1}\"`` avoid including \"system\"-level parameters like ``'n_gpus': 1, 'gpu_id': 0, , 'n_jobs': 1, 'booster': 'lightgbm'`` also likely should avoid parameters like: ``'objective': 'binary:logistic'``, unless one really knows what one is doing (e.g. 
alternative objectives) See: https://xgboost.readthedocs.io/en/latest/parameter.html And see: https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.rst Can also pass objective parameters if choose (or in case automatically chosen) certain objectives https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric-parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_decision_tree", + "output": "params decision tree refers to Parameters for Decision Tree to override DAI parameters parameters should be given as XGBoost equivalent unless unique LightGBM parameter e.g. ``'eval_metric'`` instead of ``'metric'`` should be used e.g. ``params_decision_tree=\"{'objective': 'binary', 'n_estimators': 100, 'max_leaves': 64, 'random_state': 1234}\"`` e.g. ``params_decision_tree=\"{'n_estimators': 1, 'learning_rate': 1, 'reg_alpha': 0.0, 'reg_lambda': 0.5, 'gamma': 0, 'max_depth': 0, 'max_bin': 128, 'max_leaves': 256, 'scale_pos_weight': 1.0, 'max_delta_step': 3.469919910597877, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.3, 'tree_method': 'gpu_hist', 'grow_policy': 'lossguide', 'min_data_in_bin': 3, 'min_child_samples': 5, 'early_stopping_rounds': 20, 'num_classes': 2, 'objective': 'binary', 'eval_metric': 'logloss', 'random_state': 987654, 'early_stopping_threshold': 0.01, 'monotonicity_constraints': False, 'silent': True, 'debug_verbose': 0, 'subsample_freq': 1}\"`` avoid including \"system\"-level parameters like ``'n_gpus': 1, 'gpu_id': 0, , 'n_jobs': 1, 'booster': 'lightgbm'`` also likely should avoid parameters like: ``'objective': 'binary:logistic'``, unless one really knows what one is doing (e.g. 
alternative objectives) See: https://xgboost.readthedocs.io/en/latest/parameter.html And see: https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.rst Can also pass objective parameters if choose (or in case automatically chosen) certain objectives https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric-parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_decision_tree", + "output": "params decision tree refers to Parameters for Decision Tree to override DAI parameters parameters should be given as XGBoost equivalent unless unique LightGBM parameter e.g. ``'eval_metric'`` instead of ``'metric'`` should be used e.g. ``params_decision_tree=\"{'objective': 'binary', 'n_estimators': 100, 'max_leaves': 64, 'random_state': 1234}\"`` e.g. ``params_decision_tree=\"{'n_estimators': 1, 'learning_rate': 1, 'reg_alpha': 0.0, 'reg_lambda': 0.5, 'gamma': 0, 'max_depth': 0, 'max_bin': 128, 'max_leaves': 256, 'scale_pos_weight': 1.0, 'max_delta_step': 3.469919910597877, 'min_child_weight': 1, 'subsample': 0.9, 'colsample_bytree': 0.3, 'tree_method': 'gpu_hist', 'grow_policy': 'lossguide', 'min_data_in_bin': 3, 'min_child_samples': 5, 'early_stopping_rounds': 20, 'num_classes': 2, 'objective': 'binary', 'eval_metric': 'logloss', 'random_state': 987654, 'early_stopping_threshold': 0.01, 'monotonicity_constraints': False, 'silent': True, 'debug_verbose': 0, 'subsample_freq': 1}\"`` avoid including \"system\"-level parameters like ``'n_gpus': 1, 'gpu_id': 0, , 'n_jobs': 1, 'booster': 'lightgbm'`` also likely should avoid parameters like: ``'objective': 'binary:logistic'``, unless one really knows what one is doing (e.g. 
alternative objectives) See: https://xgboost.readthedocs.io/en/latest/parameter.html And see: https://github.com/Microsoft/LightGBM/blob/master/docs/Parameters.rst Can also pass objective parameters if choose (or in case automatically chosen) certain objectives https://lightgbm.readthedocs.io/en/latest/Parameters.html#metric-parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_rulefit", + "output": "params rulefit refers to Parameters for Rulefit to override DAI parameters e.g. ``params_rulefit=\"{'max_leaves': 64}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_rulefit", + "output": "params rulefit refers to Parameters for Rulefit to override DAI parameters e.g. ``params_rulefit=\"{'max_leaves': 64}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params rulefit", + "output": "params rulefit refers to Parameters for Rulefit to override DAI parameters e.g. ``params_rulefit=\"{'max_leaves': 64}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params rulefit refers to Parameters for Rulefit to override DAI parameters e.g. ``params_rulefit=\"{'max_leaves': 64}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_rulefit", + "output": "params rulefit refers to Parameters for Rulefit to override DAI parameters e.g. 
``params_rulefit=\"{'max_leaves': 64}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_rulefit", + "output": "params rulefit refers to Parameters for Rulefit to override DAI parameters e.g. ``params_rulefit=\"{'max_leaves': 64}\"`` See: https://xgboost.readthedocs.io/en/latest/parameter.html" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_ftrl", + "output": "params ftrl refers to Parameters for FTRL to override DAI parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_ftrl", + "output": "params ftrl refers to Parameters for FTRL to override DAI parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params ftrl", + "output": "params ftrl refers to Parameters for FTRL to override DAI parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params ftrl refers to Parameters for FTRL to override DAI parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_ftrl", + "output": "params ftrl refers to Parameters for FTRL to override DAI parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_ftrl", + "output": "params ftrl refers to Parameters for FTRL to override DAI parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_grownet", + "output": "params grownet refers to Parameters for GrowNet to override DAI parameters" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Explain the following expert setting for Driverless AI", + "input": "params_grownet", + "output": "params grownet refers to Parameters for GrowNet to override DAI parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params grownet", + "output": "params grownet refers to Parameters for GrowNet to override DAI parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params grownet refers to Parameters for GrowNet to override DAI parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_grownet", + "output": "params grownet refers to Parameters for GrowNet to override DAI parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_grownet", + "output": "params grownet refers to Parameters for GrowNet to override DAI parameters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_mode", + "output": "params tune mode refers to How to handle tomls like params_tune_lightgbm.override: For any key in the params_tune_ toml dict, use the list of values instead of DAI's list of values.override_and_first_as_default: like override, but also use first entry in tuple/list (if present) as override as replacement for (e.g.) params_lightgbm when using params_tune_lightgbm.exclusive: Only tune the keys in the params_tune_ toml dict, unless no keys are present. 
Otherwise use DAI's default values.exclusive_and_first_as_default: Like exclusive but same first as default behavior as override_and_first_as_default.In order to fully control hyperparameter tuning, either one should set \"override\" mode and include every hyperparameter and at least one value in each list within the dictionary, or choose \"exclusive\" and then rely upon DAI unchanging default values for any keys not given.For custom recipes, one can use recipe_dict to pass hyperparameters and if using the \"get_one()\" function in a custom recipe, and if user_tune passed contains the hyperparameter dictionary equivalent of params_tune_ tomls, then this params_tune_mode will also work for custom recipes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_mode", + "output": "params tune mode refers to Mode to handle params_tune_ tomls: How to handle tomls like params_tune_lightgbm.override: For any key in the params_tune_ toml dict, use the list of values instead of DAI's list of values.override_and_first_as_default: like override, but also use first entry in tuple/list (if present) as override as replacement for (e.g.) params_lightgbm when using params_tune_lightgbm.exclusive: Only tune the keys in the params_tune_ toml dict, unless no keys are present. 
Otherwise use DAI's default values.exclusive_and_first_as_default: Like exclusive but same first as default behavior as override_and_first_as_default.In order to fully control hyperparameter tuning, either one should set \"override\" mode and include every hyperparameter and at least one value in each list within the dictionary, or choose \"exclusive\" and then rely upon DAI unchanging default values for any keys not given.For custom recipes, one can use recipe_dict to pass hyperparameters and if using the \"get_one()\" function in a custom recipe, and if user_tune passed contains the hyperparameter dictionary equivalent of params_tune_ tomls, then this params_tune_mode will also work for custom recipes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params tune mode", + "output": "params tune mode refers to Mode to handle params_tune_ tomls: How to handle tomls like params_tune_lightgbm.override: For any key in the params_tune_ toml dict, use the list of values instead of DAI's list of values.override_and_first_as_default: like override, but also use first entry in tuple/list (if present) as override as replacement for (e.g.) params_lightgbm when using params_tune_lightgbm.exclusive: Only tune the keys in the params_tune_ toml dict, unless no keys are present. 
Otherwise use DAI's default values.exclusive_and_first_as_default: Like exclusive but same first as default behavior as override_and_first_as_default.In order to fully control hyperparameter tuning, either one should set \"override\" mode and include every hyperparameter and at least one value in each list within the dictionary, or choose \"exclusive\" and then rely upon DAI unchanging default values for any keys not given.For custom recipes, one can use recipe_dict to pass hyperparameters and if using the \"get_one()\" function in a custom recipe, and if user_tune passed contains the hyperparameter dictionary equivalent of params_tune_ tomls, then this params_tune_mode will also work for custom recipes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Mode to handle params_tune_ tomls: ", + "output": "params tune mode refers to Mode to handle params_tune_ tomls: How to handle tomls like params_tune_lightgbm.override: For any key in the params_tune_ toml dict, use the list of values instead of DAI's list of values.override_and_first_as_default: like override, but also use first entry in tuple/list (if present) as override as replacement for (e.g.) params_lightgbm when using params_tune_lightgbm.exclusive: Only tune the keys in the params_tune_ toml dict, unless no keys are present. 
Otherwise use DAI's default values.exclusive_and_first_as_default: Like exclusive but same first as default behavior as override_and_first_as_default.In order to fully control hyperparameter tuning, either one should set \"override\" mode and include every hyperparameter and at least one value in each list within the dictionary, or choose \"exclusive\" and then rely upon DAI unchanging default values for any keys not given.For custom recipes, one can use recipe_dict to pass hyperparameters and if using the \"get_one()\" function in a custom recipe, and if user_tune passed contains the hyperparameter dictionary equivalent of params_tune_ tomls, then this params_tune_mode will also work for custom recipes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_tune_mode", + "output": "params tune mode refers to How to handle tomls like params_tune_lightgbm.override: For any key in the params_tune_ toml dict, use the list of values instead of DAI's list of values.override_and_first_as_default: like override, but also use first entry in tuple/list (if present) as override as replacement for (e.g.) params_lightgbm when using params_tune_lightgbm.exclusive: Only tune the keys in the params_tune_ toml dict, unless no keys are present. 
Otherwise use DAI's default values.exclusive_and_first_as_default: Like exclusive but same first as default behavior as override_and_first_as_default.In order to fully control hyperparameter tuning, either one should set \"override\" mode and include every hyperparameter and at least one value in each list within the dictionary, or choose \"exclusive\" and then rely upon DAI unchanging default values for any keys not given.For custom recipes, one can use recipe_dict to pass hyperparameters and if using the \"get_one()\" function in a custom recipe, and if user_tune passed contains the hyperparameter dictionary equivalent of params_tune_ tomls, then this params_tune_mode will also work for custom recipes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_tune_mode", + "output": "params tune mode refers to Mode to handle params_tune_ tomls: How to handle tomls like params_tune_lightgbm.override: For any key in the params_tune_ toml dict, use the list of values instead of DAI's list of values.override_and_first_as_default: like override, but also use first entry in tuple/list (if present) as override as replacement for (e.g.) params_lightgbm when using params_tune_lightgbm.exclusive: Only tune the keys in the params_tune_ toml dict, unless no keys are present. 
Otherwise use DAI's default values.exclusive_and_first_as_default: Like exclusive but same first as default behavior as override_and_first_as_default.In order to fully control hyperparameter tuning, either one should set \"override\" mode and include every hyperparameter and at least one value in each list within the dictionary, or choose \"exclusive\" and then rely upon DAI unchanging default values for any keys not given.For custom recipes, one can use recipe_dict to pass hyperparameters and if using the \"get_one()\" function in a custom recipe, and if user_tune passed contains the hyperparameter dictionary equivalent of params_tune_ tomls, then this params_tune_mode will also work for custom recipes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_final_auto_adjust", + "output": "params final auto adjust refers to Whether to adjust GBM trees, learning rate, and early_stopping_rounds for GBM models or recipes with _is_gbm=True.True: auto mode, that changes trees/LR/stopping if tune_learning_rate=false and early stopping is supported by the model and model is GBM or from custom individual with parameter in adjusted_params.False: disable any adjusting from tuning-evolution into final model.Setting this to false is required if (e.g.) one changes params_lightgbm or params_tune_lightgbm and wanted to preserve the tuning-evolution values into the final model.One should also set tune_learning_rate to true to tune the learning_rate, else it will be fixed to some single value." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_final_auto_adjust", + "output": "params final auto adjust refers to Adjust trees/LR: Whether to adjust GBM trees, learning rate, and early_stopping_rounds for GBM models or recipes with _is_gbm=True.True: auto mode, that changes trees/LR/stopping if tune_learning_rate=false and early stopping is supported by the model and model is GBM or from custom individual with parameter in adjusted_params.False: disable any adjusting from tuning-evolution into final model.Setting this to false is required if (e.g.) one changes params_lightgbm or params_tune_lightgbm and wanted to preserve the tuning-evolution values into the final model.One should also set tune_learning_rate to true to tune the learning_rate, else it will be fixed to some single value." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params final auto adjust", + "output": "params final auto adjust refers to Adjust trees/LR: Whether to adjust GBM trees, learning rate, and early_stopping_rounds for GBM models or recipes with _is_gbm=True.True: auto mode, that changes trees/LR/stopping if tune_learning_rate=false and early stopping is supported by the model and model is GBM or from custom individual with parameter in adjusted_params.False: disable any adjusting from tuning-evolution into final model.Setting this to false is required if (e.g.) one changes params_lightgbm or params_tune_lightgbm and wanted to preserve the tuning-evolution values into the final model.One should also set tune_learning_rate to true to tune the learning_rate, else it will be fixed to some single value." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Adjust trees/LR: ", + "output": "params final auto adjust refers to Adjust trees/LR: Whether to adjust GBM trees, learning rate, and early_stopping_rounds for GBM models or recipes with _is_gbm=True.True: auto mode, that changes trees/LR/stopping if tune_learning_rate=false and early stopping is supported by the model and model is GBM or from custom individual with parameter in adjusted_params.False: disable any adjusting from tuning-evolution into final model.Setting this to false is required if (e.g.) one changes params_lightgbm or params_tune_lightgbm and wanted to preserve the tuning-evolution values into the final model.One should also set tune_learning_rate to true to tune the learning_rate, else it will be fixed to some single value." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_final_auto_adjust", + "output": "params final auto adjust refers to Whether to adjust GBM trees, learning rate, and early_stopping_rounds for GBM models or recipes with _is_gbm=True.True: auto mode, that changes trees/LR/stopping if tune_learning_rate=false and early stopping is supported by the model and model is GBM or from custom individual with parameter in adjusted_params.False: disable any adjusting from tuning-evolution into final model.Setting this to false is required if (e.g.) one changes params_lightgbm or params_tune_lightgbm and wanted to preserve the tuning-evolution values into the final model.One should also set tune_learning_rate to true to tune the learning_rate, else it will be fixed to some single value." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_final_auto_adjust", + "output": "params final auto adjust refers to Adjust trees/LR: Whether to adjust GBM trees, learning rate, and early_stopping_rounds for GBM models or recipes with _is_gbm=True.True: auto mode, that changes trees/LR/stopping if tune_learning_rate=false and early stopping is supported by the model and model is GBM or from custom individual with parameter in adjusted_params.False: disable any adjusting from tuning-evolution into final model.Setting this to false is required if (e.g.) one changes params_lightgbm or params_tune_lightgbm and wanted to preserve the tuning-evolution values into the final model.One should also set tune_learning_rate to true to tune the learning_rate, else it will be fixed to some single value." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_lightgbm", + "output": "params tune lightgbm refers to Dictionary of key:lists of values to use for LightGBM tuning, overrides DAI's choice per key e.g. ``params_tune_lightgbm=\"{'min_child_samples': [1,2,5,100,1000], 'min_data_in_bin': [1,2,3,10,100,1000]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_lightgbm", + "output": "params tune lightgbm refers to Dictionary of key:lists of values to use for LightGBM tuning, overrides DAI's choice per key e.g. ``params_tune_lightgbm=\"{'min_child_samples': [1,2,5,100,1000], 'min_data_in_bin': [1,2,3,10,100,1000]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params tune lightgbm", + "output": "params tune lightgbm refers to Dictionary of key:lists of values to use for LightGBM tuning, overrides DAI's choice per key e.g. 
``params_tune_lightgbm=\"{'min_child_samples': [1,2,5,100,1000], 'min_data_in_bin': [1,2,3,10,100,1000]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params tune lightgbm refers to Dictionary of key:lists of values to use for LightGBM tuning, overrides DAI's choice per key e.g. ``params_tune_lightgbm=\"{'min_child_samples': [1,2,5,100,1000], 'min_data_in_bin': [1,2,3,10,100,1000]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_tune_lightgbm", + "output": "params tune lightgbm refers to Dictionary of key:lists of values to use for LightGBM tuning, overrides DAI's choice per key e.g. ``params_tune_lightgbm=\"{'min_child_samples': [1,2,5,100,1000], 'min_data_in_bin': [1,2,3,10,100,1000]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_tune_lightgbm", + "output": "params tune lightgbm refers to Dictionary of key:lists of values to use for LightGBM tuning, overrides DAI's choice per key e.g. ``params_tune_lightgbm=\"{'min_child_samples': [1,2,5,100,1000], 'min_data_in_bin': [1,2,3,10,100,1000]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_xgboost", + "output": "params tune xgboost refers to Like params_tune_lightgbm but for XGBoost e.g. ``params_tune_xgboost=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_xgboost", + "output": "params tune xgboost refers to Like params_tune_lightgbm but for XGBoost e.g. 
``params_tune_xgboost=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params tune xgboost", + "output": "params tune xgboost refers to Like params_tune_lightgbm but for XGBoost e.g. ``params_tune_xgboost=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params tune xgboost refers to Like params_tune_lightgbm but for XGBoost e.g. ``params_tune_xgboost=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_tune_xgboost", + "output": "params tune xgboost refers to Like params_tune_lightgbm but for XGBoost e.g. ``params_tune_xgboost=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_tune_xgboost", + "output": "params tune xgboost refers to Like params_tune_lightgbm but for XGBoost e.g. ``params_tune_xgboost=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_xgboost_rf", + "output": "params tune xgboost rf refers to Like params_tune_lightgbm but for XGBoost random forest e.g. ``params_tune_xgboost_rf=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_xgboost_rf", + "output": "params tune xgboost rf refers to Like params_tune_lightgbm but for XGBoost random forest e.g. 
``params_tune_xgboost_rf=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params tune xgboost rf", + "output": "params tune xgboost rf refers to Like params_tune_lightgbm but for XGBoost random forest e.g. ``params_tune_xgboost_rf=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params tune xgboost rf refers to Like params_tune_lightgbm but for XGBoost random forest e.g. ``params_tune_xgboost_rf=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_tune_xgboost_rf", + "output": "params tune xgboost rf refers to Like params_tune_lightgbm but for XGBoost random forest e.g. ``params_tune_xgboost_rf=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_tune_xgboost_rf", + "output": "params tune xgboost rf refers to Like params_tune_lightgbm but for XGBoost random forest e.g. ``params_tune_xgboost_rf=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_decision_tree", + "output": "params tune decision tree refers to Dictionary of key:lists of values to use for LightGBM Decision Tree tuning, overrides DAI's choice per key e.g. 
``params_tune_decision_tree=\"{'min_child_samples': [1,2,5,100,1000], 'min_data_in_bin': [1,2,3,10,100,1000]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_decision_tree", + "output": "params tune decision tree refers to Dictionary of key:lists of values to use for LightGBM Decision Tree tuning, overrides DAI's choice per key e.g. ``params_tune_decision_tree=\"{'min_child_samples': [1,2,5,100,1000], 'min_data_in_bin': [1,2,3,10,100,1000]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params tune decision tree", + "output": "params tune decision tree refers to Dictionary of key:lists of values to use for LightGBM Decision Tree tuning, overrides DAI's choice per key e.g. ``params_tune_decision_tree=\"{'min_child_samples': [1,2,5,100,1000], 'min_data_in_bin': [1,2,3,10,100,1000]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params tune decision tree refers to Dictionary of key:lists of values to use for LightGBM Decision Tree tuning, overrides DAI's choice per key e.g. ``params_tune_decision_tree=\"{'min_child_samples': [1,2,5,100,1000], 'min_data_in_bin': [1,2,3,10,100,1000]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_tune_decision_tree", + "output": "params tune decision tree refers to Dictionary of key:lists of values to use for LightGBM Decision Tree tuning, overrides DAI's choice per key e.g. 
``params_tune_decision_tree=\"{'min_child_samples': [1,2,5,100,1000], 'min_data_in_bin': [1,2,3,10,100,1000]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_tune_decision_tree", + "output": "params tune decision tree refers to Dictionary of key:lists of values to use for LightGBM Decision Tree tuning, overrides DAI's choice per key e.g. ``params_tune_decision_tree=\"{'min_child_samples': [1,2,5,100,1000], 'min_data_in_bin': [1,2,3,10,100,1000]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_dart", + "output": "params tune dart refers to Like params_tune_lightgbm but for XGBoost's Dart e.g. ``params_tune_dart=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_dart", + "output": "params tune dart refers to Like params_tune_lightgbm but for XGBoost's Dart e.g. ``params_tune_dart=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params tune dart", + "output": "params tune dart refers to Like params_tune_lightgbm but for XGBoost's Dart e.g. ``params_tune_dart=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params tune dart refers to Like params_tune_lightgbm but for XGBoost's Dart e.g. ``params_tune_dart=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_tune_dart", + "output": "params tune dart refers to Like params_tune_lightgbm but for XGBoost's Dart e.g. 
``params_tune_dart=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_tune_dart", + "output": "params tune dart refers to Like params_tune_lightgbm but for XGBoost's Dart e.g. ``params_tune_dart=\"{'max_leaves': [8, 16, 32, 64]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_tensorflow", + "output": "params tune tensorflow refers to Like params_tune_lightgbm but for TensorFlow e.g. ``params_tune_tensorflow=\"{'layers': [(10,10,10), (10, 10, 10, 10)]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_tensorflow", + "output": "params tune tensorflow refers to Like params_tune_lightgbm but for TensorFlow e.g. ``params_tune_tensorflow=\"{'layers': [(10,10,10), (10, 10, 10, 10)]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params tune tensorflow", + "output": "params tune tensorflow refers to Like params_tune_lightgbm but for TensorFlow e.g. ``params_tune_tensorflow=\"{'layers': [(10,10,10), (10, 10, 10, 10)]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params tune tensorflow refers to Like params_tune_lightgbm but for TensorFlow e.g. ``params_tune_tensorflow=\"{'layers': [(10,10,10), (10, 10, 10, 10)]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_tune_tensorflow", + "output": "params tune tensorflow refers to Like params_tune_lightgbm but for TensorFlow e.g. 
``params_tune_tensorflow=\"{'layers': [(10,10,10), (10, 10, 10, 10)]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_tune_tensorflow", + "output": "params tune tensorflow refers to Like params_tune_lightgbm but for TensorFlow e.g. ``params_tune_tensorflow=\"{'layers': [(10,10,10), (10, 10, 10, 10)]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_gblinear", + "output": "params tune gblinear refers to Like params_tune_lightgbm but for gblinear e.g. ``params_tune_gblinear=\"{'reg_lambda': [.01, .001, .0001, .0002]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_gblinear", + "output": "params tune gblinear refers to Like params_tune_lightgbm but for gblinear e.g. ``params_tune_gblinear=\"{'reg_lambda': [.01, .001, .0001, .0002]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params tune gblinear", + "output": "params tune gblinear refers to Like params_tune_lightgbm but for gblinear e.g. ``params_tune_gblinear=\"{'reg_lambda': [.01, .001, .0001, .0002]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params tune gblinear refers to Like params_tune_lightgbm but for gblinear e.g. ``params_tune_gblinear=\"{'reg_lambda': [.01, .001, .0001, .0002]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_tune_gblinear", + "output": "params tune gblinear refers to Like params_tune_lightgbm but for gblinear e.g. 
``params_tune_gblinear=\"{'reg_lambda': [.01, .001, .0001, .0002]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_tune_gblinear", + "output": "params tune gblinear refers to Like params_tune_lightgbm but for gblinear e.g. ``params_tune_gblinear=\"{'reg_lambda': [.01, .001, .0001, .0002]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_rulefit", + "output": "params tune rulefit refers to Like params_tune_lightgbm but for rulefit e.g. ``params_tune_rulefit=\"{'max_depth': [4, 5, 6]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_rulefit", + "output": "params tune rulefit refers to Like params_tune_lightgbm but for rulefit e.g. ``params_tune_rulefit=\"{'max_depth': [4, 5, 6]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params tune rulefit", + "output": "params tune rulefit refers to Like params_tune_lightgbm but for rulefit e.g. ``params_tune_rulefit=\"{'max_depth': [4, 5, 6]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params tune rulefit refers to Like params_tune_lightgbm but for rulefit e.g. ``params_tune_rulefit=\"{'max_depth': [4, 5, 6]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_tune_rulefit", + "output": "params tune rulefit refers to Like params_tune_lightgbm but for rulefit e.g. 
``params_tune_rulefit=\"{'max_depth': [4, 5, 6]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_tune_rulefit", + "output": "params tune rulefit refers to Like params_tune_lightgbm but for rulefit e.g. ``params_tune_rulefit=\"{'max_depth': [4, 5, 6]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_ftrl", + "output": "params tune ftrl refers to Like params_tune_lightgbm but for ftrl" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_ftrl", + "output": "params tune ftrl refers to Like params_tune_lightgbm but for ftrl" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params tune ftrl", + "output": "params tune ftrl refers to Like params_tune_lightgbm but for ftrl" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params tune ftrl refers to Like params_tune_lightgbm but for ftrl" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_tune_ftrl", + "output": "params tune ftrl refers to Like params_tune_lightgbm but for ftrl" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_tune_ftrl", + "output": "params tune ftrl refers to Like params_tune_lightgbm but for ftrl" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_grownet", + "output": "params tune grownet refers to Like params_tune_lightgbm but for GrowNet e.g. 
``params_tune_grownet=\"{'input_dropout': [0.2, 0.5]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_grownet", + "output": "params tune grownet refers to Like params_tune_lightgbm but for GrowNet e.g. ``params_tune_grownet=\"{'input_dropout': [0.2, 0.5]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params tune grownet", + "output": "params tune grownet refers to Like params_tune_lightgbm but for GrowNet e.g. ``params_tune_grownet=\"{'input_dropout': [0.2, 0.5]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params tune grownet refers to Like params_tune_lightgbm but for GrowNet e.g. ``params_tune_grownet=\"{'input_dropout': [0.2, 0.5]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_tune_grownet", + "output": "params tune grownet refers to Like params_tune_lightgbm but for GrowNet e.g. ``params_tune_grownet=\"{'input_dropout': [0.2, 0.5]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_tune_grownet", + "output": "params tune grownet refers to Like params_tune_lightgbm but for GrowNet e.g. ``params_tune_grownet=\"{'input_dropout': [0.2, 0.5]}\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_grow_policy_simple_trees", + "output": "params tune grow policy simple trees refers to Whether to force max_leaves and max_depth to be 0 if grow_policy is depthwise and lossguide, respectively." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params_tune_grow_policy_simple_trees", + "output": "params tune grow policy simple trees refers to Whether to force max_leaves and max_depth to be 0 if grow_policy is depthwise and lossguide, respectively." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "params tune grow policy simple trees", + "output": "params tune grow policy simple trees refers to Whether to force max_leaves and max_depth to be 0 if grow_policy is depthwise and lossguide, respectively." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "params tune grow policy simple trees refers to Whether to force max_leaves and max_depth to be 0 if grow_policy is depthwise and lossguide, respectively." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting params_tune_grow_policy_simple_trees", + "output": "params tune grow policy simple trees refers to Whether to force max_leaves and max_depth to be 0 if grow_policy is depthwise and lossguide, respectively." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting params_tune_grow_policy_simple_trees", + "output": "params tune grow policy simple trees refers to Whether to force max_leaves and max_depth to be 0 if grow_policy is depthwise and lossguide, respectively." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_nestimators", + "output": "max nestimators refers to Maximum number of GBM trees or GLM iterations. Can be reduced for lower accuracy and/or higher interpretability. Early-stopping usually chooses less. Ignored if fixed_max_nestimators is > 0.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_nestimators", + "output": "max nestimators refers to Max. number of trees/iterations: Maximum number of GBM trees or GLM iterations. Can be reduced for lower accuracy and/or higher interpretability. Early-stopping usually chooses less. Ignored if fixed_max_nestimators is > 0. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max nestimators", + "output": "max nestimators refers to Max. number of trees/iterations: Maximum number of GBM trees or GLM iterations. Can be reduced for lower accuracy and/or higher interpretability. Early-stopping usually chooses less. Ignored if fixed_max_nestimators is > 0. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of trees/iterations: ", + "output": "max nestimators refers to Max. number of trees/iterations: Maximum number of GBM trees or GLM iterations. Can be reduced for lower accuracy and/or higher interpretability. Early-stopping usually chooses less. Ignored if fixed_max_nestimators is > 0. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_nestimators", + "output": "max nestimators refers to Maximum number of GBM trees or GLM iterations. Can be reduced for lower accuracy and/or higher interpretability. Early-stopping usually chooses less. Ignored if fixed_max_nestimators is > 0. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_nestimators", + "output": "max nestimators refers to Max. number of trees/iterations: Maximum number of GBM trees or GLM iterations. Can be reduced for lower accuracy and/or higher interpretability. Early-stopping usually chooses less. Ignored if fixed_max_nestimators is > 0.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_max_nestimators", + "output": "fixed max nestimators refers to Fixed maximum number of GBM trees or GLM iterations. If > 0, ignores max_nestimators and disables automatic reduction due to lower accuracy or higher interpretability. Early-stopping usually chooses less. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_max_nestimators", + "output": "fixed max nestimators refers to Fixed max. number of trees/iterations (-1 = auto mode): Fixed maximum number of GBM trees or GLM iterations. If > 0, ignores max_nestimators and disables automatic reduction due to lower accuracy or higher interpretability. Early-stopping usually chooses less. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed max nestimators", + "output": "fixed max nestimators refers to Fixed max. number of trees/iterations (-1 = auto mode): Fixed maximum number of GBM trees or GLM iterations. If > 0, ignores max_nestimators and disables automatic reduction due to lower accuracy or higher interpretability. Early-stopping usually chooses less. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Fixed max. number of trees/iterations (-1 = auto mode): ", + "output": "fixed max nestimators refers to Fixed max. number of trees/iterations (-1 = auto mode): Fixed maximum number of GBM trees or GLM iterations. If > 0, ignores max_nestimators and disables automatic reduction due to lower accuracy or higher interpretability. Early-stopping usually chooses less.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fixed_max_nestimators", + "output": "fixed max nestimators refers to Fixed maximum number of GBM trees or GLM iterations. If > 0, ignores max_nestimators and disables automatic reduction due to lower accuracy or higher interpretability. Early-stopping usually chooses less. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fixed_max_nestimators", + "output": "fixed max nestimators refers to Fixed max. number of trees/iterations (-1 = auto mode): Fixed maximum number of GBM trees or GLM iterations. If > 0, ignores max_nestimators and disables automatic reduction due to lower accuracy or higher interpretability. Early-stopping usually chooses less. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "n_estimators_list_no_early_stopping", + "output": "n estimators list no early stopping refers to LightGBM dart mode and normal rf mode do not use early stopping, and they will sample from these values for n_estimators. XGBoost Dart mode will also sample from these n_estimators. Also applies to XGBoost Dask models that do not yet support early stopping or callbacks. For default parameters it chooses first value in list, while mutations sample from the list. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "n_estimators_list_no_early_stopping", + "output": "n estimators list no early stopping refers to n_estimators list to sample from for model mutations for models that do not use early stopping: LightGBM dart mode and normal rf mode do not use early stopping, and they will sample from these values for n_estimators. XGBoost Dart mode will also sample from these n_estimators.
Also applies to XGBoost Dask models that do not yet support early stopping or callbacks. For default parameters it chooses first value in list, while mutations sample from the list. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "n estimators list no early stopping", + "output": "n estimators list no early stopping refers to n_estimators list to sample from for model mutations for models that do not use early stopping: LightGBM dart mode and normal rf mode do not use early stopping, and they will sample from these values for n_estimators. XGBoost Dart mode will also sample from these n_estimators. Also applies to XGBoost Dask models that do not yet support early stopping or callbacks. For default parameters it chooses first value in list, while mutations sample from the list. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "n_estimators list to sample from for model mutations for models that do not use early stopping: ", + "output": "n estimators list no early stopping refers to n_estimators list to sample from for model mutations for models that do not use early stopping: LightGBM dart mode and normal rf mode do not use early stopping, and they will sample from these values for n_estimators. XGBoost Dart mode will also sample from these n_estimators. Also applies to XGBoost Dask models that do not yet support early stopping or callbacks. For default parameters it chooses first value in list, while mutations sample from the list. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting n_estimators_list_no_early_stopping", + "output": "n estimators list no early stopping refers to LightGBM dart mode and normal rf mode do not use early stopping, and they will sample from these values for n_estimators. 
XGBoost Dart mode will also sample from these n_estimators. Also applies to XGBoost Dask models that do not yet support early stopping or callbacks. For default parameters it chooses first value in list, while mutations sample from the list. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting n_estimators_list_no_early_stopping", + "output": "n estimators list no early stopping refers to n_estimators list to sample from for model mutations for models that do not use early stopping: LightGBM dart mode and normal rf mode do not use early stopping, and they will sample from these values for n_estimators. XGBoost Dart mode will also sample from these n_estimators. Also applies to XGBoost Dask models that do not yet support early stopping or callbacks. For default parameters it chooses first value in list, while mutations sample from the list. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_learning_rate_final", + "output": "min learning rate final refers to Lower limit on learning rate for final ensemble GBM models. In some cases, the maximum number of trees/iterations is insufficient for the final learning rate, which can lead to no early stopping triggered and poor final model performance. Then, one can try increasing the learning rate by raising this minimum, or one can try increasing the maximum number of trees/iterations.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_learning_rate_final", + "output": "min learning rate final refers to Minimum learning rate for final ensemble GBM models: Lower limit on learning rate for final ensemble GBM models. In some cases, the maximum number of trees/iterations is insufficient for the final learning rate, which can lead to no early stopping triggered and poor final model performance. Then, one can try increasing the learning rate by raising this minimum, or one can try increasing the maximum number of trees/iterations. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min learning rate final", + "output": "min learning rate final refers to Minimum learning rate for final ensemble GBM models: Lower limit on learning rate for final ensemble GBM models. In some cases, the maximum number of trees/iterations is insufficient for the final learning rate, which can lead to no early stopping triggered and poor final model performance. Then, one can try increasing the learning rate by raising this minimum, or one can try increasing the maximum number of trees/iterations. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Minimum learning rate for final ensemble GBM models: ", + "output": "min learning rate final refers to Minimum learning rate for final ensemble GBM models: Lower limit on learning rate for final ensemble GBM models. In some cases, the maximum number of trees/iterations is insufficient for the final learning rate, which can lead to no early stopping triggered and poor final model performance. Then, one can try increasing the learning rate by raising this minimum, or one can try increasing the maximum number of trees/iterations.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_learning_rate_final", + "output": "min learning rate final refers to Lower limit on learning rate for final ensemble GBM models. In some cases, the maximum number of trees/iterations is insufficient for the final learning rate, which can lead to no early stopping triggered and poor final model performance. Then, one can try increasing the learning rate by raising this minimum, or one can try increasing the maximum number of trees/iterations. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_learning_rate_final", + "output": "min learning rate final refers to Minimum learning rate for final ensemble GBM models: Lower limit on learning rate for final ensemble GBM models. In some cases, the maximum number of trees/iterations is insufficient for the final learning rate, which can lead to no early stopping triggered and poor final model performance. Then, one can try increasing the learning rate by raising this minimum, or one can try increasing the maximum number of trees/iterations.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_learning_rate_final", + "output": "max learning rate final refers to Upper limit on learning rate for final ensemble GBM models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_learning_rate_final", + "output": "max learning rate final refers to Maximum learning rate for final ensemble GBM models: Upper limit on learning rate for final ensemble GBM models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max learning rate final", + "output": "max learning rate final refers to Maximum learning rate for final ensemble GBM models: Upper limit on learning rate for final ensemble GBM models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum learning rate for final ensemble GBM models: ", + "output": "max learning rate final refers to Maximum learning rate for final ensemble GBM models: Upper limit on learning rate for final ensemble GBM models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_learning_rate_final", + "output": "max learning rate final refers to Upper limit on learning rate for final ensemble GBM models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_learning_rate_final", + "output": "max learning rate final refers to Maximum learning rate for final ensemble GBM models: Upper limit on learning rate for final ensemble GBM models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_nestimators_feature_evolution_factor", + "output": "max nestimators feature evolution factor refers to 
factor by which max_nestimators is reduced for tuning and feature evolution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_nestimators_feature_evolution_factor", + "output": "max nestimators feature evolution factor refers to Reduction factor for max. number of trees/iterations during feature evolution: factor by which max_nestimators is reduced for tuning and feature evolution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max nestimators feature evolution factor", + "output": "max nestimators feature evolution factor refers to Reduction factor for max. number of trees/iterations during feature evolution: factor by which max_nestimators is reduced for tuning and feature evolution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Reduction factor for max. number of trees/iterations during feature evolution: ", + "output": "max nestimators feature evolution factor refers to Reduction factor for max. number of trees/iterations during feature evolution: factor by which max_nestimators is reduced for tuning and feature evolution" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_nestimators_feature_evolution_factor", + "output": "max nestimators feature evolution factor refers to factor by which max_nestimators is reduced for tuning and feature evolution" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_nestimators_feature_evolution_factor", + "output": "max nestimators feature evolution factor refers to Reduction factor for max. 
number of trees/iterations during feature evolution: factor by which max_nestimators is reduced for tuning and feature evolution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_learning_rate", + "output": "min learning rate refers to Lower limit on learning rate for feature engineering GBM models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_learning_rate", + "output": "min learning rate refers to Min. learning rate for feature engineering GBM models: Lower limit on learning rate for feature engineering GBM models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min learning rate", + "output": "min learning rate refers to Min. learning rate for feature engineering GBM models: Lower limit on learning rate for feature engineering GBM models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Min. learning rate for feature engineering GBM models: ", + "output": "min learning rate refers to Min. learning rate for feature engineering GBM models: Lower limit on learning rate for feature engineering GBM models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_learning_rate", + "output": "min learning rate refers to Lower limit on learning rate for feature engineering GBM models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_learning_rate", + "output": "min learning rate refers to Min. 
learning rate for feature engineering GBM models: Lower limit on learning rate for feature engineering GBM models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_learning_rate", + "output": "max learning rate refers to Upper limit on learning rate for GBM models. If you want to override min_learning_rate and min_learning_rate_final, set this to a smaller value " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_learning_rate", + "output": "max learning rate refers to Max. learning rate for feature engineering GBM models: Upper limit on learning rate for GBM models. If you want to override min_learning_rate and min_learning_rate_final, set this to a smaller value " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max learning rate", + "output": "max learning rate refers to Max. learning rate for feature engineering GBM models: Upper limit on learning rate for GBM models. If you want to override min_learning_rate and min_learning_rate_final, set this to a smaller value " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. learning rate for feature engineering GBM models: ", + "output": "max learning rate refers to Max.
learning rate for feature engineering GBM models: Upper limit on learning rate for GBM models. If you want to override min_learning_rate and min_learning_rate_final, set this to a smaller value " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_learning_rate", + "output": "max learning rate refers to Upper limit on learning rate for GBM models. If you want to override min_learning_rate and min_learning_rate_final, set this to a smaller value " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_learning_rate", + "output": "max learning rate refers to Max. learning rate for feature engineering GBM models: Upper limit on learning rate for GBM models. If you want to override min_learning_rate and min_learning_rate_final, set this to a smaller value " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lock_ga_to_final_trees", + "output": "lock ga to final trees refers to Whether to lock learning rate, tree count, early stopping rounds for GBM algorithms to the final model values." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lock_ga_to_final_trees", + "output": "lock ga to final trees refers to Whether to lock tree parameters to final model values: Whether to lock learning rate, tree count, early stopping rounds for GBM algorithms to the final model values." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lock ga to final trees", + "output": "lock ga to final trees refers to Whether to lock tree parameters to final model values: Whether to lock learning rate, tree count, early stopping rounds for GBM algorithms to the final model values."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to lock tree parameters to final model values: ", + "output": "lock ga to final trees refers to Whether to lock tree parameters to final model values: Whether to lock learning rate, tree count, early stopping rounds for GBM algorithms to the final model values." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting lock_ga_to_final_trees", + "output": "lock ga to final trees refers to Whether to lock learning rate, tree count, early stopping rounds for GBM algorithms to the final model values." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting lock_ga_to_final_trees", + "output": "lock ga to final trees refers to Whether to lock tree parameters to final model values: Whether to lock learning rate, tree count, early stopping rounds for GBM algorithms to the final model values." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tune_learning_rate", + "output": "tune learning rate refers to Whether to tune learning rate for GBM algorithms (if not doing just single final model). If tuning with Optuna, might help isolate optimal learning rate. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tune_learning_rate", + "output": "tune learning rate refers to Whether to tune learning rate even for GBM algorithms with early stopping: Whether to tune learning rate for GBM algorithms (if not doing just single final model). If tuning with Optuna, might help isolate optimal learning rate.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tune learning rate", + "output": "tune learning rate refers to Whether to tune learning rate even for GBM algorithms with early stopping: Whether to tune learning rate for GBM algorithms (if not doing just single final model). If tuning with Optuna, might help isolate optimal learning rate. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to tune learning rate even for GBM algorithms with early stopping: ", + "output": "tune learning rate refers to Whether to tune learning rate even for GBM algorithms with early stopping: Whether to tune learning rate for GBM algorithms (if not doing just single final model). If tuning with Optuna, might help isolate optimal learning rate. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tune_learning_rate", + "output": "tune learning rate refers to Whether to tune learning rate for GBM algorithms (if not doing just single final model). If tuning with Optuna, might help isolate optimal learning rate. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tune_learning_rate", + "output": "tune learning rate refers to Whether to tune learning rate even for GBM algorithms with early stopping: Whether to tune learning rate for GBM algorithms (if not doing just single final model). If tuning with Optuna, might help isolate optimal learning rate. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_epochs", + "output": "max epochs refers to Max.
number of epochs for TensorFlow and FTRL models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_epochs", + "output": "max epochs refers to Max. number of epochs for TensorFlow / FTRL: Max. number of epochs for TensorFlow and FTRL models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max epochs", + "output": "max epochs refers to Max. number of epochs for TensorFlow / FTRL: Max. number of epochs for TensorFlow and FTRL models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of epochs for TensorFlow / FTRL: ", + "output": "max epochs refers to Max. number of epochs for TensorFlow / FTRL: Max. number of epochs for TensorFlow and FTRL models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_epochs", + "output": "max epochs refers to Max. number of epochs for TensorFlow and FTRL models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_epochs", + "output": "max epochs refers to Max. number of epochs for TensorFlow / FTRL: Max. number of epochs for TensorFlow and FTRL models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_epochs_tf_big_data", + "output": "max epochs tf big data refers to Number of epochs for TensorFlow when larger data size." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_epochs_tf_big_data", + "output": "max epochs tf big data refers to Number of epochs for TensorFlow when larger data size." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max epochs tf big data", + "output": "max epochs tf big data refers to Number of epochs for TensorFlow when larger data size." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max epochs tf big data refers to Number of epochs for TensorFlow when larger data size." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_epochs_tf_big_data", + "output": "max epochs tf big data refers to Number of epochs for TensorFlow when larger data size." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_epochs_tf_big_data", + "output": "max epochs tf big data refers to Number of epochs for TensorFlow when larger data size." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_max_depth", + "output": "max max depth refers to Maximum tree depth (and corresponding max max_leaves as 2**max_max_depth)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_max_depth", + "output": "max max depth refers to Max. tree depth (and Max. max_leaves as 2**max_max_depth): Maximum tree depth (and corresponding max max_leaves as 2**max_max_depth)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max max depth", + "output": "max max depth refers to Max. tree depth (and Max. max_leaves as 2**max_max_depth): Maximum tree depth (and corresponding max max_leaves as 2**max_max_depth)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. tree depth (and Max. 
max_leaves as 2**max_max_depth): ", + "output": "max max depth refers to Max. tree depth (and Max. max_leaves as 2**max_max_depth): Maximum tree depth (and corresponding max max_leaves as 2**max_max_depth)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_max_depth", + "output": "max max depth refers to Maximum tree depth (and corresponding max max_leaves as 2**max_max_depth)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_max_depth", + "output": "max max depth refers to Max. tree depth (and Max. max_leaves as 2**max_max_depth): Maximum tree depth (and corresponding max max_leaves as 2**max_max_depth)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default_max_bin", + "output": "default max bin refers to Default max_bin for tree methods" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default_max_bin", + "output": "default max bin refers to Default max_bin for tree methods" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default max bin", + "output": "default max bin refers to Default max_bin for tree methods" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "default max bin refers to Default max_bin for tree methods" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting default_max_bin", + "output": "default max bin refers to Default max_bin for tree methods" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting default_max_bin", + "output": "default max bin refers to Default max_bin for tree methods" + }, + { 
+ "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default_lightgbm_max_bin", + "output": "default lightgbm max bin refers to Default max_bin for LightGBM (64 recommended for GPU LightGBM for speed)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default_lightgbm_max_bin", + "output": "default lightgbm max bin refers to Default max_bin for LightGBM (64 recommended for GPU LightGBM for speed)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default lightgbm max bin", + "output": "default lightgbm max bin refers to Default max_bin for LightGBM (64 recommended for GPU LightGBM for speed)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "default lightgbm max bin refers to Default max_bin for LightGBM (64 recommended for GPU LightGBM for speed)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting default_lightgbm_max_bin", + "output": "default lightgbm max bin refers to Default max_bin for LightGBM (64 recommended for GPU LightGBM for speed)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting default_lightgbm_max_bin", + "output": "default lightgbm max bin refers to Default max_bin for LightGBM (64 recommended for GPU LightGBM for speed)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_max_bin", + "output": "max max bin refers to Maximum max_bin for tree features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_max_bin", + "output": "max max bin refers to Max. 
max_bin for tree features: Maximum max_bin for tree features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max max bin", + "output": "max max bin refers to Max. max_bin for tree features: Maximum max_bin for tree features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. max_bin for tree features: ", + "output": "max max bin refers to Max. max_bin for tree features: Maximum max_bin for tree features" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_max_bin", + "output": "max max bin refers to Maximum max_bin for tree features" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_max_bin", + "output": "max max bin refers to Max. max_bin for tree features: Maximum max_bin for tree features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_max_bin", + "output": "min max bin refers to Minimum max_bin for any tree" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_max_bin", + "output": "min max bin refers to Minimum max_bin for any tree" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min max bin", + "output": "min max bin refers to Minimum max_bin for any tree" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "min max bin refers to Minimum max_bin for any tree" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_max_bin", + "output": "min max bin refers to Minimum max_bin for any tree" + }, + { + 
"prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_max_bin", + "output": "min max bin refers to Minimum max_bin for any tree" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "scale_mem_for_max_bin", + "output": "scale mem for max bin refers to Amount of memory which can handle max_bin = 256 can handle 125 columns and max_bin = 32 for 1000 columns As available memory on system goes higher than this scale, can handle proportionally more columns at higher max_bin Currently set to 10GB" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "scale_mem_for_max_bin", + "output": "scale mem for max bin refers to Amount of memory which can handle max_bin = 256 can handle 125 columns and max_bin = 32 for 1000 columns As available memory on system goes higher than this scale, can handle proportionally more columns at higher max_bin Currently set to 10GB" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "scale mem for max bin", + "output": "scale mem for max bin refers to Amount of memory which can handle max_bin = 256 can handle 125 columns and max_bin = 32 for 1000 columns As available memory on system goes higher than this scale, can handle proportionally more columns at higher max_bin Currently set to 10GB" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "scale mem for max bin refers to Amount of memory which can handle max_bin = 256 can handle 125 columns and max_bin = 32 for 1000 columns As available memory on system goes higher than this scale, can handle proportionally more columns at higher max_bin Currently set to 10GB" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation 
of the expert setting scale_mem_for_max_bin", + "output": "scale mem for max bin refers to Amount of memory which can handle max_bin = 256 can handle 125 columns and max_bin = 32 for 1000 columns As available memory on system goes higher than this scale, can handle proportionally more columns at higher max_bin Currently set to 10GB" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting scale_mem_for_max_bin", + "output": "scale mem for max bin refers to Amount of memory which can handle max_bin = 256 can handle 125 columns and max_bin = 32 for 1000 columns As available memory on system goes higher than this scale, can handle proportionally more columns at higher max_bin Currently set to 10GB" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "factor_rf", + "output": "factor rf refers to Factor by which rf gets more depth than gbdt" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "factor_rf", + "output": "factor rf refers to Factor by which rf gets more depth than gbdt" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "factor rf", + "output": "factor rf refers to Factor by which rf gets more depth than gbdt" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "factor rf refers to Factor by which rf gets more depth than gbdt" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting factor_rf", + "output": "factor rf refers to Factor by which rf gets more depth than gbdt" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting factor_rf", + "output": "factor rf refers to Factor by which rf gets more 
depth than gbdt" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_use_all_cores", + "output": "tensorflow use all cores refers to Whether TensorFlow will use all CPU cores, or if it will split among all transformers. Only for transformers, not TensorFlow model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_use_all_cores", + "output": "tensorflow use all cores refers to Whether TensorFlow will use all CPU cores, or if it will split among all transformers. Only for transformers, not TensorFlow model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow use all cores", + "output": "tensorflow use all cores refers to Whether TensorFlow will use all CPU cores, or if it will split among all transformers. Only for transformers, not TensorFlow model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tensorflow use all cores refers to Whether TensorFlow will use all CPU cores, or if it will split among all transformers. Only for transformers, not TensorFlow model." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_use_all_cores", + "output": "tensorflow use all cores refers to Whether TensorFlow will use all CPU cores, or if it will split among all transformers. Only for transformers, not TensorFlow model." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_use_all_cores", + "output": "tensorflow use all cores refers to Whether TensorFlow will use all CPU cores, or if it will split among all transformers. Only for transformers, not TensorFlow model." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_use_all_cores_even_if_reproducible_true", + "output": "tensorflow use all cores even if reproducible true refers to Whether TensorFlow will use all CPU cores if reproducible is set, or if it will split among all transformers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_use_all_cores_even_if_reproducible_true", + "output": "tensorflow use all cores even if reproducible true refers to Whether TensorFlow will use all CPU cores if reproducible is set, or if it will split among all transformers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow use all cores even if reproducible true", + "output": "tensorflow use all cores even if reproducible true refers to Whether TensorFlow will use all CPU cores if reproducible is set, or if it will split among all transformers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tensorflow use all cores even if reproducible true refers to Whether TensorFlow will use all CPU cores if reproducible is set, or if it will split among all transformers" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_use_all_cores_even_if_reproducible_true", + "output": "tensorflow use all cores even if reproducible true refers to Whether TensorFlow will use all CPU cores if reproducible is set, or if it will split among all transformers" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_use_all_cores_even_if_reproducible_true", + "output": "tensorflow use all cores even if reproducible true refers to Whether 
TensorFlow will use all CPU cores if reproducible is set, or if it will split among all transformers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_disable_memory_optimization", + "output": "tensorflow disable memory optimization refers to Whether to disable TensorFlow memory optimizations. Can help fix tensorflow.python.framework.errors_impl.AlreadyExistsError" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_disable_memory_optimization", + "output": "tensorflow disable memory optimization refers to Whether to disable TensorFlow memory optimizations. Can help fix tensorflow.python.framework.errors_impl.AlreadyExistsError" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow disable memory optimization", + "output": "tensorflow disable memory optimization refers to Whether to disable TensorFlow memory optimizations. Can help fix tensorflow.python.framework.errors_impl.AlreadyExistsError" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tensorflow disable memory optimization refers to Whether to disable TensorFlow memory optimizations. Can help fix tensorflow.python.framework.errors_impl.AlreadyExistsError" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_disable_memory_optimization", + "output": "tensorflow disable memory optimization refers to Whether to disable TensorFlow memory optimizations. 
Can help fix tensorflow.python.framework.errors_impl.AlreadyExistsError" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_disable_memory_optimization", + "output": "tensorflow disable memory optimization refers to Whether to disable TensorFlow memory optimizations. Can help fix tensorflow.python.framework.errors_impl.AlreadyExistsError" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_cores", + "output": "tensorflow cores refers to How many cores to use for each TensorFlow model, regardless if GPU or CPU based (0 = auto mode)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_cores", + "output": "tensorflow cores refers to How many cores to use for each TensorFlow model, regardless if GPU or CPU based (0 = auto mode)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow cores", + "output": "tensorflow cores refers to How many cores to use for each TensorFlow model, regardless if GPU or CPU based (0 = auto mode)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tensorflow cores refers to How many cores to use for each TensorFlow model, regardless if GPU or CPU based (0 = auto mode)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_cores", + "output": "tensorflow cores refers to How many cores to use for each TensorFlow model, regardless if GPU or CPU based (0 = auto mode)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_cores", + "output": "tensorflow cores refers to How many cores to use for each TensorFlow 
model, regardless if GPU or CPU based (0 = auto mode)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_model_max_cores", + "output": "tensorflow model max cores refers to For TensorFlow models, maximum number of cores to use if tensorflow_cores=0 (auto mode), because TensorFlow model is inefficient at using many cores. See also max_fit_cores for all models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_model_max_cores", + "output": "tensorflow model max cores refers to For TensorFlow models, maximum number of cores to use if tensorflow_cores=0 (auto mode), because TensorFlow model is inefficient at using many cores. See also max_fit_cores for all models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow model max cores", + "output": "tensorflow model max cores refers to For TensorFlow models, maximum number of cores to use if tensorflow_cores=0 (auto mode), because TensorFlow model is inefficient at using many cores. See also max_fit_cores for all models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tensorflow model max cores refers to For TensorFlow models, maximum number of cores to use if tensorflow_cores=0 (auto mode), because TensorFlow model is inefficient at using many cores. See also max_fit_cores for all models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_model_max_cores", + "output": "tensorflow model max cores refers to For TensorFlow models, maximum number of cores to use if tensorflow_cores=0 (auto mode), because TensorFlow model is inefficient at using many cores. See also max_fit_cores for all models." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_model_max_cores", + "output": "tensorflow model max cores refers to For TensorFlow models, maximum number of cores to use if tensorflow_cores=0 (auto mode), because TensorFlow model is inefficient at using many cores. See also max_fit_cores for all models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bert_cores", + "output": "bert cores refers to How many cores to use for each Bert Model and Transformer, regardless if GPU or CPU based (0 = auto mode)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bert_cores", + "output": "bert cores refers to How many cores to use for each Bert Model and Transformer, regardless if GPU or CPU based (0 = auto mode)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bert cores", + "output": "bert cores refers to How many cores to use for each Bert Model and Transformer, regardless if GPU or CPU based (0 = auto mode)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "bert cores refers to How many cores to use for each Bert Model and Transformer, regardless if GPU or CPU based (0 = auto mode)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting bert_cores", + "output": "bert cores refers to How many cores to use for each Bert Model and Transformer, regardless if GPU or CPU based (0 = auto mode)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting bert_cores", + "output": "bert cores refers to How many cores to use for each Bert Model and Transformer, regardless if GPU or CPU based (0 
= auto mode)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bert_use_all_cores", + "output": "bert use all cores refers to Whether Bert will use all CPU cores, or if it will split among all transformers. Only for transformers, not Bert model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bert_use_all_cores", + "output": "bert use all cores refers to Whether Bert will use all CPU cores, or if it will split among all transformers. Only for transformers, not Bert model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bert use all cores", + "output": "bert use all cores refers to Whether Bert will use all CPU cores, or if it will split among all transformers. Only for transformers, not Bert model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "bert use all cores refers to Whether Bert will use all CPU cores, or if it will split among all transformers. Only for transformers, not Bert model." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting bert_use_all_cores", + "output": "bert use all cores refers to Whether Bert will use all CPU cores, or if it will split among all transformers. Only for transformers, not Bert model." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting bert_use_all_cores", + "output": "bert use all cores refers to Whether Bert will use all CPU cores, or if it will split among all transformers. Only for transformers, not Bert model." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bert_model_max_cores", + "output": "bert model max cores refers to For Bert models, maximum number of cores to use if bert_cores=0 (auto mode), because Bert model is inefficient at using many cores. See also max_fit_cores for all models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bert_model_max_cores", + "output": "bert model max cores refers to For Bert models, maximum number of cores to use if bert_cores=0 (auto mode), because Bert model is inefficient at using many cores. See also max_fit_cores for all models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bert model max cores", + "output": "bert model max cores refers to For Bert models, maximum number of cores to use if bert_cores=0 (auto mode), because Bert model is inefficient at using many cores. See also max_fit_cores for all models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "bert model max cores refers to For Bert models, maximum number of cores to use if bert_cores=0 (auto mode), because Bert model is inefficient at using many cores. See also max_fit_cores for all models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting bert_model_max_cores", + "output": "bert model max cores refers to For Bert models, maximum number of cores to use if bert_cores=0 (auto mode), because Bert model is inefficient at using many cores. See also max_fit_cores for all models." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting bert_model_max_cores", + "output": "bert model max cores refers to For Bert models, maximum number of cores to use if bert_cores=0 (auto mode), because Bert model is inefficient at using many cores. See also max_fit_cores for all models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "rulefit_max_num_rules", + "output": "rulefit max num rules refers to Max number of rules to be used for RuleFit models (-1 for all)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "rulefit_max_num_rules", + "output": "rulefit max num rules refers to Max. number of rules for RuleFit (-1 for all): Max number of rules to be used for RuleFit models (-1 for all)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "rulefit max num rules", + "output": "rulefit max num rules refers to Max. number of rules for RuleFit (-1 for all): Max number of rules to be used for RuleFit models (-1 for all)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of rules for RuleFit (-1 for all): ", + "output": "rulefit max num rules refers to Max. number of rules for RuleFit (-1 for all): Max number of rules to be used for RuleFit models (-1 for all)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting rulefit_max_num_rules", + "output": "rulefit max num rules refers to Max number of rules to be used for RuleFit models (-1 for all)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting rulefit_max_num_rules", + "output": "rulefit max num rules refers to Max. 
number of rules for RuleFit (-1 for all): Max number of rules to be used for RuleFit models (-1 for all)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "rulefit_max_tree_depth", + "output": "rulefit max tree depth refers to Max tree depth for RuleFit models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "rulefit_max_tree_depth", + "output": "rulefit max tree depth refers to Max tree depth for RuleFit models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "rulefit max tree depth", + "output": "rulefit max tree depth refers to Max tree depth for RuleFit models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "rulefit max tree depth refers to Max tree depth for RuleFit models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting rulefit_max_tree_depth", + "output": "rulefit max tree depth refers to Max tree depth for RuleFit models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting rulefit_max_tree_depth", + "output": "rulefit max tree depth refers to Max tree depth for RuleFit models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "rulefit_max_num_trees", + "output": "rulefit max num trees refers to Max number of trees for RuleFit models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "rulefit_max_num_trees", + "output": "rulefit max num trees refers to Max number of trees for RuleFit models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert 
setting for Driverless AI", + "input": "rulefit max num trees", + "output": "rulefit max num trees refers to Max number of trees for RuleFit models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "rulefit max num trees refers to Max number of trees for RuleFit models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting rulefit_max_num_trees", + "output": "rulefit max num trees refers to Max number of trees for RuleFit models" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting rulefit_max_num_trees", + "output": "rulefit max num trees refers to Max number of trees for RuleFit models" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "one_hot_encoding_cardinality_threshold", + "output": "one hot encoding cardinality threshold refers to Enable One-Hot-Encoding (which does binning to limit to number of bins to no more than 100 anyway) for categorical columns with fewer than this many unique values Set to 0 to disable" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "one_hot_encoding_cardinality_threshold", + "output": "one hot encoding cardinality threshold refers to Enable One-Hot-Encoding (which does binning to limit to number of bins to no more than 100 anyway) for categorical columns with fewer than this many unique values Set to 0 to disable" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "one hot encoding cardinality threshold", + "output": "one hot encoding cardinality threshold refers to Enable One-Hot-Encoding (which does binning to limit to number of bins to no more than 100 anyway) for categorical columns with fewer than this 
many unique values Set to 0 to disable" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "one hot encoding cardinality threshold refers to Enable One-Hot-Encoding (which does binning to limit to number of bins to no more than 100 anyway) for categorical columns with fewer than this many unique values Set to 0 to disable" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting one_hot_encoding_cardinality_threshold", + "output": "one hot encoding cardinality threshold refers to Enable One-Hot-Encoding (which does binning to limit to number of bins to no more than 100 anyway) for categorical columns with fewer than this many unique values Set to 0 to disable" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting one_hot_encoding_cardinality_threshold", + "output": "one hot encoding cardinality threshold refers to Enable One-Hot-Encoding (which does binning to limit to number of bins to no more than 100 anyway) for categorical columns with fewer than this many unique values Set to 0 to disable" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "one_hot_encoding_cardinality_threshold_default_use", + "output": "one hot encoding cardinality threshold default use refers to How many levels to choose one-hot by default instead of other encodings, restricted down to 10x less (down to 2 levels) when number of columns able to be used with OHE exceeds 500. Note the total number of bins is reduced if bigger data independently of this." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "one_hot_encoding_cardinality_threshold_default_use", + "output": "one hot encoding cardinality threshold default use refers to How many levels to choose one-hot by default instead of other encodings, restricted down to 10x less (down to 2 levels) when number of columns able to be used with OHE exceeds 500. Note the total number of bins is reduced if bigger data independently of this." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "one hot encoding cardinality threshold default use", + "output": "one hot encoding cardinality threshold default use refers to How many levels to choose one-hot by default instead of other encodings, restricted down to 10x less (down to 2 levels) when number of columns able to be used with OHE exceeds 500. Note the total number of bins is reduced if bigger data independently of this." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "one hot encoding cardinality threshold default use refers to How many levels to choose one-hot by default instead of other encodings, restricted down to 10x less (down to 2 levels) when number of columns able to be used with OHE exceeds 500. Note the total number of bins is reduced if bigger data independently of this." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting one_hot_encoding_cardinality_threshold_default_use", + "output": "one hot encoding cardinality threshold default use refers to How many levels to choose one-hot by default instead of other encodings, restricted down to 10x less (down to 2 levels) when number of columns able to be used with OHE exceeds 500. Note the total number of bins is reduced if bigger data independently of this." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting one_hot_encoding_cardinality_threshold_default_use", + "output": "one hot encoding cardinality threshold default use refers to How many levels to choose one-hot by default instead of other encodings, restricted down to 10x less (down to 2 levels) when number of columns able to be used with OHE exceeds 500. Note the total number of bins is reduced if bigger data independently of this." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text_as_categorical_cardinality_threshold", + "output": "text as categorical cardinality threshold refers to Treat text columns also as categorical columns if the cardinality is <= this value. Set to 0 to treat text columns only as text." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text_as_categorical_cardinality_threshold", + "output": "text as categorical cardinality threshold refers to Treat text columns also as categorical columns if the cardinality is <= this value. Set to 0 to treat text columns only as text." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text as categorical cardinality threshold", + "output": "text as categorical cardinality threshold refers to Treat text columns also as categorical columns if the cardinality is <= this value. Set to 0 to treat text columns only as text." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "text as categorical cardinality threshold refers to Treat text columns also as categorical columns if the cardinality is <= this value. Set to 0 to treat text columns only as text." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting text_as_categorical_cardinality_threshold", + "output": "text as categorical cardinality threshold refers to Treat text columns also as categorical columns if the cardinality is <= this value. Set to 0 to treat text columns only as text." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting text_as_categorical_cardinality_threshold", + "output": "text as categorical cardinality threshold refers to Treat text columns also as categorical columns if the cardinality is <= this value. Set to 0 to treat text columns only as text." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "numeric_as_categorical_cardinality_threshold", + "output": "numeric as categorical cardinality threshold refers to If num_as_cat is true, then treat numeric columns also as categorical columns if the cardinality is > this value. Setting to 0 allows all numeric to be treated as categorical if num_as_cat is True." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "numeric_as_categorical_cardinality_threshold", + "output": "numeric as categorical cardinality threshold refers to If num_as_cat is true, then treat numeric columns also as categorical columns if the cardinality is > this value. Setting to 0 allows all numeric to be treated as categorical if num_as_cat is True." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "numeric as categorical cardinality threshold", + "output": "numeric as categorical cardinality threshold refers to If num_as_cat is true, then treat numeric columns also as categorical columns if the cardinality is > this value. 
Setting to 0 allows all numeric to be treated as categorical if num_as_cat is True." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "numeric as categorical cardinality threshold refers to If num_as_cat is true, then treat numeric columns also as categorical columns if the cardinality is > this value. Setting to 0 allows all numeric to be treated as categorical if num_as_cat is True." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting numeric_as_categorical_cardinality_threshold", + "output": "numeric as categorical cardinality threshold refers to If num_as_cat is true, then treat numeric columns also as categorical columns if the cardinality is > this value. Setting to 0 allows all numeric to be treated as categorical if num_as_cat is True." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting numeric_as_categorical_cardinality_threshold", + "output": "numeric as categorical cardinality threshold refers to If num_as_cat is true, then treat numeric columns also as categorical columns if the cardinality is > this value. Setting to 0 allows all numeric to be treated as categorical if num_as_cat is True." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "numeric_as_ohe_categorical_cardinality_threshold", + "output": "numeric as ohe categorical cardinality threshold refers to If num_as_cat is true, then treat numeric columns also as categorical columns to possibly one-hot encode if the cardinality is > this value. Setting to 0 allows all numeric to be treated as categorical to possibly ohe-hot encode if num_as_cat is True." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "numeric_as_ohe_categorical_cardinality_threshold", + "output": "numeric as ohe categorical cardinality threshold refers to If num_as_cat is true, then treat numeric columns also as categorical columns to possibly one-hot encode if the cardinality is > this value. Setting to 0 allows all numeric to be treated as categorical to possibly ohe-hot encode if num_as_cat is True." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "numeric as ohe categorical cardinality threshold", + "output": "numeric as ohe categorical cardinality threshold refers to If num_as_cat is true, then treat numeric columns also as categorical columns to possibly one-hot encode if the cardinality is > this value. Setting to 0 allows all numeric to be treated as categorical to possibly ohe-hot encode if num_as_cat is True." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "numeric as ohe categorical cardinality threshold refers to If num_as_cat is true, then treat numeric columns also as categorical columns to possibly one-hot encode if the cardinality is > this value. Setting to 0 allows all numeric to be treated as categorical to possibly ohe-hot encode if num_as_cat is True." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting numeric_as_ohe_categorical_cardinality_threshold", + "output": "numeric as ohe categorical cardinality threshold refers to If num_as_cat is true, then treat numeric columns also as categorical columns to possibly one-hot encode if the cardinality is > this value. Setting to 0 allows all numeric to be treated as categorical to possibly ohe-hot encode if num_as_cat is True." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting numeric_as_ohe_categorical_cardinality_threshold", + "output": "numeric as ohe categorical cardinality threshold refers to If num_as_cat is true, then treat numeric columns also as categorical columns to possibly one-hot encode if the cardinality is > this value. Setting to 0 allows all numeric to be treated as categorical to possibly ohe-hot encode if num_as_cat is True." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "one_hot_encoding_show_actual_levels_in_features", + "output": "one hot encoding show actual levels in features refers to Whether to show real levels in One Hot Encoding feature names. Leads to feature aggregation problems when switch between binning and not binning in fold splits. Feature description will still contain levels in each bin if True or False.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "one_hot_encoding_show_actual_levels_in_features", + "output": "one hot encoding show actual levels in features refers to Whether to show real levels in One Hot Encoding feature names. Leads to feature aggregation problems when switch between binning and not binning in fold splits. Feature description will still contain levels in each bin if True or False.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "one hot encoding show actual levels in features", + "output": "one hot encoding show actual levels in features refers to Whether to show real levels in One Hot Encoding feature names. Leads to feature aggregation problems when switch between binning and not binning in fold splits. 
Feature description will still contain levels in each bin if True or False.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to show real levels in One Hot Encoding feature names. Leads to feature aggregation problems when switch between binning and not binning in fold splits. Feature description will still contain levels in each bin if True or False.: ", + "output": "one hot encoding show actual levels in features refers to Whether to show real levels in One Hot Encoding feature names. Leads to feature aggregation problems when switch between binning and not binning in fold splits. Feature description will still contain levels in each bin if True or False.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting one_hot_encoding_show_actual_levels_in_features", + "output": "one hot encoding show actual levels in features refers to Whether to show real levels in One Hot Encoding feature names. Leads to feature aggregation problems when switch between binning and not binning in fold splits. Feature description will still contain levels in each bin if True or False.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting one_hot_encoding_show_actual_levels_in_features", + "output": "one hot encoding show actual levels in features refers to Whether to show real levels in One Hot Encoding feature names. Leads to feature aggregation problems when switch between binning and not binning in fold splits. 
Feature description will still contain levels in each bin if True or False.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_ensemble_level", + "output": "fixed ensemble level refers to Fixed ensemble_level-1 = auto, based upon ensemble_accuracy_switch, accuracy, size of data, etc.0 = No ensemble, only final single model on validated iteration/tree count1 = 1 model, multiple ensemble folds (cross-validation)>=2 = >=2 models, multiple ensemble folds (cross-validation) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_ensemble_level", + "output": "fixed ensemble level refers to Ensemble level for final modeling pipeline: Fixed ensemble_level-1 = auto, based upon ensemble_accuracy_switch, accuracy, size of data, etc.0 = No ensemble, only final single model on validated iteration/tree count1 = 1 model, multiple ensemble folds (cross-validation)>=2 = >=2 models, multiple ensemble folds (cross-validation) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed ensemble level", + "output": "fixed ensemble level refers to Ensemble level for final modeling pipeline: Fixed ensemble_level-1 = auto, based upon ensemble_accuracy_switch, accuracy, size of data, etc.0 = No ensemble, only final single model on validated iteration/tree count1 = 1 model, multiple ensemble folds (cross-validation)>=2 = >=2 models, multiple ensemble folds (cross-validation) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Ensemble level for final modeling pipeline: ", + "output": "fixed ensemble level refers to Ensemble level for final modeling pipeline: Fixed ensemble_level-1 = auto, based upon ensemble_accuracy_switch, accuracy, size of data, etc.0 = No ensemble, only final 
single model on validated iteration/tree count1 = 1 model, multiple ensemble folds (cross-validation)>=2 = >=2 models, multiple ensemble folds (cross-validation) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fixed_ensemble_level", + "output": "fixed ensemble level refers to Fixed ensemble_level-1 = auto, based upon ensemble_accuracy_switch, accuracy, size of data, etc.0 = No ensemble, only final single model on validated iteration/tree count1 = 1 model, multiple ensemble folds (cross-validation)>=2 = >=2 models, multiple ensemble folds (cross-validation) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fixed_ensemble_level", + "output": "fixed ensemble level refers to Ensemble level for final modeling pipeline: Fixed ensemble_level-1 = auto, based upon ensemble_accuracy_switch, accuracy, size of data, etc.0 = No ensemble, only final single model on validated iteration/tree count1 = 1 model, multiple ensemble folds (cross-validation)>=2 = >=2 models, multiple ensemble folds (cross-validation) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cross_validate_single_final_model", + "output": "cross validate single final model refers to If enabled, use cross-validation to determine optimal parameters for single final model, and to be able to create training holdout predictions." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cross_validate_single_final_model", + "output": "cross validate single final model refers to Cross-validate single final model: If enabled, use cross-validation to determine optimal parameters for single final model, and to be able to create training holdout predictions." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cross validate single final model", + "output": "cross validate single final model refers to Cross-validate single final model: If enabled, use cross-validation to determine optimal parameters for single final model, and to be able to create training holdout predictions." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Cross-validate single final model: ", + "output": "cross validate single final model refers to Cross-validate single final model: If enabled, use cross-validation to determine optimal parameters for single final model, and to be able to create training holdout predictions." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting cross_validate_single_final_model", + "output": "cross validate single final model refers to If enabled, use cross-validation to determine optimal parameters for single final model, and to be able to create training holdout predictions." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting cross_validate_single_final_model", + "output": "cross validate single final model refers to Cross-validate single final model: If enabled, use cross-validation to determine optimal parameters for single final model, and to be able to create training holdout predictions." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ensemble_meta_learner", + "output": "ensemble meta learner refers to Model to combine base model predictions, for experiments that create a final pipelineconsisting of multiple base models. 
blender: Creates a linear blend with non-negative weights that add to 1 (blending) - recommended extra_trees: Creates a tree model to non-linearly combine the base models (stacking) - experimental, and recommended to also set enable cross_validate_meta_learner. neural_net: Creates a neural net model to non-linearly combine the base models (stacking) - experimental, and recommended to also set enable cross_validate_meta_learner. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ensemble_meta_learner", + "output": "ensemble meta learner refers to Type of ensemble meta learner. Blender is recommended for most use cases.: Model to combine base model predictions, for experiments that create a final pipelineconsisting of multiple base models. blender: Creates a linear blend with non-negative weights that add to 1 (blending) - recommended extra_trees: Creates a tree model to non-linearly combine the base models (stacking) - experimental, and recommended to also set enable cross_validate_meta_learner. neural_net: Creates a neural net model to non-linearly combine the base models (stacking) - experimental, and recommended to also set enable cross_validate_meta_learner. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ensemble meta learner", + "output": "ensemble meta learner refers to Type of ensemble meta learner. Blender is recommended for most use cases.: Model to combine base model predictions, for experiments that create a final pipelineconsisting of multiple base models. blender: Creates a linear blend with non-negative weights that add to 1 (blending) - recommended extra_trees: Creates a tree model to non-linearly combine the base models (stacking) - experimental, and recommended to also set enable cross_validate_meta_learner. 
neural_net: Creates a neural net model to non-linearly combine the base models (stacking) - experimental, and recommended to also set enable cross_validate_meta_learner. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Type of ensemble meta learner. Blender is recommended for most use cases.: ", + "output": "ensemble meta learner refers to Type of ensemble meta learner. Blender is recommended for most use cases.: Model to combine base model predictions, for experiments that create a final pipelineconsisting of multiple base models. blender: Creates a linear blend with non-negative weights that add to 1 (blending) - recommended extra_trees: Creates a tree model to non-linearly combine the base models (stacking) - experimental, and recommended to also set enable cross_validate_meta_learner. neural_net: Creates a neural net model to non-linearly combine the base models (stacking) - experimental, and recommended to also set enable cross_validate_meta_learner. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ensemble_meta_learner", + "output": "ensemble meta learner refers to Model to combine base model predictions, for experiments that create a final pipelineconsisting of multiple base models. blender: Creates a linear blend with non-negative weights that add to 1 (blending) - recommended extra_trees: Creates a tree model to non-linearly combine the base models (stacking) - experimental, and recommended to also set enable cross_validate_meta_learner. neural_net: Creates a neural net model to non-linearly combine the base models (stacking) - experimental, and recommended to also set enable cross_validate_meta_learner. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ensemble_meta_learner", + "output": "ensemble meta learner refers to Type of ensemble meta learner. 
Blender is recommended for most use cases.: Model to combine base model predictions, for experiments that create a final pipelineconsisting of multiple base models. blender: Creates a linear blend with non-negative weights that add to 1 (blending) - recommended extra_trees: Creates a tree model to non-linearly combine the base models (stacking) - experimental, and recommended to also set enable cross_validate_meta_learner. neural_net: Creates a neural net model to non-linearly combine the base models (stacking) - experimental, and recommended to also set enable cross_validate_meta_learner. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cross_validate_meta_learner", + "output": "cross validate meta learner refers to If enabled, use cross-validation to create an ensemble for the meta learner itself. Especially recommended for``ensemble_meta_learner='extra_trees'``, to make unbiased training holdout predictions.Will disable MOJO if enabled. Not needed for ``ensemble_meta_learner='blender'``.\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cross_validate_meta_learner", + "output": "cross validate meta learner refers to Cross-validate meta learner for final ensemble.: If enabled, use cross-validation to create an ensemble for the meta learner itself. Especially recommended for``ensemble_meta_learner='extra_trees'``, to make unbiased training holdout predictions.Will disable MOJO if enabled. Not needed for ``ensemble_meta_learner='blender'``.\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cross validate meta learner", + "output": "cross validate meta learner refers to Cross-validate meta learner for final ensemble.: If enabled, use cross-validation to create an ensemble for the meta learner itself. 
Especially recommended for``ensemble_meta_learner='extra_trees'``, to make unbiased training holdout predictions.Will disable MOJO if enabled. Not needed for ``ensemble_meta_learner='blender'``.\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Cross-validate meta learner for final ensemble.: ", + "output": "cross validate meta learner refers to Cross-validate meta learner for final ensemble.: If enabled, use cross-validation to create an ensemble for the meta learner itself. Especially recommended for``ensemble_meta_learner='extra_trees'``, to make unbiased training holdout predictions.Will disable MOJO if enabled. Not needed for ``ensemble_meta_learner='blender'``.\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting cross_validate_meta_learner", + "output": "cross validate meta learner refers to If enabled, use cross-validation to create an ensemble for the meta learner itself. Especially recommended for``ensemble_meta_learner='extra_trees'``, to make unbiased training holdout predictions.Will disable MOJO if enabled. Not needed for ``ensemble_meta_learner='blender'``.\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting cross_validate_meta_learner", + "output": "cross validate meta learner refers to Cross-validate meta learner for final ensemble.: If enabled, use cross-validation to create an ensemble for the meta learner itself. Especially recommended for``ensemble_meta_learner='extra_trees'``, to make unbiased training holdout predictions.Will disable MOJO if enabled. 
Not needed for ``ensemble_meta_learner='blender'``.\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "parameter_tuning_num_models", + "output": "parameter tuning num models refers to Number of models to tune during pre-evolution phase Can make this lower to avoid excessive tuning, or make higher to do enhanced tuning. ``-1 : auto`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "parameter_tuning_num_models", + "output": "parameter tuning num models refers to Number of models during tuning phase (-1 = auto): Number of models to tune during pre-evolution phase Can make this lower to avoid excessive tuning, or make higher to do enhanced tuning. ``-1 : auto`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "parameter tuning num models", + "output": "parameter tuning num models refers to Number of models during tuning phase (-1 = auto): Number of models to tune during pre-evolution phase Can make this lower to avoid excessive tuning, or make higher to do enhanced tuning. ``-1 : auto`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of models during tuning phase (-1 = auto): ", + "output": "parameter tuning num models refers to Number of models during tuning phase (-1 = auto): Number of models to tune during pre-evolution phase Can make this lower to avoid excessive tuning, or make higher to do enhanced tuning. ``-1 : auto`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting parameter_tuning_num_models", + "output": "parameter tuning num models refers to Number of models to tune during pre-evolution phase Can make this lower to avoid excessive tuning, or make higher to do enhanced tuning. 
``-1 : auto`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting parameter_tuning_num_models", + "output": "parameter tuning num models refers to Number of models during tuning phase (-1 = auto): Number of models to tune during pre-evolution phase Can make this lower to avoid excessive tuning, or make higher to do enhanced tuning. ``-1 : auto`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "parameter_tuning_num_models_sequence", + "output": "parameter tuning num models sequence refers to Number of models (out of all parameter_tuning_num_models) to have as SEQUENCE instead of random features/parameters. ``-1 : auto, use at least one default individual per model class tuned`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "parameter_tuning_num_models_sequence", + "output": "parameter tuning num models sequence refers to Number of default simple models during tuning phase (-1 = auto): Number of models (out of all parameter_tuning_num_models) to have as SEQUENCE instead of random features/parameters. ``-1 : auto, use at least one default individual per model class tuned`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "parameter tuning num models sequence", + "output": "parameter tuning num models sequence refers to Number of default simple models during tuning phase (-1 = auto): Number of models (out of all parameter_tuning_num_models) to have as SEQUENCE instead of random features/parameters. 
``-1 : auto, use at least one default individual per model class tuned`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of default simple models during tuning phase (-1 = auto): ", + "output": "parameter tuning num models sequence refers to Number of default simple models during tuning phase (-1 = auto): Number of models (out of all parameter_tuning_num_models) to have as SEQUENCE instead of random features/parameters. ``-1 : auto, use at least one default individual per model class tuned`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting parameter_tuning_num_models_sequence", + "output": "parameter tuning num models sequence refers to Number of models (out of all parameter_tuning_num_models) to have as SEQUENCE instead of random features/parameters. ``-1 : auto, use at least one default individual per model class tuned`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting parameter_tuning_num_models_sequence", + "output": "parameter tuning num models sequence refers to Number of default simple models during tuning phase (-1 = auto): Number of models (out of all parameter_tuning_num_models) to have as SEQUENCE instead of random features/parameters. ``-1 : auto, use at least one default individual per model class tuned`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "parameter_tuning_num_models_extra", + "output": "parameter tuning num models extra refers to Number of models to add during tuning that cover other cases, like for TS having no TE on time column groups. 
``-1 : auto, adds additional models to protect against overfit on high-gain training features.`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "parameter_tuning_num_models_extra", + "output": "parameter tuning num models extra refers to Number of extra models during tuning phase (-1 = auto): Number of models to add during tuning that cover other cases, like for TS having no TE on time column groups. ``-1 : auto, adds additional models to protect against overfit on high-gain training features.`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "parameter tuning num models extra", + "output": "parameter tuning num models extra refers to Number of extra models during tuning phase (-1 = auto): Number of models to add during tuning that cover other cases, like for TS having no TE on time column groups. ``-1 : auto, adds additional models to protect against overfit on high-gain training features.`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of extra models during tuning phase (-1 = auto): ", + "output": "parameter tuning num models extra refers to Number of extra models during tuning phase (-1 = auto): Number of models to add during tuning that cover other cases, like for TS having no TE on time column groups. ``-1 : auto, adds additional models to protect against overfit on high-gain training features.`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting parameter_tuning_num_models_extra", + "output": "parameter tuning num models extra refers to Number of models to add during tuning that cover other cases, like for TS having no TE on time column groups. 
``-1 : auto, adds additional models to protect against overfit on high-gain training features.`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting parameter_tuning_num_models_extra", + "output": "parameter tuning num models extra refers to Number of extra models during tuning phase (-1 = auto): Number of models to add during tuning that cover other cases, like for TS having no TE on time column groups. ``-1 : auto, adds additional models to protect against overfit on high-gain training features.`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_tuning_instances", + "output": "num tuning instances refers to Dictionary of model class name (keys) and number (values) of instances." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_tuning_instances", + "output": "num tuning instances refers to Num. in tuning: Dictionary of model class name (keys) and number (values) of instances." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num tuning instances", + "output": "num tuning instances refers to Num. in tuning: Dictionary of model class name (keys) and number (values) of instances." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Num. in tuning: ", + "output": "num tuning instances refers to Num. in tuning: Dictionary of model class name (keys) and number (values) of instances." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_tuning_instances", + "output": "num tuning instances refers to Dictionary of model class name (keys) and number (values) of instances." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_tuning_instances", + "output": "num tuning instances refers to Num. in tuning: Dictionary of model class name (keys) and number (values) of instances." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "validate_meta_learner", + "output": "validate meta learner refers to Enable basic logging and notifications for ensemble meta learner: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "validate_meta_learner", + "output": "validate meta learner refers to Enable basic logging and notifications for ensemble meta learner: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "validate meta learner", + "output": "validate meta learner refers to Enable basic logging and notifications for ensemble meta learner: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable basic logging and notifications for ensemble meta learner: ", + "output": "validate meta learner refers to Enable basic logging and notifications for ensemble meta learner: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting validate_meta_learner", + "output": "validate meta learner refers to Enable basic logging and notifications for ensemble meta learner: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting validate_meta_learner", + "output": "validate meta learner refers to Enable basic logging and notifications for ensemble meta learner: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": 
"validate_meta_learner_extra", + "output": "validate meta learner extra refers to Enable extra logging for ensemble meta learner: ensemble must be at least as good as each base model: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "validate_meta_learner_extra", + "output": "validate meta learner extra refers to Enable extra logging for ensemble meta learner: ensemble must be at least as good as each base model: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "validate meta learner extra", + "output": "validate meta learner extra refers to Enable extra logging for ensemble meta learner: ensemble must be at least as good as each base model: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable extra logging for ensemble meta learner: ensemble must be at least as good as each base model: ", + "output": "validate meta learner extra refers to Enable extra logging for ensemble meta learner: ensemble must be at least as good as each base model: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting validate_meta_learner_extra", + "output": "validate meta learner extra refers to Enable extra logging for ensemble meta learner: ensemble must be at least as good as each base model: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting validate_meta_learner_extra", + "output": "validate meta learner extra refers to Enable extra logging for ensemble meta learner: ensemble must be at least as good as each base model: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_num_folds_evolution", + "output": "fixed num folds evolution refers to Specify the 
fixed number of cross-validation folds (if >= 2) for feature evolution. (The actual number of splits allowed can be less and is determined at experiment run-time)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_num_folds_evolution", + "output": "fixed num folds evolution refers to Number of cross-validation folds for feature evolution (-1 = auto): Specify the fixed number of cross-validation folds (if >= 2) for feature evolution. (The actual number of splits allowed can be less and is determined at experiment run-time)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed num folds evolution", + "output": "fixed num folds evolution refers to Number of cross-validation folds for feature evolution (-1 = auto): Specify the fixed number of cross-validation folds (if >= 2) for feature evolution. (The actual number of splits allowed can be less and is determined at experiment run-time)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of cross-validation folds for feature evolution (-1 = auto): ", + "output": "fixed num folds evolution refers to Number of cross-validation folds for feature evolution (-1 = auto): Specify the fixed number of cross-validation folds (if >= 2) for feature evolution. (The actual number of splits allowed can be less and is determined at experiment run-time)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fixed_num_folds_evolution", + "output": "fixed num folds evolution refers to Specify the fixed number of cross-validation folds (if >= 2) for feature evolution. (The actual number of splits allowed can be less and is determined at experiment run-time)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fixed_num_folds_evolution", + "output": "fixed num folds evolution refers to Number of cross-validation folds for feature evolution (-1 = auto): Specify the fixed number of cross-validation folds (if >= 2) for feature evolution. (The actual number of splits allowed can be less and is determined at experiment run-time)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_num_folds", + "output": "fixed num folds refers to Specify the fixed number of cross-validation folds (if >= 2) for the final model. (The actual number of splits allowed can be less and is determined at experiment run-time)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_num_folds", + "output": "fixed num folds refers to Number of cross-validation folds for final model (-1 = auto): Specify the fixed number of cross-validation folds (if >= 2) for the final model. (The actual number of splits allowed can be less and is determined at experiment run-time)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed num folds", + "output": "fixed num folds refers to Number of cross-validation folds for final model (-1 = auto): Specify the fixed number of cross-validation folds (if >= 2) for the final model. (The actual number of splits allowed can be less and is determined at experiment run-time)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of cross-validation folds for final model (-1 = auto): ", + "output": "fixed num folds refers to Number of cross-validation folds for final model (-1 = auto): Specify the fixed number of cross-validation folds (if >= 2) for the final model. (The actual number of splits allowed can be less and is determined at experiment run-time)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fixed_num_folds", + "output": "fixed num folds refers to Specify the fixed number of cross-validation folds (if >= 2) for the final model. (The actual number of splits allowed can be less and is determined at experiment run-time)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fixed_num_folds", + "output": "fixed num folds refers to Number of cross-validation folds for final model (-1 = auto): Specify the fixed number of cross-validation folds (if >= 2) for the final model. (The actual number of splits allowed can be less and is determined at experiment run-time)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_only_first_fold_model", + "output": "fixed only first fold model refers to set \"on\" to force only first fold for models - useful for quick runs regardless of data" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_only_first_fold_model", + "output": "fixed only first fold model refers to Force only first fold for models: set \"on\" to force only first fold for models - useful for quick runs regardless of data" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed only first fold model", + "output": "fixed only first fold model refers to Force only first fold for models: set \"on\" to force only first fold for models - useful for quick runs regardless of data" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Force only first fold for models: ", + "output": "fixed only first fold model refers to Force only first fold for models: set \"on\" to force only first fold for models - useful for quick runs regardless of data" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fixed_only_first_fold_model", + "output": "fixed only first fold model refers to set \"on\" to force only first fold for models - useful for quick runs regardless of data" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fixed_only_first_fold_model", + "output": "fixed only first fold model refers to Force only first fold for models: set \"on\" to force only first fold for models - useful for quick runs regardless of data" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for 
Driverless AI", + "input": "fixed_fold_reps", + "output": "fixed fold reps refers to Set the number of repeated cross-validation folds for feature evolution and final models (if > 0), 0 is default. Only for ensembles that do cross-validation (so no external validation and not time-series), not for single final models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_fold_reps", + "output": "fixed fold reps refers to Number of repeated cross-validation folds. 0 is auto.: Set the number of repeated cross-validation folds for feature evolution and final models (if > 0), 0 is default. Only for ensembles that do cross-validation (so no external validation and not time-series), not for single final models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed fold reps", + "output": "fixed fold reps refers to Number of repeated cross-validation folds. 0 is auto.: Set the number of repeated cross-validation folds for feature evolution and final models (if > 0), 0 is default. Only for ensembles that do cross-validation (so no external validation and not time-series), not for single final models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of repeated cross-validation folds. 0 is auto.: ", + "output": "fixed fold reps refers to Number of repeated cross-validation folds. 0 is auto.: Set the number of repeated cross-validation folds for feature evolution and final models (if > 0), 0 is default. Only for ensembles that do cross-validation (so no external validation and not time-series), not for single final models." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fixed_fold_reps", + "output": "fixed fold reps refers to Set the number of repeated cross-validation folds for feature evolution and final models (if > 0), 0 is default. Only for ensembles that do cross-validation (so no external validation and not time-series), not for single final models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fixed_fold_reps", + "output": "fixed fold reps refers to Number of repeated cross-validation folds. 0 is auto.: Set the number of repeated cross-validation folds for feature evolution and final models (if > 0), 0 is default. Only for ensembles that do cross-validation (so no external validation and not time-series), not for single final models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_fold_ids_show", + "output": "num fold ids show refers to Maximum number of fold IDs to show in logs: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_fold_ids_show", + "output": "num fold ids show refers to Maximum number of fold IDs to show in logs: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num fold ids show", + "output": "num fold ids show refers to Maximum number of fold IDs to show in logs: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of fold IDs to show in logs: ", + "output": "num fold ids show refers to Maximum number of fold IDs to show in logs: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_fold_ids_show", + "output": "num fold ids show refers to Maximum number 
of fold IDs to show in logs: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_fold_ids_show", + "output": "num fold ids show refers to Maximum number of fold IDs to show in logs: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fold_scores_instability_warning_threshold", + "output": "fold scores instability warning threshold refers to Declare positive fold scores as unstable if stddev / mean is larger than this value: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fold_scores_instability_warning_threshold", + "output": "fold scores instability warning threshold refers to Declare positive fold scores as unstable if stddev / mean is larger than this value: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fold scores instability warning threshold", + "output": "fold scores instability warning threshold refers to Declare positive fold scores as unstable if stddev / mean is larger than this value: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Declare positive fold scores as unstable if stddev / mean is larger than this value: ", + "output": "fold scores instability warning threshold refers to Declare positive fold scores as unstable if stddev / mean is larger than this value: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fold_scores_instability_warning_threshold", + "output": "fold scores instability warning threshold refers to Declare positive fold scores as unstable if stddev / mean is larger than this value: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting 
fold_scores_instability_warning_threshold", + "output": "fold scores instability warning threshold refers to Declare positive fold scores as unstable if stddev / mean is larger than this value: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_evolution_data_size", + "output": "feature evolution data size refers to Upper limit on the number of rows x number of columns for feature evolution (applies to both training and validation/holdout splits)feature evolution is the process that determines which features will be derived.Depending on accuracy settings, a fraction of this value will be used " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_evolution_data_size", + "output": "feature evolution data size refers to Max. num. of rows x num. of columns for feature evolution data splits (not for final pipeline): Upper limit on the number of rows x number of columns for feature evolution (applies to both training and validation/holdout splits)feature evolution is the process that determines which features will be derived.Depending on accuracy settings, a fraction of this value will be used " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature evolution data size", + "output": "feature evolution data size refers to Max. num. of rows x num. of columns for feature evolution data splits (not for final pipeline): Upper limit on the number of rows x number of columns for feature evolution (applies to both training and validation/holdout splits)feature evolution is the process that determines which features will be derived.Depending on accuracy settings, a fraction of this value will be used " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. num. 
of rows x num. of columns for feature evolution data splits (not for final pipeline): ", + "output": "feature evolution data size refers to Max. num. of rows x num. of columns for feature evolution data splits (not for final pipeline): Upper limit on the number of rows x number of columns for feature evolution (applies to both training and validation/holdout splits)feature evolution is the process that determines which features will be derived.Depending on accuracy settings, a fraction of this value will be used " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting feature_evolution_data_size", + "output": "feature evolution data size refers to Upper limit on the number of rows x number of columns for feature evolution (applies to both training and validation/holdout splits)feature evolution is the process that determines which features will be derived.Depending on accuracy settings, a fraction of this value will be used " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting feature_evolution_data_size", + "output": "feature evolution data size refers to Max. num. of rows x num. of columns for feature evolution data splits (not for final pipeline): Upper limit on the number of rows x number of columns for feature evolution (applies to both training and validation/holdout splits)feature evolution is the process that determines which features will be derived.Depending on accuracy settings, a fraction of this value will be used " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "final_pipeline_data_size", + "output": "final pipeline data size refers to Upper limit on the number of rows x number of columns for training final pipeline. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "final_pipeline_data_size", + "output": "final pipeline data size refers to Max. num. of rows x num. of columns for reducing training data set (for final pipeline): Upper limit on the number of rows x number of columns for training final pipeline. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "final pipeline data size", + "output": "final pipeline data size refers to Max. num. of rows x num. of columns for reducing training data set (for final pipeline): Upper limit on the number of rows x number of columns for training final pipeline. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. num. of rows x num. of columns for reducing training data set (for final pipeline): ", + "output": "final pipeline data size refers to Max. num. of rows x num. of columns for reducing training data set (for final pipeline): Upper limit on the number of rows x number of columns for training final pipeline. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting final_pipeline_data_size", + "output": "final pipeline data size refers to Upper limit on the number of rows x number of columns for training final pipeline. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting final_pipeline_data_size", + "output": "final pipeline data size refers to Max. num. of rows x num. of columns for reducing training data set (for final pipeline): Upper limit on the number of rows x number of columns for training final pipeline. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "limit_validation_size", + "output": "limit validation size refers to Whether to automatically limit validation data size using feature_evolution_data_size (giving max_rows_feature_evolution shown in logs) for tuning-evolution, and using final_pipeline_data_size, max_validation_to_training_size_ratio_for_final_ensemble for final model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "limit_validation_size", + "output": "limit validation size refers to Limit validation size: Whether to automatically limit validation data size using feature_evolution_data_size (giving max_rows_feature_evolution shown in logs) for tuning-evolution, and using final_pipeline_data_size, max_validation_to_training_size_ratio_for_final_ensemble for final model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "limit validation size", + "output": "limit validation size refers to Limit validation size: Whether to automatically limit validation data size using feature_evolution_data_size (giving max_rows_feature_evolution shown in logs) for tuning-evolution, and using final_pipeline_data_size, max_validation_to_training_size_ratio_for_final_ensemble for final model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Limit validation size: ", + "output": "limit validation size refers to Limit validation size: Whether to automatically limit validation data size using feature_evolution_data_size (giving max_rows_feature_evolution shown in logs) for tuning-evolution, and using final_pipeline_data_size, max_validation_to_training_size_ratio_for_final_ensemble for final model." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting limit_validation_size", + "output": "limit validation size refers to Whether to automatically limit validation data size using feature_evolution_data_size (giving max_rows_feature_evolution shown in logs) for tuning-evolution, and using final_pipeline_data_size, max_validation_to_training_size_ratio_for_final_ensemble for final model." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting limit_validation_size", + "output": "limit validation size refers to Limit validation size: Whether to automatically limit validation data size using feature_evolution_data_size (giving max_rows_feature_evolution shown in logs) for tuning-evolution, and using final_pipeline_data_size, max_validation_to_training_size_ratio_for_final_ensemble for final model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_validation_to_training_size_ratio_for_final_ensemble", + "output": "max validation to training size ratio for final ensemble refers to Smaller values can speed up final pipeline model training, as validation data is only used for early stopping.Note that final model predictions and scores will always be provided on the full dataset provided. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_validation_to_training_size_ratio_for_final_ensemble", + "output": "max validation to training size ratio for final ensemble refers to Max. size of validation data relative to training data (for final pipeline), otherwise will sample: Smaller values can speed up final pipeline model training, as validation data is only used for early stopping.Note that final model predictions and scores will always be provided on the full dataset provided. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max validation to training size ratio for final ensemble", + "output": "max validation to training size ratio for final ensemble refers to Max. size of validation data relative to training data (for final pipeline), otherwise will sample: Smaller values can speed up final pipeline model training, as validation data is only used for early stopping.Note that final model predictions and scores will always be provided on the full dataset provided. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. size of validation data relative to training data (for final pipeline), otherwise will sample: ", + "output": "max validation to training size ratio for final ensemble refers to Max. size of validation data relative to training data (for final pipeline), otherwise will sample: Smaller values can speed up final pipeline model training, as validation data is only used for early stopping.Note that final model predictions and scores will always be provided on the full dataset provided. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_validation_to_training_size_ratio_for_final_ensemble", + "output": "max validation to training size ratio for final ensemble refers to Smaller values can speed up final pipeline model training, as validation data is only used for early stopping.Note that final model predictions and scores will always be provided on the full dataset provided. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_validation_to_training_size_ratio_for_final_ensemble", + "output": "max validation to training size ratio for final ensemble refers to Max. 
size of validation data relative to training data (for final pipeline), otherwise will sample: Smaller values can speed up final pipeline model training, as validation data is only used for early stopping.Note that final model predictions and scores will always be provided on the full dataset provided. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "force_stratified_splits_for_imbalanced_threshold_binary", + "output": "force stratified splits for imbalanced threshold binary refers to Ratio of minority to majority class of the target column beyond which stratified sampling is done for binary classification. Otherwise perform random sampling. Set to 0 to always do random sampling. Set to 1 to always do stratified sampling." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "force_stratified_splits_for_imbalanced_threshold_binary", + "output": "force stratified splits for imbalanced threshold binary refers to Perform stratified sampling for binary classification if the target is more imbalanced than this.: Ratio of minority to majority class of the target column beyond which stratified sampling is done for binary classification. Otherwise perform random sampling. Set to 0 to always do random sampling. Set to 1 to always do stratified sampling." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "force stratified splits for imbalanced threshold binary", + "output": "force stratified splits for imbalanced threshold binary refers to Perform stratified sampling for binary classification if the target is more imbalanced than this.: Ratio of minority to majority class of the target column beyond which stratified sampling is done for binary classification. Otherwise perform random sampling. Set to 0 to always do random sampling. 
Set to 1 to always do stratified sampling." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Perform stratified sampling for binary classification if the target is more imbalanced than this.: ", + "output": "force stratified splits for imbalanced threshold binary refers to Perform stratified sampling for binary classification if the target is more imbalanced than this.: Ratio of minority to majority class of the target column beyond which stratified sampling is done for binary classification. Otherwise perform random sampling. Set to 0 to always do random sampling. Set to 1 to always do stratified sampling." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting force_stratified_splits_for_imbalanced_threshold_binary", + "output": "force stratified splits for imbalanced threshold binary refers to Ratio of minority to majority class of the target column beyond which stratified sampling is done for binary classification. Otherwise perform random sampling. Set to 0 to always do random sampling. Set to 1 to always do stratified sampling." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting force_stratified_splits_for_imbalanced_threshold_binary", + "output": "force stratified splits for imbalanced threshold binary refers to Perform stratified sampling for binary classification if the target is more imbalanced than this.: Ratio of minority to majority class of the target column beyond which stratified sampling is done for binary classification. Otherwise perform random sampling. Set to 0 to always do random sampling. Set to 1 to always do stratified sampling." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "force_stratified_splits_for_binary_max_rows", + "output": "force stratified splits for binary max rows refers to Perform stratified sampling for binary classification if the dataset has fewer rows than this.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "force_stratified_splits_for_binary_max_rows", + "output": "force stratified splits for binary max rows refers to Perform stratified sampling for binary classification if the dataset has fewer rows than this.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "force stratified splits for binary max rows", + "output": "force stratified splits for binary max rows refers to Perform stratified sampling for binary classification if the dataset has fewer rows than this.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Perform stratified sampling for binary classification if the dataset has fewer rows than this.: ", + "output": "force stratified splits for binary max rows refers to Perform stratified sampling for binary classification if the dataset has fewer rows than this.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting force_stratified_splits_for_binary_max_rows", + "output": "force stratified splits for binary max rows refers to Perform stratified sampling for binary classification if the dataset has fewer rows than this.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting force_stratified_splits_for_binary_max_rows", + "output": "force stratified splits for binary max rows refers to Perform stratified sampling for binary classification if 
the dataset has fewer rows than this.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stratify_for_regression", + "output": "stratify for regression refers to Specify whether to do stratified sampling for validation fold creation for iid regression problems. Otherwise perform random sampling." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stratify_for_regression", + "output": "stratify for regression refers to Perform stratified sampling for regression problems (using binning).: Specify whether to do stratified sampling for validation fold creation for iid regression problems. Otherwise perform random sampling." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stratify for regression", + "output": "stratify for regression refers to Perform stratified sampling for regression problems (using binning).: Specify whether to do stratified sampling for validation fold creation for iid regression problems. Otherwise perform random sampling." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Perform stratified sampling for regression problems (using binning).: ", + "output": "stratify for regression refers to Perform stratified sampling for regression problems (using binning).: Specify whether to do stratified sampling for validation fold creation for iid regression problems. Otherwise perform random sampling." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting stratify_for_regression", + "output": "stratify for regression refers to Specify whether to do stratified sampling for validation fold creation for iid regression problems. Otherwise perform random sampling." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting stratify_for_regression", + "output": "stratify for regression refers to Perform stratified sampling for regression problems (using binning).: Specify whether to do stratified sampling for validation fold creation for iid regression problems. Otherwise perform random sampling." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_method", + "output": "imbalance sampling method refers to Sampling method for imbalanced binary classification problems. Choices are:\"auto\": sample both classes as needed, depending on data\"over_under_sampling\": over-sample the minority class and under-sample the majority class, depending on data\"under_sampling\": under-sample the majority class to reach class balance\"off\": do not perform any sampling " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_method", + "output": "imbalance sampling method refers to Sampling method for imbalanced binary classification problems: Sampling method for imbalanced binary classification problems. Choices are:\"auto\": sample both classes as needed, depending on data\"over_under_sampling\": over-sample the minority class and under-sample the majority class, depending on data\"under_sampling\": under-sample the majority class to reach class balance\"off\": do not perform any sampling " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance sampling method", + "output": "imbalance sampling method refers to Sampling method for imbalanced binary classification problems: Sampling method for imbalanced binary classification problems. 
Choices are:\"auto\": sample both classes as needed, depending on data\"over_under_sampling\": over-sample the minority class and under-sample the majority class, depending on data\"under_sampling\": under-sample the majority class to reach class balance\"off\": do not perform any sampling " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Sampling method for imbalanced binary classification problems: ", + "output": "imbalance sampling method refers to Sampling method for imbalanced binary classification problems: Sampling method for imbalanced binary classification problems. Choices are:\"auto\": sample both classes as needed, depending on data\"over_under_sampling\": over-sample the minority class and under-sample the majority class, depending on data\"under_sampling\": under-sample the majority class to reach class balance\"off\": do not perform any sampling " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting imbalance_sampling_method", + "output": "imbalance sampling method refers to Sampling method for imbalanced binary classification problems. Choices are:\"auto\": sample both classes as needed, depending on data\"over_under_sampling\": over-sample the minority class and under-sample the majority class, depending on data\"under_sampling\": under-sample the majority class to reach class balance\"off\": do not perform any sampling " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting imbalance_sampling_method", + "output": "imbalance sampling method refers to Sampling method for imbalanced binary classification problems: Sampling method for imbalanced binary classification problems. 
Choices are:\"auto\": sample both classes as needed, depending on data\"over_under_sampling\": over-sample the minority class and under-sample the majority class, depending on data\"under_sampling\": under-sample the majority class to reach class balance\"off\": do not perform any sampling " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_threshold_min_rows_original", + "output": "imbalance sampling threshold min rows original refers to For smaller data, there's no generally no benefit in using imbalanced sampling methods." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_threshold_min_rows_original", + "output": "imbalance sampling threshold min rows original refers to Threshold for minimum number of rows in original training data to allow imbalanced sampling techniques. For smaller data, will disable imbalanced sampling, no matter what imbalance_sampling_method is set to.: For smaller data, there's no generally no benefit in using imbalanced sampling methods." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance sampling threshold min rows original", + "output": "imbalance sampling threshold min rows original refers to Threshold for minimum number of rows in original training data to allow imbalanced sampling techniques. For smaller data, will disable imbalanced sampling, no matter what imbalance_sampling_method is set to.: For smaller data, there's no generally no benefit in using imbalanced sampling methods." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Threshold for minimum number of rows in original training data to allow imbalanced sampling techniques. 
For smaller data, will disable imbalanced sampling, no matter what imbalance_sampling_method is set to.: ", + "output": "imbalance sampling threshold min rows original refers to Threshold for minimum number of rows in original training data to allow imbalanced sampling techniques. For smaller data, will disable imbalanced sampling, no matter what imbalance_sampling_method is set to.: For smaller data, there's no generally no benefit in using imbalanced sampling methods." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting imbalance_sampling_threshold_min_rows_original", + "output": "imbalance sampling threshold min rows original refers to For smaller data, there's no generally no benefit in using imbalanced sampling methods." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting imbalance_sampling_threshold_min_rows_original", + "output": "imbalance sampling threshold min rows original refers to Threshold for minimum number of rows in original training data to allow imbalanced sampling techniques. For smaller data, will disable imbalanced sampling, no matter what imbalance_sampling_method is set to.: For smaller data, there's no generally no benefit in using imbalanced sampling methods." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_ratio_sampling_threshold", + "output": "imbalance ratio sampling threshold refers to For imbalanced binary classification: ratio of majority to minority class equal and above which to enablespecial imbalanced models with sampling techniques (specified by imbalance_sampling_method) to attempt to improve model performance. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_ratio_sampling_threshold", + "output": "imbalance ratio sampling threshold refers to Ratio of majority to minority class for imbalanced binary classification to trigger special sampling techniques if enabled: For imbalanced binary classification: ratio of majority to minority class equal and above which to enablespecial imbalanced models with sampling techniques (specified by imbalance_sampling_method) to attempt to improve model performance. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance ratio sampling threshold", + "output": "imbalance ratio sampling threshold refers to Ratio of majority to minority class for imbalanced binary classification to trigger special sampling techniques if enabled: For imbalanced binary classification: ratio of majority to minority class equal and above which to enablespecial imbalanced models with sampling techniques (specified by imbalance_sampling_method) to attempt to improve model performance. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Ratio of majority to minority class for imbalanced binary classification to trigger special sampling techniques if enabled: ", + "output": "imbalance ratio sampling threshold refers to Ratio of majority to minority class for imbalanced binary classification to trigger special sampling techniques if enabled: For imbalanced binary classification: ratio of majority to minority class equal and above which to enablespecial imbalanced models with sampling techniques (specified by imbalance_sampling_method) to attempt to improve model performance. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting imbalance_ratio_sampling_threshold", + "output": "imbalance ratio sampling threshold refers to For imbalanced binary classification: ratio of majority to minority class equal and above which to enablespecial imbalanced models with sampling techniques (specified by imbalance_sampling_method) to attempt to improve model performance. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting imbalance_ratio_sampling_threshold", + "output": "imbalance ratio sampling threshold refers to Ratio of majority to minority class for imbalanced binary classification to trigger special sampling techniques if enabled: For imbalanced binary classification: ratio of majority to minority class equal and above which to enablespecial imbalanced models with sampling techniques (specified by imbalance_sampling_method) to attempt to improve model performance. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "heavy_imbalance_ratio_sampling_threshold", + "output": "heavy imbalance ratio sampling threshold refers to For heavily imbalanced binary classification: ratio of majority to minority class equal and above which to enable onlyspecial imbalanced models on full original data, without upfront sampling. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "heavy_imbalance_ratio_sampling_threshold", + "output": "heavy imbalance ratio sampling threshold refers to Ratio of majority to minority class for heavily imbalanced binary classification to only enable special sampling techniques if enabled: For heavily imbalanced binary classification: ratio of majority to minority class equal and above which to enable onlyspecial imbalanced models on full original data, without upfront sampling. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "heavy imbalance ratio sampling threshold", + "output": "heavy imbalance ratio sampling threshold refers to Ratio of majority to minority class for heavily imbalanced binary classification to only enable special sampling techniques if enabled: For heavily imbalanced binary classification: ratio of majority to minority class equal and above which to enable onlyspecial imbalanced models on full original data, without upfront sampling. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Ratio of majority to minority class for heavily imbalanced binary classification to only enable special sampling techniques if enabled: ", + "output": "heavy imbalance ratio sampling threshold refers to Ratio of majority to minority class for heavily imbalanced binary classification to only enable special sampling techniques if enabled: For heavily imbalanced binary classification: ratio of majority to minority class equal and above which to enable onlyspecial imbalanced models on full original data, without upfront sampling. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting heavy_imbalance_ratio_sampling_threshold", + "output": "heavy imbalance ratio sampling threshold refers to For heavily imbalanced binary classification: ratio of majority to minority class equal and above which to enable onlyspecial imbalanced models on full original data, without upfront sampling. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting heavy_imbalance_ratio_sampling_threshold", + "output": "heavy imbalance ratio sampling threshold refers to Ratio of majority to minority class for heavily imbalanced binary classification to only enable special sampling techniques if enabled: For heavily imbalanced binary classification: ratio of majority to minority class equal and above which to enable onlyspecial imbalanced models on full original data, without upfront sampling. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_ratio_multiclass_threshold", + "output": "imbalance ratio multiclass threshold refers to Special handling can include special models, special scorers, special feature engineering. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_ratio_multiclass_threshold", + "output": "imbalance ratio multiclass threshold refers to Ratio of most frequent to least frequent class for imbalanced multiclass classification problems equal and above which to trigger special handling due to class imbalance: Special handling can include special models, special scorers, special feature engineering. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance ratio multiclass threshold", + "output": "imbalance ratio multiclass threshold refers to Ratio of most frequent to least frequent class for imbalanced multiclass classification problems equal and above which to trigger special handling due to class imbalance: Special handling can include special models, special scorers, special feature engineering. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Ratio of most frequent to least frequent class for imbalanced multiclass classification problems equal and above which to trigger special handling due to class imbalance: ", + "output": "imbalance ratio multiclass threshold refers to Ratio of most frequent to least frequent class for imbalanced multiclass classification problems equal and above which to trigger special handling due to class imbalance: Special handling can include special models, special scorers, special feature engineering. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting imbalance_ratio_multiclass_threshold", + "output": "imbalance ratio multiclass threshold refers to Special handling can include special models, special scorers, special feature engineering. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting imbalance_ratio_multiclass_threshold", + "output": "imbalance ratio multiclass threshold refers to Ratio of most frequent to least frequent class for imbalanced multiclass classification problems equal and above which to trigger special handling due to class imbalance: Special handling can include special models, special scorers, special feature engineering. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "heavy_imbalance_ratio_multiclass_threshold", + "output": "heavy imbalance ratio multiclass threshold refers to Special handling can include special models, special scorers, special feature engineering. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "heavy_imbalance_ratio_multiclass_threshold", + "output": "heavy imbalance ratio multiclass threshold refers to Ratio of most frequent to least frequent class for imbalanced multiclass classification problems equal and above which to trigger special handling due to heavy class imbalance: Special handling can include special models, special scorers, special feature engineering. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "heavy imbalance ratio multiclass threshold", + "output": "heavy imbalance ratio multiclass threshold refers to Ratio of most frequent to least frequent class for imbalanced multiclass classification problems equal and above which to trigger special handling due to heavy class imbalance: Special handling can include special models, special scorers, special feature engineering. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Ratio of most frequent to least frequent class for imbalanced multiclass classification problems equal and above which to trigger special handling due to heavy class imbalance: ", + "output": "heavy imbalance ratio multiclass threshold refers to Ratio of most frequent to least frequent class for imbalanced multiclass classification problems equal and above which to trigger special handling due to heavy class imbalance: Special handling can include special models, special scorers, special feature engineering. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting heavy_imbalance_ratio_multiclass_threshold", + "output": "heavy imbalance ratio multiclass threshold refers to Special handling can include special models, special scorers, special feature engineering. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting heavy_imbalance_ratio_multiclass_threshold", + "output": "heavy imbalance ratio multiclass threshold refers to Ratio of most frequent to least frequent class for imbalanced multiclass classification problems equal and above which to trigger special handling due to heavy class imbalance: Special handling can include special models, special scorers, special feature engineering. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_number_of_bags", + "output": "imbalance sampling number of bags refers to -1: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_number_of_bags", + "output": "imbalance sampling number of bags refers to Number of bags for sampling methods for imbalanced binary classification (if enabled). -1 for automatic.: -1: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance sampling number of bags", + "output": "imbalance sampling number of bags refers to Number of bags for sampling methods for imbalanced binary classification (if enabled). -1 for automatic.: -1: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of bags for sampling methods for imbalanced binary classification (if enabled). 
-1 for automatic.: ", + "output": "imbalance sampling number of bags refers to Number of bags for sampling methods for imbalanced binary classification (if enabled). -1 for automatic.: -1: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting imbalance_sampling_number_of_bags", + "output": "imbalance sampling number of bags refers to -1: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting imbalance_sampling_number_of_bags", + "output": "imbalance sampling number of bags refers to Number of bags for sampling methods for imbalanced binary classification (if enabled). -1 for automatic.: -1: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_max_number_of_bags", + "output": "imbalance sampling max number of bags refers to -1: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_max_number_of_bags", + "output": "imbalance sampling max number of bags refers to Hard limit on number of bags for sampling methods for imbalanced binary classification.: -1: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance sampling max number of bags", + "output": "imbalance sampling max number of bags refers to Hard limit on number of bags for sampling methods for imbalanced binary classification.: -1: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Hard limit on number of bags for sampling methods for imbalanced binary classification.: ", + "output": "imbalance sampling max number of bags refers to Hard limit on number of bags for sampling methods for imbalanced binary 
classification.: -1: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting imbalance_sampling_max_number_of_bags", + "output": "imbalance sampling max number of bags refers to -1: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting imbalance_sampling_max_number_of_bags", + "output": "imbalance sampling max number of bags refers to Hard limit on number of bags for sampling methods for imbalanced binary classification.: -1: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_max_number_of_bags_feature_evolution", + "output": "imbalance sampling max number of bags feature evolution refers to Only for shift/leakage/tuning/feature evolution models. Not used for final models. Final models can be limited by imbalance_sampling_max_number_of_bags." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_max_number_of_bags_feature_evolution", + "output": "imbalance sampling max number of bags feature evolution refers to Hard limit on number of bags for sampling methods for imbalanced binary classification during feature evolution phase.: Only for shift/leakage/tuning/feature evolution models. Not used for final models. Final models can be limited by imbalance_sampling_max_number_of_bags." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance sampling max number of bags feature evolution", + "output": "imbalance sampling max number of bags feature evolution refers to Hard limit on number of bags for sampling methods for imbalanced binary classification during feature evolution phase.: Only for shift/leakage/tuning/feature evolution models. Not used for final models. 
Final models can be limited by imbalance_sampling_max_number_of_bags." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Hard limit on number of bags for sampling methods for imbalanced binary classification during feature evolution phase.: ", + "output": "imbalance sampling max number of bags feature evolution refers to Hard limit on number of bags for sampling methods for imbalanced binary classification during feature evolution phase.: Only for shift/leakage/tuning/feature evolution models. Not used for final models. Final models can be limited by imbalance_sampling_max_number_of_bags." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting imbalance_sampling_max_number_of_bags_feature_evolution", + "output": "imbalance sampling max number of bags feature evolution refers to Only for shift/leakage/tuning/feature evolution models. Not used for final models. Final models can be limited by imbalance_sampling_max_number_of_bags." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting imbalance_sampling_max_number_of_bags_feature_evolution", + "output": "imbalance sampling max number of bags feature evolution refers to Hard limit on number of bags for sampling methods for imbalanced binary classification during feature evolution phase.: Only for shift/leakage/tuning/feature evolution models. Not used for final models. Final models can be limited by imbalance_sampling_max_number_of_bags." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_max_multiple_data_size", + "output": "imbalance sampling max multiple data size refers to Max. size of data sampled during imbalanced sampling (in terms of dataset size), controls number of bags (approximately). Only for imbalance_sampling_number_of_bags == -1." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_max_multiple_data_size", + "output": "imbalance sampling max multiple data size refers to Max. size of data sampled during imbalanced sampling (in terms of dataset size): Max. size of data sampled during imbalanced sampling (in terms of dataset size), controls number of bags (approximately). Only for imbalance_sampling_number_of_bags == -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance sampling max multiple data size", + "output": "imbalance sampling max multiple data size refers to Max. size of data sampled during imbalanced sampling (in terms of dataset size): Max. size of data sampled during imbalanced sampling (in terms of dataset size), controls number of bags (approximately). Only for imbalance_sampling_number_of_bags == -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. size of data sampled during imbalanced sampling (in terms of dataset size): ", + "output": "imbalance sampling max multiple data size refers to Max. size of data sampled during imbalanced sampling (in terms of dataset size): Max. size of data sampled during imbalanced sampling (in terms of dataset size), controls number of bags (approximately). Only for imbalance_sampling_number_of_bags == -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting imbalance_sampling_max_multiple_data_size", + "output": "imbalance sampling max multiple data size refers to Max. size of data sampled during imbalanced sampling (in terms of dataset size), controls number of bags (approximately). Only for imbalance_sampling_number_of_bags == -1." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting imbalance_sampling_max_multiple_data_size", + "output": "imbalance sampling max multiple data size refers to Max. size of data sampled during imbalanced sampling (in terms of dataset size): Max. size of data sampled during imbalanced sampling (in terms of dataset size), controls number of bags (approximately). Only for imbalance_sampling_number_of_bags == -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_rank_averaging", + "output": "imbalance sampling rank averaging refers to Rank averaging can be helpful when ensembling diverse models when ranking metrics like AUC/Gini metrics are optimized. No MOJO support yet." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_rank_averaging", + "output": "imbalance sampling rank averaging refers to Whether to do rank averaging bagged models inside of imbalanced models, instead of probability averaging: Rank averaging can be helpful when ensembling diverse models when ranking metrics like AUC/Gini metrics are optimized. No MOJO support yet." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance sampling rank averaging", + "output": "imbalance sampling rank averaging refers to Whether to do rank averaging bagged models inside of imbalanced models, instead of probability averaging: Rank averaging can be helpful when ensembling diverse models when ranking metrics like AUC/Gini metrics are optimized. No MOJO support yet." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to do rank averaging bagged models inside of imbalanced models, instead of probability averaging: ", + "output": "imbalance sampling rank averaging refers to Whether to do rank averaging bagged models inside of imbalanced models, instead of probability averaging: Rank averaging can be helpful when ensembling diverse models when ranking metrics like AUC/Gini metrics are optimized. No MOJO support yet." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting imbalance_sampling_rank_averaging", + "output": "imbalance sampling rank averaging refers to Rank averaging can be helpful when ensembling diverse models when ranking metrics like AUC/Gini metrics are optimized. No MOJO support yet." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting imbalance_sampling_rank_averaging", + "output": "imbalance sampling rank averaging refers to Whether to do rank averaging bagged models inside of imbalanced models, instead of probability averaging: Rank averaging can be helpful when ensembling diverse models when ranking metrics like AUC/Gini metrics are optimized. No MOJO support yet." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_target_minority_fraction", + "output": "imbalance sampling target minority fraction refers to A value of 0.5 means that models/algorithms will be presented a balanced target class distribution after applying under/over-sampling techniques on the training data. Sometimes it makes sense to choose a smaller value like 0.1 or 0.01 when starting from an extremely imbalanced original target distribution. 
-1.0: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_sampling_target_minority_fraction", + "output": "imbalance sampling target minority fraction refers to Target fraction of minority class after applying under/over-sampling techniques. -1.0 for automatic: A value of 0.5 means that models/algorithms will be presented a balanced target class distribution after applying under/over-sampling techniques on the training data. Sometimes it makes sense to choose a smaller value like 0.1 or 0.01 when starting from an extremely imbalanced original target distribution. -1.0: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance sampling target minority fraction", + "output": "imbalance sampling target minority fraction refers to Target fraction of minority class after applying under/over-sampling techniques. -1.0 for automatic: A value of 0.5 means that models/algorithms will be presented a balanced target class distribution after applying under/over-sampling techniques on the training data. Sometimes it makes sense to choose a smaller value like 0.1 or 0.01 when starting from an extremely imbalanced original target distribution. -1.0: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Target fraction of minority class after applying under/over-sampling techniques. -1.0 for automatic: ", + "output": "imbalance sampling target minority fraction refers to Target fraction of minority class after applying under/over-sampling techniques. -1.0 for automatic: A value of 0.5 means that models/algorithms will be presented a balanced target class distribution after applying under/over-sampling techniques on the training data. 
Sometimes it makes sense to choose a smaller value like 0.1 or 0.01 when starting from an extremely imbalanced original target distribution. -1.0: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting imbalance_sampling_target_minority_fraction", + "output": "imbalance sampling target minority fraction refers to A value of 0.5 means that models/algorithms will be presented a balanced target class distribution after applying under/over-sampling techniques on the training data. Sometimes it makes sense to choose a smaller value like 0.1 or 0.01 when starting from an extremely imbalanced original target distribution. -1.0: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting imbalance_sampling_target_minority_fraction", + "output": "imbalance sampling target minority fraction refers to Target fraction of minority class after applying under/over-sampling techniques. -1.0 for automatic: A value of 0.5 means that models/algorithms will be presented a balanced target class distribution after applying under/over-sampling techniques on the training data. Sometimes it makes sense to choose a smaller value like 0.1 or 0.01 when starting from an extremely imbalanced original target distribution. -1.0: automatic" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_ratio_notification_threshold", + "output": "imbalance ratio notification threshold refers to For binary classification: ratio of majority to minority class equal and above which to notify of imbalance in GUI to say slightly imbalanced. More than ``imbalance_ratio_sampling_threshold`` will say problem is imbalanced. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance_ratio_notification_threshold", + "output": "imbalance ratio notification threshold refers to For binary classification: ratio of majority to minority class equal and above which to notify of imbalance in GUI to say slightly imbalanced. More than ``imbalance_ratio_sampling_threshold`` will say problem is imbalanced. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "imbalance ratio notification threshold", + "output": "imbalance ratio notification threshold refers to For binary classification: ratio of majority to minority class equal and above which to notify of imbalance in GUI to say slightly imbalanced. More than ``imbalance_ratio_sampling_threshold`` will say problem is imbalanced. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "imbalance ratio notification threshold refers to For binary classification: ratio of majority to minority class equal and above which to notify of imbalance in GUI to say slightly imbalanced. More than ``imbalance_ratio_sampling_threshold`` will say problem is imbalanced. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting imbalance_ratio_notification_threshold", + "output": "imbalance ratio notification threshold refers to For binary classification: ratio of majority to minority class equal and above which to notify of imbalance in GUI to say slightly imbalanced. More than ``imbalance_ratio_sampling_threshold`` will say problem is imbalanced. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting imbalance_ratio_notification_threshold", + "output": "imbalance ratio notification threshold refers to For binary classification: ratio of majority to minority class equal and above which to notify of imbalance in GUI to say slightly imbalanced. More than ``imbalance_ratio_sampling_threshold`` will say problem is imbalanced. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "nbins_ftrl_list", + "output": "nbins ftrl list refers to List of possible bins for FTRL (largest is default best value)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "nbins_ftrl_list", + "output": "nbins ftrl list refers to List of possible bins for FTRL (largest is default best value)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "nbins ftrl list", + "output": "nbins ftrl list refers to List of possible bins for FTRL (largest is default best value)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "nbins ftrl list refers to List of possible bins for FTRL (largest is default best value)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting nbins_ftrl_list", + "output": "nbins ftrl list refers to List of possible bins for FTRL (largest is default best value)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting nbins_ftrl_list", + "output": "nbins ftrl list refers to List of possible bins for FTRL (largest is default best value)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": 
"ftrl_max_interaction_terms_per_degree", + "output": "ftrl max interaction terms per degree refers to Samples the number of automatic FTRL interactions terms to no more than this value (for each of 2nd, 3rd, 4th order terms)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ftrl_max_interaction_terms_per_degree", + "output": "ftrl max interaction terms per degree refers to Max. number of automatic FTRL interactions terms for 2nd, 3rd, 4th order interactions terms (each): Samples the number of automatic FTRL interactions terms to no more than this value (for each of 2nd, 3rd, 4th order terms)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ftrl max interaction terms per degree", + "output": "ftrl max interaction terms per degree refers to Max. number of automatic FTRL interactions terms for 2nd, 3rd, 4th order interactions terms (each): Samples the number of automatic FTRL interactions terms to no more than this value (for each of 2nd, 3rd, 4th order terms)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of automatic FTRL interactions terms for 2nd, 3rd, 4th order interactions terms (each): ", + "output": "ftrl max interaction terms per degree refers to Max. 
number of automatic FTRL interactions terms for 2nd, 3rd, 4th order interactions terms (each): Samples the number of automatic FTRL interactions terms to no more than this value (for each of 2nd, 3rd, 4th order terms)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ftrl_max_interaction_terms_per_degree", + "output": "ftrl max interaction terms per degree refers to Samples the number of automatic FTRL interactions terms to no more than this value (for each of 2nd, 3rd, 4th order terms)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ftrl_max_interaction_terms_per_degree", + "output": "ftrl max interaction terms per degree refers to Max. number of automatic FTRL interactions terms for 2nd, 3rd, 4th order interactions terms (each): Samples the number of automatic FTRL interactions terms to no more than this value (for each of 2nd, 3rd, 4th order terms)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "te_bin_list", + "output": "te bin list refers to List of possible bins for target encoding (first is default value)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "te_bin_list", + "output": "te bin list refers to List of possible bins for target encoding (first is default value)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "te bin list", + "output": "te bin list refers to List of possible bins for target encoding (first is default value)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "te bin list refers to List of possible bins for target encoding (first is default value)" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Provide a short explanation of the expert setting te_bin_list", + "output": "te bin list refers to List of possible bins for target encoding (first is default value)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting te_bin_list", + "output": "te bin list refers to List of possible bins for target encoding (first is default value)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "woe_bin_list", + "output": "woe bin list refers to List of possible bins for weight of evidence encoding (first is default value) If only want one value: woe_bin_list = [2]" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "woe_bin_list", + "output": "woe bin list refers to List of possible bins for weight of evidence encoding (first is default value) If only want one value: woe_bin_list = [2]" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "woe bin list", + "output": "woe bin list refers to List of possible bins for weight of evidence encoding (first is default value) If only want one value: woe_bin_list = [2]" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "woe bin list refers to List of possible bins for weight of evidence encoding (first is default value) If only want one value: woe_bin_list = [2]" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting woe_bin_list", + "output": "woe bin list refers to List of possible bins for weight of evidence encoding (first is default value) If only want one value: woe_bin_list = [2]" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting 
woe_bin_list", + "output": "woe bin list refers to List of possible bins for weight of evidence encoding (first is default value) If only want one value: woe_bin_list = [2]" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ohe_bin_list", + "output": "ohe bin list refers to List of possible bins for one hot encoding (first is default value). If left as default, the actual list is changed for given data size and dials." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ohe_bin_list", + "output": "ohe bin list refers to List of possible bins for one hot encoding (first is default value). If left as default, the actual list is changed for given data size and dials." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ohe bin list", + "output": "ohe bin list refers to List of possible bins for one hot encoding (first is default value). If left as default, the actual list is changed for given data size and dials." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ohe bin list refers to List of possible bins for one hot encoding (first is default value). If left as default, the actual list is changed for given data size and dials." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ohe_bin_list", + "output": "ohe bin list refers to List of possible bins for one hot encoding (first is default value). If left as default, the actual list is changed for given data size and dials." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ohe_bin_list", + "output": "ohe bin list refers to List of possible bins for one hot encoding (first is default value).
If left as default, the actual list is changed for given data size and dials." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner_bin_list", + "output": "binner bin list refers to List of max possible number of bins for numeric binning (first is default value). If left as default, the actual list is changed for given data size and dials. The binner will automatically reduce the number of bins based on predictive power." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner_bin_list", + "output": "binner bin list refers to List of max possible number of bins for numeric binning (first is default value). If left as default, the actual list is changed for given data size and dials. The binner will automatically reduce the number of bins based on predictive power." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "binner bin list", + "output": "binner bin list refers to List of max possible number of bins for numeric binning (first is default value). If left as default, the actual list is changed for given data size and dials. The binner will automatically reduce the number of bins based on predictive power." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "binner bin list refers to List of max possible number of bins for numeric binning (first is default value). If left as default, the actual list is changed for given data size and dials. The binner will automatically reduce the number of bins based on predictive power." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting binner_bin_list", + "output": "binner bin list refers to List of max possible number of bins for numeric binning (first is default value). If left as default, the actual list is changed for given data size and dials. The binner will automatically reduce the number of bins based on predictive power." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting binner_bin_list", + "output": "binner bin list refers to List of max possible number of bins for numeric binning (first is default value). If left as default, the actual list is changed for given data size and dials. The binner will automatically reduce the number of bins based on predictive power." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_redundant_columns_limit", + "output": "drop redundant columns limit refers to If dataset has more columns, then will check only first such columns. Set to 0 to disable." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_redundant_columns_limit", + "output": "drop redundant columns limit refers to Max number of columns to check for redundancy in training dataset.: If dataset has more columns, then will check only first such columns. Set to 0 to disable." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop redundant columns limit", + "output": "drop redundant columns limit refers to Max number of columns to check for redundancy in training dataset.: If dataset has more columns, then will check only first such columns. Set to 0 to disable." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max number of columns to check for redundancy in training dataset.: ", + "output": "drop redundant columns limit refers to Max number of columns to check for redundancy in training dataset.: If dataset has more columns, then will check only first such columns. Set to 0 to disable." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting drop_redundant_columns_limit", + "output": "drop redundant columns limit refers to If dataset has more columns, then will check only first such columns. Set to 0 to disable." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting drop_redundant_columns_limit", + "output": "drop redundant columns limit refers to Max number of columns to check for redundancy in training dataset.: If dataset has more columns, then will check only first such columns. Set to 0 to disable." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_constant_columns", + "output": "drop constant columns refers to Whether to drop columns with constant values" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_constant_columns", + "output": "drop constant columns refers to Drop constant columns: Whether to drop columns with constant values" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop constant columns", + "output": "drop constant columns refers to Drop constant columns: Whether to drop columns with constant values" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Drop constant columns: ", + "output": "drop constant columns refers to Drop constant columns: Whether to drop columns with constant values" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting drop_constant_columns", + "output": "drop constant columns refers to Whether to drop columns with constant values" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting drop_constant_columns", + "output": "drop constant columns refers to Drop constant columns: Whether to drop columns with constant values" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect_duplicate_rows", + "output": "detect duplicate rows refers to Whether to detect duplicate rows in training, validation and testing datasets. Done after doing type detection and dropping of redundant or missing columns across datasets, just before the experiment starts, still before leakage detection. 
Any further dropping of columns can change the amount of duplicate rows. Informative only, if want to drop rows in training data, make sure to check the drop_duplicate_rows setting. Uses a sample size, given by detect_duplicate_rows_max_rows_x_cols." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect_duplicate_rows", + "output": "detect duplicate rows refers to Detect duplicate rows: Whether to detect duplicate rows in training, validation and testing datasets. Done after doing type detection and dropping of redundant or missing columns across datasets, just before the experiment starts, still before leakage detection. Any further dropping of columns can change the amount of duplicate rows. Informative only, if want to drop rows in training data, make sure to check the drop_duplicate_rows setting. Uses a sample size, given by detect_duplicate_rows_max_rows_x_cols." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect duplicate rows", + "output": "detect duplicate rows refers to Detect duplicate rows: Whether to detect duplicate rows in training, validation and testing datasets. Done after doing type detection and dropping of redundant or missing columns across datasets, just before the experiment starts, still before leakage detection. Any further dropping of columns can change the amount of duplicate rows. Informative only, if want to drop rows in training data, make sure to check the drop_duplicate_rows setting. Uses a sample size, given by detect_duplicate_rows_max_rows_x_cols." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Detect duplicate rows: ", + "output": "detect duplicate rows refers to Detect duplicate rows: Whether to detect duplicate rows in training, validation and testing datasets. 
Done after doing type detection and dropping of redundant or missing columns across datasets, just before the experiment starts, still before leakage detection. Any further dropping of columns can change the amount of duplicate rows. Informative only, if want to drop rows in training data, make sure to check the drop_duplicate_rows setting. Uses a sample size, given by detect_duplicate_rows_max_rows_x_cols." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting detect_duplicate_rows", + "output": "detect duplicate rows refers to Whether to detect duplicate rows in training, validation and testing datasets. Done after doing type detection and dropping of redundant or missing columns across datasets, just before the experiment starts, still before leakage detection. Any further dropping of columns can change the amount of duplicate rows. Informative only, if want to drop rows in training data, make sure to check the drop_duplicate_rows setting. Uses a sample size, given by detect_duplicate_rows_max_rows_x_cols." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting detect_duplicate_rows", + "output": "detect duplicate rows refers to Detect duplicate rows: Whether to detect duplicate rows in training, validation and testing datasets. Done after doing type detection and dropping of redundant or missing columns across datasets, just before the experiment starts, still before leakage detection. Any further dropping of columns can change the amount of duplicate rows. Informative only, if want to drop rows in training data, make sure to check the drop_duplicate_rows setting. Uses a sample size, given by detect_duplicate_rows_max_rows_x_cols." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_duplicate_rows_timeout", + "output": "drop duplicate rows timeout refers to Timeout in seconds for dropping duplicate rows in training data, proportionally increases as rows*cols grows as compared to detect_duplicate_rows_max_rows_x_cols.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_duplicate_rows_timeout", + "output": "drop duplicate rows timeout refers to Timeout in seconds for dropping duplicate rows in training data, proportionally increases as rows*cols grows as compared to detect_duplicate_rows_max_rows_x_cols.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop duplicate rows timeout", + "output": "drop duplicate rows timeout refers to Timeout in seconds for dropping duplicate rows in training data, proportionally increases as rows*cols grows as compared to detect_duplicate_rows_max_rows_x_cols.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Timeout in seconds for dropping duplicate rows in training data, proportionally increases as rows*cols grows as compared to detect_duplicate_rows_max_rows_x_cols.: ", + "output": "drop duplicate rows timeout refers to Timeout in seconds for dropping duplicate rows in training data, proportionally increases as rows*cols grows as compared to detect_duplicate_rows_max_rows_x_cols.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting drop_duplicate_rows_timeout", + "output": "drop duplicate rows timeout refers to Timeout in seconds for dropping duplicate rows in training data, proportionally increases as rows*cols grows as compared to detect_duplicate_rows_max_rows_x_cols.: " + }, + {
"prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting drop_duplicate_rows_timeout", + "output": "drop duplicate rows timeout refers to Timeout in seconds for dropping duplicate rows in training data, proportionally increases as rows*cols grows as compared to detect_duplicate_rows_max_rows_x_cols.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_duplicate_rows", + "output": "drop duplicate rows refers to Whether to drop duplicate rows in training data. Done at the start of Driverless AI, only considering columns to drop as given by the user, not considering validation or training datasets or leakage or redundant columns. Any further dropping of columns can change the amount of duplicate rows. Time limited by drop_duplicate_rows_timeout seconds. 'auto': \"off\" 'weight': If duplicates, then convert dropped duplicates into a weight column for training. Useful when duplicates are added to preserve some distribution of instances expected. Only allowed if no weight column is present, else duplicates are just dropped. 'drop': Drop any duplicates, keeping only first instances. 'off': Do not drop any duplicates. This may lead to over-estimation of accuracy." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_duplicate_rows", + "output": "drop duplicate rows refers to Drop duplicate rows in training data: Whether to drop duplicate rows in training data. Done at the start of Driverless AI, only considering columns to drop as given by the user, not considering validation or training datasets or leakage or redundant columns. Any further dropping of columns can change the amount of duplicate rows. Time limited by drop_duplicate_rows_timeout seconds. 'auto': \"off\" 'weight': If duplicates, then convert dropped duplicates into a weight column for training.
Useful when duplicates are added to preserve some distribution of instances expected. Only allowed if no weight column is present, else duplicates are just dropped. 'drop': Drop any duplicates, keeping only first instances. 'off': Do not drop any duplicates. This may lead to over-estimation of accuracy." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop duplicate rows", + "output": "drop duplicate rows refers to Drop duplicate rows in training data: Whether to drop duplicate rows in training data. Done at the start of Driverless AI, only considering columns to drop as given by the user, not considering validation or training datasets or leakage or redundant columns. Any further dropping of columns can change the amount of duplicate rows. Time limited by drop_duplicate_rows_timeout seconds. 'auto': \"off\"\" 'weight': If duplicates, then convert dropped duplicates into a weight column for training. Useful when duplicates are added to preserve some distribution of instances expected. Only allowed if no weight column is present, else duplicates are just dropped. 'drop': Drop any duplicates, keeping only first instances. 'off': Do not drop any duplicates. This may lead to over-estimation of accuracy." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Drop duplicate rows in training data: ", + "output": "drop duplicate rows refers to Drop duplicate rows in training data: Whether to drop duplicate rows in training data. Done at the start of Driverless AI, only considering columns to drop as given by the user, not considering validation or training datasets or leakage or redundant columns. Any further dropping of columns can change the amount of duplicate rows. Time limited by drop_duplicate_rows_timeout seconds.
'auto': \"off\"\" 'weight': If duplicates, then convert dropped duplicates into a weight column for training. Useful when duplicates are added to preserve some distribution of instances expected. Only allowed if no weight column is present, else duplicates are just dropped. 'drop': Drop any duplicates, keeping only first instances. 'off': Do not drop any duplicates. This may lead to over-estimation of accuracy." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting drop_duplicate_rows", + "output": "drop duplicate rows refers to Whether to drop duplicate rows in training data. Done at the start of Driverless AI, only considering columns to drop as given by the user, not considering validation or training datasets or leakage or redundant columns. Any further dropping of columns can change the amount of duplicate rows. Time limited by drop_duplicate_rows_timeout seconds. 'auto': \"off\"\" 'weight': If duplicates, then convert dropped duplicates into a weight column for training. Useful when duplicates are added to preserve some distribution of instances expected. Only allowed if no weight column is present, else duplicates are just dropped. 'drop': Drop any duplicates, keeping only first instances. 'off': Do not drop any duplicates. This may lead to over-estimation of accuracy." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting drop_duplicate_rows", + "output": "drop duplicate rows refers to Drop duplicate rows in training data: Whether to drop duplicate rows in training data. Done at the start of Driverless AI, only considering columns to drop as given by the user, not considering validation or training datasets or leakage or redundant columns. Any further dropping of columns can change the amount of duplicate rows. Time limited by drop_duplicate_rows_timeout seconds.
'auto': \"off\"\" 'weight': If duplicates, then convert dropped duplicates into a weight column for training. Useful when duplicates are added to preserve some distribution of instances expected. Only allowed if no weight column is present, else duplicates are just dropped. 'drop': Drop any duplicates, keeping only first instances. 'off': Do not drop any duplicates. This may lead to over-estimation of accuracy." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect_duplicate_rows_max_rows_x_cols", + "output": "detect duplicate rows max rows x cols refers to If > 0, then acts as sampling size for informative duplicate row detection. If set to 0, will do checks for all dataset sizes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect_duplicate_rows_max_rows_x_cols", + "output": "detect duplicate rows max rows x cols refers to Limit of dataset size in rows x cols for data when detecting duplicate rows: If > 0, then acts as sampling size for informative duplicate row detection. If set to 0, will do checks for all dataset sizes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect duplicate rows max rows x cols", + "output": "detect duplicate rows max rows x cols refers to Limit of dataset size in rows x cols for data when detecting duplicate rows: If > 0, then acts as sampling size for informative duplicate row detection. If set to 0, will do checks for all dataset sizes."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Limit of dataset size in rows x cols for data when detecting duplicate rows: ", + "output": "detect duplicate rows max rows x cols refers to Limit of dataset size in rows x cols for data when detecting duplicate rows: If > 0, then acts as sampling size for informative duplicate row detection. If set to 0, will do checks for all dataset sizes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting detect_duplicate_rows_max_rows_x_cols", + "output": "detect duplicate rows max rows x cols refers to If > 0, then acts as sampling size for informative duplicate row detection. If set to 0, will do checks for all dataset sizes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting detect_duplicate_rows_max_rows_x_cols", + "output": "detect duplicate rows max rows x cols refers to Limit of dataset size in rows x cols for data when detecting duplicate rows: If > 0, then acts as sampling size for informative duplicate row detection. If set to 0, will do checks for all dataset sizes." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_id_columns", + "output": "drop id columns refers to Whether to drop columns that appear to be an ID" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_id_columns", + "output": "drop id columns refers to Drop ID columns: Whether to drop columns that appear to be an ID" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop id columns", + "output": "drop id columns refers to Drop ID columns: Whether to drop columns that appear to be an ID" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Drop ID columns: ", + "output": "drop id columns refers to Drop ID columns: Whether to drop columns that appear to be an ID" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting drop_id_columns", + "output": "drop id columns refers to Whether to drop columns that appear to be an ID" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting drop_id_columns", + "output": "drop id columns refers to Drop ID columns: Whether to drop columns that appear to be an ID" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "no_drop_features", + "output": "no drop features refers to Whether to avoid dropping any columns (original or derived)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "no_drop_features", + "output": "no drop features refers to Don't drop any columns: Whether to avoid dropping any columns (original or derived)" + }, + { + "prompt_type": "human_bot", + "instruction": 
"Explain the following expert setting for Driverless AI", + "input": "no drop features", + "output": "no drop features refers to Don't drop any columns: Whether to avoid dropping any columns (original or derived)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Don't drop any columns: ", + "output": "no drop features refers to Don't drop any columns: Whether to avoid dropping any columns (original or derived)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting no_drop_features", + "output": "no drop features refers to Whether to avoid dropping any columns (original or derived)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting no_drop_features", + "output": "no drop features refers to Don't drop any columns: Whether to avoid dropping any columns (original or derived)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cols_to_drop", + "output": "cols to drop refers to Direct control over columns to drop in bulk so can copy-paste large lists instead of selecting each one separately in GUI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cols_to_drop", + "output": "cols to drop refers to Features to drop, e.g. [\"V1\", \"V2\", \"V3\"]: Direct control over columns to drop in bulk so can copy-paste large lists instead of selecting each one separately in GUI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cols to drop", + "output": "cols to drop refers to Features to drop, e.g. 
[\"V1\", \"V2\", \"V3\"]: Direct control over columns to drop in bulk so can copy-paste large lists instead of selecting each one separately in GUI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Features to drop, e.g. [\"V1\", \"V2\", \"V3\"]: ", + "output": "cols to drop refers to Features to drop, e.g. [\"V1\", \"V2\", \"V3\"]: Direct control over columns to drop in bulk so can copy-paste large lists instead of selecting each one separately in GUI" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting cols_to_drop", + "output": "cols to drop refers to Direct control over columns to drop in bulk so can copy-paste large lists instead of selecting each one separately in GUI" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting cols_to_drop", + "output": "cols to drop refers to Features to drop, e.g. [\"V1\", \"V2\", \"V3\"]: Direct control over columns to drop in bulk so can copy-paste large lists instead of selecting each one separately in GUI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cols_to_group_by", + "output": "cols to group by refers to Control over columns to group by for CVCatNumEncode Transformer, default is empty list that means DAI automatically searches all columns,selected randomly or by which have top variable importance.The CVCatNumEncode Transformer takes a list of categoricals (or these cols_to_group_by) and uses those columnsas new feature to perform aggregations on (agg_funcs_for_group_by)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cols_to_group_by", + "output": "cols to group by refers to Features to group by, e.g. 
[\"G1\", \"G2\", \"G3\"]: Control over columns to group by for CVCatNumEncode Transformer, default is empty list that means DAI automatically searches all columns,selected randomly or by which have top variable importance.The CVCatNumEncode Transformer takes a list of categoricals (or these cols_to_group_by) and uses those columnsas new feature to perform aggregations on (agg_funcs_for_group_by)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cols to group by", + "output": "cols to group by refers to Features to group by, e.g. [\"G1\", \"G2\", \"G3\"]: Control over columns to group by for CVCatNumEncode Transformer, default is empty list that means DAI automatically searches all columns,selected randomly or by which have top variable importance.The CVCatNumEncode Transformer takes a list of categoricals (or these cols_to_group_by) and uses those columnsas new feature to perform aggregations on (agg_funcs_for_group_by)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Features to group by, e.g. [\"G1\", \"G2\", \"G3\"]: ", + "output": "cols to group by refers to Features to group by, e.g. [\"G1\", \"G2\", \"G3\"]: Control over columns to group by for CVCatNumEncode Transformer, default is empty list that means DAI automatically searches all columns,selected randomly or by which have top variable importance.The CVCatNumEncode Transformer takes a list of categoricals (or these cols_to_group_by) and uses those columnsas new feature to perform aggregations on (agg_funcs_for_group_by)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting cols_to_group_by", + "output": "cols to group by refers to Control over columns to group by for CVCatNumEncode Transformer, default is empty list that means DAI automatically searches all columns,selected randomly or by which have top variable importance.The CVCatNumEncode Transformer takes a list of categoricals (or these cols_to_group_by) and uses those columnsas new feature to perform aggregations on (agg_funcs_for_group_by)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting cols_to_group_by", + "output": "cols to group by refers to Features to group by, e.g. [\"G1\", \"G2\", \"G3\"]: Control over columns to group by for CVCatNumEncode Transformer, default is empty list that means DAI automatically searches all columns,selected randomly or by which have top variable importance.The CVCatNumEncode Transformer takes a list of categoricals (or these cols_to_group_by) and uses those columnsas new feature to perform aggregations on (agg_funcs_for_group_by)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "sample_cols_to_group_by", + "output": "sample cols to group by refers to Whether to sample from given features to group by (True) or to always group by all features (False) when using cols_to_group_by." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "sample_cols_to_group_by", + "output": "sample cols to group by refers to Sample from features to group by: Whether to sample from given features to group by (True) or to always group by all features (False) when using cols_to_group_by." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "sample cols to group by", + "output": "sample cols to group by refers to Sample from features to group by: Whether to sample from given features to group by (True) or to always group by all features (False) when using cols_to_group_by." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Sample from features to group by: ", + "output": "sample cols to group by refers to Sample from features to group by: Whether to sample from given features to group by (True) or to always group by all features (False) when using cols_to_group_by." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting sample_cols_to_group_by", + "output": "sample cols to group by refers to Whether to sample from given features to group by (True) or to always group by all features (False) when using cols_to_group_by." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting sample_cols_to_group_by", + "output": "sample cols to group by refers to Sample from features to group by: Whether to sample from given features to group by (True) or to always group by all features (False) when using cols_to_group_by." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "agg_funcs_for_group_by", + "output": "agg funcs for group by refers to Aggregation functions to use for groupby operations for CVCatNumEncode Transformer, see also cols_to_group_by and sample_cols_to_group_by." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "agg_funcs_for_group_by", + "output": "agg funcs for group by refers to Aggregation functions (non-time-series) for group by operations: Aggregation functions to use for groupby operations for CVCatNumEncode Transformer, see also cols_to_group_by and sample_cols_to_group_by." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "agg funcs for group by", + "output": "agg funcs for group by refers to Aggregation functions (non-time-series) for group by operations: Aggregation functions to use for groupby operations for CVCatNumEncode Transformer, see also cols_to_group_by and sample_cols_to_group_by." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Aggregation functions (non-time-series) for group by operations: ", + "output": "agg funcs for group by refers to Aggregation functions (non-time-series) for group by operations: Aggregation functions to use for groupby operations for CVCatNumEncode Transformer, see also cols_to_group_by and sample_cols_to_group_by." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting agg_funcs_for_group_by", + "output": "agg funcs for group by refers to Aggregation functions to use for groupby operations for CVCatNumEncode Transformer, see also cols_to_group_by and sample_cols_to_group_by." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting agg_funcs_for_group_by", + "output": "agg funcs for group by refers to Aggregation functions (non-time-series) for group by operations: Aggregation functions to use for groupby operations for CVCatNumEncode Transformer, see also cols_to_group_by and sample_cols_to_group_by." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "folds_for_group_by", + "output": "folds for group by refers to Out of fold aggregations ensure less overfitting, but see less data in each fold. For controlling how many folds used by CVCatNumEncode Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "folds_for_group_by", + "output": "folds for group by refers to Number of folds to obtain aggregation when grouping: Out of fold aggregations ensure less overfitting, but see less data in each fold. For controlling how many folds used by CVCatNumEncode Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "folds for group by", + "output": "folds for group by refers to Number of folds to obtain aggregation when grouping: Out of fold aggregations ensure less overfitting, but see less data in each fold. For controlling how many folds used by CVCatNumEncode Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of folds to obtain aggregation when grouping: ", + "output": "folds for group by refers to Number of folds to obtain aggregation when grouping: Out of fold aggregations ensure less overfitting, but see less data in each fold. For controlling how many folds used by CVCatNumEncode Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting folds_for_group_by", + "output": "folds for group by refers to Out of fold aggregations ensure less overfitting, but see less data in each fold. For controlling how many folds used by CVCatNumEncode Transformer." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting folds_for_group_by", + "output": "folds for group by refers to Number of folds to obtain aggregation when grouping: Out of fold aggregations ensure less overfitting, but see less data in each fold. For controlling how many folds used by CVCatNumEncode Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cols_to_force_in", + "output": "cols to force in refers to Control over columns to force-in. Forced-in features are handled by the most interpretable transformer allowed by experiment options, and they are never removed (although model may assign 0 importance to them still). Transformers used by default include: OriginalTransformer for numeric, CatOriginalTransformer or FrequencyTransformer for categorical, TextOriginalTransformer for text, DateTimeOriginalTransformer for date-times, DateOriginalTransformer for dates, ImageOriginalTransformer or ImageVectorizerTransformer for images, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cols_to_force_in", + "output": "cols to force in refers to Features to force in, e.g. [\"G1\", \"G2\", \"G3\"]: Control over columns to force-in. Forced-in features are handled by the most interpretable transformer allowed by experiment options, and they are never removed (although model may assign 0 importance to them still). Transformers used by default include: OriginalTransformer for numeric, CatOriginalTransformer or FrequencyTransformer for categorical, TextOriginalTransformer for text, DateTimeOriginalTransformer for date-times, DateOriginalTransformer for dates, ImageOriginalTransformer or ImageVectorizerTransformer for images, etc."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "cols to force in", + "output": "cols to force in refers to Features to force in, e.g. [\"G1\", \"G2\", \"G3\"]: Control over columns to force-in. Forced-in features are handled by the most interpretable transformer allowed by experiment options, and they are never removed (although model may assign 0 importance to them still). Transformers used by default include: OriginalTransformer for numeric, CatOriginalTransformer or FrequencyTransformer for categorical, TextOriginalTransformer for text, DateTimeOriginalTransformer for date-times, DateOriginalTransformer for dates, ImageOriginalTransformer or ImageVectorizerTransformer for images, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Features to force in, e.g. [\"G1\", \"G2\", \"G3\"]: ", + "output": "cols to force in refers to Features to force in, e.g. [\"G1\", \"G2\", \"G3\"]: Control over columns to force-in. Forced-in features are handled by the most interpretable transformer allowed by experiment options, and they are never removed (although model may assign 0 importance to them still). Transformers used by default include: OriginalTransformer for numeric, CatOriginalTransformer or FrequencyTransformer for categorical, TextOriginalTransformer for text, DateTimeOriginalTransformer for date-times, DateOriginalTransformer for dates, ImageOriginalTransformer or ImageVectorizerTransformer for images, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting cols_to_force_in", + "output": "cols to force in refers to Control over columns to force-in.
Forced-in features are handled by the most interpretable transformer allowed by experiment options, and they are never removed (although model may assign 0 importance to them still). Transformers used by default include: OriginalTransformer for numeric, CatOriginalTransformer or FrequencyTransformer for categorical, TextOriginalTransformer for text, DateTimeOriginalTransformer for date-times, DateOriginalTransformer for dates, ImageOriginalTransformer or ImageVectorizerTransformer for images, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting cols_to_force_in", + "output": "cols to force in refers to Features to force in, e.g. [\"G1\", \"G2\", \"G3\"]: Control over columns to force-in. Forced-in features are handled by the most interpretable transformer allowed by experiment options, and they are never removed (although model may assign 0 importance to them still). Transformers used by default include: OriginalTransformer for numeric, CatOriginalTransformer or FrequencyTransformer for categorical, TextOriginalTransformer for text, DateTimeOriginalTransformer for date-times, DateOriginalTransformer for dates, ImageOriginalTransformer or ImageVectorizerTransformer for images, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mutation_mode", + "output": "mutation mode refers to Strategy to apply when doing mutations on transformers. Sample mode is default, with tendency to sample transformer parameters. Batched mode tends to do multiple types of the same transformation together. Full mode does even more types of the same transformation together. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mutation_mode", + "output": "mutation mode refers to Type of mutation strategy: Strategy to apply when doing mutations on transformers.
Sample mode is default, with tendency to sample transformer parameters. Batched mode tends to do multiple types of the same transformation together. Full mode does even more types of the same transformation together. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mutation mode", + "output": "mutation mode refers to Type of mutation strategy: Strategy to apply when doing mutations on transformers. Sample mode is default, with tendency to sample transformer parameters. Batched mode tends to do multiple types of the same transformation together. Full mode does even more types of the same transformation together. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Type of mutation strategy: ", + "output": "mutation mode refers to Type of mutation strategy: Strategy to apply when doing mutations on transformers. Sample mode is default, with tendency to sample transformer parameters. Batched mode tends to do multiple types of the same transformation together. Full mode does even more types of the same transformation together. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mutation_mode", + "output": "mutation mode refers to Strategy to apply when doing mutations on transformers. Sample mode is default, with tendency to sample transformer parameters. Batched mode tends to do multiple types of the same transformation together. Full mode does even more types of the same transformation together. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mutation_mode", + "output": "mutation mode refers to Type of mutation strategy: Strategy to apply when doing mutations on transformers. Sample mode is default, with tendency to sample transformer parameters. 
Batched mode tends to do multiple types of the same transformation together. Full mode does even more types of the same transformation together. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leaderboard_mode", + "output": "leaderboard mode refers to 'baseline': Explore exemplar set of models with baselines as reference.'random': Explore 10 random seeds for same setup. Useful since nature of genetic algorithm is noisy and repeats might get better results, or one can ensemble the custom individuals from such repeats.'line': Explore good model with all features and original features with all models. Useful as first exploration.'line_all': Like 'line', but enable all models and transformers possible instead of only what base experiment setup would have inferred.'product': Explore one-by-one Cartesian product of each model and transformer. Useful for exhaustive exploration." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leaderboard_mode", + "output": "leaderboard mode refers to Control the automatic leaderboard mode: 'baseline': Explore exemplar set of models with baselines as reference.'random': Explore 10 random seeds for same setup. Useful since nature of genetic algorithm is noisy and repeats might get better results, or one can ensemble the custom individuals from such repeats.'line': Explore good model with all features and original features with all models. Useful as first exploration.'line_all': Like 'line', but enable all models and transformers possible instead of only what base experiment setup would have inferred.'product': Explore one-by-one Cartesian product of each model and transformer. Useful for exhaustive exploration." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leaderboard mode", + "output": "leaderboard mode refers to Control the automatic leaderboard mode: 'baseline': Explore exemplar set of models with baselines as reference.'random': Explore 10 random seeds for same setup. Useful since nature of genetic algorithm is noisy and repeats might get better results, or one can ensemble the custom individuals from such repeats.'line': Explore good model with all features and original features with all models. Useful as first exploration.'line_all': Like 'line', but enable all models and transformers possible instead of only what base experiment setup would have inferred.'product': Explore one-by-one Cartesian product of each model and transformer. Useful for exhaustive exploration." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Control the automatic leaderboard mode: ", + "output": "leaderboard mode refers to Control the automatic leaderboard mode: 'baseline': Explore exemplar set of models with baselines as reference.'random': Explore 10 random seeds for same setup. Useful since nature of genetic algorithm is noisy and repeats might get better results, or one can ensemble the custom individuals from such repeats.'line': Explore good model with all features and original features with all models. Useful as first exploration.'line_all': Like 'line', but enable all models and transformers possible instead of only what base experiment setup would have inferred.'product': Explore one-by-one Cartesian product of each model and transformer. Useful for exhaustive exploration." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting leaderboard_mode", + "output": "leaderboard mode refers to 'baseline': Explore exemplar set of models with baselines as reference.'random': Explore 10 random seeds for same setup. Useful since nature of genetic algorithm is noisy and repeats might get better results, or one can ensemble the custom individuals from such repeats.'line': Explore good model with all features and original features with all models. Useful as first exploration.'line_all': Like 'line', but enable all models and transformers possible instead of only what base experiment setup would have inferred.'product': Explore one-by-one Cartesian product of each model and transformer. Useful for exhaustive exploration." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting leaderboard_mode", + "output": "leaderboard mode refers to Control the automatic leaderboard mode: 'baseline': Explore exemplar set of models with baselines as reference.'random': Explore 10 random seeds for same setup. Useful since nature of genetic algorithm is noisy and repeats might get better results, or one can ensemble the custom individuals from such repeats.'line': Explore good model with all features and original features with all models. Useful as first exploration.'line_all': Like 'line', but enable all models and transformers possible instead of only what base experiment setup would have inferred.'product': Explore one-by-one Cartesian product of each model and transformer. Useful for exhaustive exploration." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default_knob_offset_accuracy", + "output": "default knob offset accuracy refers to Allows control over default accuracy knob setting. If default models are too complex, set to -1 or -2, etc. 
If default models are not accurate enough, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default_knob_offset_accuracy", + "output": "default knob offset accuracy refers to Offset for default accuracy knob: Allows control over default accuracy knob setting. If default models are too complex, set to -1 or -2, etc. If default models are not accurate enough, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default knob offset accuracy", + "output": "default knob offset accuracy refers to Offset for default accuracy knob: Allows control over default accuracy knob setting. If default models are too complex, set to -1 or -2, etc. If default models are not accurate enough, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Offset for default accuracy knob: ", + "output": "default knob offset accuracy refers to Offset for default accuracy knob: Allows control over default accuracy knob setting. If default models are too complex, set to -1 or -2, etc. If default models are not accurate enough, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting default_knob_offset_accuracy", + "output": "default knob offset accuracy refers to Allows control over default accuracy knob setting. If default models are too complex, set to -1 or -2, etc. If default models are not accurate enough, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting default_knob_offset_accuracy", + "output": "default knob offset accuracy refers to Offset for default accuracy knob: Allows control over default accuracy knob setting. 
If default models are too complex, set to -1 or -2, etc. If default models are not accurate enough, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default_knob_offset_time", + "output": "default knob offset time refers to Allows control over default time knob setting. If default experiments are too slow, set to -1 or -2, etc. If default experiments finish too fast, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default_knob_offset_time", + "output": "default knob offset time refers to Offset for default time knob: Allows control over default time knob setting. If default experiments are too slow, set to -1 or -2, etc. If default experiments finish too fast, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default knob offset time", + "output": "default knob offset time refers to Offset for default time knob: Allows control over default time knob setting. If default experiments are too slow, set to -1 or -2, etc. If default experiments finish too fast, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Offset for default time knob: ", + "output": "default knob offset time refers to Offset for default time knob: Allows control over default time knob setting. If default experiments are too slow, set to -1 or -2, etc. If default experiments finish too fast, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting default_knob_offset_time", + "output": "default knob offset time refers to Allows control over default time knob setting. If default experiments are too slow, set to -1 or -2, etc. 
If default experiments finish too fast, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting default_knob_offset_time", + "output": "default knob offset time refers to Offset for default time knob: Allows control over default time knob setting. If default experiments are too slow, set to -1 or -2, etc. If default experiments finish too fast, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default_knob_offset_interpretability", + "output": "default knob offset interpretability refers to Allows control over default interpretability knob setting. If default models are too simple, set to -1 or -2, etc. If default models are too complex, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default_knob_offset_interpretability", + "output": "default knob offset interpretability refers to Offset for default interpretability knob: Allows control over default interpretability knob setting. If default models are too simple, set to -1 or -2, etc. If default models are too complex, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "default knob offset interpretability", + "output": "default knob offset interpretability refers to Offset for default interpretability knob: Allows control over default interpretability knob setting. If default models are too simple, set to -1 or -2, etc. If default models are too complex, set to 1 or 2, etc. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Offset for default interpretability knob: ", + "output": "default knob offset interpretability refers to Offset for default interpretability knob: Allows control over default interpretability knob setting. If default models are too simple, set to -1 or -2, etc. If default models are too complex, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting default_knob_offset_interpretability", + "output": "default knob offset interpretability refers to Allows control over default interpretability knob setting. If default models are too simple, set to -1 or -2, etc. If default models are too complex, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting default_knob_offset_interpretability", + "output": "default knob offset interpretability refers to Offset for default interpretability knob: Allows control over default interpretability knob setting. If default models are too simple, set to -1 or -2, etc. If default models are too complex, set to 1 or 2, etc. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_check_text", + "output": "shift check text refers to Whether to enable checking text for shift, currently only via label encoding." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_check_text", + "output": "shift check text refers to Whether to enable checking text for shift, currently only via label encoding." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift check text", + "output": "shift check text refers to Whether to enable checking text for shift, currently only via label encoding." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "shift check text refers to Whether to enable checking text for shift, currently only via label encoding." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting shift_check_text", + "output": "shift check text refers to Whether to enable checking text for shift, currently only via label encoding." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting shift_check_text", + "output": "shift check text refers to Whether to enable checking text for shift, currently only via label encoding." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_rf_for_shift_if_have_lgbm", + "output": "use rf for shift if have lgbm refers to Whether to use LightGBM random forest mode without early stopping for shift detection." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_rf_for_shift_if_have_lgbm", + "output": "use rf for shift if have lgbm refers to Whether to use LightGBM random forest mode without early stopping for shift detection." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use rf for shift if have lgbm", + "output": "use rf for shift if have lgbm refers to Whether to use LightGBM random forest mode without early stopping for shift detection." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "use rf for shift if have lgbm refers to Whether to use LightGBM random forest mode without early stopping for shift detection." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting use_rf_for_shift_if_have_lgbm", + "output": "use rf for shift if have lgbm refers to Whether to use LightGBM random forest mode without early stopping for shift detection." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting use_rf_for_shift_if_have_lgbm", + "output": "use rf for shift if have lgbm refers to Whether to use LightGBM random forest mode without early stopping for shift detection." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_key_features_varimp", + "output": "shift key features varimp refers to Normalized training variable importance above which to check the feature for shift Useful to avoid checking likely unimportant features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_key_features_varimp", + "output": "shift key features varimp refers to Normalized training variable importance above which to check the feature for shift Useful to avoid checking likely unimportant features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift key features varimp", + "output": "shift key features varimp refers to Normalized training variable importance above which to check the feature for shift Useful to avoid checking likely unimportant features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": 
"shift key features varimp refers to Normalized training variable importance above which to check the feature for shift Useful to avoid checking likely unimportant features" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting shift_key_features_varimp", + "output": "shift key features varimp refers to Normalized training variable importance above which to check the feature for shift Useful to avoid checking likely unimportant features" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting shift_key_features_varimp", + "output": "shift key features varimp refers to Normalized training variable importance above which to check the feature for shift Useful to avoid checking likely unimportant features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_check_reduced_features", + "output": "shift check reduced features refers to Whether to only check certain features based upon the value of shift_key_features_varimp" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_check_reduced_features", + "output": "shift check reduced features refers to Whether to only check certain features based upon the value of shift_key_features_varimp" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift check reduced features", + "output": "shift check reduced features refers to Whether to only check certain features based upon the value of shift_key_features_varimp" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "shift check reduced features refers to Whether to only check certain features based upon the value of shift_key_features_varimp" + }, + { + 
"prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting shift_check_reduced_features", + "output": "shift check reduced features refers to Whether to only check certain features based upon the value of shift_key_features_varimp" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting shift_check_reduced_features", + "output": "shift check reduced features refers to Whether to only check certain features based upon the value of shift_key_features_varimp" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_trees", + "output": "shift trees refers to Number of trees to use to train model to check shift in distribution No larger than max_nestimators" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_trees", + "output": "shift trees refers to Number of trees to use to train model to check shift in distribution No larger than max_nestimators" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift trees", + "output": "shift trees refers to Number of trees to use to train model to check shift in distribution No larger than max_nestimators" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "shift trees refers to Number of trees to use to train model to check shift in distribution No larger than max_nestimators" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting shift_trees", + "output": "shift trees refers to Number of trees to use to train model to check shift in distribution No larger than max_nestimators" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the 
expert setting shift_trees", + "output": "shift trees refers to Number of trees to use to train model to check shift in distribution No larger than max_nestimators" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_max_bin", + "output": "shift max bin refers to The value of max_bin to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_max_bin", + "output": "shift max bin refers to The value of max_bin to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift max bin", + "output": "shift max bin refers to The value of max_bin to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "shift max bin refers to The value of max_bin to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting shift_max_bin", + "output": "shift max bin refers to The value of max_bin to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting shift_max_bin", + "output": "shift max bin refers to The value of max_bin to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_min_max_depth", + "output": "shift min max depth refers to The min. 
value of max_depth to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_min_max_depth", + "output": "shift min max depth refers to The min. value of max_depth to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift min max depth", + "output": "shift min max depth refers to The min. value of max_depth to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "shift min max depth refers to The min. value of max_depth to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting shift_min_max_depth", + "output": "shift min max depth refers to The min. value of max_depth to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting shift_min_max_depth", + "output": "shift min max depth refers to The min. value of max_depth to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_max_max_depth", + "output": "shift max max depth refers to The max. value of max_depth to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_max_max_depth", + "output": "shift max max depth refers to The max. 
value of max_depth to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift max max depth", + "output": "shift max max depth refers to The max. value of max_depth to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "shift max max depth refers to The max. value of max_depth to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting shift_max_max_depth", + "output": "shift max max depth refers to The max. value of max_depth to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting shift_max_max_depth", + "output": "shift max max depth refers to The max. 
value of max_depth to use for trees to use to train model to check shift in distribution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect_features_distribution_shift_threshold_auc", + "output": "detect features distribution shift threshold auc refers to If distribution shift detection is enabled, show features for which shift AUC is above this value (AUC of a binary classifier that predicts whether given feature value belongs to train or test data)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect_features_distribution_shift_threshold_auc", + "output": "detect features distribution shift threshold auc refers to If distribution shift detection is enabled, show features for which shift AUC is above this value (AUC of a binary classifier that predicts whether given feature value belongs to train or test data)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect features distribution shift threshold auc", + "output": "detect features distribution shift threshold auc refers to If distribution shift detection is enabled, show features for which shift AUC is above this value (AUC of a binary classifier that predicts whether given feature value belongs to train or test data)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "detect features distribution shift threshold auc refers to If distribution shift detection is enabled, show features for which shift AUC is above this value (AUC of a binary classifier that predicts whether given feature value belongs to train or test data)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting detect_features_distribution_shift_threshold_auc", + 
"output": "detect features distribution shift threshold auc refers to If distribution shift detection is enabled, show features for which shift AUC is above this value (AUC of a binary classifier that predicts whether given feature value belongs to train or test data)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting detect_features_distribution_shift_threshold_auc", + "output": "detect features distribution shift threshold auc refers to If distribution shift detection is enabled, show features for which shift AUC is above this value (AUC of a binary classifier that predicts whether given feature value belongs to train or test data)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_features_distribution_shift_min_features", + "output": "drop features distribution shift min features refers to Minimum number of features to keep, keeping least shifted feature at least if 1" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_features_distribution_shift_min_features", + "output": "drop features distribution shift min features refers to Minimum number of features to keep, keeping least shifted feature at least if 1" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop features distribution shift min features", + "output": "drop features distribution shift min features refers to Minimum number of features to keep, keeping least shifted feature at least if 1" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "drop features distribution shift min features refers to Minimum number of features to keep, keeping least shifted feature at least if 1" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Provide a short explanation of the expert setting drop_features_distribution_shift_min_features", + "output": "drop features distribution shift min features refers to Minimum number of features to keep, keeping least shifted feature at least if 1" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting drop_features_distribution_shift_min_features", + "output": "drop features distribution shift min features refers to Minimum number of features to keep, keeping least shifted feature at least if 1" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_high_notification_level", + "output": "shift high notification level refers to Shift beyond which shows HIGH notification, else MEDIUM" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift_high_notification_level", + "output": "shift high notification level refers to Shift beyond which shows HIGH notification, else MEDIUM" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "shift high notification level", + "output": "shift high notification level refers to Shift beyond which shows HIGH notification, else MEDIUM" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "shift high notification level refers to Shift beyond which shows HIGH notification, else MEDIUM" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting shift_high_notification_level", + "output": "shift high notification level refers to Shift beyond which shows HIGH notification, else MEDIUM" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting 
shift_high_notification_level", + "output": "shift high notification level refers to Shift beyond which shows HIGH notification, else MEDIUM" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_check_text", + "output": "leakage check text refers to Whether to enable checking text for leakage, currently only via label encoding." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_check_text", + "output": "leakage check text refers to Whether to enable checking text for leakage, currently only via label encoding." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage check text", + "output": "leakage check text refers to Whether to enable checking text for leakage, currently only via label encoding." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "leakage check text refers to Whether to enable checking text for leakage, currently only via label encoding." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting leakage_check_text", + "output": "leakage check text refers to Whether to enable checking text for leakage, currently only via label encoding." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting leakage_check_text", + "output": "leakage check text refers to Whether to enable checking text for leakage, currently only via label encoding." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_key_features_varimp", + "output": "leakage key features varimp refers to Normalized training variable importance (per 1 minus AUC/R2 to control for leaky varimp dominance) above which to check the feature for leakage Useful to avoid checking likely unimportant features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_key_features_varimp", + "output": "leakage key features varimp refers to Normalized training variable importance (per 1 minus AUC/R2 to control for leaky varimp dominance) above which to check the feature for leakage Useful to avoid checking likely unimportant features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage key features varimp", + "output": "leakage key features varimp refers to Normalized training variable importance (per 1 minus AUC/R2 to control for leaky varimp dominance) above which to check the feature for leakage Useful to avoid checking likely unimportant features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "leakage key features varimp refers to Normalized training variable importance (per 1 minus AUC/R2 to control for leaky varimp dominance) above which to check the feature for leakage Useful to avoid checking likely unimportant features" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting leakage_key_features_varimp", + "output": "leakage key features varimp refers to Normalized training variable importance (per 1 minus AUC/R2 to control for leaky varimp dominance) above which to check the feature for leakage Useful to avoid checking likely unimportant features" + }, + { + 
"prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting leakage_key_features_varimp", + "output": "leakage key features varimp refers to Normalized training variable importance (per 1 minus AUC/R2 to control for leaky varimp dominance) above which to check the feature for leakage Useful to avoid checking likely unimportant features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_key_features_varimp_if_no_early_stopping", + "output": "leakage key features varimp if no early stopping refers to Like leakage_key_features_varimp, but applies if early stopping disabled when can trust multiple leaks to get uniform varimp." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_key_features_varimp_if_no_early_stopping", + "output": "leakage key features varimp if no early stopping refers to Like leakage_key_features_varimp, but applies if early stopping disabled when can trust multiple leaks to get uniform varimp." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage key features varimp if no early stopping", + "output": "leakage key features varimp if no early stopping refers to Like leakage_key_features_varimp, but applies if early stopping disabled when can trust multiple leaks to get uniform varimp." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "leakage key features varimp if no early stopping refers to Like leakage_key_features_varimp, but applies if early stopping disabled when can trust multiple leaks to get uniform varimp." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting leakage_key_features_varimp_if_no_early_stopping", + "output": "leakage key features varimp if no early stopping refers to Like leakage_key_features_varimp, but applies if early stopping disabled when can trust multiple leaks to get uniform varimp." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting leakage_key_features_varimp_if_no_early_stopping", + "output": "leakage key features varimp if no early stopping refers to Like leakage_key_features_varimp, but applies if early stopping disabled when can trust multiple leaks to get uniform varimp." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_check_reduced_features", + "output": "leakage check reduced features refers to Whether to only check certain features based upon the value of leakage_key_features_varimp. If any feature has AUC near 1, will consume all variable importance, even if another feature is also leaky. So False is safest option, but True generally good if many columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_check_reduced_features", + "output": "leakage check reduced features refers to Whether to only check certain features based upon the value of leakage_key_features_varimp. If any feature has AUC near 1, will consume all variable importance, even if another feature is also leaky. So False is safest option, but True generally good if many columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage check reduced features", + "output": "leakage check reduced features refers to Whether to only check certain features based upon the value of leakage_key_features_varimp. 
If any feature has AUC near 1, will consume all variable importance, even if another feature is also leaky. So False is safest option, but True generally good if many columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "leakage check reduced features refers to Whether to only check certain features based upon the value of leakage_key_features_varimp. If any feature has AUC near 1, will consume all variable importance, even if another feature is also leaky. So False is safest option, but True generally good if many columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting leakage_check_reduced_features", + "output": "leakage check reduced features refers to Whether to only check certain features based upon the value of leakage_key_features_varimp. If any feature has AUC near 1, will consume all variable importance, even if another feature is also leaky. So False is safest option, but True generally good if many columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting leakage_check_reduced_features", + "output": "leakage check reduced features refers to Whether to only check certain features based upon the value of leakage_key_features_varimp. If any feature has AUC near 1, will consume all variable importance, even if another feature is also leaky. So False is safest option, but True generally good if many columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_rf_for_leakage_if_have_lgbm", + "output": "use rf for leakage if have lgbm refers to Whether to use LightGBM random forest mode without early stopping for leakage detection." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_rf_for_leakage_if_have_lgbm", + "output": "use rf for leakage if have lgbm refers to Whether to use LightGBM random forest mode without early stopping for leakage detection." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use rf for leakage if have lgbm", + "output": "use rf for leakage if have lgbm refers to Whether to use LightGBM random forest mode without early stopping for leakage detection." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "use rf for leakage if have lgbm refers to Whether to use LightGBM random forest mode without early stopping for leakage detection." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting use_rf_for_leakage_if_have_lgbm", + "output": "use rf for leakage if have lgbm refers to Whether to use LightGBM random forest mode without early stopping for leakage detection." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting use_rf_for_leakage_if_have_lgbm", + "output": "use rf for leakage if have lgbm refers to Whether to use LightGBM random forest mode without early stopping for leakage detection." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_trees", + "output": "leakage trees refers to Number of trees to use to train model to check for leakage No larger than max_nestimators" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_trees", + "output": "leakage trees refers to Number of trees to use to train model to check for leakage No larger than max_nestimators" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage trees", + "output": "leakage trees refers to Number of trees to use to train model to check for leakage No larger than max_nestimators" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "leakage trees refers to Number of trees to use to train model to check for leakage No larger than max_nestimators" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting leakage_trees", + "output": "leakage trees refers to Number of trees to use to train model to check for leakage No larger than max_nestimators" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting leakage_trees", + "output": "leakage trees refers to Number of trees to use to train model to check for leakage No larger than max_nestimators" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_max_bin", + "output": "leakage max bin refers to The value of max_bin to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_max_bin", + "output": 
"leakage max bin refers to The value of max_bin to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage max bin", + "output": "leakage max bin refers to The value of max_bin to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "leakage max bin refers to The value of max_bin to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting leakage_max_bin", + "output": "leakage max bin refers to The value of max_bin to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting leakage_max_bin", + "output": "leakage max bin refers to The value of max_bin to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_min_max_depth", + "output": "leakage min max depth refers to The value of max_depth to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_min_max_depth", + "output": "leakage min max depth refers to The value of max_depth to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage min max depth", + "output": "leakage min max depth refers to The value of max_depth to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "leakage min max depth refers to The value of max_depth to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting leakage_min_max_depth", + "output": "leakage min max depth refers to The value of max_depth to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting leakage_min_max_depth", + "output": "leakage min max depth refers to The value of max_depth to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_max_max_depth", + "output": "leakage max max depth refers to The value of max_depth to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_max_max_depth", + "output": "leakage max max depth refers to The value of max_depth to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage max max depth", + "output": "leakage max max depth refers to The value of max_depth to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "leakage max max depth refers to The value of max_depth to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting leakage_max_max_depth", + "output": "leakage max max 
depth refers to The value of max_depth to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting leakage_max_max_depth", + "output": "leakage max max depth refers to The value of max_depth to use for trees to use to train model to check for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect_features_leakage_threshold_auc", + "output": "detect features leakage threshold auc refers to When leakage detection is enabled, if AUC (R2 for regression) on original data (label-encoded)is above or equal to this value, then trigger per-feature leakage detection " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect_features_leakage_threshold_auc", + "output": "detect features leakage threshold auc refers to Leakage feature detection AUC/R2 threshold: When leakage detection is enabled, if AUC (R2 for regression) on original data (label-encoded)is above or equal to this value, then trigger per-feature leakage detection " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect features leakage threshold auc", + "output": "detect features leakage threshold auc refers to Leakage feature detection AUC/R2 threshold: When leakage detection is enabled, if AUC (R2 for regression) on original data (label-encoded)is above or equal to this value, then trigger per-feature leakage detection " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Leakage feature detection AUC/R2 threshold: ", + "output": "detect features leakage threshold auc refers to Leakage feature detection AUC/R2 threshold: When leakage detection is enabled, if AUC (R2 for regression) on original 
data (label-encoded)is above or equal to this value, then trigger per-feature leakage detection " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting detect_features_leakage_threshold_auc", + "output": "detect features leakage threshold auc refers to When leakage detection is enabled, if AUC (R2 for regression) on original data (label-encoded)is above or equal to this value, then trigger per-feature leakage detection " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting detect_features_leakage_threshold_auc", + "output": "detect features leakage threshold auc refers to Leakage feature detection AUC/R2 threshold: When leakage detection is enabled, if AUC (R2 for regression) on original data (label-encoded)is above or equal to this value, then trigger per-feature leakage detection " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect_features_per_feature_leakage_threshold_auc", + "output": "detect features per feature leakage threshold auc refers to When leakage detection is enabled, show features for which AUC (R2 for regression,for whether that predictor/feature alone predicts the target) is above or equal to this value.Feature is dropped if AUC/R2 is above or equal to drop_features_leakage_threshold_auc " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect_features_per_feature_leakage_threshold_auc", + "output": "detect features per feature leakage threshold auc refers to Leakage features per feature detection AUC/R2 threshold: When leakage detection is enabled, show features for which AUC (R2 for regression,for whether that predictor/feature alone predicts the target) is above or equal to this value.Feature is dropped if AUC/R2 is above or equal to drop_features_leakage_threshold_auc " + }, + 
{ + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detect features per feature leakage threshold auc", + "output": "detect features per feature leakage threshold auc refers to Leakage features per feature detection AUC/R2 threshold: When leakage detection is enabled, show features for which AUC (R2 for regression,for whether that predictor/feature alone predicts the target) is above or equal to this value.Feature is dropped if AUC/R2 is above or equal to drop_features_leakage_threshold_auc " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Leakage features per feature detection AUC/R2 threshold: ", + "output": "detect features per feature leakage threshold auc refers to Leakage features per feature detection AUC/R2 threshold: When leakage detection is enabled, show features for which AUC (R2 for regression,for whether that predictor/feature alone predicts the target) is above or equal to this value.Feature is dropped if AUC/R2 is above or equal to drop_features_leakage_threshold_auc " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting detect_features_per_feature_leakage_threshold_auc", + "output": "detect features per feature leakage threshold auc refers to When leakage detection is enabled, show features for which AUC (R2 for regression,for whether that predictor/feature alone predicts the target) is above or equal to this value.Feature is dropped if AUC/R2 is above or equal to drop_features_leakage_threshold_auc " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting detect_features_per_feature_leakage_threshold_auc", + "output": "detect features per feature leakage threshold auc refers to Leakage features per feature detection AUC/R2 threshold: When leakage detection is enabled, show features for which AUC (R2 
for regression,for whether that predictor/feature alone predicts the target) is above or equal to this value.Feature is dropped if AUC/R2 is above or equal to drop_features_leakage_threshold_auc " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_features_leakage_min_features", + "output": "drop features leakage min features refers to Minimum number of features to keep, keeping least leakage feature at least if 1" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop_features_leakage_min_features", + "output": "drop features leakage min features refers to Minimum number of features to keep, keeping least leakage feature at least if 1" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "drop features leakage min features", + "output": "drop features leakage min features refers to Minimum number of features to keep, keeping least leakage feature at least if 1" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "drop features leakage min features refers to Minimum number of features to keep, keeping least leakage feature at least if 1" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting drop_features_leakage_min_features", + "output": "drop features leakage min features refers to Minimum number of features to keep, keeping least leakage feature at least if 1" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting drop_features_leakage_min_features", + "output": "drop features leakage min features refers to Minimum number of features to keep, keeping least leakage feature at least if 1" + }, + { + "prompt_type": "human_bot", + "instruction": 
"Explain the following expert setting for Driverless AI", + "input": "leakage_train_test_split", + "output": "leakage train test split refers to Ratio of train to validation holdout when testing for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage_train_test_split", + "output": "leakage train test split refers to Ratio of train to validation holdout when testing for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "leakage train test split", + "output": "leakage train test split refers to Ratio of train to validation holdout when testing for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "leakage train test split refers to Ratio of train to validation holdout when testing for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting leakage_train_test_split", + "output": "leakage train test split refers to Ratio of train to validation holdout when testing for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting leakage_train_test_split", + "output": "leakage train test split refers to Ratio of train to validation holdout when testing for leakage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detailed_traces", + "output": "detailed traces refers to Whether to enable detailed traces (in GUI Trace)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detailed_traces", + "output": "detailed traces refers to Enable detailed traces: Whether to enable detailed traces (in GUI Trace)" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Explain the following expert setting for Driverless AI", + "input": "detailed traces", + "output": "detailed traces refers to Enable detailed traces: Whether to enable detailed traces (in GUI Trace)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable detailed traces: ", + "output": "detailed traces refers to Enable detailed traces: Whether to enable detailed traces (in GUI Trace)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting detailed_traces", + "output": "detailed traces refers to Whether to enable detailed traces (in GUI Trace)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting detailed_traces", + "output": "detailed traces refers to Enable detailed traces: Whether to enable detailed traces (in GUI Trace)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "debug_log", + "output": "debug log refers to Whether to enable debug log level (in log files)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "debug_log", + "output": "debug log refers to Enable debug log level: Whether to enable debug log level (in log files)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "debug log", + "output": "debug log refers to Enable debug log level: Whether to enable debug log level (in log files)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable debug log level: ", + "output": "debug log refers to Enable debug log level: Whether to enable debug log level (in log files)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the 
expert setting debug_log", + "output": "debug log refers to Whether to enable debug log level (in log files)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting debug_log", + "output": "debug log refers to Enable debug log level: Whether to enable debug log level (in log files)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "log_system_info_per_experiment", + "output": "log system info per experiment refers to Whether to add logging of system information such as CPU, GPU, disk space at the start of each experiment log. Same information is already logged in system logs." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "log_system_info_per_experiment", + "output": "log system info per experiment refers to Enable logging of system information for each experiment: Whether to add logging of system information such as CPU, GPU, disk space at the start of each experiment log. Same information is already logged in system logs." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "log system info per experiment", + "output": "log system info per experiment refers to Enable logging of system information for each experiment: Whether to add logging of system information such as CPU, GPU, disk space at the start of each experiment log. Same information is already logged in system logs." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable logging of system information for each experiment: ", + "output": "log system info per experiment refers to Enable logging of system information for each experiment: Whether to add logging of system information such as CPU, GPU, disk space at the start of each experiment log. 
Same information is already logged in system logs." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting log_system_info_per_experiment", + "output": "log system info per experiment refers to Whether to add logging of system information such as CPU, GPU, disk space at the start of each experiment log. Same information is already logged in system logs." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting log_system_info_per_experiment", + "output": "log system info per experiment refers to Enable logging of system information for each experiment: Whether to add logging of system information such as CPU, GPU, disk space at the start of each experiment log. Same information is already logged in system logs." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_system", + "output": "check system refers to Whether to check system installation on server startup: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_system", + "output": "check system refers to Whether to check system installation on server startup: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check system", + "output": "check system refers to Whether to check system installation on server startup: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to check system installation on server startup: ", + "output": "check system refers to Whether to check system installation on server startup: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting check_system", + "output": "check system refers to Whether to check system 
installation on server startup: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting check_system", + "output": "check system refers to Whether to check system installation on server startup: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_system_basic", + "output": "check system basic refers to Whether to report basic system information on server startup: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_system_basic", + "output": "check system basic refers to Whether to report basic system information on server startup: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check system basic", + "output": "check system basic refers to Whether to report basic system information on server startup: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to report basic system information on server startup: ", + "output": "check system basic refers to Whether to report basic system information on server startup: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting check_system_basic", + "output": "check system basic refers to Whether to report basic system information on server startup: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting check_system_basic", + "output": "check system basic refers to Whether to report basic system information on server startup: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "abs_tol_for_perfect_score", + "output": "abs tol for perfect score refers to How close to the 
optimal value (usually 1 or 0) does the validation score need to be to be considered perfect (to stop the experiment)?" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "abs_tol_for_perfect_score", + "output": "abs tol for perfect score refers to How close to the optimal value (usually 1 or 0) does the validation score need to be to be considered perfect (to stop the experiment)?" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "abs tol for perfect score", + "output": "abs tol for perfect score refers to How close to the optimal value (usually 1 or 0) does the validation score need to be to be considered perfect (to stop the experiment)?" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "abs tol for perfect score refers to How close to the optimal value (usually 1 or 0) does the validation score need to be to be considered perfect (to stop the experiment)?" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting abs_tol_for_perfect_score", + "output": "abs tol for perfect score refers to How close to the optimal value (usually 1 or 0) does the validation score need to be to be considered perfect (to stop the experiment)?" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting abs_tol_for_perfect_score", + "output": "abs tol for perfect score refers to How close to the optimal value (usually 1 or 0) does the validation score need to be to be considered perfect (to stop the experiment)?" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_ingest_timeout", + "output": "data ingest timeout refers to Timeout in seconds to wait for data ingestion." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_ingest_timeout", + "output": "data ingest timeout refers to Timeout in seconds to wait for data ingestion." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data ingest timeout", + "output": "data ingest timeout refers to Timeout in seconds to wait for data ingestion." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "data ingest timeout refers to Timeout in seconds to wait for data ingestion." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting data_ingest_timeout", + "output": "data ingest timeout refers to Timeout in seconds to wait for data ingestion." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting data_ingest_timeout", + "output": "data ingest timeout refers to Timeout in seconds to wait for data ingestion." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mutate_timeout", + "output": "mutate timeout refers to How many seconds to allow mutate to take, nominally only takes few seconds at most. But on busy system doing many individuals, might take longer. Optuna sometimes live lock hangs in scipy random distribution maker." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mutate_timeout", + "output": "mutate timeout refers to How many seconds to allow mutate to take, nominally only takes few seconds at most. But on busy system doing many individuals, might take longer. Optuna sometimes live lock hangs in scipy random distribution maker." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mutate timeout", + "output": "mutate timeout refers to How many seconds to allow mutate to take, nominally only takes few seconds at most. But on busy system doing many individuals, might take longer. Optuna sometimes live lock hangs in scipy random distribution maker." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mutate timeout refers to How many seconds to allow mutate to take, nominally only takes few seconds at most. But on busy system doing many individuals, might take longer. Optuna sometimes live lock hangs in scipy random distribution maker." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mutate_timeout", + "output": "mutate timeout refers to How many seconds to allow mutate to take, nominally only takes few seconds at most. But on busy system doing many individuals, might take longer. Optuna sometimes live lock hangs in scipy random distribution maker." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mutate_timeout", + "output": "mutate timeout refers to How many seconds to allow mutate to take, nominally only takes few seconds at most. But on busy system doing many individuals, might take longer. Optuna sometimes live lock hangs in scipy random distribution maker." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu_locking_trust_pool_submission", + "output": "gpu locking trust pool submission refers to Whether to trust GPU locking for submission of GPU jobs to limit memory usage. If False, then wait for as GPU submissions to be less than number of GPUs, even if later jobs could be purely CPU jobs that did not need to wait. 
Only applicable if not restricting number of GPUs via num_gpus_per_experiment, else have to use resources instead of relying upon locking. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu_locking_trust_pool_submission", + "output": "gpu locking trust pool submission refers to Whether to trust GPU locking for submission of GPU jobs to limit memory usage. If False, then wait for as GPU submissions to be less than number of GPUs, even if later jobs could be purely CPU jobs that did not need to wait. Only applicable if not restricting number of GPUs via num_gpus_per_experiment, else have to use resources instead of relying upon locking. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu locking trust pool submission", + "output": "gpu locking trust pool submission refers to Whether to trust GPU locking for submission of GPU jobs to limit memory usage. If False, then wait for as GPU submissions to be less than number of GPUs, even if later jobs could be purely CPU jobs that did not need to wait. Only applicable if not restricting number of GPUs via num_gpus_per_experiment, else have to use resources instead of relying upon locking. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "gpu locking trust pool submission refers to Whether to trust GPU locking for submission of GPU jobs to limit memory usage. If False, then wait for as GPU submissions to be less than number of GPUs, even if later jobs could be purely CPU jobs that did not need to wait. Only applicable if not restricting number of GPUs via num_gpus_per_experiment, else have to use resources instead of relying upon locking. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting gpu_locking_trust_pool_submission", + "output": "gpu locking trust pool submission refers to Whether to trust GPU locking for submission of GPU jobs to limit memory usage. If False, then wait for as GPU submissions to be less than number of GPUs, even if later jobs could be purely CPU jobs that did not need to wait. Only applicable if not restricting number of GPUs via num_gpus_per_experiment, else have to use resources instead of relying upon locking. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting gpu_locking_trust_pool_submission", + "output": "gpu locking trust pool submission refers to Whether to trust GPU locking for submission of GPU jobs to limit memory usage. If False, then wait for as GPU submissions to be less than number of GPUs, even if later jobs could be purely CPU jobs that did not need to wait. Only applicable if not restricting number of GPUs via num_gpus_per_experiment, else have to use resources instead of relying upon locking. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu_locking_free_dead", + "output": "gpu locking free dead refers to Whether to steal GPU locks when process is neither on GPU PID list nor using CPU resources at all (e.g. sleeping). Only steal from multi-GPU locks that are incomplete. Prevents deadlocks in case multi-GPU model hangs." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu_locking_free_dead", + "output": "gpu locking free dead refers to Whether to steal GPU locks when process is neither on GPU PID list nor using CPU resources at all (e.g. sleeping). Only steal from multi-GPU locks that are incomplete. Prevents deadlocks in case multi-GPU model hangs." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu locking free dead", + "output": "gpu locking free dead refers to Whether to steal GPU locks when process is neither on GPU PID list nor using CPU resources at all (e.g. sleeping). Only steal from multi-GPU locks that are incomplete. Prevents deadlocks in case multi-GPU model hangs." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "gpu locking free dead refers to Whether to steal GPU locks when process is neither on GPU PID list nor using CPU resources at all (e.g. sleeping). Only steal from multi-GPU locks that are incomplete. Prevents deadlocks in case multi-GPU model hangs." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting gpu_locking_free_dead", + "output": "gpu locking free dead refers to Whether to steal GPU locks when process is neither on GPU PID list nor using CPU resources at all (e.g. sleeping). Only steal from multi-GPU locks that are incomplete. Prevents deadlocks in case multi-GPU model hangs." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting gpu_locking_free_dead", + "output": "gpu locking free dead refers to Whether to steal GPU locks when process is neither on GPU PID list nor using CPU resources at all (e.g. sleeping). Only steal from multi-GPU locks that are incomplete. Prevents deadlocks in case multi-GPU model hangs." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "log_predict_info", + "output": "log predict info refers to Whether to show detailed predict information in logs.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "log_predict_info", + "output": "log predict info refers to Whether to show detailed predict information in logs.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "log predict info", + "output": "log predict info refers to Whether to show detailed predict information in logs.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to show detailed predict information in logs.: ", + "output": "log predict info refers to Whether to show detailed predict information in logs.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting log_predict_info", + "output": "log predict info refers to Whether to show detailed predict information in logs.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting log_predict_info", + "output": "log predict info refers to Whether to show detailed predict information in logs.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "log_fit_info", + "output": "log fit info refers to Whether to show detailed fit information in logs.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "log_fit_info", + "output": "log fit info refers to Whether to show detailed fit information in logs.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following 
expert setting for Driverless AI", + "input": "log fit info", + "output": "log fit info refers to Whether to show detailed fit information in logs.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to show detailed fit information in logs.: ", + "output": "log fit info refers to Whether to show detailed fit information in logs.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting log_fit_info", + "output": "log fit info refers to Whether to show detailed fit information in logs.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting log_fit_info", + "output": "log fit info refers to Whether to show detailed fit information in logs.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stalled_time_kill_ref", + "output": "stalled time kill ref refers to Amount of time to stall (in seconds) before killing the job (assumes it hung). Reference time is scaled by train data shape of rows * cols to get used stalled_time_kill" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stalled_time_kill_ref", + "output": "stalled time kill ref refers to Amount of time to stall (in seconds) before killing the job (assumes it hung). Reference time is scaled by train data shape of rows * cols to get used stalled_time_kill" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stalled time kill ref", + "output": "stalled time kill ref refers to Amount of time to stall (in seconds) before killing the job (assumes it hung). 
Reference time is scaled by train data shape of rows * cols to get used stalled_time_kill" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "stalled time kill ref refers to Amount of time to stall (in seconds) before killing the job (assumes it hung). Reference time is scaled by train data shape of rows * cols to get used stalled_time_kill" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting stalled_time_kill_ref", + "output": "stalled time kill ref refers to Amount of time to stall (in seconds) before killing the job (assumes it hung). Reference time is scaled by train data shape of rows * cols to get used stalled_time_kill" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting stalled_time_kill_ref", + "output": "stalled time kill ref refers to Amount of time to stall (in seconds) before killing the job (assumes it hung). Reference time is scaled by train data shape of rows * cols to get used stalled_time_kill" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "long_time_psdump", + "output": "long time psdump refers to Amount of time between checks for some process taking long time, every cycle full process list will be dumped to console or experiment logs if possible." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "long_time_psdump", + "output": "long time psdump refers to Amount of time between checks for some process taking long time, every cycle full process list will be dumped to console or experiment logs if possible." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "long time psdump", + "output": "long time psdump refers to Amount of time between checks for some process taking long time, every cycle full process list will be dumped to console or experiment logs if possible." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "long time psdump refers to Amount of time between checks for some process taking long time, every cycle full process list will be dumped to console or experiment logs if possible." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting long_time_psdump", + "output": "long time psdump refers to Amount of time between checks for some process taking long time, every cycle full process list will be dumped to console or experiment logs if possible." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting long_time_psdump", + "output": "long time psdump refers to Amount of time between checks for some process taking long time, every cycle full process list will be dumped to console or experiment logs if possible." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "do_psdump", + "output": "do psdump refers to Whether to dump ps every long_time_psdump" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "do_psdump", + "output": "do psdump refers to Whether to dump ps every long_time_psdump" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "do psdump", + "output": "do psdump refers to Whether to dump ps every long_time_psdump" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "do psdump refers to Whether to dump ps every long_time_psdump" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting do_psdump", + "output": "do psdump refers to Whether to dump ps every long_time_psdump" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting do_psdump", + "output": "do psdump refers to Whether to dump ps every long_time_psdump" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "livelock_signal", + "output": "livelock signal refers to Whether to check every long_time_psdump seconds and SIGUSR1 to all children to see where maybe stuck or taking long time." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "livelock_signal", + "output": "livelock signal refers to Whether to check every long_time_psdump seconds and SIGUSR1 to all children to see where maybe stuck or taking long time." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "livelock signal", + "output": "livelock signal refers to Whether to check every long_time_psdump seconds and SIGUSR1 to all children to see where maybe stuck or taking long time." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "livelock signal refers to Whether to check every long_time_psdump seconds and SIGUSR1 to all children to see where maybe stuck or taking long time." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting livelock_signal", + "output": "livelock signal refers to Whether to check every long_time_psdump seconds and SIGUSR1 to all children to see where maybe stuck or taking long time." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting livelock_signal", + "output": "livelock signal refers to Whether to check every long_time_psdump seconds and SIGUSR1 to all children to see where maybe stuck or taking long time." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_cpu_sockets_override", + "output": "num cpu sockets override refers to Value to override number of sockets, in case DAIs determination is wrong, for non-trivial systems. 0 means auto." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_cpu_sockets_override", + "output": "num cpu sockets override refers to Value to override number of sockets, in case DAIs determination is wrong, for non-trivial systems. 0 means auto." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num cpu sockets override", + "output": "num cpu sockets override refers to Value to override number of sockets, in case DAIs determination is wrong, for non-trivial systems. 0 means auto." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "num cpu sockets override refers to Value to override number of sockets, in case DAIs determination is wrong, for non-trivial systems. 0 means auto." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_cpu_sockets_override", + "output": "num cpu sockets override refers to Value to override number of sockets, in case DAIs determination is wrong, for non-trivial systems. 0 means auto." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_cpu_sockets_override", + "output": "num cpu sockets override refers to Value to override number of sockets, in case DAIs determination is wrong, for non-trivial systems. 0 means auto." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_gpus_override", + "output": "num gpus override refers to Value to override number of GPUs, in case DAIs determination is wrong, for non-trivial systems. -1 means auto. Can also set min_num_cores_per_gpu=-1 to allow any number of GPUs for each experiment regardless of number of cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_gpus_override", + "output": "num gpus override refers to Value to override number of GPUs, in case DAIs determination is wrong, for non-trivial systems. 
-1 means auto. Can also set min_num_cores_per_gpu=-1 to allow any number of GPUs for each experiment regardless of number of cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num gpus override", + "output": "num gpus override refers to Value to override number of GPUs, in case DAIs determination is wrong, for non-trivial systems. -1 means auto. Can also set min_num_cores_per_gpu=-1 to allow any number of GPUs for each experiment regardless of number of cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "num gpus override refers to Value to override number of GPUs, in case DAIs determination is wrong, for non-trivial systems. -1 means auto. Can also set min_num_cores_per_gpu=-1 to allow any number of GPUs for each experiment regardless of number of cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_gpus_override", + "output": "num gpus override refers to Value to override number of GPUs, in case DAIs determination is wrong, for non-trivial systems. -1 means auto. Can also set min_num_cores_per_gpu=-1 to allow any number of GPUs for each experiment regardless of number of cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_gpus_override", + "output": "num gpus override refers to Value to override number of GPUs, in case DAIs determination is wrong, for non-trivial systems. -1 means auto. Can also set min_num_cores_per_gpu=-1 to allow any number of GPUs for each experiment regardless of number of cores." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show_gpu_usage_only_if_locked", + "output": "show gpu usage only if locked refers to Whether to show GPU usage only when locking. 
'auto' means 'on' if num_gpus_override is different than actual total visible GPUs, else it means 'off'" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show_gpu_usage_only_if_locked", + "output": "show gpu usage only if locked refers to Whether to show GPU usage only when locking. 'auto' means 'on' if num_gpus_override is different than actual total visible GPUs, else it means 'off'" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show gpu usage only if locked", + "output": "show gpu usage only if locked refers to Whether to show GPU usage only when locking. 'auto' means 'on' if num_gpus_override is different than actual total visible GPUs, else it means 'off'" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "show gpu usage only if locked refers to Whether to show GPU usage only when locking. 'auto' means 'on' if num_gpus_override is different than actual total visible GPUs, else it means 'off'" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting show_gpu_usage_only_if_locked", + "output": "show gpu usage only if locked refers to Whether to show GPU usage only when locking. 'auto' means 'on' if num_gpus_override is different than actual total visible GPUs, else it means 'off'" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting show_gpu_usage_only_if_locked", + "output": "show gpu usage only if locked refers to Whether to show GPU usage only when locking. 
'auto' means 'on' if num_gpus_override is different than actual total visible GPUs, else it means 'off'" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show_inapplicable_models_preview", + "output": "show inapplicable models preview refers to Show inapplicable models in preview, to be sure not missing models one could have used" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show_inapplicable_models_preview", + "output": "show inapplicable models preview refers to Show inapplicable models in preview, to be sure not missing models one could have used" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show inapplicable models preview", + "output": "show inapplicable models preview refers to Show inapplicable models in preview, to be sure not missing models one could have used" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "show inapplicable models preview refers to Show inapplicable models in preview, to be sure not missing models one could have used" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting show_inapplicable_models_preview", + "output": "show inapplicable models preview refers to Show inapplicable models in preview, to be sure not missing models one could have used" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting show_inapplicable_models_preview", + "output": "show inapplicable models preview refers to Show inapplicable models in preview, to be sure not missing models one could have used" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": 
"show_inapplicable_transformers_preview", + "output": "show inapplicable transformers preview refers to Show inapplicable transformers in preview, to be sure not missing transformers one could have used" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show_inapplicable_transformers_preview", + "output": "show inapplicable transformers preview refers to Show inapplicable transformers in preview, to be sure not missing transformers one could have used" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show inapplicable transformers preview", + "output": "show inapplicable transformers preview refers to Show inapplicable transformers in preview, to be sure not missing transformers one could have used" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "show inapplicable transformers preview refers to Show inapplicable transformers in preview, to be sure not missing transformers one could have used" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting show_inapplicable_transformers_preview", + "output": "show inapplicable transformers preview refers to Show inapplicable transformers in preview, to be sure not missing transformers one could have used" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting show_inapplicable_transformers_preview", + "output": "show inapplicable transformers preview refers to Show inapplicable transformers in preview, to be sure not missing transformers one could have used" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show_warnings_preview", + "output": "show warnings preview refers to Show warnings for models 
(image auto, Dask multinode/multi-GPU) if conditions are met to use but not chosen to avoid missing models that could benefit accuracy/performance" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show_warnings_preview", + "output": "show warnings preview refers to Show warnings for models (image auto, Dask multinode/multi-GPU) if conditions are met to use but not chosen to avoid missing models that could benefit accuracy/performance" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show warnings preview", + "output": "show warnings preview refers to Show warnings for models (image auto, Dask multinode/multi-GPU) if conditions are met to use but not chosen to avoid missing models that could benefit accuracy/performance" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "show warnings preview refers to Show warnings for models (image auto, Dask multinode/multi-GPU) if conditions are met to use but not chosen to avoid missing models that could benefit accuracy/performance" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting show_warnings_preview", + "output": "show warnings preview refers to Show warnings for models (image auto, Dask multinode/multi-GPU) if conditions are met to use but not chosen to avoid missing models that could benefit accuracy/performance" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting show_warnings_preview", + "output": "show warnings preview refers to Show warnings for models (image auto, Dask multinode/multi-GPU) if conditions are met to use but not chosen to avoid missing models that could benefit accuracy/performance" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain 
the following expert setting for Driverless AI", + "input": "show_warnings_preview_unused_map_features", + "output": "show warnings preview unused map features refers to Show warnings for models that have no transformers for certain features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show_warnings_preview_unused_map_features", + "output": "show warnings preview unused map features refers to Show warnings for models that have no transformers for certain features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show warnings preview unused map features", + "output": "show warnings preview unused map features refers to Show warnings for models that have no transformers for certain features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "show warnings preview unused map features refers to Show warnings for models that have no transformers for certain features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting show_warnings_preview_unused_map_features", + "output": "show warnings preview unused map features refers to Show warnings for models that have no transformers for certain features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting show_warnings_preview_unused_map_features", + "output": "show warnings preview unused map features refers to Show warnings for models that have no transformers for certain features." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cols_show_unused_features", + "output": "max cols show unused features refers to Up to how many input features to determine, during GUI/client preview, unused features. Too many slows preview down." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cols_show_unused_features", + "output": "max cols show unused features refers to Up to how many input features to determine, during GUI/client preview, unused features. Too many slows preview down." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max cols show unused features", + "output": "max cols show unused features refers to Up to how many input features to determine, during GUI/client preview, unused features. Too many slows preview down." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max cols show unused features refers to Up to how many input features to determine, during GUI/client preview, unused features. Too many slows preview down." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_cols_show_unused_features", + "output": "max cols show unused features refers to Up to how many input features to determine, during GUI/client preview, unused features. Too many slows preview down." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_cols_show_unused_features", + "output": "max cols show unused features refers to Up to how many input features to determine, during GUI/client preview, unused features. Too many slows preview down." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cols_show_feature_transformer_mapping", + "output": "max cols show feature transformer mapping refers to Up to how many input features to show transformers used for each input feature." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cols_show_feature_transformer_mapping", + "output": "max cols show feature transformer mapping refers to Up to how many input features to show transformers used for each input feature." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max cols show feature transformer mapping", + "output": "max cols show feature transformer mapping refers to Up to how many input features to show transformers used for each input feature." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max cols show feature transformer mapping refers to Up to how many input features to show transformers used for each input feature." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_cols_show_feature_transformer_mapping", + "output": "max cols show feature transformer mapping refers to Up to how many input features to show transformers used for each input feature." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_cols_show_feature_transformer_mapping", + "output": "max cols show feature transformer mapping refers to Up to how many input features to show transformers used for each input feature." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "warning_unused_feature_show_max", + "output": "warning unused feature show max refers to Up to how many input features to show, in preview, that are unused features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "warning_unused_feature_show_max", + "output": "warning unused feature show max refers to Up to how many input features to show, in preview, that are unused features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "warning unused feature show max", + "output": "warning unused feature show max refers to Up to how many input features to show, in preview, that are unused features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "warning unused feature show max refers to Up to how many input features to show, in preview, that are unused features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting warning_unused_feature_show_max", + "output": "warning unused feature show max refers to Up to how many input features to show, in preview, that are unused features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting warning_unused_feature_show_max", + "output": "warning unused feature show max refers to Up to how many input features to show, in preview, that are unused features." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "interaction_finder_gini_rel_improvement_threshold", + "output": "interaction finder gini rel improvement threshold refers to Required GINI relative improvement for InteractionTransformer. If GINI is not better than this relative improvement compared to original features considered in the interaction, then the interaction is not returned. If noisy data, and no clear signal in interactions but still want interactions, then can decrease this number." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "interaction_finder_gini_rel_improvement_threshold", + "output": "interaction finder gini rel improvement threshold refers to Required GINI relative improvement for Interactions: Required GINI relative improvement for InteractionTransformer. If GINI is not better than this relative improvement compared to original features considered in the interaction, then the interaction is not returned. If noisy data, and no clear signal in interactions but still want interactions, then can decrease this number." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "interaction finder gini rel improvement threshold", + "output": "interaction finder gini rel improvement threshold refers to Required GINI relative improvement for Interactions: Required GINI relative improvement for InteractionTransformer. If GINI is not better than this relative improvement compared to original features considered in the interaction, then the interaction is not returned. If noisy data, and no clear signal in interactions but still want interactions, then can decrease this number." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Required GINI relative improvement for Interactions: ", + "output": "interaction finder gini rel improvement threshold refers to Required GINI relative improvement for Interactions: Required GINI relative improvement for InteractionTransformer. If GINI is not better than this relative improvement compared to original features considered in the interaction, then the interaction is not returned. If noisy data, and no clear signal in interactions but still want interactions, then can decrease this number." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting interaction_finder_gini_rel_improvement_threshold", + "output": "interaction finder gini rel improvement threshold refers to Required GINI relative improvement for InteractionTransformer. If GINI is not better than this relative improvement compared to original features considered in the interaction, then the interaction is not returned. If noisy data, and no clear signal in interactions but still want interactions, then can decrease this number." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting interaction_finder_gini_rel_improvement_threshold", + "output": "interaction finder gini rel improvement threshold refers to Required GINI relative improvement for Interactions: Required GINI relative improvement for InteractionTransformer. If GINI is not better than this relative improvement compared to original features considered in the interaction, then the interaction is not returned. If noisy data, and no clear signal in interactions but still want interactions, then can decrease this number." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "interaction_finder_return_limit", + "output": "interaction finder return limit refers to Number of transformed Interactions to make as best out of many generated trial interactions." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "interaction_finder_return_limit", + "output": "interaction finder return limit refers to Number of transformed Interactions to make: Number of transformed Interactions to make as best out of many generated trial interactions." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "interaction finder return limit", + "output": "interaction finder return limit refers to Number of transformed Interactions to make: Number of transformed Interactions to make as best out of many generated trial interactions." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of transformed Interactions to make: ", + "output": "interaction finder return limit refers to Number of transformed Interactions to make: Number of transformed Interactions to make as best out of many generated trial interactions." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting interaction_finder_return_limit", + "output": "interaction finder return limit refers to Number of transformed Interactions to make as best out of many generated trial interactions." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting interaction_finder_return_limit", + "output": "interaction finder return limit refers to Number of transformed Interactions to make: Number of transformed Interactions to make as best out of many generated trial interactions." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_bootstrap", + "output": "enable bootstrap refers to Whether to enable bootstrap sampling. Provides error bars to validation and test scores based on the standard error of the bootstrap mean." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_bootstrap", + "output": "enable bootstrap refers to Whether to enable bootstrap sampling for validation and test scores.: Whether to enable bootstrap sampling. Provides error bars to validation and test scores based on the standard error of the bootstrap mean." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable bootstrap", + "output": "enable bootstrap refers to Whether to enable bootstrap sampling for validation and test scores.: Whether to enable bootstrap sampling. Provides error bars to validation and test scores based on the standard error of the bootstrap mean." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to enable bootstrap sampling for validation and test scores.: ", + "output": "enable bootstrap refers to Whether to enable bootstrap sampling for validation and test scores.: Whether to enable bootstrap sampling. Provides error bars to validation and test scores based on the standard error of the bootstrap mean." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_bootstrap", + "output": "enable bootstrap refers to Whether to enable bootstrap sampling. Provides error bars to validation and test scores based on the standard error of the bootstrap mean." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_bootstrap", + "output": "enable bootstrap refers to Whether to enable bootstrap sampling for validation and test scores.: Whether to enable bootstrap sampling. Provides error bars to validation and test scores based on the standard error of the bootstrap mean." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_bootstrap_samples", + "output": "min bootstrap samples refers to Minimum number of bootstrap samples to use for estimating score and its standard deviation Actual number of bootstrap samples will vary between the min and max, depending upon row count (more rows, fewer samples) and accuracy settings (higher accuracy, more samples) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_bootstrap_samples", + "output": "min bootstrap samples refers to Minimum number of bootstrap samples: Minimum number of bootstrap samples to use for estimating score and its standard deviation Actual number of bootstrap samples will vary between the min and max, depending upon row count (more rows, fewer samples) and accuracy settings (higher accuracy, more samples) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min bootstrap samples", + "output": "min bootstrap samples refers to Minimum number of bootstrap samples: Minimum number of bootstrap samples to use for estimating score and its standard deviation Actual number of bootstrap 
samples will vary between the min and max, depending upon row count (more rows, fewer samples) and accuracy settings (higher accuracy, more samples) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Minimum number of bootstrap samples: ", + "output": "min bootstrap samples refers to Minimum number of bootstrap samples: Minimum number of bootstrap samples to use for estimating score and its standard deviation Actual number of bootstrap samples will vary between the min and max, depending upon row count (more rows, fewer samples) and accuracy settings (higher accuracy, more samples) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_bootstrap_samples", + "output": "min bootstrap samples refers to Minimum number of bootstrap samples to use for estimating score and its standard deviation Actual number of bootstrap samples will vary between the min and max, depending upon row count (more rows, fewer samples) and accuracy settings (higher accuracy, more samples) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_bootstrap_samples", + "output": "min bootstrap samples refers to Minimum number of bootstrap samples: Minimum number of bootstrap samples to use for estimating score and its standard deviation Actual number of bootstrap samples will vary between the min and max, depending upon row count (more rows, fewer samples) and accuracy settings (higher accuracy, more samples) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_bootstrap_samples", + "output": "max bootstrap samples refers to Maximum number of bootstrap samples to use for estimating score and its standard deviation Actual number of bootstrap samples will vary between the min and max, depending upon row count (more rows, fewer 
samples) and accuracy settings (higher accuracy, more samples) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_bootstrap_samples", + "output": "max bootstrap samples refers to Maximum number of bootstrap samples: Maximum number of bootstrap samples to use for estimating score and its standard deviation Actual number of bootstrap samples will vary between the min and max, depending upon row count (more rows, fewer samples) and accuracy settings (higher accuracy, more samples) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max bootstrap samples", + "output": "max bootstrap samples refers to Maximum number of bootstrap samples: Maximum number of bootstrap samples to use for estimating score and its standard deviation Actual number of bootstrap samples will vary between the min and max, depending upon row count (more rows, fewer samples) and accuracy settings (higher accuracy, more samples) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of bootstrap samples: ", + "output": "max bootstrap samples refers to Maximum number of bootstrap samples: Maximum number of bootstrap samples to use for estimating score and its standard deviation Actual number of bootstrap samples will vary between the min and max, depending upon row count (more rows, fewer samples) and accuracy settings (higher accuracy, more samples) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_bootstrap_samples", + "output": "max bootstrap samples refers to Maximum number of bootstrap samples to use for estimating score and its standard deviation Actual number of bootstrap samples will vary between the min and max, depending upon row count (more rows, fewer samples) and accuracy settings 
(higher accuracy, more samples) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_bootstrap_samples", + "output": "max bootstrap samples refers to Maximum number of bootstrap samples: Maximum number of bootstrap samples to use for estimating score and its standard deviation Actual number of bootstrap samples will vary between the min and max, depending upon row count (more rows, fewer samples) and accuracy settings (higher accuracy, more samples) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_bootstrap_sample_size_factor", + "output": "min bootstrap sample size factor refers to Minimum fraction of row size to take as sample size for bootstrap estimator Actual sample size used for bootstrap estimate will vary between the min and max, depending upon row count (more rows, smaller sample size) and accuracy settings (higher accuracy, larger sample size) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_bootstrap_sample_size_factor", + "output": "min bootstrap sample size factor refers to Minimum fraction of rows to use for bootstrap samples: Minimum fraction of row size to take as sample size for bootstrap estimator Actual sample size used for bootstrap estimate will vary between the min and max, depending upon row count (more rows, smaller sample size) and accuracy settings (higher accuracy, larger sample size) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min bootstrap sample size factor", + "output": "min bootstrap sample size factor refers to Minimum fraction of rows to use for bootstrap samples: Minimum fraction of row size to take as sample size for bootstrap estimator Actual sample size used for bootstrap estimate will vary between the min and max, 
depending upon row count (more rows, smaller sample size) and accuracy settings (higher accuracy, larger sample size) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Minimum fraction of rows to use for bootstrap samples: ", + "output": "min bootstrap sample size factor refers to Minimum fraction of rows to use for bootstrap samples: Minimum fraction of row size to take as sample size for bootstrap estimator Actual sample size used for bootstrap estimate will vary between the min and max, depending upon row count (more rows, smaller sample size) and accuracy settings (higher accuracy, larger sample size) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_bootstrap_sample_size_factor", + "output": "min bootstrap sample size factor refers to Minimum fraction of row size to take as sample size for bootstrap estimator Actual sample size used for bootstrap estimate will vary between the min and max, depending upon row count (more rows, smaller sample size) and accuracy settings (higher accuracy, larger sample size) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_bootstrap_sample_size_factor", + "output": "min bootstrap sample size factor refers to Minimum fraction of rows to use for bootstrap samples: Minimum fraction of row size to take as sample size for bootstrap estimator Actual sample size used for bootstrap estimate will vary between the min and max, depending upon row count (more rows, smaller sample size) and accuracy settings (higher accuracy, larger sample size) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_bootstrap_sample_size_factor", + "output": "max bootstrap sample size factor refers to Maximum fraction of row size to take as sample size for bootstrap estimator 
Actual sample size used for bootstrap estimate will vary between the min and max, depending upon row count (more rows, smaller sample size) and accuracy settings (higher accuracy, larger sample size) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_bootstrap_sample_size_factor", + "output": "max bootstrap sample size factor refers to Maximum fraction of rows to use for bootstrap samples: Maximum fraction of row size to take as sample size for bootstrap estimator Actual sample size used for bootstrap estimate will vary between the min and max, depending upon row count (more rows, smaller sample size) and accuracy settings (higher accuracy, larger sample size) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max bootstrap sample size factor", + "output": "max bootstrap sample size factor refers to Maximum fraction of rows to use for bootstrap samples: Maximum fraction of row size to take as sample size for bootstrap estimator Actual sample size used for bootstrap estimate will vary between the min and max, depending upon row count (more rows, smaller sample size) and accuracy settings (higher accuracy, larger sample size) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum fraction of rows to use for bootstrap samples: ", + "output": "max bootstrap sample size factor refers to Maximum fraction of rows to use for bootstrap samples: Maximum fraction of row size to take as sample size for bootstrap estimator Actual sample size used for bootstrap estimate will vary between the min and max, depending upon row count (more rows, smaller sample size) and accuracy settings (higher accuracy, larger sample size) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting 
max_bootstrap_sample_size_factor", + "output": "max bootstrap sample size factor refers to Maximum fraction of row size to take as sample size for bootstrap estimator Actual sample size used for bootstrap estimate will vary between the min and max, depending upon row count (more rows, smaller sample size) and accuracy settings (higher accuracy, larger sample size) " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_bootstrap_sample_size_factor", + "output": "max bootstrap sample size factor refers to Maximum fraction of rows to use for bootstrap samples: Maximum fraction of row size to take as sample size for bootstrap estimator Actual sample size used for bootstrap estimate will vary between the min and max, depending upon row count (more rows, smaller sample size) and accuracy settings (higher accuracy, larger sample size) " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bootstrap_final_seed", + "output": "bootstrap final seed refers to Seed to use for final model bootstrap sampling, -1 means use experiment-derived seed. E.g. one can retrain final model with different seed to get different final model error bars for scores. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bootstrap_final_seed", + "output": "bootstrap final seed refers to Seed to use for final model bootstrap sampling: Seed to use for final model bootstrap sampling, -1 means use experiment-derived seed. E.g. one can retrain final model with different seed to get different final model error bars for scores. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bootstrap final seed", + "output": "bootstrap final seed refers to Seed to use for final model bootstrap sampling: Seed to use for final model bootstrap sampling, -1 means use experiment-derived seed. E.g. one can retrain final model with different seed to get different final model error bars for scores. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Seed to use for final model bootstrap sampling: ", + "output": "bootstrap final seed refers to Seed to use for final model bootstrap sampling: Seed to use for final model bootstrap sampling, -1 means use experiment-derived seed. E.g. one can retrain final model with different seed to get different final model error bars for scores. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting bootstrap_final_seed", + "output": "bootstrap final seed refers to Seed to use for final model bootstrap sampling, -1 means use experiment-derived seed. E.g. one can retrain final model with different seed to get different final model error bars for scores. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting bootstrap_final_seed", + "output": "bootstrap final seed refers to Seed to use for final model bootstrap sampling: Seed to use for final model bootstrap sampling, -1 means use experiment-derived seed. E.g. one can retrain final model with different seed to get different final model error bars for scores. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benford_mad_threshold_int", + "output": "benford mad threshold int refers to Benford's law: mean absolute deviance threshold equal and above which integer valued columns are treated as categoricals too" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benford_mad_threshold_int", + "output": "benford mad threshold int refers to Benford's law: mean absolute deviance threshold equal and above which integer valued columns are treated as categoricals too" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benford mad threshold int", + "output": "benford mad threshold int refers to Benford's law: mean absolute deviance threshold equal and above which integer valued columns are treated as categoricals too" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "benford mad threshold int refers to Benford's law: mean absolute deviance threshold equal and above which integer valued columns are treated as categoricals too" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting benford_mad_threshold_int", + "output": "benford mad threshold int refers to Benford's law: mean absolute deviance threshold equal and above which integer valued columns are treated as categoricals too" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting benford_mad_threshold_int", + "output": "benford mad threshold int refers to Benford's law: mean absolute deviance threshold equal and above which integer valued columns are treated as categoricals too" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following 
expert setting for Driverless AI", + "input": "benford_mad_threshold_real", + "output": "benford mad threshold real refers to Benford's law: mean absolute deviance threshold equal and above which real valued columns are treated as categoricals too" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benford_mad_threshold_real", + "output": "benford mad threshold real refers to Benford's law: mean absolute deviance threshold equal and above which real valued columns are treated as categoricals too" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benford mad threshold real", + "output": "benford mad threshold real refers to Benford's law: mean absolute deviance threshold equal and above which real valued columns are treated as categoricals too" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "benford mad threshold real refers to Benford's law: mean absolute deviance threshold equal and above which real valued columns are treated as categoricals too" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting benford_mad_threshold_real", + "output": "benford mad threshold real refers to Benford's law: mean absolute deviance threshold equal and above which real valued columns are treated as categoricals too" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting benford_mad_threshold_real", + "output": "benford mad threshold real refers to Benford's law: mean absolute deviance threshold equal and above which real valued columns are treated as categoricals too" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "varimp_threshold_at_interpretability_10", + 
"output": "varimp threshold at interpretability 10 refers to Variable importance below which feature is dropped (with possible replacement found that is better) This also sets overall scale for lower interpretability settings. Set to lower value if ok with many weak features despite choosing high interpretability, or if see drop in performance due to the need for weak features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "varimp_threshold_at_interpretability_10", + "output": "varimp threshold at interpretability 10 refers to Lowest allowed variable importance at interpretability 10: Variable importance below which feature is dropped (with possible replacement found that is better) This also sets overall scale for lower interpretability settings. Set to lower value if ok with many weak features despite choosing high interpretability, or if see drop in performance due to the need for weak features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "varimp threshold at interpretability 10", + "output": "varimp threshold at interpretability 10 refers to Lowest allowed variable importance at interpretability 10: Variable importance below which feature is dropped (with possible replacement found that is better) This also sets overall scale for lower interpretability settings. Set to lower value if ok with many weak features despite choosing high interpretability, or if see drop in performance due to the need for weak features. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Lowest allowed variable importance at interpretability 10: ", + "output": "varimp threshold at interpretability 10 refers to Lowest allowed variable importance at interpretability 10: Variable importance below which feature is dropped (with possible replacement found that is better) This also sets overall scale for lower interpretability settings. Set to lower value if ok with many weak features despite choosing high interpretability, or if see drop in performance due to the need for weak features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting varimp_threshold_at_interpretability_10", + "output": "varimp threshold at interpretability 10 refers to Variable importance below which feature is dropped (with possible replacement found that is better) This also sets overall scale for lower interpretability settings. Set to lower value if ok with many weak features despite choosing high interpretability, or if see drop in performance due to the need for weak features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting varimp_threshold_at_interpretability_10", + "output": "varimp threshold at interpretability 10 refers to Lowest allowed variable importance at interpretability 10: Variable importance below which feature is dropped (with possible replacement found that is better) This also sets overall scale for lower interpretability settings. Set to lower value if ok with many weak features despite choosing high interpretability, or if see drop in performance due to the need for weak features. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_stabilize_varimp_for_ts", + "output": "allow stabilize varimp for ts refers to Whether to avoid setting stabilize_varimp=false and stabilize_fs=false for time series experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_stabilize_varimp_for_ts", + "output": "allow stabilize varimp for ts refers to Whether to allow stabilization of features using variable importance for time-series: Whether to avoid setting stabilize_varimp=false and stabilize_fs=false for time series experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow stabilize varimp for ts", + "output": "allow stabilize varimp for ts refers to Whether to allow stabilization of features using variable importance for time-series: Whether to avoid setting stabilize_varimp=false and stabilize_fs=false for time series experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to allow stabilization of features using variable importance for time-series: ", + "output": "allow stabilize varimp for ts refers to Whether to allow stabilization of features using variable importance for time-series: Whether to avoid setting stabilize_varimp=false and stabilize_fs=false for time series experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_stabilize_varimp_for_ts", + "output": "allow stabilize varimp for ts refers to Whether to avoid setting stabilize_varimp=false and stabilize_fs=false for time series experiments." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_stabilize_varimp_for_ts", + "output": "allow stabilize varimp for ts refers to Whether to allow stabilization of features using variable importance for time-series: Whether to avoid setting stabilize_varimp=false and stabilize_fs=false for time series experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stabilize_varimp", + "output": "stabilize varimp refers to Variable importance is used by genetic algorithm to decide which features are useful, so this can stabilize the feature selection by the genetic algorithm. This is by default disabled for time series experiments, which can have real diverse behavior in each split. But in some cases feature selection is improved in presence of highly shifted variables that are not handled by lag transformers and one can set allow_stabilize_varimp_for_ts=true. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stabilize_varimp", + "output": "stabilize varimp refers to Whether to take minimum (True) or mean (False) of variable importance when have multiple folds/repeats.: Variable importance is used by genetic algorithm to decide which features are useful, so this can stabilize the feature selection by the genetic algorithm. This is by default disabled for time series experiments, which can have real diverse behavior in each split. But in some cases feature selection is improved in presence of highly shifted variables that are not handled by lag transformers and one can set allow_stabilize_varimp_for_ts=true. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stabilize varimp", + "output": "stabilize varimp refers to Whether to take minimum (True) or mean (False) of variable importance when have multiple folds/repeats.: Variable importance is used by genetic algorithm to decide which features are useful, so this can stabilize the feature selection by the genetic algorithm. This is by default disabled for time series experiments, which can have real diverse behavior in each split. But in some cases feature selection is improved in presence of highly shifted variables that are not handled by lag transformers and one can set allow_stabilize_varimp_for_ts=true. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to take minimum (True) or mean (False) of variable importance when have multiple folds/repeats.: ", + "output": "stabilize varimp refers to Whether to take minimum (True) or mean (False) of variable importance when have multiple folds/repeats.: Variable importance is used by genetic algorithm to decide which features are useful, so this can stabilize the feature selection by the genetic algorithm. This is by default disabled for time series experiments, which can have real diverse behavior in each split. But in some cases feature selection is improved in presence of highly shifted variables that are not handled by lag transformers and one can set allow_stabilize_varimp_for_ts=true. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting stabilize_varimp", + "output": "stabilize varimp refers to Variable importance is used by genetic algorithm to decide which features are useful, so this can stabilize the feature selection by the genetic algorithm. This is by default disabled for time series experiments, which can have real diverse behavior in each split. 
But in some cases feature selection is improved in presence of highly shifted variables that are not handled by lag transformers and one can set allow_stabilize_varimp_for_ts=true. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting stabilize_varimp", + "output": "stabilize varimp refers to Whether to take minimum (True) or mean (False) of variable importance when have multiple folds/repeats.: Variable importance is used by genetic algorithm to decide which features are useful, so this can stabilize the feature selection by the genetic algorithm. This is by default disabled for time series experiments, which can have real diverse behavior in each split. But in some cases feature selection is improved in presence of highly shifted variables that are not handled by lag transformers and one can set allow_stabilize_varimp_for_ts=true. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stabilize_fs", + "output": "stabilize fs refers to Whether to take minimum (True) or mean (False) of delta improvement in score when aggregating feature selection scores across multiple folds/depths. Delta improvement of score corresponds to original metric minus metric of shuffled feature frame if maximizing metric, and corresponds to negative of such a score difference if minimizing. Feature selection by permutation importance considers the change in score after shuffling a feature, and using minimum operation ignores optimistic scores in favor of pessimistic scores when aggregating over folds. Note, if using tree methods, multiple depths may be fitted, in which case regardless of this toml setting, only features that are kept for all depths are kept by feature selection. 
If interpretability >= config toml value of fs_data_vary_for_interpretability, then half data (or setting of fs_data_frac) is used as another fit, in which case regardless of this toml setting, only features that are kept for all data sizes are kept by feature selection. Note: This is disabled for small data since arbitrary slices of small data can lead to disjoint features being important and only aggregated average behavior has signal. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stabilize_fs", + "output": "stabilize fs refers to Whether to take minimum (True) or mean (False) of delta improvement in score when aggregating feature selection scores across multiple folds/depths.: Whether to take minimum (True) or mean (False) of delta improvement in score when aggregating feature selection scores across multiple folds/depths. Delta improvement of score corresponds to original metric minus metric of shuffled feature frame if maximizing metric, and corresponds to negative of such a score difference if minimizing. Feature selection by permutation importance considers the change in score after shuffling a feature, and using minimum operation ignores optimistic scores in favor of pessimistic scores when aggregating over folds. Note, if using tree methods, multiple depths may be fitted, in which case regardless of this toml setting, only features that are kept for all depths are kept by feature selection. If interpretability >= config toml value of fs_data_vary_for_interpretability, then half data (or setting of fs_data_frac) is used as another fit, in which case regardless of this toml setting, only features that are kept for all data sizes are kept by feature selection. Note: This is disabled for small data since arbitrary slices of small data can lead to disjoint features being important and only aggregated average behavior has signal. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stabilize fs", + "output": "stabilize fs refers to Whether to take minimum (True) or mean (False) of delta improvement in score when aggregating feature selection scores across multiple folds/depths.: Whether to take minimum (True) or mean (False) of delta improvement in score when aggregating feature selection scores across multiple folds/depths. Delta improvement of score corresponds to original metric minus metric of shuffled feature frame if maximizing metric, and corresponds to negative of such a score difference if minimizing. Feature selection by permutation importance considers the change in score after shuffling a feature, and using minimum operation ignores optimistic scores in favor of pessimistic scores when aggregating over folds. Note, if using tree methods, multiple depths may be fitted, in which case regardless of this toml setting, only features that are kept for all depths are kept by feature selection. If interpretability >= config toml value of fs_data_vary_for_interpretability, then half data (or setting of fs_data_frac) is used as another fit, in which case regardless of this toml setting, only features that are kept for all data sizes are kept by feature selection. Note: This is disabled for small data since arbitrary slices of small data can lead to disjoint features being important and only aggregated average behavior has signal. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to take minimum (True) or mean (False) of delta improvement in score when aggregating feature selection scores across multiple folds/depths.: ", + "output": "stabilize fs refers to Whether to take minimum (True) or mean (False) of delta improvement in score when aggregating feature selection scores across multiple folds/depths.: Whether to take minimum (True) or mean (False) of delta improvement in score when aggregating feature selection scores across multiple folds/depths. Delta improvement of score corresponds to original metric minus metric of shuffled feature frame if maximizing metric, and corresponds to negative of such a score difference if minimizing. Feature selection by permutation importance considers the change in score after shuffling a feature, and using minimum operation ignores optimistic scores in favor of pessimistic scores when aggregating over folds. Note, if using tree methods, multiple depths may be fitted, in which case regardless of this toml setting, only features that are kept for all depths are kept by feature selection. If interpretability >= config toml value of fs_data_vary_for_interpretability, then half data (or setting of fs_data_frac) is used as another fit, in which case regardless of this toml setting, only features that are kept for all data sizes are kept by feature selection. Note: This is disabled for small data since arbitrary slices of small data can lead to disjoint features being important and only aggregated average behavior has signal. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting stabilize_fs", + "output": "stabilize fs refers to Whether to take minimum (True) or mean (False) of delta improvement in score when aggregating feature selection scores across multiple folds/depths. 
Delta improvement of score corresponds to original metric minus metric of shuffled feature frame if maximizing metric, and corresponds to negative of such a score difference if minimizing. Feature selection by permutation importance considers the change in score after shuffling a feature, and using minimum operation ignores optimistic scores in favor of pessimistic scores when aggregating over folds. Note, if using tree methods, multiple depths may be fitted, in which case regardless of this toml setting, only features that are kept for all depths are kept by feature selection. If interpretability >= config toml value of fs_data_vary_for_interpretability, then half data (or setting of fs_data_frac) is used as another fit, in which case regardless of this toml setting, only features that are kept for all data sizes are kept by feature selection. Note: This is disabled for small data since arbitrary slices of small data can lead to disjoint features being important and only aggregated average behavior has signal. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting stabilize_fs", + "output": "stabilize fs refers to Whether to take minimum (True) or mean (False) of delta improvement in score when aggregating feature selection scores across multiple folds/depths.: Whether to take minimum (True) or mean (False) of delta improvement in score when aggregating feature selection scores across multiple folds/depths. Delta improvement of score corresponds to original metric minus metric of shuffled feature frame if maximizing metric, and corresponds to negative of such a score difference if minimizing. Feature selection by permutation importance considers the change in score after shuffling a feature, and using minimum operation ignores optimistic scores in favor of pessimistic scores when aggregating over folds. 
Note, if using tree methods, multiple depths may be fitted, in which case regardless of this toml setting, only features that are kept for all depths are kept by feature selection. If interpretability >= config toml value of fs_data_vary_for_interpretability, then half data (or setting of fs_data_frac) is used as another fit, in which case regardless of this toml setting, only features that are kept for all data sizes are kept by feature selection. Note: This is disabled for small data since arbitrary slices of small data can lead to disjoint features being important and only aggregated average behavior has signal. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stabilize_features", + "output": "stabilize features refers to Whether final pipeline uses fixed features for some transformers that would normally perform search, such as InteractionsTransformer. Use what learned from tuning and evolution (True) or to freshly search for new features (False). This can give a more stable pipeline, especially for small data or when using interaction transformer as pretransformer in multi-layer pipeline. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stabilize_features", + "output": "stabilize features refers to Use tuning-evolution search result for final model transformer.: Whether final pipeline uses fixed features for some transformers that would normally perform search, such as InteractionsTransformer. Use what learned from tuning and evolution (True) or to freshly search for new features (False). This can give a more stable pipeline, especially for small data or when using interaction transformer as pretransformer in multi-layer pipeline. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stabilize features", + "output": "stabilize features refers to Use tuning-evolution search result for final model transformer.: Whether final pipeline uses fixed features for some transformers that would normally perform search, such as InteractionsTransformer. Use what learned from tuning and evolution (True) or to freshly search for new features (False). This can give a more stable pipeline, especially for small data or when using interaction transformer as pretransformer in multi-layer pipeline. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Use tuning-evolution search result for final model transformer.: ", + "output": "stabilize features refers to Use tuning-evolution search result for final model transformer.: Whether final pipeline uses fixed features for some transformers that would normally perform search, such as InteractionsTransformer. Use what learned from tuning and evolution (True) or to freshly search for new features (False). This can give a more stable pipeline, especially for small data or when using interaction transformer as pretransformer in multi-layer pipeline. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting stabilize_features", + "output": "stabilize features refers to Whether final pipeline uses fixed features for some transformers that would normally perform search, such as InteractionsTransformer. Use what learned from tuning and evolution (True) or to freshly search for new features (False). This can give a more stable pipeline, especially for small data or when using interaction transformer as pretransformer in multi-layer pipeline. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting stabilize_features", + "output": "stabilize features refers to Use tuning-evolution search result for final model transformer.: Whether final pipeline uses fixed features for some transformers that would normally perform search, such as InteractionsTransformer. Use what learned from tuning and evolution (True) or to freshly search for new features (False). This can give a more stable pipeline, especially for small data or when using interaction transformer as pretransformer in multi-layer pipeline. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_rapids_transformers", + "output": "enable rapids transformers refers to Whether to enable GPU-based RAPIDS cuML transformers.If want to support Dask RAPIDS transformers, you must set enable_rapids_transformers_dask=true.No mojo support, but Python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_rapids_transformers", + "output": "enable rapids transformers refers to Whether to enable RAPIDS CUML GPU transformers (no mojo): Whether to enable GPU-based RAPIDS cuML transformers.If want to support Dask RAPIDS transformers, you must set enable_rapids_transformers_dask=true.No mojo support, but Python scoring is supported.In alpha testing status. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable rapids transformers", + "output": "enable rapids transformers refers to Whether to enable RAPIDS CUML GPU transformers (no mojo): Whether to enable GPU-based RAPIDS cuML transformers.If want to support Dask RAPIDS transformers, you must set enable_rapids_transformers_dask=true.No mojo support, but Python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to enable RAPIDS CUML GPU transformers (no mojo): ", + "output": "enable rapids transformers refers to Whether to enable RAPIDS CUML GPU transformers (no mojo): Whether to enable GPU-based RAPIDS cuML transformers.If want to support Dask RAPIDS transformers, you must set enable_rapids_transformers_dask=true.No mojo support, but Python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_rapids_transformers", + "output": "enable rapids transformers refers to Whether to enable GPU-based RAPIDS cuML transformers.If want to support Dask RAPIDS transformers, you must set enable_rapids_transformers_dask=true.No mojo support, but Python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_rapids_transformers", + "output": "enable rapids transformers refers to Whether to enable RAPIDS CUML GPU transformers (no mojo): Whether to enable GPU-based RAPIDS cuML transformers.If want to support Dask RAPIDS transformers, you must set enable_rapids_transformers_dask=true.No mojo support, but Python scoring is supported.In alpha testing status. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_rapids_transformers_dask", + "output": "enable rapids transformers dask refers to Whether to enable Multi-GPU mode for capable RAPIDS cuML transformers.Must also set enable_rapids_transformers=true.No mojo support, but python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_rapids_transformers_dask", + "output": "enable rapids transformers dask refers to Whether to enable RAPIDS CUML GPU transformers to use Dask (no mojo): Whether to enable Multi-GPU mode for capable RAPIDS cuML transformers.Must also set enable_rapids_transformers=true.No mojo support, but python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable rapids transformers dask", + "output": "enable rapids transformers dask refers to Whether to enable RAPIDS CUML GPU transformers to use Dask (no mojo): Whether to enable Multi-GPU mode for capable RAPIDS cuML transformers.Must also set enable_rapids_transformers=true.No mojo support, but python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to enable RAPIDS CUML GPU transformers to use Dask (no mojo): ", + "output": "enable rapids transformers dask refers to Whether to enable RAPIDS CUML GPU transformers to use Dask (no mojo): Whether to enable Multi-GPU mode for capable RAPIDS cuML transformers.Must also set enable_rapids_transformers=true.No mojo support, but python scoring is supported.In alpha testing status. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_rapids_transformers_dask", + "output": "enable rapids transformers dask refers to Whether to enable Multi-GPU mode for capable RAPIDS cuML transformers.Must also set enable_rapids_transformers=true.No mojo support, but python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_rapids_transformers_dask", + "output": "enable rapids transformers dask refers to Whether to enable RAPIDS CUML GPU transformers to use Dask (no mojo): Whether to enable Multi-GPU mode for capable RAPIDS cuML transformers.Must also set enable_rapids_transformers=true.No mojo support, but python scoring is supported.In alpha testing status. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fraction_std_bootstrap_ladder_factor", + "output": "fraction std bootstrap ladder factor refers to Factor of standard deviation of bootstrap scores by which to accept new model in genetic algorithm. Too small a fraction will lead to accepting new models easily even if no significant improvement in score, while too large a fraction will reject too many good models. Non-zero value is a bit risky when no folds are used in GA, because bootstrap score is only rough estimate of error.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fraction_std_bootstrap_ladder_factor", + "output": "fraction std bootstrap ladder factor refers to Factor of standard deviation of bootstrap scores by which to accept new model in genetic algorithm. Too small a fraction will lead to accepting new models easily even if no significant improvement in score, while too large a fraction will reject too many good models. 
Non-zero value is a bit risky when no folds are used in GA, because bootstrap score is only rough estimate of error.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fraction std bootstrap ladder factor", + "output": "fraction std bootstrap ladder factor refers to Factor of standard deviation of bootstrap scores by which to accept new model in genetic algorithm. Too small a fraction will lead to accepting new models easily even if no significant improvement in score, while too large a fraction will reject too many good models. Non-zero value is a bit risky when no folds are used in GA, because bootstrap score is only rough estimate of error.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Factor of standard deviation of bootstrap scores by which to accept new model in genetic algorithm. Too small a fraction will lead to accepting new models easily even if no significant improvement in score, while too large a fraction will reject too many good models. Non-zero value is a bit risky when no folds are used in GA, because bootstrap score is only rough estimate of error.: ", + "output": "fraction std bootstrap ladder factor refers to Factor of standard deviation of bootstrap scores by which to accept new model in genetic algorithm. Too small a fraction will lead to accepting new models easily even if no significant improvement in score, while too large a fraction will reject too many good models. 
Non-zero value is a bit risky when no folds are used in GA, because bootstrap score is only rough estimate of error.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fraction_std_bootstrap_ladder_factor", + "output": "fraction std bootstrap ladder factor refers to Factor of standard deviation of bootstrap scores by which to accept new model in genetic algorithm. Too small a fraction will lead to accepting new models easily even if no significant improvement in score, while too large a fraction will reject too many good models. Non-zero value is a bit risky when no folds are used in GA, because bootstrap score is only rough estimate of error.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fraction_std_bootstrap_ladder_factor", + "output": "fraction std bootstrap ladder factor refers to Factor of standard deviation of bootstrap scores by which to accept new model in genetic algorithm. Too small a fraction will lead to accepting new models easily even if no significant improvement in score, while too large a fraction will reject too many good models. Non-zero value is a bit risky when no folds are used in GA, because bootstrap score is only rough estimate of error.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bootstrap_ladder_samples_limit", + "output": "bootstrap ladder samples limit refers to Minimum number of bootstrap samples that are required to limit accepting new model. If less than this, then new model is always accepted.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bootstrap_ladder_samples_limit", + "output": "bootstrap ladder samples limit refers to Minimum number of bootstrap samples that are required to limit accepting new model. 
If less than this, then new model is always accepted.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bootstrap ladder samples limit", + "output": "bootstrap ladder samples limit refers to Minimum number of bootstrap samples that are required to limit accepting new model. If less than this, then new model is always accepted.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Minimum number of bootstrap samples that are required to limit accepting new model. If less than this, then new model is always accepted.: ", + "output": "bootstrap ladder samples limit refers to Minimum number of bootstrap samples that are required to limit accepting new model. If less than this, then new model is always accepted.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting bootstrap_ladder_samples_limit", + "output": "bootstrap ladder samples limit refers to Minimum number of bootstrap samples that are required to limit accepting new model. If less than this, then new model is always accepted.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting bootstrap_ladder_samples_limit", + "output": "bootstrap ladder samples limit refers to Minimum number of bootstrap samples that are required to limit accepting new model. If less than this, then new model is always accepted.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "meta_weight_allowed_for_reference", + "output": "meta weight allowed for reference refers to Min. weight of meta learner for reference models during ensembling. If 1.0, then reference model must be the clear winner to be kept. 
Set to 0.0 to never drop reference models: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "meta_weight_allowed_for_reference", + "output": "meta weight allowed for reference refers to Min. weight of meta learner for reference models during ensembling. If 1.0, then reference model must be the clear winner to be kept. Set to 0.0 to never drop reference models: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "meta weight allowed for reference", + "output": "meta weight allowed for reference refers to Min. weight of meta learner for reference models during ensembling. If 1.0, then reference model must be the clear winner to be kept. Set to 0.0 to never drop reference models: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Min. weight of meta learner for reference models during ensembling. If 1.0, then reference model must be the clear winner to be kept. Set to 0.0 to never drop reference models: ", + "output": "meta weight allowed for reference refers to Min. weight of meta learner for reference models during ensembling. If 1.0, then reference model must be the clear winner to be kept. Set to 0.0 to never drop reference models: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting meta_weight_allowed_for_reference", + "output": "meta weight allowed for reference refers to Min. weight of meta learner for reference models during ensembling. If 1.0, then reference model must be the clear winner to be kept. Set to 0.0 to never drop reference models: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting meta_weight_allowed_for_reference", + "output": "meta weight allowed for reference refers to Min. 
weight of meta learner for reference models during ensembling. If 1.0, then reference model must be the clear winner to be kept. Set to 0.0 to never drop reference models: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show_full_pipeline_details", + "output": "show full pipeline details refers to Whether to show full pipeline details: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show_full_pipeline_details", + "output": "show full pipeline details refers to Whether to show full pipeline details: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "show full pipeline details", + "output": "show full pipeline details refers to Whether to show full pipeline details: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to show full pipeline details: ", + "output": "show full pipeline details refers to Whether to show full pipeline details: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting show_full_pipeline_details", + "output": "show full pipeline details refers to Whether to show full pipeline details: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting show_full_pipeline_details", + "output": "show full pipeline details refers to Whether to show full pipeline details: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num_transformed_features_per_pipeline_show", + "output": "num transformed features per pipeline show refers to Number of features to show when logging size of fitted transformers: " + }, + { + "prompt_type": "human_bot", + "instruction": 
"Explain the following expert setting for Driverless AI", + "input": "num_transformed_features_per_pipeline_show", + "output": "num transformed features per pipeline show refers to Number of features to show when logging size of fitted transformers: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "num transformed features per pipeline show", + "output": "num transformed features per pipeline show refers to Number of features to show when logging size of fitted transformers: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of features to show when logging size of fitted transformers: ", + "output": "num transformed features per pipeline show refers to Number of features to show when logging size of fitted transformers: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting num_transformed_features_per_pipeline_show", + "output": "num transformed features per pipeline show refers to Number of features to show when logging size of fitted transformers: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting num_transformed_features_per_pipeline_show", + "output": "num transformed features per pipeline show refers to Number of features to show when logging size of fitted transformers: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fs_data_frac", + "output": "fs data frac refers to Fraction of data to use for another data slice for FS: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fs_data_frac", + "output": "fs data frac refers to Fraction of data to use for another data slice for FS: " + }, + { + "prompt_type": "human_bot", + "instruction": 
"Explain the following expert setting for Driverless AI", + "input": "fs data frac", + "output": "fs data frac refers to Fraction of data to use for another data slice for FS: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Fraction of data to use for another data slice for FS: ", + "output": "fs data frac refers to Fraction of data to use for another data slice for FS: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fs_data_frac", + "output": "fs data frac refers to Fraction of data to use for another data slice for FS: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fs_data_frac", + "output": "fs data frac refers to Fraction of data to use for another data slice for FS: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "many_columns_count", + "output": "many columns count refers to Number of columns beyond which reduce expensive tasks at cost of some accuracy.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "many_columns_count", + "output": "many columns count refers to Number of columns beyond which reduce expensive tasks at cost of some accuracy.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "many columns count", + "output": "many columns count refers to Number of columns beyond which reduce expensive tasks at cost of some accuracy.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of columns beyond which reduce expensive tasks at cost of some accuracy.: ", + "output": "many columns count refers to Number of columns beyond which reduce 
expensive tasks at cost of some accuracy.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting many_columns_count", + "output": "many columns count refers to Number of columns beyond which reduce expensive tasks at cost of some accuracy.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting many_columns_count", + "output": "many columns count refers to Number of columns beyond which reduce expensive tasks at cost of some accuracy.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "columns_count_interpretable", + "output": "columns count interpretable refers to Number of columns beyond which do not set default knobs to high interpretability even if bigger data.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "columns_count_interpretable", + "output": "columns count interpretable refers to Number of columns beyond which do not set default knobs to high interpretability even if bigger data.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "columns count interpretable", + "output": "columns count interpretable refers to Number of columns beyond which do not set default knobs to high interpretability even if bigger data.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of columns beyond which do not set default knobs to high interpretability even if bigger data.: ", + "output": "columns count interpretable refers to Number of columns beyond which do not set default knobs to high interpretability even if bigger data.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting 
columns_count_interpretable", + "output": "columns count interpretable refers to Number of columns beyond which do not set default knobs to high interpretability even if bigger data.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting columns_count_interpretable", + "output": "columns count interpretable refers to Number of columns beyond which do not set default knobs to high interpretability even if bigger data.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "round_up_indivs_for_busy_gpus", + "output": "round up indivs for busy gpus refers to Whether to round-up individuals to ensure all GPUs used. Not always best if (say) have 16 GPUs, better to have multiple experiments if in multi-user environment on single node.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "round_up_indivs_for_busy_gpus", + "output": "round up indivs for busy gpus refers to Whether to round-up individuals to ensure all GPUs used. Not always best if (say) have 16 GPUs, better to have multiple experiments if in multi-user environment on single node.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "round up indivs for busy gpus", + "output": "round up indivs for busy gpus refers to Whether to round-up individuals to ensure all GPUs used. Not always best if (say) have 16 GPUs, better to have multiple experiments if in multi-user environment on single node.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to round-up individuals to ensure all GPUs used. 
Not always best if (say) have 16 GPUs, better to have multiple experiments if in multi-user environment on single node.: ", + "output": "round up indivs for busy gpus refers to Whether to round-up individuals to ensure all GPUs used. Not always best if (say) have 16 GPUs, better to have multiple experiments if in multi-user environment on single node.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting round_up_indivs_for_busy_gpus", + "output": "round up indivs for busy gpus refers to Whether to round-up individuals to ensure all GPUs used. Not always best if (say) have 16 GPUs, better to have multiple experiments if in multi-user environment on single node.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting round_up_indivs_for_busy_gpus", + "output": "round up indivs for busy gpus refers to Whether to round-up individuals to ensure all GPUs used. Not always best if (say) have 16 GPUs, better to have multiple experiments if in multi-user environment on single node.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "require_graphviz", + "output": "require graphviz refers to Graphviz is an optional requirement for native installations (RPM/DEP/Tar-SH, outside of Docker)to convert .dot files into .png files for pipeline visualizations as part of experiment artifacts" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "require_graphviz", + "output": "require graphviz refers to Whether to require Graphviz package at startup: Graphviz is an optional requirement for native installations (RPM/DEP/Tar-SH, outside of Docker)to convert .dot files into .png files for pipeline visualizations as part of experiment artifacts" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert 
setting for Driverless AI", + "input": "require graphviz", + "output": "require graphviz refers to Whether to require Graphviz package at startup: Graphviz is an optional requirement for native installations (RPM/DEP/Tar-SH, outside of Docker)to convert .dot files into .png files for pipeline visualizations as part of experiment artifacts" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to require Graphviz package at startup: ", + "output": "require graphviz refers to Whether to require Graphviz package at startup: Graphviz is an optional requirement for native installations (RPM/DEP/Tar-SH, outside of Docker)to convert .dot files into .png files for pipeline visualizations as part of experiment artifacts" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting require_graphviz", + "output": "require graphviz refers to Graphviz is an optional requirement for native installations (RPM/DEP/Tar-SH, outside of Docker)to convert .dot files into .png files for pipeline visualizations as part of experiment artifacts" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting require_graphviz", + "output": "require graphviz refers to Whether to require Graphviz package at startup: Graphviz is an optional requirement for native installations (RPM/DEP/Tar-SH, outside of Docker)to convert .dot files into .png files for pipeline visualizations as part of experiment artifacts" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_add_genes", + "output": "prob add genes refers to Unnormalized probability to add genes or instances of transformers with specific attributes.If no genes can be added, other mutations(mutating models hyper parmaters, pruning genes, pruning features, etc.) are attempted. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_add_genes", + "output": "prob add genes refers to Probability to add transformers: Unnormalized probability to add genes or instances of transformers with specific attributes.If no genes can be added, other mutations(mutating models hyper parmaters, pruning genes, pruning features, etc.) are attempted. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob add genes", + "output": "prob add genes refers to Probability to add transformers: Unnormalized probability to add genes or instances of transformers with specific attributes.If no genes can be added, other mutations(mutating models hyper parmaters, pruning genes, pruning features, etc.) are attempted. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Probability to add transformers: ", + "output": "prob add genes refers to Probability to add transformers: Unnormalized probability to add genes or instances of transformers with specific attributes.If no genes can be added, other mutations(mutating models hyper parmaters, pruning genes, pruning features, etc.) are attempted. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prob_add_genes", + "output": "prob add genes refers to Unnormalized probability to add genes or instances of transformers with specific attributes.If no genes can be added, other mutations(mutating models hyper parmaters, pruning genes, pruning features, etc.) are attempted. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prob_add_genes", + "output": "prob add genes refers to Probability to add transformers: Unnormalized probability to add genes or instances of transformers with specific attributes.If no genes can be added, other mutations(mutating models hyper parmaters, pruning genes, pruning features, etc.) are attempted. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_addbest_genes", + "output": "prob addbest genes refers to Unnormalized probability, conditioned on prob_add_genes,to add genes or instances of transformers with specific attributesthat have shown to be beneficial to other individuals within the population. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_addbest_genes", + "output": "prob addbest genes refers to Probability to add best shared transformers: Unnormalized probability, conditioned on prob_add_genes,to add genes or instances of transformers with specific attributesthat have shown to be beneficial to other individuals within the population. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob addbest genes", + "output": "prob addbest genes refers to Probability to add best shared transformers: Unnormalized probability, conditioned on prob_add_genes,to add genes or instances of transformers with specific attributesthat have shown to be beneficial to other individuals within the population. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Probability to add best shared transformers: ", + "output": "prob addbest genes refers to Probability to add best shared transformers: Unnormalized probability, conditioned on prob_add_genes,to add genes or instances of transformers with specific attributesthat have shown to be beneficial to other individuals within the population. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prob_addbest_genes", + "output": "prob addbest genes refers to Unnormalized probability, conditioned on prob_add_genes,to add genes or instances of transformers with specific attributesthat have shown to be beneficial to other individuals within the population. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prob_addbest_genes", + "output": "prob addbest genes refers to Probability to add best shared transformers: Unnormalized probability, conditioned on prob_add_genes,to add genes or instances of transformers with specific attributesthat have shown to be beneficial to other individuals within the population. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_prune_genes", + "output": "prob prune genes refers to Unnormalized probability to prune genes or instances of transformers with specific attributes.If a variety of transformers with many attributes exists, default value is reasonable.However, if one has fixed set of transformers that should not change or no new transformer attributescan be added, then setting this to 0.0 is reasonable to avoid undesired loss of transformations. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_prune_genes", + "output": "prob prune genes refers to Probability to prune transformers: Unnormalized probability to prune genes or instances of transformers with specific attributes.If a variety of transformers with many attributes exists, default value is reasonable.However, if one has fixed set of transformers that should not change or no new transformer attributescan be added, then setting this to 0.0 is reasonable to avoid undesired loss of transformations. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob prune genes", + "output": "prob prune genes refers to Probability to prune transformers: Unnormalized probability to prune genes or instances of transformers with specific attributes.If a variety of transformers with many attributes exists, default value is reasonable.However, if one has fixed set of transformers that should not change or no new transformer attributescan be added, then setting this to 0.0 is reasonable to avoid undesired loss of transformations. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Probability to prune transformers: ", + "output": "prob prune genes refers to Probability to prune transformers: Unnormalized probability to prune genes or instances of transformers with specific attributes.If a variety of transformers with many attributes exists, default value is reasonable.However, if one has fixed set of transformers that should not change or no new transformer attributescan be added, then setting this to 0.0 is reasonable to avoid undesired loss of transformations. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prob_prune_genes", + "output": "prob prune genes refers to Unnormalized probability to prune genes or instances of transformers with specific attributes.If a variety of transformers with many attributes exists, default value is reasonable.However, if one has fixed set of transformers that should not change or no new transformer attributescan be added, then setting this to 0.0 is reasonable to avoid undesired loss of transformations. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prob_prune_genes", + "output": "prob prune genes refers to Probability to prune transformers: Unnormalized probability to prune genes or instances of transformers with specific attributes.If a variety of transformers with many attributes exists, default value is reasonable.However, if one has fixed set of transformers that should not change or no new transformer attributescan be added, then setting this to 0.0 is reasonable to avoid undesired loss of transformations. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_perturb_xgb", + "output": "prob perturb xgb refers to Unnormalized probability change model hyper parameters. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_perturb_xgb", + "output": "prob perturb xgb refers to Probability to mutate model parameters: Unnormalized probability change model hyper parameters. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob perturb xgb", + "output": "prob perturb xgb refers to Probability to mutate model parameters: Unnormalized probability change model hyper parameters. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Probability to mutate model parameters: ", + "output": "prob perturb xgb refers to Probability to mutate model parameters: Unnormalized probability change model hyper parameters. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prob_perturb_xgb", + "output": "prob perturb xgb refers to Unnormalized probability change model hyper parameters. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prob_perturb_xgb", + "output": "prob perturb xgb refers to Probability to mutate model parameters: Unnormalized probability change model hyper parameters. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_prune_by_features", + "output": "prob prune by features refers to Unnormalized probability to prune features that have low variable importance, as opposed to pruning entire instances of genes/transformers when prob_prune_genes used.If prob_prune_genes=0.0 and prob_prune_by_features==0.0 and prob_prune_by_top_features==0.0, then genes/transformers and transformed features are only pruned if they are:1) inconsistent with the genome2) inconsistent with the column data types3) had no signal (for interactions and cv_in_cv for target encoding)4) transformation failedE.g. these are toml settings are then ignored:1) ngenes_max2) limit_features_by_interpretability3) varimp_threshold_at_interpretability_104) features_allowed_by_interpretability5) remove_scored_0gain_genes_in_postprocessing_above_interpretability6) nfeatures_max_threshold7) features_cost_per_interpSo this acts similar to no_drop_features, except no_drop_features also applies to shift and leak detection, constant columns are not dropped, ID columns are not dropped." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_prune_by_features", + "output": "prob prune by features refers to Probability to prune weak features: Unnormalized probability to prune features that have low variable importance, as opposed to pruning entire instances of genes/transformers when prob_prune_genes used.If prob_prune_genes=0.0 and prob_prune_by_features==0.0 and prob_prune_by_top_features==0.0, then genes/transformers and transformed features are only pruned if they are:1) inconsistent with the genome2) inconsistent with the column data types3) had no signal (for interactions and cv_in_cv for target encoding)4) transformation failedE.g. these are toml settings are then ignored:1) ngenes_max2) limit_features_by_interpretability3) varimp_threshold_at_interpretability_104) features_allowed_by_interpretability5) remove_scored_0gain_genes_in_postprocessing_above_interpretability6) nfeatures_max_threshold7) features_cost_per_interpSo this acts similar to no_drop_features, except no_drop_features also applies to shift and leak detection, constant columns are not dropped, ID columns are not dropped." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob prune by features", + "output": "prob prune by features refers to Probability to prune weak features: Unnormalized probability to prune features that have low variable importance, as opposed to pruning entire instances of genes/transformers when prob_prune_genes used.If prob_prune_genes=0.0 and prob_prune_by_features==0.0 and prob_prune_by_top_features==0.0, then genes/transformers and transformed features are only pruned if they are:1) inconsistent with the genome2) inconsistent with the column data types3) had no signal (for interactions and cv_in_cv for target encoding)4) transformation failedE.g. 
these are toml settings are then ignored:1) ngenes_max2) limit_features_by_interpretability3) varimp_threshold_at_interpretability_104) features_allowed_by_interpretability5) remove_scored_0gain_genes_in_postprocessing_above_interpretability6) nfeatures_max_threshold7) features_cost_per_interpSo this acts similar to no_drop_features, except no_drop_features also applies to shift and leak detection, constant columns are not dropped, ID columns are not dropped." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Probability to prune weak features: ", + "output": "prob prune by features refers to Probability to prune weak features: Unnormalized probability to prune features that have low variable importance, as opposed to pruning entire instances of genes/transformers when prob_prune_genes used.If prob_prune_genes=0.0 and prob_prune_by_features==0.0 and prob_prune_by_top_features==0.0, then genes/transformers and transformed features are only pruned if they are:1) inconsistent with the genome2) inconsistent with the column data types3) had no signal (for interactions and cv_in_cv for target encoding)4) transformation failedE.g. these are toml settings are then ignored:1) ngenes_max2) limit_features_by_interpretability3) varimp_threshold_at_interpretability_104) features_allowed_by_interpretability5) remove_scored_0gain_genes_in_postprocessing_above_interpretability6) nfeatures_max_threshold7) features_cost_per_interpSo this acts similar to no_drop_features, except no_drop_features also applies to shift and leak detection, constant columns are not dropped, ID columns are not dropped." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prob_prune_by_features", + "output": "prob prune by features refers to Unnormalized probability to prune features that have low variable importance, as opposed to pruning entire instances of genes/transformers when prob_prune_genes used.If prob_prune_genes=0.0 and prob_prune_by_features==0.0 and prob_prune_by_top_features==0.0, then genes/transformers and transformed features are only pruned if they are:1) inconsistent with the genome2) inconsistent with the column data types3) had no signal (for interactions and cv_in_cv for target encoding)4) transformation failedE.g. these are toml settings are then ignored:1) ngenes_max2) limit_features_by_interpretability3) varimp_threshold_at_interpretability_104) features_allowed_by_interpretability5) remove_scored_0gain_genes_in_postprocessing_above_interpretability6) nfeatures_max_threshold7) features_cost_per_interpSo this acts similar to no_drop_features, except no_drop_features also applies to shift and leak detection, constant columns are not dropped, ID columns are not dropped." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prob_prune_by_features", + "output": "prob prune by features refers to Probability to prune weak features: Unnormalized probability to prune features that have low variable importance, as opposed to pruning entire instances of genes/transformers when prob_prune_genes used.If prob_prune_genes=0.0 and prob_prune_by_features==0.0 and prob_prune_by_top_features==0.0, then genes/transformers and transformed features are only pruned if they are:1) inconsistent with the genome2) inconsistent with the column data types3) had no signal (for interactions and cv_in_cv for target encoding)4) transformation failedE.g. 
these are toml settings are then ignored:1) ngenes_max2) limit_features_by_interpretability3) varimp_threshold_at_interpretability_104) features_allowed_by_interpretability5) remove_scored_0gain_genes_in_postprocessing_above_interpretability6) nfeatures_max_threshold7) features_cost_per_interpSo this acts similar to no_drop_features, except no_drop_features also applies to shift and leak detection, constant columns are not dropped, ID columns are not dropped." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_prune_by_top_features", + "output": "prob prune by top features refers to Unnormalized probability to prune features that have high variable importance, in case they have high gain but negaive perfomrance on validation and would otherwise maintain poor validation scores. Similar to prob_prune_by_features but for high gain features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_prune_by_top_features", + "output": "prob prune by top features refers to Probability to prune strong features: Unnormalized probability to prune features that have high variable importance, in case they have high gain but negaive perfomrance on validation and would otherwise maintain poor validation scores. Similar to prob_prune_by_features but for high gain features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob prune by top features", + "output": "prob prune by top features refers to Probability to prune strong features: Unnormalized probability to prune features that have high variable importance, in case they have high gain but negaive perfomrance on validation and would otherwise maintain poor validation scores. Similar to prob_prune_by_features but for high gain features." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Probability to prune strong features: ", + "output": "prob prune by top features refers to Probability to prune strong features: Unnormalized probability to prune features that have high variable importance, in case they have high gain but negative performance on validation and would otherwise maintain poor validation scores. Similar to prob_prune_by_features but for high gain features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prob_prune_by_top_features", + "output": "prob prune by top features refers to Unnormalized probability to prune features that have high variable importance, in case they have high gain but negative performance on validation and would otherwise maintain poor validation scores. Similar to prob_prune_by_features but for high gain features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prob_prune_by_top_features", + "output": "prob prune by top features refers to Probability to prune strong features: Unnormalized probability to prune features that have high variable importance, in case they have high gain but negative performance on validation and would otherwise maintain poor validation scores. Similar to prob_prune_by_features but for high gain features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_num_prune_by_top_features", + "output": "max num prune by top features refers to Maximum number of high gain features to prune for each mutation call, to control behavior of prob_prune_by_top_features."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_num_prune_by_top_features", + "output": "max num prune by top features refers to Number of high gain features to prune each mutation: Maximum number of high gain features to prune for each mutation call, to control behavior of prob_prune_by_top_features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max num prune by top features", + "output": "max num prune by top features refers to Number of high gain features to prune each mutation: Maximum number of high gain features to prune for each mutation call, to control behavior of prob_prune_by_top_features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of high gain features to prune each mutation: ", + "output": "max num prune by top features refers to Number of high gain features to prune each mutation: Maximum number of high gain features to prune for each mutation call, to control behavior of prob_prune_by_top_features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_num_prune_by_top_features", + "output": "max num prune by top features refers to Maximum number of high gain features to prune for each mutation call, to control behavior of prob_prune_by_top_features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_num_prune_by_top_features", + "output": "max num prune by top features refers to Number of high gain features to prune each mutation: Maximum number of high gain features to prune for each mutation call, to control behavior of prob_prune_by_top_features." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_prune_pretransformer_genes", + "output": "prob prune pretransformer genes refers to Like prob_prune_genes but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_prune_pretransformer_genes", + "output": "prob prune pretransformer genes refers to Probability to prune pretransformers: Like prob_prune_genes but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob prune pretransformer genes", + "output": "prob prune pretransformer genes refers to Probability to prune pretransformers: Like prob_prune_genes but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Probability to prune pretransformers: ", + "output": "prob prune pretransformer genes refers to Probability to prune pretransformers: Like prob_prune_genes but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prob_prune_pretransformer_genes", + "output": "prob prune pretransformer genes refers to Like prob_prune_genes but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prob_prune_pretransformer_genes", + "output": "prob prune pretransformer genes refers to Probability to prune pretransformers: Like prob_prune_genes but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_prune_pretransformer_by_features", + "output": "prob prune pretransformer by features refers to Like prob_prune_by_features but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_prune_pretransformer_by_features", + "output": "prob prune pretransformer by features refers to Probability to prune weak pretransformer features: Like prob_prune_by_features but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob prune pretransformer by features", + "output": "prob prune pretransformer by features refers to Probability to prune weak pretransformer features: Like prob_prune_by_features but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Probability to prune weak pretransformer features: ", + "output": "prob prune pretransformer by features refers to Probability to prune weak pretransformer features: Like prob_prune_by_features but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prob_prune_pretransformer_by_features", + "output": "prob prune pretransformer by features refers to Like prob_prune_by_features but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prob_prune_pretransformer_by_features", + "output": "prob prune pretransformer by features refers to Probability to prune weak pretransformer features: Like prob_prune_by_features but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_prune_pretransformer_by_top_features", + "output": "prob prune pretransformer by top features refers to Like prob_prune_by_top_features but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_prune_pretransformer_by_top_features", + "output": "prob prune pretransformer by top features refers to Probability to prune strong pretransformer features: Like prob_prune_by_top_features but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob prune pretransformer by top features", + "output": "prob prune pretransformer by top features refers to Probability to prune strong pretransformer features: Like prob_prune_by_top_features but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Probability to prune strong pretransformer features: ", + "output": "prob prune pretransformer by top features refers to Probability to prune strong pretransformer features: Like prob_prune_by_top_features but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prob_prune_pretransformer_by_top_features", + "output": "prob prune pretransformer by top features refers to Like prob_prune_by_top_features but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prob_prune_pretransformer_by_top_features", + "output": "prob prune pretransformer by top features refers to Probability to prune strong pretransformer features: Like prob_prune_by_top_features but only for pretransformers, i.e. those transformers in layers except last layer that connects to model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override_individual_from_toml_list", + "output": "override individual from toml list refers to When doing restart, retrain, refit, reset these individual parameters to new toml values." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override_individual_from_toml_list", + "output": "override individual from toml list refers to config.toml items stored in individual to overwrite: When doing restart, retrain, refit, reset these individual parameters to new toml values." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override individual from toml list", + "output": "override individual from toml list refers to config.toml items stored in individual to overwrite: When doing restart, retrain, refit, reset these individual parameters to new toml values." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "config.toml items stored in individual to overwrite: ", + "output": "override individual from toml list refers to config.toml items stored in individual to overwrite: When doing restart, retrain, refit, reset these individual parameters to new toml values." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting override_individual_from_toml_list", + "output": "override individual from toml list refers to When doing restart, retrain, refit, reset these individual parameters to new toml values." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting override_individual_from_toml_list", + "output": "override individual from toml list refers to config.toml items stored in individual to overwrite: When doing restart, retrain, refit, reset these individual parameters to new toml values." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_approx_max_num_trees_ever", + "output": "fast approx max num trees ever refers to Max. number of trees to use for all tree model predictions. For testing, when predictions don't matter. -1 means disabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_approx_max_num_trees_ever", + "output": "fast approx max num trees ever refers to Max. 
number of trees to use for all tree model predictions. For testing, when predictions don't matter. -1 means disabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast approx max num trees ever", + "output": "fast approx max num trees ever refers to Max. number of trees to use for all tree model predictions. For testing, when predictions don't matter. -1 means disabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "fast approx max num trees ever refers to Max. number of trees to use for all tree model predictions. For testing, when predictions don't matter. -1 means disabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fast_approx_max_num_trees_ever", + "output": "fast approx max num trees ever refers to Max. number of trees to use for all tree model predictions. For testing, when predictions don't matter. -1 means disabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fast_approx_max_num_trees_ever", + "output": "fast approx max num trees ever refers to Max. number of trees to use for all tree model predictions. For testing, when predictions don't matter. -1 means disabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_approx_num_trees", + "output": "fast approx num trees refers to Max. number of trees to use for fast_approx=True (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_approx_num_trees", + "output": "fast approx num trees refers to Max. number of trees to use for fast_approx=True (e.g., for AutoDoc/MLI)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast approx num trees", + "output": "fast approx num trees refers to Max. number of trees to use for fast_approx=True (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "fast approx num trees refers to Max. number of trees to use for fast_approx=True (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fast_approx_num_trees", + "output": "fast approx num trees refers to Max. number of trees to use for fast_approx=True (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fast_approx_num_trees", + "output": "fast approx num trees refers to Max. number of trees to use for fast_approx=True (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_approx_do_one_fold", + "output": "fast approx do one fold refers to Whether to speed up fast_approx=True further, by using only one fold out of all cross-validation folds (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_approx_do_one_fold", + "output": "fast approx do one fold refers to Whether to speed up fast_approx=True further, by using only one fold out of all cross-validation folds (e.g., for AutoDoc/MLI)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast approx do one fold", + "output": "fast approx do one fold refers to Whether to speed up fast_approx=True further, by using only one fold out of all cross-validation folds (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "fast approx do one fold refers to Whether to speed up fast_approx=True further, by using only one fold out of all cross-validation folds (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fast_approx_do_one_fold", + "output": "fast approx do one fold refers to Whether to speed up fast_approx=True further, by using only one fold out of all cross-validation folds (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fast_approx_do_one_fold", + "output": "fast approx do one fold refers to Whether to speed up fast_approx=True further, by using only one fold out of all cross-validation folds (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_approx_do_one_model", + "output": "fast approx do one model refers to Whether to speed up fast_approx=True further, by using only one model out of all ensemble models (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_approx_do_one_model", + "output": "fast approx do one model refers to Whether to speed up fast_approx=True further, by using only one model out of all ensemble models (e.g., for AutoDoc/MLI)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast approx do one model", + "output": "fast approx do one model refers to Whether to speed up fast_approx=True further, by using only one model out of all ensemble models (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "fast approx do one model refers to Whether to speed up fast_approx=True further, by using only one model out of all ensemble models (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fast_approx_do_one_model", + "output": "fast approx do one model refers to Whether to speed up fast_approx=True further, by using only one model out of all ensemble models (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fast_approx_do_one_model", + "output": "fast approx do one model refers to Whether to speed up fast_approx=True further, by using only one model out of all ensemble models (e.g., for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_approx_contribs_num_trees", + "output": "fast approx contribs num trees refers to Max. number of trees to use for fast_approx_contribs=True (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_approx_contribs_num_trees", + "output": "fast approx contribs num trees refers to Max. number of trees to use for fast_approx_contribs=True (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast approx contribs num trees", + "output": "fast approx contribs num trees refers to Max. number of trees to use for fast_approx_contribs=True (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "fast approx contribs num trees refers to Max. number of trees to use for fast_approx_contribs=True (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fast_approx_contribs_num_trees", + "output": "fast approx contribs num trees refers to Max. number of trees to use for fast_approx_contribs=True (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fast_approx_contribs_num_trees", + "output": "fast approx contribs num trees refers to Max. number of trees to use for fast_approx_contribs=True (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_approx_contribs_do_one_fold", + "output": "fast approx contribs do one fold refers to Whether to speed up fast_approx_contribs=True further, by using only one fold out of all cross-validation folds (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_approx_contribs_do_one_fold", + "output": "fast approx contribs do one fold refers to Whether to speed up fast_approx_contribs=True further, by using only one fold out of all cross-validation folds (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast approx contribs do one fold", + "output": "fast approx contribs do one fold refers to Whether to speed up fast_approx_contribs=True further, by using only one fold out of all cross-validation folds (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "fast approx contribs do one fold refers to Whether to speed up fast_approx_contribs=True further, by using only one fold out of all cross-validation folds (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fast_approx_contribs_do_one_fold", + "output": "fast approx contribs do one fold refers to Whether to speed up fast_approx_contribs=True further, by using only one fold out of all cross-validation folds (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fast_approx_contribs_do_one_fold", + "output": "fast approx contribs do one fold refers to Whether to speed up fast_approx_contribs=True further, by using only one fold out of all cross-validation folds (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_approx_contribs_do_one_model", + "output": "fast approx contribs do one model refers to Whether to speed up fast_approx_contribs=True further, by using only one model out of all ensemble models (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_approx_contribs_do_one_model", + "output": "fast approx contribs do one model refers to Whether to speed up fast_approx_contribs=True further, by using only one model out of all ensemble models (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast approx contribs do one model", + "output": "fast approx contribs do one model refers to Whether to speed up fast_approx_contribs=True further, by using only one model out of all ensemble models (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "fast approx contribs do one model refers to Whether to speed up fast_approx_contribs=True further, by using only one model out of all ensemble models (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fast_approx_contribs_do_one_model", + "output": "fast approx contribs do one model refers to Whether to speed up fast_approx_contribs=True further, by using only one model out of all ensemble models (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fast_approx_contribs_do_one_model", + "output": "fast approx contribs do one model refers to Whether to speed up fast_approx_contribs=True further, by using only one model out of all ensemble models (e.g., for 'Fast Approximation' in GUI when making Shapley predictions, and for AutoDoc/MLI)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_187_prob_logic", + "output": "use 187 prob logic refers to Whether to use exploit-explore logic like DAI 1.8.x. False will explore more." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_187_prob_logic", + "output": "use 187 prob logic refers to Whether to use exploit-explore logic like DAI 1.8.x. False will explore more." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use 187 prob logic", + "output": "use 187 prob logic refers to Whether to use exploit-explore logic like DAI 1.8.x. False will explore more." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "use 187 prob logic refers to Whether to use exploit-explore logic like DAI 1.8.x. False will explore more." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting use_187_prob_logic", + "output": "use 187 prob logic refers to Whether to use exploit-explore logic like DAI 1.8.x. False will explore more." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting use_187_prob_logic", + "output": "use 187 prob logic refers to Whether to use exploit-explore logic like DAI 1.8.x. False will explore more." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_ohe_linear", + "output": "enable ohe linear refers to Whether to enable cross-validated OneHotEncoding+LinearModel transformer" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_ohe_linear", + "output": "enable ohe linear refers to Whether to enable cross-validated OneHotEncoding+LinearModel transformer" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable ohe linear", + "output": "enable ohe linear refers to Whether to enable cross-validated OneHotEncoding+LinearModel transformer" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable ohe linear refers to Whether to enable cross-validated OneHotEncoding+LinearModel transformer" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_ohe_linear", + "output": "enable ohe linear refers to Whether to enable cross-validated 
OneHotEncoding+LinearModel transformer" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_ohe_linear", + "output": "enable ohe linear refers to Whether to enable cross-validated OneHotEncoding+LinearModel transformer" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_added_num_classes_switch", + "output": "tensorflow added num classes switch refers to Number of classes above which to include TensorFlow (if TensorFlow is enabled), even if not used exclusively. For small data this is decreased by tensorflow_num_classes_small_data_factor, and for bigger data, this is increased by tensorflow_num_classes_big_data_reduction_factor." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_added_num_classes_switch", + "output": "tensorflow added num classes switch refers to Num. classes above which include Tensorflow: Number of classes above which to include TensorFlow (if TensorFlow is enabled), even if not used exclusively. For small data this is decreased by tensorflow_num_classes_small_data_factor, and for bigger data, this is increased by tensorflow_num_classes_big_data_reduction_factor." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow added num classes switch", + "output": "tensorflow added num classes switch refers to Num. classes above which include Tensorflow: Number of classes above which to include TensorFlow (if TensorFlow is enabled), even if not used exclusively. For small data this is decreased by tensorflow_num_classes_small_data_factor, and for bigger data, this is increased by tensorflow_num_classes_big_data_reduction_factor." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Num. classes above which include Tensorflow: ", + "output": "tensorflow added num classes switch refers to Num. classes above which include Tensorflow: Number of classes above which to include TensorFlow (if TensorFlow is enabled), even if not used exclusively. For small data this is decreased by tensorflow_num_classes_small_data_factor, and for bigger data, this is increased by tensorflow_num_classes_big_data_reduction_factor." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_added_num_classes_switch", + "output": "tensorflow added num classes switch refers to Number of classes above which to include TensorFlow (if TensorFlow is enabled), even if not used exclusively. For small data this is decreased by tensorflow_num_classes_small_data_factor, and for bigger data, this is increased by tensorflow_num_classes_big_data_reduction_factor." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_added_num_classes_switch", + "output": "tensorflow added num classes switch refers to Num. classes above which include Tensorflow: Number of classes above which to include TensorFlow (if TensorFlow is enabled), even if not used exclusively. For small data this is decreased by tensorflow_num_classes_small_data_factor, and for bigger data, this is increased by tensorflow_num_classes_big_data_reduction_factor." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_num_classes_switch", + "output": "tensorflow num classes switch refers to Number of classes above which to only use TensorFlow (if TensorFlow is enabled), instead of other models set on 'auto' (models set to 'on' are still used).
Up to tensorflow_num_classes_switch_but_keep_lightgbm, keep LightGBM. If small data, this is increased by tensorflow_num_classes_small_data_factor." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow_num_classes_switch", + "output": "tensorflow num classes switch refers to Num. classes above which to exclusively use TensorFlow: Number of classes above which to only use TensorFlow (if TensorFlow is enabled), instead of others models set on 'auto' (models set to 'on' are still used). Up to tensorflow_num_classes_switch_but_keep_lightgbm, keep LightGBM. If small data, this is increased by tensorflow_num_classes_small_data_factor." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tensorflow num classes switch", + "output": "tensorflow num classes switch refers to Num. classes above which to exclusively use TensorFlow: Number of classes above which to only use TensorFlow (if TensorFlow is enabled), instead of others models set on 'auto' (models set to 'on' are still used). Up to tensorflow_num_classes_switch_but_keep_lightgbm, keep LightGBM. If small data, this is increased by tensorflow_num_classes_small_data_factor." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Num. classes above which to exclusively use TensorFlow: ", + "output": "tensorflow num classes switch refers to Num. classes above which to exclusively use TensorFlow: Number of classes above which to only use TensorFlow (if TensorFlow is enabled), instead of others models set on 'auto' (models set to 'on' are still used). Up to tensorflow_num_classes_switch_but_keep_lightgbm, keep LightGBM. If small data, this is increased by tensorflow_num_classes_small_data_factor." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tensorflow_num_classes_switch", + "output": "tensorflow num classes switch refers to Number of classes above which to only use TensorFlow (if TensorFlow is enabled), instead of others models set on 'auto' (models set to 'on' are still used). Up to tensorflow_num_classes_switch_but_keep_lightgbm, keep LightGBM. If small data, this is increased by tensorflow_num_classes_small_data_factor." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tensorflow_num_classes_switch", + "output": "tensorflow num classes switch refers to Num. classes above which to exclusively use TensorFlow: Number of classes above which to only use TensorFlow (if TensorFlow is enabled), instead of others models set on 'auto' (models set to 'on' are still used). Up to tensorflow_num_classes_switch_but_keep_lightgbm, keep LightGBM. If small data, this is increased by tensorflow_num_classes_small_data_factor." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prediction_intervals", + "output": "prediction intervals refers to Compute empirical prediction intervals (based on holdout predictions)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prediction_intervals", + "output": "prediction intervals refers to Compute prediction intervals: Compute empirical prediction intervals (based on holdout predictions)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prediction intervals", + "output": "prediction intervals refers to Compute prediction intervals: Compute empirical prediction intervals (based on holdout predictions)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Compute prediction intervals: ", + "output": "prediction intervals refers to Compute prediction intervals: Compute empirical prediction intervals (based on holdout predictions)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prediction_intervals", + "output": "prediction intervals refers to Compute empirical prediction intervals (based on holdout predictions)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prediction_intervals", + "output": "prediction intervals refers to Compute prediction intervals: Compute empirical prediction intervals (based on holdout predictions)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prediction_intervals_alpha", + "output": "prediction intervals alpha refers to Confidence level for prediction intervals." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prediction_intervals_alpha", + "output": "prediction intervals alpha refers to Confidence level for prediction intervals: Confidence level for prediction intervals." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prediction intervals alpha", + "output": "prediction intervals alpha refers to Confidence level for prediction intervals: Confidence level for prediction intervals." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Confidence level for prediction intervals: ", + "output": "prediction intervals alpha refers to Confidence level for prediction intervals: Confidence level for prediction intervals." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prediction_intervals_alpha", + "output": "prediction intervals alpha refers to Confidence level for prediction intervals." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prediction_intervals_alpha", + "output": "prediction intervals alpha refers to Confidence level for prediction intervals: Confidence level for prediction intervals." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pred_labels", + "output": "pred labels refers to Appends one extra output column with predicted target class (after the per-class probabilities). Uses argmax for multiclass, and the threshold defined by the optimal scorer controlled by the 'threshold_scorer' expert setting for binary problems. This setting controls the training, validation and test set predictions (if applicable) that are created by the experiment. MOJO, scoring pipeline and client APIs control this behavior via their own version of this parameter." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pred_labels", + "output": "pred labels refers to Output labels for predictions created during the experiment for classification problems.: Appends one extra output column with predicted target class (after the per-class probabilities). Uses argmax for multiclass, and the threshold defined by the optimal scorer controlled by the 'threshold_scorer' expert setting for binary problems. This setting controls the training, validation and test set predictions (if applicable) that are created by the experiment. MOJO, scoring pipeline and client APIs control this behavior via their own version of this parameter." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pred labels", + "output": "pred labels refers to Output labels for predictions created during the experiment for classification problems.: Appends one extra output column with predicted target class (after the per-class probabilities). Uses argmax for multiclass, and the threshold defined by the optimal scorer controlled by the 'threshold_scorer' expert setting for binary problems. This setting controls the training, validation and test set predictions (if applicable) that are created by the experiment. MOJO, scoring pipeline and client APIs control this behavior via their own version of this parameter." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Output labels for predictions created during the experiment for classification problems.: ", + "output": "pred labels refers to Output labels for predictions created during the experiment for classification problems.: Appends one extra output column with predicted target class (after the per-class probabilities). Uses argmax for multiclass, and the threshold defined by the optimal scorer controlled by the 'threshold_scorer' expert setting for binary problems. This setting controls the training, validation and test set predictions (if applicable) that are created by the experiment. MOJO, scoring pipeline and client APIs control this behavior via their own version of this parameter." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pred_labels", + "output": "pred labels refers to Appends one extra output column with predicted target class (after the per-class probabilities). Uses argmax for multiclass, and the threshold defined by the optimal scorer controlled by the 'threshold_scorer' expert setting for binary problems. 
This setting controls the training, validation and test set predictions (if applicable) that are created by the experiment. MOJO, scoring pipeline and client APIs control this behavior via their own version of this parameter." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pred_labels", + "output": "pred labels refers to Output labels for predictions created during the experiment for classification problems.: Appends one extra output column with predicted target class (after the per-class probabilities). Uses argmax for multiclass, and the threshold defined by the optimal scorer controlled by the 'threshold_scorer' expert setting for binary problems. This setting controls the training, validation and test set predictions (if applicable) that are created by the experiment. MOJO, scoring pipeline and client APIs control this behavior via their own version of this parameter." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "textlin_num_classes_switch", + "output": "textlin num classes switch refers to Class count above which do not use TextLin Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "textlin_num_classes_switch", + "output": "textlin num classes switch refers to Class count above which do not use TextLin Transformer: Class count above which do not use TextLin Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "textlin num classes switch", + "output": "textlin num classes switch refers to Class count above which do not use TextLin Transformer: Class count above which do not use TextLin Transformer." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Class count above which do not use TextLin Transformer: ", + "output": "textlin num classes switch refers to Class count above which do not use TextLin Transformer: Class count above which do not use TextLin Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting textlin_num_classes_switch", + "output": "textlin num classes switch refers to Class count above which do not use TextLin Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting textlin_num_classes_switch", + "output": "textlin num classes switch refers to Class count above which do not use TextLin Transformer: Class count above which do not use TextLin Transformer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text_transformers_max_vocabulary_size", + "output": "text transformers max vocabulary size refers to Max size (in tokens) of the vocabulary created during fitting of Tfidf/Count based text transformers (not CNN/BERT). If multiple values are provided, will use the first one for initial models, and use remaining values during parameter tuning and feature evolution. Values smaller than 10000 are recommended for speed." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text_transformers_max_vocabulary_size", + "output": "text transformers max vocabulary size refers to Max size of the vocabulary for text transformers.: Max size (in tokens) of the vocabulary created during fitting of Tfidf/Count based text transformers (not CNN/BERT). If multiple values are provided, will use the first one for initial models, and use remaining values during parameter tuning and feature evolution. 
Values smaller than 10000 are recommended for speed." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "text transformers max vocabulary size", + "output": "text transformers max vocabulary size refers to Max size of the vocabulary for text transformers.: Max size (in tokens) of the vocabulary created during fitting of Tfidf/Count based text transformers (not CNN/BERT). If multiple values are provided, will use the first one for initial models, and use remaining values during parameter tuning and feature evolution. Values smaller than 10000 are recommended for speed." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max size of the vocabulary for text transformers.: ", + "output": "text transformers max vocabulary size refers to Max size of the vocabulary for text transformers.: Max size (in tokens) of the vocabulary created during fitting of Tfidf/Count based text transformers (not CNN/BERT). If multiple values are provided, will use the first one for initial models, and use remaining values during parameter tuning and feature evolution. Values smaller than 10000 are recommended for speed." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting text_transformers_max_vocabulary_size", + "output": "text transformers max vocabulary size refers to Max size (in tokens) of the vocabulary created during fitting of Tfidf/Count based text transformers (not CNN/BERT). If multiple values are provided, will use the first one for initial models, and use remaining values during parameter tuning and feature evolution. Values smaller than 10000 are recommended for speed." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting text_transformers_max_vocabulary_size", + "output": "text transformers max vocabulary size refers to Max size of the vocabulary for text transformers.: Max size (in tokens) of the vocabulary created during fitting of Tfidf/Count based text transformers (not CNN/BERT). If multiple values are provided, will use the first one for initial models, and use remaining values during parameter tuning and feature evolution. Values smaller than 10000 are recommended for speed." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "number_of_texts_to_cache_in_bert_transformer", + "output": "number of texts to cache in bert transformer refers to Enables caching of BERT embeddings by temporarily saving the embedding vectors to the experiment directory. Set to -1 to cache all text, set to 0 to disable caching. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "number_of_texts_to_cache_in_bert_transformer", + "output": "number of texts to cache in bert transformer refers to Enables caching of BERT embeddings by temporarily saving the embedding vectors to the experiment directory. Set to -1 to cache all text, set to 0 to disable caching. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "number of texts to cache in bert transformer", + "output": "number of texts to cache in bert transformer refers to Enables caching of BERT embeddings by temporarily saving the embedding vectors to the experiment directory. Set to -1 to cache all text, set to 0 to disable caching. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "number of texts to cache in bert transformer refers to Enables caching of BERT embeddings by temporarily saving the embedding vectors to the experiment directory. Set to -1 to cache all text, set to 0 to disable caching. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting number_of_texts_to_cache_in_bert_transformer", + "output": "number of texts to cache in bert transformer refers to Enables caching of BERT embeddings by temporarily saving the embedding vectors to the experiment directory. Set to -1 to cache all text, set to 0 to disable caching. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting number_of_texts_to_cache_in_bert_transformer", + "output": "number of texts to cache in bert transformer refers to Enables caching of BERT embeddings by temporarily saving the embedding vectors to the experiment directory. Set to -1 to cache all text, set to 0 to disable caching. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_abs_score_delta_train_valid", + "output": "max abs score delta train valid refers to Modify early stopping behavior for tree-based models (LightGBM, XGBoostGBM, CatBoost) such that training score (on training data, not holdout) and validation score differ no more than this absolute value (i.e., stop adding trees once abs(train_score - valid_score) > max_abs_score_delta_train_valid). Keep in mind that the meaning of this value depends on the chosen scorer and the dataset (i.e., 0.01 for LogLoss is different than 0.01 for MSE). Experimental option, only for expert use to keep model complexity low. 
To disable, set to 0.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_abs_score_delta_train_valid", + "output": "max abs score delta train valid refers to Max. absolute delta between training and validation scores for tree models.: Modify early stopping behavior for tree-based models (LightGBM, XGBoostGBM, CatBoost) such that training score (on training data, not holdout) and validation score differ no more than this absolute value (i.e., stop adding trees once abs(train_score - valid_score) > max_abs_score_delta_train_valid). Keep in mind that the meaning of this value depends on the chosen scorer and the dataset (i.e., 0.01 for LogLoss is different than 0.01 for MSE). Experimental option, only for expert use to keep model complexity low. To disable, set to 0.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max abs score delta train valid", + "output": "max abs score delta train valid refers to Max. absolute delta between training and validation scores for tree models.: Modify early stopping behavior for tree-based models (LightGBM, XGBoostGBM, CatBoost) such that training score (on training data, not holdout) and validation score differ no more than this absolute value (i.e., stop adding trees once abs(train_score - valid_score) > max_abs_score_delta_train_valid). Keep in mind that the meaning of this value depends on the chosen scorer and the dataset (i.e., 0.01 for LogLoss is different than 0.01 for MSE). Experimental option, only for expert use to keep model complexity low. To disable, set to 0.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. absolute delta between training and validation scores for tree models.: ", + "output": "max abs score delta train valid refers to Max. 
absolute delta between training and validation scores for tree models.: Modify early stopping behavior for tree-based models (LightGBM, XGBoostGBM, CatBoost) such that training score (on training data, not holdout) and validation score differ no more than this absolute value (i.e., stop adding trees once abs(train_score - valid_score) > max_abs_score_delta_train_valid). Keep in mind that the meaning of this value depends on the chosen scorer and the dataset (i.e., 0.01 for LogLoss is different than 0.01 for MSE). Experimental option, only for expert use to keep model complexity low. To disable, set to 0.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_abs_score_delta_train_valid", + "output": "max abs score delta train valid refers to Modify early stopping behavior for tree-based models (LightGBM, XGBoostGBM, CatBoost) such that training score (on training data, not holdout) and validation score differ no more than this absolute value (i.e., stop adding trees once abs(train_score - valid_score) > max_abs_score_delta_train_valid). Keep in mind that the meaning of this value depends on the chosen scorer and the dataset (i.e., 0.01 for LogLoss is different than 0.01 for MSE). Experimental option, only for expert use to keep model complexity low. To disable, set to 0.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_abs_score_delta_train_valid", + "output": "max abs score delta train valid refers to Max. absolute delta between training and validation scores for tree models.: Modify early stopping behavior for tree-based models (LightGBM, XGBoostGBM, CatBoost) such that training score (on training data, not holdout) and validation score differ no more than this absolute value (i.e., stop adding trees once abs(train_score - valid_score) > max_abs_score_delta_train_valid). 
Keep in mind that the meaning of this value depends on the chosen scorer and the dataset (i.e., 0.01 for LogLoss is different than 0.01 for MSE). Experimental option, only for expert use to keep model complexity low. To disable, set to 0.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rel_score_delta_train_valid", + "output": "max rel score delta train valid refers to Modify early stopping behavior for tree-based models (LightGBM, XGBoostGBM, CatBoost) such that training score (on training data, not holdout) and validation score differ no more than this relative value (i.e., stop adding trees once abs(train_score - valid_score) > max_rel_score_delta_train_valid * abs(train_score)). Keep in mind that the meaning of this value depends on the chosen scorer and the dataset (i.e., 0.01 for LogLoss is different than 0.01 for MSE). Experimental option, only for expert use to keep model complexity low. To disable, set to 0.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rel_score_delta_train_valid", + "output": "max rel score delta train valid refers to Max. relative delta between training and validation scores for tree models.: Modify early stopping behavior for tree-based models (LightGBM, XGBoostGBM, CatBoost) such that training score (on training data, not holdout) and validation score differ no more than this relative value (i.e., stop adding trees once abs(train_score - valid_score) > max_rel_score_delta_train_valid * abs(train_score)). Keep in mind that the meaning of this value depends on the chosen scorer and the dataset (i.e., 0.01 for LogLoss is different than 0.01 for MSE). Experimental option, only for expert use to keep model complexity low. 
To disable, set to 0.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max rel score delta train valid", + "output": "max rel score delta train valid refers to Max. relative delta between training and validation scores for tree models.: Modify early stopping behavior for tree-based models (LightGBM, XGBoostGBM, CatBoost) such that training score (on training data, not holdout) and validation score differ no more than this relative value (i.e., stop adding trees once abs(train_score - valid_score) > max_rel_score_delta_train_valid * abs(train_score)). Keep in mind that the meaning of this value depends on the chosen scorer and the dataset (i.e., 0.01 for LogLoss is different than 0.01 for MSE). Experimental option, only for expert use to keep model complexity low. To disable, set to 0.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. relative delta between training and validation scores for tree models.: ", + "output": "max rel score delta train valid refers to Max. relative delta between training and validation scores for tree models.: Modify early stopping behavior for tree-based models (LightGBM, XGBoostGBM, CatBoost) such that training score (on training data, not holdout) and validation score differ no more than this relative value (i.e., stop adding trees once abs(train_score - valid_score) > max_rel_score_delta_train_valid * abs(train_score)). Keep in mind that the meaning of this value depends on the chosen scorer and the dataset (i.e., 0.01 for LogLoss is different than 0.01 for MSE). Experimental option, only for expert use to keep model complexity low. 
To disable, set to 0.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_rel_score_delta_train_valid", + "output": "max rel score delta train valid refers to Modify early stopping behavior for tree-based models (LightGBM, XGBoostGBM, CatBoost) such that training score (on training data, not holdout) and validation score differ no more than this relative value (i.e., stop adding trees once abs(train_score - valid_score) > max_rel_score_delta_train_valid * abs(train_score)). Keep in mind that the meaning of this value depends on the chosen scorer and the dataset (i.e., 0.01 for LogLoss is different than 0.01 for MSE). Experimental option, only for expert use to keep model complexity low. To disable, set to 0.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_rel_score_delta_train_valid", + "output": "max rel score delta train valid refers to Max. relative delta between training and validation scores for tree models.: Modify early stopping behavior for tree-based models (LightGBM, XGBoostGBM, CatBoost) such that training score (on training data, not holdout) and validation score differ no more than this relative value (i.e., stop adding trees once abs(train_score - valid_score) > max_rel_score_delta_train_valid * abs(train_score)). Keep in mind that the meaning of this value depends on the chosen scorer and the dataset (i.e., 0.01 for LogLoss is different than 0.01 for MSE). Experimental option, only for expert use to keep model complexity low. To disable, set to 0.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm_lambda_search", + "output": "glm lambda search refers to Whether to search for optimal lambda for given alpha for XGBoost GLM. If 'auto', disabled if training data has more rows * cols than final_pipeline_data_size or for multiclass experiments. 
Disabled always for ensemble_level = 0. Not always a good approach, can be slow for little payoff compared to grid search. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm_lambda_search", + "output": "glm lambda search refers to Do lambda search for GLM: Whether to search for optimal lambda for given alpha for XGBoost GLM. If 'auto', disabled if training data has more rows * cols than final_pipeline_data_size or for multiclass experiments. Disabled always for ensemble_level = 0. Not always a good approach, can be slow for little payoff compared to grid search. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm lambda search", + "output": "glm lambda search refers to Do lambda search for GLM: Whether to search for optimal lambda for given alpha for XGBoost GLM. If 'auto', disabled if training data has more rows * cols than final_pipeline_data_size or for multiclass experiments. Disabled always for ensemble_level = 0. Not always a good approach, can be slow for little payoff compared to grid search. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Do lambda search for GLM: ", + "output": "glm lambda search refers to Do lambda search for GLM: Whether to search for optimal lambda for given alpha for XGBoost GLM. If 'auto', disabled if training data has more rows * cols than final_pipeline_data_size or for multiclass experiments. Disabled always for ensemble_level = 0. Not always a good approach, can be slow for little payoff compared to grid search. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting glm_lambda_search", + "output": "glm lambda search refers to Whether to search for optimal lambda for given alpha for XGBoost GLM. 
If 'auto', disabled if training data has more rows * cols than final_pipeline_data_size or for multiclass experiments. Disabled always for ensemble_level = 0. Not always a good approach, can be slow for little payoff compared to grid search. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting glm_lambda_search", + "output": "glm lambda search refers to Do lambda search for GLM: Whether to search for optimal lambda for given alpha for XGBoost GLM. If 'auto', disabled if training data has more rows * cols than final_pipeline_data_size or for multiclass experiments. Disabled always for ensemble_level = 0. Not always a good approach, can be slow for little payoff compared to grid search. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm_lambda_search_by_eval_metric", + "output": "glm lambda search by eval metric refers to If XGBoost GLM lambda search is enabled, whether to do search by the eval metric (True) or using the actual DAI scorer (False)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm_lambda_search_by_eval_metric", + "output": "glm lambda search by eval metric refers to Do lambda search for GLM by exact eval metric: If XGBoost GLM lambda search is enabled, whether to do search by the eval metric (True) or using the actual DAI scorer (False)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "glm lambda search by eval metric", + "output": "glm lambda search by eval metric refers to Do lambda search for GLM by exact eval metric: If XGBoost GLM lambda search is enabled, whether to do search by the eval metric (True) or using the actual DAI scorer (False)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Do lambda search for GLM by exact eval metric: ", + "output": "glm lambda search by eval metric refers to Do lambda search for GLM by exact eval metric: If XGBoost GLM lambda search is enabled, whether to do search by the eval metric (True) or using the actual DAI scorer (False)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting glm_lambda_search_by_eval_metric", + "output": "glm lambda search by eval metric refers to If XGBoost GLM lambda search is enabled, whether to do search by the eval metric (True) or using the actual DAI scorer (False)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting glm_lambda_search_by_eval_metric", + "output": "glm lambda search by eval metric refers to Do lambda search for GLM by exact eval metric: If XGBoost GLM lambda search is enabled, whether to do search by the eval metric (True) or using the actual DAI scorer (False)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_early_stopping_threshold", + "output": "enable early stopping threshold refers to Whether to enable early stopping threshold for LightGBM, varying by accuracy. Stops training once validation score changes by less than the threshold. This leads to fewer trees, usually avoiding wasteful trees, but may lower accuracy. However, it may also improve generalization by avoiding fine-tuning to validation set. 0 leads to value of 0 used, i.e. disabled > 0 means non-automatic mode using that *relative* value, scaled by first tree results of the metric for any metric. -1 means always enable, but the threshold itself is automatic (lower the accuracy, the larger the threshold). -2 means fully automatic mode, i.e. disabled unless reduce_mojo_size is true. 
In true, the lower the accuracy, the larger the threshold. NOTE: Automatic threshold is set so relative value of metric's min_delta in LightGBM's callback for early stopping is: if accuracy <= 1: early_stopping_threshold = 1e-1 elif accuracy <= 4: early_stopping_threshold = 1e-2 elif accuracy <= 7: early_stopping_threshold = 1e-3 elif accuracy <= 9: early_stopping_threshold = 1e-4 else: early_stopping_threshold = 0 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_early_stopping_threshold", + "output": "enable early stopping threshold refers to Early stopping threshold: Whether to enable early stopping threshold for LightGBM, varying by accuracy. Stops training once validation score changes by less than the threshold. This leads to fewer trees, usually avoiding wasteful trees, but may lower accuracy. However, it may also improve generalization by avoiding fine-tuning to validation set. 0 leads to value of 0 used, i.e. disabled > 0 means non-automatic mode using that *relative* value, scaled by first tree results of the metric for any metric. -1 means always enable, but the threshold itself is automatic (lower the accuracy, the larger the threshold). -2 means fully automatic mode, i.e. disabled unless reduce_mojo_size is true. In true, the lower the accuracy, the larger the threshold. 
NOTE: Automatic threshold is set so relative value of metric's min_delta in LightGBM's callback for early stopping is: if accuracy <= 1: early_stopping_threshold = 1e-1 elif accuracy <= 4: early_stopping_threshold = 1e-2 elif accuracy <= 7: early_stopping_threshold = 1e-3 elif accuracy <= 9: early_stopping_threshold = 1e-4 else: early_stopping_threshold = 0 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable early stopping threshold", + "output": "enable early stopping threshold refers to Early stopping threshold: Whether to enable early stopping threshold for LightGBM, varying by accuracy. Stops training once validation score changes by less than the threshold. This leads to fewer trees, usually avoiding wasteful trees, but may lower accuracy. However, it may also improve generalization by avoiding fine-tuning to validation set. 0 leads to value of 0 used, i.e. disabled > 0 means non-automatic mode using that *relative* value, scaled by first tree results of the metric for any metric. -1 means always enable, but the threshold itself is automatic (lower the accuracy, the larger the threshold). -2 means fully automatic mode, i.e. disabled unless reduce_mojo_size is true. In true, the lower the accuracy, the larger the threshold. 
NOTE: Automatic threshold is set so relative value of metric's min_delta in LightGBM's callback for early stopping is: if accuracy <= 1: early_stopping_threshold = 1e-1 elif accuracy <= 4: early_stopping_threshold = 1e-2 elif accuracy <= 7: early_stopping_threshold = 1e-3 elif accuracy <= 9: early_stopping_threshold = 1e-4 else: early_stopping_threshold = 0 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Early stopping threshold: ", + "output": "enable early stopping threshold refers to Early stopping threshold: Whether to enable early stopping threshold for LightGBM, varying by accuracy. Stops training once validation score changes by less than the threshold. This leads to fewer trees, usually avoiding wasteful trees, but may lower accuracy. However, it may also improve generalization by avoiding fine-tuning to validation set. 0 leads to value of 0 used, i.e. disabled > 0 means non-automatic mode using that *relative* value, scaled by first tree results of the metric for any metric. -1 means always enable, but the threshold itself is automatic (lower the accuracy, the larger the threshold). -2 means fully automatic mode, i.e. disabled unless reduce_mojo_size is true. In true, the lower the accuracy, the larger the threshold. NOTE: Automatic threshold is set so relative value of metric's min_delta in LightGBM's callback for early stopping is: if accuracy <= 1: early_stopping_threshold = 1e-1 elif accuracy <= 4: early_stopping_threshold = 1e-2 elif accuracy <= 7: early_stopping_threshold = 1e-3 elif accuracy <= 9: early_stopping_threshold = 1e-4 else: early_stopping_threshold = 0 " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_early_stopping_threshold", + "output": "enable early stopping threshold refers to Whether to enable early stopping threshold for LightGBM, varying by accuracy. 
Stops training once validation score changes by less than the threshold. This leads to fewer trees, usually avoiding wasteful trees, but may lower accuracy. However, it may also improve generalization by avoiding fine-tuning to validation set. 0 leads to value of 0 used, i.e. disabled > 0 means non-automatic mode using that *relative* value, scaled by first tree results of the metric for any metric. -1 means always enable, but the threshold itself is automatic (lower the accuracy, the larger the threshold). -2 means fully automatic mode, i.e. disabled unless reduce_mojo_size is true. In true, the lower the accuracy, the larger the threshold. NOTE: Automatic threshold is set so relative value of metric's min_delta in LightGBM's callback for early stopping is: if accuracy <= 1: early_stopping_threshold = 1e-1 elif accuracy <= 4: early_stopping_threshold = 1e-2 elif accuracy <= 7: early_stopping_threshold = 1e-3 elif accuracy <= 9: early_stopping_threshold = 1e-4 else: early_stopping_threshold = 0 " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_early_stopping_threshold", + "output": "enable early stopping threshold refers to Early stopping threshold: Whether to enable early stopping threshold for LightGBM, varying by accuracy. Stops training once validation score changes by less than the threshold. This leads to fewer trees, usually avoiding wasteful trees, but may lower accuracy. However, it may also improve generalization by avoiding fine-tuning to validation set. 0 leads to value of 0 used, i.e. disabled > 0 means non-automatic mode using that *relative* value, scaled by first tree results of the metric for any metric. -1 means always enable, but the threshold itself is automatic (lower the accuracy, the larger the threshold). -2 means fully automatic mode, i.e. disabled unless reduce_mojo_size is true. In true, the lower the accuracy, the larger the threshold. 
NOTE: Automatic threshold is set so relative value of metric's min_delta in LightGBM's callback for early stopping is: if accuracy <= 1: early_stopping_threshold = 1e-1 elif accuracy <= 4: early_stopping_threshold = 1e-2 elif accuracy <= 7: early_stopping_threshold = 1e-3 elif accuracy <= 9: early_stopping_threshold = 1e-4 else: early_stopping_threshold = 0 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_varimp_to_save", + "output": "max varimp to save refers to Max. number of top variable importances to save per iteration (GUI can only display a max. of 14)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_varimp_to_save", + "output": "max varimp to save refers to Max. number of top variable importances to save per iteration (GUI can only display a max. of 14)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max varimp to save", + "output": "max varimp to save refers to Max. number of top variable importances to save per iteration (GUI can only display a max. of 14)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max varimp to save refers to Max. number of top variable importances to save per iteration (GUI can only display a max. of 14)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_varimp_to_save", + "output": "max varimp to save refers to Max. number of top variable importances to save per iteration (GUI can only display a max. of 14)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_varimp_to_save", + "output": "max varimp to save refers to Max. 
number of top variable importances to save per iteration (GUI can only display a max. of 14)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_num_varimp_to_log", + "output": "max num varimp to log refers to Max. number of top variable importances to show in logs during feature evolution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_num_varimp_to_log", + "output": "max num varimp to log refers to Max. number of top variable importances to show in logs during feature evolution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max num varimp to log", + "output": "max num varimp to log refers to Max. number of top variable importances to show in logs during feature evolution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max num varimp to log refers to Max. number of top variable importances to show in logs during feature evolution" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_num_varimp_to_log", + "output": "max num varimp to log refers to Max. number of top variable importances to show in logs during feature evolution" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_num_varimp_to_log", + "output": "max num varimp to log refers to Max. number of top variable importances to show in logs during feature evolution" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_num_varimp_shift_to_log", + "output": "max num varimp shift to log refers to Max. 
number of top variable importance shifts to show in logs and GUI after final model built" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_num_varimp_shift_to_log", + "output": "max num varimp shift to log refers to Max. number of top variable importance shifts to show in logs and GUI after final model built" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max num varimp shift to log", + "output": "max num varimp shift to log refers to Max. number of top variable importance shifts to show in logs and GUI after final model built" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max num varimp shift to log refers to Max. number of top variable importance shifts to show in logs and GUI after final model built" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_num_varimp_shift_to_log", + "output": "max num varimp shift to log refers to Max. number of top variable importance shifts to show in logs and GUI after final model built" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_num_varimp_shift_to_log", + "output": "max num varimp shift to log refers to Max. 
number of top variable importance shifts to show in logs and GUI after final model built" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip_transformer_failures", + "output": "skip transformer failures refers to Skipping just avoids the failed transformer. Sometimes python multiprocessing swallows exceptions, so skipping and logging exceptions is also more reliable way to handle them. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. Features that fail are pruned from the individual. If that leaves no features in the individual, then backend tuning, feature/model tuning, final model building, etc. will still fail since DAI should not continue if all features are from a failed state. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip_transformer_failures", + "output": "skip transformer failures refers to Whether to skip failures of transformers: Skipping just avoids the failed transformer. Sometimes python multiprocessing swallows exceptions, so skipping and logging exceptions is also more reliable way to handle them. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. Features that fail are pruned from the individual. If that leaves no features in the individual, then backend tuning, feature/model tuning, final model building, etc. will still fail since DAI should not continue if all features are from a failed state.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip transformer failures", + "output": "skip transformer failures refers to Whether to skip failures of transformers: Skipping just avoids the failed transformer. Sometimes python multiprocessing swallows exceptions, so skipping and logging exceptions is also more reliable way to handle them. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. Features that fail are pruned from the individual. If that leaves no features in the individual, then backend tuning, feature/model tuning, final model building, etc. will still fail since DAI should not continue if all features are from a failed state. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to skip failures of transformers: ", + "output": "skip transformer failures refers to Whether to skip failures of transformers: Skipping just avoids the failed transformer. Sometimes python multiprocessing swallows exceptions, so skipping and logging exceptions is also more reliable way to handle them. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. Features that fail are pruned from the individual. If that leaves no features in the individual, then backend tuning, feature/model tuning, final model building, etc. will still fail since DAI should not continue if all features are from a failed state.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting skip_transformer_failures", + "output": "skip transformer failures refers to Skipping just avoids the failed transformer. Sometimes python multiprocessing swallows exceptions, so skipping and logging exceptions is also more reliable way to handle them. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. Features that fail are pruned from the individual. If that leaves no features in the individual, then backend tuning, feature/model tuning, final model building, etc. will still fail since DAI should not continue if all features are from a failed state. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting skip_transformer_failures", + "output": "skip transformer failures refers to Whether to skip failures of transformers: Skipping just avoids the failed transformer. Sometimes python multiprocessing swallows exceptions, so skipping and logging exceptions is also more reliable way to handle them. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. Features that fail are pruned from the individual. If that leaves no features in the individual, then backend tuning, feature/model tuning, final model building, etc. will still fail since DAI should not continue if all features are from a failed state. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip_model_failures", + "output": "skip model failures refers to Skipping just avoids the failed model. Failures are logged depending upon detailed_skip_failure_messages_level. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip_model_failures", + "output": "skip model failures refers to Whether to skip failures of models: Skipping just avoids the failed model. Failures are logged depending upon detailed_skip_failure_messages_level. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip model failures", + "output": "skip model failures refers to Whether to skip failures of models: Skipping just avoids the failed model. Failures are logged depending upon detailed_skip_failure_messages_level. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to skip failures of models: ", + "output": "skip model failures refers to Whether to skip failures of models: Skipping just avoids the failed model. Failures are logged depending upon detailed_skip_failure_messages_level. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting skip_model_failures", + "output": "skip model failures refers to Skipping just avoids the failed model. Failures are logged depending upon detailed_skip_failure_messages_level. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting skip_model_failures", + "output": "skip model failures refers to Whether to skip failures of models: Skipping just avoids the failed model.
Failures are logged depending upon detailed_skip_failure_messages_level. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip_scorer_failures", + "output": "skip scorer failures refers to Skipping just avoids the failed scorer if among many scorers. Failures are logged depending upon detailed_skip_failure_messages_level. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. Default is True to avoid failing in, e.g., final model building due to a single scorer. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip_scorer_failures", + "output": "skip scorer failures refers to Whether to skip failures of scorers: Skipping just avoids the failed scorer if among many scorers. Failures are logged depending upon detailed_skip_failure_messages_level. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. Default is True to avoid failing in, e.g., final model building due to a single scorer. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip scorer failures", + "output": "skip scorer failures refers to Whether to skip failures of scorers: Skipping just avoids the failed scorer if among many scorers. Failures are logged depending upon detailed_skip_failure_messages_level. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. Default is True to avoid failing in, e.g., final model building due to a single scorer.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to skip failures of scorers: ", + "output": "skip scorer failures refers to Whether to skip failures of scorers: Skipping just avoids the failed scorer if among many scorers. Failures are logged depending upon detailed_skip_failure_messages_level. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. Default is True to avoid failing in, e.g., final model building due to a single scorer. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting skip_scorer_failures", + "output": "skip scorer failures refers to Skipping just avoids the failed scorer if among many scorers. Failures are logged depending upon detailed_skip_failure_messages_level. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. Default is True to avoid failing in, e.g., final model building due to a single scorer. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting skip_scorer_failures", + "output": "skip scorer failures refers to Whether to skip failures of scorers: Skipping just avoids the failed scorer if among many scorers. Failures are logged depending upon detailed_skip_failure_messages_level. Recipe can raise h2oaicore.systemutils.IgnoreError to ignore error and avoid logging error. Default is True to avoid failing in, e.g., final model building due to a single scorer. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip_data_recipe_failures", + "output": "skip data recipe failures refers to Skipping avoids the failed recipe.
Failures are logged depending upon detailed_skip_failure_messages_level. Default is False because runtime data recipes are one-time at start of experiment and expected to work by default. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip_data_recipe_failures", + "output": "skip data recipe failures refers to Whether to skip runtime data recipe failures: Skipping avoids the failed recipe. Failures are logged depending upon detailed_skip_failure_messages_level. Default is False because runtime data recipes are one-time at start of experiment and expected to work by default. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip data recipe failures", + "output": "skip data recipe failures refers to Whether to skip runtime data recipe failures: Skipping avoids the failed recipe. Failures are logged depending upon detailed_skip_failure_messages_level. Default is False because runtime data recipes are one-time at start of experiment and expected to work by default. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to skip runtime data recipe failures: ", + "output": "skip data recipe failures refers to Whether to skip runtime data recipe failures: Skipping avoids the failed recipe. Failures are logged depending upon detailed_skip_failure_messages_level. Default is False because runtime data recipes are one-time at start of experiment and expected to work by default. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting skip_data_recipe_failures", + "output": "skip data recipe failures refers to Skipping avoids the failed recipe.
Failures are logged depending upon detailed_skip_failure_messages_level. Default is False because runtime data recipes are one-time at start of experiment and expected to work by default. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting skip_data_recipe_failures", + "output": "skip data recipe failures refers to Whether to skip runtime data recipe failures: Skipping avoids the failed recipe. Failures are logged depending upon detailed_skip_failure_messages_level. Default is False because runtime data recipes are one-time at start of experiment and expected to work by default. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "can_skip_final_upper_layer_failures", + "output": "can skip final upper layer failures refers to Whether can skip final model transformer failures for layer > first layer for multi-layer pipeline." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "can_skip_final_upper_layer_failures", + "output": "can skip final upper layer failures refers to Whether can skip final model transformer failures for layer > first layer for multi-layer pipeline." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "can skip final upper layer failures", + "output": "can skip final upper layer failures refers to Whether can skip final model transformer failures for layer > first layer for multi-layer pipeline." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "can skip final upper layer failures refers to Whether can skip final model transformer failures for layer > first layer for multi-layer pipeline."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting can_skip_final_upper_layer_failures", + "output": "can skip final upper layer failures refers to Whether can skip final model transformer failures for layer > first layer for multi-layer pipeline." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting can_skip_final_upper_layer_failures", + "output": "can skip final upper layer failures refers to Whether can skip final model transformer failures for layer > first layer for multi-layer pipeline." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detailed_skip_failure_messages_level", + "output": "detailed skip failure messages level refers to How much verbosity to log failure messages for failed and then skipped transformers or models. Full failures always go to disk as *.stack files, which upon completion of experiment goes into details folder within experiment log zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detailed_skip_failure_messages_level", + "output": "detailed skip failure messages level refers to Level to log (0=simple message 1=code line plus message 2=detailed stack traces) for skipped failures.: How much verbosity to log failure messages for failed and then skipped transformers or models. Full failures always go to disk as *.stack files, which upon completion of experiment goes into details folder within experiment log zip file. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "detailed skip failure messages level", + "output": "detailed skip failure messages level refers to Level to log (0=simple message 1=code line plus message 2=detailed stack traces) for skipped failures.: How much verbosity to log failure messages for failed and then skipped transformers or models. Full failures always go to disk as *.stack files, which upon completion of experiment goes into details folder within experiment log zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Level to log (0=simple message 1=code line plus message 2=detailed stack traces) for skipped failures.: ", + "output": "detailed skip failure messages level refers to Level to log (0=simple message 1=code line plus message 2=detailed stack traces) for skipped failures.: How much verbosity to log failure messages for failed and then skipped transformers or models. Full failures always go to disk as *.stack files, which upon completion of experiment goes into details folder within experiment log zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting detailed_skip_failure_messages_level", + "output": "detailed skip failure messages level refers to How much verbosity to log failure messages for failed and then skipped transformers or models. Full failures always go to disk as *.stack files, which upon completion of experiment goes into details folder within experiment log zip file. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting detailed_skip_failure_messages_level", + "output": "detailed skip failure messages level refers to Level to log (0=simple message 1=code line plus message 2=detailed stack traces) for skipped failures.: How much verbosity to log failure messages for failed and then skipped transformers or models. Full failures always go to disk as *.stack files, which upon completion of experiment goes into details folder within experiment log zip file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "notify_failures", + "output": "notify failures refers to Whether to not just log errors of recipes (models and transformers) but also show high-level notification in GUI. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "notify_failures", + "output": "notify failures refers to Whether to notify about failures of transformers or models or other recipe failures: Whether to not just log errors of recipes (models and transformers) but also show high-level notification in GUI. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "notify failures", + "output": "notify failures refers to Whether to notify about failures of transformers or models or other recipe failures: Whether to not just log errors of recipes (models and transformers) but also show high-level notification in GUI. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to notify about failures of transformers or models or other recipe failures: ", + "output": "notify failures refers to Whether to notify about failures of transformers or models or other recipe failures: Whether to not just log errors of recipes (models and transformers) but also show high-level notification in GUI. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting notify_failures", + "output": "notify failures refers to Whether to not just log errors of recipes (models and transformers) but also show high-level notification in GUI. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting notify_failures", + "output": "notify failures refers to Whether to notify about failures of transformers or models or other recipe failures: Whether to not just log errors of recipes (models and transformers) but also show high-level notification in GUI. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "config_overrides", + "output": "config overrides refers to Instructions for 'Add to config.toml via toml string' in GUI expert pageSelf-referential toml parameter, for setting any other toml parameters as string of tomls separated by (spaces around are ok).Useful when toml parameter is not in expert mode but want per-experiment control.Setting this will override all other choices.In expert page, each time expert options saved, the new state is set without memory of any prior settings.The entered item is a fully compliant toml string that would be processed directly by toml.load().One should include 2 double quotes around the entire setting, or double quotes need to be escaped.One enters into the expert page text as follows:e.g. 
``enable_glm=\"off\" enable_xgboost_gbm=\"off\" enable_lightgbm=\"on\"``e.g. ``\"\"enable_glm=\"off\" enable_xgboost_gbm=\"off\" enable_lightgbm=\"off\" enable_tensorflow=\"on\"\"\"``e.g. ``fixed_num_individuals=4``e.g. ``params_lightgbm=\"{'objective':'poisson'}\"``e.g. ``\"\"params_lightgbm=\"{'objective':'poisson'}\"\"\"``e.g. ``max_cores=10 data_precision=\"float32\" max_rows_feature_evolution=50000000000 ensemble_accuracy_switch=11 feature_engineering_effort=1 target_transformer=\"identity\" tournament_feature_style_accuracy_switch=5 params_tensorflow=\"{'layers': (100, 100, 100, 100, 100, 100)}\"``e.g. \"\"max_cores=10 data_precision=\"float32\" max_rows_feature_evolution=50000000000 ensemble_accuracy_switch=11 feature_engineering_effort=1 target_transformer=\"identity\" tournament_feature_style_accuracy_switch=5 params_tensorflow=\"{'layers': (100, 100, 100, 100, 100, 100)}\"\"\"If you see: \"toml.TomlDecodeError\" then ensure toml is set correctly.When set in the expert page of an experiment, these changes only affect experiments and not the serverUsually should keep this as empty string in this toml file. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "config_overrides", + "output": "config overrides refers to Add to config.toml via toml string: Instructions for 'Add to config.toml via toml string' in GUI expert pageSelf-referential toml parameter, for setting any other toml parameters as string of tomls separated by (spaces around are ok).Useful when toml parameter is not in expert mode but want per-experiment control.Setting this will override all other choices.In expert page, each time expert options saved, the new state is set without memory of any prior settings.The entered item is a fully compliant toml string that would be processed directly by toml.load().One should include 2 double quotes around the entire setting, or double quotes need to be escaped.One enters into the expert page text as follows:e.g. ``enable_glm=\"off\" enable_xgboost_gbm=\"off\" enable_lightgbm=\"on\"``e.g. ``\"\"enable_glm=\"off\" enable_xgboost_gbm=\"off\" enable_lightgbm=\"off\" enable_tensorflow=\"on\"\"\"``e.g. ``fixed_num_individuals=4``e.g. ``params_lightgbm=\"{'objective':'poisson'}\"``e.g. ``\"\"params_lightgbm=\"{'objective':'poisson'}\"\"\"``e.g. ``max_cores=10 data_precision=\"float32\" max_rows_feature_evolution=50000000000 ensemble_accuracy_switch=11 feature_engineering_effort=1 target_transformer=\"identity\" tournament_feature_style_accuracy_switch=5 params_tensorflow=\"{'layers': (100, 100, 100, 100, 100, 100)}\"``e.g. 
\"\"max_cores=10 data_precision=\"float32\" max_rows_feature_evolution=50000000000 ensemble_accuracy_switch=11 feature_engineering_effort=1 target_transformer=\"identity\" tournament_feature_style_accuracy_switch=5 params_tensorflow=\"{'layers': (100, 100, 100, 100, 100, 100)}\"\"\"If you see: \"toml.TomlDecodeError\" then ensure toml is set correctly.When set in the expert page of an experiment, these changes only affect experiments and not the serverUsually should keep this as empty string in this toml file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "config overrides", + "output": "config overrides refers to Add to config.toml via toml string: Instructions for 'Add to config.toml via toml string' in GUI expert pageSelf-referential toml parameter, for setting any other toml parameters as string of tomls separated by (spaces around are ok).Useful when toml parameter is not in expert mode but want per-experiment control.Setting this will override all other choices.In expert page, each time expert options saved, the new state is set without memory of any prior settings.The entered item is a fully compliant toml string that would be processed directly by toml.load().One should include 2 double quotes around the entire setting, or double quotes need to be escaped.One enters into the expert page text as follows:e.g. ``enable_glm=\"off\" enable_xgboost_gbm=\"off\" enable_lightgbm=\"on\"``e.g. ``\"\"enable_glm=\"off\" enable_xgboost_gbm=\"off\" enable_lightgbm=\"off\" enable_tensorflow=\"on\"\"\"``e.g. ``fixed_num_individuals=4``e.g. ``params_lightgbm=\"{'objective':'poisson'}\"``e.g. ``\"\"params_lightgbm=\"{'objective':'poisson'}\"\"\"``e.g. 
``max_cores=10 data_precision=\"float32\" max_rows_feature_evolution=50000000000 ensemble_accuracy_switch=11 feature_engineering_effort=1 target_transformer=\"identity\" tournament_feature_style_accuracy_switch=5 params_tensorflow=\"{'layers': (100, 100, 100, 100, 100, 100)}\"``e.g. \"\"max_cores=10 data_precision=\"float32\" max_rows_feature_evolution=50000000000 ensemble_accuracy_switch=11 feature_engineering_effort=1 target_transformer=\"identity\" tournament_feature_style_accuracy_switch=5 params_tensorflow=\"{'layers': (100, 100, 100, 100, 100, 100)}\"\"\"If you see: \"toml.TomlDecodeError\" then ensure toml is set correctly.When set in the expert page of an experiment, these changes only affect experiments and not the serverUsually should keep this as empty string in this toml file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Add to config.toml via toml string: ", + "output": "config overrides refers to Add to config.toml via toml string: Instructions for 'Add to config.toml via toml string' in GUI expert pageSelf-referential toml parameter, for setting any other toml parameters as string of tomls separated by (spaces around are ok).Useful when toml parameter is not in expert mode but want per-experiment control.Setting this will override all other choices.In expert page, each time expert options saved, the new state is set without memory of any prior settings.The entered item is a fully compliant toml string that would be processed directly by toml.load().One should include 2 double quotes around the entire setting, or double quotes need to be escaped.One enters into the expert page text as follows:e.g. ``enable_glm=\"off\" enable_xgboost_gbm=\"off\" enable_lightgbm=\"on\"``e.g. ``\"\"enable_glm=\"off\" enable_xgboost_gbm=\"off\" enable_lightgbm=\"off\" enable_tensorflow=\"on\"\"\"``e.g. ``fixed_num_individuals=4``e.g. ``params_lightgbm=\"{'objective':'poisson'}\"``e.g. 
``\"\"params_lightgbm=\"{'objective':'poisson'}\"\"\"``e.g. ``max_cores=10 data_precision=\"float32\" max_rows_feature_evolution=50000000000 ensemble_accuracy_switch=11 feature_engineering_effort=1 target_transformer=\"identity\" tournament_feature_style_accuracy_switch=5 params_tensorflow=\"{'layers': (100, 100, 100, 100, 100, 100)}\"``e.g. \"\"max_cores=10 data_precision=\"float32\" max_rows_feature_evolution=50000000000 ensemble_accuracy_switch=11 feature_engineering_effort=1 target_transformer=\"identity\" tournament_feature_style_accuracy_switch=5 params_tensorflow=\"{'layers': (100, 100, 100, 100, 100, 100)}\"\"\"If you see: \"toml.TomlDecodeError\" then ensure toml is set correctly.When set in the expert page of an experiment, these changes only affect experiments and not the serverUsually should keep this as empty string in this toml file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting config_overrides", + "output": "config overrides refers to Instructions for 'Add to config.toml via toml string' in GUI expert pageSelf-referential toml parameter, for setting any other toml parameters as string of tomls separated by (spaces around are ok).Useful when toml parameter is not in expert mode but want per-experiment control.Setting this will override all other choices.In expert page, each time expert options saved, the new state is set without memory of any prior settings.The entered item is a fully compliant toml string that would be processed directly by toml.load().One should include 2 double quotes around the entire setting, or double quotes need to be escaped.One enters into the expert page text as follows:e.g. ``enable_glm=\"off\" enable_xgboost_gbm=\"off\" enable_lightgbm=\"on\"``e.g. ``\"\"enable_glm=\"off\" enable_xgboost_gbm=\"off\" enable_lightgbm=\"off\" enable_tensorflow=\"on\"\"\"``e.g. ``fixed_num_individuals=4``e.g. ``params_lightgbm=\"{'objective':'poisson'}\"``e.g. 
``\"\"params_lightgbm=\"{'objective':'poisson'}\"\"\"``e.g. ``max_cores=10 data_precision=\"float32\" max_rows_feature_evolution=50000000000 ensemble_accuracy_switch=11 feature_engineering_effort=1 target_transformer=\"identity\" tournament_feature_style_accuracy_switch=5 params_tensorflow=\"{'layers': (100, 100, 100, 100, 100, 100)}\"``e.g. \"\"max_cores=10 data_precision=\"float32\" max_rows_feature_evolution=50000000000 ensemble_accuracy_switch=11 feature_engineering_effort=1 target_transformer=\"identity\" tournament_feature_style_accuracy_switch=5 params_tensorflow=\"{'layers': (100, 100, 100, 100, 100, 100)}\"\"\"If you see: \"toml.TomlDecodeError\" then ensure toml is set correctly.When set in the expert page of an experiment, these changes only affect experiments and not the serverUsually should keep this as empty string in this toml file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting config_overrides", + "output": "config overrides refers to Add to config.toml via toml string: Instructions for 'Add to config.toml via toml string' in GUI expert pageSelf-referential toml parameter, for setting any other toml parameters as string of tomls separated by (spaces around are ok).Useful when toml parameter is not in expert mode but want per-experiment control.Setting this will override all other choices.In expert page, each time expert options saved, the new state is set without memory of any prior settings.The entered item is a fully compliant toml string that would be processed directly by toml.load().One should include 2 double quotes around the entire setting, or double quotes need to be escaped.One enters into the expert page text as follows:e.g. ``enable_glm=\"off\" enable_xgboost_gbm=\"off\" enable_lightgbm=\"on\"``e.g. ``\"\"enable_glm=\"off\" enable_xgboost_gbm=\"off\" enable_lightgbm=\"off\" enable_tensorflow=\"on\"\"\"``e.g. ``fixed_num_individuals=4``e.g. 
``params_lightgbm=\"{'objective':'poisson'}\"``e.g. ``\"\"params_lightgbm=\"{'objective':'poisson'}\"\"\"``e.g. ``max_cores=10 data_precision=\"float32\" max_rows_feature_evolution=50000000000 ensemble_accuracy_switch=11 feature_engineering_effort=1 target_transformer=\"identity\" tournament_feature_style_accuracy_switch=5 params_tensorflow=\"{'layers': (100, 100, 100, 100, 100, 100)}\"``e.g. \"\"max_cores=10 data_precision=\"float32\" max_rows_feature_evolution=50000000000 ensemble_accuracy_switch=11 feature_engineering_effort=1 target_transformer=\"identity\" tournament_feature_style_accuracy_switch=5 params_tensorflow=\"{'layers': (100, 100, 100, 100, 100, 100)}\"\"\"If you see: \"toml.TomlDecodeError\" then ensure toml is set correctly.When set in the expert page of an experiment, these changes only affect experiments and not the serverUsually should keep this as empty string in this toml file. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump_varimp_every_scored_indiv", + "output": "dump varimp every scored indiv refers to Whether to dump every scored individual's variable importance to csv/tabulated/json file produces files like:individual_scored_id%d.iter%d..features.txt for transformed features.individual_scored_id%d.iter%d..features_orig.txt for original features.individual_scored_id%d.iter%d..coefs.txt for absolute importance of transformed features.There are txt, tab.txt, and json formats for some files, and \"best_\" prefix means it is the best individual for that iterationThe hash in the name matches the hash in the files produced by dump_modelparams_every_scored_indiv=true that can be used to track mutation history." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump_varimp_every_scored_indiv", + "output": "dump varimp every scored indiv refers to Enable detailed scored features info: Whether to dump every scored individual's variable importance to csv/tabulated/json file produces files like:individual_scored_id%d.iter%d..features.txt for transformed features.individual_scored_id%d.iter%d..features_orig.txt for original features.individual_scored_id%d.iter%d..coefs.txt for absolute importance of transformed features.There are txt, tab.txt, and json formats for some files, and \"best_\" prefix means it is the best individual for that iterationThe hash in the name matches the hash in the files produced by dump_modelparams_every_scored_indiv=true that can be used to track mutation history." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump varimp every scored indiv", + "output": "dump varimp every scored indiv refers to Enable detailed scored features info: Whether to dump every scored individual's variable importance to csv/tabulated/json file produces files like:individual_scored_id%d.iter%d..features.txt for transformed features.individual_scored_id%d.iter%d..features_orig.txt for original features.individual_scored_id%d.iter%d..coefs.txt for absolute importance of transformed features.There are txt, tab.txt, and json formats for some files, and \"best_\" prefix means it is the best individual for that iterationThe hash in the name matches the hash in the files produced by dump_modelparams_every_scored_indiv=true that can be used to track mutation history." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable detailed scored features info: ", + "output": "dump varimp every scored indiv refers to Enable detailed scored features info: Whether to dump every scored individual's variable importance to csv/tabulated/json file produces files like:individual_scored_id%d.iter%d..features.txt for transformed features.individual_scored_id%d.iter%d..features_orig.txt for original features.individual_scored_id%d.iter%d..coefs.txt for absolute importance of transformed features.There are txt, tab.txt, and json formats for some files, and \"best_\" prefix means it is the best individual for that iterationThe hash in the name matches the hash in the files produced by dump_modelparams_every_scored_indiv=true that can be used to track mutation history." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dump_varimp_every_scored_indiv", + "output": "dump varimp every scored indiv refers to Whether to dump every scored individual's variable importance to csv/tabulated/json file produces files like:individual_scored_id%d.iter%d..features.txt for transformed features.individual_scored_id%d.iter%d..features_orig.txt for original features.individual_scored_id%d.iter%d..coefs.txt for absolute importance of transformed features.There are txt, tab.txt, and json formats for some files, and \"best_\" prefix means it is the best individual for that iterationThe hash in the name matches the hash in the files produced by dump_modelparams_every_scored_indiv=true that can be used to track mutation history." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dump_varimp_every_scored_indiv", + "output": "dump varimp every scored indiv refers to Enable detailed scored features info: Whether to dump every scored individual's variable importance to csv/tabulated/json file produces files like:individual_scored_id%d.iter%d..features.txt for transformed features.individual_scored_id%d.iter%d..features_orig.txt for original features.individual_scored_id%d.iter%d..coefs.txt for absolute importance of transformed features.There are txt, tab.txt, and json formats for some files, and \"best_\" prefix means it is the best individual for that iterationThe hash in the name matches the hash in the files produced by dump_modelparams_every_scored_indiv=true that can be used to track mutation history." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump_modelparams_every_scored_indiv", + "output": "dump modelparams every scored indiv refers to Whether to dump every scored individual's model parameters to csv/tabulated/json fileproduces files like: individual_scored.params.[txt, csv, json].Each individual has a hash that matches the hash in the filenames produced if dump_varimp_every_scored_indiv=true,and the \"unchanging hash\" is the first parent hash (None if that individual is the first parent itself).These hashes can be used to track the history of the mutations. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump_modelparams_every_scored_indiv", + "output": "dump modelparams every scored indiv refers to Enable detailed scored model info: Whether to dump every scored individual's model parameters to csv/tabulated/json fileproduces files like: individual_scored.params.[txt, csv, json].Each individual has a hash that matches the hash in the filenames produced if dump_varimp_every_scored_indiv=true,and the \"unchanging hash\" is the first parent hash (None if that individual is the first parent itself).These hashes can be used to track the history of the mutations. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump modelparams every scored indiv", + "output": "dump modelparams every scored indiv refers to Enable detailed scored model info: Whether to dump every scored individual's model parameters to csv/tabulated/json fileproduces files like: individual_scored.params.[txt, csv, json].Each individual has a hash that matches the hash in the filenames produced if dump_varimp_every_scored_indiv=true,and the \"unchanging hash\" is the first parent hash (None if that individual is the first parent itself).These hashes can be used to track the history of the mutations. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable detailed scored model info: ", + "output": "dump modelparams every scored indiv refers to Enable detailed scored model info: Whether to dump every scored individual's model parameters to csv/tabulated/json fileproduces files like: individual_scored.params.[txt, csv, json].Each individual has a hash that matches the hash in the filenames produced if dump_varimp_every_scored_indiv=true,and the \"unchanging hash\" is the first parent hash (None if that individual is the first parent itself).These hashes can be used to track the history of the mutations. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dump_modelparams_every_scored_indiv", + "output": "dump modelparams every scored indiv refers to Whether to dump every scored individual's model parameters to csv/tabulated/json fileproduces files like: individual_scored.params.[txt, csv, json].Each individual has a hash that matches the hash in the filenames produced if dump_varimp_every_scored_indiv=true,and the \"unchanging hash\" is the first parent hash (None if that individual is the first parent itself).These hashes can be used to track the history of the mutations. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dump_modelparams_every_scored_indiv", + "output": "dump modelparams every scored indiv refers to Enable detailed scored model info: Whether to dump every scored individual's model parameters to csv/tabulated/json fileproduces files like: individual_scored.params.[txt, csv, json].Each individual has a hash that matches the hash in the filenames produced if dump_varimp_every_scored_indiv=true,and the \"unchanging hash\" is the first parent hash (None if that individual is the first parent itself).These hashes can be used to track the history of the mutations. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump_modelparams_every_scored_indiv_feature_count", + "output": "dump modelparams every scored indiv feature count refers to Number of features to show in model dump every scored individual" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump_modelparams_every_scored_indiv_feature_count", + "output": "dump modelparams every scored indiv feature count refers to Number of features to show in model dump every scored individual" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump modelparams every scored indiv feature count", + "output": "dump modelparams every scored indiv feature count refers to Number of features to show in model dump every scored individual" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "dump modelparams every scored indiv feature count refers to Number of features to show in model dump every scored individual" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert 
setting dump_modelparams_every_scored_indiv_feature_count", + "output": "dump modelparams every scored indiv feature count refers to Number of features to show in model dump every scored individual" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dump_modelparams_every_scored_indiv_feature_count", + "output": "dump modelparams every scored indiv feature count refers to Number of features to show in model dump every scored individual" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump_modelparams_every_scored_indiv_mutation_count", + "output": "dump modelparams every scored indiv mutation count refers to Number of past mutations to show in model dump every scored individual" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump_modelparams_every_scored_indiv_mutation_count", + "output": "dump modelparams every scored indiv mutation count refers to Number of past mutations to show in model dump every scored individual" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump modelparams every scored indiv mutation count", + "output": "dump modelparams every scored indiv mutation count refers to Number of past mutations to show in model dump every scored individual" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "dump modelparams every scored indiv mutation count refers to Number of past mutations to show in model dump every scored individual" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dump_modelparams_every_scored_indiv_mutation_count", + "output": "dump modelparams every scored indiv mutation count refers to Number of past 
mutations to show in model dump every scored individual" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dump_modelparams_every_scored_indiv_mutation_count", + "output": "dump modelparams every scored indiv mutation count refers to Number of past mutations to show in model dump every scored individual" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump_modelparams_separate_files", + "output": "dump modelparams separate files refers to Whether to append (false) or have separate files, files like: individual_scored_id%d.iter%d*params*, (true) for modelparams every scored indiv" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump_modelparams_separate_files", + "output": "dump modelparams separate files refers to Whether to append (false) or have separate files, files like: individual_scored_id%d.iter%d*params*, (true) for modelparams every scored indiv" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump modelparams separate files", + "output": "dump modelparams separate files refers to Whether to append (false) or have separate files, files like: individual_scored_id%d.iter%d*params*, (true) for modelparams every scored indiv" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "dump modelparams separate files refers to Whether to append (false) or have separate files, files like: individual_scored_id%d.iter%d*params*, (true) for modelparams every scored indiv" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dump_modelparams_separate_files", + "output": "dump modelparams separate files refers to Whether to append (false) or 
have separate files, files like: individual_scored_id%d.iter%d*params*, (true) for modelparams every scored indiv" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dump_modelparams_separate_files", + "output": "dump modelparams separate files refers to Whether to append (false) or have separate files, files like: individual_scored_id%d.iter%d*params*, (true) for modelparams every scored indiv" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump_trans_timings", + "output": "dump trans timings refers to Whether to dump every scored fold's timing and feature info to a *timings*.txt file " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump_trans_timings", + "output": "dump trans timings refers to Enable detailed logs for timing and types of features produced: Whether to dump every scored fold's timing and feature info to a *timings*.txt file " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dump trans timings", + "output": "dump trans timings refers to Enable detailed logs for timing and types of features produced: Whether to dump every scored fold's timing and feature info to a *timings*.txt file " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable detailed logs for timing and types of features produced: ", + "output": "dump trans timings refers to Enable detailed logs for timing and types of features produced: Whether to dump every scored fold's timing and feature info to a *timings*.txt file " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dump_trans_timings", + "output": "dump trans timings refers to Whether to dump every scored 
fold's timing and feature info to a *timings*.txt file " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dump_trans_timings", + "output": "dump trans timings refers to Enable detailed logs for timing and types of features produced: Whether to dump every scored fold's timing and feature info to a *timings*.txt file " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "delete_preview_trans_timings", + "output": "delete preview trans timings refers to whether to delete preview timings if wrote transformer timings" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "delete_preview_trans_timings", + "output": "delete preview trans timings refers to whether to delete preview timings if wrote transformer timings" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "delete preview trans timings", + "output": "delete preview trans timings refers to whether to delete preview timings if wrote transformer timings" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "delete preview trans timings refers to whether to delete preview timings if wrote transformer timings" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting delete_preview_trans_timings", + "output": "delete preview trans timings refers to whether to delete preview timings if wrote transformer timings" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting delete_preview_trans_timings", + "output": "delete preview trans timings refers to whether to delete preview timings if wrote transformer timings" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Explain the following expert setting for Driverless AI", + "input": "unsupervised_aggregator_n_exemplars", + "output": "unsupervised aggregator n exemplars refers to Attempt to create at most this many exemplars (actual rows behaving like cluster centroids) for the Aggregator algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "unsupervised_aggregator_n_exemplars", + "output": "unsupervised aggregator n exemplars refers to Max. number of exemplars for unsupervised Aggregator experiments: Attempt to create at most this many exemplars (actual rows behaving like cluster centroids) for the Aggregator algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "unsupervised aggregator n exemplars", + "output": "unsupervised aggregator n exemplars refers to Max. number of exemplars for unsupervised Aggregator experiments: Attempt to create at most this many exemplars (actual rows behaving like cluster centroids) for the Aggregator algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of exemplars for unsupervised Aggregator experiments: ", + "output": "unsupervised aggregator n exemplars refers to Max. number of exemplars for unsupervised Aggregator experiments: Attempt to create at most this many exemplars (actual rows behaving like cluster centroids) for the Aggregator algorithm in unsupervised experiment mode. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting unsupervised_aggregator_n_exemplars", + "output": "unsupervised aggregator n exemplars refers to Attempt to create at most this many exemplars (actual rows behaving like cluster centroids) for the Aggregator algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting unsupervised_aggregator_n_exemplars", + "output": "unsupervised aggregator n exemplars refers to Max. number of exemplars for unsupervised Aggregator experiments: Attempt to create at most this many exemplars (actual rows behaving like cluster centroids) for the Aggregator algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "unsupervised_clustering_min_clusters", + "output": "unsupervised clustering min clusters refers to Attempt to create at least this many clusters for clustering algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "unsupervised_clustering_min_clusters", + "output": "unsupervised clustering min clusters refers to Min. number of clusters for unsupervised clustering experiments: Attempt to create at least this many clusters for clustering algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "unsupervised clustering min clusters", + "output": "unsupervised clustering min clusters refers to Min. number of clusters for unsupervised clustering experiments: Attempt to create at least this many clusters for clustering algorithm in unsupervised experiment mode. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Min. number of clusters for unsupervised clustering experiments: ", + "output": "unsupervised clustering min clusters refers to Min. number of clusters for unsupervised clustering experiments: Attempt to create at least this many clusters for clustering algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting unsupervised_clustering_min_clusters", + "output": "unsupervised clustering min clusters refers to Attempt to create at least this many clusters for clustering algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting unsupervised_clustering_min_clusters", + "output": "unsupervised clustering min clusters refers to Min. number of clusters for unsupervised clustering experiments: Attempt to create at least this many clusters for clustering algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "unsupervised_clustering_max_clusters", + "output": "unsupervised clustering max clusters refers to Attempt to create no more than this many clusters for clustering algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "unsupervised_clustering_max_clusters", + "output": "unsupervised clustering max clusters refers to Max. number of clusters for unsupervised clustering experiments: Attempt to create no more than this many clusters for clustering algorithm in unsupervised experiment mode. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "unsupervised clustering max clusters", + "output": "unsupervised clustering max clusters refers to Max. number of clusters for unsupervised clustering experiments: Attempt to create no more than this many clusters for clustering algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of clusters for unsupervised clustering experiments: ", + "output": "unsupervised clustering max clusters refers to Max. number of clusters for unsupervised clustering experiments: Attempt to create no more than this many clusters for clustering algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting unsupervised_clustering_max_clusters", + "output": "unsupervised clustering max clusters refers to Attempt to create no more than this many clusters for clustering algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting unsupervised_clustering_max_clusters", + "output": "unsupervised clustering max clusters refers to Max. number of clusters for unsupervised clustering experiments: Attempt to create no more than this many clusters for clustering algorithm in unsupervised experiment mode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_deployment", + "output": "wizard deployment refers to Global preset of deployment option for Experiment Wizard. 
Set to non-empty string to enable.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_deployment", + "output": "wizard deployment refers to Global preset of deployment option for Experiment Wizard. Set to non-empty string to enable.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard deployment", + "output": "wizard deployment refers to Global preset of deployment option for Experiment Wizard. Set to non-empty string to enable.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Global preset of deployment option for Experiment Wizard. Set to non-empty string to enable.: ", + "output": "wizard deployment refers to Global preset of deployment option for Experiment Wizard. Set to non-empty string to enable.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting wizard_deployment", + "output": "wizard deployment refers to Global preset of deployment option for Experiment Wizard. Set to non-empty string to enable.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting wizard_deployment", + "output": "wizard deployment refers to Global preset of deployment option for Experiment Wizard. Set to non-empty string to enable.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_repro_level", + "output": "wizard repro level refers to Global preset of repro level option for Experiment Wizard. 
Set to 1, 2, 3 to enable.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_repro_level", + "output": "wizard repro level refers to Global preset of repro level option for Experiment Wizard. Set to 1, 2, 3 to enable.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard repro level", + "output": "wizard repro level refers to Global preset of repro level option for Experiment Wizard. Set to 1, 2, 3 to enable.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Global preset of repro level option for Experiment Wizard. Set to 1, 2, 3 to enable.: ", + "output": "wizard repro level refers to Global preset of repro level option for Experiment Wizard. Set to 1, 2, 3 to enable.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting wizard_repro_level", + "output": "wizard repro level refers to Global preset of repro level option for Experiment Wizard. Set to 1, 2, 3 to enable.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting wizard_repro_level", + "output": "wizard repro level refers to Global preset of repro level option for Experiment Wizard. Set to 1, 2, 3 to enable.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_sample_size", + "output": "wizard sample size refers to Max. number of rows for experiment wizard dataset samples. 0 to disable sampling.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_sample_size", + "output": "wizard sample size refers to Max. number of rows for experiment wizard dataset samples. 
0 to disable sampling.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard sample size", + "output": "wizard sample size refers to Max. number of rows for experiment wizard dataset samples. 0 to disable sampling.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max. number of rows for experiment wizard dataset samples. 0 to disable sampling.: ", + "output": "wizard sample size refers to Max. number of rows for experiment wizard dataset samples. 0 to disable sampling.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting wizard_sample_size", + "output": "wizard sample size refers to Max. number of rows for experiment wizard dataset samples. 0 to disable sampling.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting wizard_sample_size", + "output": "wizard sample size refers to Max. number of rows for experiment wizard dataset samples. 
0 to disable sampling.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_model", + "output": "wizard model refers to Type of model for experiment wizard to compute variable importances and leakage checks.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_model", + "output": "wizard model refers to Type of model for experiment wizard to compute variable importances and leakage checks.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard model", + "output": "wizard model refers to Type of model for experiment wizard to compute variable importances and leakage checks.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Type of model for experiment wizard to compute variable importances and leakage checks.: ", + "output": "wizard model refers to Type of model for experiment wizard to compute variable importances and leakage checks.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting wizard_model", + "output": "wizard model refers to Type of model for experiment wizard to compute variable importances and leakage checks.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting wizard_model", + "output": "wizard model refers to Type of model for experiment wizard to compute variable importances and leakage checks.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_max_cols", + "output": "wizard max cols refers to Maximum number of columns to start an experiment. 
This threshold exists to constraint the # complexity and the length of the Driverless AI's processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_max_cols", + "output": "wizard max cols refers to Maximum number of columns to start an experiment. This threshold exists to constraint the # complexity and the length of the Driverless AI's processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard max cols", + "output": "wizard max cols refers to Maximum number of columns to start an experiment. This threshold exists to constraint the # complexity and the length of the Driverless AI's processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "wizard max cols refers to Maximum number of columns to start an experiment. This threshold exists to constraint the # complexity and the length of the Driverless AI's processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting wizard_max_cols", + "output": "wizard max cols refers to Maximum number of columns to start an experiment. This threshold exists to constraint the # complexity and the length of the Driverless AI's processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting wizard_max_cols", + "output": "wizard max cols refers to Maximum number of columns to start an experiment. This threshold exists to constraint the # complexity and the length of the Driverless AI's processes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_timeout_preview", + "output": "wizard timeout preview refers to How many seconds to allow preview to take for Wizard." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_timeout_preview", + "output": "wizard timeout preview refers to How many seconds to allow preview to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard timeout preview", + "output": "wizard timeout preview refers to How many seconds to allow preview to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "wizard timeout preview refers to How many seconds to allow preview to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting wizard_timeout_preview", + "output": "wizard timeout preview refers to How many seconds to allow preview to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting wizard_timeout_preview", + "output": "wizard timeout preview refers to How many seconds to allow preview to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_timeout_leakage", + "output": "wizard timeout leakage refers to How many seconds to allow leakage detection to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_timeout_leakage", + "output": "wizard timeout leakage refers to How many seconds to allow leakage detection to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard timeout leakage", + "output": "wizard timeout leakage refers to How many seconds to allow leakage detection to take for Wizard." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "wizard timeout leakage refers to How many seconds to allow leakage detection to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting wizard_timeout_leakage", + "output": "wizard timeout leakage refers to How many seconds to allow leakage detection to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting wizard_timeout_leakage", + "output": "wizard timeout leakage refers to How many seconds to allow leakage detection to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_timeout_dups", + "output": "wizard timeout dups refers to How many seconds to allow duplicate row detection to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_timeout_dups", + "output": "wizard timeout dups refers to How many seconds to allow duplicate row detection to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard timeout dups", + "output": "wizard timeout dups refers to How many seconds to allow duplicate row detection to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "wizard timeout dups refers to How many seconds to allow duplicate row detection to take for Wizard." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting wizard_timeout_dups", + "output": "wizard timeout dups refers to How many seconds to allow duplicate row detection to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting wizard_timeout_dups", + "output": "wizard timeout dups refers to How many seconds to allow duplicate row detection to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_timeout_varimp", + "output": "wizard timeout varimp refers to How many seconds to allow variable importance calculation to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_timeout_varimp", + "output": "wizard timeout varimp refers to How many seconds to allow variable importance calculation to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard timeout varimp", + "output": "wizard timeout varimp refers to How many seconds to allow variable importance calculation to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "wizard timeout varimp refers to How many seconds to allow variable importance calculation to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting wizard_timeout_varimp", + "output": "wizard timeout varimp refers to How many seconds to allow variable importance calculation to take for Wizard." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting wizard_timeout_varimp", + "output": "wizard timeout varimp refers to How many seconds to allow variable importance calculation to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_timeout_schema", + "output": "wizard timeout schema refers to How many seconds to allow dataframe schema calculation to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard_timeout_schema", + "output": "wizard timeout schema refers to How many seconds to allow dataframe schema calculation to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "wizard timeout schema", + "output": "wizard timeout schema refers to How many seconds to allow dataframe schema calculation to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "wizard timeout schema refers to How many seconds to allow dataframe schema calculation to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting wizard_timeout_schema", + "output": "wizard timeout schema refers to How many seconds to allow dataframe schema calculation to take for Wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting wizard_timeout_schema", + "output": "wizard timeout schema refers to How many seconds to allow dataframe schema calculation to take for Wizard." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "authentication_method", + "output": "authentication method refers to authentication_method unvalidated : Accepts user id and password. Does not validate password. none: Does not ask for user id or password. Authenticated as admin. openid: Users OpenID Connect provider for authentication. See additional OpenID settings below. oidc: Renewed OpenID Connect authentication using authorization code flow. See additional OpenID settings below. pam: Accepts user id and password. Validates user with operating system. ldap: Accepts user id and password. Validates against an ldap server. Look for additional settings under LDAP settings. local: Accepts a user id and password. Validated against an htpasswd file provided in local_htpasswd_file. ibm_spectrum_conductor: Authenticate with IBM conductor auth api. tls_certificate: Authenticate with Driverless by providing a TLS certificate. jwt: Authenticate by JWT obtained from the request metadata. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "authentication_method", + "output": "authentication method refers to authentication_method unvalidated : Accepts user id and password. Does not validate password. none: Does not ask for user id or password. Authenticated as admin. openid: Users OpenID Connect provider for authentication. See additional OpenID settings below. oidc: Renewed OpenID Connect authentication using authorization code flow. See additional OpenID settings below. pam: Accepts user id and password. Validates user with operating system. ldap: Accepts user id and password. Validates against an ldap server. Look for additional settings under LDAP settings. local: Accepts a user id and password. Validated against an htpasswd file provided in local_htpasswd_file. 
ibm_spectrum_conductor: Authenticate with IBM conductor auth api. tls_certificate: Authenticate with Driverless by providing a TLS certificate. jwt: Authenticate by JWT obtained from the request metadata. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "authentication method", + "output": "authentication method refers to authentication_method unvalidated : Accepts user id and password. Does not validate password. none: Does not ask for user id or password. Authenticated as admin. openid: Users OpenID Connect provider for authentication. See additional OpenID settings below. oidc: Renewed OpenID Connect authentication using authorization code flow. See additional OpenID settings below. pam: Accepts user id and password. Validates user with operating system. ldap: Accepts user id and password. Validates against an ldap server. Look for additional settings under LDAP settings. local: Accepts a user id and password. Validated against an htpasswd file provided in local_htpasswd_file. ibm_spectrum_conductor: Authenticate with IBM conductor auth api. tls_certificate: Authenticate with Driverless by providing a TLS certificate. jwt: Authenticate by JWT obtained from the request metadata. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "authentication method refers to authentication_method unvalidated : Accepts user id and password. Does not validate password. none: Does not ask for user id or password. Authenticated as admin. openid: Users OpenID Connect provider for authentication. See additional OpenID settings below. oidc: Renewed OpenID Connect authentication using authorization code flow. See additional OpenID settings below. pam: Accepts user id and password. Validates user with operating system. ldap: Accepts user id and password. Validates against an ldap server. 
Look for additional settings under LDAP settings. local: Accepts a user id and password. Validated against an htpasswd file provided in local_htpasswd_file. ibm_spectrum_conductor: Authenticate with IBM conductor auth api. tls_certificate: Authenticate with Driverless by providing a TLS certificate. jwt: Authenticate by JWT obtained from the request metadata. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting authentication_method", + "output": "authentication method refers to authentication_method unvalidated : Accepts user id and password. Does not validate password. none: Does not ask for user id or password. Authenticated as admin. openid: Users OpenID Connect provider for authentication. See additional OpenID settings below. oidc: Renewed OpenID Connect authentication using authorization code flow. See additional OpenID settings below. pam: Accepts user id and password. Validates user with operating system. ldap: Accepts user id and password. Validates against an ldap server. Look for additional settings under LDAP settings. local: Accepts a user id and password. Validated against an htpasswd file provided in local_htpasswd_file. ibm_spectrum_conductor: Authenticate with IBM conductor auth api. tls_certificate: Authenticate with Driverless by providing a TLS certificate. jwt: Authenticate by JWT obtained from the request metadata. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting authentication_method", + "output": "authentication method refers to authentication_method unvalidated : Accepts user id and password. Does not validate password. none: Does not ask for user id or password. Authenticated as admin. openid: Users OpenID Connect provider for authentication. See additional OpenID settings below. oidc: Renewed OpenID Connect authentication using authorization code flow. See additional OpenID settings below. 
pam: Accepts user id and password. Validates user with operating system. ldap: Accepts user id and password. Validates against an ldap server. Look for additional settings under LDAP settings. local: Accepts a user id and password. Validated against an htpasswd file provided in local_htpasswd_file. ibm_spectrum_conductor: Authenticate with IBM conductor auth api. tls_certificate: Authenticate with Driverless by providing a TLS certificate. jwt: Authenticate by JWT obtained from the request metadata. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "additional_authentication_methods", + "output": "additional authentication methods refers to Additional authentication methods that will be enabled for for the clients.Login forms for each method will be available on the``/login/`` path.Comma separated list." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "additional_authentication_methods", + "output": "additional authentication methods refers to Additional authentication methods that will be enabled for for the clients.Login forms for each method will be available on the``/login/`` path.Comma separated list." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "additional authentication methods", + "output": "additional authentication methods refers to Additional authentication methods that will be enabled for for the clients.Login forms for each method will be available on the``/login/`` path.Comma separated list." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "additional authentication methods refers to Additional authentication methods that will be enabled for for the clients.Login forms for each method will be available on the``/login/`` path.Comma separated list." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting additional_authentication_methods", + "output": "additional authentication methods refers to Additional authentication methods that will be enabled for for the clients.Login forms for each method will be available on the``/login/`` path.Comma separated list." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting additional_authentication_methods", + "output": "additional authentication methods refers to Additional authentication methods that will be enabled for for the clients.Login forms for each method will be available on the``/login/`` path.Comma separated list." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "authentication_default_timeout_hours", + "output": "authentication default timeout hours refers to The default amount of time in hours before a user is signed out and must log in again. This setting is used when a default timeout value is not provided by ``authentication_method``." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "authentication_default_timeout_hours", + "output": "authentication default timeout hours refers to The default amount of time in hours before a user is signed out and must log in again. This setting is used when a default timeout value is not provided by ``authentication_method``." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "authentication default timeout hours", + "output": "authentication default timeout hours refers to The default amount of time in hours before a user is signed out and must log in again. This setting is used when a default timeout value is not provided by ``authentication_method``." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "authentication default timeout hours refers to The default amount of time in hours before a user is signed out and must log in again. This setting is used when a default timeout value is not provided by ``authentication_method``." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting authentication_default_timeout_hours", + "output": "authentication default timeout hours refers to The default amount of time in hours before a user is signed out and must log in again. This setting is used when a default timeout value is not provided by ``authentication_method``." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting authentication_default_timeout_hours", + "output": "authentication default timeout hours refers to The default amount of time in hours before a user is signed out and must log in again. This setting is used when a default timeout value is not provided by ``authentication_method``." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "authentication_gui_polling_prolongs_session", + "output": "authentication gui polling prolongs session refers to When enabled, the user's session is automatically prolonged, even when they are not interacting directly with the application." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "authentication_gui_polling_prolongs_session", + "output": "authentication gui polling prolongs session refers to When enabled, the user's session is automatically prolonged, even when they are not interacting directly with the application." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "authentication gui polling prolongs session", + "output": "authentication gui polling prolongs session refers to When enabled, the user's session is automatically prolonged, even when they are not interacting directly with the application." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "authentication gui polling prolongs session refers to When enabled, the user's session is automatically prolonged, even when they are not interacting directly with the application." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting authentication_gui_polling_prolongs_session", + "output": "authentication gui polling prolongs session refers to When enabled, the user's session is automatically prolonged, even when they are not interacting directly with the application." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting authentication_gui_polling_prolongs_session", + "output": "authentication gui polling prolongs session refers to When enabled, the user's session is automatically prolonged, even when they are not interacting directly with the application." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_provider_base_uri", + "output": "auth openid provider base uri refers to OpenID Connect Settings: Refer to the OpenID Connect Basic Client Implementation Guide for details on how OpenID authentication flow works https://openid.net/specs/openid-connect-basic-1_0.html base server URI to the OpenID Provider server (ex: https://oidp.ourdomain.com" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_provider_base_uri", + "output": "auth openid provider base uri refers to OpenID Connect Settings: Refer to the OpenID Connect Basic Client Implementation Guide for details on how OpenID authentication flow works https://openid.net/specs/openid-connect-basic-1_0.html base server URI to the OpenID Provider server (ex: https://oidp.ourdomain.com" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid provider base uri", + "output": "auth openid provider base uri refers to OpenID Connect Settings: Refer to the OpenID Connect Basic Client Implementation Guide for details on how OpenID authentication flow works https://openid.net/specs/openid-connect-basic-1_0.html base server URI to the OpenID Provider server (ex: https://oidp.ourdomain.com" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid provider base uri refers to OpenID Connect Settings: Refer to the OpenID Connect Basic Client Implementation Guide for details on how OpenID authentication flow works https://openid.net/specs/openid-connect-basic-1_0.html base server URI to the OpenID Provider server (ex: https://oidp.ourdomain.com" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the 
expert setting auth_openid_provider_base_uri", + "output": "auth openid provider base uri refers to OpenID Connect Settings: Refer to the OpenID Connect Basic Client Implementation Guide for details on how OpenID authentication flow works https://openid.net/specs/openid-connect-basic-1_0.html base server URI to the OpenID Provider server (ex: https://oidp.ourdomain.com" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_provider_base_uri", + "output": "auth openid provider base uri refers to OpenID Connect Settings: Refer to the OpenID Connect Basic Client Implementation Guide for details on how OpenID authentication flow works https://openid.net/specs/openid-connect-basic-1_0.html base server URI to the OpenID Provider server (ex: https://oidp.ourdomain.com" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_configuration_uri", + "output": "auth openid configuration uri refers to URI to pull OpenID config data from (you can extract most of required OpenID config from this url) usually located at: /auth/realms/master/.well-known/openid-configuration" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_configuration_uri", + "output": "auth openid configuration uri refers to URI to pull OpenID config data from (you can extract most of required OpenID config from this url) usually located at: /auth/realms/master/.well-known/openid-configuration" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid configuration uri", + "output": "auth openid configuration uri refers to URI to pull OpenID config data from (you can extract most of required OpenID config from this url) usually located at: /auth/realms/master/.well-known/openid-configuration" + }, + 
{ + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid configuration uri refers to URI to pull OpenID config data from (you can extract most of required OpenID config from this url) usually located at: /auth/realms/master/.well-known/openid-configuration" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_configuration_uri", + "output": "auth openid configuration uri refers to URI to pull OpenID config data from (you can extract most of required OpenID config from this url) usually located at: /auth/realms/master/.well-known/openid-configuration" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_configuration_uri", + "output": "auth openid configuration uri refers to URI to pull OpenID config data from (you can extract most of required OpenID config from this url) usually located at: /auth/realms/master/.well-known/openid-configuration" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_auth_uri", + "output": "auth openid auth uri refers to URI to start authentication flow" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_auth_uri", + "output": "auth openid auth uri refers to URI to start authentication flow" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid auth uri", + "output": "auth openid auth uri refers to URI to start authentication flow" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid auth uri refers to URI to start authentication flow" + }, + { + "prompt_type": 
"human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_auth_uri", + "output": "auth openid auth uri refers to URI to start authentication flow" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_auth_uri", + "output": "auth openid auth uri refers to URI to start authentication flow" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_token_uri", + "output": "auth openid token uri refers to URI to make request for token after callback from OpenID server was received" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_token_uri", + "output": "auth openid token uri refers to URI to make request for token after callback from OpenID server was received" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid token uri", + "output": "auth openid token uri refers to URI to make request for token after callback from OpenID server was received" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid token uri refers to URI to make request for token after callback from OpenID server was received" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_token_uri", + "output": "auth openid token uri refers to URI to make request for token after callback from OpenID server was received" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_token_uri", + "output": "auth openid token uri refers to URI to make request for token after callback from OpenID server was received" + }, + { + 
"prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_userinfo_uri", + "output": "auth openid userinfo uri refers to URI to get user information once access_token has been acquired (ex: list of groups user belongs to will be provided here)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_userinfo_uri", + "output": "auth openid userinfo uri refers to URI to get user information once access_token has been acquired (ex: list of groups user belongs to will be provided here)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid userinfo uri", + "output": "auth openid userinfo uri refers to URI to get user information once access_token has been acquired (ex: list of groups user belongs to will be provided here)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid userinfo uri refers to URI to get user information once access_token has been acquired (ex: list of groups user belongs to will be provided here)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_userinfo_uri", + "output": "auth openid userinfo uri refers to URI to get user information once access_token has been acquired (ex: list of groups user belongs to will be provided here)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_userinfo_uri", + "output": "auth openid userinfo uri refers to URI to get user information once access_token has been acquired (ex: list of groups user belongs to will be provided here)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + 
"input": "auth_openid_logout_uri", + "output": "auth openid logout uri refers to URI to logout user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_logout_uri", + "output": "auth openid logout uri refers to URI to logout user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid logout uri", + "output": "auth openid logout uri refers to URI to logout user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid logout uri refers to URI to logout user" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_logout_uri", + "output": "auth openid logout uri refers to URI to logout user" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_logout_uri", + "output": "auth openid logout uri refers to URI to logout user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_redirect_uri", + "output": "auth openid redirect uri refers to callback URI that OpenID provide will use to send 'authentication_code' This is OpenID callback endpoint in Driverless AI. Most OpenID providers need this to be HTTPs. (ex. https://driverless.ourdomin.com/openid/callback)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_redirect_uri", + "output": "auth openid redirect uri refers to callback URI that OpenID provide will use to send 'authentication_code' This is OpenID callback endpoint in Driverless AI. Most OpenID providers need this to be HTTPs. (ex. 
https://driverless.ourdomin.com/openid/callback)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid redirect uri", + "output": "auth openid redirect uri refers to callback URI that OpenID provide will use to send 'authentication_code' This is OpenID callback endpoint in Driverless AI. Most OpenID providers need this to be HTTPs. (ex. https://driverless.ourdomin.com/openid/callback)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid redirect uri refers to callback URI that OpenID provide will use to send 'authentication_code' This is OpenID callback endpoint in Driverless AI. Most OpenID providers need this to be HTTPs. (ex. https://driverless.ourdomin.com/openid/callback)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_redirect_uri", + "output": "auth openid redirect uri refers to callback URI that OpenID provide will use to send 'authentication_code' This is OpenID callback endpoint in Driverless AI. Most OpenID providers need this to be HTTPs. (ex. https://driverless.ourdomin.com/openid/callback)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_redirect_uri", + "output": "auth openid redirect uri refers to callback URI that OpenID provide will use to send 'authentication_code' This is OpenID callback endpoint in Driverless AI. Most OpenID providers need this to be HTTPs. (ex. 
https://driverless.ourdomin.com/openid/callback)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_grant_type", + "output": "auth openid grant type refers to OAuth2 grant type (usually authorization_code for OpenID, can be access_token also)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_grant_type", + "output": "auth openid grant type refers to OAuth2 grant type (usually authorization_code for OpenID, can be access_token also)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid grant type", + "output": "auth openid grant type refers to OAuth2 grant type (usually authorization_code for OpenID, can be access_token also)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid grant type refers to OAuth2 grant type (usually authorization_code for OpenID, can be access_token also)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_grant_type", + "output": "auth openid grant type refers to OAuth2 grant type (usually authorization_code for OpenID, can be access_token also)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_grant_type", + "output": "auth openid grant type refers to OAuth2 grant type (usually authorization_code for OpenID, can be access_token also)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_response_type", + "output": "auth openid response type refers to OAuth2 response type (usually code)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the 
following expert setting for Driverless AI", + "input": "auth_openid_response_type", + "output": "auth openid response type refers to OAuth2 response type (usually code)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid response type", + "output": "auth openid response type refers to OAuth2 response type (usually code)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid response type refers to OAuth2 response type (usually code)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_response_type", + "output": "auth openid response type refers to OAuth2 response type (usually code)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_response_type", + "output": "auth openid response type refers to OAuth2 response type (usually code)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_client_id", + "output": "auth openid client id refers to Client ID registered with OpenID provider" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_client_id", + "output": "auth openid client id refers to Client ID registered with OpenID provider" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid client id", + "output": "auth openid client id refers to Client ID registered with OpenID provider" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid client id refers to Client ID registered with OpenID 
provider" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_client_id", + "output": "auth openid client id refers to Client ID registered with OpenID provider" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_client_id", + "output": "auth openid client id refers to Client ID registered with OpenID provider" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_client_secret", + "output": "auth openid client secret refers to Client secret provided by OpenID provider when registering Client ID" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_client_secret", + "output": "auth openid client secret refers to Client secret provided by OpenID provider when registering Client ID" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid client secret", + "output": "auth openid client secret refers to Client secret provided by OpenID provider when registering Client ID" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid client secret refers to Client secret provided by OpenID provider when registering Client ID" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_client_secret", + "output": "auth openid client secret refers to Client secret provided by OpenID provider when registering Client ID" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_client_secret", + "output": "auth openid client secret refers to Client secret provided by OpenID 
provider when registering Client ID" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_scope", + "output": "auth openid scope refers to Scope of info (usually openid). Can be list of more than one, space delimited, possible values listed at https://openid.net/specs/openid-connect-basic-1_0.html#Scopes " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_scope", + "output": "auth openid scope refers to Scope of info (usually openid). Can be list of more than one, space delimited, possible values listed at https://openid.net/specs/openid-connect-basic-1_0.html#Scopes " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid scope", + "output": "auth openid scope refers to Scope of info (usually openid). Can be list of more than one, space delimited, possible values listed at https://openid.net/specs/openid-connect-basic-1_0.html#Scopes " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid scope refers to Scope of info (usually openid). Can be list of more than one, space delimited, possible values listed at https://openid.net/specs/openid-connect-basic-1_0.html#Scopes " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_scope", + "output": "auth openid scope refers to Scope of info (usually openid). Can be list of more than one, space delimited, possible values listed at https://openid.net/specs/openid-connect-basic-1_0.html#Scopes " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_scope", + "output": "auth openid scope refers to Scope of info (usually openid). 
Can be list of more than one, space delimited, possible values listed at https://openid.net/specs/openid-connect-basic-1_0.html#Scopes " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_userinfo_auth_key", + "output": "auth openid userinfo auth key refers to What key in user_info JSON should we check to authorize user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_userinfo_auth_key", + "output": "auth openid userinfo auth key refers to What key in user_info JSON should we check to authorize user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid userinfo auth key", + "output": "auth openid userinfo auth key refers to What key in user_info JSON should we check to authorize user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid userinfo auth key refers to What key in user_info JSON should we check to authorize user" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_userinfo_auth_key", + "output": "auth openid userinfo auth key refers to What key in user_info JSON should we check to authorize user" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_userinfo_auth_key", + "output": "auth openid userinfo auth key refers to What key in user_info JSON should we check to authorize user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_userinfo_auth_value", + "output": "auth openid userinfo auth value refers to What value should the key have in user_info JSON in order to authorize 
user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_userinfo_auth_value", + "output": "auth openid userinfo auth value refers to What value should the key have in user_info JSON in order to authorize user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid userinfo auth value", + "output": "auth openid userinfo auth value refers to What value should the key have in user_info JSON in order to authorize user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid userinfo auth value refers to What value should the key have in user_info JSON in order to authorize user" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_userinfo_auth_value", + "output": "auth openid userinfo auth value refers to What value should the key have in user_info JSON in order to authorize user" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_userinfo_auth_value", + "output": "auth openid userinfo auth value refers to What value should the key have in user_info JSON in order to authorize user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_userinfo_username_key", + "output": "auth openid userinfo username key refers to Key that specifies username in user_info JSON (we will use the value of this key as username in Driverless AI)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_userinfo_username_key", + "output": "auth openid userinfo username key refers to Key that specifies username in user_info JSON (we 
will use the value of this key as username in Driverless AI)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid userinfo username key", + "output": "auth openid userinfo username key refers to Key that specifies username in user_info JSON (we will use the value of this key as username in Driverless AI)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid userinfo username key refers to Key that specifies username in user_info JSON (we will use the value of this key as username in Driverless AI)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_userinfo_username_key", + "output": "auth openid userinfo username key refers to Key that specifies username in user_info JSON (we will use the value of this key as username in Driverless AI)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_userinfo_username_key", + "output": "auth openid userinfo username key refers to Key that specifies username in user_info JSON (we will use the value of this key as username in Driverless AI)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_urlencode_quote_via", + "output": "auth openid urlencode quote via refers to Quote method from urllib.parse used to encode payload dict in Authentication Request" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_urlencode_quote_via", + "output": "auth openid urlencode quote via refers to Quote method from urllib.parse used to encode payload dict in Authentication Request" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following 
expert setting for Driverless AI", + "input": "auth openid urlencode quote via", + "output": "auth openid urlencode quote via refers to Quote method from urllib.parse used to encode payload dict in Authentication Request" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid urlencode quote via refers to Quote method from urllib.parse used to encode payload dict in Authentication Request" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_urlencode_quote_via", + "output": "auth openid urlencode quote via refers to Quote method from urllib.parse used to encode payload dict in Authentication Request" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_urlencode_quote_via", + "output": "auth openid urlencode quote via refers to Quote method from urllib.parse used to encode payload dict in Authentication Request" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_access_token_expiry_key", + "output": "auth openid access token expiry key refers to Key in Token Response JSON that holds the value for access token expiry" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_access_token_expiry_key", + "output": "auth openid access token expiry key refers to Key in Token Response JSON that holds the value for access token expiry" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid access token expiry key", + "output": "auth openid access token expiry key refers to Key in Token Response JSON that holds the value for access token expiry" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid access token expiry key refers to Key in Token Response JSON that holds the value for access token expiry" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_access_token_expiry_key", + "output": "auth openid access token expiry key refers to Key in Token Response JSON that holds the value for access token expiry" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_access_token_expiry_key", + "output": "auth openid access token expiry key refers to Key in Token Response JSON that holds the value for access token expiry" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_refresh_token_expiry_key", + "output": "auth openid refresh token expiry key refers to Key in Token Response JSON that holds the value for access token expiry" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_refresh_token_expiry_key", + "output": "auth openid refresh token expiry key refers to Key in Token Response JSON that holds the value for access token expiry" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid refresh token expiry key", + "output": "auth openid refresh token expiry key refers to Key in Token Response JSON that holds the value for access token expiry" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid refresh token expiry key refers to Key in Token Response JSON that holds the value for access token expiry" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a 
short explanation of the expert setting auth_openid_refresh_token_expiry_key", + "output": "auth openid refresh token expiry key refers to Key in Token Response JSON that holds the value for refresh token expiry" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_refresh_token_expiry_key", + "output": "auth openid refresh token expiry key refers to Key in Token Response JSON that holds the value for refresh token expiry" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_token_expiration_secs", + "output": "auth openid token expiration secs refers to Expiration time in seconds for access token" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_token_expiration_secs", + "output": "auth openid token expiration secs refers to Expiration time in seconds for access token" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid token expiration secs", + "output": "auth openid token expiration secs refers to Expiration time in seconds for access token" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid token expiration secs refers to Expiration time in seconds for access token" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_token_expiration_secs", + "output": "auth openid token expiration secs refers to Expiration time in seconds for access token" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_token_expiration_secs", + "output": "auth openid token expiration secs refers to Expiration time in seconds for 
access token" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_use_objectpath_match", + "output": "auth openid use objectpath match refers to Enables advanced matching for OpenID Connect authentication. When enabled, an ObjectPath expression is used to evaluate the user identity. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_use_objectpath_match", + "output": "auth openid use objectpath match refers to Enables advanced matching for OpenID Connect authentication. When enabled, an ObjectPath expression is used to evaluate the user identity. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid use objectpath match", + "output": "auth openid use objectpath match refers to Enables advanced matching for OpenID Connect authentication. When enabled, an ObjectPath expression is used to evaluate the user identity. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid use objectpath match refers to Enables advanced matching for OpenID Connect authentication. When enabled, an ObjectPath expression is used to evaluate the user identity. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_use_objectpath_match", + "output": "auth openid use objectpath match refers to Enables advanced matching for OpenID Connect authentication. When enabled, an ObjectPath expression is used to evaluate the user identity. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_use_objectpath_match", + "output": "auth openid use objectpath match refers to Enables advanced matching for OpenID Connect authentication. When enabled, an ObjectPath expression is used to evaluate the user identity. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_use_objectpath_expression", + "output": "auth openid use objectpath expression refers to ObjectPath expression that will be used to evaluate whether user is allowed to login into Driverless. Any expression that evaluates to True means user is allowed to log in. Examples: Simple claim equality: `$.our_claim is \"our_value\"` List of claims contains required value: `\"expected_role\" in @.roles` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_use_objectpath_expression", + "output": "auth openid use objectpath expression refers to ObjectPath expression that will be used to evaluate whether user is allowed to login into Driverless. Any expression that evaluates to True means user is allowed to log in. Examples: Simple claim equality: `$.our_claim is \"our_value\"` List of claims contains required value: `\"expected_role\" in @.roles` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid use objectpath expression", + "output": "auth openid use objectpath expression refers to ObjectPath expression that will be used to evaluate whether user is allowed to login into Driverless. Any expression that evaluates to True means user is allowed to log in. 
Examples: Simple claim equality: `$.our_claim is \"our_value\"` List of claims contains required value: `\"expected_role\" in @.roles` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid use objectpath expression refers to ObjectPath expression that will be used to evaluate whether user is allowed to login into Driverless. Any expression that evaluates to True means user is allowed to log in. Examples: Simple claim equality: `$.our_claim is \"our_value\"` List of claims contains required value: `\"expected_role\" in @.roles` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_use_objectpath_expression", + "output": "auth openid use objectpath expression refers to ObjectPath expression that will be used to evaluate whether user is allowed to login into Driverless. Any expression that evaluates to True means user is allowed to log in. Examples: Simple claim equality: `$.our_claim is \"our_value\"` List of claims contains required value: `\"expected_role\" in @.roles` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_use_objectpath_expression", + "output": "auth openid use objectpath expression refers to ObjectPath expression that will be used to evaluate whether user is allowed to login into Driverless. Any expression that evaluates to True means user is allowed to log in. Examples: Simple claim equality: `$.our_claim is \"our_value\"` List of claims contains required value: `\"expected_role\" in @.roles` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_token_introspection_url", + "output": "auth openid token introspection url refers to Sets token introspection URL for OpenID Connect authentication. 
(needs to be an absolute URL) Needs to be set when API token introspection is enabled. Is used to get the token TTL when set and IDP does not provide expires_in field in the token endpoint response." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_token_introspection_url", + "output": "auth openid token introspection url refers to Sets token introspection URL for OpenID Connect authentication. (needs to be an absolute URL) Needs to be set when API token introspection is enabled. Is used to get the token TTL when set and IDP does not provide expires_in field in the token endpoint response." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid token introspection url", + "output": "auth openid token introspection url refers to Sets token introspection URL for OpenID Connect authentication. (needs to be an absolute URL) Needs to be set when API token introspection is enabled. Is used to get the token TTL when set and IDP does not provide expires_in field in the token endpoint response." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid token introspection url refers to Sets token introspection URL for OpenID Connect authentication. (needs to be an absolute URL) Needs to be set when API token introspection is enabled. Is used to get the token TTL when set and IDP does not provide expires_in field in the token endpoint response." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_token_introspection_url", + "output": "auth openid token introspection url refers to Sets token introspection URL for OpenID Connect authentication. (needs to be an absolute URL) Needs to be set when API token introspection is enabled. 
Is used to get the token TTL when set and IDP does not provide expires_in field in the token endpoint response." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_token_introspection_url", + "output": "auth openid token introspection url refers to Sets token introspection URL for OpenID Connect authentication. (needs to be an absolute URL) Needs to be set when API token introspection is enabled. Is used to get the token TTL when set and IDP does not provide expires_in field in the token endpoint response." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_end_session_endpoint_url", + "output": "auth openid end session endpoint url refers to Sets an URL where the user is being redirected after being logged out when set. (needs to be an absolute URL)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_end_session_endpoint_url", + "output": "auth openid end session endpoint url refers to Sets an URL where the user is being redirected after being logged out when set. (needs to be an absolute URL)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid end session endpoint url", + "output": "auth openid end session endpoint url refers to Sets an URL where the user is being redirected after being logged out when set. (needs to be an absolute URL)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid end session endpoint url refers to Sets an URL where the user is being redirected after being logged out when set. 
(needs to be an absolute URL)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_end_session_endpoint_url", + "output": "auth openid end session endpoint url refers to Sets an URL where the user is being redirected after being logged out when set. (needs to be an absolute URL)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_end_session_endpoint_url", + "output": "auth openid end session endpoint url refers to Sets an URL where the user is being redirected after being logged out when set. (needs to be an absolute URL)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_default_scopes", + "output": "auth openid default scopes refers to If set, server will use these scopes when it asks for the token on the login. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_openid_default_scopes", + "output": "auth openid default scopes refers to If set, server will use these scopes when it asks for the token on the login. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth openid default scopes", + "output": "auth openid default scopes refers to If set, server will use these scopes when it asks for the token on the login. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth openid default scopes refers to If set, server will use these scopes when it asks for the token on the login. 
(space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_openid_default_scopes", + "output": "auth openid default scopes refers to If set, server will use these scopes when it asks for the token on the login. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_openid_default_scopes", + "output": "auth openid default scopes refers to If set, server will use these scopes when it asks for the token on the login. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_identity_source", + "output": "auth oidc identity source refers to Specifies the source from which user identity and username is retrieved. Currently supported sources are: user_info: Retrieves username from UserInfo endpoint response id_token: Retrieves username from ID Token using `auth_openid_id_token_username_key` claim " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_identity_source", + "output": "auth oidc identity source refers to Specifies the source from which user identity and username is retrieved. Currently supported sources are: user_info: Retrieves username from UserInfo endpoint response id_token: Retrieves username from ID Token using `auth_openid_id_token_username_key` claim " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth oidc identity source", + "output": "auth oidc identity source refers to Specifies the source from which user identity and username is retrieved. 
Currently supported sources are: user_info: Retrieves username from UserInfo endpoint response id_token: Retrieves username from ID Token using `auth_openid_id_token_username_key` claim " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth oidc identity source refers to Specifies the source from which user identity and username is retrieved. Currently supported sources are: user_info: Retrieves username from UserInfo endpoint response id_token: Retrieves username from ID Token using `auth_openid_id_token_username_key` claim " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_oidc_identity_source", + "output": "auth oidc identity source refers to Specifies the source from which user identity and username is retrieved. Currently supported sources are: user_info: Retrieves username from UserInfo endpoint response id_token: Retrieves username from ID Token using `auth_openid_id_token_username_key` claim " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_oidc_identity_source", + "output": "auth oidc identity source refers to Specifies the source from which user identity and username is retrieved. Currently supported sources are: user_info: Retrieves username from UserInfo endpoint response id_token: Retrieves username from ID Token using `auth_openid_id_token_username_key` claim " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_username_claim", + "output": "auth oidc username claim refers to Claim of preferred username in a message holding the user identity, which will be used as a username in application. The user identity source is specified by `auth_oidc_identity_source`, and can be e.g. 
UserInfo endpoint response or ID Token" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_username_claim", + "output": "auth oidc username claim refers to Claim of preferred username in a message holding the user identity, which will be used as a username in application. The user identity source is specified by `auth_oidc_identity_source`, and can be e.g. UserInfo endpoint response or ID Token" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth oidc username claim", + "output": "auth oidc username claim refers to Claim of preferred username in a message holding the user identity, which will be used as a username in application. The user identity source is specified by `auth_oidc_identity_source`, and can be e.g. UserInfo endpoint response or ID Token" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth oidc username claim refers to Claim of preferred username in a message holding the user identity, which will be used as a username in application. The user identity source is specified by `auth_oidc_identity_source`, and can be e.g. UserInfo endpoint response or ID Token" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_oidc_username_claim", + "output": "auth oidc username claim refers to Claim of preferred username in a message holding the user identity, which will be used as a username in application. The user identity source is specified by `auth_oidc_identity_source`, and can be e.g. 
UserInfo endpoint response or ID Token" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_oidc_username_claim", + "output": "auth oidc username claim refers to Claim of preferred username in a message holding the user identity, which will be used as a username in application. The user identity source is specified by `auth_oidc_identity_source`, and can be e.g. UserInfo endpoint response or ID Token" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_issuer_url", + "output": "auth oidc issuer url refers to OpenID-Connect Issuer URL, which is used for automatic provider info discovery. E.g. https://login.microsoftonline.com/{tenant}/v2.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_issuer_url", + "output": "auth oidc issuer url refers to OpenID-Connect Issuer URL, which is used for automatic provider info discovery. E.g. https://login.microsoftonline.com/{tenant}/v2.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth oidc issuer url", + "output": "auth oidc issuer url refers to OpenID-Connect Issuer URL, which is used for automatic provider info discovery. E.g. https://login.microsoftonline.com/{tenant}/v2.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth oidc issuer url refers to OpenID-Connect Issuer URL, which is used for automatic provider info discovery. E.g. 
https://login.microsoftonline.com/{tenant}/v2.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_oidc_issuer_url", + "output": "auth oidc issuer url refers to OpenID-Connect Issuer URL, which is used for automatic provider info discovery. E.g. 
https://login.microsoftonline.com/{tenant}/v2.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_oidc_issuer_url", + "output": "auth oidc issuer url refers to OpenID-Connect Issuer URL, which is used for automatic provider info discovery. E.g. https://login.microsoftonline.com/{tenant}/v2.0" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_token_endpoint_url", + "output": "auth oidc token endpoint url refers to OpenID-Connect Token endpoint URL. Setting this is optional and if it's empty, it'll be automatically set by provider info discovery." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_token_endpoint_url", + "output": "auth oidc token endpoint url refers to OpenID-Connect Token endpoint URL. Setting this is optional and if it's empty, it'll be automatically set by provider info discovery." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth oidc token endpoint url", + "output": "auth oidc token endpoint url refers to OpenID-Connect Token endpoint URL. Setting this is optional and if it's empty, it'll be automatically set by provider info discovery." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth oidc token endpoint url refers to OpenID-Connect Token endpoint URL. Setting this is optional and if it's empty, it'll be automatically set by provider info discovery." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_oidc_token_endpoint_url", + "output": "auth oidc token endpoint url refers to OpenID-Connect Token endpoint URL. 
Setting this is optional and if it's empty, it'll be automatically set by provider info discovery." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_oidc_token_endpoint_url", + "output": "auth oidc token endpoint url refers to OpenID-Connect Token endpoint URL. Setting this is optional and if it's empty, it'll be automatically set by provider info discovery." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_introspection_endpoint_url", + "output": "auth oidc introspection endpoint url refers to OpenID-Connect Token introspection endpoint URL. Setting this is optional and if it's empty, it'll be automatically set by provider info discovery." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_introspection_endpoint_url", + "output": "auth oidc introspection endpoint url refers to OpenID-Connect Token introspection endpoint URL. Setting this is optional and if it's empty, it'll be automatically set by provider info discovery." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth oidc introspection endpoint url", + "output": "auth oidc introspection endpoint url refers to OpenID-Connect Token introspection endpoint URL. Setting this is optional and if it's empty, it'll be automatically set by provider info discovery." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth oidc introspection endpoint url refers to OpenID-Connect Token introspection endpoint URL. Setting this is optional and if it's empty, it'll be automatically set by provider info discovery." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_oidc_introspection_endpoint_url", + "output": "auth oidc introspection endpoint url refers to OpenID-Connect Token introspection endpoint URL. Setting this is optional and if it's empty, it'll be automatically set by provider info discovery." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_oidc_introspection_endpoint_url", + "output": "auth oidc introspection endpoint url refers to OpenID-Connect Token introspection endpoint URL. Setting this is optional and if it's empty, it'll be automatically set by provider info discovery." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_post_logout_url", + "output": "auth oidc post logout url refers to Absolute URL to which user is redirected, after they log out from the application, in case OIDC authentication is used. Usually this is absolute URL of DriverlessAI Login page e.g. https://1.2.3.4:12345/login" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_post_logout_url", + "output": "auth oidc post logout url refers to Absolute URL to which user is redirected, after they log out from the application, in case OIDC authentication is used. Usually this is absolute URL of DriverlessAI Login page e.g. https://1.2.3.4:12345/login" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth oidc post logout url", + "output": "auth oidc post logout url refers to Absolute URL to which user is redirected, after they log out from the application, in case OIDC authentication is used. Usually this is absolute URL of DriverlessAI Login page e.g. 
https://1.2.3.4:12345/login" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth oidc post logout url refers to Absolute URL to which user is redirected, after they log out from the application, in case OIDC authentication is used. Usually this is absolute URL of DriverlessAI Login page e.g. https://1.2.3.4:12345/login" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_oidc_post_logout_url", + "output": "auth oidc post logout url refers to Absolute URL to which user is redirected, after they log out from the application, in case OIDC authentication is used. Usually this is absolute URL of DriverlessAI Login page e.g. https://1.2.3.4:12345/login" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_oidc_post_logout_url", + "output": "auth oidc post logout url refers to Absolute URL to which user is redirected, after they log out from the application, in case OIDC authentication is used. Usually this is absolute URL of DriverlessAI Login page e.g. https://1.2.3.4:12345/login" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_authorization_query_params", + "output": "auth oidc authorization query params refers to Key-value mapping of extra HTTP query parameters in an OIDC authorization request." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_authorization_query_params", + "output": "auth oidc authorization query params refers to Key-value mapping of extra HTTP query parameters in an OIDC authorization request." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth oidc authorization query params", + "output": "auth oidc authorization query params refers to Key-value mapping of extra HTTP query parameters in an OIDC authorization request." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth oidc authorization query params refers to Key-value mapping of extra HTTP query parameters in an OIDC authorization request." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_oidc_authorization_query_params", + "output": "auth oidc authorization query params refers to Key-value mapping of extra HTTP query parameters in an OIDC authorization request." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_oidc_authorization_query_params", + "output": "auth oidc authorization query params refers to Key-value mapping of extra HTTP query parameters in an OIDC authorization request." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_skip_cert_verification", + "output": "auth oidc skip cert verification refers to When set to True, will skip cert verification." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_skip_cert_verification", + "output": "auth oidc skip cert verification refers to When set to True, will skip cert verification." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth oidc skip cert verification", + "output": "auth oidc skip cert verification refers to When set to True, will skip cert verification." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth oidc skip cert verification refers to When set to True, will skip cert verification." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_oidc_skip_cert_verification", + "output": "auth oidc skip cert verification refers to When set to True, will skip cert verification." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_oidc_skip_cert_verification", + "output": "auth oidc skip cert verification refers to When set to True, will skip cert verification." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_ca_cert_location", + "output": "auth oidc ca cert location refers to When set will use this value as the location for the CA cert, this takes precedence over auth_oidc_skip_cert_verification." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_oidc_ca_cert_location", + "output": "auth oidc ca cert location refers to When set will use this value as the location for the CA cert, this takes precedence over auth_oidc_skip_cert_verification." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth oidc ca cert location", + "output": "auth oidc ca cert location refers to When set will use this value as the location for the CA cert, this takes precedence over auth_oidc_skip_cert_verification." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth oidc ca cert location refers to When set will use this value as the location for the CA cert, this takes precedence over auth_oidc_skip_cert_verification." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_oidc_ca_cert_location", + "output": "auth oidc ca cert location refers to When set will use this value as the location for the CA cert, this takes precedence over auth_oidc_skip_cert_verification." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_oidc_ca_cert_location", + "output": "auth oidc ca cert location refers to When set will use this value as the location for the CA cert, this takes precedence over auth_oidc_skip_cert_verification." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "api_token_introspection_enabled", + "output": "api token introspection enabled refers to Enables option to use Bearer token for authentication with the RPC endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "api_token_introspection_enabled", + "output": "api token introspection enabled refers to Enables option to use Bearer token for authentication with the RPC endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "api token introspection enabled", + "output": "api token introspection enabled refers to Enables option to use Bearer token for authentication with the RPC endpoint." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "api token introspection enabled refers to Enables option to use Bearer token for authentication with the RPC endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting api_token_introspection_enabled", + "output": "api token introspection enabled refers to Enables option to use Bearer token for authentication with the RPC endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting api_token_introspection_enabled", + "output": "api token introspection enabled refers to Enables option to use Bearer token for authentication with the RPC endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "api_token_introspection_method", + "output": "api token introspection method refers to Sets the method that is used to introspect the bearer token. OAUTH2_TOKEN_INTROSPECTION: Uses OAuth 2.0 Token Introspection (RFC 7662) endpoint to introspect the bearer token. This is useful when 'openid' is used as the authentication method. Uses 'auth_openid_client_id' and 'auth_openid_client_secret' to authenticate with the authorization server and `auth_openid_token_introspection_url` to perform the introspection. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "api_token_introspection_method", + "output": "api token introspection method refers to Sets the method that is used to introspect the bearer token. OAUTH2_TOKEN_INTROSPECTION: Uses OAuth 2.0 Token Introspection (RFC 7662) endpoint to introspect the bearer token. This is useful when 'openid' is used as the authentication method.
Uses 'auth_openid_client_id' and 'auth_openid_client_secret' to authenticate with the authorization server and `auth_openid_token_introspection_url` to perform the introspection. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "api token introspection method", + "output": "api token introspection method refers to Sets the method that is used to introspect the bearer token. OAUTH2_TOKEN_INTROSPECTION: Uses OAuth 2.0 Token Introspection (RFC 7662) endpoint to introspect the bearer token. This is useful when 'openid' is used as the authentication method. Uses 'auth_openid_client_id' and 'auth_openid_client_secret' to authenticate with the authorization server and `auth_openid_token_introspection_url` to perform the introspection. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "api token introspection method refers to Sets the method that is used to introspect the bearer token. OAUTH2_TOKEN_INTROSPECTION: Uses OAuth 2.0 Token Introspection (RFC 7662) endpoint to introspect the bearer token. This is useful when 'openid' is used as the authentication method. Uses 'auth_openid_client_id' and 'auth_openid_client_secret' to authenticate with the authorization server and `auth_openid_token_introspection_url` to perform the introspection. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting api_token_introspection_method", + "output": "api token introspection method refers to Sets the method that is used to introspect the bearer token. OAUTH2_TOKEN_INTROSPECTION: Uses OAuth 2.0 Token Introspection (RFC 7662) endpoint to introspect the bearer token. This is useful when 'openid' is used as the authentication method.
Uses 'auth_openid_client_id' and 'auth_openid_client_secret' to authenticate with the authorization server and `auth_openid_token_introspection_url` to perform the introspection. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting api_token_introspection_method", + "output": "api token introspection method refers to Sets the method that is used to introspect the bearer token. OAUTH2_TOKEN_INTROSPECTION: Uses OAuth 2.0 Token Introspection (RFC 7662) endpoint to introspect the bearer token. This is useful when 'openid' is used as the authentication method. Uses 'auth_openid_client_id' and 'auth_openid_client_secret' to authenticate with the authorization server and `auth_openid_token_introspection_url` to perform the introspection. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "api_token_oauth2_scopes", + "output": "api token oauth2 scopes refers to Sets the minimum of the scopes that the access token needs to have in order to pass the introspection. Space separated. This is passed to the introspection endpoint and also verified after response for the servers that don't enforce scopes. Keeping this empty turns the verification off. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "api_token_oauth2_scopes", + "output": "api token oauth2 scopes refers to Sets the minimum of the scopes that the access token needs to have in order to pass the introspection. Space separated. This is passed to the introspection endpoint and also verified after response for the servers that don't enforce scopes. Keeping this empty turns the verification off.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "api token oauth2 scopes", + "output": "api token oauth2 scopes refers to Sets the minimum of the scopes that the access token needs to have in order to pass the introspection. Space separated. This is passed to the introspection endpoint and also verified after response for the servers that don't enforce scopes. Keeping this empty turns the verification off. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "api token oauth2 scopes refers to Sets the minimum of the scopes that the access token needs to have in order to pass the introspection. Space separated. This is passed to the introspection endpoint and also verified after response for the servers that don't enforce scopes. Keeping this empty turns the verification off. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting api_token_oauth2_scopes", + "output": "api token oauth2 scopes refers to Sets the minimum of the scopes that the access token needs to have in order to pass the introspection. Space separated. This is passed to the introspection endpoint and also verified after response for the servers that don't enforce scopes. Keeping this empty turns the verification off. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting api_token_oauth2_scopes", + "output": "api token oauth2 scopes refers to Sets the minimum of the scopes that the access token needs to have in order to pass the introspection. Space separated. This is passed to the introspection endpoint and also verified after response for the servers that don't enforce scopes. Keeping this empty turns the verification off.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "api_token_oauth2_username_field_name", + "output": "api token oauth2 username field name refers to Which field of the response returned by the token introspection endpoint should be used as a username." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "api_token_oauth2_username_field_name", + "output": "api token oauth2 username field name refers to Which field of the response returned by the token introspection endpoint should be used as a username." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "api token oauth2 username field name", + "output": "api token oauth2 username field name refers to Which field of the response returned by the token introspection endpoint should be used as a username." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "api token oauth2 username field name refers to Which field of the response returned by the token introspection endpoint should be used as a username." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting api_token_oauth2_username_field_name", + "output": "api token oauth2 username field name refers to Which field of the response returned by the token introspection endpoint should be used as a username." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting api_token_oauth2_username_field_name", + "output": "api token oauth2 username field name refers to Which field of the response returned by the token introspection endpoint should be used as a username." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2_client_tokens_enabled", + "output": "oauth2 client tokens enabled refers to Enables the option to initiate a PKCE flow from the UI in order to obtain tokens usable with Driverless clients" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2_client_tokens_enabled", + "output": "oauth2 client tokens enabled refers to Enables the option to initiate a PKCE flow from the UI in order to obtain tokens usable with Driverless clients" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2 client tokens enabled", + "output": "oauth2 client tokens enabled refers to Enables the option to initiate a PKCE flow from the UI in order to obtain tokens usable with Driverless clients" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "oauth2 client tokens enabled refers to Enables the option to initiate a PKCE flow from the UI in order to obtain tokens usable with Driverless clients" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting oauth2_client_tokens_enabled", + "output": "oauth2 client tokens enabled refers to Enables the option to initiate a PKCE flow from the UI in order to obtain tokens usable with Driverless clients" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting oauth2_client_tokens_enabled", + "output": "oauth2 client tokens enabled refers to Enables the option to initiate a PKCE flow from the UI in order to obtain tokens usable with Driverless clients" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input":
"oauth2_client_tokens_client_id", + "output": "oauth2 client tokens client id refers to Sets up client id that will be used in the OAuth 2.0 Authorization Code Flow to obtain the tokens. Client needs to be public and be able to use PKCE with S256 code challenge." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2_client_tokens_client_id", + "output": "oauth2 client tokens client id refers to Sets up client id that will be used in the OAuth 2.0 Authorization Code Flow to obtain the tokens. Client needs to be public and be able to use PKCE with S256 code challenge." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2 client tokens client id", + "output": "oauth2 client tokens client id refers to Sets up client id that will be used in the OAuth 2.0 Authorization Code Flow to obtain the tokens. Client needs to be public and be able to use PKCE with S256 code challenge." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "oauth2 client tokens client id refers to Sets up client id that will be used in the OAuth 2.0 Authorization Code Flow to obtain the tokens. Client needs to be public and be able to use PKCE with S256 code challenge." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting oauth2_client_tokens_client_id", + "output": "oauth2 client tokens client id refers to Sets up client id that will be used in the OAuth 2.0 Authorization Code Flow to obtain the tokens. Client needs to be public and be able to use PKCE with S256 code challenge." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting oauth2_client_tokens_client_id", + "output": "oauth2 client tokens client id refers to Sets up client id that will be used in the OAuth 2.0 Authorization Code Flow to obtain the tokens. Client needs to be public and be able to use PKCE with S256 code challenge." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2_client_tokens_authorize_url", + "output": "oauth2 client tokens authorize url refers to Sets up the absolute url to the authorize endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2_client_tokens_authorize_url", + "output": "oauth2 client tokens authorize url refers to Sets up the absolute url to the authorize endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2 client tokens authorize url", + "output": "oauth2 client tokens authorize url refers to Sets up the absolute url to the authorize endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "oauth2 client tokens authorize url refers to Sets up the absolute url to the authorize endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting oauth2_client_tokens_authorize_url", + "output": "oauth2 client tokens authorize url refers to Sets up the absolute url to the authorize endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting oauth2_client_tokens_authorize_url", + "output": "oauth2 client tokens authorize url refers to Sets up the absolute url to the authorize endpoint." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2_client_tokens_token_url", + "output": "oauth2 client tokens token url refers to Sets up the absolute url to the token endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2_client_tokens_token_url", + "output": "oauth2 client tokens token url refers to Sets up the absolute url to the token endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2 client tokens token url", + "output": "oauth2 client tokens token url refers to Sets up the absolute url to the token endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "oauth2 client tokens token url refers to Sets up the absolute url to the token endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting oauth2_client_tokens_token_url", + "output": "oauth2 client tokens token url refers to Sets up the absolute url to the token endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting oauth2_client_tokens_token_url", + "output": "oauth2 client tokens token url refers to Sets up the absolute url to the token endpoint." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2_client_tokens_introspection_url", + "output": "oauth2 client tokens introspection url refers to Sets up the absolute url to the token introspection endpoint. It's displayed in the UI so that clients can inspect the token expiration."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2_client_tokens_introspection_url", + "output": "oauth2 client tokens introspection url refers to Sets up the absolute url to the token introspection endpoint. It's displayed in the UI so that clients can inspect the token expiration." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2 client tokens introspection url", + "output": "oauth2 client tokens introspection url refers to Sets up the absolute url to the token introspection endpoint. It's displayed in the UI so that clients can inspect the token expiration." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "oauth2 client tokens introspection url refers to Sets up the absolute url to the token introspection endpoint. It's displayed in the UI so that clients can inspect the token expiration." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting oauth2_client_tokens_introspection_url", + "output": "oauth2 client tokens introspection url refers to Sets up the absolute url to the token introspection endpoint. It's displayed in the UI so that clients can inspect the token expiration." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting oauth2_client_tokens_introspection_url", + "output": "oauth2 client tokens introspection url refers to Sets up the absolute url to the token introspection endpoint. It's displayed in the UI so that clients can inspect the token expiration."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2_client_tokens_redirect_url", + "output": "oauth2 client tokens redirect url refers to Sets up the absolute url to the redirect endpoint where Driverless handles the redirect part of the Authorization Code Flow. This is the Driverless base url followed by /oauth2/client_token" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2_client_tokens_redirect_url", + "output": "oauth2 client tokens redirect url refers to Sets up the absolute url to the redirect endpoint where Driverless handles the redirect part of the Authorization Code Flow. This is the Driverless base url followed by /oauth2/client_token" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2 client tokens redirect url", + "output": "oauth2 client tokens redirect url refers to Sets up the absolute url to the redirect endpoint where Driverless handles the redirect part of the Authorization Code Flow. This is the Driverless base url followed by /oauth2/client_token" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "oauth2 client tokens redirect url refers to Sets up the absolute url to the redirect endpoint where Driverless handles the redirect part of the Authorization Code Flow. This is the Driverless base url followed by /oauth2/client_token" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting oauth2_client_tokens_redirect_url", + "output": "oauth2 client tokens redirect url refers to Sets up the absolute url to the redirect endpoint where Driverless handles the redirect part of the Authorization Code Flow.
This is the Driverless base url followed by /oauth2/client_token" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting oauth2_client_tokens_redirect_url", + "output": "oauth2 client tokens redirect url refers to Sets up the absolute url to the redirect endpoint where Driverless handles the redirect part of the Authorization Code Flow. This is the Driverless base url followed by /oauth2/client_token" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2_client_tokens_scope", + "output": "oauth2 client tokens scope refers to Sets up the scope for the requested tokens. Space separated list." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2_client_tokens_scope", + "output": "oauth2 client tokens scope refers to Sets up the scope for the requested tokens. Space separated list." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "oauth2 client tokens scope", + "output": "oauth2 client tokens scope refers to Sets up the scope for the requested tokens. Space separated list." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "oauth2 client tokens scope refers to Sets up the scope for the requested tokens. Space separated list." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting oauth2_client_tokens_scope", + "output": "oauth2 client tokens scope refers to Sets up the scope for the requested tokens. Space separated list." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting oauth2_client_tokens_scope", + "output": "oauth2 client tokens scope refers to Sets up the scope for the requested tokens. Space separated list."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_server", + "output": "ldap server refers to ldap server domain or ip" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_server", + "output": "ldap server refers to ldap server domain or ip" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap server", + "output": "ldap server refers to ldap server domain or ip" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap server refers to ldap server domain or ip" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_server", + "output": "ldap server refers to ldap server domain or ip" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_server", + "output": "ldap server refers to ldap server domain or ip" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_port", + "output": "ldap port refers to ldap server port" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_port", + "output": "ldap port refers to ldap server port" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap port", + "output": "ldap port refers to ldap server port" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap port refers to ldap server port" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the 
expert setting ldap_port", + "output": "ldap port refers to ldap server port" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_port", + "output": "ldap port refers to ldap server port" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_bind_dn", + "output": "ldap bind dn refers to Complete DN of the LDAP bind user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_bind_dn", + "output": "ldap bind dn refers to Complete DN of the LDAP bind user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap bind dn", + "output": "ldap bind dn refers to Complete DN of the LDAP bind user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap bind dn refers to Complete DN of the LDAP bind user" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_bind_dn", + "output": "ldap bind dn refers to Complete DN of the LDAP bind user" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_bind_dn", + "output": "ldap bind dn refers to Complete DN of the LDAP bind user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_bind_password", + "output": "ldap bind password refers to Password for the LDAP bind" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_bind_password", + "output": "ldap bind password refers to Password for the LDAP bind" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert 
setting for Driverless AI", + "input": "ldap bind password", + "output": "ldap bind password refers to Password for the LDAP bind" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap bind password refers to Password for the LDAP bind" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_bind_password", + "output": "ldap bind password refers to Password for the LDAP bind" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_bind_password", + "output": "ldap bind password refers to Password for the LDAP bind" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_tls_file", + "output": "ldap tls file refers to Provide Cert file location" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_tls_file", + "output": "ldap tls file refers to Provide Cert file location" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap tls file", + "output": "ldap tls file refers to Provide Cert file location" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap tls file refers to Provide Cert file location" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_tls_file", + "output": "ldap tls file refers to Provide Cert file location" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_tls_file", + "output": "ldap tls file refers to Provide Cert file location" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the 
following expert setting for Driverless AI", + "input": "ldap_use_ssl", + "output": "ldap use ssl refers to use true to use ssl or false" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_use_ssl", + "output": "ldap use ssl refers to use true to use ssl or false" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap use ssl", + "output": "ldap use ssl refers to use true to use ssl or false" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap use ssl refers to use true to use ssl or false" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_use_ssl", + "output": "ldap use ssl refers to use true to use ssl or false" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_use_ssl", + "output": "ldap use ssl refers to use true to use ssl or false" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_search_base", + "output": "ldap search base refers to the location in the DIT where the search will start" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_search_base", + "output": "ldap search base refers to the location in the DIT where the search will start" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap search base", + "output": "ldap search base refers to the location in the DIT where the search will start" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap search base 
refers to the location in the DIT where the search will start" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_search_base", + "output": "ldap search base refers to the location in the DIT where the search will start" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_search_base", + "output": "ldap search base refers to the location in the DIT where the search will start" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_search_filter", + "output": "ldap search filter refers to A string that describes what you are searching for. You can use Python substitution to have this constructed dynamically (only {{DAI_USERNAME}} is supported)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_search_filter", + "output": "ldap search filter refers to A string that describes what you are searching for. You can use Python substitution to have this constructed dynamically (only {{DAI_USERNAME}} is supported)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap search filter", + "output": "ldap search filter refers to A string that describes what you are searching for. You can use Python substitution to have this constructed dynamically (only {{DAI_USERNAME}} is supported)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap search filter refers to A string that describes what you are searching for.
You can use Pythonsubstitution to have this constructed dynamically.(only {{DAI_USERNAME}} is supported)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_search_filter", + "output": "ldap search filter refers to A string that describes what you are searching for. You can use Pythonsubstitution to have this constructed dynamically.(only {{DAI_USERNAME}} is supported)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_search_filter", + "output": "ldap search filter refers to A string that describes what you are searching for. You can use Pythonsubstitution to have this constructed dynamically.(only {{DAI_USERNAME}} is supported)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_search_attributes", + "output": "ldap search attributes refers to ldap attributes to return from search" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_search_attributes", + "output": "ldap search attributes refers to ldap attributes to return from search" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap search attributes", + "output": "ldap search attributes refers to ldap attributes to return from search" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap search attributes refers to ldap attributes to return from search" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_search_attributes", + "output": "ldap search attributes refers to ldap attributes to return from search" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the 
expert setting ldap_search_attributes", + "output": "ldap search attributes refers to ldap attributes to return from search" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_user_name_attribute", + "output": "ldap user name attribute refers to specify key to find user name" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_user_name_attribute", + "output": "ldap user name attribute refers to specify key to find user name" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap user name attribute", + "output": "ldap user name attribute refers to specify key to find user name" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap user name attribute refers to specify key to find user name" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_user_name_attribute", + "output": "ldap user name attribute refers to specify key to find user name" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_user_name_attribute", + "output": "ldap user name attribute refers to specify key to find user name" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_recipe", + "output": "ldap recipe refers to When using this recipe, needs to be set to \"1\"" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_recipe", + "output": "ldap recipe refers to When using this recipe, needs to be set to \"1\"" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert 
setting for Driverless AI", + "input": "ldap recipe", + "output": "ldap recipe refers to When using this recipe, needs to be set to \"1\"" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap recipe refers to When using this recipe, needs to be set to \"1\"" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_recipe", + "output": "ldap recipe refers to When using this recipe, needs to be set to \"1\"" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_recipe", + "output": "ldap recipe refers to When using this recipe, needs to be set to \"1\"" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_user_prefix", + "output": "ldap user prefix refers to Deprecated do not use" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_user_prefix", + "output": "ldap user prefix refers to Deprecated do not use" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap user prefix", + "output": "ldap user prefix refers to Deprecated do not use" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap user prefix refers to Deprecated do not use" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_user_prefix", + "output": "ldap user prefix refers to Deprecated do not use" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_user_prefix", + "output": "ldap user prefix refers to Deprecated do not use" + }, + { + "prompt_type": 
"human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_search_user_id", + "output": "ldap search user id refers to Deprecated, Use ldap_bind_dn" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_search_user_id", + "output": "ldap search user id refers to Deprecated, Use ldap_bind_dn" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap search user id", + "output": "ldap search user id refers to Deprecated, Use ldap_bind_dn" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap search user id refers to Deprecated, Use ldap_bind_dn" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_search_user_id", + "output": "ldap search user id refers to Deprecated, Use ldap_bind_dn" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_search_user_id", + "output": "ldap search user id refers to Deprecated, Use ldap_bind_dn" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_search_password", + "output": "ldap search password refers to Deprecated, ldap_bind_password" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_search_password", + "output": "ldap search password refers to Deprecated, ldap_bind_password" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap search password", + "output": "ldap search password refers to Deprecated, ldap_bind_password" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following 
expert setting for Driverless AI", + "input": "", + "output": "ldap search password refers to Deprecated, ldap_bind_password" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_search_password", + "output": "ldap search password refers to Deprecated, ldap_bind_password" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_search_password", + "output": "ldap search password refers to Deprecated, ldap_bind_password" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_ou_dn", + "output": "ldap ou dn refers to Deprecated, use ldap_search_base instead" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_ou_dn", + "output": "ldap ou dn refers to Deprecated, use ldap_search_base instead" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap ou dn", + "output": "ldap ou dn refers to Deprecated, use ldap_search_base instead" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap ou dn refers to Deprecated, use ldap_search_base instead" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_ou_dn", + "output": "ldap ou dn refers to Deprecated, use ldap_search_base instead" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_ou_dn", + "output": "ldap ou dn refers to Deprecated, use ldap_search_base instead" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_dc", + "output": "ldap dc refers to Deprecated, use ldap_base_dn" + }, + { + 
"prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_dc", + "output": "ldap dc refers to Deprecated, use ldap_base_dn" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap dc", + "output": "ldap dc refers to Deprecated, use ldap_base_dn" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap dc refers to Deprecated, use ldap_base_dn" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_dc", + "output": "ldap dc refers to Deprecated, use ldap_base_dn" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_dc", + "output": "ldap dc refers to Deprecated, use ldap_base_dn" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_base_dn", + "output": "ldap base dn refers to Deprecated, use ldap_search_base" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_base_dn", + "output": "ldap base dn refers to Deprecated, use ldap_search_base" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap base dn", + "output": "ldap base dn refers to Deprecated, use ldap_search_base" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap base dn refers to Deprecated, use ldap_search_base" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_base_dn", + "output": "ldap base dn refers to Deprecated, use ldap_search_base" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Provide a detailed explanation of the expert setting ldap_base_dn", + "output": "ldap base dn refers to Deprecated, use ldap_search_base" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_base_filter", + "output": "ldap base filter refers to Deprecated, use ldap_search_filter" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap_base_filter", + "output": "ldap base filter refers to Deprecated, use ldap_search_filter" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ldap base filter", + "output": "ldap base filter refers to Deprecated, use ldap_search_filter" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ldap base filter refers to Deprecated, use ldap_search_filter" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ldap_base_filter", + "output": "ldap base filter refers to Deprecated, use ldap_search_filter" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ldap_base_filter", + "output": "ldap base filter refers to Deprecated, use ldap_search_filter" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_crl_file", + "output": "auth tls crl file refers to Path to the CRL file that will be used to verify client certificate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_crl_file", + "output": "auth tls crl file refers to Path to the CRL file that will be used to verify client certificate." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls crl file", + "output": "auth tls crl file refers to Path to the CRL file that will be used to verify client certificate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls crl file refers to Path to the CRL file that will be used to verify client certificate." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_crl_file", + "output": "auth tls crl file refers to Path to the CRL file that will be used to verify client certificate." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_crl_file", + "output": "auth tls crl file refers to Path to the CRL file that will be used to verify client certificate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_subject_field", + "output": "auth tls subject field refers to What field of the subject would used as source for username or other values used for further validation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_subject_field", + "output": "auth tls subject field refers to What field of the subject would used as source for username or other values used for further validation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls subject field", + "output": "auth tls subject field refers to What field of the subject would used as source for username or other values used for further validation." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls subject field refers to What field of the subject would used as source for username or other values used for further validation." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_subject_field", + "output": "auth tls subject field refers to What field of the subject would used as source for username or other values used for further validation." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_subject_field", + "output": "auth tls subject field refers to What field of the subject would used as source for username or other values used for further validation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_field_parse_regexp", + "output": "auth tls field parse regexp refers to Regular expression that will be used to parse subject field to obtain the username or other values used for further validation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_field_parse_regexp", + "output": "auth tls field parse regexp refers to Regular expression that will be used to parse subject field to obtain the username or other values used for further validation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls field parse regexp", + "output": "auth tls field parse regexp refers to Regular expression that will be used to parse subject field to obtain the username or other values used for further validation." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls field parse regexp refers to Regular expression that will be used to parse subject field to obtain the username or other values used for further validation." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_field_parse_regexp", + "output": "auth tls field parse regexp refers to Regular expression that will be used to parse subject field to obtain the username or other values used for further validation." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_field_parse_regexp", + "output": "auth tls field parse regexp refers to Regular expression that will be used to parse subject field to obtain the username or other values used for further validation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_user_lookup", + "output": "auth tls user lookup refers to Sets up the way how user identity would be obtained REGEXP_ONLY: Will use 'auth_tls_subject_field' and 'auth_tls_field_parse_regexp' to extract the username from the client certificate. LDAP_LOOKUP: Will use LDAP server to lookup for the username. 'auth_tls_ldap_server', 'auth_tls_ldap_port', 'auth_tls_ldap_use_ssl', 'auth_tls_ldap_tls_file', 'auth_tls_ldap_bind_dn', 'auth_tls_ldap_bind_password' options are used to establish the connection with the LDAP server. 'auth_tls_subject_field' and 'auth_tls_field_parse_regexp' options are used to parse the certificate. 'auth_tls_ldap_search_base', 'auth_tls_ldap_search_filter', and 'auth_tls_ldap_username_attribute' options are used to do the lookup. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_user_lookup", + "output": "auth tls user lookup refers to Sets up the way how user identity would be obtained REGEXP_ONLY: Will use 'auth_tls_subject_field' and 'auth_tls_field_parse_regexp' to extract the username from the client certificate. LDAP_LOOKUP: Will use LDAP server to lookup for the username. 'auth_tls_ldap_server', 'auth_tls_ldap_port', 'auth_tls_ldap_use_ssl', 'auth_tls_ldap_tls_file', 'auth_tls_ldap_bind_dn', 'auth_tls_ldap_bind_password' options are used to establish the connection with the LDAP server. 'auth_tls_subject_field' and 'auth_tls_field_parse_regexp' options are used to parse the certificate. 'auth_tls_ldap_search_base', 'auth_tls_ldap_search_filter', and 'auth_tls_ldap_username_attribute' options are used to do the lookup. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls user lookup", + "output": "auth tls user lookup refers to Sets up the way how user identity would be obtained REGEXP_ONLY: Will use 'auth_tls_subject_field' and 'auth_tls_field_parse_regexp' to extract the username from the client certificate. LDAP_LOOKUP: Will use LDAP server to lookup for the username. 'auth_tls_ldap_server', 'auth_tls_ldap_port', 'auth_tls_ldap_use_ssl', 'auth_tls_ldap_tls_file', 'auth_tls_ldap_bind_dn', 'auth_tls_ldap_bind_password' options are used to establish the connection with the LDAP server. 'auth_tls_subject_field' and 'auth_tls_field_parse_regexp' options are used to parse the certificate. 'auth_tls_ldap_search_base', 'auth_tls_ldap_search_filter', and 'auth_tls_ldap_username_attribute' options are used to do the lookup. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls user lookup refers to Sets up the way how user identity would be obtained REGEXP_ONLY: Will use 'auth_tls_subject_field' and 'auth_tls_field_parse_regexp' to extract the username from the client certificate. LDAP_LOOKUP: Will use LDAP server to lookup for the username. 'auth_tls_ldap_server', 'auth_tls_ldap_port', 'auth_tls_ldap_use_ssl', 'auth_tls_ldap_tls_file', 'auth_tls_ldap_bind_dn', 'auth_tls_ldap_bind_password' options are used to establish the connection with the LDAP server. 'auth_tls_subject_field' and 'auth_tls_field_parse_regexp' options are used to parse the certificate. 'auth_tls_ldap_search_base', 'auth_tls_ldap_search_filter', and 'auth_tls_ldap_username_attribute' options are used to do the lookup. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_user_lookup", + "output": "auth tls user lookup refers to Sets up the way how user identity would be obtained REGEXP_ONLY: Will use 'auth_tls_subject_field' and 'auth_tls_field_parse_regexp' to extract the username from the client certificate. LDAP_LOOKUP: Will use LDAP server to lookup for the username. 'auth_tls_ldap_server', 'auth_tls_ldap_port', 'auth_tls_ldap_use_ssl', 'auth_tls_ldap_tls_file', 'auth_tls_ldap_bind_dn', 'auth_tls_ldap_bind_password' options are used to establish the connection with the LDAP server. 'auth_tls_subject_field' and 'auth_tls_field_parse_regexp' options are used to parse the certificate. 'auth_tls_ldap_search_base', 'auth_tls_ldap_search_filter', and 'auth_tls_ldap_username_attribute' options are used to do the lookup. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_user_lookup", + "output": "auth tls user lookup refers to Sets up the way how user identity would be obtained REGEXP_ONLY: Will use 'auth_tls_subject_field' and 'auth_tls_field_parse_regexp' to extract the username from the client certificate. LDAP_LOOKUP: Will use LDAP server to lookup for the username. 'auth_tls_ldap_server', 'auth_tls_ldap_port', 'auth_tls_ldap_use_ssl', 'auth_tls_ldap_tls_file', 'auth_tls_ldap_bind_dn', 'auth_tls_ldap_bind_password' options are used to establish the connection with the LDAP server. 'auth_tls_subject_field' and 'auth_tls_field_parse_regexp' options are used to parse the certificate. 'auth_tls_ldap_search_base', 'auth_tls_ldap_search_filter', and 'auth_tls_ldap_username_attribute' options are used to do the lookup. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_server", + "output": "auth tls ldap server refers to Hostname or IP address of the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_server", + "output": "auth tls ldap server refers to Hostname or IP address of the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls ldap server", + "output": "auth tls ldap server refers to Hostname or IP address of the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls ldap server refers to Hostname or IP address of the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_ldap_server", + "output": "auth tls ldap server refers to Hostname or IP address of the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_ldap_server", + "output": "auth tls ldap server refers to Hostname or IP address of the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_port", + "output": "auth tls ldap port refers to Port of the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_port", + "output": "auth tls ldap port refers to Port of the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls ldap port", + "output": "auth tls ldap port refers to Port of the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls ldap port refers to Port of the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_ldap_port", + "output": "auth tls ldap port refers to Port of the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_ldap_port", + "output": "auth tls ldap port refers to Port of the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_use_ssl", + "output": "auth tls ldap use ssl refers to Whether to SSL to when connecting to the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_use_ssl", + "output": "auth tls ldap use ssl refers to Whether to SSL to when connecting to the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls ldap use ssl", + "output": "auth tls ldap use ssl refers to Whether to SSL to when connecting to the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls ldap use ssl refers to Whether to SSL to when connecting to the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_ldap_use_ssl", + "output": "auth tls ldap use ssl refers to Whether to SSL to when connecting to the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_ldap_use_ssl", + "output": "auth tls ldap use ssl refers to Whether to SSL to when connecting to the LDAP server used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_tls_file", + "output": "auth tls ldap tls file refers to Path to the SSL certificate used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_tls_file", + "output": "auth tls ldap tls file refers to Path to the SSL certificate used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls ldap tls file", + "output": "auth tls ldap tls file refers to Path to the SSL certificate used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls ldap tls file refers to Path to the SSL certificate used with LDAP_LOOKUP with 'tls_certificate' authentication method." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_ldap_tls_file", + "output": "auth tls ldap tls file refers to Path to the SSL certificate used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_ldap_tls_file", + "output": "auth tls ldap tls file refers to Path to the SSL certificate used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_bind_dn", + "output": "auth tls ldap bind dn refers to Complete DN of the LDAP bind user used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_bind_dn", + "output": "auth tls ldap bind dn refers to Complete DN of the LDAP bind user used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls ldap bind dn", + "output": "auth tls ldap bind dn refers to Complete DN of the LDAP bind user used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls ldap bind dn refers to Complete DN of the LDAP bind user used with LDAP_LOOKUP with 'tls_certificate' authentication method." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_ldap_bind_dn", + "output": "auth tls ldap bind dn refers to Complete DN of the LDAP bind user used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_ldap_bind_dn", + "output": "auth tls ldap bind dn refers to Complete DN of the LDAP bind user used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_bind_password", + "output": "auth tls ldap bind password refers to Password for the LDAP bind used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_bind_password", + "output": "auth tls ldap bind password refers to Password for the LDAP bind used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls ldap bind password", + "output": "auth tls ldap bind password refers to Password for the LDAP bind used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls ldap bind password refers to Password for the LDAP bind used with LDAP_LOOKUP with 'tls_certificate' authentication method." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_ldap_bind_password", + "output": "auth tls ldap bind password refers to Password for the LDAP bind used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_ldap_bind_password", + "output": "auth tls ldap bind password refers to Password for the LDAP bind used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_search_base", + "output": "auth tls ldap search base refers to Location in the DIT where the search will start used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_search_base", + "output": "auth tls ldap search base refers to Location in the DIT where the search will start used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls ldap search base", + "output": "auth tls ldap search base refers to Location in the DIT where the search will start used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls ldap search base refers to Location in the DIT where the search will start used with LDAP_LOOKUP with 'tls_certificate' authentication method." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_ldap_search_base", + "output": "auth tls ldap search base refers to Location in the DIT where the search will start used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_ldap_search_base", + "output": "auth tls ldap search base refers to Location in the DIT where the search will start used with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_search_filter", + "output": "auth tls ldap search filter refers to LDAP filter that will be used to lookup for the user with LDAP_LOOKUP with 'tls_certificate' authentication method. Can be built dynamically using the named capturing groups from the 'auth_tls_field_parse_regexp' for substitution. Example: ``auth_tls_field_parse_regexp=\"\\w+ (?P<id>\\d+)\"`` ``auth_tls_ldap_search_filter=\"(&(objectClass=person)(id={{id}}))\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_search_filter", + "output": "auth tls ldap search filter refers to LDAP filter that will be used to lookup for the user with LDAP_LOOKUP with 'tls_certificate' authentication method. Can be built dynamically using the named capturing groups from the 'auth_tls_field_parse_regexp' for substitution. 
Example: ``auth_tls_field_parse_regexp=\"\\w+ (?P<id>\\d+)\"`` ``auth_tls_ldap_search_filter=\"(&(objectClass=person)(id={{id}}))\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls ldap search filter", + "output": "auth tls ldap search filter refers to LDAP filter that will be used to lookup for the user with LDAP_LOOKUP with 'tls_certificate' authentication method. Can be built dynamically using the named capturing groups from the 'auth_tls_field_parse_regexp' for substitution. Example: ``auth_tls_field_parse_regexp=\"\\w+ (?P<id>\\d+)\"`` ``auth_tls_ldap_search_filter=\"(&(objectClass=person)(id={{id}}))\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls ldap search filter refers to LDAP filter that will be used to lookup for the user with LDAP_LOOKUP with 'tls_certificate' authentication method. Can be built dynamically using the named capturing groups from the 'auth_tls_field_parse_regexp' for substitution. Example: ``auth_tls_field_parse_regexp=\"\\w+ (?P<id>\\d+)\"`` ``auth_tls_ldap_search_filter=\"(&(objectClass=person)(id={{id}}))\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_ldap_search_filter", + "output": "auth tls ldap search filter refers to LDAP filter that will be used to lookup for the user with LDAP_LOOKUP with 'tls_certificate' authentication method. Can be built dynamically using the named capturing groups from the 'auth_tls_field_parse_regexp' for substitution. 
Example: ``auth_tls_field_parse_regexp=\"\\w+ (?P<id>\\d+)\"`` ``auth_tls_ldap_search_filter=\"(&(objectClass=person)(id={{id}}))\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_ldap_search_filter", + "output": "auth tls ldap search filter refers to LDAP filter that will be used to lookup for the user with LDAP_LOOKUP with 'tls_certificate' authentication method. Can be built dynamically using the named capturing groups from the 'auth_tls_field_parse_regexp' for substitution. Example: ``auth_tls_field_parse_regexp=\"\\w+ (?P<id>\\d+)\"`` ``auth_tls_ldap_search_filter=\"(&(objectClass=person)(id={{id}}))\"`` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_username_attribute", + "output": "auth tls ldap username attribute refers to Specifies which LDAP record attribute will be used as username with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_username_attribute", + "output": "auth tls ldap username attribute refers to Specifies which LDAP record attribute will be used as username with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls ldap username attribute", + "output": "auth tls ldap username attribute refers to Specifies which LDAP record attribute will be used as username with LDAP_LOOKUP with 'tls_certificate' authentication method." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls ldap username attribute refers to Specifies which LDAP record attribute will be used as username with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_ldap_username_attribute", + "output": "auth tls ldap username attribute refers to Specifies which LDAP record attribute will be used as username with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_ldap_username_attribute", + "output": "auth tls ldap username attribute refers to Specifies which LDAP record attribute will be used as username with LDAP_LOOKUP with 'tls_certificate' authentication method." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_authorization_lookup_filter", + "output": "auth tls ldap authorization lookup filter refers to Sets optional additional lookup filter that is performed after the user is found. This can be used for example to check whether the user is a member of a particular group. Filter can be built dynamically from the attributes returned by the lookup. Authorization fails when search does not return any entry. If one or more entries are returned authorization succeeds. Example: ``auth_tls_field_parse_regexp=\"\\w+ (?P<id>\\d+)\"`` ``ldap_search_filter=\"(&(objectClass=person)(id={{id}}))\"`` ``auth_tls_ldap_authorization_lookup_filter=\"(&(objectClass=group)(member=uid={{uid}},dc=example,dc=com))\"`` If this option is empty no additional lookup is done and just a successful user lookup is enough to authorize the user. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_authorization_lookup_filter", + "output": "auth tls ldap authorization lookup filter refers to Sets optional additional lookup filter that is performed after the user is found. This can be used for example to check whether the user is a member of a particular group. Filter can be built dynamically from the attributes returned by the lookup. Authorization fails when search does not return any entry. If one or more entries are returned authorization succeeds. Example: ``auth_tls_field_parse_regexp=\"\\w+ (?P<id>\\d+)\"`` ``ldap_search_filter=\"(&(objectClass=person)(id={{id}}))\"`` ``auth_tls_ldap_authorization_lookup_filter=\"(&(objectClass=group)(member=uid={{uid}},dc=example,dc=com))\"`` If this option is empty no additional lookup is done and just a successful user lookup is enough to authorize the user. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls ldap authorization lookup filter", + "output": "auth tls ldap authorization lookup filter refers to Sets optional additional lookup filter that is performed after the user is found. This can be used for example to check whether the user is a member of a particular group. Filter can be built dynamically from the attributes returned by the lookup. Authorization fails when search does not return any entry. If one or more entries are returned authorization succeeds. Example: ``auth_tls_field_parse_regexp=\"\\w+ (?P<id>\\d+)\"`` ``ldap_search_filter=\"(&(objectClass=person)(id={{id}}))\"`` ``auth_tls_ldap_authorization_lookup_filter=\"(&(objectClass=group)(member=uid={{uid}},dc=example,dc=com))\"`` If this option is empty no additional lookup is done and just a successful user lookup is enough to authorize the user. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls ldap authorization lookup filter refers to Sets optional additional lookup filter that is performed after the user is found. This can be used for example to check whether the user is a member of a particular group. Filter can be built dynamically from the attributes returned by the lookup. Authorization fails when search does not return any entry. If one or more entries are returned authorization succeeds. Example: ``auth_tls_field_parse_regexp=\"\\w+ (?P<id>\\d+)\"`` ``ldap_search_filter=\"(&(objectClass=person)(id={{id}}))\"`` ``auth_tls_ldap_authorization_lookup_filter=\"(&(objectClass=group)(member=uid={{uid}},dc=example,dc=com))\"`` If this option is empty no additional lookup is done and just a successful user lookup is enough to authorize the user. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_ldap_authorization_lookup_filter", + "output": "auth tls ldap authorization lookup filter refers to Sets optional additional lookup filter that is performed after the user is found. This can be used for example to check whether the user is a member of a particular group. Filter can be built dynamically from the attributes returned by the lookup. Authorization fails when search does not return any entry. If one or more entries are returned authorization succeeds. Example: ``auth_tls_field_parse_regexp=\"\\w+ (?P<id>\\d+)\"`` ``ldap_search_filter=\"(&(objectClass=person)(id={{id}}))\"`` ``auth_tls_ldap_authorization_lookup_filter=\"(&(objectClass=group)(member=uid={{uid}},dc=example,dc=com))\"`` If this option is empty no additional lookup is done and just a successful user lookup is enough to authorize the user. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_ldap_authorization_lookup_filter", + "output": "auth tls ldap authorization lookup filter refers to Sets optional additional lookup filter that is performed after the user is found. This can be used for example to check whether the user is a member of a particular group. Filter can be built dynamically from the attributes returned by the lookup. Authorization fails when search does not return any entry. If one or more entries are returned authorization succeeds. Example: ``auth_tls_field_parse_regexp=\"\\w+ (?P<id>\\d+)\"`` ``ldap_search_filter=\"(&(objectClass=person)(id={{id}}))\"`` ``auth_tls_ldap_authorization_lookup_filter=\"(&(objectClass=group)(member=uid={{uid}},dc=example,dc=com))\"`` If this option is empty no additional lookup is done and just a successful user lookup is enough to authorize the user. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_authorization_search_base", + "output": "auth tls ldap authorization search base refers to Base DN where to start the Authorization lookup. Used when 'auth_tls_ldap_authorization_lookup_filter' is set." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_tls_ldap_authorization_search_base", + "output": "auth tls ldap authorization search base refers to Base DN where to start the Authorization lookup. Used when 'auth_tls_ldap_authorization_lookup_filter' is set." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth tls ldap authorization search base", + "output": "auth tls ldap authorization search base refers to Base DN where to start the Authorization lookup. Used when 'auth_tls_ldap_authorization_lookup_filter' is set." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth tls ldap authorization search base refers to Base DN where to start the Authorization lookup. Used when 'auth_tls_ldap_authorization_lookup_filter' is set." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_tls_ldap_authorization_search_base", + "output": "auth tls ldap authorization search base refers to Base DN where to start the Authorization lookup. Used when 'auth_tls_ldap_authorization_lookup_filter' is set." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_tls_ldap_authorization_search_base", + "output": "auth tls ldap authorization search base refers to Base DN where to start the Authorization lookup. Used when 'auth_tls_ldap_authorization_lookup_filter' is set." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_token_source", + "output": "auth jwt token source refers to Sets up the way how the token will picked from the request COOKIE: Will use 'auth_jwt_cookie_name' cookie content parsed with 'auth_jwt_source_parse_regexp' to obtain the token content. HEADER: Will use 'auth_jwt_header_name' header value parsed with 'auth_jwt_source_parse_regexp' to obtain the token content. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_token_source", + "output": "auth jwt token source refers to Sets up the way how the token will picked from the request COOKIE: Will use 'auth_jwt_cookie_name' cookie content parsed with 'auth_jwt_source_parse_regexp' to obtain the token content. HEADER: Will use 'auth_jwt_header_name' header value parsed with 'auth_jwt_source_parse_regexp' to obtain the token content. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth jwt token source", + "output": "auth jwt token source refers to Sets up the way how the token will picked from the request COOKIE: Will use 'auth_jwt_cookie_name' cookie content parsed with 'auth_jwt_source_parse_regexp' to obtain the token content. HEADER: Will use 'auth_jwt_header_name' header value parsed with 'auth_jwt_source_parse_regexp' to obtain the token content. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth jwt token source refers to Sets up the way how the token will picked from the request COOKIE: Will use 'auth_jwt_cookie_name' cookie content parsed with 'auth_jwt_source_parse_regexp' to obtain the token content. HEADER: Will use 'auth_jwt_header_name' header value parsed with 'auth_jwt_source_parse_regexp' to obtain the token content. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_jwt_token_source", + "output": "auth jwt token source refers to Sets up the way how the token will picked from the request COOKIE: Will use 'auth_jwt_cookie_name' cookie content parsed with 'auth_jwt_source_parse_regexp' to obtain the token content. HEADER: Will use 'auth_jwt_header_name' header value parsed with 'auth_jwt_source_parse_regexp' to obtain the token content. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_jwt_token_source", + "output": "auth jwt token source refers to Sets up the way how the token will picked from the request COOKIE: Will use 'auth_jwt_cookie_name' cookie content parsed with 'auth_jwt_source_parse_regexp' to obtain the token content. HEADER: Will use 'auth_jwt_header_name' header value parsed with 'auth_jwt_source_parse_regexp' to obtain the token content. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_cookie_name", + "output": "auth jwt cookie name refers to Specifies name of the cookie that will be used to obtain JWT." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_cookie_name", + "output": "auth jwt cookie name refers to Specifies name of the cookie that will be used to obtain JWT." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth jwt cookie name", + "output": "auth jwt cookie name refers to Specifies name of the cookie that will be used to obtain JWT." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth jwt cookie name refers to Specifies name of the cookie that will be used to obtain JWT." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_jwt_cookie_name", + "output": "auth jwt cookie name refers to Specifies name of the cookie that will be used to obtain JWT." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_jwt_cookie_name", + "output": "auth jwt cookie name refers to Specifies name of the cookie that will be used to obtain JWT." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_header_name", + "output": "auth jwt header name refers to Specifies name http header that will be used to obtain JWT" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_header_name", + "output": "auth jwt header name refers to Specifies name http header that will be used to obtain JWT" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth jwt header name", + "output": "auth jwt header name refers to Specifies name http header that will be used to obtain JWT" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth jwt header name refers to Specifies name http header that will be used to obtain JWT" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_jwt_header_name", + "output": "auth jwt header name refers to Specifies name http header that will be used to obtain JWT" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_jwt_header_name", + "output": "auth jwt header name refers to Specifies name http header that will be used to obtain JWT" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_source_parse_regexp", + "output": "auth jwt source parse regexp refers to Regular expression that will be used to parse JWT source. Expression is in Python syntax and must contain named group 'token' with capturing the token value." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_source_parse_regexp", + "output": "auth jwt source parse regexp refers to Regular expression that will be used to parse JWT source. Expression is in Python syntax and must contain named group 'token' with capturing the token value." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth jwt source parse regexp", + "output": "auth jwt source parse regexp refers to Regular expression that will be used to parse JWT source. Expression is in Python syntax and must contain named group 'token' with capturing the token value." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth jwt source parse regexp refers to Regular expression that will be used to parse JWT source. Expression is in Python syntax and must contain named group 'token' with capturing the token value." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_jwt_source_parse_regexp", + "output": "auth jwt source parse regexp refers to Regular expression that will be used to parse JWT source. Expression is in Python syntax and must contain named group 'token' with capturing the token value." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_jwt_source_parse_regexp", + "output": "auth jwt source parse regexp refers to Regular expression that will be used to parse JWT source. Expression is in Python syntax and must contain named group 'token' with capturing the token value." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_username_claim_name", + "output": "auth jwt username claim name refers to Which JWT claim will be used as username for Driverless." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_username_claim_name", + "output": "auth jwt username claim name refers to Which JWT claim will be used as username for Driverless." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth jwt username claim name", + "output": "auth jwt username claim name refers to Which JWT claim will be used as username for Driverless." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth jwt username claim name refers to Which JWT claim will be used as username for Driverless." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_jwt_username_claim_name", + "output": "auth jwt username claim name refers to Which JWT claim will be used as username for Driverless." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_jwt_username_claim_name", + "output": "auth jwt username claim name refers to Which JWT claim will be used as username for Driverless." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_verify", + "output": "auth jwt verify refers to Whether to verify the signature of the JWT." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_verify", + "output": "auth jwt verify refers to Whether to verify the signature of the JWT." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth jwt verify", + "output": "auth jwt verify refers to Whether to verify the signature of the JWT." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth jwt verify refers to Whether to verify the signature of the JWT." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_jwt_verify", + "output": "auth jwt verify refers to Whether to verify the signature of the JWT." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_jwt_verify", + "output": "auth jwt verify refers to Whether to verify the signature of the JWT." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_algorithm", + "output": "auth jwt algorithm refers to Signature algorithm that will be used to verify the signature according to RFC 7518." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_algorithm", + "output": "auth jwt algorithm refers to Signature algorithm that will be used to verify the signature according to RFC 7518." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth jwt algorithm", + "output": "auth jwt algorithm refers to Signature algorithm that will be used to verify the signature according to RFC 7518." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth jwt algorithm refers to Signature algorithm that will be used to verify the signature according to RFC 7518." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_jwt_algorithm", + "output": "auth jwt algorithm refers to Signature algorithm that will be used to verify the signature according to RFC 7518." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_jwt_algorithm", + "output": "auth jwt algorithm refers to Signature algorithm that will be used to verify the signature according to RFC 7518." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_secret", + "output": "auth jwt secret refers to Specifies the secret content for HMAC or public key for RSA and DSA signature algorithms." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_secret", + "output": "auth jwt secret refers to Specifies the secret content for HMAC or public key for RSA and DSA signature algorithms." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth jwt secret", + "output": "auth jwt secret refers to Specifies the secret content for HMAC or public key for RSA and DSA signature algorithms." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth jwt secret refers to Specifies the secret content for HMAC or public key for RSA and DSA signature algorithms." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_jwt_secret", + "output": "auth jwt secret refers to Specifies the secret content for HMAC or public key for RSA and DSA signature algorithms." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_jwt_secret", + "output": "auth jwt secret refers to Specifies the secret content for HMAC or public key for RSA and DSA signature algorithms." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_exp_leeway_seconds", + "output": "auth jwt exp leeway seconds refers to Number of seconds for which a JWT can still be accepted after it has expired" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_exp_leeway_seconds", + "output": "auth jwt exp leeway seconds refers to Number of seconds for which a JWT can still be accepted after it has expired" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth jwt exp leeway seconds", + "output": "auth jwt exp leeway seconds refers to Number of seconds for which a JWT can still be accepted after it has expired" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth jwt exp leeway seconds refers to Number of seconds for which a JWT can still be accepted after it has expired" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_jwt_exp_leeway_seconds", + "output": "auth jwt exp leeway seconds refers to Number of seconds for which a JWT can still be accepted after it has expired" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_jwt_exp_leeway_seconds", + "output": "auth jwt exp leeway seconds refers to Number of seconds for which a JWT can still be accepted after it has expired" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless 
AI", + "input": "auth_jwt_required_audience", + "output": "auth jwt required audience refers to List of accepted 'aud' claims for the JWTs. When empty, any audience is accepted" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_required_audience", + "output": "auth jwt required audience refers to List of accepted 'aud' claims for the JWTs. When empty, any audience is accepted" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth jwt required audience", + "output": "auth jwt required audience refers to List of accepted 'aud' claims for the JWTs. When empty, any audience is accepted" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth jwt required audience refers to List of accepted 'aud' claims for the JWTs. When empty, any audience is accepted" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_jwt_required_audience", + "output": "auth jwt required audience refers to List of accepted 'aud' claims for the JWTs. When empty, any audience is accepted" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_jwt_required_audience", + "output": "auth jwt required audience refers to List of accepted 'aud' claims for the JWTs. When empty, any audience is accepted" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_required_issuer", + "output": "auth jwt required issuer refers to Value of the 'iss' claim that JWTs need to have in order to be accepted."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth_jwt_required_issuer", + "output": "auth jwt required issuer refers to Value of the 'iss' claim that JWTs need to have in order to be accepted." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "auth jwt required issuer", + "output": "auth jwt required issuer refers to Value of the 'iss' claim that JWTs need to have in order to be accepted." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "auth jwt required issuer refers to Value of the 'iss' claim that JWTs need to have in order to be accepted." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting auth_jwt_required_issuer", + "output": "auth jwt required issuer refers to Value of the 'iss' claim that JWTs need to have in order to be accepted." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting auth_jwt_required_issuer", + "output": "auth jwt required issuer refers to Value of the 'iss' claim that JWTs need to have in order to be accepted." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "local_htpasswd_file", + "output": "local htpasswd file refers to Local password file Generating a htpasswd file: see syntax below ``htpasswd -B '' ''`` note: -B forces use of bcrypt, a secure encryption method" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "local_htpasswd_file", + "output": "local htpasswd file refers to Local password file Generating a htpasswd file: see syntax below ``htpasswd -B '' ''`` note: -B forces use of bcrypt, a secure encryption method" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "local htpasswd file", + "output": "local htpasswd file refers to Local password file Generating a htpasswd file: see syntax below ``htpasswd -B '' ''`` note: -B forces use of bcrypt, a secure encryption method" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "local htpasswd file refers to Local password file Generating a htpasswd file: see syntax below ``htpasswd -B '' ''`` note: -B forces use of bcrypt, a secure encryption method" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting local_htpasswd_file", + "output": "local htpasswd file refers to Local password file Generating a htpasswd file: see syntax below ``htpasswd -B '' ''`` note: -B forces use of bcrypt, a secure encryption method" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting local_htpasswd_file", + "output": "local htpasswd file refers to Local password file Generating a htpasswd file: see syntax below ``htpasswd -B '' ''`` note: -B forces use of bcrypt, a secure encryption method" + }, + { + "prompt_type":
"human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "authorization_service", + "output": "authorization service refers to Authorization service name: * local: Authorization is based on config.toml settings such as `local_administrator_list`" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "authorization_service", + "output": "authorization service refers to Authorization service name: Authorization service name: * local: Authorization is based on config.toml settings such as `local_administrator_list`" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "authorization service", + "output": "authorization service refers to Authorization service name: Authorization service name: * local: Authorization is based on config.toml settings such as `local_administrator_list`" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Authorization service name: ", + "output": "authorization service refers to Authorization service name: Authorization service name: * local: Authorization is based on config.toml settings such as `local_administrator_list`" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting authorization_service", + "output": "authorization service refers to Authorization service name: * local: Authorization is based on config.toml settings such as `local_administrator_list`" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting authorization_service", + "output": "authorization service refers to Authorization service name: Authorization service name: * local: Authorization is based on config.toml settings such as `local_administrator_list`" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Explain the following expert setting for Driverless AI", + "input": "local_administrator_list", + "output": "local administrator list refers to List of usernames with admin rights: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "local_administrator_list", + "output": "local administrator list refers to List of usernames with admin rights: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "local administrator list", + "output": "local administrator list refers to List of usernames with admin rights: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "List of usernames with admin rights: ", + "output": "local administrator list refers to List of usernames with admin rights: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting local_administrator_list", + "output": "local administrator list refers to List of usernames with admin rights: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting local_administrator_list", + "output": "local administrator list refers to List of usernames with admin rights: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_report_name", + "output": "autodoc report name refers to Specify the name of the report." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_report_name", + "output": "autodoc report name refers to AutoDoc Name: Specify the name of the report." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc report name", + "output": "autodoc report name refers to AutoDoc Name: Specify the name of the report." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "AutoDoc Name: ", + "output": "autodoc report name refers to AutoDoc Name: Specify the name of the report." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_report_name", + "output": "autodoc report name refers to Specify the name of the report." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_report_name", + "output": "autodoc report name refers to AutoDoc Name: Specify the name of the report." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_template", + "output": "autodoc template refers to AutoDoc template path. Provide the full path to your custom AutoDoc template or leave as 'default' to generate the standard AutoDoc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_template", + "output": "autodoc template refers to AutoDoc Template Location: AutoDoc template path. Provide the full path to your custom AutoDoc template or leave as 'default' to generate the standard AutoDoc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc template", + "output": "autodoc template refers to AutoDoc Template Location: AutoDoc template path. Provide the full path to your custom AutoDoc template or leave as 'default' to generate the standard AutoDoc."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "AutoDoc Template Location: ", + "output": "autodoc template refers to AutoDoc Template Location: AutoDoc template path. Provide the full path to your custom AutoDoc template or leave as 'default' to generate the standard AutoDoc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_template", + "output": "autodoc template refers to AutoDoc template path. Provide the full path to your custom AutoDoc template or leave as 'default' to generate the standard AutoDoc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_template", + "output": "autodoc template refers to AutoDoc Template Location: AutoDoc template path. Provide the full path to your custom AutoDoc template or leave as 'default' to generate the standard AutoDoc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_additional_template_folder", + "output": "autodoc additional template folder refers to Location of the additional AutoDoc templates" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_additional_template_folder", + "output": "autodoc additional template folder refers to Location of the additional AutoDoc templates" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc additional template folder", + "output": "autodoc additional template folder refers to Location of the additional AutoDoc templates" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "autodoc additional template folder refers to Location of the additional
AutoDoc templates" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_additional_template_folder", + "output": "autodoc additional template folder refers to Location of the additional AutoDoc templates" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_additional_template_folder", + "output": "autodoc additional template folder refers to Location of the additional AutoDoc templates" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_output_type", + "output": "autodoc output type refers to Specify the AutoDoc output type." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_output_type", + "output": "autodoc output type refers to AutoDoc File Output Type: Specify the AutoDoc output type." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc output type", + "output": "autodoc output type refers to AutoDoc File Output Type: Specify the AutoDoc output type." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "AutoDoc File Output Type: ", + "output": "autodoc output type refers to AutoDoc File Output Type: Specify the AutoDoc output type." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_output_type", + "output": "autodoc output type refers to Specify the AutoDoc output type." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_output_type", + "output": "autodoc output type refers to AutoDoc File Output Type: Specify the AutoDoc output type." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_subtemplate_type", + "output": "autodoc subtemplate type refers to Specify the type of sub-templates to use. Options are 'auto', 'docx' or 'md'." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_subtemplate_type", + "output": "autodoc subtemplate type refers to AutoDoc SubTemplate Type: Specify the type of sub-templates to use. Options are 'auto', 'docx' or 'md'." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc subtemplate type", + "output": "autodoc subtemplate type refers to AutoDoc SubTemplate Type: Specify the type of sub-templates to use. Options are 'auto', 'docx' or 'md'." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "AutoDoc SubTemplate Type: ", + "output": "autodoc subtemplate type refers to AutoDoc SubTemplate Type: Specify the type of sub-templates to use. Options are 'auto', 'docx' or 'md'." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_subtemplate_type", + "output": "autodoc subtemplate type refers to Specify the type of sub-templates to use. Options are 'auto', 'docx' or 'md'." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_subtemplate_type", + "output": "autodoc subtemplate type refers to AutoDoc SubTemplate Type: Specify the type of sub-templates to use. Options are 'auto', 'docx' or 'md'." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_max_cm_size", + "output": "autodoc max cm size refers to Specify the maximum number of classes in the confusion matrix." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_max_cm_size", + "output": "autodoc max cm size refers to Confusion Matrix Max Number of Classes: Specify the maximum number of classes in the confusion matrix." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc max cm size", + "output": "autodoc max cm size refers to Confusion Matrix Max Number of Classes: Specify the maximum number of classes in the confusion matrix." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Confusion Matrix Max Number of Classes: ", + "output": "autodoc max cm size refers to Confusion Matrix Max Number of Classes: Specify the maximum number of classes in the confusion matrix." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_max_cm_size", + "output": "autodoc max cm size refers to Specify the maximum number of classes in the confusion matrix." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_max_cm_size", + "output": "autodoc max cm size refers to Confusion Matrix Max Number of Classes: Specify the maximum number of classes in the confusion matrix." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_num_features", + "output": "autodoc num features refers to Specify the number of top features to display in the document. setting to -1 disables this restriction." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_num_features", + "output": "autodoc num features refers to Number of Top Features to Document: Specify the number of top features to display in the document. setting to -1 disables this restriction." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc num features", + "output": "autodoc num features refers to Number of Top Features to Document: Specify the number of top features to display in the document. setting to -1 disables this restriction." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of Top Features to Document: ", + "output": "autodoc num features refers to Number of Top Features to Document: Specify the number of top features to display in the document. setting to -1 disables this restriction." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_num_features", + "output": "autodoc num features refers to Specify the number of top features to display in the document. setting to -1 disables this restriction." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_num_features", + "output": "autodoc num features refers to Number of Top Features to Document: Specify the number of top features to display in the document. setting to -1 disables this restriction." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_min_relative_importance", + "output": "autodoc min relative importance refers to Specify the minimum relative importance in order for a feature to be displayed. autodoc_min_relative_importance must be a float >= 0 and <= 1." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_min_relative_importance", + "output": "autodoc min relative importance refers to Minimum Relative Feature Importance Threshold: Specify the minimum relative importance in order for a feature to be displayed. autodoc_min_relative_importance must be a float >= 0 and <= 1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc min relative importance", + "output": "autodoc min relative importance refers to Minimum Relative Feature Importance Threshold: Specify the minimum relative importance in order for a feature to be displayed. autodoc_min_relative_importance must be a float >= 0 and <= 1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Minimum Relative Feature Importance Threshold: ", + "output": "autodoc min relative importance refers to Minimum Relative Feature Importance Threshold: Specify the minimum relative importance in order for a feature to be displayed. autodoc_min_relative_importance must be a float >= 0 and <= 1." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_min_relative_importance", + "output": "autodoc min relative importance refers to Specify the minimum relative importance in order for a feature to be displayed. autodoc_min_relative_importance must be a float >= 0 and <= 1." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_min_relative_importance", + "output": "autodoc min relative importance refers to Minimum Relative Feature Importance Threshold: Specify the minimum relative importance in order for a feature to be displayed. autodoc_min_relative_importance must be a float >= 0 and <= 1." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_include_permutation_feature_importance", + "output": "autodoc include permutation feature importance refers to Whether to compute permutation based feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_include_permutation_feature_importance", + "output": "autodoc include permutation feature importance refers to Permutation Feature Importance: Whether to compute permutation based feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc include permutation feature importance", + "output": "autodoc include permutation feature importance refers to Permutation Feature Importance: Whether to compute permutation based feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Permutation Feature Importance: ", + "output": "autodoc include permutation feature importance refers to Permutation Feature Importance: Whether to compute permutation based feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_include_permutation_feature_importance", + "output": "autodoc include permutation feature importance refers to Whether to compute permutation based feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_include_permutation_feature_importance", + "output": "autodoc include permutation feature importance refers to Permutation Feature Importance: Whether to compute permutation based feature importance." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_feature_importance_num_perm", + "output": "autodoc feature importance num perm refers to Number of permutations to make per feature when computing feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_feature_importance_num_perm", + "output": "autodoc feature importance num perm refers to Number of Permutations for Feature Importance: Number of permutations to make per feature when computing feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc feature importance num perm", + "output": "autodoc feature importance num perm refers to Number of Permutations for Feature Importance: Number of permutations to make per feature when computing feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of Permutations for Feature Importance: ", + "output": "autodoc feature importance num perm refers to Number of Permutations for Feature Importance: Number of permutations to make per feature when computing feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_feature_importance_num_perm", + "output": "autodoc feature importance num perm refers to Number of permutations to make per feature when computing feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_feature_importance_num_perm", + "output": "autodoc feature importance num perm refers to Number of Permutations for Feature Importance: Number of permutations to make per feature when computing feature importance." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_feature_importance_scorer", + "output": "autodoc feature importance scorer refers to Name of the scorer to be used to calculate feature importance. Leave blank to use experiments default scorer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_feature_importance_scorer", + "output": "autodoc feature importance scorer refers to Feature Importance Scorer: Name of the scorer to be used to calculate feature importance. Leave blank to use experiments default scorer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc feature importance scorer", + "output": "autodoc feature importance scorer refers to Feature Importance Scorer: Name of the scorer to be used to calculate feature importance. Leave blank to use experiments default scorer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Feature Importance Scorer: ", + "output": "autodoc feature importance scorer refers to Feature Importance Scorer: Name of the scorer to be used to calculate feature importance. Leave blank to use experiments default scorer." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_feature_importance_scorer", + "output": "autodoc feature importance scorer refers to Name of the scorer to be used to calculate feature importance. Leave blank to use experiments default scorer." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_feature_importance_scorer", + "output": "autodoc feature importance scorer refers to Feature Importance Scorer: Name of the scorer to be used to calculate feature importance. 
Leave blank to use experiments default scorer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_pd_max_rows", + "output": "autodoc pd max rows refers to The autodoc_pd_max_rows configuration controls the number of rows shown for the partial dependence plots (PDP) and Shapley values summary plot in the AutoDoc. Random sampling is used for datasets with more than the autodoc_pd_max_rows limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_pd_max_rows", + "output": "autodoc pd max rows refers to PDP and Shapley Summary Plot Max Rows: The autodoc_pd_max_rows configuration controls the number of rows shown for the partial dependence plots (PDP) and Shapley values summary plot in the AutoDoc. Random sampling is used for datasets with more than the autodoc_pd_max_rows limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc pd max rows", + "output": "autodoc pd max rows refers to PDP and Shapley Summary Plot Max Rows: The autodoc_pd_max_rows configuration controls the number of rows shown for the partial dependence plots (PDP) and Shapley values summary plot in the AutoDoc. Random sampling is used for datasets with more than the autodoc_pd_max_rows limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "PDP and Shapley Summary Plot Max Rows: ", + "output": "autodoc pd max rows refers to PDP and Shapley Summary Plot Max Rows: The autodoc_pd_max_rows configuration controls the number of rows shown for the partial dependence plots (PDP) and Shapley values summary plot in the AutoDoc. Random sampling is used for datasets with more than the autodoc_pd_max_rows limit." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_pd_max_rows", + "output": "autodoc pd max rows refers to The autodoc_pd_max_rows configuration controls the number of rows shown for the partial dependence plots (PDP) and Shapley values summary plot in the AutoDoc. Random sampling is used for datasets with more than the autodoc_pd_max_rows limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_pd_max_rows", + "output": "autodoc pd max rows refers to PDP and Shapley Summary Plot Max Rows: The autodoc_pd_max_rows configuration controls the number of rows shown for the partial dependence plots (PDP) and Shapley values summary plot in the AutoDoc. Random sampling is used for datasets with more than the autodoc_pd_max_rows limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_pd_max_runtime", + "output": "autodoc pd max runtime refers to Maximum number of seconds Partial Dependency computation can take when generating report. Set to -1 for no time limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_pd_max_runtime", + "output": "autodoc pd max runtime refers to PDP max runtime in seconds: Maximum number of seconds Partial Dependency computation can take when generating report. Set to -1 for no time limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc pd max runtime", + "output": "autodoc pd max runtime refers to PDP max runtime in seconds: Maximum number of seconds Partial Dependency computation can take when generating report. Set to -1 for no time limit." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "PDP max runtime in seconds: ", + "output": "autodoc pd max runtime refers to PDP max runtime in seconds: Maximum number of seconds Partial Dependency computation can take when generating report. Set to -1 for no time limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_pd_max_runtime", + "output": "autodoc pd max runtime refers to Maximum number of seconds Partial Dependency computation can take when generating report. Set to -1 for no time limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_pd_max_runtime", + "output": "autodoc pd max runtime refers to PDP max runtime in seconds: Maximum number of seconds Partial Dependency computation can take when generating report. Set to -1 for no time limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_pd_fast_approx", + "output": "autodoc pd fast approx refers to Whether to enable fast approximation for predictions that are needed for the generation of partial dependence plots. Can help when want to create many PDP plots in short time. Amount of approximation is controlled by fast_approx_num_trees, fast_approx_do_one_fold, fast_approx_do_one_model experiment expert settings. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_pd_fast_approx", + "output": "autodoc pd fast approx refers to Use fast approximation for PDP: Whether to enable fast approximation for predictions that are needed for the generation of partial dependence plots. Can help when want to create many PDP plots in short time. 
Amount of approximation is controlled by fast_approx_num_trees, fast_approx_do_one_fold, fast_approx_do_one_model experiment expert settings. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc pd fast approx", + "output": "autodoc pd fast approx refers to Use fast approximation for PDP: Whether to enable fast approximation for predictions that are needed for the generation of partial dependence plots. Can help when want to create many PDP plots in short time. Amount of approximation is controlled by fast_approx_num_trees, fast_approx_do_one_fold, fast_approx_do_one_model experiment expert settings. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Use fast approximation for PDP: ", + "output": "autodoc pd fast approx refers to Use fast approximation for PDP: Whether to enable fast approximation for predictions that are needed for the generation of partial dependence plots. Can help when want to create many PDP plots in short time. Amount of approximation is controlled by fast_approx_num_trees, fast_approx_do_one_fold, fast_approx_do_one_model experiment expert settings. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_pd_fast_approx", + "output": "autodoc pd fast approx refers to Whether to enable fast approximation for predictions that are needed for the generation of partial dependence plots. Can help when want to create many PDP plots in short time. Amount of approximation is controlled by fast_approx_num_trees, fast_approx_do_one_fold, fast_approx_do_one_model experiment expert settings. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_pd_fast_approx", + "output": "autodoc pd fast approx refers to Use fast approximation for PDP: Whether to enable fast approximation for predictions that are needed for the generation of partial dependence plots. Can help when want to create many PDP plots in short time. Amount of approximation is controlled by fast_approx_num_trees, fast_approx_do_one_fold, fast_approx_do_one_model experiment expert settings. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_pd_max_int_as_cat_uniques", + "output": "autodoc pd max int as cat uniques refers to Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only) Similar to max_int_as_cat_uniques used for experiment, but here used to control PDP making." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_pd_max_int_as_cat_uniques", + "output": "autodoc pd max int as cat uniques refers to PDP Max. number of unique values for int/float to be categoricals: Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only) Similar to max_int_as_cat_uniques used for experiment, but here used to control PDP making." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc pd max int as cat uniques", + "output": "autodoc pd max int as cat uniques refers to PDP Max. 
number of unique values for int/float to be categoricals: Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only) Similar to max_int_as_cat_uniques used for experiment, but here used to control PDP making." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "PDP Max. number of unique values for int/float to be categoricals: ", + "output": "autodoc pd max int as cat uniques refers to PDP Max. number of unique values for int/float to be categoricals: Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only) Similar to max_int_as_cat_uniques used for experiment, but here used to control PDP making." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_pd_max_int_as_cat_uniques", + "output": "autodoc pd max int as cat uniques refers to Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only) Similar to max_int_as_cat_uniques used for experiment, but here used to control PDP making." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_pd_max_int_as_cat_uniques", + "output": "autodoc pd max int as cat uniques refers to PDP Max. number of unique values for int/float to be categoricals: Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only) Similar to max_int_as_cat_uniques used for experiment, but here used to control PDP making." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_out_of_range", + "output": "autodoc out of range refers to Number of standard deviations outside of the range of a column to include in partial dependence plots. This shows how the model will react to data it has not seen before." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_out_of_range", + "output": "autodoc out of range refers to PDP Out of Range: Number of standard deviations outside of the range of a column to include in partial dependence plots. This shows how the model will react to data it has not seen before." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc out of range", + "output": "autodoc out of range refers to PDP Out of Range: Number of standard deviations outside of the range of a column to include in partial dependence plots. This shows how the model will react to data it has not seen before." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "PDP Out of Range: ", + "output": "autodoc out of range refers to PDP Out of Range: Number of standard deviations outside of the range of a column to include in partial dependence plots. This shows how the model will react to data it has not seen before." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_out_of_range", + "output": "autodoc out of range refers to Number of standard deviations outside of the range of a column to include in partial dependence plots. This shows how the model will react to data it has not seen before." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_out_of_range", + "output": "autodoc out of range refers to PDP Out of Range: Number of standard deviations outside of the range of a column to include in partial dependence plots. This shows how the model will react to data it has not seen before." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_num_rows", + "output": "autodoc num rows refers to Specify the number of rows to include in PDP and ICE plot if individual rows are not specified." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_num_rows", + "output": "autodoc num rows refers to ICE Number of Rows: Specify the number of rows to include in PDP and ICE plot if individual rows are not specified." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc num rows", + "output": "autodoc num rows refers to ICE Number of Rows: Specify the number of rows to include in PDP and ICE plot if individual rows are not specified." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ICE Number of Rows: ", + "output": "autodoc num rows refers to ICE Number of Rows: Specify the number of rows to include in PDP and ICE plot if individual rows are not specified." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_num_rows", + "output": "autodoc num rows refers to Specify the number of rows to include in PDP and ICE plot if individual rows are not specified." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_num_rows", + "output": "autodoc num rows refers to ICE Number of Rows: Specify the number of rows to include in PDP and ICE plot if individual rows are not specified." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_population_stability_index", + "output": "autodoc population stability index refers to Whether to include population stability index if experiment is binary classification/regression." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_population_stability_index", + "output": "autodoc population stability index refers to Population Stability Index: Whether to include population stability index if experiment is binary classification/regression." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc population stability index", + "output": "autodoc population stability index refers to Population Stability Index: Whether to include population stability index if experiment is binary classification/regression." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Population Stability Index: ", + "output": "autodoc population stability index refers to Population Stability Index: Whether to include population stability index if experiment is binary classification/regression." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_population_stability_index", + "output": "autodoc population stability index refers to Whether to include population stability index if experiment is binary classification/regression." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_population_stability_index", + "output": "autodoc population stability index refers to Population Stability Index: Whether to include population stability index if experiment is binary classification/regression." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_population_stability_index_n_quantiles", + "output": "autodoc population stability index n quantiles refers to Number of quantiles to use for population stability index." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_population_stability_index_n_quantiles", + "output": "autodoc population stability index n quantiles refers to Population Stability Index Number of Quantiles: Number of quantiles to use for population stability index." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc population stability index n quantiles", + "output": "autodoc population stability index n quantiles refers to Population Stability Index Number of Quantiles: Number of quantiles to use for population stability index." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Population Stability Index Number of Quantiles: ", + "output": "autodoc population stability index n quantiles refers to Population Stability Index Number of Quantiles: Number of quantiles to use for population stability index." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_population_stability_index_n_quantiles", + "output": "autodoc population stability index n quantiles refers to Number of quantiles to use for population stability index."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_population_stability_index_n_quantiles", + "output": "autodoc population stability index n quantiles refers to Population Stability Index Number of Quantiles: Number of quantiles to use for population stability index." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_prediction_stats", + "output": "autodoc prediction stats refers to Whether to include prediction statistics information if experiment is binary classification/regression." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_prediction_stats", + "output": "autodoc prediction stats refers to Prediction Statistics: Whether to include prediction statistics information if experiment is binary classification/regression." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc prediction stats", + "output": "autodoc prediction stats refers to Prediction Statistics: Whether to include prediction statistics information if experiment is binary classification/regression." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Prediction Statistics: ", + "output": "autodoc prediction stats refers to Prediction Statistics: Whether to include prediction statistics information if experiment is binary classification/regression." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_prediction_stats", + "output": "autodoc prediction stats refers to Whether to include prediction statistics information if experiment is binary classification/regression."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_prediction_stats", + "output": "autodoc prediction stats refers to Prediction Statistics: Whether to include prediction statistics information if experiment is binary classification/regression." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_prediction_stats_n_quantiles", + "output": "autodoc prediction stats n quantiles refers to Number of quantiles to use for prediction statistics." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_prediction_stats_n_quantiles", + "output": "autodoc prediction stats n quantiles refers to Prediction Statistics Number of Quantiles: Number of quantiles to use for prediction statistics." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc prediction stats n quantiles", + "output": "autodoc prediction stats n quantiles refers to Prediction Statistics Number of Quantiles: Number of quantiles to use for prediction statistics." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Prediction Statistics Number of Quantiles: ", + "output": "autodoc prediction stats n quantiles refers to Prediction Statistics Number of Quantiles: Number of quantiles to use for prediction statistics." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_prediction_stats_n_quantiles", + "output": "autodoc prediction stats n quantiles refers to Number of quantiles to use for prediction statistics." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_prediction_stats_n_quantiles", + "output": "autodoc prediction stats n quantiles refers to Prediction Statistics Number of Quantiles: Number of quantiles to use for prediction statistics." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_response_rate", + "output": "autodoc response rate refers to Whether to include response rates information if experiment is binary classification." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_response_rate", + "output": "autodoc response rate refers to Response Rates Plot: Whether to include response rates information if experiment is binary classification." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc response rate", + "output": "autodoc response rate refers to Response Rates Plot: Whether to include response rates information if experiment is binary classification." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Response Rates Plot: ", + "output": "autodoc response rate refers to Response Rates Plot: Whether to include response rates information if experiment is binary classification." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_response_rate", + "output": "autodoc response rate refers to Whether to include response rates information if experiment is binary classification." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_response_rate", + "output": "autodoc response rate refers to Response Rates Plot: Whether to include response rates information if experiment is binary classification." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_response_rate_n_quantiles", + "output": "autodoc response rate n quantiles refers to Number of quantiles to use for response rates information." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_response_rate_n_quantiles", + "output": "autodoc response rate n quantiles refers to Response Rate Plot Number of Quantiles: Number of quantiles to use for response rates information." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc response rate n quantiles", + "output": "autodoc response rate n quantiles refers to Response Rate Plot Number of Quantiles: Number of quantiles to use for response rates information." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Response Rate Plot Number of Quantiles: ", + "output": "autodoc response rate n quantiles refers to Response Rate Plot Number of Quantiles: Number of quantiles to use for response rates information." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_response_rate_n_quantiles", + "output": "autodoc response rate n quantiles refers to Number of quantiles to use for response rates information."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_response_rate_n_quantiles", + "output": "autodoc response rate n quantiles refers to Response Rate Plot Number of Quantiles: Number of quantiles to use for response rates information." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_gini_plot", + "output": "autodoc gini plot refers to Whether to show the Gini Plot." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_gini_plot", + "output": "autodoc gini plot refers to Show GINI Plot: Whether to show the Gini Plot." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc gini plot", + "output": "autodoc gini plot refers to Show GINI Plot: Whether to show the Gini Plot." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Show GINI Plot: ", + "output": "autodoc gini plot refers to Show GINI Plot: Whether to show the Gini Plot." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_gini_plot", + "output": "autodoc gini plot refers to Whether to show the Gini Plot." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_gini_plot", + "output": "autodoc gini plot refers to Show GINI Plot: Whether to show the Gini Plot." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_enable_shapley_values", + "output": "autodoc enable shapley values refers to Show Shapley values results in the AutoDoc."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_enable_shapley_values", + "output": "autodoc enable shapley values refers to Enable Shapley Values: Show Shapley values results in the AutoDoc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc enable shapley values", + "output": "autodoc enable shapley values refers to Enable Shapley Values: Show Shapley values results in the AutoDoc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable Shapley Values: ", + "output": "autodoc enable shapley values refers to Enable Shapley Values: Show Shapley values results in the AutoDoc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_enable_shapley_values", + "output": "autodoc enable shapley values refers to Show Shapley values results in the AutoDoc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_enable_shapley_values", + "output": "autodoc enable shapley values refers to Enable Shapley Values: Show Shapley values results in the AutoDoc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_global_klime_num_features", + "output": "autodoc global klime num features refers to The number of features in a KLIME global GLM coefficients table. Must be an integer greater than 0 or -1. To show all features set to -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_global_klime_num_features", + "output": "autodoc global klime num features refers to Global KLIME Number of Features: The number of features in a KLIME global GLM coefficients table.
Must be an integer greater than 0 or -1. To show all features set to -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc global klime num features", + "output": "autodoc global klime num features refers to Global KLIME Number of Features: The number of features in a KLIME global GLM coefficients table. Must be an integer greater than 0 or -1. To show all features set to -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Global KLIME Number of Features: ", + "output": "autodoc global klime num features refers to Global KLIME Number of Features: The number of features in a KLIME global GLM coefficients table. Must be an integer greater than 0 or -1. To show all features set to -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_global_klime_num_features", + "output": "autodoc global klime num features refers to The number of features in a KLIME global GLM coefficients table. Must be an integer greater than 0 or -1. To show all features set to -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_global_klime_num_features", + "output": "autodoc global klime num features refers to Global KLIME Number of Features: The number of features in a KLIME global GLM coefficients table. Must be an integer greater than 0 or -1. To show all features set to -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_global_klime_num_tables", + "output": "autodoc global klime num tables refers to Set the number of KLIME global GLM coefficients tables. Set to 1 to show one table with coefficients sorted by absolute value.
Set to 2 to show two tables, one with the top positive coefficients and one with the top negative coefficients." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_global_klime_num_tables", + "output": "autodoc global klime num tables refers to Global KLIME Number of Tables: Set the number of KLIME global GLM coefficients tables. Set to 1 to show one table with coefficients sorted by absolute value. Set to 2 to show two tables, one with the top positive coefficients and one with the top negative coefficients." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc global klime num tables", + "output": "autodoc global klime num tables refers to Global KLIME Number of Tables: Set the number of KLIME global GLM coefficients tables. Set to 1 to show one table with coefficients sorted by absolute value. Set to 2 to show two tables, one with the top positive coefficients and one with the top negative coefficients." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Global KLIME Number of Tables: ", + "output": "autodoc global klime num tables refers to Global KLIME Number of Tables: Set the number of KLIME global GLM coefficients tables. Set to 1 to show one table with coefficients sorted by absolute value. Set to 2 to show two tables, one with the top positive coefficients and one with the top negative coefficients." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_global_klime_num_tables", + "output": "autodoc global klime num tables refers to Set the number of KLIME global GLM coefficients tables. Set to 1 to show one table with coefficients sorted by absolute value. Set to 2 to show two tables, one with the top positive coefficients and one with the top negative coefficients."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_global_klime_num_tables", + "output": "autodoc global klime num tables refers to Global KLIME Number of Tables: Set the number of KLIME global GLM coefficients tables. Set to 1 to show one table with coefficients sorted by absolute value. Set to 2 to show two tables, one with the top positive coefficients and one with the top negative coefficients." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_data_summary_col_num", + "output": "autodoc data summary col num refers to Number of features to be shown in data summary. Value must be an integer. Values lower than 1, e.g. 0 or -1, indicate that all columns should be shown." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_data_summary_col_num", + "output": "autodoc data summary col num refers to Number of Features in Data Summary Table: Number of features to be shown in data summary. Value must be an integer. Values lower than 1, e.g. 0 or -1, indicate that all columns should be shown." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc data summary col num", + "output": "autodoc data summary col num refers to Number of Features in Data Summary Table: Number of features to be shown in data summary. Value must be an integer. Values lower than 1, e.g. 0 or -1, indicate that all columns should be shown." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of Features in Data Summary Table: ", + "output": "autodoc data summary col num refers to Number of Features in Data Summary Table: Number of features to be shown in data summary. Value must be an integer. Values lower than 1, e.g.
0 or -1, indicate that all columns should be shown." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_data_summary_col_num", + "output": "autodoc data summary col num refers to Number of features to be shown in data summary. Value must be an integer. Values lower than 1, e.g. 0 or -1, indicate that all columns should be shown." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_data_summary_col_num", + "output": "autodoc data summary col num refers to Number of Features in Data Summary Table: Number of features to be shown in data summary. Value must be an integer. Values lower than 1, e.g. 0 or -1, indicate that all columns should be shown." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_list_all_config_settings", + "output": "autodoc list all config settings refers to Whether to show all config settings. If False, only the changed settings (config overrides) are listed, otherwise all settings are listed." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_list_all_config_settings", + "output": "autodoc list all config settings refers to List All Config Settings: Whether to show all config settings. If False, only the changed settings (config overrides) are listed, otherwise all settings are listed." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc list all config settings", + "output": "autodoc list all config settings refers to List All Config Settings: Whether to show all config settings. If False, only the changed settings (config overrides) are listed, otherwise all settings are listed."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "List All Config Settings: ", + "output": "autodoc list all config settings refers to List All Config Settings: Whether to show all config settings. If False, only the changed settings (config overrides) are listed, otherwise all settings are listed." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_list_all_config_settings", + "output": "autodoc list all config settings refers to Whether to show all config settings. If False, only the changed settings (config overrides) are listed, otherwise all settings are listed." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_list_all_config_settings", + "output": "autodoc list all config settings refers to List All Config Settings: Whether to show all config settings. If False, only the changed settings (config overrides) are listed, otherwise all settings are listed." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_keras_summary_line_length", + "output": "autodoc keras summary line length refers to Line length of the keras model architecture summary. Must be an integer greater than 0 or -1. To use the default line length set value -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_keras_summary_line_length", + "output": "autodoc keras summary line length refers to Keras Model Architecture Summary Line Length: Line length of the keras model architecture summary. Must be an integer greater than 0 or -1. To use the default line length set value -1." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc keras summary line length", + "output": "autodoc keras summary line length refers to Keras Model Architecture Summary Line Length: Line length of the keras model architecture summary. Must be an integer greater than 0 or -1. To use the default line length set value -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Keras Model Architecture Summary Line Length: ", + "output": "autodoc keras summary line length refers to Keras Model Architecture Summary Line Length: Line length of the keras model architecture summary. Must be an integer greater than 0 or -1. To use the default line length set value -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_keras_summary_line_length", + "output": "autodoc keras summary line length refers to Line length of the keras model architecture summary. Must be an integer greater than 0 or -1. To use the default line length set value -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_keras_summary_line_length", + "output": "autodoc keras summary line length refers to Keras Model Architecture Summary Line Length: Line length of the keras model architecture summary. Must be an integer greater than 0 or -1. To use the default line length set value -1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_transformer_architecture_max_lines", + "output": "autodoc transformer architecture max lines refers to Maximum number of lines shown for advanced transformer architecture in the Feature section. Note that the full architecture can be found in the Appendix." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_transformer_architecture_max_lines", + "output": "autodoc transformer architecture max lines refers to NLP/Image Transformer Architecture Max Lines: Maximum number of lines shown for advanced transformer architecture in the Feature section. Note that the full architecture can be found in the Appendix." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc transformer architecture max lines", + "output": "autodoc transformer architecture max lines refers to NLP/Image Transformer Architecture Max Lines: Maximum number of lines shown for advanced transformer architecture in the Feature section. Note that the full architecture can be found in the Appendix." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "NLP/Image Transformer Architecture Max Lines: ", + "output": "autodoc transformer architecture max lines refers to NLP/Image Transformer Architecture Max Lines: Maximum number of lines shown for advanced transformer architecture in the Feature section. Note that the full architecture can be found in the Appendix." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_transformer_architecture_max_lines", + "output": "autodoc transformer architecture max lines refers to Maximum number of lines shown for advanced transformer architecture in the Feature section. Note that the full architecture can be found in the Appendix." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_transformer_architecture_max_lines", + "output": "autodoc transformer architecture max lines refers to NLP/Image Transformer Architecture Max Lines: Maximum number of lines shown for advanced transformer architecture in the Feature section. Note that the full architecture can be found in the Appendix." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_full_architecture_in_appendix", + "output": "autodoc full architecture in appendix refers to Show full NLP/Image transformer architecture in the Appendix." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_full_architecture_in_appendix", + "output": "autodoc full architecture in appendix refers to Appendix NLP/Image Transformer Architecture: Show full NLP/Image transformer architecture in the Appendix." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc full architecture in appendix", + "output": "autodoc full architecture in appendix refers to Appendix NLP/Image Transformer Architecture: Show full NLP/Image transformer architecture in the Appendix." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Appendix NLP/Image Transformer Architecture: ", + "output": "autodoc full architecture in appendix refers to Appendix NLP/Image Transformer Architecture: Show full NLP/Image transformer architecture in the Appendix." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_full_architecture_in_appendix", + "output": "autodoc full architecture in appendix refers to Show full NLP/Image transformer architecture in the Appendix." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_full_architecture_in_appendix", + "output": "autodoc full architecture in appendix refers to Appendix NLP/Image Transformer Architecture: Show full NLP/Image transformer architecture in the Appendix." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_coef_table_appendix_results_table", + "output": "autodoc coef table appendix results table refers to Specify whether to show the full glm coefficient table(s) in the appendix. coef_table_appendix_results_table must be a boolean: True to show tables in appendix, False to not show them ." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_coef_table_appendix_results_table", + "output": "autodoc coef table appendix results table refers to Full GLM Coefficients Table in the Appendix: Specify whether to show the full glm coefficient table(s) in the appendix. coef_table_appendix_results_table must be a boolean: True to show tables in appendix, False to not show them ." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc coef table appendix results table", + "output": "autodoc coef table appendix results table refers to Full GLM Coefficients Table in the Appendix: Specify whether to show the full glm coefficient table(s) in the appendix. coef_table_appendix_results_table must be a boolean: True to show tables in appendix, False to not show them ." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Full GLM Coefficients Table in the Appendix: ", + "output": "autodoc coef table appendix results table refers to Full GLM Coefficients Table in the Appendix: Specify whether to show the full glm coefficient table(s) in the appendix. coef_table_appendix_results_table must be a boolean: True to show tables in appendix, False to not show them ." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_coef_table_appendix_results_table", + "output": "autodoc coef table appendix results table refers to Specify whether to show the full glm coefficient table(s) in the appendix. coef_table_appendix_results_table must be a boolean: True to show tables in appendix, False to not show them ." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_coef_table_appendix_results_table", + "output": "autodoc coef table appendix results table refers to Full GLM Coefficients Table in the Appendix: Specify whether to show the full glm coefficient table(s) in the appendix. coef_table_appendix_results_table must be a boolean: True to show tables in appendix, False to not show them ." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_coef_table_num_models", + "output": "autodoc coef table num models refers to Set the number of models for which a glm coefficients table is shown in the AutoDoc. coef_table_num_models must be -1 or an integer >= 1 (-1 shows all models)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_coef_table_num_models", + "output": "autodoc coef table num models refers to GLM Coefficient Tables Number of Models: Set the number of models for which a glm coefficients table is shown in the AutoDoc. coef_table_num_models must be -1 or an integer >= 1 (-1 shows all models)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc coef table num models", + "output": "autodoc coef table num models refers to GLM Coefficient Tables Number of Models: Set the number of models for which a glm coefficients table is shown in the AutoDoc. coef_table_num_models must be -1 or an integer >= 1 (-1 shows all models)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "GLM Coefficient Tables Number of Models: ", + "output": "autodoc coef table num models refers to GLM Coefficient Tables Number of Models: Set the number of models for which a glm coefficients table is shown in the AutoDoc. coef_table_num_models must be -1 or an integer >= 1 (-1 shows all models)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_coef_table_num_models", + "output": "autodoc coef table num models refers to Set the number of models for which a glm coefficients table is shown in the AutoDoc. coef_table_num_models must be -1 or an integer >= 1 (-1 shows all models)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_coef_table_num_models", + "output": "autodoc coef table num models refers to GLM Coefficient Tables Number of Models: Set the number of models for which a glm coefficients table is shown in the AutoDoc. 
coef_table_num_models must be -1 or an integer >= 1 (-1 shows all models)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_coef_table_num_folds", + "output": "autodoc coef table num folds refers to Set the number of folds per model for which a glm coefficients table is shown in the AutoDoc. coef_table_num_folds must be -1 or an integer >= 1 (-1 shows all folds per model)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_coef_table_num_folds", + "output": "autodoc coef table num folds refers to GLM Coefficient Tables Number of Folds Per Model: Set the number of folds per model for which a glm coefficients table is shown in the AutoDoc. coef_table_num_folds must be -1 or an integer >= 1 (-1 shows all folds per model)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc coef table num folds", + "output": "autodoc coef table num folds refers to GLM Coefficient Tables Number of Folds Per Model: Set the number of folds per model for which a glm coefficients table is shown in the AutoDoc. coef_table_num_folds must be -1 or an integer >= 1 (-1 shows all folds per model)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "GLM Coefficient Tables Number of Folds Per Model: ", + "output": "autodoc coef table num folds refers to GLM Coefficient Tables Number of Folds Per Model: Set the number of folds per model for which a glm coefficients table is shown in the AutoDoc. coef_table_num_folds must be -1 or an integer >= 1 (-1 shows all folds per model)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_coef_table_num_folds", + "output": "autodoc coef table num folds refers to Set the number of folds per model for which a glm coefficients table is shown in the AutoDoc. coef_table_num_folds must be -1 or an integer >= 1 (-1 shows all folds per model)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_coef_table_num_folds", + "output": "autodoc coef table num folds refers to GLM Coefficient Tables Number of Folds Per Model: Set the number of folds per model for which a glm coefficients table is shown in the AutoDoc. coef_table_num_folds must be -1 or an integer >= 1 (-1 shows all folds per model)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_coef_table_num_coef", + "output": "autodoc coef table num coef refers to Set the number of coefficients to show within a glm coefficients table in the AutoDoc. coef_table_num_coef, controls the number of rows shown in a glm table and must be -1 or an integer >= 1 (-1 shows all coefficients)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_coef_table_num_coef", + "output": "autodoc coef table num coef refers to GLM Coefficient Tables Number of Coefficients : Set the number of coefficients to show within a glm coefficients table in the AutoDoc. coef_table_num_coef, controls the number of rows shown in a glm table and must be -1 or an integer >= 1 (-1 shows all coefficients)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc coef table num coef", + "output": "autodoc coef table num coef refers to GLM Coefficient Tables Number of Coefficients : Set the number of coefficients to show within a glm coefficients table in the AutoDoc. coef_table_num_coef, controls the number of rows shown in a glm table and must be -1 or an integer >= 1 (-1 shows all coefficients)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "GLM Coefficient Tables Number of Coefficients : ", + "output": "autodoc coef table num coef refers to GLM Coefficient Tables Number of Coefficients : Set the number of coefficients to show within a glm coefficients table in the AutoDoc. coef_table_num_coef, controls the number of rows shown in a glm table and must be -1 or an integer >= 1 (-1 shows all coefficients)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_coef_table_num_coef", + "output": "autodoc coef table num coef refers to Set the number of coefficients to show within a glm coefficients table in the AutoDoc. coef_table_num_coef, controls the number of rows shown in a glm table and must be -1 or an integer >= 1 (-1 shows all coefficients)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_coef_table_num_coef", + "output": "autodoc coef table num coef refers to GLM Coefficient Tables Number of Coefficients : Set the number of coefficients to show within a glm coefficients table in the AutoDoc. coef_table_num_coef, controls the number of rows shown in a glm table and must be -1 or an integer >= 1 (-1 shows all coefficients)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_coef_table_num_classes", + "output": "autodoc coef table num classes refers to Set the number of classes to show within a glm coefficients table in the AutoDoc. coef_table_num_classes controls the number of class-columns shown in a glm table and must be -1 or an integer >= 4 (-1 shows all classes)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_coef_table_num_classes", + "output": "autodoc coef table num classes refers to GLM Coefficient Tables Number of Classes: Set the number of classes to show within a glm coefficients table in the AutoDoc. coef_table_num_classes controls the number of class-columns shown in a glm table and must be -1 or an integer >= 4 (-1 shows all classes)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc coef table num classes", + "output": "autodoc coef table num classes refers to GLM Coefficient Tables Number of Classes: Set the number of classes to show within a glm coefficients table in the AutoDoc. coef_table_num_classes controls the number of class-columns shown in a glm table and must be -1 or an integer >= 4 (-1 shows all classes)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "GLM Coefficient Tables Number of Classes: ", + "output": "autodoc coef table num classes refers to GLM Coefficient Tables Number of Classes: Set the number of classes to show within a glm coefficients table in the AutoDoc. coef_table_num_classes controls the number of class-columns shown in a glm table and must be -1 or an integer >= 4 (-1 shows all classes)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_coef_table_num_classes", + "output": "autodoc coef table num classes refers to Set the number of classes to show within a glm coefficients table in the AutoDoc. coef_table_num_classes controls the number of class-columns shown in a glm table and must be -1 or an integer >= 4 (-1 shows all classes)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_coef_table_num_classes", + "output": "autodoc coef table num classes refers to GLM Coefficient Tables Number of Classes: Set the number of classes to show within a glm coefficients table in the AutoDoc. coef_table_num_classes controls the number of class-columns shown in a glm table and must be -1 or an integer >= 4 (-1 shows all classes)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_num_histogram_plots", + "output": "autodoc num histogram plots refers to When histogram plots are available: The number of top (default 10) features for which to show histograms." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_num_histogram_plots", + "output": "autodoc num histogram plots refers to Number of Histograms to Show: When histogram plots are available: The number of top (default 10) features for which to show histograms." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc num histogram plots", + "output": "autodoc num histogram plots refers to Number of Histograms to Show: When histogram plots are available: The number of top (default 10) features for which to show histograms." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of Histograms to Show: ", + "output": "autodoc num histogram plots refers to Number of Histograms to Show: When histogram plots are available: The number of top (default 10) features for which to show histograms." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_num_histogram_plots", + "output": "autodoc num histogram plots refers to When histogram plots are available: The number of top (default 10) features for which to show histograms." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_num_histogram_plots", + "output": "autodoc num histogram plots refers to Number of Histograms to Show: When histogram plots are available: The number of top (default 10) features for which to show histograms." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pdp_max_threads", + "output": "pdp max threads refers to Maximum number of threads/forks for autoreport PDP. -1 means auto.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pdp_max_threads", + "output": "pdp max threads refers to Maximum number of threads/forks for autoreport PDP. -1 means auto.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pdp max threads", + "output": "pdp max threads refers to Maximum number of threads/forks for autoreport PDP. -1 means auto.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of threads/forks for autoreport PDP. 
-1 means auto.: ", + "output": "pdp max threads refers to Maximum number of threads/forks for autoreport PDP. -1 means auto.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pdp_max_threads", + "output": "pdp max threads refers to Maximum number of threads/forks for autoreport PDP. -1 means auto.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pdp_max_threads", + "output": "pdp max threads refers to Maximum number of threads/forks for autoreport PDP. -1 means auto.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_force_singlenode", + "output": "autodoc force singlenode refers to If True, will force AutoDoc to run in only the main server, not on remote workers in case of a multi-node setup" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc_force_singlenode", + "output": "autodoc force singlenode refers to If True, will force AutoDoc to run in only the main server, not on remote workers in case of a multi-node setup" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autodoc force singlenode", + "output": "autodoc force singlenode refers to If True, will force AutoDoc to run in only the main server, not on remote workers in case of a multi-node setup" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "autodoc force singlenode refers to If True, will force AutoDoc to run in only the main server, not on remote workers in case of a multi-node setup" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autodoc_force_singlenode", + "output": "autodoc force 
singlenode refers to If True, will force AutoDoc to run in only the main server, not on remote workers in case of a multi-node setup" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autodoc_force_singlenode", + "output": "autodoc force singlenode refers to If True, will force AutoDoc to run in only the main server, not on remote workers in case of a multi-node setup" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "vis_server_ip", + "output": "vis server ip refers to IP address and port of autoviz process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "vis_server_ip", + "output": "vis server ip refers to IP address and port of autoviz process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "vis server ip", + "output": "vis server ip refers to IP address and port of autoviz process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "vis server ip refers to IP address and port of autoviz process." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting vis_server_ip", + "output": "vis server ip refers to IP address and port of autoviz process." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting vis_server_ip", + "output": "vis server ip refers to IP address and port of autoviz process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "vis_server_port", + "output": "vis server port refers to IP and port of autoviz process." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "vis_server_port", + "output": "vis server port refers to IP and port of autoviz process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "vis server port", + "output": "vis server port refers to IP and port of autoviz process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "vis server port refers to IP and port of autoviz process." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting vis_server_port", + "output": "vis server port refers to IP and port of autoviz process." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting vis_server_port", + "output": "vis server port refers to IP and port of autoviz process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autoviz_max_num_columns", + "output": "autoviz max num columns refers to Maximum number of columns autoviz will work with. If dataset has more columns than this number, autoviz will pick columns randomly, prioritizing numerical columns " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autoviz_max_num_columns", + "output": "autoviz max num columns refers to Maximum number of column for Autoviz: Maximum number of columns autoviz will work with. 
If dataset has more columns than this number, autoviz will pick columns randomly, prioritizing numerical columns " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autoviz max num columns", + "output": "autoviz max num columns refers to Maximum number of column for Autoviz: Maximum number of columns autoviz will work with. If dataset has more columns than this number, autoviz will pick columns randomly, prioritizing numerical columns " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of column for Autoviz: ", + "output": "autoviz max num columns refers to Maximum number of column for Autoviz: Maximum number of columns autoviz will work with. If dataset has more columns than this number, autoviz will pick columns randomly, prioritizing numerical columns " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autoviz_max_num_columns", + "output": "autoviz max num columns refers to Maximum number of columns autoviz will work with. If dataset has more columns than this number, autoviz will pick columns randomly, prioritizing numerical columns " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autoviz_max_num_columns", + "output": "autoviz max num columns refers to Maximum number of column for Autoviz: Maximum number of columns autoviz will work with. 
If dataset has more columns than this number, autoviz will pick columns randomly, prioritizing numerical columns " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autoviz_max_aggregated_rows", + "output": "autoviz max aggregated rows refers to Maximum number of rows in aggregated frame: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autoviz_max_aggregated_rows", + "output": "autoviz max aggregated rows refers to Maximum number of rows in aggregated frame: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autoviz max aggregated rows", + "output": "autoviz max aggregated rows refers to Maximum number of rows in aggregated frame: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of rows in aggregated frame: ", + "output": "autoviz max aggregated rows refers to Maximum number of rows in aggregated frame: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autoviz_max_aggregated_rows", + "output": "autoviz max aggregated rows refers to Maximum number of rows in aggregated frame: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autoviz_max_aggregated_rows", + "output": "autoviz max aggregated rows refers to Maximum number of rows in aggregated frame: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autoviz_enable_recommendations", + "output": "autoviz enable recommendations refers to When enabled, experiment will try to use feature transformations recommended by Autoviz" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following 
expert setting for Driverless AI", + "input": "autoviz_enable_recommendations", + "output": "autoviz enable recommendations refers to Autoviz Use Recommended Transformations: When enabled, experiment will try to use feature transformations recommended by Autoviz" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autoviz enable recommendations", + "output": "autoviz enable recommendations refers to Autoviz Use Recommended Transformations: When enabled, experiment will try to use feature transformations recommended by Autoviz" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Autoviz Use Recommended Transformations: ", + "output": "autoviz enable recommendations refers to Autoviz Use Recommended Transformations: When enabled, experiment will try to use feature transformations recommended by Autoviz" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autoviz_enable_recommendations", + "output": "autoviz enable recommendations refers to When enabled, experiment will try to use feature transformations recommended by Autoviz" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autoviz_enable_recommendations", + "output": "autoviz enable recommendations refers to Autoviz Use Recommended Transformations: When enabled, experiment will try to use feature transformations recommended by Autoviz" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autoviz_recommended_transformation", + "output": "autoviz recommended transformation refers to Key-value pairs of column names, and transformations that Autoviz recommended" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": 
"autoviz_recommended_transformation", + "output": "autoviz recommended transformation refers to Autoviz Recommended Transformations: Key-value pairs of column names, and transformations that Autoviz recommended" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "autoviz recommended transformation", + "output": "autoviz recommended transformation refers to Autoviz Recommended Transformations: Key-value pairs of column names, and transformations that Autoviz recommended" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Autoviz Recommended Transformations: ", + "output": "autoviz recommended transformation refers to Autoviz Recommended Transformations: Key-value pairs of column names, and transformations that Autoviz recommended" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting autoviz_recommended_transformation", + "output": "autoviz recommended transformation refers to Key-value pairs of column names, and transformations that Autoviz recommended" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting autoviz_recommended_transformation", + "output": "autoviz recommended transformation refers to Autoviz Recommended Transformations: Key-value pairs of column names, and transformations that Autoviz recommended" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_custom_recipes", + "output": "enable custom recipes refers to Enable custom recipes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_custom_recipes", + "output": "enable custom recipes refers to Enable custom recipes." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable custom recipes", + "output": "enable custom recipes refers to Enable custom recipes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable custom recipes refers to Enable custom recipes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_custom_recipes", + "output": "enable custom recipes refers to Enable custom recipes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_custom_recipes", + "output": "enable custom recipes refers to Enable custom recipes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_custom_recipes_upload", + "output": "enable custom recipes upload refers to Enable uploading of custom recipes from local file system." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_custom_recipes_upload", + "output": "enable custom recipes upload refers to Enable uploading of custom recipes from local file system." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable custom recipes upload", + "output": "enable custom recipes upload refers to Enable uploading of custom recipes from local file system." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable custom recipes upload refers to Enable uploading of custom recipes from local file system." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_custom_recipes_upload", + "output": "enable custom recipes upload refers to Enable uploading of custom recipes from local file system." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_custom_recipes_upload", + "output": "enable custom recipes upload refers to Enable uploading of custom recipes from local file system." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_custom_recipes_from_url", + "output": "enable custom recipes from url refers to Enable downloading of custom recipes from external URL." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_custom_recipes_from_url", + "output": "enable custom recipes from url refers to Enable downloading of custom recipes from external URL." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable custom recipes from url", + "output": "enable custom recipes from url refers to Enable downloading of custom recipes from external URL." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable custom recipes from url refers to Enable downloading of custom recipes from external URL." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_custom_recipes_from_url", + "output": "enable custom recipes from url refers to Enable downloading of custom recipes from external URL." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_custom_recipes_from_url", + "output": "enable custom recipes from url refers to Enable downloading of custom recipes from external URL." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_custom_recipes_from_zip", + "output": "enable custom recipes from zip refers to Enable upload recipe files to be zip, containing custom recipe(s) in root folder, while any other code or auxiliary files must be in some sub-folder. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_custom_recipes_from_zip", + "output": "enable custom recipes from zip refers to Enable upload recipe files to be zip, containing custom recipe(s) in root folder, while any other code or auxiliary files must be in some sub-folder. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable custom recipes from zip", + "output": "enable custom recipes from zip refers to Enable upload recipe files to be zip, containing custom recipe(s) in root folder, while any other code or auxiliary files must be in some sub-folder. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable custom recipes from zip refers to Enable upload recipe files to be zip, containing custom recipe(s) in root folder, while any other code or auxiliary files must be in some sub-folder. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_custom_recipes_from_zip", + "output": "enable custom recipes from zip refers to Enable upload recipe files to be zip, containing custom recipe(s) in root folder, while any other code or auxiliary files must be in some sub-folder. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_custom_recipes_from_zip", + "output": "enable custom recipes from zip refers to Enable upload recipe files to be zip, containing custom recipe(s) in root folder, while any other code or auxiliary files must be in some sub-folder. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_recreate_custom_recipes_env", + "output": "enable recreate custom recipes env refers to When set to true, it enables downloading custom recipes third party packages from the web, otherwise the python environment will be transferred from main worker." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_recreate_custom_recipes_env", + "output": "enable recreate custom recipes env refers to When set to true, it enables downloading custom recipes third party packages from the web, otherwise the python environment will be transferred from main worker." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable recreate custom recipes env", + "output": "enable recreate custom recipes env refers to When set to true, it enables downloading custom recipes third party packages from the web, otherwise the python environment will be transferred from main worker."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable recreate custom recipes env refers to When set to true, it enables downloading custom recipes third party packages from the web, otherwise the python environment will be transferred from main worker." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_recreate_custom_recipes_env", + "output": "enable recreate custom recipes env refers to When set to true, it enables downloading custom recipes third party packages from the web, otherwise the python environment will be transferred from main worker." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_recreate_custom_recipes_env", + "output": "enable recreate custom recipes env refers to When set to true, it enables downloading custom recipes third party packages from the web, otherwise the python environment will be transferred from main worker." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "extra_migration_custom_recipes_missing_modules", + "output": "extra migration custom recipes missing modules refers to Whether to enable extra attempt to migrate custom modules during preview to show preview. Can lead to slow preview loading.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "extra_migration_custom_recipes_missing_modules", + "output": "extra migration custom recipes missing modules refers to Whether to enable extra attempt to migrate custom modules during preview to show preview. 
Can lead to slow preview loading.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "extra migration custom recipes missing modules", + "output": "extra migration custom recipes missing modules refers to Whether to enable extra attempt to migrate custom modules during preview to show preview. Can lead to slow preview loading.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to enable extra attempt to migrate custom modules during preview to show preview. Can lead to slow preview loading.: ", + "output": "extra migration custom recipes missing modules refers to Whether to enable extra attempt to migrate custom modules during preview to show preview. Can lead to slow preview loading.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting extra_migration_custom_recipes_missing_modules", + "output": "extra migration custom recipes missing modules refers to Whether to enable extra attempt to migrate custom modules during preview to show preview. Can lead to slow preview loading.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting extra_migration_custom_recipes_missing_modules", + "output": "extra migration custom recipes missing modules refers to Whether to enable extra attempt to migrate custom modules during preview to show preview. 
Can lead to slow preview loading.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "include_custom_recipes_by_default", + "output": "include custom recipes by default refers to Include custom recipes in default inclusion lists (warning: enables all custom recipes)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "include_custom_recipes_by_default", + "output": "include custom recipes by default refers to Include custom recipes in default inclusion lists (warning: enables all custom recipes)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "include custom recipes by default", + "output": "include custom recipes by default refers to Include custom recipes in default inclusion lists (warning: enables all custom recipes)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "include custom recipes by default refers to Include custom recipes in default inclusion lists (warning: enables all custom recipes)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting include_custom_recipes_by_default", + "output": "include custom recipes by default refers to Include custom recipes in default inclusion lists (warning: enables all custom recipes)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting include_custom_recipes_by_default", + "output": "include custom recipes by default refers to Include custom recipes in default inclusion lists (warning: enables all custom recipes)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_h2o_recipes", + "output": "enable h2o recipes 
refers to Whether to enable use of H2O recipe server. In some cases, recipe server (started at DAI startup) may enter into an unstable state, and this might affect other experiments. Then one can avoid triggering use of the recipe server by setting this to false." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_h2o_recipes", + "output": "enable h2o recipes refers to Enable h2o recipes server: Whether to enable use of H2O recipe server. In some cases, recipe server (started at DAI startup) may enter into an unstable state, and this might affect other experiments. Then one can avoid triggering use of the recipe server by setting this to false." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable h2o recipes", + "output": "enable h2o recipes refers to Enable h2o recipes server: Whether to enable use of H2O recipe server. In some cases, recipe server (started at DAI startup) may enter into an unstable state, and this might affect other experiments. Then one can avoid triggering use of the recipe server by setting this to false." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable h2o recipes server: ", + "output": "enable h2o recipes refers to Enable h2o recipes server: Whether to enable use of H2O recipe server. In some cases, recipe server (started at DAI startup) may enter into an unstable state, and this might affect other experiments. Then one can avoid triggering use of the recipe server by setting this to false." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_h2o_recipes", + "output": "enable h2o recipes refers to Whether to enable use of H2O recipe server. 
In some cases, recipe server (started at DAI startup) may enter into an unstable state, and this might affect other experiments. Then one can avoid triggering use of the recipe server by setting this to false." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_h2o_recipes", + "output": "enable h2o recipes refers to Enable h2o recipes server: Whether to enable use of H2O recipe server. In some cases, recipe server (started at DAI startup) may enter into an unstable state, and this might affect other experiments. Then one can avoid triggering use of the recipe server by setting this to false." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_url", + "output": "h2o recipes url refers to URL of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_url", + "output": "h2o recipes url refers to URL of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o recipes url", + "output": "h2o recipes url refers to URL of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o recipes url refers to URL of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_recipes_url", + "output": "h2o recipes url refers to URL of H2O instance for use by transformers, models, or scorers."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_recipes_url", + "output": "h2o recipes url refers to URL of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_ip", + "output": "h2o recipes ip refers to IP of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_ip", + "output": "h2o recipes ip refers to IP of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o recipes ip", + "output": "h2o recipes ip refers to IP of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o recipes ip refers to IP of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_recipes_ip", + "output": "h2o recipes ip refers to IP of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_recipes_ip", + "output": "h2o recipes ip refers to IP of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_port", + "output": "h2o recipes port refers to Port of H2O instance for use by transformers, models, or scorers. No other instances must be on that port or on next port." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_port", + "output": "h2o recipes port refers to Port of H2O instance for use by transformers, models, or scorers. No other instances must be on that port or on next port." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o recipes port", + "output": "h2o recipes port refers to Port of H2O instance for use by transformers, models, or scorers. No other instances must be on that port or on next port." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o recipes port refers to Port of H2O instance for use by transformers, models, or scorers. No other instances must be on that port or on next port." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_recipes_port", + "output": "h2o recipes port refers to Port of H2O instance for use by transformers, models, or scorers. No other instances must be on that port or on next port." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_recipes_port", + "output": "h2o recipes port refers to Port of H2O instance for use by transformers, models, or scorers. No other instances must be on that port or on next port." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_name", + "output": "h2o recipes name refers to Name of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_name", + "output": "h2o recipes name refers to Name of H2O instance for use by transformers, models, or scorers." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o recipes name", + "output": "h2o recipes name refers to Name of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o recipes name refers to Name of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_recipes_name", + "output": "h2o recipes name refers to Name of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_recipes_name", + "output": "h2o recipes name refers to Name of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_nthreads", + "output": "h2o recipes nthreads refers to Number of threads for H2O instance for use by transformers, models, or scorers. -1 for all." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_nthreads", + "output": "h2o recipes nthreads refers to Number of threads for H2O instance for use by transformers, models, or scorers. -1 for all." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o recipes nthreads", + "output": "h2o recipes nthreads refers to Number of threads for H2O instance for use by transformers, models, or scorers. -1 for all." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o recipes nthreads refers to Number of threads for H2O instance for use by transformers, models, or scorers. -1 for all." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_recipes_nthreads", + "output": "h2o recipes nthreads refers to Number of threads for H2O instance for use by transformers, models, or scorers. -1 for all." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_recipes_nthreads", + "output": "h2o recipes nthreads refers to Number of threads for H2O instance for use by transformers, models, or scorers. -1 for all." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_log_level", + "output": "h2o recipes log level refers to Log Level of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_log_level", + "output": "h2o recipes log level refers to Log Level of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o recipes log level", + "output": "h2o recipes log level refers to Log Level of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o recipes log level refers to Log Level of H2O instance for use by transformers, models, or scorers." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_recipes_log_level", + "output": "h2o recipes log level refers to Log Level of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_recipes_log_level", + "output": "h2o recipes log level refers to Log Level of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_max_mem_size", + "output": "h2o recipes max mem size refers to Maximum memory size of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_max_mem_size", + "output": "h2o recipes max mem size refers to Maximum memory size of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o recipes max mem size", + "output": "h2o recipes max mem size refers to Maximum memory size of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o recipes max mem size refers to Maximum memory size of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_recipes_max_mem_size", + "output": "h2o recipes max mem size refers to Maximum memory size of H2O instance for use by transformers, models, or scorers." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_recipes_max_mem_size", + "output": "h2o recipes max mem size refers to Maximum memory size of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_min_mem_size", + "output": "h2o recipes min mem size refers to Minimum memory size of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_min_mem_size", + "output": "h2o recipes min mem size refers to Minimum memory size of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o recipes min mem size", + "output": "h2o recipes min mem size refers to Minimum memory size of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o recipes min mem size refers to Minimum memory size of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_recipes_min_mem_size", + "output": "h2o recipes min mem size refers to Minimum memory size of H2O instance for use by transformers, models, or scorers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_recipes_min_mem_size", + "output": "h2o recipes min mem size refers to Minimum memory size of H2O instance for use by transformers, models, or scorers." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_kwargs", + "output": "h2o recipes kwargs refers to General user overrides of kwargs dict to pass to h2o.init() for recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_kwargs", + "output": "h2o recipes kwargs refers to General user overrides of kwargs dict to pass to h2o.init() for recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o recipes kwargs", + "output": "h2o recipes kwargs refers to General user overrides of kwargs dict to pass to h2o.init() for recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o recipes kwargs refers to General user overrides of kwargs dict to pass to h2o.init() for recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_recipes_kwargs", + "output": "h2o recipes kwargs refers to General user overrides of kwargs dict to pass to h2o.init() for recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_recipes_kwargs", + "output": "h2o recipes kwargs refers to General user overrides of kwargs dict to pass to h2o.init() for recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_start_trials", + "output": "h2o recipes start trials refers to Number of trials to give h2o-3 recipe server to start." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_start_trials", + "output": "h2o recipes start trials refers to Number of trials to give h2o-3 recipe server to start." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o recipes start trials", + "output": "h2o recipes start trials refers to Number of trials to give h2o-3 recipe server to start." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o recipes start trials refers to Number of trials to give h2o-3 recipe server to start." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_recipes_start_trials", + "output": "h2o recipes start trials refers to Number of trials to give h2o-3 recipe server to start." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_recipes_start_trials", + "output": "h2o recipes start trials refers to Number of trials to give h2o-3 recipe server to start." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_start_sleep0", + "output": "h2o recipes start sleep0 refers to Number of seconds to sleep before starting h2o-3 recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_start_sleep0", + "output": "h2o recipes start sleep0 refers to Number of seconds to sleep before starting h2o-3 recipe server." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o recipes start sleep0", + "output": "h2o recipes start sleep0 refers to Number of seconds to sleep before starting h2o-3 recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o recipes start sleep0 refers to Number of seconds to sleep before starting h2o-3 recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_recipes_start_sleep0", + "output": "h2o recipes start sleep0 refers to Number of seconds to sleep before starting h2o-3 recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_recipes_start_sleep0", + "output": "h2o recipes start sleep0 refers to Number of seconds to sleep before starting h2o-3 recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_start_sleep", + "output": "h2o recipes start sleep refers to Number of seconds to sleep between trials of starting h2o-3 recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_recipes_start_sleep", + "output": "h2o recipes start sleep refers to Number of seconds to sleep between trials of starting h2o-3 recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o recipes start sleep", + "output": "h2o recipes start sleep refers to Number of seconds to sleep between trials of starting h2o-3 recipe server." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o recipes start sleep refers to Number of seconds to sleep between trials of starting h2o-3 recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_recipes_start_sleep", + "output": "h2o recipes start sleep refers to Number of seconds to sleep between trials of starting h2o-3 recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_recipes_start_sleep", + "output": "h2o recipes start sleep refers to Number of seconds to sleep between trials of starting h2o-3 recipe server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipes_lock_to_git_repo", + "output": "custom recipes lock to git repo refers to Lock source for recipes to a specific github repo. If True then all custom recipes must come from the repo specified in setting: custom_recipes_git_repo" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipes_lock_to_git_repo", + "output": "custom recipes lock to git repo refers to Lock source for recipes to a specific github repo. If True then all custom recipes must come from the repo specified in setting: custom_recipes_git_repo" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom recipes lock to git repo", + "output": "custom recipes lock to git repo refers to Lock source for recipes to a specific github repo. 
If True then all custom recipes must come from the repo specified in setting: custom_recipes_git_repo" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "custom recipes lock to git repo refers to Lock source for recipes to a specific github repo. If True then all custom recipes must come from the repo specified in setting: custom_recipes_git_repo" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting custom_recipes_lock_to_git_repo", + "output": "custom recipes lock to git repo refers to Lock source for recipes to a specific github repo. If True then all custom recipes must come from the repo specified in setting: custom_recipes_git_repo" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting custom_recipes_lock_to_git_repo", + "output": "custom recipes lock to git repo refers to Lock source for recipes to a specific github repo. 
If True then all custom recipes must come from the repo specified in setting: custom_recipes_git_repo" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipes_git_repo", + "output": "custom recipes git repo refers to If custom_recipes_lock_to_git_repo is set to True, only this repo can be used to pull recipes from" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipes_git_repo", + "output": "custom recipes git repo refers to If custom_recipes_lock_to_git_repo is set to True, only this repo can be used to pull recipes from" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom recipes git repo", + "output": "custom recipes git repo refers to If custom_recipes_lock_to_git_repo is set to True, only this repo can be used to pull recipes from" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "custom recipes git repo refers to If custom_recipes_lock_to_git_repo is set to True, only this repo can be used to pull recipes from" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting custom_recipes_git_repo", + "output": "custom recipes git repo refers to If custom_recipes_lock_to_git_repo is set to True, only this repo can be used to pull recipes from" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting custom_recipes_git_repo", + "output": "custom recipes git repo refers to If custom_recipes_lock_to_git_repo is set to True, only this repo can be used to pull recipes from" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipes_git_branch", + 
"output": "custom recipes git branch refers to Branch constraint for recipe source repo. Any branch allowed if unset or None" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipes_git_branch", + "output": "custom recipes git branch refers to Branch constraint for recipe source repo. Any branch allowed if unset or None" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom recipes git branch", + "output": "custom recipes git branch refers to Branch constraint for recipe source repo. Any branch allowed if unset or None" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "custom recipes git branch refers to Branch constraint for recipe source repo. Any branch allowed if unset or None" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting custom_recipes_git_branch", + "output": "custom recipes git branch refers to Branch constraint for recipe source repo. Any branch allowed if unset or None" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting custom_recipes_git_branch", + "output": "custom recipes git branch refers to Branch constraint for recipe source repo. 
Any branch allowed if unset or None" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipes_excluded_filenames_from_repo_download", + "output": "custom recipes excluded filenames from repo download refers to basenames of files to exclude from repo download: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipes_excluded_filenames_from_repo_download", + "output": "custom recipes excluded filenames from repo download refers to basenames of files to exclude from repo download: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom recipes excluded filenames from repo download", + "output": "custom recipes excluded filenames from repo download refers to basenames of files to exclude from repo download: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "basenames of files to exclude from repo download: ", + "output": "custom recipes excluded filenames from repo download refers to basenames of files to exclude from repo download: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting custom_recipes_excluded_filenames_from_repo_download", + "output": "custom recipes excluded filenames from repo download refers to basenames of files to exclude from repo download: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting custom_recipes_excluded_filenames_from_repo_download", + "output": "custom recipes excluded filenames from repo download refers to basenames of files to exclude from repo download: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": 
"allow_old_recipes_use_datadir_as_data_directory", + "output": "allow old recipes use datadir as data directory refers to Allow use of deprecated get_global_directory() method from custom recipes for backward compatibility of recipes created before 1.9.0. Disable to force separation of custom recipes per user (in which case user_dir() should be used instead).: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_old_recipes_use_datadir_as_data_directory", + "output": "allow old recipes use datadir as data directory refers to Allow use of deprecated get_global_directory() method from custom recipes for backward compatibility of recipes created before 1.9.0. Disable to force separation of custom recipes per user (in which case user_dir() should be used instead).: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow old recipes use datadir as data directory", + "output": "allow old recipes use datadir as data directory refers to Allow use of deprecated get_global_directory() method from custom recipes for backward compatibility of recipes created before 1.9.0. Disable to force separation of custom recipes per user (in which case user_dir() should be used instead).: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Allow use of deprecated get_global_directory() method from custom recipes for backward compatibility of recipes created before 1.9.0. Disable to force separation of custom recipes per user (in which case user_dir() should be used instead).: ", + "output": "allow old recipes use datadir as data directory refers to Allow use of deprecated get_global_directory() method from custom recipes for backward compatibility of recipes created before 1.9.0. 
Disable to force separation of custom recipes per user (in which case user_dir() should be used instead).: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_old_recipes_use_datadir_as_data_directory", + "output": "allow old recipes use datadir as data directory refers to Allow use of deprecated get_global_directory() method from custom recipes for backward compatibility of recipes created before 1.9.0. Disable to force separation of custom recipes per user (in which case user_dir() should be used instead).: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_old_recipes_use_datadir_as_data_directory", + "output": "allow old recipes use datadir as data directory refers to Allow use of deprecated get_global_directory() method from custom recipes for backward compatibility of recipes created before 1.9.0. Disable to force separation of custom recipes per user (in which case user_dir() should be used instead).: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "last_recipe", + "output": "last recipe refers to Internal helper to allow memory of if changed recipe" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "last_recipe", + "output": "last recipe refers to Internal helper to allow memory of if changed recipe" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "last recipe", + "output": "last recipe refers to Internal helper to allow memory of if changed recipe" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "last recipe refers to Internal helper to allow memory of if changed recipe" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Provide a short explanation of the expert setting last_recipe", + "output": "last recipe refers to Internal helper to allow memory of if changed recipe" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting last_recipe", + "output": "last recipe refers to Internal helper to allow memory of if changed recipe" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "recipe_dict", + "output": "recipe dict refers to Dictionary to control recipes for each experiment and particular custom recipes. E.g. if inserting into the GUI as any toml string, can use: \"\"recipe_dict=\"{'key1': 2, 'key2': 'value2'}\"\"\" E.g. if putting into config.toml as a dict, can use: recipe_dict=\"{'key1': 2, 'key2': 'value2'}\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "recipe_dict", + "output": "recipe dict refers to Dictionary to control recipes for each experiment and particular custom recipes. E.g. if inserting into the GUI as any toml string, can use: \"\"recipe_dict=\"{'key1': 2, 'key2': 'value2'}\"\"\" E.g. if putting into config.toml as a dict, can use: recipe_dict=\"{'key1': 2, 'key2': 'value2'}\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "recipe dict", + "output": "recipe dict refers to Dictionary to control recipes for each experiment and particular custom recipes. E.g. if inserting into the GUI as any toml string, can use: \"\"recipe_dict=\"{'key1': 2, 'key2': 'value2'}\"\"\" E.g. 
if putting into config.toml as a dict, can use: recipe_dict=\"{'key1': 2, 'key2': 'value2'}\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "recipe dict refers to Dictionary to control recipes for each experiment and particular custom recipes. E.g. if inserting into the GUI as any toml string, can use: \"\"recipe_dict=\"{'key1': 2, 'key2': 'value2'}\"\"\" E.g. if putting into config.toml as a dict, can use: recipe_dict=\"{'key1': 2, 'key2': 'value2'}\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting recipe_dict", + "output": "recipe dict refers to Dictionary to control recipes for each experiment and particular custom recipes. E.g. if inserting into the GUI as any toml string, can use: \"\"recipe_dict=\"{'key1': 2, 'key2': 'value2'}\"\"\" E.g. if putting into config.toml as a dict, can use: recipe_dict=\"{'key1': 2, 'key2': 'value2'}\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting recipe_dict", + "output": "recipe dict refers to Dictionary to control recipes for each experiment and particular custom recipes. E.g. if inserting into the GUI as any toml string, can use: \"\"recipe_dict=\"{'key1': 2, 'key2': 'value2'}\"\"\" E.g. if putting into config.toml as a dict, can use: recipe_dict=\"{'key1': 2, 'key2': 'value2'}\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mutation_dict", + "output": "mutation dict refers to Dictionary to control some mutation parameters. E.g. if inserting into the GUI as any toml string, can use: \"\"mutation_dict=\"{'key1': 2, 'key2': 'value2'}\"\"\" E.g. 
if putting into config.toml as a dict, can use: mutation_dict=\"{'key1': 2, 'key2': 'value2'}\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mutation_dict", + "output": "mutation dict refers to Dictionary to control some mutation parameters. E.g. if inserting into the GUI as any toml string, can use: \"\"mutation_dict=\"{'key1': 2, 'key2': 'value2'}\"\"\" E.g. if putting into config.toml as a dict, can use: mutation_dict=\"{'key1': 2, 'key2': 'value2'}\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mutation dict", + "output": "mutation dict refers to Dictionary to control some mutation parameters. E.g. if inserting into the GUI as any toml string, can use: \"\"mutation_dict=\"{'key1': 2, 'key2': 'value2'}\"\"\" E.g. if putting into config.toml as a dict, can use: mutation_dict=\"{'key1': 2, 'key2': 'value2'}\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mutation dict refers to Dictionary to control some mutation parameters. E.g. if inserting into the GUI as any toml string, can use: \"\"mutation_dict=\"{'key1': 2, 'key2': 'value2'}\"\"\" E.g. if putting into config.toml as a dict, can use: mutation_dict=\"{'key1': 2, 'key2': 'value2'}\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mutation_dict", + "output": "mutation dict refers to Dictionary to control some mutation parameters. E.g. if inserting into the GUI as any toml string, can use: \"\"mutation_dict=\"{'key1': 2, 'key2': 'value2'}\"\"\" E.g. 
if putting into config.toml as a dict, can use: mutation_dict=\"{'key1': 2, 'key2': 'value2'}\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mutation_dict", + "output": "mutation dict refers to Dictionary to control some mutation parameters. E.g. if inserting into the GUI as any toml string, can use: \"\"mutation_dict=\"{'key1': 2, 'key2': 'value2'}\"\"\" E.g. if putting into config.toml as a dict, can use: mutation_dict=\"{'key1': 2, 'key2': 'value2'}\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "raise_on_invalid_included_list", + "output": "raise on invalid included list refers to Whether to validate recipe names provided in included lists, like included_models, or (if False) whether to just log warning to server logs and ignore any invalid names of recipes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "raise_on_invalid_included_list", + "output": "raise on invalid included list refers to Whether to validate recipe names: Whether to validate recipe names provided in included lists, like included_models, or (if False) whether to just log warning to server logs and ignore any invalid names of recipes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "raise on invalid included list", + "output": "raise on invalid included list refers to Whether to validate recipe names: Whether to validate recipe names provided in included lists, like included_models, or (if False) whether to just log warning to server logs and ignore any invalid names of recipes. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to validate recipe names: ", + "output": "raise on invalid included list refers to Whether to validate recipe names: Whether to validate recipe names provided in included lists, like included_models, or (if False) whether to just log warning to server logs and ignore any invalid names of recipes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting raise_on_invalid_included_list", + "output": "raise on invalid included list refers to Whether to validate recipe names provided in included lists, like included_models, or (if False) whether to just log warning to server logs and ignore any invalid names of recipes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting raise_on_invalid_included_list", + "output": "raise on invalid included list refers to Whether to validate recipe names: Whether to validate recipe names provided in included lists, like included_models, or (if False) whether to just log warning to server logs and ignore any invalid names of recipes. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib_relative_directory", + "output": "contrib relative directory refers to Base directory for recipes within data directory.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib_relative_directory", + "output": "contrib relative directory refers to Base directory for recipes within data directory.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib relative directory", + "output": "contrib relative directory refers to Base directory for recipes within data directory.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Base directory for recipes within data directory.: ", + "output": "contrib relative directory refers to Base directory for recipes within data directory.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting contrib_relative_directory", + "output": "contrib relative directory refers to Base directory for recipes within data directory.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting contrib_relative_directory", + "output": "contrib relative directory refers to Base directory for recipes within data directory.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib_env_relative_directory", + "output": "contrib env relative directory refers to location of custom recipes packages installed (relative to data_directory) We will try to install packages dynamically, but can also do (before or after server started): (inside docker running docker instance if running docker, or as user server 
is running as (e.g. dai user) if deb/tar native installation: PYTHONPATH=//lib/python3.6/site-packages/ dai-env.sh python -m pip install --prefix=/ --upgrade --upgrade-strategy only-if-needed --log-file pip_log_file.log where is /opt/h2oai/dai/ for native rpm/deb installation Note can also install wheel files if is name of wheel file or archive. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib_env_relative_directory", + "output": "contrib env relative directory refers to location of custom recipes packages installed (relative to data_directory) We will try to install packages dynamically, but can also do (before or after server started): (inside docker running docker instance if running docker, or as user server is running as (e.g. dai user) if deb/tar native installation: PYTHONPATH=//lib/python3.6/site-packages/ dai-env.sh python -m pip install --prefix=/ --upgrade --upgrade-strategy only-if-needed --log-file pip_log_file.log where is /opt/h2oai/dai/ for native rpm/deb installation Note can also install wheel files if is name of wheel file or archive. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib env relative directory", + "output": "contrib env relative directory refers to location of custom recipes packages installed (relative to data_directory) We will try to install packages dynamically, but can also do (before or after server started): (inside docker running docker instance if running docker, or as user server is running as (e.g. dai user) if deb/tar native installation: PYTHONPATH=//lib/python3.6/site-packages/ dai-env.sh python -m pip install --prefix=/ --upgrade --upgrade-strategy only-if-needed --log-file pip_log_file.log where is /opt/h2oai/dai/ for native rpm/deb installation Note can also install wheel files if is name of wheel file or archive. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "contrib env relative directory refers to location of custom recipes packages installed (relative to data_directory) We will try to install packages dynamically, but can also do (before or after server started): (inside docker running docker instance if running docker, or as user server is running as (e.g. dai user) if deb/tar native installation: PYTHONPATH=//lib/python3.6/site-packages/ dai-env.sh python -m pip install --prefix=/ --upgrade --upgrade-strategy only-if-needed --log-file pip_log_file.log where is /opt/h2oai/dai/ for native rpm/deb installation Note can also install wheel files if is name of wheel file or archive. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting contrib_env_relative_directory", + "output": "contrib env relative directory refers to location of custom recipes packages installed (relative to data_directory) We will try to install packages dynamically, but can also do (before or after server started): (inside docker running docker instance if running docker, or as user server is running as (e.g. dai user) if deb/tar native installation: PYTHONPATH=//lib/python3.6/site-packages/ dai-env.sh python -m pip install --prefix=/ --upgrade --upgrade-strategy only-if-needed --log-file pip_log_file.log where is /opt/h2oai/dai/ for native rpm/deb installation Note can also install wheel files if is name of wheel file or archive. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting contrib_env_relative_directory", + "output": "contrib env relative directory refers to location of custom recipes packages installed (relative to data_directory) We will try to install packages dynamically, but can also do (before or after server started): (inside docker running docker instance if running docker, or as user server is running as (e.g. dai user) if deb/tar native installation: PYTHONPATH=//lib/python3.6/site-packages/ dai-env.sh python -m pip install --prefix=/ --upgrade --upgrade-strategy only-if-needed --log-file pip_log_file.log where is /opt/h2oai/dai/ for native rpm/deb installation Note can also install wheel files if is name of wheel file or archive. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ignore_package_version", + "output": "ignore package version refers to List of package versions to ignore. Useful when small version change but likely to function still with old package version. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ignore_package_version", + "output": "ignore package version refers to List of package versions to ignore. Useful when small version change but likely to function still with old package version. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ignore package version", + "output": "ignore package version refers to List of package versions to ignore. Useful when small version change but likely to function still with old package version. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ignore package version refers to List of package versions to ignore. 
Useful when small version change but likely to function still with old package version. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ignore_package_version", + "output": "ignore package version refers to List of package versions to ignore. Useful when small version change but likely to function still with old package version. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ignore_package_version", + "output": "ignore package version refers to List of package versions to ignore. Useful when small version change but likely to function still with old package version. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "clobber_package_version", + "output": "clobber package version refers to List of package versions to remove if encounter conflict. Useful when want new version of package, and old recipes likely to function still. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "clobber_package_version", + "output": "clobber package version refers to List of package versions to remove if encounter conflict. Useful when want new version of package, and old recipes likely to function still. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "clobber package version", + "output": "clobber package version refers to List of package versions to remove if encounter conflict. Useful when want new version of package, and old recipes likely to function still. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "clobber package version refers to List of package versions to remove if encounter conflict. 
Useful when want new version of package, and old recipes likely to function still. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting clobber_package_version", + "output": "clobber package version refers to List of package versions to remove if encounter conflict. Useful when want new version of package, and old recipes likely to function still. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting clobber_package_version", + "output": "clobber package version refers to List of package versions to remove if encounter conflict. Useful when want new version of package, and old recipes likely to function still. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "swap_package_version", + "output": "swap package version refers to List of package versions to remove if encounter conflict. Useful when want new version of package, and old recipes likely to function still. Also useful when do not need to use old versions of recipes even if they would no longer function. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "swap_package_version", + "output": "swap package version refers to List of package versions to remove if encounter conflict. Useful when want new version of package, and old recipes likely to function still. Also useful when do not need to use old versions of recipes even if they would no longer function. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "swap package version", + "output": "swap package version refers to List of package versions to remove if encounter conflict. Useful when want new version of package, and old recipes likely to function still. 
Also useful when do not need to use old versions of recipes even if they would no longer function. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "swap package version refers to List of package versions to remove if encounter conflict. Useful when want new version of package, and old recipes likely to function still. Also useful when do not need to use old versions of recipes even if they would no longer function. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting swap_package_version", + "output": "swap package version refers to List of package versions to remove if encounter conflict. Useful when want new version of package, and old recipes likely to function still. Also useful when do not need to use old versions of recipes even if they would no longer function. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting swap_package_version", + "output": "swap package version refers to List of package versions to remove if encounter conflict. Useful when want new version of package, and old recipes likely to function still. Also useful when do not need to use old versions of recipes even if they would no longer function. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_version_change_user_packages", + "output": "allow version change user packages refers to If user uploads recipe with changes to package versions, allow upgrade of package versions. If DAI protected packages are attempted to be changed, can try using pip_install_options toml with ['--no-deps']. Or to ignore entirely DAI versions of packages, can try using pip_install_options toml with ['--ignore-installed']. Any other experiments relying on recipes with such packages will be affected, use with caution." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_version_change_user_packages", + "output": "allow version change user packages refers to If user uploads recipe with changes to package versions, allow upgrade of package versions. If DAI protected packages are attempted to be changed, can try using pip_install_options toml with ['--no-deps']. Or to ignore entirely DAI versions of packages, can try using pip_install_options toml with ['--ignore-installed']. Any other experiments relying on recipes with such packages will be affected, use with caution." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow version change user packages", + "output": "allow version change user packages refers to If user uploads recipe with changes to package versions, allow upgrade of package versions. If DAI protected packages are attempted to be changed, can try using pip_install_options toml with ['--no-deps']. Or to ignore entirely DAI versions of packages, can try using pip_install_options toml with ['--ignore-installed']. Any other experiments relying on recipes with such packages will be affected, use with caution." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "allow version change user packages refers to If user uploads recipe with changes to package versions, allow upgrade of package versions. If DAI protected packages are attempted to be changed, can try using pip_install_options toml with ['--no-deps']. Or to ignore entirely DAI versions of packages, can try using pip_install_options toml with ['--ignore-installed']. Any other experiments relying on recipes with such packages will be affected, use with caution." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_version_change_user_packages", + "output": "allow version change user packages refers to If user uploads recipe with changes to package versions, allow upgrade of package versions. If DAI protected packages are attempted to be changed, can try using pip_install_options toml with ['--no-deps']. Or to ignore entirely DAI versions of packages, can try using pip_install_options toml with ['--ignore-installed']. Any other experiments relying on recipes with such packages will be affected, use with caution." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_version_change_user_packages", + "output": "allow version change user packages refers to If user uploads recipe with changes to package versions, allow upgrade of package versions. If DAI protected packages are attempted to be changed, can try using pip_install_options toml with ['--no-deps']. Or to ignore entirely DAI versions of packages, can try using pip_install_options toml with ['--ignore-installed']. Any other experiments relying on recipes with such packages will be affected, use with caution." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip_install_overall_retries", + "output": "pip install overall retries refers to pip install retry for call to pip. Sometimes need to try twice" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip_install_overall_retries", + "output": "pip install overall retries refers to pip install retry for call to pip. 
Sometimes need to try twice" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip install overall retries", + "output": "pip install overall retries refers to pip install retry for call to pip. Sometimes need to try twice" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "pip install overall retries refers to pip install retry for call to pip. Sometimes need to try twice" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pip_install_overall_retries", + "output": "pip install overall retries refers to pip install retry for call to pip. Sometimes need to try twice" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pip_install_overall_retries", + "output": "pip install overall retries refers to pip install retry for call to pip. 
Sometimes need to try twice" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip_install_verbosity", + "output": "pip install verbosity refers to pip install verbosity level (number of -v's given to pip, up to 3)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip_install_verbosity", + "output": "pip install verbosity refers to pip install verbosity level (number of -v's given to pip, up to 3)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip install verbosity", + "output": "pip install verbosity refers to pip install verbosity level (number of -v's given to pip, up to 3)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "pip install verbosity refers to pip install verbosity level (number of -v's given to pip, up to 3)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pip_install_verbosity", + "output": "pip install verbosity refers to pip install verbosity level (number of -v's given to pip, up to 3)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pip_install_verbosity", + "output": "pip install verbosity refers to pip install verbosity level (number of -v's given to pip, up to 3)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip_install_timeout", + "output": "pip install timeout refers to pip install timeout in seconds, Sometimes internet issues would mean want to fail faster" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip_install_timeout", + "output": "pip install
timeout refers to pip install timeout in seconds, Sometimes internet issues would mean want to fail faster" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip install timeout", + "output": "pip install timeout refers to pip install timeout in seconds, Sometimes internet issues would mean want to fail faster" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "pip install timeout refers to pip install timeout in seconds, Sometimes internet issues would mean want to fail faster" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pip_install_timeout", + "output": "pip install timeout refers to pip install timeout in seconds, Sometimes internet issues would mean want to fail faster" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pip_install_timeout", + "output": "pip install timeout refers to pip install timeout in seconds, Sometimes internet issues would mean want to fail faster" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip_install_retries", + "output": "pip install retries refers to pip install retry count" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip_install_retries", + "output": "pip install retries refers to pip install retry count" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip install retries", + "output": "pip install retries refers to pip install retry count" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "pip install retries refers to 
pip install retry count" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pip_install_retries", + "output": "pip install retries refers to pip install retry count" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pip_install_retries", + "output": "pip install retries refers to pip install retry count" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip_install_use_constraint", + "output": "pip install use constraint refers to Whether to use DAI constraint file to help pip handle versions. pip can make mistakes and try to install updated packages for no reason." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip_install_use_constraint", + "output": "pip install use constraint refers to Whether to use DAI constraint file to help pip handle versions. pip can make mistakes and try to install updated packages for no reason." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip install use constraint", + "output": "pip install use constraint refers to Whether to use DAI constraint file to help pip handle versions. pip can make mistakes and try to install updated packages for no reason." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "pip install use constraint refers to Whether to use DAI constraint file to help pip handle versions. pip can make mistakes and try to install updated packages for no reason." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pip_install_use_constraint", + "output": "pip install use constraint refers to Whether to use DAI constraint file to help pip handle versions. pip can make mistakes and try to install updated packages for no reason." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pip_install_use_constraint", + "output": "pip install use constraint refers to Whether to use DAI constraint file to help pip handle versions. pip can make mistakes and try to install updated packages for no reason." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip_install_options", + "output": "pip install options refers to pip install options: string of list of other options, e.g. ['--proxy', 'http://user:password@proxyserver:port']" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip_install_options", + "output": "pip install options refers to pip install options: string of list of other options, e.g. ['--proxy', 'http://user:password@proxyserver:port']" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pip install options", + "output": "pip install options refers to pip install options: string of list of other options, e.g. ['--proxy', 'http://user:password@proxyserver:port']" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "pip install options refers to pip install options: string of list of other options, e.g. 
['--proxy', 'http://user:password@proxyserver:port']" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pip_install_options", + "output": "pip install options refers to pip install options: string of list of other options, e.g. ['--proxy', 'http://user:password@proxyserver:port']" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pip_install_options", + "output": "pip install options refers to pip install options: string of list of other options, e.g. ['--proxy', 'http://user:password@proxyserver:port']" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_basic_acceptance_tests", + "output": "enable basic acceptance tests refers to Whether to enable basic acceptance testing. Tests if can pickle the state, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_basic_acceptance_tests", + "output": "enable basic acceptance tests refers to Whether to enable basic acceptance testing. Tests if can pickle the state, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable basic acceptance tests", + "output": "enable basic acceptance tests refers to Whether to enable basic acceptance testing. Tests if can pickle the state, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable basic acceptance tests refers to Whether to enable basic acceptance testing. Tests if can pickle the state, etc." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_basic_acceptance_tests", + "output": "enable basic acceptance tests refers to Whether to enable basic acceptance testing. Tests if can pickle the state, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_basic_acceptance_tests", + "output": "enable basic acceptance tests refers to Whether to enable basic acceptance testing. Tests if can pickle the state, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_acceptance_tests", + "output": "enable acceptance tests refers to Whether acceptance tests should run for custom genes / models / scorers / etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_acceptance_tests", + "output": "enable acceptance tests refers to Whether acceptance tests should run for custom genes / models / scorers / etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable acceptance tests", + "output": "enable acceptance tests refers to Whether acceptance tests should run for custom genes / models / scorers / etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable acceptance tests refers to Whether acceptance tests should run for custom genes / models / scorers / etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_acceptance_tests", + "output": "enable acceptance tests refers to Whether acceptance tests should run for custom genes / models / scorers / etc." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_acceptance_tests", + "output": "enable acceptance tests refers to Whether acceptance tests should run for custom genes / models / scorers / etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip_disabled_recipes", + "output": "skip disabled recipes refers to Whether to skip disabled recipes (True) or fail and show GUI message (False)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip_disabled_recipes", + "output": "skip disabled recipes refers to Whether to skip disabled recipes (True) or fail and show GUI message (False)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "skip disabled recipes", + "output": "skip disabled recipes refers to Whether to skip disabled recipes (True) or fail and show GUI message (False)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "skip disabled recipes refers to Whether to skip disabled recipes (True) or fail and show GUI message (False)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting skip_disabled_recipes", + "output": "skip disabled recipes refers to Whether to skip disabled recipes (True) or fail and show GUI message (False)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting skip_disabled_recipes", + "output": "skip disabled recipes refers to Whether to skip disabled recipes (True) or fail and show GUI message (False)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "acceptance_test_timeout", + "output": "acceptance test timeout refers to Minutes to wait until a recipe's acceptance testing is aborted. A recipe is rejected if acceptance testing is enabled and times out. One may also set timeout for a specific recipe by setting the class's staticmethod function called acceptance_test_timeout to return number of minutes to wait until timeout doing acceptance testing. This timeout does not include the time to install required packages. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "acceptance_test_timeout", + "output": "acceptance test timeout refers to Timeout in minutes for testing acceptance of each recipe: Minutes to wait until a recipe's acceptance testing is aborted. A recipe is rejected if acceptance testing is enabled and times out. One may also set timeout for a specific recipe by setting the class's staticmethod function called acceptance_test_timeout to return number of minutes to wait until timeout doing acceptance testing. This timeout does not include the time to install required packages. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "acceptance test timeout", + "output": "acceptance test timeout refers to Timeout in minutes for testing acceptance of each recipe: Minutes to wait until a recipe's acceptance testing is aborted. A recipe is rejected if acceptance testing is enabled and times out. One may also set timeout for a specific recipe by setting the class's staticmethod function called acceptance_test_timeout to return number of minutes to wait until timeout doing acceptance testing. This timeout does not include the time to install required packages. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Timeout in minutes for testing acceptance of each recipe: ", + "output": "acceptance test timeout refers to Timeout in minutes for testing acceptance of each recipe: Minutes to wait until a recipe's acceptance testing is aborted. A recipe is rejected if acceptance testing is enabled and times out. One may also set timeout for a specific recipe by setting the class's staticmethod function called acceptance_test_timeout to return number of minutes to wait until timeout doing acceptance testing. This timeout does not include the time to install required packages. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting acceptance_test_timeout", + "output": "acceptance test timeout refers to Minutes to wait until a recipe's acceptance testing is aborted. A recipe is rejected if acceptance testing is enabled and times out. One may also set timeout for a specific recipe by setting the class's staticmethod function called acceptance_test_timeout to return number of minutes to wait until timeout doing acceptance testing. This timeout does not include the time to install required packages. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting acceptance_test_timeout", + "output": "acceptance test timeout refers to Timeout in minutes for testing acceptance of each recipe: Minutes to wait until a recipe's acceptance testing is aborted. A recipe is rejected if acceptance testing is enabled and times out. One may also set timeout for a specific recipe by setting the class's staticmethod function called acceptance_test_timeout to return number of minutes to wait until timeout doing acceptance testing. This timeout does not include the time to install required packages. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib_reload_and_recheck_server_start", + "output": "contrib reload and recheck server start refers to Whether to re-check recipes during server startup (if per_user_directories == false) or during user login (if per_user_directories == true). If any inconsistency develops, the bad recipe will be removed during re-doing acceptance testing. This process can make start-up take a lot longer for many recipes, but in LTS releases the risk of recipes becoming out of date is low. If set to false, will disable acceptance re-testing during server start but note that previews or experiments may fail if those inconsistent recipes are used. Such inconsistencies can occur when API changes for recipes or more aggressive acceptance tests are performed. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib_reload_and_recheck_server_start", + "output": "contrib reload and recheck server start refers to Whether to re-check recipes during server startup (if per_user_directories == false) or during user login (if per_user_directories == true). If any inconsistency develops, the bad recipe will be removed during re-doing acceptance testing. This process can make start-up take a lot longer for many recipes, but in LTS releases the risk of recipes becoming out of date is low. If set to false, will disable acceptance re-testing during server start but note that previews or experiments may fail if those inconsistent recipes are used. Such inconsistencies can occur when API changes for recipes or more aggressive acceptance tests are performed. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib reload and recheck server start", + "output": "contrib reload and recheck server start refers to Whether to re-check recipes during server startup (if per_user_directories == false) or during user login (if per_user_directories == true). If any inconsistency develops, the bad recipe will be removed during re-doing acceptance testing. This process can make start-up take a lot longer for many recipes, but in LTS releases the risk of recipes becoming out of date is low. If set to false, will disable acceptance re-testing during server start but note that previews or experiments may fail if those inconsistent recipes are used. Such inconsistencies can occur when API changes for recipes or more aggressive acceptance tests are performed. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "contrib reload and recheck server start refers to Whether to re-check recipes during server startup (if per_user_directories == false) or during user login (if per_user_directories == true). If any inconsistency develops, the bad recipe will be removed during re-doing acceptance testing. This process can make start-up take a lot longer for many recipes, but in LTS releases the risk of recipes becoming out of date is low. If set to false, will disable acceptance re-testing during server start but note that previews or experiments may fail if those inconsistent recipes are used. Such inconsistencies can occur when API changes for recipes or more aggressive acceptance tests are performed. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting contrib_reload_and_recheck_server_start", + "output": "contrib reload and recheck server start refers to Whether to re-check recipes during server startup (if per_user_directories == false) or during user login (if per_user_directories == true). If any inconsistency develops, the bad recipe will be removed during re-doing acceptance testing. This process can make start-up take a lot longer for many recipes, but in LTS releases the risk of recipes becoming out of date is low. If set to false, will disable acceptance re-testing during server start but note that previews or experiments may fail if those inconsistent recipes are used. Such inconsistencies can occur when API changes for recipes or more aggressive acceptance tests are performed. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting contrib_reload_and_recheck_server_start", + "output": "contrib reload and recheck server start refers to Whether to re-check recipes during server startup (if per_user_directories == false) or during user login (if per_user_directories == true). If any inconsistency develops, the bad recipe will be removed during re-doing acceptance testing. This process can make start-up take a lot longer for many recipes, but in LTS releases the risk of recipes becoming out of date is low. If set to false, will disable acceptance re-testing during server start but note that previews or experiments may fail if those inconsistent recipes are used. Such inconsistencies can occur when API changes for recipes or more aggressive acceptance tests are performed. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib_install_packages_server_start", + "output": "contrib install packages server start refers to Whether to at least install packages required for recipes during server startup (if per_user_directories == false) or during user login (if per_user_directories == true). Important to keep True so any later use of recipes (that have global packages installed) will work. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib_install_packages_server_start", + "output": "contrib install packages server start refers to Whether to at least install packages required for recipes during server startup (if per_user_directories == false) or during user login (if per_user_directories == true). Important to keep True so any later use of recipes (that have global packages installed) will work. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib install packages server start", + "output": "contrib install packages server start refers to Whether to at least install packages required for recipes during server startup (if per_user_directories == false) or during user login (if per_user_directories == true). Important to keep True so any later use of recipes (that have global packages installed) will work. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "contrib install packages server start refers to Whether to at least install packages required for recipes during server startup (if per_user_directories == false) or during user login (if per_user_directories == true). Important to keep True so any later use of recipes (that have global packages installed) will work. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting contrib_install_packages_server_start", + "output": "contrib install packages server start refers to Whether to at least install packages required for recipes during server startup (if per_user_directories == false) or during user login (if per_user_directories == true). Important to keep True so any later use of recipes (that have global packages installed) will work. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting contrib_install_packages_server_start", + "output": "contrib install packages server start refers to Whether to at least install packages required for recipes during server startup (if per_user_directories == false) or during user login (if per_user_directories == true). Important to keep True so any later use of recipes (that have global packages installed) will work. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib_reload_and_recheck_worker_tasks", + "output": "contrib reload and recheck worker tasks refers to Whether to re-check recipes after uploaded from main server to worker in multinode. Expensive for every task that has recipes to do this." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib_reload_and_recheck_worker_tasks", + "output": "contrib reload and recheck worker tasks refers to Whether to re-check recipes after uploaded from main server to worker in multinode. Expensive for every task that has recipes to do this." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "contrib reload and recheck worker tasks", + "output": "contrib reload and recheck worker tasks refers to Whether to re-check recipes after uploaded from main server to worker in multinode. Expensive for every task that has recipes to do this." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "contrib reload and recheck worker tasks refers to Whether to re-check recipes after uploaded from main server to worker in multinode. Expensive for every task that has recipes to do this." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting contrib_reload_and_recheck_worker_tasks", + "output": "contrib reload and recheck worker tasks refers to Whether to re-check recipes after uploaded from main server to worker in multinode. Expensive for every task that has recipes to do this." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting contrib_reload_and_recheck_worker_tasks", + "output": "contrib reload and recheck worker tasks refers to Whether to re-check recipes after uploaded from main server to worker in multinode. Expensive for every task that has recipes to do this." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_recipe_isolate", + "output": "data recipe isolate refers to Whether to isolate (in fork) data recipe in case imports change needs across.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_recipe_isolate", + "output": "data recipe isolate refers to Whether to isolate (in fork) data recipe in case imports change needs across.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data recipe isolate", + "output": "data recipe isolate refers to Whether to isolate (in fork) data recipe in case imports change needs across.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to isolate (in fork) data recipe in case imports change needs across.: ", + "output": "data recipe isolate refers to Whether to isolate (in fork) data recipe in case imports change needs across.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting data_recipe_isolate", + "output": "data recipe isolate refers to Whether to isolate (in fork) data recipe in case imports change needs across.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting data_recipe_isolate", + "output": "data recipe isolate refers to Whether to isolate (in fork) data recipe in case imports change needs across.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "server_recipe_url", + "output": "server recipe url refers to Space-separated string list of URLs for recipes that are loaded at user login time" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the 
following expert setting for Driverless AI", + "input": "server_recipe_url", + "output": "server recipe url refers to Space-separated string list of URLs for recipes that are loaded at user login time" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "server recipe url", + "output": "server recipe url refers to Space-separated string list of URLs for recipes that are loaded at user login time" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "server recipe url refers to Space-separated string list of URLs for recipes that are loaded at user login time" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting server_recipe_url", + "output": "server recipe url refers to Space-separated string list of URLs for recipes that are loaded at user login time" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting server_recipe_url", + "output": "server recipe url refers to Space-separated string list of URLs for recipes that are loaded at user login time" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "recipe_activation", + "output": "recipe activation refers to List of recipes (per dict key by type) that are applicable for given experiment. This is especially relevantfor situations such as new `experiment with same params` where the user should be able touse the same recipe versions as the parent experiment if he/she wishes to. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "recipe_activation", + "output": "recipe activation refers to Recipe Activation List: List of recipes (per dict key by type) that are applicable for given experiment. 
This is especially relevantfor situations such as new `experiment with same params` where the user should be able touse the same recipe versions as the parent experiment if he/she wishes to. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "recipe activation", + "output": "recipe activation refers to Recipe Activation List: List of recipes (per dict key by type) that are applicable for given experiment. This is especially relevantfor situations such as new `experiment with same params` where the user should be able touse the same recipe versions as the parent experiment if he/she wishes to. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Recipe Activation List: ", + "output": "recipe activation refers to Recipe Activation List: List of recipes (per dict key by type) that are applicable for given experiment. This is especially relevantfor situations such as new `experiment with same params` where the user should be able touse the same recipe versions as the parent experiment if he/she wishes to. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting recipe_activation", + "output": "recipe activation refers to List of recipes (per dict key by type) that are applicable for given experiment. This is especially relevantfor situations such as new `experiment with same params` where the user should be able touse the same recipe versions as the parent experiment if he/she wishes to. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting recipe_activation", + "output": "recipe activation refers to Recipe Activation List: List of recipes (per dict key by type) that are applicable for given experiment. 
This is especially relevantfor situations such as new `experiment with same params` where the user should be able touse the same recipe versions as the parent experiment if he/she wishes to. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enabled_file_systems", + "output": "enabled file systems refers to File System Support upload : standard upload feature file : local file system/server file system hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below dtap : Blue Data Tap file system, remember to configure the DTap section below s3 : Amazon S3, optionally configure secret and access key below gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below minio : Minio Cloud Storage, remember to configure secret and access key below snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) jdbc: JDBC Connector, remember to configure JDBC below. (jdbc_app_configs) hive: Hive Connector, remember to configure Hive below. 
(hive_app_configs) recipe_file: Custom recipe file upload recipe_url: Custom recipe upload via url h2o_drive: H2O Drive, remember to configure `h2o_drive_endpoint_url` below feature_store: Feature Store, remember to configure feature_store_endpoint_url below " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enabled_file_systems", + "output": "enabled file systems refers to File System Support upload : standard upload feature file : local file system/server file system hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below dtap : Blue Data Tap file system, remember to configure the DTap section below s3 : Amazon S3, optionally configure secret and access key below gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below minio : Minio Cloud Storage, remember to configure secret and access key below snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) jdbc: JDBC Connector, remember to configure JDBC below. (jdbc_app_configs) hive: Hive Connector, remember to configure Hive below. 
(hive_app_configs) recipe_file: Custom recipe file upload recipe_url: Custom recipe upload via url h2o_drive: H2O Drive, remember to configure `h2o_drive_endpoint_url` below feature_store: Feature Store, remember to configure feature_store_endpoint_url below " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enabled file systems", + "output": "enabled file systems refers to File System Support upload : standard upload feature file : local file system/server file system hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below dtap : Blue Data Tap file system, remember to configure the DTap section below s3 : Amazon S3, optionally configure secret and access key below gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below minio : Minio Cloud Storage, remember to configure secret and access key below snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) jdbc: JDBC Connector, remember to configure JDBC below. (jdbc_app_configs) hive: Hive Connector, remember to configure Hive below. 
(hive_app_configs) recipe_file: Custom recipe file upload recipe_url: Custom recipe upload via url h2o_drive: H2O Drive, remember to configure `h2o_drive_endpoint_url` below feature_store: Feature Store, remember to configure feature_store_endpoint_url below " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enabled file systems refers to File System Support upload : standard upload feature file : local file system/server file system hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below dtap : Blue Data Tap file system, remember to configure the DTap section below s3 : Amazon S3, optionally configure secret and access key below gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below minio : Minio Cloud Storage, remember to configure secret and access key below snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) jdbc: JDBC Connector, remember to configure JDBC below. (jdbc_app_configs) hive: Hive Connector, remember to configure Hive below. 
(hive_app_configs) recipe_file: Custom recipe file upload recipe_url: Custom recipe upload via url h2o_drive: H2O Drive, remember to configure `h2o_drive_endpoint_url` below feature_store: Feature Store, remember to configure feature_store_endpoint_url below " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enabled_file_systems", + "output": "enabled file systems refers to File System Support upload : standard upload feature file : local file system/server file system hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below dtap : Blue Data Tap file system, remember to configure the DTap section below s3 : Amazon S3, optionally configure secret and access key below gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below minio : Minio Cloud Storage, remember to configure secret and access key below snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) jdbc: JDBC Connector, remember to configure JDBC below. (jdbc_app_configs) hive: Hive Connector, remember to configure Hive below. 
(hive_app_configs) recipe_file: Custom recipe file upload recipe_url: Custom recipe upload via url h2o_drive: H2O Drive, remember to configure `h2o_drive_endpoint_url` below feature_store: Feature Store, remember to configure feature_store_endpoint_url below " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enabled_file_systems", + "output": "enabled file systems refers to File System Support upload : standard upload feature file : local file system/server file system hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below dtap : Blue Data Tap file system, remember to configure the DTap section below s3 : Amazon S3, optionally configure secret and access key below gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below minio : Minio Cloud Storage, remember to configure secret and access key below snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) jdbc: JDBC Connector, remember to configure JDBC below. (jdbc_app_configs) hive: Hive Connector, remember to configure Hive below. 
(hive_app_configs) recipe_file: Custom recipe file upload recipe_url: Custom recipe upload via url h2o_drive: H2O Drive, remember to configure `h2o_drive_endpoint_url` below feature_store: Feature Store, remember to configure feature_store_endpoint_url below " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "file_hide_data_directory", + "output": "file hide data directory refers to The option disable access to DAI data_directory from file browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "file_hide_data_directory", + "output": "file hide data directory refers to The option disable access to DAI data_directory from file browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "file hide data directory", + "output": "file hide data directory refers to The option disable access to DAI data_directory from file browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "file hide data directory refers to The option disable access to DAI data_directory from file browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting file_hide_data_directory", + "output": "file hide data directory refers to The option disable access to DAI data_directory from file browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting file_hide_data_directory", + "output": "file hide data directory refers to The option disable access to DAI data_directory from file browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "file_path_filtering_enabled", + "output": "file path filtering 
enabled refers to Enable usage of path filters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "file_path_filtering_enabled", + "output": "file path filtering enabled refers to Enable usage of path filters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "file path filtering enabled", + "output": "file path filtering enabled refers to Enable usage of path filters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "file path filtering enabled refers to Enable usage of path filters" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting file_path_filtering_enabled", + "output": "file path filtering enabled refers to Enable usage of path filters" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting file_path_filtering_enabled", + "output": "file path filtering enabled refers to Enable usage of path filters" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "file_path_filter_include", + "output": "file path filter include refers to List of absolute path prefixes to restrict access to in file system browser. 
First add the following environment variable to your command line to enable this feature: file_path_filtering_enabled=true This feature can be used in the following ways (using specific path or using logged user's directory): file_path_filter_include=\"['/data/stage']\" file_path_filter_include=\"['/data/stage','/data/prod']\" file_path_filter_include=/home/{{DAI_USERNAME}}/ file_path_filter_include=\"['/home/{{DAI_USERNAME}}/','/data/stage','/data/prod']\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "file_path_filter_include", + "output": "file path filter include refers to List of absolute path prefixes to restrict access to in file system browser. First add the following environment variable to your command line to enable this feature: file_path_filtering_enabled=true This feature can be used in the following ways (using specific path or using logged user's directory): file_path_filter_include=\"['/data/stage']\" file_path_filter_include=\"['/data/stage','/data/prod']\" file_path_filter_include=/home/{{DAI_USERNAME}}/ file_path_filter_include=\"['/home/{{DAI_USERNAME}}/','/data/stage','/data/prod']\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "file path filter include", + "output": "file path filter include refers to List of absolute path prefixes to restrict access to in file system browser. 
First add the following environment variable to your command line to enable this feature: file_path_filtering_enabled=true This feature can be used in the following ways (using specific path or using logged user's directory): file_path_filter_include=\"['/data/stage']\" file_path_filter_include=\"['/data/stage','/data/prod']\" file_path_filter_include=/home/{{DAI_USERNAME}}/ file_path_filter_include=\"['/home/{{DAI_USERNAME}}/','/data/stage','/data/prod']\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "file path filter include refers to List of absolute path prefixes to restrict access to in file system browser. First add the following environment variable to your command line to enable this feature: file_path_filtering_enabled=true This feature can be used in the following ways (using specific path or using logged user's directory): file_path_filter_include=\"['/data/stage']\" file_path_filter_include=\"['/data/stage','/data/prod']\" file_path_filter_include=/home/{{DAI_USERNAME}}/ file_path_filter_include=\"['/home/{{DAI_USERNAME}}/','/data/stage','/data/prod']\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting file_path_filter_include", + "output": "file path filter include refers to List of absolute path prefixes to restrict access to in file system browser. 
First add the following environment variable to your command line to enable this feature: file_path_filtering_enabled=true This feature can be used in the following ways (using specific path or using logged user's directory): file_path_filter_include=\"['/data/stage']\" file_path_filter_include=\"['/data/stage','/data/prod']\" file_path_filter_include=/home/{{DAI_USERNAME}}/ file_path_filter_include=\"['/home/{{DAI_USERNAME}}/','/data/stage','/data/prod']\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting file_path_filter_include", + "output": "file path filter include refers to List of absolute path prefixes to restrict access to in file system browser. First add the following environment variable to your command line to enable this feature: file_path_filtering_enabled=true This feature can be used in the following ways (using specific path or using logged user's directory): file_path_filter_include=\"['/data/stage']\" file_path_filter_include=\"['/data/stage','/data/prod']\" file_path_filter_include=/home/{{DAI_USERNAME}}/ file_path_filter_include=\"['/home/{{DAI_USERNAME}}/','/data/stage','/data/prod']\" " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_auth_type", + "output": "hdfs auth type refers to (Required) HDFS connector Specify HDFS Auth Type, allowed options are: noauth : (default) No authentication needed principal : Authenticate with HDFS with a principal user (DEPRECTATED - use `keytab` auth type) keytab : Authenticate with a Key tab (recommended). If running DAI as a service, then the Kerberos keytab needs to be owned by the DAI user. 
keytabimpersonation : Login with impersonation using a keytab" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_auth_type", + "output": "hdfs auth type refers to (Required) HDFS connector Specify HDFS Auth Type, allowed options are: noauth : (default) No authentication needed principal : Authenticate with HDFS with a principal user (DEPRECTATED - use `keytab` auth type) keytab : Authenticate with a Key tab (recommended). If running DAI as a service, then the Kerberos keytab needs to be owned by the DAI user. keytabimpersonation : Login with impersonation using a keytab" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs auth type", + "output": "hdfs auth type refers to (Required) HDFS connector Specify HDFS Auth Type, allowed options are: noauth : (default) No authentication needed principal : Authenticate with HDFS with a principal user (DEPRECTATED - use `keytab` auth type) keytab : Authenticate with a Key tab (recommended). If running DAI as a service, then the Kerberos keytab needs to be owned by the DAI user. keytabimpersonation : Login with impersonation using a keytab" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hdfs auth type refers to (Required) HDFS connector Specify HDFS Auth Type, allowed options are: noauth : (default) No authentication needed principal : Authenticate with HDFS with a principal user (DEPRECTATED - use `keytab` auth type) keytab : Authenticate with a Key tab (recommended). If running DAI as a service, then the Kerberos keytab needs to be owned by the DAI user. 
keytabimpersonation : Login with impersonation using a keytab" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hdfs_auth_type", + "output": "hdfs auth type refers to (Required) HDFS connector Specify HDFS Auth Type, allowed options are: noauth : (default) No authentication needed principal : Authenticate with HDFS with a principal user (DEPRECTATED - use `keytab` auth type) keytab : Authenticate with a Key tab (recommended). If running DAI as a service, then the Kerberos keytab needs to be owned by the DAI user. keytabimpersonation : Login with impersonation using a keytab" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hdfs_auth_type", + "output": "hdfs auth type refers to (Required) HDFS connector Specify HDFS Auth Type, allowed options are: noauth : (default) No authentication needed principal : Authenticate with HDFS with a principal user (DEPRECTATED - use `keytab` auth type) keytab : Authenticate with a Key tab (recommended). If running DAI as a service, then the Kerberos keytab needs to be owned by the DAI user. keytabimpersonation : Login with impersonation using a keytab" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_app_principal_user", + "output": "hdfs app principal user refers to Kerberos app principal user. Required when hdfs_auth_type='keytab'; recommended otherwise." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_app_principal_user", + "output": "hdfs app principal user refers to Kerberos app principal user. Required when hdfs_auth_type='keytab'; recommended otherwise." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs app principal user", + "output": "hdfs app principal user refers to Kerberos app principal user. Required when hdfs_auth_type='keytab'; recommended otherwise." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hdfs app principal user refers to Kerberos app principal user. Required when hdfs_auth_type='keytab'; recommended otherwise." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hdfs_app_principal_user", + "output": "hdfs app principal user refers to Kerberos app principal user. Required when hdfs_auth_type='keytab'; recommended otherwise." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hdfs_app_principal_user", + "output": "hdfs app principal user refers to Kerberos app principal user. Required when hdfs_auth_type='keytab'; recommended otherwise." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_app_login_user", + "output": "hdfs app login user refers to Deprecated - Do Not Use, login user is taken from the user name from login" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_app_login_user", + "output": "hdfs app login user refers to Deprecated - Do Not Use, login user is taken from the user name from login" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs app login user", + "output": "hdfs app login user refers to Deprecated - Do Not Use, login user is taken from the user name from login" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hdfs app login user refers to Deprecated - Do Not Use, login user is taken from the user name from login" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hdfs_app_login_user", + "output": "hdfs app login user refers to Deprecated - Do Not Use, login user is taken from the user name from login" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hdfs_app_login_user", + "output": "hdfs app login user refers to Deprecated - Do Not Use, login user is taken from the user name from login" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_app_jvm_args", + "output": "hdfs app jvm args refers to JVM args for HDFS distributions, provide args separated by space -Djava.security.krb5.conf=/krb5.conf -Dsun.security.krb5.debug=True -Dlog4j.configuration=file:///log4j.properties" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following 
expert setting for Driverless AI", + "input": "hdfs_app_jvm_args", + "output": "hdfs app jvm args refers to JVM args for HDFS distributions, provide args separated by space -Djava.security.krb5.conf=/krb5.conf -Dsun.security.krb5.debug=True -Dlog4j.configuration=file:///log4j.properties" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs app jvm args", + "output": "hdfs app jvm args refers to JVM args for HDFS distributions, provide args separated by space -Djava.security.krb5.conf=/krb5.conf -Dsun.security.krb5.debug=True -Dlog4j.configuration=file:///log4j.properties" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hdfs app jvm args refers to JVM args for HDFS distributions, provide args separated by space -Djava.security.krb5.conf=/krb5.conf -Dsun.security.krb5.debug=True -Dlog4j.configuration=file:///log4j.properties" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hdfs_app_jvm_args", + "output": "hdfs app jvm args refers to JVM args for HDFS distributions, provide args separated by space -Djava.security.krb5.conf=/krb5.conf -Dsun.security.krb5.debug=True -Dlog4j.configuration=file:///log4j.properties" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hdfs_app_jvm_args", + "output": "hdfs app jvm args refers to JVM args for HDFS distributions, provide args separated by space -Djava.security.krb5.conf=/krb5.conf -Dsun.security.krb5.debug=True -Dlog4j.configuration=file:///log4j.properties" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_app_classpath", + "output": "hdfs app classpath refers to hdfs class path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert 
setting for Driverless AI", + "input": "hdfs_app_classpath", + "output": "hdfs app classpath refers to hdfs class path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs app classpath", + "output": "hdfs app classpath refers to hdfs class path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hdfs app classpath refers to hdfs class path" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hdfs_app_classpath", + "output": "hdfs app classpath refers to hdfs class path" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hdfs_app_classpath", + "output": "hdfs app classpath refers to hdfs class path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_app_supported_schemes", + "output": "hdfs app supported schemes refers to List of supported DFS schemas. Ex. \"['hdfs://', 'maprfs://', 'swift://']\" Supported schemas list is used as an initial check to ensure valid input to connector " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_app_supported_schemes", + "output": "hdfs app supported schemes refers to List of supported DFS schemas. Ex. \"['hdfs://', 'maprfs://', 'swift://']\" Supported schemas list is used as an initial check to ensure valid input to connector " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs app supported schemes", + "output": "hdfs app supported schemes refers to List of supported DFS schemas. Ex. 
\"['hdfs://', 'maprfs://', 'swift://']\" Supported schemas list is used as an initial check to ensure valid input to connector " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hdfs app supported schemes refers to List of supported DFS schemas. Ex. \"['hdfs://', 'maprfs://', 'swift://']\" Supported schemas list is used as an initial check to ensure valid input to connector " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hdfs_app_supported_schemes", + "output": "hdfs app supported schemes refers to List of supported DFS schemas. Ex. \"['hdfs://', 'maprfs://', 'swift://']\" Supported schemas list is used as an initial check to ensure valid input to connector " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hdfs_app_supported_schemes", + "output": "hdfs app supported schemes refers to List of supported DFS schemas. Ex. \"['hdfs://', 'maprfs://', 'swift://']\" Supported schemas list is used as an initial check to ensure valid input to connector " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_max_files_listed", + "output": "hdfs max files listed refers to Maximum number of files viewable in connector ui. Set to larger number to view more files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_max_files_listed", + "output": "hdfs max files listed refers to Maximum number of files viewable in connector ui. Set to larger number to view more files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs max files listed", + "output": "hdfs max files listed refers to Maximum number of files viewable in connector ui. 
Set to larger number to view more files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hdfs max files listed refers to Maximum number of files viewable in connector ui. Set to larger number to view more files" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hdfs_max_files_listed", + "output": "hdfs max files listed refers to Maximum number of files viewable in connector ui. Set to larger number to view more files" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hdfs_max_files_listed", + "output": "hdfs max files listed refers to Maximum number of files viewable in connector ui. Set to larger number to view more files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_init_path", + "output": "hdfs init path refers to Starting HDFS path displayed in UI HDFS browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_init_path", + "output": "hdfs init path refers to Starting HDFS path displayed in UI HDFS browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs init path", + "output": "hdfs init path refers to Starting HDFS path displayed in UI HDFS browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hdfs init path refers to Starting HDFS path displayed in UI HDFS browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hdfs_init_path", + "output": "hdfs init path refers to Starting HDFS path displayed in UI HDFS browser" + }, + { + "prompt_type": "human_bot", 
+ "instruction": "Provide a detailed explanation of the expert setting hdfs_init_path", + "output": "hdfs init path refers to Starting HDFS path displayed in UI HDFS browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_upload_init_path", + "output": "hdfs upload init path refers to Starting HDFS path for the artifacts upload operations" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_upload_init_path", + "output": "hdfs upload init path refers to Starting HDFS path for the artifacts upload operations" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs upload init path", + "output": "hdfs upload init path refers to Starting HDFS path for the artifacts upload operations" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hdfs upload init path refers to Starting HDFS path for the artifacts upload operations" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hdfs_upload_init_path", + "output": "hdfs upload init path refers to Starting HDFS path for the artifacts upload operations" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hdfs_upload_init_path", + "output": "hdfs upload init path refers to Starting HDFS path for the artifacts upload operations" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_mapr_multi_user_mode", + "output": "enable mapr multi user mode refers to Enables the multi-user mode for MapR integration, which allows to have MapR ticket per user." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_mapr_multi_user_mode", + "output": "enable mapr multi user mode refers to Enables the multi-user mode for MapR integration, which allows to have MapR ticket per user." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable mapr multi user mode", + "output": "enable mapr multi user mode refers to Enables the multi-user mode for MapR integration, which allows to have MapR ticket per user." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable mapr multi user mode refers to Enables the multi-user mode for MapR integration, which allows to have MapR ticket per user." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_mapr_multi_user_mode", + "output": "enable mapr multi user mode refers to Enables the multi-user mode for MapR integration, which allows to have MapR ticket per user." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_mapr_multi_user_mode", + "output": "enable mapr multi user mode refers to Enables the multi-user mode for MapR integration, which allows to have MapR ticket per user." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_auth_type", + "output": "dtap auth type refers to Blue Data DTap connector settings are similar to HDFS connector settings. Specify DTap Auth Type, allowed options are: noauth : No authentication needed principal : Authenticate with DTap with a principal user keytab : Authenticate with a Key tab (recommended). If running DAI as a service, then the Kerberos keytab needs to be owned by the DAI user. 
keytabimpersonation : Login with impersonation using a keytab NOTE: \"hdfs_app_classpath\" and \"core_site_xml_path\" are both required to be set for DTap connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_auth_type", + "output": "dtap auth type refers to Blue Data DTap connector settings are similar to HDFS connector settings. Specify DTap Auth Type, allowed options are: noauth : No authentication needed principal : Authenticate with DTap with a principal user keytab : Authenticate with a Key tab (recommended). If running DAI as a service, then the Kerberos keytab needs to be owned by the DAI user. keytabimpersonation : Login with impersonation using a keytab NOTE: \"hdfs_app_classpath\" and \"core_site_xml_path\" are both required to be set for DTap connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap auth type", + "output": "dtap auth type refers to Blue Data DTap connector settings are similar to HDFS connector settings. Specify DTap Auth Type, allowed options are: noauth : No authentication needed principal : Authenticate with DTap with a principal user keytab : Authenticate with a Key tab (recommended). If running DAI as a service, then the Kerberos keytab needs to be owned by the DAI user. keytabimpersonation : Login with impersonation using a keytab NOTE: \"hdfs_app_classpath\" and \"core_site_xml_path\" are both required to be set for DTap connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "dtap auth type refers to Blue Data DTap connector settings are similar to HDFS connector settings. Specify DTap Auth Type, allowed options are: noauth : No authentication needed principal : Authenticate with DTap with a principal user keytab : Authenticate with a Key tab (recommended). 
If running DAI as a service, then the Kerberos keytab needs to be owned by the DAI user. keytabimpersonation : Login with impersonation using a keytab NOTE: \"hdfs_app_classpath\" and \"core_site_xml_path\" are both required to be set for DTap connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dtap_auth_type", + "output": "dtap auth type refers to Blue Data DTap connector settings are similar to HDFS connector settings. Specify DTap Auth Type, allowed options are: noauth : No authentication needed principal : Authenticate with DTap with a principal user keytab : Authenticate with a Key tab (recommended). If running DAI as a service, then the Kerberos keytab needs to be owned by the DAI user. keytabimpersonation : Login with impersonation using a keytab NOTE: \"hdfs_app_classpath\" and \"core_site_xml_path\" are both required to be set for DTap connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dtap_auth_type", + "output": "dtap auth type refers to Blue Data DTap connector settings are similar to HDFS connector settings. Specify DTap Auth Type, allowed options are: noauth : No authentication needed principal : Authenticate with DTap with a principal user keytab : Authenticate with a Key tab (recommended). If running DAI as a service, then the Kerberos keytab needs to be owned by the DAI user. 
keytabimpersonation : Login with impersonation using a keytab NOTE: \"hdfs_app_classpath\" and \"core_site_xml_path\" are both required to be set for DTap connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_config_path", + "output": "dtap config path refers to Dtap (HDFS) config folder path , can contain multiple config files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_config_path", + "output": "dtap config path refers to Dtap (HDFS) config folder path , can contain multiple config files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap config path", + "output": "dtap config path refers to Dtap (HDFS) config folder path , can contain multiple config files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "dtap config path refers to Dtap (HDFS) config folder path , can contain multiple config files" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dtap_config_path", + "output": "dtap config path refers to Dtap (HDFS) config folder path , can contain multiple config files" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dtap_config_path", + "output": "dtap config path refers to Dtap (HDFS) config folder path , can contain multiple config files" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_key_tab_path", + "output": "dtap key tab path refers to Path of the principal key tab file, dtap_key_tab_path is deprecated. 
Please use dtap_keytab_path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_key_tab_path", + "output": "dtap key tab path refers to Path of the principal key tab file, dtap_key_tab_path is deprecated. Please use dtap_keytab_path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap key tab path", + "output": "dtap key tab path refers to Path of the principal key tab file, dtap_key_tab_path is deprecated. Please use dtap_keytab_path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "dtap key tab path refers to Path of the principal key tab file, dtap_key_tab_path is deprecated. Please use dtap_keytab_path" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dtap_key_tab_path", + "output": "dtap key tab path refers to Path of the principal key tab file, dtap_key_tab_path is deprecated. Please use dtap_keytab_path" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dtap_key_tab_path", + "output": "dtap key tab path refers to Path of the principal key tab file, dtap_key_tab_path is deprecated. 
Please use dtap_keytab_path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_keytab_path", + "output": "dtap keytab path refers to Path of the principal key tab file" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_keytab_path", + "output": "dtap keytab path refers to Path of the principal key tab file" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap keytab path", + "output": "dtap keytab path refers to Path of the principal key tab file" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "dtap keytab path refers to Path of the principal key tab file" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dtap_keytab_path", + "output": "dtap keytab path refers to Path of the principal key tab file" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dtap_keytab_path", + "output": "dtap keytab path refers to Path of the principal key tab file" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_app_principal_user", + "output": "dtap app principal user refers to Kerberos app principal user (recommended)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_app_principal_user", + "output": "dtap app principal user refers to Kerberos app principal user (recommended)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap app principal user", + "output": "dtap app principal user refers to Kerberos app 
principal user (recommended)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "dtap app principal user refers to Kerberos app principal user (recommended)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dtap_app_principal_user", + "output": "dtap app principal user refers to Kerberos app principal user (recommended)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dtap_app_principal_user", + "output": "dtap app principal user refers to Kerberos app principal user (recommended)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_app_login_user", + "output": "dtap app login user refers to Specify the user id of the current user here as user@realm" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_app_login_user", + "output": "dtap app login user refers to Specify the user id of the current user here as user@realm" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap app login user", + "output": "dtap app login user refers to Specify the user id of the current user here as user@realm" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "dtap app login user refers to Specify the user id of the current user here as user@realm" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dtap_app_login_user", + "output": "dtap app login user refers to Specify the user id of the current user here as user@realm" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed 
explanation of the expert setting dtap_app_login_user", + "output": "dtap app login user refers to Specify the user id of the current user here as user@realm" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_app_jvm_args", + "output": "dtap app jvm args refers to JVM args for DTap distributions, provide args separated by space" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_app_jvm_args", + "output": "dtap app jvm args refers to JVM args for DTap distributions, provide args separated by space" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap app jvm args", + "output": "dtap app jvm args refers to JVM args for DTap distributions, provide args separated by space" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "dtap app jvm args refers to JVM args for DTap distributions, provide args separated by space" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dtap_app_jvm_args", + "output": "dtap app jvm args refers to JVM args for DTap distributions, provide args separated by space" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dtap_app_jvm_args", + "output": "dtap app jvm args refers to JVM args for DTap distributions, provide args separated by space" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_app_classpath", + "output": "dtap app classpath refers to DTap (HDFS) class path. 
NOTE: set 'hdfs_app_classpath' also" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_app_classpath", + "output": "dtap app classpath refers to DTap (HDFS) class path. NOTE: set 'hdfs_app_classpath' also" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap app classpath", + "output": "dtap app classpath refers to DTap (HDFS) class path. NOTE: set 'hdfs_app_classpath' also" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "dtap app classpath refers to DTap (HDFS) class path. NOTE: set 'hdfs_app_classpath' also" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dtap_app_classpath", + "output": "dtap app classpath refers to DTap (HDFS) class path. NOTE: set 'hdfs_app_classpath' also" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dtap_app_classpath", + "output": "dtap app classpath refers to DTap (HDFS) class path. 
NOTE: set 'hdfs_app_classpath' also" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_init_path", + "output": "dtap init path refers to Starting DTAP path displayed in UI DTAP browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap_init_path", + "output": "dtap init path refers to Starting DTAP path displayed in UI DTAP browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dtap init path", + "output": "dtap init path refers to Starting DTAP path displayed in UI DTAP browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "dtap init path refers to Starting DTAP path displayed in UI DTAP browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dtap_init_path", + "output": "dtap init path refers to Starting DTAP path displayed in UI DTAP browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dtap_init_path", + "output": "dtap init path refers to Starting DTAP path displayed in UI DTAP browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws_access_key_id", + "output": "aws access key id refers to S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws_access_key_id", + "output": "aws access key id refers to AWS Access Key ID: S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws access key id", + "output": "aws access key id refers to 
AWS Access Key ID: S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "AWS Access Key ID: ", + "output": "aws access key id refers to AWS Access Key ID: S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting aws_access_key_id", + "output": "aws access key id refers to S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting aws_access_key_id", + "output": "aws access key id refers to AWS Access Key ID: S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws_secret_access_key", + "output": "aws secret access key refers to S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws_secret_access_key", + "output": "aws secret access key refers to AWS Secret Access Key: S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws secret access key", + "output": "aws secret access key refers to AWS Secret Access Key: S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "AWS Secret Access Key: ", + "output": "aws secret access key refers to AWS Secret Access Key: S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting aws_secret_access_key", + "output": "aws secret access key refers to S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting aws_secret_access_key", + "output": "aws 
secret access key refers to AWS Secret Access Key: S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws_role_arn", + "output": "aws role arn refers to S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws_role_arn", + "output": "aws role arn refers to S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws role arn", + "output": "aws role arn refers to S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "aws role arn refers to S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting aws_role_arn", + "output": "aws role arn refers to S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting aws_role_arn", + "output": "aws role arn refers to S3 Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws_default_region", + "output": "aws default region refers to What region to use when none is specified in the s3 url. Ignored when aws_s3_endpoint_url is set. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws_default_region", + "output": "aws default region refers to What region to use when none is specified in the s3 url. Ignored when aws_s3_endpoint_url is set. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws default region", + "output": "aws default region refers to What region to use when none is specified in the s3 url. Ignored when aws_s3_endpoint_url is set. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "aws default region refers to What region to use when none is specified in the s3 url. Ignored when aws_s3_endpoint_url is set. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting aws_default_region", + "output": "aws default region refers to What region to use when none is specified in the s3 url. Ignored when aws_s3_endpoint_url is set. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting aws_default_region", + "output": "aws default region refers to What region to use when none is specified in the s3 url. Ignored when aws_s3_endpoint_url is set. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws_s3_endpoint_url", + "output": "aws s3 endpoint url refers to Sets endpoint URL that will be used to access S3." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws_s3_endpoint_url", + "output": "aws s3 endpoint url refers to Sets endpoint URL that will be used to access S3." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws s3 endpoint url", + "output": "aws s3 endpoint url refers to Sets endpoint URL that will be used to access S3." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "aws s3 endpoint url refers to Sets endpoint URL that will be used to access S3." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting aws_s3_endpoint_url", + "output": "aws s3 endpoint url refers to Sets endpoint URL that will be used to access S3." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting aws_s3_endpoint_url", + "output": "aws s3 endpoint url refers to Sets endpoint URL that will be used to access S3." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws_use_ec2_role_credentials", + "output": "aws use ec2 role credentials refers to If set to true S3 Connector will try to obtain credentials associated with the role attached to the EC2 instance." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws_use_ec2_role_credentials", + "output": "aws use ec2 role credentials refers to If set to true S3 Connector will try to obtain credentials associated with the role attached to the EC2 instance." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "aws use ec2 role credentials", + "output": "aws use ec2 role credentials refers to If set to true S3 Connector will try to obtain credentials associated with the role attached to the EC2 instance." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "aws use ec2 role credentials refers to If set to true S3 Connector will try to obtain credentials associated with the role attached to the EC2 instance."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting aws_use_ec2_role_credentials", + "output": "aws use ec2 role credentials refers to If set to true S3 Connector will try to obtain credentials associated with the role attached to the EC2 instance." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting aws_use_ec2_role_credentials", + "output": "aws use ec2 role credentials refers to If set to true S3 Connector will try to obtain credentials associated with the role attached to the EC2 instance." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "s3_init_path", + "output": "s3 init path refers to Starting S3 path displayed in UI S3 browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "s3_init_path", + "output": "s3 init path refers to Starting S3 path displayed in UI S3 browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "s3 init path", + "output": "s3 init path refers to Starting S3 path displayed in UI S3 browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "s3 init path refers to Starting S3 path displayed in UI S3 browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting s3_init_path", + "output": "s3 init path refers to Starting S3 path displayed in UI S3 browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting s3_init_path", + "output": "s3 init path refers to Starting S3 path displayed in UI S3 browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting
for Driverless AI", + "input": "s3_skip_cert_verification", + "output": "s3 skip cert verification refers to S3 Connector will skip cert verification if this is set to true, (mostly used for S3-like connectors, e.g. Ceph)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "s3_skip_cert_verification", + "output": "s3 skip cert verification refers to S3 Connector will skip cert verification if this is set to true, (mostly used for S3-like connectors, e.g. Ceph)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "s3 skip cert verification", + "output": "s3 skip cert verification refers to S3 Connector will skip cert verification if this is set to true, (mostly used for S3-like connectors, e.g. Ceph)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "s3 skip cert verification refers to S3 Connector will skip cert verification if this is set to true, (mostly used for S3-like connectors, e.g. Ceph)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting s3_skip_cert_verification", + "output": "s3 skip cert verification refers to S3 Connector will skip cert verification if this is set to true, (mostly used for S3-like connectors, e.g. Ceph)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting s3_skip_cert_verification", + "output": "s3 skip cert verification refers to S3 Connector will skip cert verification if this is set to true, (mostly used for S3-like connectors, e.g. 
Ceph)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "s3_connector_cert_location", + "output": "s3 connector cert location refers to path/to/cert/bundle.pem - A filename of the CA cert bundle to use for the S3 connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "s3_connector_cert_location", + "output": "s3 connector cert location refers to path/to/cert/bundle.pem - A filename of the CA cert bundle to use for the S3 connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "s3 connector cert location", + "output": "s3 connector cert location refers to path/to/cert/bundle.pem - A filename of the CA cert bundle to use for the S3 connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "s3 connector cert location refers to path/to/cert/bundle.pem - A filename of the CA cert bundle to use for the S3 connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting s3_connector_cert_location", + "output": "s3 connector cert location refers to path/to/cert/bundle.pem - A filename of the CA cert bundle to use for the S3 connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting s3_connector_cert_location", + "output": "s3 connector cert location refers to path/to/cert/bundle.pem - A filename of the CA cert bundle to use for the S3 connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gcs_path_to_service_account_json", + "output": "gcs path to service account json refers to GCS Connector credentials example (suggested) -- 
'/licenses/my_service_account_json.json' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gcs_path_to_service_account_json", + "output": "gcs path to service account json refers to GCS Connector credentials example (suggested) -- '/licenses/my_service_account_json.json' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gcs path to service account json", + "output": "gcs path to service account json refers to GCS Connector credentials example (suggested) -- '/licenses/my_service_account_json.json' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "gcs path to service account json refers to GCS Connector credentials example (suggested) -- '/licenses/my_service_account_json.json' " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting gcs_path_to_service_account_json", + "output": "gcs path to service account json refers to GCS Connector credentials example (suggested) -- '/licenses/my_service_account_json.json' " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting gcs_path_to_service_account_json", + "output": "gcs path to service account json refers to GCS Connector credentials example (suggested) -- '/licenses/my_service_account_json.json' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gcs_init_path", + "output": "gcs init path refers to Starting GCS path displayed in UI GCS browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gcs_init_path", + "output": "gcs init path refers to Starting GCS path displayed in UI GCS browser" + }, + { + "prompt_type":
"human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gcs init path", + "output": "gcs init path refers to Starting GCS path displayed in UI GCS browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "gcs init path refers to Starting GCS path displayed in UI GCS browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting gcs_init_path", + "output": "gcs init path refers to Starting GCS path displayed in UI GCS browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting gcs_init_path", + "output": "gcs init path refers to Starting GCS path displayed in UI GCS browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gcs_access_token_scopes", + "output": "gcs access token scopes refers to Space-separated list of OAuth2 scopes for the access token used to authenticate in Google Cloud Storage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gcs_access_token_scopes", + "output": "gcs access token scopes refers to Space-separated list of OAuth2 scopes for the access token used to authenticate in Google Cloud Storage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gcs access token scopes", + "output": "gcs access token scopes refers to Space-separated list of OAuth2 scopes for the access token used to authenticate in Google Cloud Storage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "gcs access token scopes refers to Space-separated list of OAuth2 scopes for the access token used to authenticate
in Google Cloud Storage" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting gcs_access_token_scopes", + "output": "gcs access token scopes refers to Space-separated list of OAuth2 scopes for the access token used to authenticate in Google Cloud Storage" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting gcs_access_token_scopes", + "output": "gcs access token scopes refers to Space-separated list of OAuth2 scopes for the access token used to authenticate in Google Cloud Storage" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gcs_default_project_id", + "output": "gcs default project id refers to When ``google_cloud_use_oauth`` is enabled, Google Cloud client cannot automatically infer the default project, thus it must be explicitly specified" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gcs_default_project_id", + "output": "gcs default project id refers to When ``google_cloud_use_oauth`` is enabled, Google Cloud client cannot automatically infer the default project, thus it must be explicitly specified" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gcs default project id", + "output": "gcs default project id refers to When ``google_cloud_use_oauth`` is enabled, Google Cloud client cannot automatically infer the default project, thus it must be explicitly specified" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "gcs default project id refers to When ``google_cloud_use_oauth`` is enabled, Google Cloud client cannot automatically infer the default project, thus it must be explicitly specified" + }, + { + "prompt_type":
"human_bot", + "instruction": "Provide a short explanation of the expert setting gcs_default_project_id", + "output": "gcs default project id refers to When ``google_cloud_use_oauth`` is enabled, Google Cloud client cannot automatically infer the default project, thus it must be explicitly specified" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting gcs_default_project_id", + "output": "gcs default project id refers to When ``google_cloud_use_oauth`` is enabled, Google Cloud client cannot automatically infer the default project, thus it must be explicitly specified" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gbq_access_token_scopes", + "output": "gbq access token scopes refers to Space-separated list of OAuth2 scopes for the access token used to authenticate in Google BigQuery" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gbq_access_token_scopes", + "output": "gbq access token scopes refers to Space-separated list of OAuth2 scopes for the access token used to authenticate in Google BigQuery" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gbq access token scopes", + "output": "gbq access token scopes refers to Space-separated list of OAuth2 scopes for the access token used to authenticate in Google BigQuery" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "gbq access token scopes refers to Space-separated list of OAuth2 scopes for the access token used to authenticate in Google BigQuery" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting gbq_access_token_scopes", + "output": "gbq access token scopes refers to
Space-separated list of OAuth2 scopes for the access token used to authenticate in Google BigQuery" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting gbq_access_token_scopes", + "output": "gbq access token scopes refers to Space-separated list of OAuth2 scopes for the access token used to authenticate in Google BigQuery" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "google_cloud_use_oauth", + "output": "google cloud use oauth refers to By default the DriverlessAI Google Cloud Storage and BigQuery connectors are using service account file to retrieve authentication credentials. When enabled, the Storage and BigQuery connectors will use OAuth2 user access tokens to authenticate in Google Cloud instead." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "google_cloud_use_oauth", + "output": "google cloud use oauth refers to By default the DriverlessAI Google Cloud Storage and BigQuery connectors are using service account file to retrieve authentication credentials. When enabled, the Storage and BigQuery connectors will use OAuth2 user access tokens to authenticate in Google Cloud instead." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "google cloud use oauth", + "output": "google cloud use oauth refers to By default the DriverlessAI Google Cloud Storage and BigQuery connectors are using service account file to retrieve authentication credentials. When enabled, the Storage and BigQuery connectors will use OAuth2 user access tokens to authenticate in Google Cloud instead."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "google cloud use oauth refers to By default the DriverlessAI Google Cloud Storage and BigQuery connectors are using service account file to retrieve authentication credentials. When enabled, the Storage and BigQuery connectors will use OAuth2 user access tokens to authenticate in Google Cloud instead." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting google_cloud_use_oauth", + "output": "google cloud use oauth refers to By default the DriverlessAI Google Cloud Storage and BigQuery connectors are using service account file to retrieve authentication credentials. When enabled, the Storage and BigQuery connectors will use OAuth2 user access tokens to authenticate in Google Cloud instead." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting google_cloud_use_oauth", + "output": "google cloud use oauth refers to By default the DriverlessAI Google Cloud Storage and BigQuery connectors are using service account file to retrieve authentication credentials. When enabled, the Storage and BigQuery connectors will use OAuth2 user access tokens to authenticate in Google Cloud instead."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio_endpoint_url", + "output": "minio endpoint url refers to Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio_endpoint_url", + "output": "minio endpoint url refers to Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio endpoint url", + "output": "minio endpoint url refers to Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "minio endpoint url refers to Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting minio_endpoint_url", + "output": "minio endpoint url refers to Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting minio_endpoint_url", + "output": "minio endpoint url refers to Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio_access_key_id", + "output": "minio access key id refers to Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio_access_key_id", + "output": "minio access key id refers to Minio Access Key ID: Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio access key id", + "output": "minio access key id refers to Minio Access Key ID: Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Explain the following expert setting for Driverless AI", + "input": "Minio Access Key ID: ", + "output": "minio access key id refers to Minio Access Key ID: Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting minio_access_key_id", + "output": "minio access key id refers to Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting minio_access_key_id", + "output": "minio access key id refers to Minio Access Key ID: Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio_secret_access_key", + "output": "minio secret access key refers to Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio_secret_access_key", + "output": "minio secret access key refers to Minio Secret Access Key: Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio secret access key", + "output": "minio secret access key refers to Minio Secret Access Key: Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Minio Secret Access Key: ", + "output": "minio secret access key refers to Minio Secret Access Key: Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting minio_secret_access_key", + "output": "minio secret access key refers to Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting minio_secret_access_key", + "output": "minio secret access key 
refers to Minio Secret Access Key: Minio Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio_skip_cert_verification", + "output": "minio skip cert verification refers to Minio Connector will skip cert verification if this is set to true" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio_skip_cert_verification", + "output": "minio skip cert verification refers to Minio Connector will skip cert verification if this is set to true" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio skip cert verification", + "output": "minio skip cert verification refers to Minio Connector will skip cert verification if this is set to true" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "minio skip cert verification refers to Minio Connector will skip cert verification if this is set to true" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting minio_skip_cert_verification", + "output": "minio skip cert verification refers to Minio Connector will skip cert verification if this is set to true" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting minio_skip_cert_verification", + "output": "minio skip cert verification refers to Minio Connector will skip cert verification if this is set to true" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio_connector_cert_location", + "output": "minio connector cert location refers to path/to/cert/bundle.pem - A filename of the CA cert bundle to use for the Minio connector" + }, + { + "prompt_type": 
"human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio_connector_cert_location", + "output": "minio connector cert location refers to path/to/cert/bundle.pem - A filename of the CA cert bundle to use for the Minio connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio connector cert location", + "output": "minio connector cert location refers to path/to/cert/bundle.pem - A filename of the CA cert bundle to use for the Minio connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "minio connector cert location refers to path/to/cert/bundle.pem - A filename of the CA cert bundle to use for the Minio connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting minio_connector_cert_location", + "output": "minio connector cert location refers to path/to/cert/bundle.pem - A filename of the CA cert bundle to use for the Minio connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting minio_connector_cert_location", + "output": "minio connector cert location refers to path/to/cert/bundle.pem - A filename of the CA cert bundle to use for the Minio connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio_init_path", + "output": "minio init path refers to Starting Minio path displayed in UI Minio browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "minio_init_path", + "output": "minio init path refers to Starting Minio path displayed in UI Minio browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless 
AI", + "input": "minio init path", + "output": "minio init path refers to Starting Minio path displayed in UI Minio browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "minio init path refers to Starting Minio path displayed in UI Minio browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting minio_init_path", + "output": "minio init path refers to Starting Minio path displayed in UI Minio browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting minio_init_path", + "output": "minio init path refers to Starting Minio path displayed in UI Minio browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_drive_endpoint_url", + "output": "h2o drive endpoint url refers to H2O Drive server endpoint URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_drive_endpoint_url", + "output": "h2o drive endpoint url refers to H2O Drive server endpoint URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o drive endpoint url", + "output": "h2o drive endpoint url refers to H2O Drive server endpoint URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o drive endpoint url refers to H2O Drive server endpoint URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_drive_endpoint_url", + "output": "h2o drive endpoint url refers to H2O Drive server endpoint URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting 
h2o_drive_endpoint_url", + "output": "h2o drive endpoint url refers to H2O Drive server endpoint URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_drive_access_token_scopes", + "output": "h2o drive access token scopes refers to Space-separated list of OpenID scopes for the access token used by the H2O Drive connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_drive_access_token_scopes", + "output": "h2o drive access token scopes refers to Space-separated list of OpenID scopes for the access token used by the H2O Drive connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o drive access token scopes", + "output": "h2o drive access token scopes refers to Space-separated list of OpenID scopes for the access token used by the H2O Drive connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o drive access token scopes refers to Space-separated list of OpenID scopes for the access token used by the H2O Drive connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_drive_access_token_scopes", + "output": "h2o drive access token scopes refers to Space-separated list of OpenID scopes for the access token used by the H2O Drive connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_drive_access_token_scopes", + "output": "h2o drive access token scopes refers to Space-separated list of OpenID scopes for the access token used by the H2O Drive connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input":
"h2o_drive_session_duration", + "output": "h2o drive session duration refers to Maximum duration (in seconds) for a session with the H2O Drive" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_drive_session_duration", + "output": "h2o drive session duration refers to Maximum duration (in seconds) for a session with the H2O Drive" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o drive session duration", + "output": "h2o drive session duration refers to Maximum duration (in seconds) for a session with the H2O Drive" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o drive session duration refers to Maximum duration (in seconds) for a session with the H2O Drive" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_drive_session_duration", + "output": "h2o drive session duration refers to Maximum duration (in seconds) for a session with the H2O Drive" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_drive_session_duration", + "output": "h2o drive session duration refers to Maximum duration (in seconds) for a session with the H2O Drive" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake_url", + "output": "snowflake url refers to Recommended Provide: url, user, password Optionally Provide: account, user, password Example URL: https://..snowflakecomputing.com Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake_url", + "output": "snowflake url refers to Recommended Provide: url, user, password 
Optionally Provide: account, user, password Example URL: https://..snowflakecomputing.com Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake url", + "output": "snowflake url refers to Recommended Provide: url, user, password Optionally Provide: account, user, password Example URL: https://..snowflakecomputing.com Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "snowflake url refers to Recommended Provide: url, user, password Optionally Provide: account, user, password Example URL: https://..snowflakecomputing.com Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting snowflake_url", + "output": "snowflake url refers to Recommended Provide: url, user, password Optionally Provide: account, user, password Example URL: https://..snowflakecomputing.com Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting snowflake_url", + "output": "snowflake url refers to Recommended Provide: url, user, password Optionally Provide: account, user, password Example URL: https://..snowflakecomputing.com Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake_user", + "output": "snowflake user refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake_user", + "output": "snowflake user refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": 
"snowflake user", + "output": "snowflake user refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "snowflake user refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting snowflake_user", + "output": "snowflake user refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting snowflake_user", + "output": "snowflake user refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake_password", + "output": "snowflake password refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake_password", + "output": "snowflake password refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake password", + "output": "snowflake password refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "snowflake password refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting snowflake_password", + "output": "snowflake password refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting snowflake_password", + "output": "snowflake password refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", 
+ "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake_account", + "output": "snowflake account refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake_account", + "output": "snowflake account refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake account", + "output": "snowflake account refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "snowflake account refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting snowflake_account", + "output": "snowflake account refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting snowflake_account", + "output": "snowflake account refers to Snowflake Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake_allow_stages", + "output": "snowflake allow stages refers to Setting to allow or disallow Snowflake connector from using Snowflake stages during queries. True - will permit the connector to use stages and generally improves performance. However, if the Snowflake user does not have permission to create/use stages will end in errors. False - will prevent the connector from using stages, thus Snowflake users without permission to create/use stages will have successful queries, however may significantly negatively impact query performance. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake_allow_stages", + "output": "snowflake allow stages refers to Setting to allow or disallow Snowflake connector from using Snowflake stages during queries. True - will permit the connector to use stages and generally improves performance. However, if the Snowflake user does not have permission to create/use stages will end in errors. False - will prevent the connector from using stages, thus Snowflake users without permission to create/use stages will have successful queries, however may significantly negatively impact query performance. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake allow stages", + "output": "snowflake allow stages refers to Setting to allow or disallow Snowflake connector from using Snowflake stages during queries. True - will permit the connector to use stages and generally improves performance. However, if the Snowflake user does not have permission to create/use stages will end in errors. False - will prevent the connector from using stages, thus Snowflake users without permission to create/use stages will have successful queries, however may significantly negatively impact query performance. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "snowflake allow stages refers to Setting to allow or disallow Snowflake connector from using Snowflake stages during queries. True - will permit the connector to use stages and generally improves performance. However, if the Snowflake user does not have permission to create/use stages will end in errors. 
False - will prevent the connector from using stages, thus Snowflake users without permission to create/use stages will have successful queries, however may significantly negatively impact query performance. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting snowflake_allow_stages", + "output": "snowflake allow stages refers to Setting to allow or disallow Snowflake connector from using Snowflake stages during queries. True - will permit the connector to use stages and generally improves performance. However, if the Snowflake user does not have permission to create/use stages will end in errors. False - will prevent the connector from using stages, thus Snowflake users without permission to create/use stages will have successful queries, however may significantly negatively impact query performance. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting snowflake_allow_stages", + "output": "snowflake allow stages refers to Setting to allow or disallow Snowflake connector from using Snowflake stages during queries. True - will permit the connector to use stages and generally improves performance. However, if the Snowflake user does not have permission to create/use stages will end in errors. False - will prevent the connector from using stages, thus Snowflake users without permission to create/use stages will have successful queries, however may significantly negatively impact query performance. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake_batch_size", + "output": "snowflake batch size refers to Sets the number of rows to be fetched by Snowflake cursor at one time. This is only used if setting `snowflake_allow_stages` is set to False, may help with performance depending on the type and size of data being queried. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake_batch_size", + "output": "snowflake batch size refers to Sets the number of rows to be fetched by Snowflake cursor at one time. This is only used if setting `snowflake_allow_stages` is set to False, may help with performance depending on the type and size of data being queried. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "snowflake batch size", + "output": "snowflake batch size refers to Sets the number of rows to be fetched by Snowflake cursor at one time. This is only used if setting `snowflake_allow_stages` is set to False, may help with performance depending on the type and size of data being queried. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "snowflake batch size refers to Sets the number of rows to be fetched by Snowflake cursor at one time. This is only used if setting `snowflake_allow_stages` is set to False, may help with performance depending on the type and size of data being queried. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting snowflake_batch_size", + "output": "snowflake batch size refers to Sets the number of rows to be fetched by Snowflake cursor at one time. This is only used if setting `snowflake_allow_stages` is set to False, may help with performance depending on the type and size of data being queried. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting snowflake_batch_size", + "output": "snowflake batch size refers to Sets the number of rows to be fetched by Snowflake cursor at one time. 
This is only used if setting `snowflake_allow_stages` is set to False, may help with performance depending on the type and size of data being queried. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb_user", + "output": "kdb user refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb_user", + "output": "kdb user refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb user", + "output": "kdb user refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "kdb user refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting kdb_user", + "output": "kdb user refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting kdb_user", + "output": "kdb user refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb_password", + "output": "kdb password refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb_password", + "output": "kdb password refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb password", + "output": "kdb password refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for 
Driverless AI", + "input": "", + "output": "kdb password refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting kdb_password", + "output": "kdb password refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting kdb_password", + "output": "kdb password refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb_hostname", + "output": "kdb hostname refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb_hostname", + "output": "kdb hostname refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb hostname", + "output": "kdb hostname refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "kdb hostname refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting kdb_hostname", + "output": "kdb hostname refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting kdb_hostname", + "output": "kdb hostname refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb_port", + "output": "kdb port refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb_port", + "output": "kdb port 
refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb port", + "output": "kdb port refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "kdb port refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting kdb_port", + "output": "kdb port refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting kdb_port", + "output": "kdb port refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb_app_classpath", + "output": "kdb app classpath refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb_app_classpath", + "output": "kdb app classpath refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb app classpath", + "output": "kdb app classpath refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "kdb app classpath refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting kdb_app_classpath", + "output": "kdb app classpath refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting kdb_app_classpath", + "output": "kdb app classpath refers to KDB Connector 
credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb_app_jvm_args", + "output": "kdb app jvm args refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb_app_jvm_args", + "output": "kdb app jvm args refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "kdb app jvm args", + "output": "kdb app jvm args refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "kdb app jvm args refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting kdb_app_jvm_args", + "output": "kdb app jvm args refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting kdb_app_jvm_args", + "output": "kdb app jvm args refers to KDB Connector credentials" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_account_name", + "output": "azure blob account name refers to Account name for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_account_name", + "output": "azure blob account name refers to Azure Blob Store Account Name: Account name for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure blob account name", + "output": "azure blob account name refers to Azure Blob Store Account Name: Account name for 
Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Azure Blob Store Account Name: ", + "output": "azure blob account name refers to Azure Blob Store Account Name: Account name for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_blob_account_name", + "output": "azure blob account name refers to Account name for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_blob_account_name", + "output": "azure blob account name refers to Azure Blob Store Account Name: Account name for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_account_key", + "output": "azure blob account key refers to Account key for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_account_key", + "output": "azure blob account key refers to Azure Blob Store Account Key: Account key for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure blob account key", + "output": "azure blob account key refers to Azure Blob Store Account Key: Account key for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Azure Blob Store Account Key: ", + "output": "azure blob account key refers to Azure Blob Store Account Key: Account key for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_blob_account_key", + 
"output": "azure blob account key refers to Account key for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_blob_account_key", + "output": "azure blob account key refers to Azure Blob Store Account Key: Account key for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_connection_string", + "output": "azure connection string refers to Connection string for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_connection_string", + "output": "azure connection string refers to Azure Blob Store Connection String: Connection string for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure connection string", + "output": "azure connection string refers to Azure Blob Store Connection String: Connection string for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Azure Blob Store Connection String: ", + "output": "azure connection string refers to Azure Blob Store Connection String: Connection string for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_connection_string", + "output": "azure connection string refers to Connection string for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_connection_string", + "output": "azure connection string refers to Azure Blob Store Connection String: Connection string for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_sas_token", + "output": "azure sas token refers to SAS token for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_sas_token", + "output": "azure sas token refers to Azure Blob Store SAS token: SAS token for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure sas token", + "output": "azure sas token refers to Azure Blob Store SAS token: SAS token for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Azure Blob Store SAS token: ", + "output": "azure sas token refers to Azure Blob Store SAS token: SAS token for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_sas_token", + "output": "azure sas token refers to SAS token for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_sas_token", + "output": "azure sas token refers to Azure Blob Store SAS token: SAS token for Azure Blob Store Connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_init_path", + "output": "azure blob init path refers to Starting Azure blob store path displayed in UI Azure blob store browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_init_path", + "output": "azure blob init path refers to Starting Azure blob store path displayed in UI Azure blob store browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the 
following expert setting for Driverless AI", + "input": "azure blob init path", + "output": "azure blob init path refers to Starting Azure blob store path displayed in UI Azure blob store browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "azure blob init path refers to Starting Azure blob store path displayed in UI Azure blob store browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_blob_init_path", + "output": "azure blob init path refers to Starting Azure blob store path displayed in UI Azure blob store browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_blob_init_path", + "output": "azure blob init path refers to Starting Azure blob store path displayed in UI Azure blob store browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_use_access_token", + "output": "azure blob use access token refers to When enabled, Azure Blob Store Connector will use access token derived from the credentials received on login with OpenID Connect." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_use_access_token", + "output": "azure blob use access token refers to When enabled, Azure Blob Store Connector will use access token derived from the credentials received on login with OpenID Connect." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure blob use access token", + "output": "azure blob use access token refers to When enabled, Azure Blob Store Connector will use access token derived from the credentials received on login with OpenID Connect." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "azure blob use access token refers to When enabled, Azure Blob Store Connector will use access token derived from the credentials received on login with OpenID Connect." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_blob_use_access_token", + "output": "azure blob use access token refers to When enabled, Azure Blob Store Connector will use access token derived from the credentials received on login with OpenID Connect." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_blob_use_access_token", + "output": "azure blob use access token refers to When enabled, Azure Blob Store Connector will use access token derived from the credentials received on login with OpenID Connect." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_use_access_token_scopes", + "output": "azure blob use access token scopes refers to Configures the scopes for the access token used by Azure Blob Store Connector when the azure_blob_use_access_token us enabled. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_use_access_token_scopes", + "output": "azure blob use access token scopes refers to Configures the scopes for the access token used by Azure Blob Store Connector when the azure_blob_use_access_token us enabled. 
(space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure blob use access token scopes", + "output": "azure blob use access token scopes refers to Configures the scopes for the access token used by Azure Blob Store Connector when the azure_blob_use_access_token is enabled. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "azure blob use access token scopes refers to Configures the scopes for the access token used by Azure Blob Store Connector when the azure_blob_use_access_token is enabled. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_blob_use_access_token_scopes", + "output": "azure blob use access token scopes refers to Configures the scopes for the access token used by Azure Blob Store Connector when the azure_blob_use_access_token is enabled. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_blob_use_access_token_scopes", + "output": "azure blob use access token scopes refers to Configures the scopes for the access token used by Azure Blob Store Connector when the azure_blob_use_access_token is enabled. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_use_access_token_source", + "output": "azure blob use access token source refers to Sets the source of the access token for accessing the Azure blob store KEYCLOAK: Will exchange the session access token for the federated refresh token with Keycloak and use it to obtain the access token directly with the Azure AD. SESSION: Will use the access token derived from the credentials received on login with OpenID Connect. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_use_access_token_source", + "output": "azure blob use access token source refers to Sets the source of the access token for accessing the Azure blob store KEYCLOAK: Will exchange the session access token for the federated refresh token with Keycloak and use it to obtain the access token directly with the Azure AD. SESSION: Will use the access token derived from the credentials received on login with OpenID Connect. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure blob use access token source", + "output": "azure blob use access token source refers to Sets the source of the access token for accessing the Azure blob store KEYCLOAK: Will exchange the session access token for the federated refresh token with Keycloak and use it to obtain the access token directly with the Azure AD. SESSION: Will use the access token derived from the credentials received on login with OpenID Connect. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "azure blob use access token source refers to Sets the source of the access token for accessing the Azure blob store KEYCLOAK: Will exchange the session access token for the federated refresh token with Keycloak and use it to obtain the access token directly with the Azure AD. SESSION: Will use the access token derived from the credentials received on login with OpenID Connect. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_blob_use_access_token_source", + "output": "azure blob use access token source refers to Sets the source of the access token for accessing the Azure blob store KEYCLOAK: Will exchange the session access token for the federated refresh token with Keycloak and use it to obtain the access token directly with the Azure AD. SESSION: Will use the access token derived from the credentials received on login with OpenID Connect. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_blob_use_access_token_source", + "output": "azure blob use access token source refers to Sets the source of the access token for accessing the Azure blob store KEYCLOAK: Will exchange the session access token for the federated refresh token with Keycloak and use it to obtain the access token directly with the Azure AD. SESSION: Will use the access token derived from the credentials received on login with OpenID Connect. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_keycloak_aad_client_id", + "output": "azure blob keycloak aad client id refers to Application (client) ID registered on Azure AD when the KEYCLOAK source is enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_keycloak_aad_client_id", + "output": "azure blob keycloak aad client id refers to Application (client) ID registered on Azure AD when the KEYCLOAK source is enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure blob keycloak aad client id", + "output": "azure blob keycloak aad client id refers to Application (client) ID registered on Azure AD when the KEYCLOAK source is enabled."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "azure blob keycloak aad client id refers to Application (client) ID registered on Azure AD when the KEYCLOAK source is enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_blob_keycloak_aad_client_id", + "output": "azure blob keycloak aad client id refers to Application (client) ID registered on Azure AD when the KEYCLOAK source is enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_blob_keycloak_aad_client_id", + "output": "azure blob keycloak aad client id refers to Application (client) ID registered on Azure AD when the KEYCLOAK source is enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_keycloak_aad_client_secret", + "output": "azure blob keycloak aad client secret refers to Application (client) secret when the KEYCLOAK source is enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_keycloak_aad_client_secret", + "output": "azure blob keycloak aad client secret refers to Application (client) secret when the KEYCLOAK source is enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure blob keycloak aad client secret", + "output": "azure blob keycloak aad client secret refers to Application (client) secret when the KEYCLOAK source is enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "azure blob keycloak aad client secret refers to Application (client) secret when the KEYCLOAK source is enabled." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_blob_keycloak_aad_client_secret", + "output": "azure blob keycloak aad client secret refers to Application (client) secret when the KEYCLOAK source is enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_blob_keycloak_aad_client_secret", + "output": "azure blob keycloak aad client secret refers to Application (client) secret when the KEYCLOAK source is enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_keycloak_aad_auth_uri", + "output": "azure blob keycloak aad auth uri refers to A URL that identifies a token authority. It should be of the format https://login.microsoftonline.com/your_tenant " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_keycloak_aad_auth_uri", + "output": "azure blob keycloak aad auth uri refers to A URL that identifies a token authority. It should be of the format https://login.microsoftonline.com/your_tenant " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure blob keycloak aad auth uri", + "output": "azure blob keycloak aad auth uri refers to A URL that identifies a token authority. It should be of the format https://login.microsoftonline.com/your_tenant " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "azure blob keycloak aad auth uri refers to A URL that identifies a token authority. 
It should be of the format https://login.microsoftonline.com/your_tenant " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_blob_keycloak_aad_auth_uri", + "output": "azure blob keycloak aad auth uri refers to A URL that identifies a token authority. It should be of the format https://login.microsoftonline.com/your_tenant " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_blob_keycloak_aad_auth_uri", + "output": "azure blob keycloak aad auth uri refers to A URL that identifies a token authority. It should be of the format https://login.microsoftonline.com/your_tenant " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_keycloak_broker_token_endpoint", + "output": "azure blob keycloak broker token endpoint refers to Keycloak Endpoint for Retrieving External IDP Tokens (https://www.keycloak.org/docs/latest/server_admin/#retrieving-external-idp-tokens)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_blob_keycloak_broker_token_endpoint", + "output": "azure blob keycloak broker token endpoint refers to Keycloak Endpoint for Retrieving External IDP Tokens (https://www.keycloak.org/docs/latest/server_admin/#retrieving-external-idp-tokens)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure blob keycloak broker token endpoint", + "output": "azure blob keycloak broker token endpoint refers to Keycloak Endpoint for Retrieving External IDP Tokens (https://www.keycloak.org/docs/latest/server_admin/#retrieving-external-idp-tokens)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "azure blob keycloak broker token 
endpoint refers to Keycloak Endpoint for Retrieving External IDP Tokens (https://www.keycloak.org/docs/latest/server_admin/#retrieving-external-idp-tokens)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_blob_keycloak_broker_token_endpoint", + "output": "azure blob keycloak broker token endpoint refers to Keycloak Endpoint for Retrieving External IDP Tokens (https://www.keycloak.org/docs/latest/server_admin/#retrieving-external-idp-tokens)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_blob_keycloak_broker_token_endpoint", + "output": "azure blob keycloak broker token endpoint refers to Keycloak Endpoint for Retrieving External IDP Tokens (https://www.keycloak.org/docs/latest/server_admin/#retrieving-external-idp-tokens)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_enable_token_auth_aad", + "output": "azure enable token auth aad refers to (DEPRECATED, use azure_blob_use_access_token and azure_blob_use_access_token_source=\"KEYCLOAK\" instead.) (When enabled only DEPRECATED options azure_ad_client_id, azure_ad_client_secret, azure_ad_auth_uri and azure_keycloak_idp_token_endpoint will be effective) This is equivalent to setting azure_blob_use_access_token_source = \"KEYCLOAK\" and setting azure_blob_keycloak_aad_client_id, azure_blob_keycloak_aad_client_secret, azure_blob_keycloak_aad_auth_uri and azure_blob_keycloak_broker_token_endpoint options. ) If true, enable the Azure Blob Storage Connector to use Azure AD tokens obtained from the Keycloak for auth. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_enable_token_auth_aad", + "output": "azure enable token auth aad refers to (DEPRECATED, use azure_blob_use_access_token and azure_blob_use_access_token_source=\"KEYCLOAK\" instead.) (When enabled only DEPRECATED options azure_ad_client_id, azure_ad_client_secret, azure_ad_auth_uri and azure_keycloak_idp_token_endpoint will be effective) This is equivalent to setting azure_blob_use_access_token_source = \"KEYCLOAK\" and setting azure_blob_keycloak_aad_client_id, azure_blob_keycloak_aad_client_secret, azure_blob_keycloak_aad_auth_uri and azure_blob_keycloak_broker_token_endpoint options. ) If true, enable the Azure Blob Storage Connector to use Azure AD tokens obtained from the Keycloak for auth. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure enable token auth aad", + "output": "azure enable token auth aad refers to (DEPRECATED, use azure_blob_use_access_token and azure_blob_use_access_token_source=\"KEYCLOAK\" instead.) (When enabled only DEPRECATED options azure_ad_client_id, azure_ad_client_secret, azure_ad_auth_uri and azure_keycloak_idp_token_endpoint will be effective) This is equivalent to setting azure_blob_use_access_token_source = \"KEYCLOAK\" and setting azure_blob_keycloak_aad_client_id, azure_blob_keycloak_aad_client_secret, azure_blob_keycloak_aad_auth_uri and azure_blob_keycloak_broker_token_endpoint options. ) If true, enable the Azure Blob Storage Connector to use Azure AD tokens obtained from the Keycloak for auth. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "azure enable token auth aad refers to (DEPRECATED, use azure_blob_use_access_token and azure_blob_use_access_token_source=\"KEYCLOAK\" instead.) 
(When enabled only DEPRECATED options azure_ad_client_id, azure_ad_client_secret, azure_ad_auth_uri and azure_keycloak_idp_token_endpoint will be effective) This is equivalent to setting azure_blob_use_access_token_source = \"KEYCLOAK\" and setting azure_blob_keycloak_aad_client_id, azure_blob_keycloak_aad_client_secret, azure_blob_keycloak_aad_auth_uri and azure_blob_keycloak_broker_token_endpoint options. ) If true, enable the Azure Blob Storage Connector to use Azure AD tokens obtained from the Keycloak for auth. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_enable_token_auth_aad", + "output": "azure enable token auth aad refers to (DEPRECATED, use azure_blob_use_access_token and azure_blob_use_access_token_source=\"KEYCLOAK\" instead.) (When enabled only DEPRECATED options azure_ad_client_id, azure_ad_client_secret, azure_ad_auth_uri and azure_keycloak_idp_token_endpoint will be effective) This is equivalent to setting azure_blob_use_access_token_source = \"KEYCLOAK\" and setting azure_blob_keycloak_aad_client_id, azure_blob_keycloak_aad_client_secret, azure_blob_keycloak_aad_auth_uri and azure_blob_keycloak_broker_token_endpoint options. ) If true, enable the Azure Blob Storage Connector to use Azure AD tokens obtained from the Keycloak for auth. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_enable_token_auth_aad", + "output": "azure enable token auth aad refers to (DEPRECATED, use azure_blob_use_access_token and azure_blob_use_access_token_source=\"KEYCLOAK\" instead.) 
(When enabled only DEPRECATED options azure_ad_client_id, azure_ad_client_secret, azure_ad_auth_uri and azure_keycloak_idp_token_endpoint will be effective) This is equivalent to setting azure_blob_use_access_token_source = \"KEYCLOAK\" and setting azure_blob_keycloak_aad_client_id, azure_blob_keycloak_aad_client_secret, azure_blob_keycloak_aad_auth_uri and azure_blob_keycloak_broker_token_endpoint options. ) If true, enable the Azure Blob Storage Connector to use Azure AD tokens obtained from the Keycloak for auth. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_ad_client_id", + "output": "azure ad client id refers to (DEPRECATED, use azure_blob_keycloak_aad_client_id instead.) Application (client) ID registered on Azure AD" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_ad_client_id", + "output": "azure ad client id refers to (DEPRECATED, use azure_blob_keycloak_aad_client_id instead.) Application (client) ID registered on Azure AD" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure ad client id", + "output": "azure ad client id refers to (DEPRECATED, use azure_blob_keycloak_aad_client_id instead.) Application (client) ID registered on Azure AD" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "azure ad client id refers to (DEPRECATED, use azure_blob_keycloak_aad_client_id instead.) Application (client) ID registered on Azure AD" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_ad_client_id", + "output": "azure ad client id refers to (DEPRECATED, use azure_blob_keycloak_aad_client_id instead.) 
Application (client) ID registered on Azure AD" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_ad_client_id", + "output": "azure ad client id refers to (DEPRECATED, use azure_blob_keycloak_aad_client_id instead.) Application (client) ID registered on Azure AD" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_ad_client_secret", + "output": "azure ad client secret refers to (DEPRECATED, use azure_blob_keycloak_aad_client_secret instead.) Application Client Secret" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_ad_client_secret", + "output": "azure ad client secret refers to (DEPRECATED, use azure_blob_keycloak_aad_client_secret instead.) Application Client Secret" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure ad client secret", + "output": "azure ad client secret refers to (DEPRECATED, use azure_blob_keycloak_aad_client_secret instead.) Application Client Secret" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "azure ad client secret refers to (DEPRECATED, use azure_blob_keycloak_aad_client_secret instead.) Application Client Secret" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_ad_client_secret", + "output": "azure ad client secret refers to (DEPRECATED, use azure_blob_keycloak_aad_client_secret instead.) Application Client Secret" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_ad_client_secret", + "output": "azure ad client secret refers to (DEPRECATED, use azure_blob_keycloak_aad_client_secret instead.) 
Application Client Secret" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_ad_auth_uri", + "output": "azure ad auth uri refers to (DEPRECATED, use azure_blob_keycloak_aad_auth_uri instead) A URL that identifies a token authority. It should be of the format https://login.microsoftonline.com/your_tenant " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_ad_auth_uri", + "output": "azure ad auth uri refers to (DEPRECATED, use azure_blob_keycloak_aad_auth_uri instead) A URL that identifies a token authority. It should be of the format https://login.microsoftonline.com/your_tenant " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure ad auth uri", + "output": "azure ad auth uri refers to (DEPRECATED, use azure_blob_keycloak_aad_auth_uri instead) A URL that identifies a token authority. It should be of the format https://login.microsoftonline.com/your_tenant " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "azure ad auth uri refers to (DEPRECATED, use azure_blob_keycloak_aad_auth_uri instead) A URL that identifies a token authority. It should be of the format https://login.microsoftonline.com/your_tenant " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_ad_auth_uri", + "output": "azure ad auth uri refers to (DEPRECATED, use azure_blob_keycloak_aad_auth_uri instead) A URL that identifies a token authority. 
It should be of the format https://login.microsoftonline.com/your_tenant " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_ad_auth_uri", + "output": "azure ad auth uri refers to (DEPRECATED, use azure_blob_keycloak_aad_auth_uri instead) A URL that identifies a token authority. It should be of the format https://login.microsoftonline.com/your_tenant " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_ad_scopes", + "output": "azure ad scopes refers to (DEPRECATED, use azure_blob_use_access_token_scopes instead.) Scopes requested to access a protected API (a resource)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_ad_scopes", + "output": "azure ad scopes refers to (DEPRECATED, use azure_blob_use_access_token_scopes instead.) Scopes requested to access a protected API (a resource)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure ad scopes", + "output": "azure ad scopes refers to (DEPRECATED, use azure_blob_use_access_token_scopes instead.) Scopes requested to access a protected API (a resource)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "azure ad scopes refers to (DEPRECATED, use azure_blob_use_access_token_scopes instead.) Scopes requested to access a protected API (a resource)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting azure_ad_scopes", + "output": "azure ad scopes refers to (DEPRECATED, use azure_blob_use_access_token_scopes instead.) Scopes requested to access a protected API (a resource)."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_ad_scopes", + "output": "azure ad scopes refers to (DEPRECATED, use azure_blob_use_access_token_scopes instead.) Scopes requested to access a protected API (a resource)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_keycloak_idp_token_endpoint", + "output": "azure keycloak idp token endpoint refers to (DEPRECATED, use azure_blob_keycloak_broker_token_endpoint instead.) Keycloak Endpoint for Retrieving External IDP Tokens (https://www.keycloak.org/docs/latest/server_admin/#retrieving-external-idp-tokens)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure_keycloak_idp_token_endpoint", + "output": "azure keycloak idp token endpoint refers to (DEPRECATED, use azure_blob_keycloak_broker_token_endpoint instead.) Keycloak Endpoint for Retrieving External IDP Tokens (https://www.keycloak.org/docs/latest/server_admin/#retrieving-external-idp-tokens)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "azure keycloak idp token endpoint", + "output": "azure keycloak idp token endpoint refers to (DEPRECATED, use azure_blob_keycloak_broker_token_endpoint instead.) Keycloak Endpoint for Retrieving External IDP Tokens (https://www.keycloak.org/docs/latest/server_admin/#retrieving-external-idp-tokens)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "azure keycloak idp token endpoint refers to (DEPRECATED, use azure_blob_keycloak_broker_token_endpoint instead.) Keycloak Endpoint for Retrieving External IDP Tokens (https://www.keycloak.org/docs/latest/server_admin/#retrieving-external-idp-tokens)" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Provide a short explanation of the expert setting azure_keycloak_idp_token_endpoint", + "output": "azure keycloak idp token endpoint refers to (DEPRECATED, use azure_blob_keycloak_broker_token_endpoint instead.) Keycloak Endpoint for Retrieving External IDP Tokens (https://www.keycloak.org/docs/latest/server_admin/#retrieving-external-idp-tokens)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting azure_keycloak_idp_token_endpoint", + "output": "azure keycloak idp token endpoint refers to (DEPRECATED, use azure_blob_keycloak_broker_token_endpoint instead.) Keycloak Endpoint for Retrieving External IDP Tokens (https://www.keycloak.org/docs/latest/server_admin/#retrieving-external-idp-tokens)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "jdbc_app_configs", + "output": "jdbc app configs refers to Configuration for JDBC Connector. JSON/Dictionary String with multiple keys. Format as a single line without using carriage returns (the following example is formatted for readability). Use triple quotations to ensure that the text is read as a single string. Example: '{ \"postgres\": { \"url\": \"jdbc:postgresql://ip address:port/postgres\", \"jarpath\": \"/path/to/postgres_driver.jar\", \"classpath\": \"org.postgresql.Driver\" }, \"mysql\": { \"url\":\"mysql connection string\", \"jarpath\": \"/path/to/mysql_driver.jar\", \"classpath\": \"my.sql.classpath.Driver\" } }' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "jdbc_app_configs", + "output": "jdbc app configs refers to Configuration for JDBC Connector. JSON/Dictionary String with multiple keys. Format as a single line without using carriage returns (the following example is formatted for readability). Use triple quotations to ensure that the text is read as a single string. 
Example: '{ \"postgres\": { \"url\": \"jdbc:postgresql://ip address:port/postgres\", \"jarpath\": \"/path/to/postgres_driver.jar\", \"classpath\": \"org.postgresql.Driver\" }, \"mysql\": { \"url\":\"mysql connection string\", \"jarpath\": \"/path/to/mysql_driver.jar\", \"classpath\": \"my.sql.classpath.Driver\" } }' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "jdbc app configs", + "output": "jdbc app configs refers to Configuration for JDBC Connector. JSON/Dictionary String with multiple keys. Format as a single line without using carriage returns (the following example is formatted for readability). Use triple quotations to ensure that the text is read as a single string. Example: '{ \"postgres\": { \"url\": \"jdbc:postgresql://ip address:port/postgres\", \"jarpath\": \"/path/to/postgres_driver.jar\", \"classpath\": \"org.postgresql.Driver\" }, \"mysql\": { \"url\":\"mysql connection string\", \"jarpath\": \"/path/to/mysql_driver.jar\", \"classpath\": \"my.sql.classpath.Driver\" } }' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "jdbc app configs refers to Configuration for JDBC Connector. JSON/Dictionary String with multiple keys. Format as a single line without using carriage returns (the following example is formatted for readability). Use triple quotations to ensure that the text is read as a single string. 
Example: '{ \"postgres\": { \"url\": \"jdbc:postgresql://ip address:port/postgres\", \"jarpath\": \"/path/to/postgres_driver.jar\", \"classpath\": \"org.postgresql.Driver\" }, \"mysql\": { \"url\":\"mysql connection string\", \"jarpath\": \"/path/to/mysql_driver.jar\", \"classpath\": \"my.sql.classpath.Driver\" } }' " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting jdbc_app_configs", + "output": "jdbc app configs refers to Configuration for JDBC Connector. JSON/Dictionary String with multiple keys. Format as a single line without using carriage returns (the following example is formatted for readability). Use triple quotations to ensure that the text is read as a single string. Example: '{ \"postgres\": { \"url\": \"jdbc:postgresql://ip address:port/postgres\", \"jarpath\": \"/path/to/postgres_driver.jar\", \"classpath\": \"org.postgresql.Driver\" }, \"mysql\": { \"url\":\"mysql connection string\", \"jarpath\": \"/path/to/mysql_driver.jar\", \"classpath\": \"my.sql.classpath.Driver\" } }' " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting jdbc_app_configs", + "output": "jdbc app configs refers to Configuration for JDBC Connector. JSON/Dictionary String with multiple keys. Format as a single line without using carriage returns (the following example is formatted for readability). Use triple quotations to ensure that the text is read as a single string. 
Example: '{ \"postgres\": { \"url\": \"jdbc:postgresql://ip address:port/postgres\", \"jarpath\": \"/path/to/postgres_driver.jar\", \"classpath\": \"org.postgresql.Driver\" }, \"mysql\": { \"url\":\"mysql connection string\", \"jarpath\": \"/path/to/mysql_driver.jar\", \"classpath\": \"my.sql.classpath.Driver\" } }' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "jdbc_app_jvm_args", + "output": "jdbc app jvm args refers to extra jvm args for jdbc connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "jdbc_app_jvm_args", + "output": "jdbc app jvm args refers to extra jvm args for jdbc connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "jdbc app jvm args", + "output": "jdbc app jvm args refers to extra jvm args for jdbc connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "jdbc app jvm args refers to extra jvm args for jdbc connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting jdbc_app_jvm_args", + "output": "jdbc app jvm args refers to extra jvm args for jdbc connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting jdbc_app_jvm_args", + "output": "jdbc app jvm args refers to extra jvm args for jdbc connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "jdbc_app_classpath", + "output": "jdbc app classpath refers to alternative classpath for jdbc connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "jdbc_app_classpath", + "output": "jdbc app 
classpath refers to alternative classpath for jdbc connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "jdbc app classpath", + "output": "jdbc app classpath refers to alternative classpath for jdbc connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "jdbc app classpath refers to alternative classpath for jdbc connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting jdbc_app_classpath", + "output": "jdbc app classpath refers to alternative classpath for jdbc connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting jdbc_app_classpath", + "output": "jdbc app classpath refers to alternative classpath for jdbc connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hive_app_configs", + "output": "hive app configs refers to Configuration for Hive Connector. Note that inputs are similar to configuring HDFS connectivity. important keys: * hive_conf_path - path to hive configuration, may have multiple files. typically: hive-site.xml, hdfs-site.xml, etc * auth_type - one of `noauth`, `keytab`, `keytabimpersonation` for kerberos authentication * keytab_path - path to the kerberos keytab to use for authentication, can be \"\" if using `noauth` auth_type * principal_user - Kerberos app principal user. Required when using auth_type `keytab` or `keytabimpersonation` JSON/Dictionary String with multiple keys. 
Example: '{ \"hive_connection_1\": { \"hive_conf_path\": \"/path/to/hive/conf\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"hive/localhost@EXAMPLE.COM\", }, \"hive_connection_2\": { \"hive_conf_path\": \"/path/to/hive/conf_2\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"my_user/localhost@EXAMPLE.COM\", } }' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hive_app_configs", + "output": "hive app configs refers to Configuration for Hive Connector. Note that inputs are similar to configuring HDFS connectivity. important keys: * hive_conf_path - path to hive configuration, may have multiple files. typically: hive-site.xml, hdfs-site.xml, etc * auth_type - one of `noauth`, `keytab`, `keytabimpersonation` for kerberos authentication * keytab_path - path to the kerberos keytab to use for authentication, can be \"\" if using `noauth` auth_type * principal_user - Kerberos app principal user. Required when using auth_type `keytab` or `keytabimpersonation` JSON/Dictionary String with multiple keys. Example: '{ \"hive_connection_1\": { \"hive_conf_path\": \"/path/to/hive/conf\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"hive/localhost@EXAMPLE.COM\", }, \"hive_connection_2\": { \"hive_conf_path\": \"/path/to/hive/conf_2\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"my_user/localhost@EXAMPLE.COM\", } }' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hive app configs", + "output": "hive app configs refers to Configuration for Hive Connector. 
Note that inputs are similar to configuring HDFS connectivity. important keys: * hive_conf_path - path to hive configuration, may have multiple files. typically: hive-site.xml, hdfs-site.xml, etc * auth_type - one of `noauth`, `keytab`, `keytabimpersonation` for kerberos authentication * keytab_path - path to the kerberos keytab to use for authentication, can be \"\" if using `noauth` auth_type * principal_user - Kerberos app principal user. Required when using auth_type `keytab` or `keytabimpersonation` JSON/Dictionary String with multiple keys. Example: '{ \"hive_connection_1\": { \"hive_conf_path\": \"/path/to/hive/conf\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"hive/localhost@EXAMPLE.COM\", }, \"hive_connection_2\": { \"hive_conf_path\": \"/path/to/hive/conf_2\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"my_user/localhost@EXAMPLE.COM\", } }' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hive app configs refers to Configuration for Hive Connector. Note that inputs are similar to configuring HDFS connectivity. important keys: * hive_conf_path - path to hive configuration, may have multiple files. typically: hive-site.xml, hdfs-site.xml, etc * auth_type - one of `noauth`, `keytab`, `keytabimpersonation` for kerberos authentication * keytab_path - path to the kerberos keytab to use for authentication, can be \"\" if using `noauth` auth_type * principal_user - Kerberos app principal user. Required when using auth_type `keytab` or `keytabimpersonation` JSON/Dictionary String with multiple keys. 
Example: '{ \"hive_connection_1\": { \"hive_conf_path\": \"/path/to/hive/conf\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"hive/localhost@EXAMPLE.COM\", }, \"hive_connection_2\": { \"hive_conf_path\": \"/path/to/hive/conf_2\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"my_user/localhost@EXAMPLE.COM\", } }' " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hive_app_configs", + "output": "hive app configs refers to Configuration for Hive Connector. Note that inputs are similar to configuring HDFS connectivity. important keys: * hive_conf_path - path to hive configuration, may have multiple files. typically: hive-site.xml, hdfs-site.xml, etc * auth_type - one of `noauth`, `keytab`, `keytabimpersonation` for kerberos authentication * keytab_path - path to the kerberos keytab to use for authentication, can be \"\" if using `noauth` auth_type * principal_user - Kerberos app principal user. Required when using auth_type `keytab` or `keytabimpersonation` JSON/Dictionary String with multiple keys. Example: '{ \"hive_connection_1\": { \"hive_conf_path\": \"/path/to/hive/conf\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"hive/localhost@EXAMPLE.COM\", }, \"hive_connection_2\": { \"hive_conf_path\": \"/path/to/hive/conf_2\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"my_user/localhost@EXAMPLE.COM\", } }' " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hive_app_configs", + "output": "hive app configs refers to Configuration for Hive Connector. Note that inputs are similar to configuring HDFS connectivity. 
important keys: * hive_conf_path - path to hive configuration, may have multiple files. typically: hive-site.xml, hdfs-site.xml, etc * auth_type - one of `noauth`, `keytab`, `keytabimpersonation` for kerberos authentication * keytab_path - path to the kerberos keytab to use for authentication, can be \"\" if using `noauth` auth_type * principal_user - Kerberos app principal user. Required when using auth_type `keytab` or `keytabimpersonation` JSON/Dictionary String with multiple keys. Example: '{ \"hive_connection_1\": { \"hive_conf_path\": \"/path/to/hive/conf\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"hive/localhost@EXAMPLE.COM\", }, \"hive_connection_2\": { \"hive_conf_path\": \"/path/to/hive/conf_2\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"my_user/localhost@EXAMPLE.COM\", } }' " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hive_app_jvm_args", + "output": "hive app jvm args refers to Extra jvm args for hive connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hive_app_jvm_args", + "output": "hive app jvm args refers to Extra jvm args for hive connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hive app jvm args", + "output": "hive app jvm args refers to Extra jvm args for hive connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hive app jvm args refers to Extra jvm args for hive connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hive_app_jvm_args", + "output": "hive app 
jvm args refers to Extra jvm args for hive connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hive_app_jvm_args", + "output": "hive app jvm args refers to Extra jvm args for hive connector" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hive_app_classpath", + "output": "hive app classpath refers to Alternative classpath for hive connector. Can be used to add additional jar files to classpath." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hive_app_classpath", + "output": "hive app classpath refers to Alternative classpath for hive connector. Can be used to add additional jar files to classpath." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hive app classpath", + "output": "hive app classpath refers to Alternative classpath for hive connector. Can be used to add additional jar files to classpath." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hive app classpath refers to Alternative classpath for hive connector. Can be used to add additional jar files to classpath." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hive_app_classpath", + "output": "hive app classpath refers to Alternative classpath for hive connector. Can be used to add additional jar files to classpath." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hive_app_classpath", + "output": "hive app classpath refers to Alternative classpath for hive connector. Can be used to add additional jar files to classpath." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_artifacts_upload", + "output": "enable artifacts upload refers to Replace all the downloads on the experiment page to exports and allow users to push to the artifact store configured with artifacts_store" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_artifacts_upload", + "output": "enable artifacts upload refers to Replace all the downloads on the experiment page to exports and allow users to push to the artifact store configured with artifacts_store" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable artifacts upload", + "output": "enable artifacts upload refers to Replace all the downloads on the experiment page to exports and allow users to push to the artifact store configured with artifacts_store" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable artifacts upload refers to Replace all the downloads on the experiment page to exports and allow users to push to the artifact store configured with artifacts_store" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_artifacts_upload", + "output": "enable artifacts upload refers to Replace all the downloads on the experiment page to exports and allow users to push to the artifact store configured with artifacts_store" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_artifacts_upload", + "output": "enable artifacts upload refers to Replace all the downloads on the experiment page to exports and allow users to push to the artifact store configured with artifacts_store" + }, + { + "prompt_type": 
"human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_store", + "output": "artifacts store refers to Artifacts store. file_system: stores artifacts on a file system directory denoted by artifacts_file_system_directory. s3: stores artifacts to S3 bucket. bitbucket: stores data into Bitbucket repository. azure: stores data into Azure Blob Store. hdfs: stores data into a Hadoop distributed file system location. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_store", + "output": "artifacts store refers to Artifacts store. file_system: stores artifacts on a file system directory denoted by artifacts_file_system_directory. s3: stores artifacts to S3 bucket. bitbucket: stores data into Bitbucket repository. azure: stores data into Azure Blob Store. hdfs: stores data into a Hadoop distributed file system location. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts store", + "output": "artifacts store refers to Artifacts store. file_system: stores artifacts on a file system directory denoted by artifacts_file_system_directory. s3: stores artifacts to S3 bucket. bitbucket: stores data into Bitbucket repository. azure: stores data into Azure Blob Store. hdfs: stores data into a Hadoop distributed file system location. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "artifacts store refers to Artifacts store. file_system: stores artifacts on a file system directory denoted by artifacts_file_system_directory. s3: stores artifacts to S3 bucket. bitbucket: stores data into Bitbucket repository. azure: stores data into Azure Blob Store. hdfs: stores data into a Hadoop distributed file system location. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting artifacts_store", + "output": "artifacts store refers to Artifacts store. file_system: stores artifacts on a file system directory denoted by artifacts_file_system_directory. s3: stores artifacts to S3 bucket. bitbucket: stores data into Bitbucket repository. azure: stores data into Azure Blob Store. hdfs: stores data into a Hadoop distributed file system location. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting artifacts_store", + "output": "artifacts store refers to Artifacts store. file_system: stores artifacts on a file system directory denoted by artifacts_file_system_directory. s3: stores artifacts to S3 bucket. bitbucket: stores data into Bitbucket repository. azure: stores data into Azure Blob Store. hdfs: stores data into a Hadoop distributed file system location. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bitbucket_skip_cert_verification", + "output": "bitbucket skip cert verification refers to Decide whether to skip cert verification for Bitbucket when using a repo with HTTPS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bitbucket_skip_cert_verification", + "output": "bitbucket skip cert verification refers to Decide whether to skip cert verification for Bitbucket when using a repo with HTTPS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bitbucket skip cert verification", + "output": "bitbucket skip cert verification refers to Decide whether to skip cert verification for Bitbucket when using a repo with HTTPS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + 
"output": "bitbucket skip cert verification refers to Decide whether to skip cert verification for Bitbucket when using a repo with HTTPS" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting bitbucket_skip_cert_verification", + "output": "bitbucket skip cert verification refers to Decide whether to skip cert verification for Bitbucket when using a repo with HTTPS" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting bitbucket_skip_cert_verification", + "output": "bitbucket skip cert verification refers to Decide whether to skip cert verification for Bitbucket when using a repo with HTTPS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bitbucket_tmp_relative_dir", + "output": "bitbucket tmp relative dir refers to Local temporary directory to clone artifacts to, relative to data_directory" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bitbucket_tmp_relative_dir", + "output": "bitbucket tmp relative dir refers to Local temporary directory to clone artifacts to, relative to data_directory" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "bitbucket tmp relative dir", + "output": "bitbucket tmp relative dir refers to Local temporary directory to clone artifacts to, relative to data_directory" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "bitbucket tmp relative dir refers to Local temporary directory to clone artifacts to, relative to data_directory" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting bitbucket_tmp_relative_dir", + "output": "bitbucket tmp relative dir refers to 
Local temporary directory to clone artifacts to, relative to data_directory" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting bitbucket_tmp_relative_dir", + "output": "bitbucket tmp relative dir refers to Local temporary directory to clone artifacts to, relative to data_directory" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_file_system_directory", + "output": "artifacts file system directory refers to File system location where artifacts will be copied in case artifacts_store is set to file_system" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_file_system_directory", + "output": "artifacts file system directory refers to File system location where artifacts will be copied in case artifacts_store is set to file_system" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts file system directory", + "output": "artifacts file system directory refers to File system location where artifacts will be copied in case artifacts_store is set to file_system" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "artifacts file system directory refers to File system location where artifacts will be copied in case artifacts_store is set to file_system" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting artifacts_file_system_directory", + "output": "artifacts file system directory refers to File system location where artifacts will be copied in case artifacts_store is set to file_system" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting 
artifacts_file_system_directory", + "output": "artifacts file system directory refers to File system location where artifacts will be copied in case artifacts_store is set to file_system" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_s3_bucket", + "output": "artifacts s3 bucket refers to AWS S3 bucket used for experiment artifact export." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_s3_bucket", + "output": "artifacts s3 bucket refers to AWS S3 Bucket Name: AWS S3 bucket used for experiment artifact export." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts s3 bucket", + "output": "artifacts s3 bucket refers to AWS S3 Bucket Name: AWS S3 bucket used for experiment artifact export." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "AWS S3 Bucket Name: ", + "output": "artifacts s3 bucket refers to AWS S3 Bucket Name: AWS S3 bucket used for experiment artifact export." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting artifacts_s3_bucket", + "output": "artifacts s3 bucket refers to AWS S3 bucket used for experiment artifact export." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting artifacts_s3_bucket", + "output": "artifacts s3 bucket refers to AWS S3 Bucket Name: AWS S3 bucket used for experiment artifact export." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_azure_blob_account_name", + "output": "artifacts azure blob account name refers to Azure Blob Store credentials used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_azure_blob_account_name", + "output": "artifacts azure blob account name refers to Azure Blob Store Account Name: Azure Blob Store credentials used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts azure blob account name", + "output": "artifacts azure blob account name refers to Azure Blob Store Account Name: Azure Blob Store credentials used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Azure Blob Store Account Name: ", + "output": "artifacts azure blob account name refers to Azure Blob Store Account Name: Azure Blob Store credentials used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting artifacts_azure_blob_account_name", + "output": "artifacts azure blob account name refers to Azure Blob Store credentials used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting artifacts_azure_blob_account_name", + "output": "artifacts azure blob account name refers to Azure Blob Store Account Name: Azure Blob Store credentials used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_azure_blob_account_key", + "output": "artifacts azure blob account 
key refers to Azure Blob Store credentials used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_azure_blob_account_key", + "output": "artifacts azure blob account key refers to Azure Blob Store Account Key: Azure Blob Store credentials used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts azure blob account key", + "output": "artifacts azure blob account key refers to Azure Blob Store Account Key: Azure Blob Store credentials used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Azure Blob Store Account Key: ", + "output": "artifacts azure blob account key refers to Azure Blob Store Account Key: Azure Blob Store credentials used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting artifacts_azure_blob_account_key", + "output": "artifacts azure blob account key refers to Azure Blob Store credentials used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting artifacts_azure_blob_account_key", + "output": "artifacts azure blob account key refers to Azure Blob Store Account Key: Azure Blob Store credentials used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_azure_connection_string", + "output": "artifacts azure connection string refers to Azure Blob Store connection string used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": 
"artifacts_azure_connection_string", + "output": "artifacts azure connection string refers to Azure Blob Store Connection String: Azure Blob Store connection string used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts azure connection string", + "output": "artifacts azure connection string refers to Azure Blob Store Connection String: Azure Blob Store connection string used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Azure Blob Store Connection String: ", + "output": "artifacts azure connection string refers to Azure Blob Store Connection String: Azure Blob Store connection string used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting artifacts_azure_connection_string", + "output": "artifacts azure connection string refers to Azure Blob Store connection string used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting artifacts_azure_connection_string", + "output": "artifacts azure connection string refers to Azure Blob Store Connection String: Azure Blob Store connection string used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_azure_sas_token", + "output": "artifacts azure sas token refers to Azure Blob Store SAS token used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_azure_sas_token", + "output": "artifacts azure sas token refers to Azure Blob Store SAS token: Azure Blob Store SAS token used for experiment artifact 
export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts azure sas token", + "output": "artifacts azure sas token refers to Azure Blob Store SAS token: Azure Blob Store SAS token used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Azure Blob Store SAS token: ", + "output": "artifacts azure sas token refers to Azure Blob Store SAS token: Azure Blob Store SAS token used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting artifacts_azure_sas_token", + "output": "artifacts azure sas token refers to Azure Blob Store SAS token used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting artifacts_azure_sas_token", + "output": "artifacts azure sas token refers to Azure Blob Store SAS token: Azure Blob Store SAS token used for experiment artifact export" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_git_user", + "output": "artifacts git user refers to Git auth user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_git_user", + "output": "artifacts git user refers to Git auth user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts git user", + "output": "artifacts git user refers to Git auth user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "artifacts git user refers to Git auth user" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short 
explanation of the expert setting artifacts_git_user", + "output": "artifacts git user refers to Git auth user" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting artifacts_git_user", + "output": "artifacts git user refers to Git auth user" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_git_password", + "output": "artifacts git password refers to Git auth password" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_git_password", + "output": "artifacts git password refers to Git auth password" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts git password", + "output": "artifacts git password refers to Git auth password" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "artifacts git password refers to Git auth password" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting artifacts_git_password", + "output": "artifacts git password refers to Git auth password" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting artifacts_git_password", + "output": "artifacts git password refers to Git auth password" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_git_repo", + "output": "artifacts git repo refers to Git repo where artifacts will be pushed upon and upload" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_git_repo", + "output": "artifacts git repo refers to Git repo where artifacts 
will be pushed upon and upload" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts git repo", + "output": "artifacts git repo refers to Git repo where artifacts will be pushed upon and upload" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "artifacts git repo refers to Git repo where artifacts will be pushed upon and upload" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting artifacts_git_repo", + "output": "artifacts git repo refers to Git repo where artifacts will be pushed upon and upload" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting artifacts_git_repo", + "output": "artifacts git repo refers to Git repo where artifacts will be pushed upon and upload" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_git_branch", + "output": "artifacts git branch refers to Git branch on the remote repo where artifacts are pushed" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_git_branch", + "output": "artifacts git branch refers to Git branch on the remote repo where artifacts are pushed" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts git branch", + "output": "artifacts git branch refers to Git branch on the remote repo where artifacts are pushed" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "artifacts git branch refers to Git branch on the remote repo where artifacts are pushed" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Provide a short explanation of the expert setting artifacts_git_branch", + "output": "artifacts git branch refers to Git branch on the remote repo where artifacts are pushed" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting artifacts_git_branch", + "output": "artifacts git branch refers to Git branch on the remote repo where artifacts are pushed" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_git_ssh_private_key_file_location", + "output": "artifacts git ssh private key file location refers to File location for the ssh private key used for git authentication" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts_git_ssh_private_key_file_location", + "output": "artifacts git ssh private key file location refers to File location for the ssh private key used for git authentication" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "artifacts git ssh private key file location", + "output": "artifacts git ssh private key file location refers to File location for the ssh private key used for git authentication" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "artifacts git ssh private key file location refers to File location for the ssh private key used for git authentication" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting artifacts_git_ssh_private_key_file_location", + "output": "artifacts git ssh private key file location refers to File location for the ssh private key used for git authentication" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert 
setting artifacts_git_ssh_private_key_file_location", + "output": "artifacts git ssh private key file location refers to File location for the ssh private key used for git authentication" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_store_endpoint_url", + "output": "feature store endpoint url refers to Feature Store server endpoint URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_store_endpoint_url", + "output": "feature store endpoint url refers to Feature Store server endpoint URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature store endpoint url", + "output": "feature store endpoint url refers to Feature Store server endpoint URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "feature store endpoint url refers to Feature Store server endpoint URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting feature_store_endpoint_url", + "output": "feature store endpoint url refers to Feature Store server endpoint URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting feature_store_endpoint_url", + "output": "feature store endpoint url refers to Feature Store server endpoint URL" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_store_enable_tls", + "output": "feature store enable tls refers to Enable TLS communication between DAI and the Feature Store server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_store_enable_tls", + "output": 
"feature store enable tls refers to Enable TLS communication between DAI and the Feature Store server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature store enable tls", + "output": "feature store enable tls refers to Enable TLS communication between DAI and the Feature Store server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "feature store enable tls refers to Enable TLS communication between DAI and the Feature Store server" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting feature_store_enable_tls", + "output": "feature store enable tls refers to Enable TLS communication between DAI and the Feature Store server" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting feature_store_enable_tls", + "output": "feature store enable tls refers to Enable TLS communication between DAI and the Feature Store server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_store_tls_cert_path", + "output": "feature store tls cert path refers to Path to the client certificate to authenticate with the Feature Store server. This is only effective when feature_store_enable_tls=True." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_store_tls_cert_path", + "output": "feature store tls cert path refers to Path to the client certificate to authenticate with the Feature Store server. This is only effective when feature_store_enable_tls=True." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature store tls cert path", + "output": "feature store tls cert path refers to Path to the client certificate to authenticate with the Feature Store server. This is only effective when feature_store_enable_tls=True." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "feature store tls cert path refers to Path to the client certificate to authenticate with the Feature Store server. This is only effective when feature_store_enable_tls=True." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting feature_store_tls_cert_path", + "output": "feature store tls cert path refers to Path to the client certificate to authenticate with the Feature Store server. This is only effective when feature_store_enable_tls=True." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting feature_store_tls_cert_path", + "output": "feature store tls cert path refers to Path to the client certificate to authenticate with the Feature Store server. This is only effective when feature_store_enable_tls=True." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_store_access_token_scopes", + "output": "feature store access token scopes refers to A list of access token scopes used by the Feature Store connector to authenticate. (Space separate list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature_store_access_token_scopes", + "output": "feature store access token scopes refers to A list of access token scopes used by the Feature Store connector to authenticate. 
(Space separate list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "feature store access token scopes", + "output": "feature store access token scopes refers to A list of access token scopes used by the Feature Store connector to authenticate. (Space separate list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "feature store access token scopes refers to A list of access token scopes used by the Feature Store connector to authenticate. (Space separate list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting feature_store_access_token_scopes", + "output": "feature store access token scopes refers to A list of access token scopes used by the Feature Store connector to authenticate. (Space separate list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting feature_store_access_token_scopes", + "output": "feature store access token scopes refers to A list of access token scopes used by the Feature Store connector to authenticate. (Space separate list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "deployment_aws_access_key_id", + "output": "deployment aws access key id refers to Default AWS credentials to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "deployment_aws_access_key_id", + "output": "deployment aws access key id refers to Default AWS credentials to be used for scorer deployments." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "deployment aws access key id", + "output": "deployment aws access key id refers to Default AWS credentials to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "deployment aws access key id refers to Default AWS credentials to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting deployment_aws_access_key_id", + "output": "deployment aws access key id refers to Default AWS credentials to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting deployment_aws_access_key_id", + "output": "deployment aws access key id refers to Default AWS credentials to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "deployment_aws_secret_access_key", + "output": "deployment aws secret access key refers to Default AWS credentials to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "deployment_aws_secret_access_key", + "output": "deployment aws secret access key refers to Default AWS credentials to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "deployment aws secret access key", + "output": "deployment aws secret access key refers to Default AWS credentials to be used for scorer deployments." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "deployment aws secret access key refers to Default AWS credentials to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting deployment_aws_secret_access_key", + "output": "deployment aws secret access key refers to Default AWS credentials to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting deployment_aws_secret_access_key", + "output": "deployment aws secret access key refers to Default AWS credentials to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "deployment_aws_bucket_name", + "output": "deployment aws bucket name refers to AWS S3 bucket to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "deployment_aws_bucket_name", + "output": "deployment aws bucket name refers to AWS S3 bucket to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "deployment aws bucket name", + "output": "deployment aws bucket name refers to AWS S3 bucket to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "deployment aws bucket name refers to AWS S3 bucket to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting deployment_aws_bucket_name", + "output": "deployment aws bucket name refers to AWS S3 bucket to be used for scorer deployments." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting deployment_aws_bucket_name", + "output": "deployment aws bucket name refers to AWS S3 bucket to be used for scorer deployments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_benchmark_runtime", + "output": "triton benchmark runtime refers to Approximate upper limit of time for Triton to take to compute latency and throughput performance numbers when performing 'Benchmark' operations for a deployment. Higher values result in more accurate performance numbers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_benchmark_runtime", + "output": "triton benchmark runtime refers to Approximate upper limit of time for Triton to take to compute latency and throughput performance numbers when performing 'Benchmark' operations for a deployment. Higher values result in more accurate performance numbers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton benchmark runtime", + "output": "triton benchmark runtime refers to Approximate upper limit of time for Triton to take to compute latency and throughput performance numbers when performing 'Benchmark' operations for a deployment. Higher values result in more accurate performance numbers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "triton benchmark runtime refers to Approximate upper limit of time for Triton to take to compute latency and throughput performance numbers when performing 'Benchmark' operations for a deployment. Higher values result in more accurate performance numbers." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting triton_benchmark_runtime", + "output": "triton benchmark runtime refers to Approximate upper limit of time for Triton to take to compute latency and throughput performance numbers when performing 'Benchmark' operations for a deployment. Higher values result in more accurate performance numbers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting triton_benchmark_runtime", + "output": "triton benchmark runtime refers to Approximate upper limit of time for Triton to take to compute latency and throughput performance numbers when performing 'Benchmark' operations for a deployment. Higher values result in more accurate performance numbers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_quick_test_runtime", + "output": "triton quick test runtime refers to Approximate upper limit of time for Triton to take to compute latency and throughput performance numbers after loading up the deployment, per model. Higher values result in more accurate performance numbers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_quick_test_runtime", + "output": "triton quick test runtime refers to Approximate upper limit of time for Triton to take to compute latency and throughput performance numbers after loading up the deployment, per model. Higher values result in more accurate performance numbers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton quick test runtime", + "output": "triton quick test runtime refers to Approximate upper limit of time for Triton to take to compute latency and throughput performance numbers after loading up the deployment, per model. 
Higher values result in more accurate performance numbers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "triton quick test runtime refers to Approximate upper limit of time for Triton to take to compute latency and throughput performance numbers after loading up the deployment, per model. Higher values result in more accurate performance numbers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting triton_quick_test_runtime", + "output": "triton quick test runtime refers to Approximate upper limit of time for Triton to take to compute latency and throughput performance numbers after loading up the deployment, per model. Higher values result in more accurate performance numbers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting triton_quick_test_runtime", + "output": "triton quick test runtime refers to Approximate upper limit of time for Triton to take to compute latency and throughput performance numbers after loading up the deployment, per model. Higher values result in more accurate performance numbers." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "deploy_wizard_num_per_page", + "output": "deploy wizard num per page refers to Number of Triton deployments to show per page of the Deploy Wizard" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "deploy_wizard_num_per_page", + "output": "deploy wizard num per page refers to Number of Triton deployments to show per page of the Deploy Wizard" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "deploy wizard num per page", + "output": "deploy wizard num per page refers to Number of Triton deployments to show per page of the Deploy Wizard" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "deploy wizard num per page refers to Number of Triton deployments to show per page of the Deploy Wizard" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting deploy_wizard_num_per_page", + "output": "deploy wizard num per page refers to Number of Triton deployments to show per page of the Deploy Wizard" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting deploy_wizard_num_per_page", + "output": "deploy wizard num per page refers to Number of Triton deployments to show per page of the Deploy Wizard" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_config_overrides_in_expert_page", + "output": "allow config overrides in expert page refers to Whether to allow user to change non-server toml parameters per experiment in expert page." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_config_overrides_in_expert_page", + "output": "allow config overrides in expert page refers to Whether to allow user to change non-server toml parameters per experiment in expert page." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow config overrides in expert page", + "output": "allow config overrides in expert page refers to Whether to allow user to change non-server toml parameters per experiment in expert page." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "allow config overrides in expert page refers to Whether to allow user to change non-server toml parameters per experiment in expert page." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_config_overrides_in_expert_page", + "output": "allow config overrides in expert page refers to Whether to allow user to change non-server toml parameters per experiment in expert page." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_config_overrides_in_expert_page", + "output": "allow config overrides in expert page refers to Whether to allow user to change non-server toml parameters per experiment in expert page." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cols_log_headtail", + "output": "max cols log headtail refers to Maximum number of columns in each head and tail to log when ingesting data or running experiment on data." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cols_log_headtail", + "output": "max cols log headtail refers to Maximum number of columns in each head and tail to log when ingesting data or running experiment on data." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max cols log headtail", + "output": "max cols log headtail refers to Maximum number of columns in each head and tail to log when ingesting data or running experiment on data." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max cols log headtail refers to Maximum number of columns in each head and tail to log when ingesting data or running experiment on data." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_cols_log_headtail", + "output": "max cols log headtail refers to Maximum number of columns in each head and tail to log when ingesting data or running experiment on data." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_cols_log_headtail", + "output": "max cols log headtail refers to Maximum number of columns in each head and tail to log when ingesting data or running experiment on data." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cols_gui_headtail", + "output": "max cols gui headtail refers to Maximum number of columns in each head and tail to show in GUI, useful when head or tail has all necessary columns, but too many for UI or web server to handle.-1 means no limit.A reasonable value is 500, after which web server or browser can become overloaded and use too much memory.Some values of column counts in UI may not show up correctly, and some dataset details functions may not work.To select (from GUI or client) any columns as being target, weight column, fold column, time column, time column groups, or dropped columns, the dataset should have those columns within the selected head or tail set of columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_cols_gui_headtail", + "output": "max cols gui headtail refers to Maximum number of columns in each head and tail to show in GUI, useful when head or tail has all necessary columns, but too many for UI or web server to handle.-1 means no limit.A reasonable value is 500, after which web server or browser can become overloaded and use too much memory.Some values of column counts in UI may not show up correctly, and some dataset details functions may not work.To select (from GUI or client) any columns as being target, weight column, fold column, time column, time column groups, or dropped columns, the dataset should have those columns within the selected head or tail set of columns." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max cols gui headtail", + "output": "max cols gui headtail refers to Maximum number of columns in each head and tail to show in GUI, useful when head or tail has all necessary columns, but too many for UI or web server to handle.-1 means no limit.A reasonable value is 500, after which web server or browser can become overloaded and use too much memory.Some values of column counts in UI may not show up correctly, and some dataset details functions may not work.To select (from GUI or client) any columns as being target, weight column, fold column, time column, time column groups, or dropped columns, the dataset should have those columns within the selected head or tail set of columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max cols gui headtail refers to Maximum number of columns in each head and tail to show in GUI, useful when head or tail has all necessary columns, but too many for UI or web server to handle.-1 means no limit.A reasonable value is 500, after which web server or browser can become overloaded and use too much memory.Some values of column counts in UI may not show up correctly, and some dataset details functions may not work.To select (from GUI or client) any columns as being target, weight column, fold column, time column, time column groups, or dropped columns, the dataset should have those columns within the selected head or tail set of columns." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_cols_gui_headtail", + "output": "max cols gui headtail refers to Maximum number of columns in each head and tail to show in GUI, useful when head or tail has all necessary columns, but too many for UI or web server to handle.-1 means no limit.A reasonable value is 500, after which web server or browser can become overloaded and use too much memory.Some values of column counts in UI may not show up correctly, and some dataset details functions may not work.To select (from GUI or client) any columns as being target, weight column, fold column, time column, time column groups, or dropped columns, the dataset should have those columns within the selected head or tail set of columns." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_cols_gui_headtail", + "output": "max cols gui headtail refers to Maximum number of columns in each head and tail to show in GUI, useful when head or tail has all necessary columns, but too many for UI or web server to handle.-1 means no limit.A reasonable value is 500, after which web server or browser can become overloaded and use too much memory.Some values of column counts in UI may not show up correctly, and some dataset details functions may not work.To select (from GUI or client) any columns as being target, weight column, fold column, time column, time column groups, or dropped columns, the dataset should have those columns within the selected head or tail set of columns." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "supported_file_types", + "output": "supported file types refers to Supported file formats (file name endings must match for files to show up in file browser)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "supported_file_types", + "output": "supported file types refers to Supported file formats (file name endings must match for files to show up in file browser)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "supported file types", + "output": "supported file types refers to Supported file formats (file name endings must match for files to show up in file browser)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "supported file types refers to Supported file formats (file name endings must match for files to show up in file browser)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting supported_file_types", + "output": "supported file types refers to Supported file formats (file name endings must match for files to show up in file browser)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting supported_file_types", + "output": "supported file types refers to Supported file formats (file name endings must match for files to show up in file browser)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "recipe_supported_file_types", + "output": "recipe supported file types refers to Supported file formats of data recipe files (file name endings must match for files to show up in file browser)" + }, + { + 
"prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "recipe_supported_file_types", + "output": "recipe supported file types refers to Supported file formats of data recipe files (file name endings must match for files to show up in file browser)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "recipe supported file types", + "output": "recipe supported file types refers to Supported file formats of data recipe files (file name endings must match for files to show up in file browser)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "recipe supported file types refers to Supported file formats of data recipe files (file name endings must match for files to show up in file browser)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting recipe_supported_file_types", + "output": "recipe supported file types refers to Supported file formats of data recipe files (file name endings must match for files to show up in file browser)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting recipe_supported_file_types", + "output": "recipe supported file types refers to Supported file formats of data recipe files (file name endings must match for files to show up in file browser)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "list_files_without_extensions", + "output": "list files without extensions refers to By default, only supported file types (based on the file extensions listed above) will be listed for import into DAI Some data pipelines generate parquet files without any extensions. 
Enabling the below option will cause files without an extension to be listed in the file import dialog. DAI will import files without extensions as parquet files; if cannot be imported, an error is generated " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "list_files_without_extensions", + "output": "list files without extensions refers to By default, only supported file types (based on the file extensions listed above) will be listed for import into DAI Some data pipelines generate parquet files without any extensions. Enabling the below option will cause files without an extension to be listed in the file import dialog. DAI will import files without extensions as parquet files; if cannot be imported, an error is generated " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "list files without extensions", + "output": "list files without extensions refers to By default, only supported file types (based on the file extensions listed above) will be listed for import into DAI Some data pipelines generate parquet files without any extensions. Enabling the below option will cause files without an extension to be listed in the file import dialog. DAI will import files without extensions as parquet files; if cannot be imported, an error is generated " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "list files without extensions refers to By default, only supported file types (based on the file extensions listed above) will be listed for import into DAI Some data pipelines generate parquet files without any extensions. Enabling the below option will cause files without an extension to be listed in the file import dialog. 
DAI will import files without extensions as parquet files; if cannot be imported, an error is generated " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting list_files_without_extensions", + "output": "list files without extensions refers to By default, only supported file types (based on the file extensions listed above) will be listed for import into DAI Some data pipelines generate parquet files without any extensions. Enabling the below option will cause files without an extension to be listed in the file import dialog. DAI will import files without extensions as parquet files; if cannot be imported, an error is generated " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting list_files_without_extensions", + "output": "list files without extensions refers to By default, only supported file types (based on the file extensions listed above) will be listed for import into DAI Some data pipelines generate parquet files without any extensions. Enabling the below option will cause files without an extension to be listed in the file import dialog. DAI will import files without extensions as parquet files; if cannot be imported, an error is generated " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_localstorage", + "output": "allow localstorage refers to Allow using browser localstorage, to improve UX." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_localstorage", + "output": "allow localstorage refers to Allow using browser localstorage, to improve UX." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow localstorage", + "output": "allow localstorage refers to Allow using browser localstorage, to improve UX." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "allow localstorage refers to Allow using browser localstorage, to improve UX." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_localstorage", + "output": "allow localstorage refers to Allow using browser localstorage, to improve UX." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_localstorage", + "output": "allow localstorage refers to Allow using browser localstorage, to improve UX." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_orig_cols_in_predictions", + "output": "allow orig cols in predictions refers to Allow original dataset columns to be present in downloaded predictions CSV" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_orig_cols_in_predictions", + "output": "allow orig cols in predictions refers to Allow original dataset columns to be present in downloaded predictions CSV" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow orig cols in predictions", + "output": "allow orig cols in predictions refers to Allow original dataset columns to be present in downloaded predictions CSV" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "allow orig cols in predictions refers to Allow original dataset columns to be present in downloaded predictions CSV" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_orig_cols_in_predictions", + "output": "allow orig cols in predictions refers to Allow original 
dataset columns to be present in downloaded predictions CSV" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_orig_cols_in_predictions", + "output": "allow orig cols in predictions refers to Allow original dataset columns to be present in downloaded predictions CSV" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_form_autocomplete", + "output": "allow form autocomplete refers to Allow the browser to store e.g. login credentials in login form (set to false for higher security)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_form_autocomplete", + "output": "allow form autocomplete refers to Allow the browser to store e.g. login credentials in login form (set to false for higher security)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow form autocomplete", + "output": "allow form autocomplete refers to Allow the browser to store e.g. login credentials in login form (set to false for higher security)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "allow form autocomplete refers to Allow the browser to store e.g. login credentials in login form (set to false for higher security)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_form_autocomplete", + "output": "allow form autocomplete refers to Allow the browser to store e.g. 
login credentials in login form (set to false for higher security)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_form_autocomplete", + "output": "allow form autocomplete refers to Allow the browser to store e.g. login credentials in login form (set to false for higher security)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_projects", + "output": "enable projects refers to Enable Projects workspace (alpha version, for evaluation)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_projects", + "output": "enable projects refers to Enable Projects workspace: Enable Projects workspace (alpha version, for evaluation)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable projects", + "output": "enable projects refers to Enable Projects workspace: Enable Projects workspace (alpha version, for evaluation)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable Projects workspace: ", + "output": "enable projects refers to Enable Projects workspace: Enable Projects workspace (alpha version, for evaluation)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_projects", + "output": "enable projects refers to Enable Projects workspace (alpha version, for evaluation)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_projects", + "output": "enable projects refers to Enable Projects workspace: Enable Projects workspace (alpha version, for evaluation)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless 
AI", + "input": "app_language", + "output": "app language refers to Default application language - options are 'en', 'ja', 'cn', 'ko'" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "app_language", + "output": "app language refers to Default application language - options are 'en', 'ja', 'cn', 'ko'" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "app language", + "output": "app language refers to Default application language - options are 'en', 'ja', 'cn', 'ko'" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "app language refers to Default application language - options are 'en', 'ja', 'cn', 'ko'" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting app_language", + "output": "app language refers to Default application language - options are 'en', 'ja', 'cn', 'ko'" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting app_language", + "output": "app language refers to Default application language - options are 'en', 'ja', 'cn', 'ko'" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "disablelogout", + "output": "disablelogout refers to If true, Logout button is not visible in the GUI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "disablelogout", + "output": "disablelogout refers to If true, Logout button is not visible in the GUI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "disablelogout", + "output": "disablelogout refers to If true, Logout button is not visible in the GUI." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "disablelogout refers to If true, Logout button is not visible in the GUI." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting disablelogout", + "output": "disablelogout refers to If true, Logout button is not visible in the GUI." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting disablelogout", + "output": "disablelogout refers to If true, Logout button is not visible in the GUI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python_client_path", + "output": "python client path refers to Local path to the location of the Driverless AI Python Client. If empty, will download from s3" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python_client_path", + "output": "python client path refers to Local path to the location of the Driverless AI Python Client. If empty, will download from s3" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python client path", + "output": "python client path refers to Local path to the location of the Driverless AI Python Client. If empty, will download from s3" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "python client path refers to Local path to the location of the Driverless AI Python Client. If empty, will download from s3" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting python_client_path", + "output": "python client path refers to Local path to the location of the Driverless AI Python Client. 
If empty, will download from s3" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting python_client_path", + "output": "python client path refers to Local path to the location of the Driverless AI Python Client. If empty, will download from s3" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python_client_url", + "output": "python client url refers to URL from where new python client WHL file is fetched." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python_client_url", + "output": "python client url refers to Python client wheel URL: URL from where new python client WHL file is fetched." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python client url", + "output": "python client url refers to Python client wheel URL: URL from where new python client WHL file is fetched." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Python client wheel URL: ", + "output": "python client url refers to Python client wheel URL: URL from where new python client WHL file is fetched." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting python_client_url", + "output": "python client url refers to URL from where new python client WHL file is fetched." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting python_client_url", + "output": "python client url refers to Python client wheel URL: URL from where new python client WHL file is fetched." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python_client_verify_integrity", + "output": "python client verify integrity refers to If disabled, server won't verify if WHL package specified in `python_client_path` is valid DAI python client. Default True" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python_client_verify_integrity", + "output": "python client verify integrity refers to If disabled, server won't verify if WHL package specified in `python_client_path` is valid DAI python client. Default True" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "python client verify integrity", + "output": "python client verify integrity refers to If disabled, server won't verify if WHL package specified in `python_client_path` is valid DAI python client. Default True" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "python client verify integrity refers to If disabled, server won't verify if WHL package specified in `python_client_path` is valid DAI python client. Default True" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting python_client_verify_integrity", + "output": "python client verify integrity refers to If disabled, server won't verify if WHL package specified in `python_client_path` is valid DAI python client. Default True" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting python_client_verify_integrity", + "output": "python client verify integrity refers to If disabled, server won't verify if WHL package specified in `python_client_path` is valid DAI python client. 
Default True" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gui_require_experiment_name", + "output": "gui require experiment name refers to When enabled, new experiment requires to specify expert name" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gui_require_experiment_name", + "output": "gui require experiment name refers to Require experiment name: When enabled, new experiment requires to specify expert name" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gui require experiment name", + "output": "gui require experiment name refers to Require experiment name: When enabled, new experiment requires to specify expert name" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Require experiment name: ", + "output": "gui require experiment name refers to Require experiment name: When enabled, new experiment requires to specify expert name" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting gui_require_experiment_name", + "output": "gui require experiment name refers to When enabled, new experiment requires to specify expert name" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting gui_require_experiment_name", + "output": "gui require experiment name refers to Require experiment name: When enabled, new experiment requires to specify expert name" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gui_enable_deploy_button", + "output": "gui enable deploy button refers to When disabled, Deploy option will be disabled on finished experiment page" + }, + { + "prompt_type": 
"human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gui_enable_deploy_button", + "output": "gui enable deploy button refers to Enable experiment deploy button: When disabled, Deploy option will be disabled on finished experiment page" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gui enable deploy button", + "output": "gui enable deploy button refers to Enable experiment deploy button: When disabled, Deploy option will be disabled on finished experiment page" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable experiment deploy button: ", + "output": "gui enable deploy button refers to Enable experiment deploy button: When disabled, Deploy option will be disabled on finished experiment page" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting gui_enable_deploy_button", + "output": "gui enable deploy button refers to When disabled, Deploy option will be disabled on finished experiment page" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting gui_enable_deploy_button", + "output": "gui enable deploy button refers to Enable experiment deploy button: When disabled, Deploy option will be disabled on finished experiment page" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_gui_product_tour", + "output": "enable gui product tour refers to Display experiment tour" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_gui_product_tour", + "output": "enable gui product tour refers to If False, GUI won't show experiment/product tour, when first time using DriverlessAI: Display experiment tour" + }, 
+ { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable gui product tour", + "output": "enable gui product tour refers to If False, GUI won't show experiment/product tour, when first time using DriverlessAI: Display experiment tour" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "If False, GUI won't show experiment/product tour, when first time using DriverlessAI: ", + "output": "enable gui product tour refers to If False, GUI won't show experiment/product tour, when first time using DriverlessAI: Display experiment tour" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_gui_product_tour", + "output": "enable gui product tour refers to Display experiment tour" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_gui_product_tour", + "output": "enable gui product tour refers to If False, GUI won't show experiment/product tour, when first time using DriverlessAI: Display experiment tour" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_dataset_downloading", + "output": "enable dataset downloading refers to Whether user can download dataset as csv file" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_dataset_downloading", + "output": "enable dataset downloading refers to Whether user can download dataset as csv file" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable dataset downloading", + "output": "enable dataset downloading refers to Whether user can download dataset as csv file" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain 
the following expert setting for Driverless AI", + "input": "", + "output": "enable dataset downloading refers to Whether user can download dataset as csv file" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_dataset_downloading", + "output": "enable dataset downloading refers to Whether user can download dataset as csv file" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_dataset_downloading", + "output": "enable dataset downloading refers to Whether user can download dataset as csv file" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_experiment_export", + "output": "enable experiment export refers to If enabled, user can export experiment as a Zip file" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_experiment_export", + "output": "enable experiment export refers to If enabled, user can export experiment as a Zip file" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable experiment export", + "output": "enable experiment export refers to If enabled, user can export experiment as a Zip file" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable experiment export refers to If enabled, user can export experiment as a Zip file" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_experiment_export", + "output": "enable experiment export refers to If enabled, user can export experiment as a Zip file" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_experiment_export", 
+ "output": "enable experiment export refers to If enabled, user can export experiment as a Zip file" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_experiment_import", + "output": "enable experiment import refers to If enabled, user can import experiments, exported as Zip files from DriverlessAI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_experiment_import", + "output": "enable experiment import refers to If enabled, user can import experiments, exported as Zip files from DriverlessAI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable experiment import", + "output": "enable experiment import refers to If enabled, user can import experiments, exported as Zip files from DriverlessAI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable experiment import refers to If enabled, user can import experiments, exported as Zip files from DriverlessAI" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_experiment_import", + "output": "enable experiment import refers to If enabled, user can import experiments, exported as Zip files from DriverlessAI" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_experiment_import", + "output": "enable experiment import refers to If enabled, user can import experiments, exported as Zip files from DriverlessAI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_experiment_wizard", + "output": "enable experiment wizard refers to (EXPERIMENTAL) If enabled, user can launch experiment via 
new `Predict Wizard` options, which navigates to the new Nitro wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_experiment_wizard", + "output": "enable experiment wizard refers to (EXPERIMENTAL) If enabled, user can launch experiment via new `Predict Wizard` options, which navigates to the new Nitro wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable experiment wizard", + "output": "enable experiment wizard refers to (EXPERIMENTAL) If enabled, user can launch experiment via new `Predict Wizard` options, which navigates to the new Nitro wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable experiment wizard refers to (EXPERIMENTAL) If enabled, user can launch experiment via new `Predict Wizard` options, which navigates to the new Nitro wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_experiment_wizard", + "output": "enable experiment wizard refers to (EXPERIMENTAL) If enabled, user can launch experiment via new `Predict Wizard` options, which navigates to the new Nitro wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_experiment_wizard", + "output": "enable experiment wizard refers to (EXPERIMENTAL) If enabled, user can launch experiment via new `Predict Wizard` options, which navigates to the new Nitro wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_join_wizard", + "output": "enable join wizard refers to (EXPERIMENTAL) If enabled, user can do joins via new `Join Wizard` options, which navigates to the new Nitro wizard." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_join_wizard", + "output": "enable join wizard refers to (EXPERIMENTAL) If enabled, user can do joins via new `Join Wizard` options, which navigates to the new Nitro wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable join wizard", + "output": "enable join wizard refers to (EXPERIMENTAL) If enabled, user can do joins via new `Join Wizard` options, which navigates to the new Nitro wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable join wizard refers to (EXPERIMENTAL) If enabled, user can do joins via new `Join Wizard` options, which navigates to the new Nitro wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_join_wizard", + "output": "enable join wizard refers to (EXPERIMENTAL) If enabled, user can do joins via new `Join Wizard` options, which navigates to the new Nitro wizard." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_join_wizard", + "output": "enable join wizard refers to (EXPERIMENTAL) If enabled, user can do joins via new `Join Wizard` options, which navigates to the new Nitro wizard." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hac_link_url", + "output": "hac link url refers to URL address of the H2O AI link" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hac_link_url", + "output": "hac link url refers to URL address of the H2O AI link" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hac link url", + "output": "hac link url refers to URL address of the H2O AI link" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hac link url refers to URL address of the H2O AI link" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hac_link_url", + "output": "hac link url refers to URL address of the H2O AI link" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hac_link_url", + "output": "hac link url refers to URL address of the H2O AI link" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_license_manager", + "output": "enable license manager refers to Switches Driverless AI to use H2O.ai License Management Server to manage licenses/permission to use software" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_license_manager", + "output": "enable license manager refers to Switches Driverless AI to use H2O.ai License Management Server to manage licenses/permission to use software" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable license manager", + "output": "enable license 
manager refers to Switches Driverless AI to use H2O.ai License Management Server to manage licenses/permission to use software" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable license manager refers to Switches Driverless AI to use H2O.ai License Management Server to manage licenses/permission to use software" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_license_manager", + "output": "enable license manager refers to Switches Driverless AI to use H2O.ai License Management Server to manage licenses/permission to use software" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_license_manager", + "output": "enable license manager refers to Switches Driverless AI to use H2O.ai License Management Server to manage licenses/permission to use software" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license_manager_address", + "output": "license manager address refers to Address at which to communicate with H2O.ai License Management Server. Requires above value, `enable_license_manager` set to True. Format: {http/https}://{ip address}:{port number} " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license_manager_address", + "output": "license manager address refers to Address at which to communicate with H2O.ai License Management Server. Requires above value, `enable_license_manager` set to True. 
Format: {http/https}://{ip address}:{port number} " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license manager address", + "output": "license manager address refers to Address at which to communicate with H2O.ai License Management Server. Requires above value, `enable_license_manager` set to True. Format: {http/https}://{ip address}:{port number} " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "license manager address refers to Address at which to communicate with H2O.ai License Management Server. Requires above value, `enable_license_manager` set to True. Format: {http/https}://{ip address}:{port number} " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting license_manager_address", + "output": "license manager address refers to Address at which to communicate with H2O.ai License Management Server. Requires above value, `enable_license_manager` set to True. Format: {http/https}://{ip address}:{port number} " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting license_manager_address", + "output": "license manager address refers to Address at which to communicate with H2O.ai License Management Server. Requires above value, `enable_license_manager` set to True. Format: {http/https}://{ip address}:{port number} " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license_manager_project_name", + "output": "license manager project name refers to Name of license manager project that Driverless AI will attempt to retrieve leases from. 
NOTE: requires an active license within the License Manager Server to function properly " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license_manager_project_name", + "output": "license manager project name refers to Name of license manager project that Driverless AI will attempt to retrieve leases from. NOTE: requires an active license within the License Manager Server to function properly " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license manager project name", + "output": "license manager project name refers to Name of license manager project that Driverless AI will attempt to retrieve leases from. NOTE: requires an active license within the License Manager Server to function properly " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "license manager project name refers to Name of license manager project that Driverless AI will attempt to retrieve leases from. NOTE: requires an active license within the License Manager Server to function properly " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting license_manager_project_name", + "output": "license manager project name refers to Name of license manager project that Driverless AI will attempt to retrieve leases from. NOTE: requires an active license within the License Manager Server to function properly " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting license_manager_project_name", + "output": "license manager project name refers to Name of license manager project that Driverless AI will attempt to retrieve leases from. 
NOTE: requires an active license within the License Manager Server to function properly " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license_manager_lease_duration", + "output": "license manager lease duration refers to Number of milliseconds a lease for users will be expected to last, if using the H2O.ai License Manager server, before the lease REQUIRES renewal. Default: 3600000 (1 hour) = 1 hour * 60 min / hour * 60 sec / min * 1000 milliseconds / sec " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license_manager_lease_duration", + "output": "license manager lease duration refers to Number of milliseconds a lease for users will be expected to last, if using the H2O.ai License Manager server, before the lease REQUIRES renewal. Default: 3600000 (1 hour) = 1 hour * 60 min / hour * 60 sec / min * 1000 milliseconds / sec " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license manager lease duration", + "output": "license manager lease duration refers to Number of milliseconds a lease for users will be expected to last, if using the H2O.ai License Manager server, before the lease REQUIRES renewal. Default: 3600000 (1 hour) = 1 hour * 60 min / hour * 60 sec / min * 1000 milliseconds / sec " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "license manager lease duration refers to Number of milliseconds a lease for users will be expected to last, if using the H2O.ai License Manager server, before the lease REQUIRES renewal. 
Default: 3600000 (1 hour) = 1 hour * 60 min / hour * 60 sec / min * 1000 milliseconds / sec " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting license_manager_lease_duration", + "output": "license manager lease duration refers to Number of milliseconds a lease for users will be expected to last, if using the H2O.ai License Manager server, before the lease REQUIRES renewal. Default: 3600000 (1 hour) = 1 hour * 60 min / hour * 60 sec / min * 1000 milliseconds / sec " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting license_manager_lease_duration", + "output": "license manager lease duration refers to Number of milliseconds a lease for users will be expected to last, if using the H2O.ai License Manager server, before the lease REQUIRES renewal. Default: 3600000 (1 hour) = 1 hour * 60 min / hour * 60 sec / min * 1000 milliseconds / sec " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license_manager_worker_lease_duration", + "output": "license manager worker lease duration refers to Number of milliseconds a lease for Driverless AI worker nodes will be expected to last, if using the H2O.ai License Manager server, before the lease REQUIRES renewal. Default: 21600000 (6 hour) = 6 hour * 60 min / hour * 60 sec / min * 1000 milliseconds / sec " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license_manager_worker_lease_duration", + "output": "license manager worker lease duration refers to Number of milliseconds a lease for Driverless AI worker nodes will be expected to last, if using the H2O.ai License Manager server, before the lease REQUIRES renewal. 
Default: 21600000 (6 hour) = 6 hour * 60 min / hour * 60 sec / min * 1000 milliseconds / sec " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license manager worker lease duration", + "output": "license manager worker lease duration refers to Number of milliseconds a lease for Driverless AI worker nodes will be expected to last, if using the H2O.ai License Manager server, before the lease REQUIRES renewal. Default: 21600000 (6 hour) = 6 hour * 60 min / hour * 60 sec / min * 1000 milliseconds / sec " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "license manager worker lease duration refers to Number of milliseconds a lease for Driverless AI worker nodes will be expected to last, if using the H2O.ai License Manager server, before the lease REQUIRES renewal. Default: 21600000 (6 hour) = 6 hour * 60 min / hour * 60 sec / min * 1000 milliseconds / sec " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting license_manager_worker_lease_duration", + "output": "license manager worker lease duration refers to Number of milliseconds a lease for Driverless AI worker nodes will be expected to last, if using the H2O.ai License Manager server, before the lease REQUIRES renewal. Default: 21600000 (6 hour) = 6 hour * 60 min / hour * 60 sec / min * 1000 milliseconds / sec " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting license_manager_worker_lease_duration", + "output": "license manager worker lease duration refers to Number of milliseconds a lease for Driverless AI worker nodes will be expected to last, if using the H2O.ai License Manager server, before the lease REQUIRES renewal. 
Default: 21600000 (6 hour) = 6 hour * 60 min / hour * 60 sec / min * 1000 milliseconds / sec " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license_manager_ssl_certs", + "output": "license manager ssl certs refers to To be used only if License Manager server is started with HTTPS Accepts a boolean: true/false, or a path to a file/directory. Denotates whether or not to attempt SSL Certificate verification when making a request to the License Manager server. True: attempt ssl certificate verification, will fail if certificates are self signed False: skip ssl certificate verification. /path/to/cert/directory: load certificates in directory and use those for certificate verification Behaves in the same manner as python requests package: https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license_manager_ssl_certs", + "output": "license manager ssl certs refers to To be used only if License Manager server is started with HTTPS Accepts a boolean: true/false, or a path to a file/directory. Denotates whether or not to attempt SSL Certificate verification when making a request to the License Manager server. True: attempt ssl certificate verification, will fail if certificates are self signed False: skip ssl certificate verification. 
/path/to/cert/directory: load certificates in directory and use those for certificate verification Behaves in the same manner as python requests package: https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license manager ssl certs", + "output": "license manager ssl certs refers to To be used only if License Manager server is started with HTTPS Accepts a boolean: true/false, or a path to a file/directory. Denotates whether or not to attempt SSL Certificate verification when making a request to the License Manager server. True: attempt ssl certificate verification, will fail if certificates are self signed False: skip ssl certificate verification. /path/to/cert/directory: load certificates in directory and use those for certificate verification Behaves in the same manner as python requests package: https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "license manager ssl certs refers to To be used only if License Manager server is started with HTTPS Accepts a boolean: true/false, or a path to a file/directory. Denotates whether or not to attempt SSL Certificate verification when making a request to the License Manager server. True: attempt ssl certificate verification, will fail if certificates are self signed False: skip ssl certificate verification. 
/path/to/cert/directory: load certificates in directory and use those for certificate verification Behaves in the same manner as python requests package: https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting license_manager_ssl_certs", + "output": "license manager ssl certs refers to To be used only if License Manager server is started with HTTPS Accepts a boolean: true/false, or a path to a file/directory. Denotates whether or not to attempt SSL Certificate verification when making a request to the License Manager server. True: attempt ssl certificate verification, will fail if certificates are self signed False: skip ssl certificate verification. /path/to/cert/directory: load certificates in directory and use those for certificate verification Behaves in the same manner as python requests package: https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting license_manager_ssl_certs", + "output": "license manager ssl certs refers to To be used only if License Manager server is started with HTTPS Accepts a boolean: true/false, or a path to a file/directory. Denotates whether or not to attempt SSL Certificate verification when making a request to the License Manager server. True: attempt ssl certificate verification, will fail if certificates are self signed False: skip ssl certificate verification. 
/path/to/cert/directory: load certificates in directory and use those for certificate verification Behaves in the same manner as python requests package: https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license_manager_worker_startup_timeout", + "output": "license manager worker startup timeout refers to Amount of time that Driverless AI workers will keep retrying to startup and obtain a lease from the license manager before timing out. Time out will cause worker startup to fail. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license_manager_worker_startup_timeout", + "output": "license manager worker startup timeout refers to Amount of time that Driverless AI workers will keep retrying to startup and obtain a lease from the license manager before timing out. Time out will cause worker startup to fail. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license manager worker startup timeout", + "output": "license manager worker startup timeout refers to Amount of time that Driverless AI workers will keep retrying to startup and obtain a lease from the license manager before timing out. Time out will cause worker startup to fail. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "license manager worker startup timeout refers to Amount of time that Driverless AI workers will keep retrying to startup and obtain a lease from the license manager before timing out. Time out will cause worker startup to fail. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting license_manager_worker_startup_timeout", + "output": "license manager worker startup timeout refers to Amount of time that Driverless AI workers will keep retrying to startup and obtain a lease from the license manager before timing out. Time out will cause worker startup to fail. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting license_manager_worker_startup_timeout", + "output": "license manager worker startup timeout refers to Amount of time that Driverless AI workers will keep retrying to startup and obtain a lease from the license manager before timing out. Time out will cause worker startup to fail. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license_manager_dry_run_token", + "output": "license manager dry run token refers to Emergency setting that will allow Driverless AI to run even if there is issues communicating with or obtaining leases from, the License Manager server. This is an encoded string that can be obtained from either the license manager ui or the logs of the license manager server. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license_manager_dry_run_token", + "output": "license manager dry run token refers to Emergency setting that will allow Driverless AI to run even if there is issues communicating with or obtaining leases from, the License Manager server. This is an encoded string that can be obtained from either the license manager ui or the logs of the license manager server. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "license manager dry run token", + "output": "license manager dry run token refers to Emergency setting that will allow Driverless AI to run even if there is issues communicating with or obtaining leases from, the License Manager server. This is an encoded string that can be obtained from either the license manager ui or the logs of the license manager server. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "license manager dry run token refers to Emergency setting that will allow Driverless AI to run even if there is issues communicating with or obtaining leases from, the License Manager server. This is an encoded string that can be obtained from either the license manager ui or the logs of the license manager server. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting license_manager_dry_run_token", + "output": "license manager dry run token refers to Emergency setting that will allow Driverless AI to run even if there is issues communicating with or obtaining leases from, the License Manager server. This is an encoded string that can be obtained from either the license manager ui or the logs of the license manager server. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting license_manager_dry_run_token", + "output": "license manager dry run token refers to Emergency setting that will allow Driverless AI to run even if there is issues communicating with or obtaining leases from, the License Manager server. This is an encoded string that can be obtained from either the license manager ui or the logs of the license manager server. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_lime_method", + "output": "mli lime method refers to Choose LIME method to be used for creation of surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_lime_method", + "output": "mli lime method refers to LIME method: Choose LIME method to be used for creation of surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli lime method", + "output": "mli lime method refers to LIME method: Choose LIME method to be used for creation of surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "LIME method: ", + "output": "mli lime method refers to LIME method: Choose LIME method to be used for creation of surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_lime_method", + "output": "mli lime method refers to Choose LIME method to be used for creation of surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_lime_method", + "output": "mli lime method refers to LIME method: Choose LIME method to be used for creation of surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_use_raw_features", + "output": "mli use raw features refers to Choose whether surrogate models should be built for original or transformed features." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_use_raw_features", + "output": "mli use raw features refers to Use original features for surrogate models: Choose whether surrogate models should be built for original or transformed features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli use raw features", + "output": "mli use raw features refers to Use original features for surrogate models: Choose whether surrogate models should be built for original or transformed features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Use original features for surrogate models: ", + "output": "mli use raw features refers to Use original features for surrogate models: Choose whether surrogate models should be built for original or transformed features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_use_raw_features", + "output": "mli use raw features refers to Choose whether surrogate models should be built for original or transformed features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_use_raw_features", + "output": "mli use raw features refers to Use original features for surrogate models: Choose whether surrogate models should be built for original or transformed features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_ts_use_raw_features", + "output": "mli ts use raw features refers to Choose whether time series based surrogate models should be built for original features." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_ts_use_raw_features", + "output": "mli ts use raw features refers to Use original features for time series based surrogate models: Choose whether time series based surrogate models should be built for original features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli ts use raw features", + "output": "mli ts use raw features refers to Use original features for time series based surrogate models: Choose whether time series based surrogate models should be built for original features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Use original features for time series based surrogate models: ", + "output": "mli ts use raw features refers to Use original features for time series based surrogate models: Choose whether time series based surrogate models should be built for original features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_ts_use_raw_features", + "output": "mli ts use raw features refers to Choose whether time series based surrogate models should be built for original features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_ts_use_raw_features", + "output": "mli ts use raw features refers to Use original features for time series based surrogate models: Choose whether time series based surrogate models should be built for original features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sample", + "output": "mli sample refers to Choose whether to run all explainers on the sampled dataset." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sample", + "output": "mli sample refers to Sample all explainers: Choose whether to run all explainers on the sampled dataset." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli sample", + "output": "mli sample refers to Sample all explainers: Choose whether to run all explainers on the sampled dataset." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Sample all explainers: ", + "output": "mli sample refers to Sample all explainers: Choose whether to run all explainers on the sampled dataset." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_sample", + "output": "mli sample refers to Choose whether to run all explainers on the sampled dataset." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_sample", + "output": "mli sample refers to Sample all explainers: Choose whether to run all explainers on the sampled dataset." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_vars_to_pdp", + "output": "mli vars to pdp refers to Set maximum number of features for which to build Surrogate Partial Dependence Plot. Use -1 to calculate Surrogate Partial Dependence Plot for all features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_vars_to_pdp", + "output": "mli vars to pdp refers to Number of features for Surrogate Partial Dependence Plot. Set to -1 to use all features.: Set maximum number of features for which to build Surrogate Partial Dependence Plot. 
Use -1 to calculate Surrogate Partial Dependence Plot for all features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli vars to pdp", + "output": "mli vars to pdp refers to Number of features for Surrogate Partial Dependence Plot. Set to -1 to use all features.: Set maximum number of features for which to build Surrogate Partial Dependence Plot. Use -1 to calculate Surrogate Partial Dependence Plot for all features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of features for Surrogate Partial Dependence Plot. Set to -1 to use all features.: ", + "output": "mli vars to pdp refers to Number of features for Surrogate Partial Dependence Plot. Set to -1 to use all features.: Set maximum number of features for which to build Surrogate Partial Dependence Plot. Use -1 to calculate Surrogate Partial Dependence Plot for all features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_vars_to_pdp", + "output": "mli vars to pdp refers to Set maximum number of features for which to build Surrogate Partial Dependence Plot. Use -1 to calculate Surrogate Partial Dependence Plot for all features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_vars_to_pdp", + "output": "mli vars to pdp refers to Number of features for Surrogate Partial Dependence Plot. Set to -1 to use all features.: Set maximum number of features for which to build Surrogate Partial Dependence Plot. Use -1 to calculate Surrogate Partial Dependence Plot for all features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nfolds", + "output": "mli nfolds refers to Set the number of cross-validation folds for surrogate models." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nfolds", + "output": "mli nfolds refers to Cross-validation folds for surrogate models: Set the number of cross-validation folds for surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nfolds", + "output": "mli nfolds refers to Cross-validation folds for surrogate models: Set the number of cross-validation folds for surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Cross-validation folds for surrogate models: ", + "output": "mli nfolds refers to Cross-validation folds for surrogate models: Set the number of cross-validation folds for surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nfolds", + "output": "mli nfolds refers to Set the number of cross-validation folds for surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nfolds", + "output": "mli nfolds refers to Cross-validation folds for surrogate models: Set the number of cross-validation folds for surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_qbin_count", + "output": "mli qbin count refers to Set the number of columns to bin in case of quantile binning." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_qbin_count", + "output": "mli qbin count refers to Number of columns to bin for surrogate models: Set the number of columns to bin in case of quantile binning." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli qbin count", + "output": "mli qbin count refers to Number of columns to bin for surrogate models: Set the number of columns to bin in case of quantile binning." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of columns to bin for surrogate models: ", + "output": "mli qbin count refers to Number of columns to bin for surrogate models: Set the number of columns to bin in case of quantile binning." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_qbin_count", + "output": "mli qbin count refers to Set the number of columns to bin in case of quantile binning." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_qbin_count", + "output": "mli qbin count refers to Number of columns to bin for surrogate models: Set the number of columns to bin in case of quantile binning." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_mli_nthreads", + "output": "h2o mli nthreads refers to Number of threads for H2O instance for use by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_mli_nthreads", + "output": "h2o mli nthreads refers to Number of threads for H2O instance for use by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o mli nthreads", + "output": "h2o mli nthreads refers to Number of threads for H2O instance for use by MLI." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o mli nthreads refers to Number of threads for H2O instance for use by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_mli_nthreads", + "output": "h2o mli nthreads refers to Number of threads for H2O instance for use by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_mli_nthreads", + "output": "h2o mli nthreads refers to Number of threads for H2O instance for use by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_enable_mojo_scorer", + "output": "mli enable mojo scorer refers to Use this option to disable MOJO scoring pipeline. Scoring pipeline is chosen automatically (from MOJO and Python pipelines) by default. In case of certain models MOJO vs. Python choice can impact pipeline performance and robustness." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_enable_mojo_scorer", + "output": "mli enable mojo scorer refers to Allow use of MOJO scoring pipeline: Use this option to disable MOJO scoring pipeline. Scoring pipeline is chosen automatically (from MOJO and Python pipelines) by default. In case of certain models MOJO vs. Python choice can impact pipeline performance and robustness." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli enable mojo scorer", + "output": "mli enable mojo scorer refers to Allow use of MOJO scoring pipeline: Use this option to disable MOJO scoring pipeline. Scoring pipeline is chosen automatically (from MOJO and Python pipelines) by default. In case of certain models MOJO vs. 
Python choice can impact pipeline performance and robustness." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Allow use of MOJO scoring pipeline: ", + "output": "mli enable mojo scorer refers to Allow use of MOJO scoring pipeline: Use this option to disable MOJO scoring pipeline. Scoring pipeline is chosen automatically (from MOJO and Python pipelines) by default. In case of certain models MOJO vs. Python choice can impact pipeline performance and robustness." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_enable_mojo_scorer", + "output": "mli enable mojo scorer refers to Use this option to disable MOJO scoring pipeline. Scoring pipeline is chosen automatically (from MOJO and Python pipelines) by default. In case of certain models MOJO vs. Python choice can impact pipeline performance and robustness." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_enable_mojo_scorer", + "output": "mli enable mojo scorer refers to Allow use of MOJO scoring pipeline: Use this option to disable MOJO scoring pipeline. Scoring pipeline is chosen automatically (from MOJO and Python pipelines) by default. In case of certain models MOJO vs. Python choice can impact pipeline performance and robustness." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sample_above_for_scoring", + "output": "mli sample above for scoring refers to When number of rows are above this limit sample for MLI for scoring UI data." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sample_above_for_scoring", + "output": "mli sample above for scoring refers to When number of rows are above this limit sample for MLI for scoring UI data." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli sample above for scoring", + "output": "mli sample above for scoring refers to When number of rows are above this limit sample for MLI for scoring UI data." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli sample above for scoring refers to When number of rows are above this limit sample for MLI for scoring UI data." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_sample_above_for_scoring", + "output": "mli sample above for scoring refers to When number of rows are above this limit sample for MLI for scoring UI data." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_sample_above_for_scoring", + "output": "mli sample above for scoring refers to When number of rows are above this limit sample for MLI for scoring UI data." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sample_above_for_training", + "output": "mli sample above for training refers to When number of rows are above this limit sample for MLI for training surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sample_above_for_training", + "output": "mli sample above for training refers to When number of rows are above this limit sample for MLI for training surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli sample above for training", + "output": "mli sample above for training refers to When number of rows are above this limit sample for MLI for training surrogate models." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli sample above for training refers to When number of rows are above this limit sample for MLI for training surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_sample_above_for_training", + "output": "mli sample above for training refers to When number of rows are above this limit sample for MLI for training surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_sample_above_for_training", + "output": "mli sample above for training refers to When number of rows are above this limit sample for MLI for training surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sample_size", + "output": "mli sample size refers to The sample size, number of rows, used for MLI surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sample_size", + "output": "mli sample size refers to Sample size for surrogate models: The sample size, number of rows, used for MLI surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli sample size", + "output": "mli sample size refers to Sample size for surrogate models: The sample size, number of rows, used for MLI surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Sample size for surrogate models: ", + "output": "mli sample size refers to Sample size for surrogate models: The sample size, number of rows, used for MLI surrogate models." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_sample_size", + "output": "mli sample size refers to The sample size, number of rows, used for MLI surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_sample_size", + "output": "mli sample size refers to Sample size for surrogate models: The sample size, number of rows, used for MLI surrogate models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_num_quantiles", + "output": "mli num quantiles refers to Number of bins for quantile binning." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_num_quantiles", + "output": "mli num quantiles refers to Number of bins for quantile binning: Number of bins for quantile binning." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli num quantiles", + "output": "mli num quantiles refers to Number of bins for quantile binning: Number of bins for quantile binning." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of bins for quantile binning: ", + "output": "mli num quantiles refers to Number of bins for quantile binning: Number of bins for quantile binning." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_num_quantiles", + "output": "mli num quantiles refers to Number of bins for quantile binning." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_num_quantiles", + "output": "mli num quantiles refers to Number of bins for quantile binning: Number of bins for quantile binning." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_drf_num_trees", + "output": "mli drf num trees refers to Number of trees for Random Forest surrogate model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_drf_num_trees", + "output": "mli drf num trees refers to Number of trees for Random Forest surrogate model: Number of trees for Random Forest surrogate model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli drf num trees", + "output": "mli drf num trees refers to Number of trees for Random Forest surrogate model: Number of trees for Random Forest surrogate model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of trees for Random Forest surrogate model: ", + "output": "mli drf num trees refers to Number of trees for Random Forest surrogate model: Number of trees for Random Forest surrogate model." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_drf_num_trees", + "output": "mli drf num trees refers to Number of trees for Random Forest surrogate model." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_drf_num_trees", + "output": "mli drf num trees refers to Number of trees for Random Forest surrogate model: Number of trees for Random Forest surrogate model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_fast_approx", + "output": "mli fast approx refers to Speed up predictions with a fast approximation (can reduce the number of trees or cross-validation folds)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_fast_approx", + "output": "mli fast approx refers to Speed up predictions with a fast approximation: Speed up predictions with a fast approximation (can reduce the number of trees or cross-validation folds)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli fast approx", + "output": "mli fast approx refers to Speed up predictions with a fast approximation: Speed up predictions with a fast approximation (can reduce the number of trees or cross-validation folds)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Speed up predictions with a fast approximation: ", + "output": "mli fast approx refers to Speed up predictions with a fast approximation: Speed up predictions with a fast approximation (can reduce the number of trees or cross-validation folds)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_fast_approx", + "output": "mli fast approx refers to Speed up predictions with a fast approximation (can reduce the number of trees or cross-validation folds)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_fast_approx", + "output": "mli fast approx refers to Speed up predictions with a fast approximation: Speed up predictions with a fast approximation (can reduce the number of trees or cross-validation folds)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_interpreter_status_cache_size", + "output": "mli interpreter status cache size refers to Maximum number of interpreters status cache entries." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_interpreter_status_cache_size", + "output": "mli interpreter status cache size refers to Maximum number of interpreters status cache entries." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli interpreter status cache size", + "output": "mli interpreter status cache size refers to Maximum number of interpreters status cache entries." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli interpreter status cache size refers to Maximum number of interpreters status cache entries." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_interpreter_status_cache_size", + "output": "mli interpreter status cache size refers to Maximum number of interpreters status cache entries." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_interpreter_status_cache_size", + "output": "mli interpreter status cache size refers to Maximum number of interpreters status cache entries." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_drf_max_depth", + "output": "mli drf max depth refers to Max depth for Random Forest surrogate model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_drf_max_depth", + "output": "mli drf max depth refers to Max depth for Random Forest surrogate model: Max depth for Random Forest surrogate model." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli drf max depth", + "output": "mli drf max depth refers to Max depth for Random Forest surrogate model: Max depth for Random Forest surrogate model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max depth for Random Forest surrogate model: ", + "output": "mli drf max depth refers to Max depth for Random Forest surrogate model: Max depth for Random Forest surrogate model." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_drf_max_depth", + "output": "mli drf max depth refers to Max depth for Random Forest surrogate model." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_drf_max_depth", + "output": "mli drf max depth refers to Max depth for Random Forest surrogate model: Max depth for Random Forest surrogate model." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sample_training", + "output": "mli sample training refers to not only sample training, but also sample scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sample_training", + "output": "mli sample training refers to not only sample training, but also sample scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli sample training", + "output": "mli sample training refers to not only sample training, but also sample scoring." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli sample training refers to not only sample training, but also sample scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_sample_training", + "output": "mli sample training refers to not only sample training, but also sample scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_sample_training", + "output": "mli sample training refers to not only sample training, but also sample scoring." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "klime_lambda", + "output": "klime lambda refers to Regularization strength for k-LIME GLM's." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "klime_lambda", + "output": "klime lambda refers to Regularization strength for k-LIME GLM's: Regularization strength for k-LIME GLM's." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "klime lambda", + "output": "klime lambda refers to Regularization strength for k-LIME GLM's: Regularization strength for k-LIME GLM's." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Regularization strength for k-LIME GLM's: ", + "output": "klime lambda refers to Regularization strength for k-LIME GLM's: Regularization strength for k-LIME GLM's." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting klime_lambda", + "output": "klime lambda refers to Regularization strength for k-LIME GLM's." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting klime_lambda", + "output": "klime lambda refers to Regularization strength for k-LIME GLM's: Regularization strength for k-LIME GLM's." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "klime_alpha", + "output": "klime alpha refers to Regularization distribution between L1 and L2 for k-LIME GLM's." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "klime_alpha", + "output": "klime alpha refers to Regularization distribution between L1 and L2 for k-LIME GLM's: Regularization distribution between L1 and L2 for k-LIME GLM's." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "klime alpha", + "output": "klime alpha refers to Regularization distribution between L1 and L2 for k-LIME GLM's: Regularization distribution between L1 and L2 for k-LIME GLM's." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Regularization distribution between L1 and L2 for k-LIME GLM's: ", + "output": "klime alpha refers to Regularization distribution between L1 and L2 for k-LIME GLM's: Regularization distribution between L1 and L2 for k-LIME GLM's." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting klime_alpha", + "output": "klime alpha refers to Regularization distribution between L1 and L2 for k-LIME GLM's." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting klime_alpha", + "output": "klime alpha refers to Regularization distribution between L1 and L2 for k-LIME GLM's: Regularization distribution between L1 and L2 for k-LIME GLM's." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_max_numeric_enum_cardinality", + "output": "mli max numeric enum cardinality refers to Max cardinality for numeric variables in surrogate models to be considered categorical." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_max_numeric_enum_cardinality", + "output": "mli max numeric enum cardinality refers to Max cardinality for numeric variables in surrogate models to be considered categorical: Max cardinality for numeric variables in surrogate models to be considered categorical." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli max numeric enum cardinality", + "output": "mli max numeric enum cardinality refers to Max cardinality for numeric variables in surrogate models to be considered categorical: Max cardinality for numeric variables in surrogate models to be considered categorical." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max cardinality for numeric variables in surrogate models to be considered categorical: ", + "output": "mli max numeric enum cardinality refers to Max cardinality for numeric variables in surrogate models to be considered categorical: Max cardinality for numeric variables in surrogate models to be considered categorical." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_max_numeric_enum_cardinality", + "output": "mli max numeric enum cardinality refers to Max cardinality for numeric variables in surrogate models to be considered categorical." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_max_numeric_enum_cardinality", + "output": "mli max numeric enum cardinality refers to Max cardinality for numeric variables in surrogate models to be considered categorical: Max cardinality for numeric variables in surrogate models to be considered categorical." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_max_number_cluster_vars", + "output": "mli max number cluster vars refers to Maximum number of features allowed for k-LIME k-means clustering." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_max_number_cluster_vars", + "output": "mli max number cluster vars refers to Maximum number of features allowed for k-LIME k-means clustering: Maximum number of features allowed for k-LIME k-means clustering." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli max number cluster vars", + "output": "mli max number cluster vars refers to Maximum number of features allowed for k-LIME k-means clustering: Maximum number of features allowed for k-LIME k-means clustering." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of features allowed for k-LIME k-means clustering: ", + "output": "mli max number cluster vars refers to Maximum number of features allowed for k-LIME k-means clustering: Maximum number of features allowed for k-LIME k-means clustering." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_max_number_cluster_vars", + "output": "mli max number cluster vars refers to Maximum number of features allowed for k-LIME k-means clustering." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_max_number_cluster_vars", + "output": "mli max number cluster vars refers to Maximum number of features allowed for k-LIME k-means clustering: Maximum number of features allowed for k-LIME k-means clustering." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_all_columns_klime_kmeans", + "output": "use all columns klime kmeans refers to Use all columns for k-LIME k-means clustering (this will override `mli_max_number_cluster_vars` if set to `True`)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_all_columns_klime_kmeans", + "output": "use all columns klime kmeans refers to Use all columns for k-LIME k-means clustering (this will override `mli_max_number_cluster_vars` if set to `True`): Use all columns for k-LIME k-means clustering (this will override `mli_max_number_cluster_vars` if set to `True`)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use all columns klime kmeans", + "output": "use all columns klime kmeans refers to Use all columns for k-LIME k-means clustering (this will override `mli_max_number_cluster_vars` if set to `True`): Use all columns for k-LIME k-means clustering (this will override `mli_max_number_cluster_vars` if set to `True`)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Use all columns for k-LIME k-means clustering (this will override `mli_max_number_cluster_vars` if set to `True`): ", + "output": "use all columns klime kmeans refers to Use all columns for k-LIME k-means clustering (this will override `mli_max_number_cluster_vars` if set to `True`): Use all columns for k-LIME k-means clustering (this will override `mli_max_number_cluster_vars` if set to `True`)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting use_all_columns_klime_kmeans", + "output": "use all columns klime kmeans refers to Use all columns for k-LIME k-means clustering (this will override `mli_max_number_cluster_vars` if set to `True`)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting use_all_columns_klime_kmeans", + "output": "use all columns klime kmeans refers to Use all columns for k-LIME k-means clustering (this will override `mli_max_number_cluster_vars` if set to `True`): Use all columns for k-LIME k-means clustering (this will override `mli_max_number_cluster_vars` if set to `True`)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_strict_version_check", + "output": "mli strict version check refers to Strict version check for MLI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_strict_version_check", + "output": "mli strict version check refers to Strict version check for MLI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli strict version check", + "output": "mli strict version check refers to Strict version check for MLI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli strict version check refers to Strict version check for MLI" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_strict_version_check", + "output": "mli strict version check refers to Strict version check for MLI" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_strict_version_check", + "output": "mli strict version check refers to Strict version check for MLI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_cloud_name", + "output": "mli cloud name refers to MLI cloud name" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_cloud_name", + "output": "mli cloud name refers to MLI cloud name" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli cloud name", + "output": "mli cloud name refers to MLI cloud name" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following 
expert setting for Driverless AI", + "input": "", + "output": "mli cloud name refers to MLI cloud name" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_cloud_name", + "output": "mli cloud name refers to MLI cloud name" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_cloud_name", + "output": "mli cloud name refers to MLI cloud name" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_ice_per_bin_strategy", + "output": "mli ice per bin strategy refers to Compute original model ICE using per feature's bin predictions (true) or use \"one frame\" strategy (false)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_ice_per_bin_strategy", + "output": "mli ice per bin strategy refers to Compute original model ICE using per feature's bin predictions (true) or use \"one frame\" strategy (false)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli ice per bin strategy", + "output": "mli ice per bin strategy refers to Compute original model ICE using per feature's bin predictions (true) or use \"one frame\" strategy (false)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli ice per bin strategy refers to Compute original model ICE using per feature's bin predictions (true) or use \"one frame\" strategy (false)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_ice_per_bin_strategy", + "output": "mli ice per bin strategy refers to Compute original model ICE using per feature's bin predictions (true) or use \"one frame\" strategy (false)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_ice_per_bin_strategy", + "output": "mli ice per bin strategy refers to Compute original model ICE using per feature's bin predictions (true) or use \"one frame\" strategy (false)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_dia_default_max_cardinality", + "output": "mli dia default max cardinality refers to By default DIA will run for categorical columns with cardinality <= mli_dia_default_max_cardinality." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_dia_default_max_cardinality", + "output": "mli dia default max cardinality refers to By default DIA will run for categorical columns with cardinality <= mli_dia_default_max_cardinality." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli dia default max cardinality", + "output": "mli dia default max cardinality refers to By default DIA will run for categorical columns with cardinality <= mli_dia_default_max_cardinality." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli dia default max cardinality refers to By default DIA will run for categorical columns with cardinality <= mli_dia_default_max_cardinality." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_dia_default_max_cardinality", + "output": "mli dia default max cardinality refers to By default DIA will run for categorical columns with cardinality <= mli_dia_default_max_cardinality." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_dia_default_max_cardinality", + "output": "mli dia default max cardinality refers to By default DIA will run for categorical columns with cardinality <= mli_dia_default_max_cardinality." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_dia_default_min_cardinality", + "output": "mli dia default min cardinality refers to By default DIA will run for categorical columns with cardinality >= mli_dia_default_min_cardinality." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_dia_default_min_cardinality", + "output": "mli dia default min cardinality refers to By default DIA will run for categorical columns with cardinality >= mli_dia_default_min_cardinality." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli dia default min cardinality", + "output": "mli dia default min cardinality refers to By default DIA will run for categorical columns with cardinality >= mli_dia_default_min_cardinality." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli dia default min cardinality refers to By default DIA will run for categorical columns with cardinality >= mli_dia_default_min_cardinality." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_dia_default_min_cardinality", + "output": "mli dia default min cardinality refers to By default DIA will run for categorical columns with cardinality >= mli_dia_default_min_cardinality." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_dia_default_min_cardinality", + "output": "mli dia default min cardinality refers to By default DIA will run for categorical columns with cardinality >= mli_dia_default_min_cardinality." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_shapley_sample_size", + "output": "mli shapley sample size refers to When number of rows are above this limit, then sample for MLI transformed Shapley calculation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_shapley_sample_size", + "output": "mli shapley sample size refers to Sample size for transformed Shapley: When number of rows are above this limit, then sample for MLI transformed Shapley calculation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli shapley sample size", + "output": "mli shapley sample size refers to Sample size for transformed Shapley: When number of rows are above this limit, then sample for MLI transformed Shapley calculation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Sample size for transformed Shapley: ", + "output": "mli shapley sample size refers to Sample size for transformed Shapley: When number of rows are above this limit, then sample for MLI transformed Shapley calculation." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_shapley_sample_size", + "output": "mli shapley sample size refers to When number of rows are above this limit, then sample for MLI transformed Shapley calculation." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_shapley_sample_size", + "output": "mli shapley sample size refers to Sample size for transformed Shapley: When number of rows are above this limit, then sample for MLI transformed Shapley calculation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_mli_keeper", + "output": "enable mli keeper refers to Enable MLI keeper which ensures efficient use of filesystem/memory/DB by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_mli_keeper", + "output": "enable mli keeper refers to Enable MLI keeper which ensures efficient use of filesystem/memory/DB by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable mli keeper", + "output": "enable mli keeper refers to Enable MLI keeper which ensures efficient use of filesystem/memory/DB by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable mli keeper refers to Enable MLI keeper which ensures efficient use of filesystem/memory/DB by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_mli_keeper", + "output": "enable mli keeper refers to Enable MLI keeper which ensures efficient use of filesystem/memory/DB by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_mli_keeper", + "output": "enable mli keeper refers to Enable MLI keeper which ensures efficient use of filesystem/memory/DB by MLI." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_mli_sa", + "output": "enable mli sa refers to Enable MLI Sensitivity Analysis" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_mli_sa", + "output": "enable mli sa refers to Enable MLI Sensitivity Analysis" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable mli sa", + "output": "enable mli sa refers to Enable MLI Sensitivity Analysis" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable mli sa refers to Enable MLI Sensitivity Analysis" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_mli_sa", + "output": "enable mli sa refers to Enable MLI Sensitivity Analysis" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_mli_sa", + "output": "enable mli sa refers to Enable MLI Sensitivity Analysis" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_mli_priority_queues", + "output": "enable mli priority queues refers to Enable priority queues based explainers execution. Priority queues restrict available system resources and prevent system over-utilization. Interpretation execution time might be (significantly) slower." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_mli_priority_queues", + "output": "enable mli priority queues refers to Enable priority queues based explainers execution. Priority queues restrict available system resources and prevent system over-utilization. 
Interpretation execution time might be (significantly) slower." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable mli priority queues", + "output": "enable mli priority queues refers to Enable priority queues based explainers execution. Priority queues restrict available system resources and prevent system over-utilization. Interpretation execution time might be (significantly) slower." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable mli priority queues refers to Enable priority queues based explainers execution. Priority queues restrict available system resources and prevent system over-utilization. Interpretation execution time might be (significantly) slower." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_mli_priority_queues", + "output": "enable mli priority queues refers to Enable priority queues based explainers execution. Priority queues restrict available system resources and prevent system over-utilization. Interpretation execution time might be (significantly) slower." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_mli_priority_queues", + "output": "enable mli priority queues refers to Enable priority queues based explainers execution. Priority queues restrict available system resources and prevent system over-utilization. Interpretation execution time might be (significantly) slower." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sequential_task_execution", + "output": "mli sequential task execution refers to Explainers are run sequentially by default. 
This option can be used to run all explainers in parallel which can - depending on hardware strength and the number of explainers - decrease interpretation duration. Consider explainer dependencies, random explainers order and hardware over utilization." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sequential_task_execution", + "output": "mli sequential task execution refers to Explainers are run sequentially by default. This option can be used to run all explainers in parallel which can - depending on hardware strength and the number of explainers - decrease interpretation duration. Consider explainer dependencies, random explainers order and hardware over utilization." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli sequential task execution", + "output": "mli sequential task execution refers to Explainers are run sequentially by default. This option can be used to run all explainers in parallel which can - depending on hardware strength and the number of explainers - decrease interpretation duration. Consider explainer dependencies, random explainers order and hardware over utilization." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli sequential task execution refers to Explainers are run sequentially by default. This option can be used to run all explainers in parallel which can - depending on hardware strength and the number of explainers - decrease interpretation duration. Consider explainer dependencies, random explainers order and hardware over utilization." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_sequential_task_execution", + "output": "mli sequential task execution refers to Explainers are run sequentially by default. 
This option can be used to run all explainers in parallel which can - depending on hardware strength and the number of explainers - decrease interpretation duration. Consider explainer dependencies, random explainers order and hardware over utilization." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_sequential_task_execution", + "output": "mli sequential task execution refers to Explainers are run sequentially by default. This option can be used to run all explainers in parallel which can - depending on hardware strength and the number of explainers - decrease interpretation duration. Consider explainer dependencies, random explainers order and hardware over utilization." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_dia_sample_size", + "output": "mli dia sample size refers to When number of rows are above this limit, then sample for Disparate Impact Analysis." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_dia_sample_size", + "output": "mli dia sample size refers to Sample size for Disparate Impact Analysis: When number of rows are above this limit, then sample for Disparate Impact Analysis." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli dia sample size", + "output": "mli dia sample size refers to Sample size for Disparate Impact Analysis: When number of rows are above this limit, then sample for Disparate Impact Analysis." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Sample size for Disparate Impact Analysis: ", + "output": "mli dia sample size refers to Sample size for Disparate Impact Analysis: When number of rows are above this limit, then sample for Disparate Impact Analysis." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_dia_sample_size", + "output": "mli dia sample size refers to When number of rows are above this limit, then sample for Disparate Impact Analysis." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_dia_sample_size", + "output": "mli dia sample size refers to Sample size for Disparate Impact Analysis: When number of rows are above this limit, then sample for Disparate Impact Analysis." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_pd_sample_size", + "output": "mli pd sample size refers to When number of rows are above this limit, then sample for Partial Dependence Plot." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_pd_sample_size", + "output": "mli pd sample size refers to Sample size for Partial Dependence Plot: When number of rows are above this limit, then sample for Partial Dependence Plot." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli pd sample size", + "output": "mli pd sample size refers to Sample size for Partial Dependence Plot: When number of rows are above this limit, then sample for Partial Dependence Plot." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Sample size for Partial Dependence Plot: ", + "output": "mli pd sample size refers to Sample size for Partial Dependence Plot: When number of rows are above this limit, then sample for Partial Dependence Plot." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_pd_sample_size", + "output": "mli pd sample size refers to When number of rows are above this limit, then sample for Partial Dependence Plot." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_pd_sample_size", + "output": "mli pd sample size refers to Sample size for Partial Dependence Plot: When number of rows are above this limit, then sample for Partial Dependence Plot." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_pd_numcat_num_chart", + "output": "mli pd numcat num chart refers to Use dynamic switching between Partial Dependence Plot numeric and categorical binning and UI chart selection in case of features which were used both as numeric and categorical by experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_pd_numcat_num_chart", + "output": "mli pd numcat num chart refers to Unique feature values count driven Partial Dependence Plot binning and chart selection: Use dynamic switching between Partial Dependence Plot numeric and categorical binning and UI chart selection in case of features which were used both as numeric and categorical by experiment." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli pd numcat num chart", + "output": "mli pd numcat num chart refers to Unique feature values count driven Partial Dependence Plot binning and chart selection: Use dynamic switching between Partial Dependence Plot numeric and categorical binning and UI chart selection in case of features which were used both as numeric and categorical by experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Unique feature values count driven Partial Dependence Plot binning and chart selection: ", + "output": "mli pd numcat num chart refers to Unique feature values count driven Partial Dependence Plot binning and chart selection: Use dynamic switching between Partial Dependence Plot numeric and categorical binning and UI chart selection in case of features which were used both as numeric and categorical by experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_pd_numcat_num_chart", + "output": "mli pd numcat num chart refers to Use dynamic switching between Partial Dependence Plot numeric and categorical binning and UI chart selection in case of features which were used both as numeric and categorical by experiment." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_pd_numcat_num_chart", + "output": "mli pd numcat num chart refers to Unique feature values count driven Partial Dependence Plot binning and chart selection: Use dynamic switching between Partial Dependence Plot numeric and categorical binning and UI chart selection in case of features which were used both as numeric and categorical by experiment." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_pd_numcat_threshold", + "output": "mli pd numcat threshold refers to If 'mli_pd_numcat_num_chart' is enabled, then use numeric binning and chart if feature unique values count is bigger than threshold, else use categorical binning and chart." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_pd_numcat_threshold", + "output": "mli pd numcat threshold refers to Threshold for Partial Dependence Plot binning and chart selection (<=threshold categorical, >threshold numeric): If 'mli_pd_numcat_num_chart' is enabled, then use numeric binning and chart if feature unique values count is bigger than threshold, else use categorical binning and chart." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli pd numcat threshold", + "output": "mli pd numcat threshold refers to Threshold for Partial Dependence Plot binning and chart selection (<=threshold categorical, >threshold numeric): If 'mli_pd_numcat_num_chart' is enabled, then use numeric binning and chart if feature unique values count is bigger than threshold, else use categorical binning and chart." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Threshold for Partial Dependence Plot binning and chart selection (<=threshold categorical, >threshold numeric): ", + "output": "mli pd numcat threshold refers to Threshold for Partial Dependence Plot binning and chart selection (<=threshold categorical, >threshold numeric): If 'mli_pd_numcat_num_chart' is enabled, then use numeric binning and chart if feature unique values count is bigger than threshold, else use categorical binning and chart." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_pd_numcat_threshold", + "output": "mli pd numcat threshold refers to If 'mli_pd_numcat_num_chart' is enabled, then use numeric binning and chart if feature unique values count is bigger than threshold, else use categorical binning and chart." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_pd_numcat_threshold", + "output": "mli pd numcat threshold refers to Threshold for Partial Dependence Plot binning and chart selection (<=threshold categorical, >threshold numeric): If 'mli_pd_numcat_num_chart' is enabled, then use numeric binning and chart if feature unique values count is bigger than threshold, else use categorical binning and chart." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "new_mli_list_only_explainable_datasets", + "output": "new mli list only explainable datasets refers to In New Interpretation screen show only datasets which can be used to explain a selected model. This can slow down the server significantly." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "new_mli_list_only_explainable_datasets", + "output": "new mli list only explainable datasets refers to In New Interpretation screen show only datasets which can be used to explain a selected model. This can slow down the server significantly." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "new mli list only explainable datasets", + "output": "new mli list only explainable datasets refers to In New Interpretation screen show only datasets which can be used to explain a selected model. This can slow down the server significantly." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "new mli list only explainable datasets refers to In New Interpretation screen show only datasets which can be used to explain a selected model. This can slow down the server significantly." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting new_mli_list_only_explainable_datasets", + "output": "new mli list only explainable datasets refers to In New Interpretation screen show only datasets which can be used to explain a selected model. This can slow down the server significantly." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting new_mli_list_only_explainable_datasets", + "output": "new mli list only explainable datasets refers to In New Interpretation screen show only datasets which can be used to explain a selected model. This can slow down the server significantly." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_mli_async_api", + "output": "enable mli async api refers to Enable async/await-based non-blocking MLI API" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_mli_async_api", + "output": "enable mli async api refers to Enable async/await-based non-blocking MLI API" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable mli async api", + "output": "enable mli async api refers to Enable async/await-based non-blocking MLI API" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable mli async api refers to Enable async/await-based non-blocking MLI API" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_mli_async_api", + "output": "enable mli async api refers to Enable async/await-based non-blocking MLI API" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_mli_async_api", + "output": "enable mli async api refers to Enable async/await-based non-blocking MLI API" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_mli_sa_main_chart_aggregator", + "output": "enable mli sa main chart aggregator refers to Enable main chart aggregator in Sensitivity Analysis" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_mli_sa_main_chart_aggregator", + "output": "enable mli sa main chart aggregator refers to Enable main chart aggregator in Sensitivity Analysis" + }, + { + "prompt_type": "human_bot", + "instruction": 
"Explain the following expert setting for Driverless AI", + "input": "enable mli sa main chart aggregator", + "output": "enable mli sa main chart aggregator refers to Enable main chart aggregator in Sensitivity Analysis" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable mli sa main chart aggregator refers to Enable main chart aggregator in Sensitivity Analysis" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_mli_sa_main_chart_aggregator", + "output": "enable mli sa main chart aggregator refers to Enable main chart aggregator in Sensitivity Analysis" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_mli_sa_main_chart_aggregator", + "output": "enable mli sa main chart aggregator refers to Enable main chart aggregator in Sensitivity Analysis" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sa_sampling_limit", + "output": "mli sa sampling limit refers to When to sample for Sensitivity Analysis (number of rows after sampling)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sa_sampling_limit", + "output": "mli sa sampling limit refers to Sample size for SA: When to sample for Sensitivity Analysis (number of rows after sampling)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli sa sampling limit", + "output": "mli sa sampling limit refers to Sample size for SA: When to sample for Sensitivity Analysis (number of rows after sampling)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Sample size for SA: ", + "output": "mli sa sampling limit refers to Sample size for SA: When to sample for Sensitivity Analysis (number of rows after sampling)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_sa_sampling_limit", + "output": "mli sa sampling limit refers to When to sample for Sensitivity Analysis (number of rows after sampling)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_sa_sampling_limit", + "output": "mli sa sampling limit refers to Sample size for SA: When to sample for Sensitivity Analysis (number of rows after sampling)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sa_main_chart_aggregator_limit", + "output": "mli sa main chart aggregator limit refers to Run main chart aggregator in Sensitivity Analysis when the number of dataset instances is bigger than given limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_sa_main_chart_aggregator_limit", + "output": "mli sa main chart aggregator limit refers to Run main chart aggregator in Sensitivity Analysis when the number of dataset instances is bigger than given limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli sa main chart aggregator limit", + "output": "mli sa main chart aggregator limit refers to Run main chart aggregator in Sensitivity Analysis when the number of dataset instances is bigger than given limit." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli sa main chart aggregator limit refers to Run main chart aggregator in Sensitivity Analysis when the number of dataset instances is bigger than given limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_sa_main_chart_aggregator_limit", + "output": "mli sa main chart aggregator limit refers to Run main chart aggregator in Sensitivity Analysis when the number of dataset instances is bigger than given limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_sa_main_chart_aggregator_limit", + "output": "mli sa main chart aggregator limit refers to Run main chart aggregator in Sensitivity Analysis when the number of dataset instances is bigger than given limit." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_predict_safe", + "output": "mli predict safe refers to Use predict_safe() (true) or predict_base() (false) in MLI (PD, ICE, SA, ...)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_predict_safe", + "output": "mli predict safe refers to Use predict_safe() (true) or predict_base() (false) in MLI (PD, ICE, SA, ...)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli predict safe", + "output": "mli predict safe refers to Use predict_safe() (true) or predict_base() (false) in MLI (PD, ICE, SA, ...)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli predict safe refers to Use predict_safe() (true) or predict_base() (false) in MLI (PD, ICE, SA, ...)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_predict_safe", + "output": "mli predict safe refers to Use predict_safe() (true) or predict_base() (false) in MLI (PD, ICE, SA, ...)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_predict_safe", + "output": "mli predict safe refers to Use predict_safe() (true) or predict_base() (false) in MLI (PD, ICE, SA, ...)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_max_surrogate_retries", + "output": "mli max surrogate retries refers to Number of max retries should the surrogate model fail to build." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_max_surrogate_retries", + "output": "mli max surrogate retries refers to Number of max retries should the surrogate model fail to build." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli max surrogate retries", + "output": "mli max surrogate retries refers to Number of max retries should the surrogate model fail to build." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli max surrogate retries refers to Number of max retries should the surrogate model fail to build." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_max_surrogate_retries", + "output": "mli max surrogate retries refers to Number of max retries should the surrogate model fail to build." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_max_surrogate_retries", + "output": "mli max surrogate retries refers to Number of max retries should the surrogate model fail to build." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_mli_symlinks", + "output": "enable mli symlinks refers to Allow use of symlinks (instead of file copy) by MLI explainer procedures." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_mli_symlinks", + "output": "enable mli symlinks refers to Allow use of symlinks (instead of file copy) by MLI explainer procedures." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable mli symlinks", + "output": "enable mli symlinks refers to Allow use of symlinks (instead of file copy) by MLI explainer procedures." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable mli symlinks refers to Allow use of symlinks (instead of file copy) by MLI explainer procedures." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_mli_symlinks", + "output": "enable mli symlinks refers to Allow use of symlinks (instead of file copy) by MLI explainer procedures." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_mli_symlinks", + "output": "enable mli symlinks refers to Allow use of symlinks (instead of file copy) by MLI explainer procedures." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_mli_fraction_memory", + "output": "h2o mli fraction memory refers to Fraction of memory to allocate for h2o MLI jar" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_mli_fraction_memory", + "output": "h2o mli fraction memory refers to Fraction of memory to allocate for h2o MLI jar" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o mli fraction memory", + "output": "h2o mli fraction memory refers to Fraction of memory to allocate for h2o MLI jar" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o mli fraction memory refers to Fraction of memory to allocate for h2o MLI jar" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_mli_fraction_memory", + "output": "h2o mli fraction memory refers to Fraction of memory to allocate for h2o MLI jar" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_mli_fraction_memory", + "output": "h2o mli fraction memory refers to Fraction of memory to allocate for h2o MLI jar" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_custom", + "output": "mli custom refers to Add TOML string to Driverless AI server config.toml configuration file." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_custom", + "output": "mli custom refers to Add to config.toml via TOML string: Add TOML string to Driverless AI server config.toml configuration file." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli custom", + "output": "mli custom refers to Add to config.toml via TOML string: Add TOML string to Driverless AI server config.toml configuration file." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Add to config.toml via TOML string: ", + "output": "mli custom refers to Add to config.toml via TOML string: Add TOML string to Driverless AI server config.toml configuration file." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_custom", + "output": "mli custom refers to Add TOML string to Driverless AI server config.toml configuration file." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_custom", + "output": "mli custom refers to Add to config.toml via TOML string: Add TOML string to Driverless AI server config.toml configuration file." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_mli_explainers", + "output": "excluded mli explainers refers to To exclude e.g. Sensitivity Analysis explainer use: excluded_mli_explainers=['h2oaicore.mli.byor.recipes.sa_explainer.SaExplainer']." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded_mli_explainers", + "output": "excluded mli explainers refers to Exclude specific explainers by explainer ID: To exclude e.g. Sensitivity Analysis explainer use: excluded_mli_explainers=['h2oaicore.mli.byor.recipes.sa_explainer.SaExplainer']." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "excluded mli explainers", + "output": "excluded mli explainers refers to Exclude specific explainers by explainer ID: To exclude e.g. Sensitivity Analysis explainer use: excluded_mli_explainers=['h2oaicore.mli.byor.recipes.sa_explainer.SaExplainer']." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Exclude specific explainers by explainer ID: ", + "output": "excluded mli explainers refers to Exclude specific explainers by explainer ID: To exclude e.g. Sensitivity Analysis explainer use: excluded_mli_explainers=['h2oaicore.mli.byor.recipes.sa_explainer.SaExplainer']." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting excluded_mli_explainers", + "output": "excluded mli explainers refers to To exclude e.g. Sensitivity Analysis explainer use: excluded_mli_explainers=['h2oaicore.mli.byor.recipes.sa_explainer.SaExplainer']." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting excluded_mli_explainers", + "output": "excluded mli explainers refers to Exclude specific explainers by explainer ID: To exclude e.g. Sensitivity Analysis explainer use: excluded_mli_explainers=['h2oaicore.mli.byor.recipes.sa_explainer.SaExplainer']." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_ws_perfmon", + "output": "enable ws perfmon refers to Enable RPC API performance monitor." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_ws_perfmon", + "output": "enable ws perfmon refers to Enable RPC API performance monitor." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable ws perfmon", + "output": "enable ws perfmon refers to Enable RPC API performance monitor." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable ws perfmon refers to Enable RPC API performance monitor." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_ws_perfmon", + "output": "enable ws perfmon refers to Enable RPC API performance monitor." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_ws_perfmon", + "output": "enable ws perfmon refers to Enable RPC API performance monitor." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_kernel_explainer_workers", + "output": "mli kernel explainer workers refers to Number of parallel workers when scoring using MOJO in Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_kernel_explainer_workers", + "output": "mli kernel explainer workers refers to Number of parallel workers when scoring using MOJO in Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli kernel explainer workers", + "output": "mli kernel explainer workers refers to Number of parallel workers when scoring using MOJO in Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli kernel explainer workers refers to Number of parallel workers when scoring using MOJO in Kernel Explainer." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_kernel_explainer_workers", + "output": "mli kernel explainer workers refers to Number of parallel workers when scoring using MOJO in Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_kernel_explainer_workers", + "output": "mli kernel explainer workers refers to Number of parallel workers when scoring using MOJO in Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_run_kernel_explainer", + "output": "mli run kernel explainer refers to Use Kernel Explainer to obtain Shapley values for original features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_run_kernel_explainer", + "output": "mli run kernel explainer refers to Use Kernel Explainer to obtain Shapley values for original features: Use Kernel Explainer to obtain Shapley values for original features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli run kernel explainer", + "output": "mli run kernel explainer refers to Use Kernel Explainer to obtain Shapley values for original features: Use Kernel Explainer to obtain Shapley values for original features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Use Kernel Explainer to obtain Shapley values for original features: ", + "output": "mli run kernel explainer refers to Use Kernel Explainer to obtain Shapley values for original features: Use Kernel Explainer to obtain Shapley values for original features." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_run_kernel_explainer", + "output": "mli run kernel explainer refers to Use Kernel Explainer to obtain Shapley values for original features." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_run_kernel_explainer", + "output": "mli run kernel explainer refers to Use Kernel Explainer to obtain Shapley values for original features: Use Kernel Explainer to obtain Shapley values for original features." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_kernel_explainer_sample", + "output": "mli kernel explainer sample refers to Sample input dataset for Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_kernel_explainer_sample", + "output": "mli kernel explainer sample refers to Sample input dataset for Kernel Explainer: Sample input dataset for Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli kernel explainer sample", + "output": "mli kernel explainer sample refers to Sample input dataset for Kernel Explainer: Sample input dataset for Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Sample input dataset for Kernel Explainer: ", + "output": "mli kernel explainer sample refers to Sample input dataset for Kernel Explainer: Sample input dataset for Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_kernel_explainer_sample", + "output": "mli kernel explainer sample refers to Sample input dataset for Kernel Explainer." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_kernel_explainer_sample", + "output": "mli kernel explainer sample refers to Sample input dataset for Kernel Explainer: Sample input dataset for Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_kernel_explainer_sample_size", + "output": "mli kernel explainer sample size refers to Sample size for input dataset passed to Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_kernel_explainer_sample_size", + "output": "mli kernel explainer sample size refers to Sample size for input dataset passed to Kernel Explainer: Sample size for input dataset passed to Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli kernel explainer sample size", + "output": "mli kernel explainer sample size refers to Sample size for input dataset passed to Kernel Explainer: Sample size for input dataset passed to Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Sample size for input dataset passed to Kernel Explainer: ", + "output": "mli kernel explainer sample size refers to Sample size for input dataset passed to Kernel Explainer: Sample size for input dataset passed to Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_kernel_explainer_sample_size", + "output": "mli kernel explainer sample size refers to Sample size for input dataset passed to Kernel Explainer." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_kernel_explainer_sample_size", + "output": "mli kernel explainer sample size refers to Sample size for input dataset passed to Kernel Explainer: Sample size for input dataset passed to Kernel Explainer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_kernel_explainer_nsamples", + "output": "mli kernel explainer nsamples refers to 'auto' or int. Number of times to re-evaluate the model when explaining each prediction. More samples lead to lower variance estimates of the SHAP values. The 'auto' setting uses nsamples = 2 * X.shape[1] + 2048. This setting is disabled by default and DAI determines the right number internally." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_kernel_explainer_nsamples", + "output": "mli kernel explainer nsamples refers to Number of times to re-evaluate the model when explaining each prediction with Kernel Explainer. Default is determined internally: 'auto' or int. Number of times to re-evaluate the model when explaining each prediction. More samples lead to lower variance estimates of the SHAP values. The 'auto' setting uses nsamples = 2 * X.shape[1] + 2048. This setting is disabled by default and DAI determines the right number internally." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli kernel explainer nsamples", + "output": "mli kernel explainer nsamples refers to Number of times to re-evaluate the model when explaining each prediction with Kernel Explainer. Default is determined internally: 'auto' or int. Number of times to re-evaluate the model when explaining each prediction. More samples lead to lower variance estimates of the SHAP values. 
The 'auto' setting uses nsamples = 2 * X.shape[1] + 2048. This setting is disabled by default and DAI determines the right number internally." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of times to re-evaluate the model when explaining each prediction with Kernel Explainer. Default is determined internally: ", + "output": "mli kernel explainer nsamples refers to Number of times to re-evaluate the model when explaining each prediction with Kernel Explainer. Default is determined internally: 'auto' or int. Number of times to re-evaluate the model when explaining each prediction. More samples lead to lower variance estimates of the SHAP values. The 'auto' setting uses nsamples = 2 * X.shape[1] + 2048. This setting is disabled by default and DAI determines the right number internally." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_kernel_explainer_nsamples", + "output": "mli kernel explainer nsamples refers to 'auto' or int. Number of times to re-evaluate the model when explaining each prediction. More samples lead to lower variance estimates of the SHAP values. The 'auto' setting uses nsamples = 2 * X.shape[1] + 2048. This setting is disabled by default and DAI determines the right number internally." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_kernel_explainer_nsamples", + "output": "mli kernel explainer nsamples refers to Number of times to re-evaluate the model when explaining each prediction with Kernel Explainer. Default is determined internally: 'auto' or int. Number of times to re-evaluate the model when explaining each prediction. More samples lead to lower variance estimates of the SHAP values. The 'auto' setting uses nsamples = 2 * X.shape[1] + 2048. This setting is disabled by default and DAI determines the right number internally." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_kernel_explainer_l1_reg", + "output": "mli kernel explainer l1 reg refers to 'num_features(int)', 'auto' (default for now, but deprecated), 'aic', 'bic', or float. The l1 regularization to use for feature selection (the estimation procedure is based on a debiased lasso). The 'auto' option currently uses aic when less that 20% of the possible sample space is enumerated, otherwise it uses no regularization. THE BEHAVIOR OF 'auto' WILL CHANGE in a future version to be based on 'num_features' instead of AIC. The aic and bic options use the AIC and BIC rules for regularization. Using 'num_features(int)' selects a fix number of top features. Passing a float directly sets the alpha parameter of the sklearn.linear_model.Lasso model used for feature selection." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_kernel_explainer_l1_reg", + "output": "mli kernel explainer l1 reg refers to L1 regularization for Kernel Explainer: 'num_features(int)', 'auto' (default for now, but deprecated), 'aic', 'bic', or float. The l1 regularization to use for feature selection (the estimation procedure is based on a debiased lasso). The 'auto' option currently uses aic when less that 20% of the possible sample space is enumerated, otherwise it uses no regularization. THE BEHAVIOR OF 'auto' WILL CHANGE in a future version to be based on 'num_features' instead of AIC. The aic and bic options use the AIC and BIC rules for regularization. Using 'num_features(int)' selects a fix number of top features. Passing a float directly sets the alpha parameter of the sklearn.linear_model.Lasso model used for feature selection." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli kernel explainer l1 reg", + "output": "mli kernel explainer l1 reg refers to L1 regularization for Kernel Explainer: 'num_features(int)', 'auto' (default for now, but deprecated), 'aic', 'bic', or float. The l1 regularization to use for feature selection (the estimation procedure is based on a debiased lasso). The 'auto' option currently uses aic when less that 20% of the possible sample space is enumerated, otherwise it uses no regularization. THE BEHAVIOR OF 'auto' WILL CHANGE in a future version to be based on 'num_features' instead of AIC. The aic and bic options use the AIC and BIC rules for regularization. Using 'num_features(int)' selects a fix number of top features. Passing a float directly sets the alpha parameter of the sklearn.linear_model.Lasso model used for feature selection." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "L1 regularization for Kernel Explainer: ", + "output": "mli kernel explainer l1 reg refers to L1 regularization for Kernel Explainer: 'num_features(int)', 'auto' (default for now, but deprecated), 'aic', 'bic', or float. The l1 regularization to use for feature selection (the estimation procedure is based on a debiased lasso). The 'auto' option currently uses aic when less that 20% of the possible sample space is enumerated, otherwise it uses no regularization. THE BEHAVIOR OF 'auto' WILL CHANGE in a future version to be based on 'num_features' instead of AIC. The aic and bic options use the AIC and BIC rules for regularization. Using 'num_features(int)' selects a fix number of top features. Passing a float directly sets the alpha parameter of the sklearn.linear_model.Lasso model used for feature selection." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_kernel_explainer_l1_reg", + "output": "mli kernel explainer l1 reg refers to 'num_features(int)', 'auto' (default for now, but deprecated), 'aic', 'bic', or float. The l1 regularization to use for feature selection (the estimation procedure is based on a debiased lasso). The 'auto' option currently uses aic when less that 20% of the possible sample space is enumerated, otherwise it uses no regularization. THE BEHAVIOR OF 'auto' WILL CHANGE in a future version to be based on 'num_features' instead of AIC. The aic and bic options use the AIC and BIC rules for regularization. Using 'num_features(int)' selects a fix number of top features. Passing a float directly sets the alpha parameter of the sklearn.linear_model.Lasso model used for feature selection." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_kernel_explainer_l1_reg", + "output": "mli kernel explainer l1 reg refers to L1 regularization for Kernel Explainer: 'num_features(int)', 'auto' (default for now, but deprecated), 'aic', 'bic', or float. The l1 regularization to use for feature selection (the estimation procedure is based on a debiased lasso). The 'auto' option currently uses aic when less that 20% of the possible sample space is enumerated, otherwise it uses no regularization. THE BEHAVIOR OF 'auto' WILL CHANGE in a future version to be based on 'num_features' instead of AIC. The aic and bic options use the AIC and BIC rules for regularization. Using 'num_features(int)' selects a fix number of top features. Passing a float directly sets the alpha parameter of the sklearn.linear_model.Lasso model used for feature selection." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_kernel_explainer_max_runtime", + "output": "mli kernel explainer max runtime refers to Max runtime for Kernel Explainer in seconds. Default is 900, which equates to 15 minutes. Setting this parameter to -1 means to honor the Kernel Shapley sample size provided regardless of max runtime." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_kernel_explainer_max_runtime", + "output": "mli kernel explainer max runtime refers to Max runtime for Kernel Explainer in seconds: Max runtime for Kernel Explainer in seconds. Default is 900, which equates to 15 minutes. Setting this parameter to -1 means to honor the Kernel Shapley sample size provided regardless of max runtime." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli kernel explainer max runtime", + "output": "mli kernel explainer max runtime refers to Max runtime for Kernel Explainer in seconds: Max runtime for Kernel Explainer in seconds. Default is 900, which equates to 15 minutes. Setting this parameter to -1 means to honor the Kernel Shapley sample size provided regardless of max runtime." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max runtime for Kernel Explainer in seconds: ", + "output": "mli kernel explainer max runtime refers to Max runtime for Kernel Explainer in seconds: Max runtime for Kernel Explainer in seconds. Default is 900, which equates to 15 minutes. Setting this parameter to -1 means to honor the Kernel Shapley sample size provided regardless of max runtime." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_kernel_explainer_max_runtime", + "output": "mli kernel explainer max runtime refers to Max runtime for Kernel Explainer in seconds. Default is 900, which equates to 15 minutes. Setting this parameter to -1 means to honor the Kernel Shapley sample size provided regardless of max runtime." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_kernel_explainer_max_runtime", + "output": "mli kernel explainer max runtime refers to Max runtime for Kernel Explainer in seconds: Max runtime for Kernel Explainer in seconds. Default is 900, which equates to 15 minutes. Setting this parameter to -1 means to honor the Kernel Shapley sample size provided regardless of max runtime." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_tokenizer", + "output": "mli nlp tokenizer refers to Tokenizer used to extract tokens from text columns for MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_tokenizer", + "output": "mli nlp tokenizer refers to Tokenizer used to extract tokens from text columns for MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp tokenizer", + "output": "mli nlp tokenizer refers to Tokenizer used to extract tokens from text columns for MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli nlp tokenizer refers to Tokenizer used to extract tokens from text columns for MLI." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_tokenizer", + "output": "mli nlp tokenizer refers to Tokenizer used to extract tokens from text columns for MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_tokenizer", + "output": "mli nlp tokenizer refers to Tokenizer used to extract tokens from text columns for MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_top_n", + "output": "mli nlp top n refers to Number of tokens used for MLI NLP explanations. -1 means all." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_top_n", + "output": "mli nlp top n refers to Number of tokens used for MLI NLP explanations. -1 means all.: Number of tokens used for MLI NLP explanations. -1 means all." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp top n", + "output": "mli nlp top n refers to Number of tokens used for MLI NLP explanations. -1 means all.: Number of tokens used for MLI NLP explanations. -1 means all." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of tokens used for MLI NLP explanations. -1 means all.: ", + "output": "mli nlp top n refers to Number of tokens used for MLI NLP explanations. -1 means all.: Number of tokens used for MLI NLP explanations. -1 means all." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_top_n", + "output": "mli nlp top n refers to Number of tokens used for MLI NLP explanations. -1 means all." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_top_n", + "output": "mli nlp top n refers to Number of tokens used for MLI NLP explanations. -1 means all.: Number of tokens used for MLI NLP explanations. -1 means all." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_sample_limit", + "output": "mli nlp sample limit refers to Maximum number of records used by MLI NLP explainers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_sample_limit", + "output": "mli nlp sample limit refers to Sample size for MLI NLP explainers: Maximum number of records used by MLI NLP explainers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp sample limit", + "output": "mli nlp sample limit refers to Sample size for MLI NLP explainers: Maximum number of records used by MLI NLP explainers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Sample size for MLI NLP explainers: ", + "output": "mli nlp sample limit refers to Sample size for MLI NLP explainers: Maximum number of records used by MLI NLP explainers" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_sample_limit", + "output": "mli nlp sample limit refers to Maximum number of records used by MLI NLP explainers" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_sample_limit", + "output": "mli nlp sample limit refers to Sample size for MLI NLP explainers: Maximum number of records used by MLI NLP explainers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for 
Driverless AI", + "input": "mli_nlp_min_df", + "output": "mli nlp min df refers to Minimum number of documents in which token has to appear. Integer means absolute count, float means percentage." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_min_df", + "output": "mli nlp min df refers to Minimum number of documents in which token has to appear. Integer means absolute count, float means percentage.: Minimum number of documents in which token has to appear. Integer means absolute count, float means percentage." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp min df", + "output": "mli nlp min df refers to Minimum number of documents in which token has to appear. Integer means absolute count, float means percentage.: Minimum number of documents in which token has to appear. Integer means absolute count, float means percentage." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Minimum number of documents in which token has to appear. Integer mean absolute count, float means percentage.: ", + "output": "mli nlp min df refers to Minimum number of documents in which token has to appear. Integer means absolute count, float means percentage.: Minimum number of documents in which token has to appear. Integer means absolute count, float means percentage." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_min_df", + "output": "mli nlp min df refers to Minimum number of documents in which token has to appear. Integer means absolute count, float means percentage."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_min_df", + "output": "mli nlp min df refers to Minimum number of documents in which token has to appear. Integer means absolute count, float means percentage.: Minimum number of documents in which token has to appear. Integer means absolute count, float means percentage." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_max_df", + "output": "mli nlp max df refers to Maximum number of documents in which token has to appear. Integer means absolute count, float means percentage." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_max_df", + "output": "mli nlp max df refers to Maximum number of documents in which token has to appear. Integer means absolute count, float means percentage.: Maximum number of documents in which token has to appear. Integer means absolute count, float means percentage." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp max df", + "output": "mli nlp max df refers to Maximum number of documents in which token has to appear. Integer means absolute count, float means percentage.: Maximum number of documents in which token has to appear. Integer means absolute count, float means percentage." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of documents in which token has to appear. Integer mean absolute count, float means percentage.: ", + "output": "mli nlp max df refers to Maximum number of documents in which token has to appear. Integer means absolute count, float means percentage.: Maximum number of documents in which token has to appear. Integer means absolute count, float means percentage."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_max_df", + "output": "mli nlp max df refers to Maximum number of documents in which token has to appear. Integer means absolute count, float means percentage." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_max_df", + "output": "mli nlp max df refers to Maximum number of documents in which token has to appear. Integer means absolute count, float means percentage.: Maximum number of documents in which token has to appear. Integer means absolute count, float means percentage." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_min_ngram", + "output": "mli nlp min ngram refers to The minimum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_min_ngram", + "output": "mli nlp min ngram refers to The minimum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range.: The minimum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp min ngram", + "output": "mli nlp min ngram refers to The minimum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range.: The minimum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "The minimum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range.: ", + "output": "mli nlp min ngram refers to The minimum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range.: The minimum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_min_ngram", + "output": "mli nlp min ngram refers to The minimum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_min_ngram", + "output": "mli nlp min ngram refers to The minimum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range.: The minimum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_max_ngram", + "output": "mli nlp max ngram refers to The maximum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_max_ngram", + "output": "mli nlp max ngram refers to The maximum value in the ngram range. 
The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range.: The maximum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp max ngram", + "output": "mli nlp max ngram refers to The maximum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range.: The maximum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "The maximum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range.: ", + "output": "mli nlp max ngram refers to The maximum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range.: The maximum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_max_ngram", + "output": "mli nlp max ngram refers to The maximum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_max_ngram", + "output": "mli nlp max ngram refers to The maximum value in the ngram range. The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range.: The maximum value in the ngram range. 
The tokenizer will generate all possible tokens in the (mli_nlp_min_ngram, mli_nlp_max_ngram) range." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_min_token_mode", + "output": "mli nlp min token mode refers to Mode used to choose N tokens for MLI NLP. \"top\" chooses N top tokens. \"bottom\" chooses N bottom tokens. \"top-bottom\" chooses math.floor(N/2) top and math.ceil(N/2) bottom tokens. \"linspace\" chooses N evenly spaced out tokens." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_min_token_mode", + "output": "mli nlp min token mode refers to Mode used to choose N tokens for MLI NLP. \"top\" chooses N top tokens. \"bottom\" chooses N bottom tokens. \"top-bottom\" chooses math.floor(N/2) top and math.ceil(N/2) bottom tokens. \"linspace\" chooses N evenly spaced out tokens.: Mode used to choose N tokens for MLI NLP. \"top\" chooses N top tokens. \"bottom\" chooses N bottom tokens. \"top-bottom\" chooses math.floor(N/2) top and math.ceil(N/2) bottom tokens. \"linspace\" chooses N evenly spaced out tokens." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp min token mode", + "output": "mli nlp min token mode refers to Mode used to choose N tokens for MLI NLP. \"top\" chooses N top tokens. \"bottom\" chooses N bottom tokens. \"top-bottom\" chooses math.floor(N/2) top and math.ceil(N/2) bottom tokens. \"linspace\" chooses N evenly spaced out tokens.: Mode used to choose N tokens for MLI NLP. \"top\" chooses N top tokens. \"bottom\" chooses N bottom tokens. \"top-bottom\" chooses math.floor(N/2) top and math.ceil(N/2) bottom tokens. \"linspace\" chooses N evenly spaced out tokens."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Mode used to choose N tokens for MLI NLP.\n\"top\" chooses N top tokens.\n\"bottom\" chooses N bottom tokens.\n\"top-bottom\" chooses math.floor(N/2) top and math.ceil(N/2) bottom tokens.\n\"linspace\" chooses N evenly spaced out tokens.: ", + "output": "mli nlp min token mode refers to Mode used to choose N tokens for MLI NLP. \"top\" chooses N top tokens. \"bottom\" chooses N bottom tokens. \"top-bottom\" chooses math.floor(N/2) top and math.ceil(N/2) bottom tokens. \"linspace\" chooses N evenly spaced out tokens.: Mode used to choose N tokens for MLI NLP. \"top\" chooses N top tokens. \"bottom\" chooses N bottom tokens. \"top-bottom\" chooses math.floor(N/2) top and math.ceil(N/2) bottom tokens. \"linspace\" chooses N evenly spaced out tokens." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_min_token_mode", + "output": "mli nlp min token mode refers to Mode used to choose N tokens for MLI NLP. \"top\" chooses N top tokens. \"bottom\" chooses N bottom tokens. \"top-bottom\" chooses math.floor(N/2) top and math.ceil(N/2) bottom tokens. \"linspace\" chooses N evenly spaced out tokens." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_min_token_mode", + "output": "mli nlp min token mode refers to Mode used to choose N tokens for MLI NLP. \"top\" chooses N top tokens. \"bottom\" chooses N bottom tokens. \"top-bottom\" chooses math.floor(N/2) top and math.ceil(N/2) bottom tokens. \"linspace\" chooses N evenly spaced out tokens.: Mode used to choose N tokens for MLI NLP. \"top\" chooses N top tokens. \"bottom\" chooses N bottom tokens. \"top-bottom\" chooses math.floor(N/2) top and math.ceil(N/2) bottom tokens. \"linspace\" chooses N evenly spaced out tokens."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_tokenizer_max_features", + "output": "mli nlp tokenizer max features refers to The number of top tokens to be used as features when building token based feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_tokenizer_max_features", + "output": "mli nlp tokenizer max features refers to The number of top tokens to be used as features when building token based feature importance.: The number of top tokens to be used as features when building token based feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp tokenizer max features", + "output": "mli nlp tokenizer max features refers to The number of top tokens to be used as features when building token based feature importance.: The number of top tokens to be used as features when building token based feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "The number of top tokens to be used as features when building token based feature importance.: ", + "output": "mli nlp tokenizer max features refers to The number of top tokens to be used as features when building token based feature importance.: The number of top tokens to be used as features when building token based feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_tokenizer_max_features", + "output": "mli nlp tokenizer max features refers to The number of top tokens to be used as features when building token based feature importance." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_tokenizer_max_features", + "output": "mli nlp tokenizer max features refers to The number of top tokens to be used as features when building token based feature importance.: The number of top tokens to be used as features when building token based feature importance." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_loco_max_features", + "output": "mli nlp loco max features refers to The number of top tokens to be used as features when computing text LOCO." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_loco_max_features", + "output": "mli nlp loco max features refers to The number of top tokens to be used as features when computing text LOCO.: The number of top tokens to be used as features when computing text LOCO." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp loco max features", + "output": "mli nlp loco max features refers to The number of top tokens to be used as features when computing text LOCO.: The number of top tokens to be used as features when computing text LOCO." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "The number of top tokens to be used as features when computing text LOCO.: ", + "output": "mli nlp loco max features refers to The number of top tokens to be used as features when computing text LOCO.: The number of top tokens to be used as features when computing text LOCO." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_loco_max_features", + "output": "mli nlp loco max features refers to The number of top tokens to be used as features when computing text LOCO." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_loco_max_features", + "output": "mli nlp loco max features refers to The number of top tokens to be used as features when computing text LOCO.: The number of top tokens to be used as features when computing text LOCO." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_surrogate_tokenizer", + "output": "mli nlp surrogate tokenizer refers to The tokenizer method to use when tokenizing a dataset for surrogate models. Can either choose 'TF-IDF' or 'Linear Model + TF-IDF', which first runs TF-IDF to get tokens and then fits a linear model between the tokens and the target to get importances of tokens, which are based on coefficients of the linear model. Default is 'Linear Model + TF-IDF'. Only applies to NLP models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_surrogate_tokenizer", + "output": "mli nlp surrogate tokenizer refers to Tokenizer for surrogate models. Only applies to NLP models.: The tokenizer method to use when tokenizing a dataset for surrogate models. Can either choose 'TF-IDF' or 'Linear Model + TF-IDF', which first runs TF-IDF to get tokens and then fits a linear model between the tokens and the target to get importances of tokens, which are based on coefficients of the linear model. Default is 'Linear Model + TF-IDF'. Only applies to NLP models." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp surrogate tokenizer", + "output": "mli nlp surrogate tokenizer refers to Tokenizer for surrogate models. Only applies to NLP models.: The tokenizer method to use when tokenizing a dataset for surrogate models. Can either choose 'TF-IDF' or 'Linear Model + TF-IDF', which first runs TF-IDF to get tokens and then fits a linear model between the tokens and the target to get importances of tokens, which are based on coefficients of the linear model. Default is 'Linear Model + TF-IDF'. Only applies to NLP models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Tokenizer for surrogate models. Only applies to NLP models.: ", + "output": "mli nlp surrogate tokenizer refers to Tokenizer for surrogate models. Only applies to NLP models.: The tokenizer method to use when tokenizing a dataset for surrogate models. Can either choose 'TF-IDF' or 'Linear Model + TF-IDF', which first runs TF-IDF to get tokens and then fits a linear model between the tokens and the target to get importances of tokens, which are based on coefficients of the linear model. Default is 'Linear Model + TF-IDF'. Only applies to NLP models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_surrogate_tokenizer", + "output": "mli nlp surrogate tokenizer refers to The tokenizer method to use when tokenizing a dataset for surrogate models. Can either choose 'TF-IDF' or 'Linear Model + TF-IDF', which first runs TF-IDF to get tokens and then fits a linear model between the tokens and the target to get importances of tokens, which are based on coefficients of the linear model. Default is 'Linear Model + TF-IDF'. Only applies to NLP models." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_surrogate_tokenizer", + "output": "mli nlp surrogate tokenizer refers to Tokenizer for surrogate models. Only applies to NLP models.: The tokenizer method to use when tokenizing a dataset for surrogate models. Can either choose 'TF-IDF' or 'Linear Model + TF-IDF', which first runs TF-IDF to get tokens and then fits a linear model between the tokens and the target to get importances of tokens, which are based on coefficients of the linear model. Default is 'Linear Model + TF-IDF'. Only applies to NLP models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_surrogate_tokens", + "output": "mli nlp surrogate tokens refers to The number of top tokens to be used as features when building surrogate models. Only applies to NLP models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_surrogate_tokens", + "output": "mli nlp surrogate tokens refers to The number of top tokens to be used as features when building surrogate models. Only applies to NLP models.: The number of top tokens to be used as features when building surrogate models. Only applies to NLP models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp surrogate tokens", + "output": "mli nlp surrogate tokens refers to The number of top tokens to be used as features when building surrogate models. Only applies to NLP models.: The number of top tokens to be used as features when building surrogate models. Only applies to NLP models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "The number of top tokens to be used as features when building surrogate models. 
Only applies to NLP models.: ", + "output": "mli nlp surrogate tokens refers to The number of top tokens to be used as features when building surrogate models. Only applies to NLP models.: The number of top tokens to be used as features when building surrogate models. Only applies to NLP models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_surrogate_tokens", + "output": "mli nlp surrogate tokens refers to The number of top tokens to be used as features when building surrogate models. Only applies to NLP models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_surrogate_tokens", + "output": "mli nlp surrogate tokens refers to The number of top tokens to be used as features when building surrogate models. Only applies to NLP models.: The number of top tokens to be used as features when building surrogate models. Only applies to NLP models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_use_stop_words", + "output": "mli nlp use stop words refers to Ignore stop words for MLI NLP." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_use_stop_words", + "output": "mli nlp use stop words refers to Ignore stop words for MLI NLP.: Ignore stop words for MLI NLP." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp use stop words", + "output": "mli nlp use stop words refers to Ignore stop words for MLI NLP.: Ignore stop words for MLI NLP." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Ignore stop words for MLI NLP.: ", + "output": "mli nlp use stop words refers to Ignore stop words for MLI NLP.: Ignore stop words for MLI NLP." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_use_stop_words", + "output": "mli nlp use stop words refers to Ignore stop words for MLI NLP." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_use_stop_words", + "output": "mli nlp use stop words refers to Ignore stop words for MLI NLP.: Ignore stop words for MLI NLP." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_stop_words", + "output": "mli nlp stop words refers to List of words to filter out before generation of text tokens, which are passed to MLI NLP LOCO and surrogate models (if enabled). Default is 'english'. Pass in custom stop-words as a list, e.g., ['great', 'good']." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_stop_words", + "output": "mli nlp stop words refers to List of words to filter out before generation of text tokens, which are passed to MLI NLP LOCO and surrogate models (if enabled). Default is 'english'. Pass in custom stop-words as a list, e.g., ['great', 'good']: List of words to filter out before generation of text tokens, which are passed to MLI NLP LOCO and surrogate models (if enabled). Default is 'english'. Pass in custom stop-words as a list, e.g., ['great', 'good']." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp stop words", + "output": "mli nlp stop words refers to List of words to filter out before generation of text tokens, which are passed to MLI NLP LOCO and surrogate models (if enabled). Default is 'english'. Pass in custom stop-words as a list, e.g., ['great', 'good']: List of words to filter out before generation of text tokens, which are passed to MLI NLP LOCO and surrogate models (if enabled). Default is 'english'. Pass in custom stop-words as a list, e.g., ['great', 'good']." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "List of words to filter out before generation of text tokens, which are passed to MLI NLP LOCO and surrogate models (if enabled). Default is 'english'. Pass in custom stop-words as a list, e.g., ['great', 'good']: ", + "output": "mli nlp stop words refers to List of words to filter out before generation of text tokens, which are passed to MLI NLP LOCO and surrogate models (if enabled). Default is 'english'. Pass in custom stop-words as a list, e.g., ['great', 'good']: List of words to filter out before generation of text tokens, which are passed to MLI NLP LOCO and surrogate models (if enabled). Default is 'english'. Pass in custom stop-words as a list, e.g., ['great', 'good']." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_stop_words", + "output": "mli nlp stop words refers to List of words to filter out before generation of text tokens, which are passed to MLI NLP LOCO and surrogate models (if enabled). Default is 'english'. Pass in custom stop-words as a list, e.g., ['great', 'good']." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_stop_words", + "output": "mli nlp stop words refers to List of words to filter out before generation of text tokens, which are passed to MLI NLP LOCO and surrogate models (if enabled). Default is 'english'. Pass in custom stop-words as a list, e.g., ['great', 'good']: List of words to filter out before generation of text tokens, which are passed to MLI NLP LOCO and surrogate models (if enabled). Default is 'english'. Pass in custom stop-words as a list, e.g., ['great', 'good']." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_append_to_english_stop_words", + "output": "mli nlp append to english stop words refers to Append passed in list of custom stop words to default 'english' stop words." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_append_to_english_stop_words", + "output": "mli nlp append to english stop words refers to Append passed in list of custom stop words to default 'english' stop words: Append passed in list of custom stop words to default 'english' stop words." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp append to english stop words", + "output": "mli nlp append to english stop words refers to Append passed in list of custom stop words to default 'english' stop words: Append passed in list of custom stop words to default 'english' stop words." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Append passed in list of custom stop words to default 'english' stop words: ", + "output": "mli nlp append to english stop words refers to Append passed in list of custom stop words to default 'english' stop words: Append passed in list of custom stop words to default 'english' stop words." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_append_to_english_stop_words", + "output": "mli nlp append to english stop words refers to Append passed in list of custom stop words to default 'english' stop words." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_append_to_english_stop_words", + "output": "mli nlp append to english stop words refers to Append passed in list of custom stop words to default 'english' stop words: Append passed in list of custom stop words to default 'english' stop words." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_image_enable", + "output": "mli image enable refers to Enable MLI for image experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_image_enable", + "output": "mli image enable refers to Enable MLI for image experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli image enable", + "output": "mli image enable refers to Enable MLI for image experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mli image enable refers to Enable MLI for image experiments." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_image_enable", + "output": "mli image enable refers to Enable MLI for image experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_image_enable", + "output": "mli image enable refers to Enable MLI for image experiments." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_max_explain_rows", + "output": "mli max explain rows refers to The maximum number of rows allowed to get the local explanation result, increasing the value may jeopardize overall performance, change the value only if necessary." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_max_explain_rows", + "output": "mli max explain rows refers to The maximum number of rows allowed to get the local explanation result.: The maximum number of rows allowed to get the local explanation result, increasing the value may jeopardize overall performance, change the value only if necessary." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli max explain rows", + "output": "mli max explain rows refers to The maximum number of rows allowed to get the local explanation result.: The maximum number of rows allowed to get the local explanation result, increasing the value may jeopardize overall performance, change the value only if necessary."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "The maximum number of rows allowed to get the local explanation result.: ", + "output": "mli max explain rows refers to The maximum number of rows allowed to get the local explanation result.: The maximum number of rows allowed to get the local explanation result, increase the value may jeopardize overall performance, change the value only if necessary." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_max_explain_rows", + "output": "mli max explain rows refers to The maximum number of rows allowed to get the local explanation result, increase the value may jeopardize overall performance, change the value only if necessary." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_max_explain_rows", + "output": "mli max explain rows refers to The maximum number of rows allowed to get the local explanation result.: The maximum number of rows allowed to get the local explanation result, increase the value may jeopardize overall performance, change the value only if necessary." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_max_tokens_rows", + "output": "mli nlp max tokens rows refers to The maximum number of rows allowed to get the NLP token importance result, increasing the value may consume too much memory and negatively impact the performance, change the value only if necessary." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_max_tokens_rows", + "output": "mli nlp max tokens rows refers to The maximum number of rows allowed to get the NLP token importance result.: The maximum number of rows allowed to get the NLP token importance result, increasing the value may consume too much memory and negatively impact the performance, change the value only if necessary." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp max tokens rows", + "output": "mli nlp max tokens rows refers to The maximum number of rows allowed to get the NLP token importance result.: The maximum number of rows allowed to get the NLP token importance result, increasing the value may consume too much memory and negatively impact the performance, change the value only if necessary." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "The maximum number of rows allowed to get the NLP token importance result.: ", + "output": "mli nlp max tokens rows refers to The maximum number of rows allowed to get the NLP token importance result.: The maximum number of rows allowed to get the NLP token importance result, increasing the value may consume too much memory and negatively impact the performance, change the value only if necessary." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_max_tokens_rows", + "output": "mli nlp max tokens rows refers to The maximum number of rows allowed to get the NLP token importance result, increasing the value may consume too much memory and negatively impact the performance, change the value only if necessary." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_max_tokens_rows", + "output": "mli nlp max tokens rows refers to The maximum number of rows allowed to get the NLP token importance result.: The maximum number of rows allowed to get the NLP token importance result, increasing the value may consume too much memory and negatively impact the performance, change the value only if necessary." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_min_parallel_rows", + "output": "mli nlp min parallel rows refers to The minimum number of rows to enable parallel execution for NLP local explanations calculation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_nlp_min_parallel_rows", + "output": "mli nlp min parallel rows refers to The minimum number of rows to enable parallel execution for NLP local explanations calculation.: The minimum number of rows to enable parallel execution for NLP local explanations calculation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli nlp min parallel rows", + "output": "mli nlp min parallel rows refers to The minimum number of rows to enable parallel execution for NLP local explanations calculation.: The minimum number of rows to enable parallel execution for NLP local explanations calculation." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "The minimum number of rows to enable parallel execution for NLP local explanations calculation.: ", + "output": "mli nlp min parallel rows refers to The minimum number of rows to enable parallel execution for NLP local explanations calculation.: The minimum number of rows to enable parallel execution for NLP local explanations calculation." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_nlp_min_parallel_rows", + "output": "mli nlp min parallel rows refers to The minimum number of rows to enable parallel execution for NLP local explanations calculation." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_nlp_min_parallel_rows", + "output": "mli nlp min parallel rows refers to The minimum number of rows to enable parallel execution for NLP local explanations calculation.: The minimum number of rows to enable parallel execution for NLP local explanations calculation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_run_legacy_defaults", + "output": "mli run legacy defaults refers to Run legacy defaults in addition to current default explainers in MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_run_legacy_defaults", + "output": "mli run legacy defaults refers to Run legacy defaults in addition to current default explainers in MLI.: Run legacy defaults in addition to current default explainers in MLI." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli run legacy defaults", + "output": "mli run legacy defaults refers to Run legacy defaults in addition to current default explainers in MLI.: Run legacy defaults in addition to current default explainers in MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Run legacy defaults in addition to current default explainers in MLI.: ", + "output": "mli run legacy defaults refers to Run legacy defaults in addition to current default explainers in MLI.: Run legacy defaults in addition to current default explainers in MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_run_legacy_defaults", + "output": "mli run legacy defaults refers to Run legacy defaults in addition to current default explainers in MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_run_legacy_defaults", + "output": "mli run legacy defaults refers to Run legacy defaults in addition to current default explainers in MLI.: Run legacy defaults in addition to current default explainers in MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_cluster_kwargs", + "output": "dask cuda cluster kwargs refers to Set dask CUDA/RAPIDS cluster settings for single node workers. Additional environment variables can be set, see: https://dask-cuda.readthedocs.io/en/latest/ucx.html#dask-scheduler e.g. 
for ucx use: {} dict version of: dict(n_workers=None, threads_per_worker=1, processes=True, memory_limit='auto', device_memory_limit=None, CUDA_VISIBLE_DEVICES=None, data=None, local_directory=None, protocol='ucx', enable_tcp_over_ucx=True, enable_infiniband=False, enable_nvlink=False, enable_rdmacm=False, ucx_net_devices='auto', rmm_pool_size='1GB') WARNING: Do not add arguments like {'n_workers': 1, 'processes': True, 'threads_per_worker': 1} this will lead to hangs, cuda cluster handles this itself. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_cluster_kwargs", + "output": "dask cuda cluster kwargs refers to Set dask CUDA/RAPIDS cluster settings for single node workers.: Set dask CUDA/RAPIDS cluster settings for single node workers. Additional environment variables can be set, see: https://dask-cuda.readthedocs.io/en/latest/ucx.html#dask-scheduler e.g. for ucx use: {} dict version of: dict(n_workers=None, threads_per_worker=1, processes=True, memory_limit='auto', device_memory_limit=None, CUDA_VISIBLE_DEVICES=None, data=None, local_directory=None, protocol='ucx', enable_tcp_over_ucx=True, enable_infiniband=False, enable_nvlink=False, enable_rdmacm=False, ucx_net_devices='auto', rmm_pool_size='1GB') WARNING: Do not add arguments like {'n_workers': 1, 'processes': True, 'threads_per_worker': 1} this will lead to hangs, cuda cluster handles this itself. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask cuda cluster kwargs", + "output": "dask cuda cluster kwargs refers to Set dask CUDA/RAPIDS cluster settings for single node workers.: Set dask CUDA/RAPIDS cluster settings for single node workers. Additional environment variables can be set, see: https://dask-cuda.readthedocs.io/en/latest/ucx.html#dask-scheduler e.g. 
for ucx use: {} dict version of: dict(n_workers=None, threads_per_worker=1, processes=True, memory_limit='auto', device_memory_limit=None, CUDA_VISIBLE_DEVICES=None, data=None, local_directory=None, protocol='ucx', enable_tcp_over_ucx=True, enable_infiniband=False, enable_nvlink=False, enable_rdmacm=False, ucx_net_devices='auto', rmm_pool_size='1GB') WARNING: Do not add arguments like {'n_workers': 1, 'processes': True, 'threads_per_worker': 1} this will lead to hangs, cuda cluster handles this itself. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Set dask CUDA/RAPIDS cluster settings for single node workers.: ", + "output": "dask cuda cluster kwargs refers to Set dask CUDA/RAPIDS cluster settings for single node workers.: Set dask CUDA/RAPIDS cluster settings for single node workers. Additional environment variables can be set, see: https://dask-cuda.readthedocs.io/en/latest/ucx.html#dask-scheduler e.g. for ucx use: {} dict version of: dict(n_workers=None, threads_per_worker=1, processes=True, memory_limit='auto', device_memory_limit=None, CUDA_VISIBLE_DEVICES=None, data=None, local_directory=None, protocol='ucx', enable_tcp_over_ucx=True, enable_infiniband=False, enable_nvlink=False, enable_rdmacm=False, ucx_net_devices='auto', rmm_pool_size='1GB') WARNING: Do not add arguments like {'n_workers': 1, 'processes': True, 'threads_per_worker': 1} this will lead to hangs, cuda cluster handles this itself. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_cuda_cluster_kwargs", + "output": "dask cuda cluster kwargs refers to Set dask CUDA/RAPIDS cluster settings for single node workers. Additional environment variables can be set, see: https://dask-cuda.readthedocs.io/en/latest/ucx.html#dask-scheduler e.g. 
for ucx use: {} dict version of: dict(n_workers=None, threads_per_worker=1, processes=True, memory_limit='auto', device_memory_limit=None, CUDA_VISIBLE_DEVICES=None, data=None, local_directory=None, protocol='ucx', enable_tcp_over_ucx=True, enable_infiniband=False, enable_nvlink=False, enable_rdmacm=False, ucx_net_devices='auto', rmm_pool_size='1GB') WARNING: Do not add arguments like {'n_workers': 1, 'processes': True, 'threads_per_worker': 1} this will lead to hangs, cuda cluster handles this itself. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_cuda_cluster_kwargs", + "output": "dask cuda cluster kwargs refers to Set dask CUDA/RAPIDS cluster settings for single node workers.: Set dask CUDA/RAPIDS cluster settings for single node workers. Additional environment variables can be set, see: https://dask-cuda.readthedocs.io/en/latest/ucx.html#dask-scheduler e.g. for ucx use: {} dict version of: dict(n_workers=None, threads_per_worker=1, processes=True, memory_limit='auto', device_memory_limit=None, CUDA_VISIBLE_DEVICES=None, data=None, local_directory=None, protocol='ucx', enable_tcp_over_ucx=True, enable_infiniband=False, enable_nvlink=False, enable_rdmacm=False, ucx_net_devices='auto', rmm_pool_size='1GB') WARNING: Do not add arguments like {'n_workers': 1, 'processes': True, 'threads_per_worker': 1} this will lead to hangs, cuda cluster handles this itself. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cluster_kwargs", + "output": "dask cluster kwargs refers to Set dask cluster settings for single node workers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cluster_kwargs", + "output": "dask cluster kwargs refers to Set dask cluster settings for single node workers.: Set dask cluster settings for single node workers. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask cluster kwargs", + "output": "dask cluster kwargs refers to Set dask cluster settings for single node workers.: Set dask cluster settings for single node workers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Set dask cluster settings for single node workers.: ", + "output": "dask cluster kwargs refers to Set dask cluster settings for single node workers.: Set dask cluster settings for single node workers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_cluster_kwargs", + "output": "dask cluster kwargs refers to Set dask cluster settings for single node workers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_cluster_kwargs", + "output": "dask cluster kwargs refers to Set dask cluster settings for single node workers.: Set dask cluster settings for single node workers. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_dask_cluster", + "output": "enable dask cluster refers to Whether to enable dask scheduler DAI server node and dask workers on DAI worker nodes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_dask_cluster", + "output": "enable dask cluster refers to Enable dask scheduler and worker on singlenode/multinode setup: Whether to enable dask scheduler DAI server node and dask workers on DAI worker nodes. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable dask cluster", + "output": "enable dask cluster refers to Enable dask scheduler and worker on singlenode/multinode setup: Whether to enable dask scheduler DAI server node and dask workers on DAI worker nodes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable dask scheduler and worker on singlenode/multinode setup: ", + "output": "enable dask cluster refers to Enable dask scheduler and worker on singlenode/multinode setup: Whether to enable dask scheduler DAI server node and dask workers on DAI worker nodes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_dask_cluster", + "output": "enable dask cluster refers to Whether to enable dask scheduler DAI server node and dask workers on DAI worker nodes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_dask_cluster", + "output": "enable dask cluster refers to Enable dask scheduler and worker on singlenode/multinode setup: Whether to enable dask scheduler DAI server node and dask workers on DAI worker nodes. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "start_dask_worker", + "output": "start dask worker refers to Whether to start dask workers on this multinode worker. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "start_dask_worker", + "output": "start dask worker refers to Start dask workers for given multinode worker: Whether to start dask workers on this multinode worker. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "start dask worker", + "output": "start dask worker refers to Start dask workers for given multinode worker: Whether to start dask workers on this multinode worker. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Start dask workers for given multinode worker: ", + "output": "start dask worker refers to Start dask workers for given multinode worker: Whether to start dask workers on this multinode worker. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting start_dask_worker", + "output": "start dask worker refers to Whether to start dask workers on this multinode worker. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting start_dask_worker", + "output": "start dask worker refers to Start dask workers for given multinode worker: Whether to start dask workers on this multinode worker. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_scheduler_env", + "output": "dask scheduler env refers to Set dask scheduler env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_scheduler_env", + "output": "dask scheduler env refers to Set dask scheduler env.: Set dask scheduler env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask scheduler env", + "output": "dask scheduler env refers to Set dask scheduler env.: Set dask scheduler env. 
See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Set dask scheduler env.: ", + "output": "dask scheduler env refers to Set dask scheduler env.: Set dask scheduler env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_scheduler_env", + "output": "dask scheduler env refers to Set dask scheduler env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_scheduler_env", + "output": "dask scheduler env refers to Set dask scheduler env.: Set dask scheduler env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_scheduler_env", + "output": "dask cuda scheduler env refers to Set dask scheduler env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_scheduler_env", + "output": "dask cuda scheduler env refers to Set dask cuda scheduler env.: Set dask scheduler env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask cuda scheduler env", + "output": "dask cuda scheduler env refers to Set dask cuda scheduler env.: Set dask scheduler env. 
See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Set dask cuda scheduler env.: ", + "output": "dask cuda scheduler env refers to Set dask cuda scheduler env.: Set dask scheduler env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_cuda_scheduler_env", + "output": "dask cuda scheduler env refers to Set dask scheduler env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_cuda_scheduler_env", + "output": "dask cuda scheduler env refers to Set dask cuda scheduler env.: Set dask scheduler env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_scheduler_options", + "output": "dask scheduler options refers to Set dask scheduler options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_scheduler_options", + "output": "dask scheduler options refers to Set dask scheduler command-line options.: Set dask scheduler options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask scheduler options", + "output": "dask scheduler options refers to Set dask scheduler command-line options.: Set dask scheduler options. 
See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Set dask scheduler command-line options.: ", + "output": "dask scheduler options refers to Set dask scheduler command-line options.: Set dask scheduler options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_scheduler_options", + "output": "dask scheduler options refers to Set dask scheduler options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_scheduler_options", + "output": "dask scheduler options refers to Set dask scheduler command-line options.: Set dask scheduler options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_scheduler_options", + "output": "dask cuda scheduler options refers to Set dask cuda scheduler options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_scheduler_options", + "output": "dask cuda scheduler options refers to Set dask cuda scheduler command-line options.: Set dask cuda scheduler options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask cuda scheduler options", + "output": "dask cuda scheduler options refers to Set dask cuda scheduler command-line options.: Set dask cuda scheduler options. 
See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Set dask cuda scheduler command-line options.: ", + "output": "dask cuda scheduler options refers to Set dask cuda scheduler command-line options.: Set dask cuda scheduler options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_cuda_scheduler_options", + "output": "dask cuda scheduler options refers to Set dask cuda scheduler options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_cuda_scheduler_options", + "output": "dask cuda scheduler options refers to Set dask cuda scheduler command-line options.: Set dask cuda scheduler options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_worker_env", + "output": "dask worker env refers to Set dask worker env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_worker_env", + "output": "dask worker env refers to Set dask worker environment variables. NCCL_SOCKET_IFNAME is automatically set, but can be overridden here.: Set dask worker env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask worker env", + "output": "dask worker env refers to Set dask worker environment variables. NCCL_SOCKET_IFNAME is automatically set, but can be overridden here.: Set dask worker env. 
See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Set dask worker environment variables. NCCL_SOCKET_IFNAME is automatically set, but can be overridden here.: ", + "output": "dask worker env refers to Set dask worker environment variables. NCCL_SOCKET_IFNAME is automatically set, but can be overridden here.: Set dask worker env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_worker_env", + "output": "dask worker env refers to Set dask worker env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_worker_env", + "output": "dask worker env refers to Set dask worker environment variables. NCCL_SOCKET_IFNAME is automatically set, but can be overridden here.: Set dask worker env. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_worker_options", + "output": "dask worker options refers to Set dask worker options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_worker_options", + "output": "dask worker options refers to Set dask worker command-line options.: Set dask worker options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask worker options", + "output": "dask worker options refers to Set dask worker command-line options.: Set dask worker options. 
See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Set dask worker command-line options.: ", + "output": "dask worker options refers to Set dask worker command-line options.: Set dask worker options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_worker_options", + "output": "dask worker options refers to Set dask worker options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_worker_options", + "output": "dask worker options refers to Set dask worker command-line options.: Set dask worker options. See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_worker_options", + "output": "dask cuda worker options refers to Set dask cuda worker options. Similar options as dask_cuda_cluster_kwargs. See https://dask-cuda.readthedocs.io/en/latest/ucx.html#launching-scheduler-workers-and-clients-separately \"--rmm-pool-size 1GB\" can be set to give 1GB to RMM for more efficient rapids " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_worker_options", + "output": "dask cuda worker options refers to Set dask cuda worker options.: Set dask cuda worker options. Similar options as dask_cuda_cluster_kwargs. 
See https://dask-cuda.readthedocs.io/en/latest/ucx.html#launching-scheduler-workers-and-clients-separately \"--rmm-pool-size 1GB\" can be set to give 1GB to RMM for more efficient rapids " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask cuda worker options", + "output": "dask cuda worker options refers to Set dask cuda worker options.: Set dask cuda worker options. Similar options as dask_cuda_cluster_kwargs. See https://dask-cuda.readthedocs.io/en/latest/ucx.html#launching-scheduler-workers-and-clients-separately \"--rmm-pool-size 1GB\" can be set to give 1GB to RMM for more efficient rapids " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Set dask cuda worker options.: ", + "output": "dask cuda worker options refers to Set dask cuda worker options.: Set dask cuda worker options. Similar options as dask_cuda_cluster_kwargs. See https://dask-cuda.readthedocs.io/en/latest/ucx.html#launching-scheduler-workers-and-clients-separately \"--rmm-pool-size 1GB\" can be set to give 1GB to RMM for more efficient rapids " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_cuda_worker_options", + "output": "dask cuda worker options refers to Set dask cuda worker options. Similar options as dask_cuda_cluster_kwargs. See https://dask-cuda.readthedocs.io/en/latest/ucx.html#launching-scheduler-workers-and-clients-separately \"--rmm-pool-size 1GB\" can be set to give 1GB to RMM for more efficient rapids " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_cuda_worker_options", + "output": "dask cuda worker options refers to Set dask cuda worker options.: Set dask cuda worker options. Similar options as dask_cuda_cluster_kwargs. 
See https://dask-cuda.readthedocs.io/en/latest/ucx.html#launching-scheduler-workers-and-clients-separately \"--rmm-pool-size 1GB\" can be set to give 1GB to RMM for more efficient rapids " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_worker_env", + "output": "dask cuda worker env refers to Set dask cuda worker env. See: https://dask-cuda.readthedocs.io/en/latest/ucx.html#launching-scheduler-workers-and-clients-separately https://ucx-py.readthedocs.io/en/latest/dask.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_worker_env", + "output": "dask cuda worker env refers to Set dask cuda worker environment variables.: Set dask cuda worker env. See: https://dask-cuda.readthedocs.io/en/latest/ucx.html#launching-scheduler-workers-and-clients-separately https://ucx-py.readthedocs.io/en/latest/dask.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask cuda worker env", + "output": "dask cuda worker env refers to Set dask cuda worker environment variables.: Set dask cuda worker env. See: https://dask-cuda.readthedocs.io/en/latest/ucx.html#launching-scheduler-workers-and-clients-separately https://ucx-py.readthedocs.io/en/latest/dask.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Set dask cuda worker environment variables.: ", + "output": "dask cuda worker env refers to Set dask cuda worker environment variables.: Set dask cuda worker env. 
See: https://dask-cuda.readthedocs.io/en/latest/ucx.html#launching-scheduler-workers-and-clients-separately https://ucx-py.readthedocs.io/en/latest/dask.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_cuda_worker_env", + "output": "dask cuda worker env refers to Set dask cuda worker env. See: https://dask-cuda.readthedocs.io/en/latest/ucx.html#launching-scheduler-workers-and-clients-separately https://ucx-py.readthedocs.io/en/latest/dask.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_cuda_worker_env", + "output": "dask cuda worker env refers to Set dask cuda worker environment variables.: Set dask cuda worker env. See: https://dask-cuda.readthedocs.io/en/latest/ucx.html#launching-scheduler-workers-and-clients-separately https://ucx-py.readthedocs.io/en/latest/dask.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_protocol", + "output": "dask protocol refers to See https://docs.dask.org/en/latest/setup/cli.html e.g. ucx is optimal, while tcp is most reliable " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_protocol", + "output": "dask protocol refers to Protocol using for dask communications.: See https://docs.dask.org/en/latest/setup/cli.html e.g. ucx is optimal, while tcp is most reliable " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask protocol", + "output": "dask protocol refers to Protocol using for dask communications.: See https://docs.dask.org/en/latest/setup/cli.html e.g. 
ucx is optimal, while tcp is most reliable " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Protocol using for dask communications.: ", + "output": "dask protocol refers to Protocol using for dask communications.: See https://docs.dask.org/en/latest/setup/cli.html e.g. ucx is optimal, while tcp is most reliable " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_protocol", + "output": "dask protocol refers to See https://docs.dask.org/en/latest/setup/cli.html e.g. ucx is optimal, while tcp is most reliable " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_protocol", + "output": "dask protocol refers to Protocol using for dask communications.: See https://docs.dask.org/en/latest/setup/cli.html e.g. ucx is optimal, while tcp is most reliable " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_server_port", + "output": "dask server port refers to See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_server_port", + "output": "dask server port refers to Port using by server for dask communications.: See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask server port", + "output": "dask server port refers to Port using by server for dask communications.: See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Port using by server for dask communications.: ", + "output": "dask server port refers to Port using by 
server for dask communications.: See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_server_port", + "output": "dask server port refers to See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_server_port", + "output": "dask server port refers to Port using by server for dask communications.: See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_dashboard_port", + "output": "dask dashboard port refers to See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_dashboard_port", + "output": "dask dashboard port refers to Dask dashboard port for dask diagnostics.: See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask dashboard port", + "output": "dask dashboard port refers to Dask dashboard port for dask diagnostics.: See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Dask dashboard port for dask diagnostics.: ", + "output": "dask dashboard port refers to Dask dashboard port for dask diagnostics.: See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_dashboard_port", + "output": "dask dashboard port refers to See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a 
detailed explanation of the expert setting dask_dashboard_port", + "output": "dask dashboard port refers to Dask dashboard port for dask diagnostics.: See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_protocol", + "output": "dask cuda protocol refers to See https://docs.dask.org/en/latest/setup/cli.html e.g. ucx is optimal, while tcp is most reliable " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_protocol", + "output": "dask cuda protocol refers to Protocol using for dask cuda communications.: See https://docs.dask.org/en/latest/setup/cli.html e.g. ucx is optimal, while tcp is most reliable " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask cuda protocol", + "output": "dask cuda protocol refers to Protocol using for dask cuda communications.: See https://docs.dask.org/en/latest/setup/cli.html e.g. ucx is optimal, while tcp is most reliable " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Protocol using for dask cuda communications.: ", + "output": "dask cuda protocol refers to Protocol using for dask cuda communications.: See https://docs.dask.org/en/latest/setup/cli.html e.g. ucx is optimal, while tcp is most reliable " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_cuda_protocol", + "output": "dask cuda protocol refers to See https://docs.dask.org/en/latest/setup/cli.html e.g. 
ucx is optimal, while tcp is most reliable " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_cuda_protocol", + "output": "dask cuda protocol refers to Protocol using for dask cuda communications.: See https://docs.dask.org/en/latest/setup/cli.html e.g. ucx is optimal, while tcp is most reliable " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_server_port", + "output": "dask cuda server port refers to See https://docs.dask.org/en/latest/setup/cli.html port + 1 is used for dask dashboard " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_server_port", + "output": "dask cuda server port refers to Port using by server for dask cuda communications.: See https://docs.dask.org/en/latest/setup/cli.html port + 1 is used for dask dashboard " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask cuda server port", + "output": "dask cuda server port refers to Port using by server for dask cuda communications.: See https://docs.dask.org/en/latest/setup/cli.html port + 1 is used for dask dashboard " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Port using by server for dask cuda communications.: ", + "output": "dask cuda server port refers to Port using by server for dask cuda communications.: See https://docs.dask.org/en/latest/setup/cli.html port + 1 is used for dask dashboard " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_cuda_server_port", + "output": "dask cuda server port refers to See https://docs.dask.org/en/latest/setup/cli.html port + 1 is used for dask dashboard " + }, + { + "prompt_type": "human_bot", + 
"instruction": "Provide a detailed explanation of the expert setting dask_cuda_server_port", + "output": "dask cuda server port refers to Port using by server for dask cuda communications.: See https://docs.dask.org/en/latest/setup/cli.html port + 1 is used for dask dashboard " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_dashboard_port", + "output": "dask cuda dashboard port refers to See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_dashboard_port", + "output": "dask cuda dashboard port refers to Dask dashboard port for dask_cuda diagnostics.: See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask cuda dashboard port", + "output": "dask cuda dashboard port refers to Dask dashboard port for dask_cuda diagnostics.: See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Dask dashboard port for dask_cuda diagnostics.: ", + "output": "dask cuda dashboard port refers to Dask dashboard port for dask_cuda diagnostics.: See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_cuda_dashboard_port", + "output": "dask cuda dashboard port refers to See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_cuda_dashboard_port", + "output": "dask cuda dashboard port refers to Dask dashboard port for dask_cuda diagnostics.: See https://docs.dask.org/en/latest/setup/cli.html " + }, + { + 
"prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_server_ip", + "output": "dask server ip refers to If empty string, auto-detect IP capable of reaching network. Required to be set if using worker_mode=multinode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_server_ip", + "output": "dask server ip refers to IP address using by server for dask and dask cuda communications.: If empty string, auto-detect IP capable of reaching network. Required to be set if using worker_mode=multinode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask server ip", + "output": "dask server ip refers to IP address using by server for dask and dask cuda communications.: If empty string, auto-detect IP capable of reaching network. Required to be set if using worker_mode=multinode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "IP address using by server for dask and dask cuda communications.: ", + "output": "dask server ip refers to IP address using by server for dask and dask cuda communications.: If empty string, auto-detect IP capable of reaching network. Required to be set if using worker_mode=multinode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_server_ip", + "output": "dask server ip refers to If empty string, auto-detect IP capable of reaching network. Required to be set if using worker_mode=multinode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_server_ip", + "output": "dask server ip refers to IP address using by server for dask and dask cuda communications.: If empty string, auto-detect IP capable of reaching network. 
Required to be set if using worker_mode=multinode. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_worker_nprocs", + "output": "dask worker nprocs refers to Number of processses per dask (not cuda-GPU) worker. If -1, uses dask default of cpu count + 1 + nprocs. If -2, uses DAI default of total number of physical cores. Recommended for heavy feature engineering. If 1, assumes tasks are mostly multi-threaded and can use entire node per task. Recommended for heavy multinode model training. Only applicable to dask (not dask_cuda) workers " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_worker_nprocs", + "output": "dask worker nprocs refers to Number of processes per dask worker.: Number of processses per dask (not cuda-GPU) worker. If -1, uses dask default of cpu count + 1 + nprocs. If -2, uses DAI default of total number of physical cores. Recommended for heavy feature engineering. If 1, assumes tasks are mostly multi-threaded and can use entire node per task. Recommended for heavy multinode model training. Only applicable to dask (not dask_cuda) workers " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask worker nprocs", + "output": "dask worker nprocs refers to Number of processes per dask worker.: Number of processses per dask (not cuda-GPU) worker. If -1, uses dask default of cpu count + 1 + nprocs. If -2, uses DAI default of total number of physical cores. Recommended for heavy feature engineering. If 1, assumes tasks are mostly multi-threaded and can use entire node per task. Recommended for heavy multinode model training. 
Only applicable to dask (not dask_cuda) workers " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of processes per dask worker.: ", + "output": "dask worker nprocs refers to Number of processes per dask worker.: Number of processses per dask (not cuda-GPU) worker. If -1, uses dask default of cpu count + 1 + nprocs. If -2, uses DAI default of total number of physical cores. Recommended for heavy feature engineering. If 1, assumes tasks are mostly multi-threaded and can use entire node per task. Recommended for heavy multinode model training. Only applicable to dask (not dask_cuda) workers " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_worker_nprocs", + "output": "dask worker nprocs refers to Number of processses per dask (not cuda-GPU) worker. If -1, uses dask default of cpu count + 1 + nprocs. If -2, uses DAI default of total number of physical cores. Recommended for heavy feature engineering. If 1, assumes tasks are mostly multi-threaded and can use entire node per task. Recommended for heavy multinode model training. Only applicable to dask (not dask_cuda) workers " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_worker_nprocs", + "output": "dask worker nprocs refers to Number of processes per dask worker.: Number of processses per dask (not cuda-GPU) worker. If -1, uses dask default of cpu count + 1 + nprocs. If -2, uses DAI default of total number of physical cores. Recommended for heavy feature engineering. If 1, assumes tasks are mostly multi-threaded and can use entire node per task. Recommended for heavy multinode model training. 
Only applicable to dask (not dask_cuda) workers " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_worker_nthreads", + "output": "dask worker nthreads refers to Number of threads per process for dask workers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_worker_nthreads", + "output": "dask worker nthreads refers to Number of threads per process for dask.: Number of threads per process for dask workers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask worker nthreads", + "output": "dask worker nthreads refers to Number of threads per process for dask.: Number of threads per process for dask workers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of threads per process for dask.: ", + "output": "dask worker nthreads refers to Number of threads per process for dask.: Number of threads per process for dask workers" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_worker_nthreads", + "output": "dask worker nthreads refers to Number of threads per process for dask workers" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_worker_nthreads", + "output": "dask worker nthreads refers to Number of threads per process for dask.: Number of threads per process for dask workers" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_worker_nthreads", + "output": "dask cuda worker nthreads refers to Number of threads per process for dask_cuda workers If -2, uses DAI default of physical cores per GPU, since must have 1 worker/GPU only. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask_cuda_worker_nthreads", + "output": "dask cuda worker nthreads refers to Number of threads per process for dask_cuda.: Number of threads per process for dask_cuda workers If -2, uses DAI default of physical cores per GPU, since must have 1 worker/GPU only. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dask cuda worker nthreads", + "output": "dask cuda worker nthreads refers to Number of threads per process for dask_cuda.: Number of threads per process for dask_cuda workers If -2, uses DAI default of physical cores per GPU, since must have 1 worker/GPU only. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of threads per process for dask_cuda.: ", + "output": "dask cuda worker nthreads refers to Number of threads per process for dask_cuda.: Number of threads per process for dask_cuda workers If -2, uses DAI default of physical cores per GPU, since must have 1 worker/GPU only. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dask_cuda_worker_nthreads", + "output": "dask cuda worker nthreads refers to Number of threads per process for dask_cuda workers If -2, uses DAI default of physical cores per GPU, since must have 1 worker/GPU only. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dask_cuda_worker_nthreads", + "output": "dask cuda worker nthreads refers to Number of threads per process for dask_cuda.: Number of threads per process for dask_cuda workers If -2, uses DAI default of physical cores per GPU, since must have 1 worker/GPU only. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_listen_port", + "output": "lightgbm listen port refers to See https://github.com/dask/dask-lightgbm " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm_listen_port", + "output": "lightgbm listen port refers to LightGBM local listen port when using dask with lightgbm: See https://github.com/dask/dask-lightgbm " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lightgbm listen port", + "output": "lightgbm listen port refers to LightGBM local listen port when using dask with lightgbm: See https://github.com/dask/dask-lightgbm " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "LightGBM local listen port when using dask with lightgbm: ", + "output": "lightgbm listen port refers to LightGBM local listen port when using dask with lightgbm: See https://github.com/dask/dask-lightgbm " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting lightgbm_listen_port", + "output": "lightgbm listen port refers to See https://github.com/dask/dask-lightgbm " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting lightgbm_listen_port", + "output": "lightgbm listen port refers to LightGBM local listen port when using dask with lightgbm: See https://github.com/dask/dask-lightgbm " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_jupyter_server", + "output": "enable jupyter server refers to Whether to enable jupyter server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless 
AI", + "input": "enable_jupyter_server", + "output": "enable jupyter server refers to Whether to enable jupyter server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable jupyter server", + "output": "enable jupyter server refers to Whether to enable jupyter server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable jupyter server refers to Whether to enable jupyter server" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_jupyter_server", + "output": "enable jupyter server refers to Whether to enable jupyter server" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_jupyter_server", + "output": "enable jupyter server refers to Whether to enable jupyter server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "jupyter_server_port", + "output": "jupyter server port refers to Port for jupyter server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "jupyter_server_port", + "output": "jupyter server port refers to Port for jupyter server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "jupyter server port", + "output": "jupyter server port refers to Port for jupyter server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "jupyter server port refers to Port for jupyter server" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting jupyter_server_port", + "output": "jupyter server port refers to 
Port for jupyter server" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting jupyter_server_port", + "output": "jupyter server port refers to Port for jupyter server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_jupyter_server_browser", + "output": "enable jupyter server browser refers to Whether to enable jupyter server browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_jupyter_server_browser", + "output": "enable jupyter server browser refers to Whether to enable jupyter server browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable jupyter server browser", + "output": "enable jupyter server browser refers to Whether to enable jupyter server browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable jupyter server browser refers to Whether to enable jupyter server browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_jupyter_server_browser", + "output": "enable jupyter server browser refers to Whether to enable jupyter server browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_jupyter_server_browser", + "output": "enable jupyter server browser refers to Whether to enable jupyter server browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_jupyter_server_browser_root", + "output": "enable jupyter server browser root refers to Whether to root access to jupyter server browser" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_jupyter_server_browser_root", + "output": "enable jupyter server browser root refers to Whether to root access to jupyter server browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable jupyter server browser root", + "output": "enable jupyter server browser root refers to Whether to root access to jupyter server browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable jupyter server browser root refers to Whether to root access to jupyter server browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_jupyter_server_browser_root", + "output": "enable jupyter server browser root refers to Whether to root access to jupyter server browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_jupyter_server_browser_root", + "output": "enable jupyter server browser root refers to Whether to root access to jupyter server browser" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_triton_server_local", + "output": "enable triton server local refers to Whether to enable built-in Triton inference server. If false, can still connect to remote Triton inference server by setting triton_host. If true, will start built-in Triton inference server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_triton_server_local", + "output": "enable triton server local refers to Whether to enable built-in Triton inference server. If false, can still connect to remote Triton inference server by setting triton_host. 
If true, will start built-in Triton inference server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable triton server local", + "output": "enable triton server local refers to Whether to enable built-in Triton inference server. If false, can still connect to remote Triton inference server by setting triton_host. If true, will start built-in Triton inference server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable triton server local refers to Whether to enable built-in Triton inference server. If false, can still connect to remote Triton inference server by setting triton_host. If true, will start built-in Triton inference server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_triton_server_local", + "output": "enable triton server local refers to Whether to enable built-in Triton inference server. If false, can still connect to remote Triton inference server by setting triton_host. If true, will start built-in Triton inference server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_triton_server_local", + "output": "enable triton server local refers to Whether to enable built-in Triton inference server. If false, can still connect to remote Triton inference server by setting triton_host. If true, will start built-in Triton inference server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_host_local", + "output": "triton host local refers to Hostname (or IP address) of built-in Triton inference service, to be used when auto_deploy_triton_scoring_pipeline and make_triton_scoring_pipeline are not disabled. Only needed if enable_triton_server_local is disabled. 
Required to be set for some systems, like AWS, for networking packages to reach the server. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_host_local", + "output": "triton host local refers to Hostname of built-in Triton inference server.: Hostname (or IP address) of built-in Triton inference service, to be used when auto_deploy_triton_scoring_pipeline and make_triton_scoring_pipeline are not disabled. Only needed if enable_triton_server_local is disabled. Required to be set for some systems, like AWS, for networking packages to reach the server. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton host local", + "output": "triton host local refers to Hostname of built-in Triton inference server.: Hostname (or IP address) of built-in Triton inference service, to be used when auto_deploy_triton_scoring_pipeline and make_triton_scoring_pipeline are not disabled. Only needed if enable_triton_server_local is disabled. Required to be set for some systems, like AWS, for networking packages to reach the server. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Hostname of built-in Triton inference server.: ", + "output": "triton host local refers to Hostname of built-in Triton inference server.: Hostname (or IP address) of built-in Triton inference service, to be used when auto_deploy_triton_scoring_pipeline and make_triton_scoring_pipeline are not disabled. Only needed if enable_triton_server_local is disabled. Required to be set for some systems, like AWS, for networking packages to reach the server. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting triton_host_local", + "output": "triton host local refers to Hostname (or IP address) of built-in Triton inference service, to be used when auto_deploy_triton_scoring_pipeline and make_triton_scoring_pipeline are not disabled. Only needed if enable_triton_server_local is disabled. Required to be set for some systems, like AWS, for networking packages to reach the server. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting triton_host_local", + "output": "triton host local refers to Hostname of built-in Triton inference server.: Hostname (or IP address) of built-in Triton inference service, to be used when auto_deploy_triton_scoring_pipeline and make_triton_scoring_pipeline are not disabled. Only needed if enable_triton_server_local is disabled. Required to be set for some systems, like AWS, for networking packages to reach the server. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_server_params_local", + "output": "triton server params local refers to Set Triton server command line arguments passed with --key=value." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_server_params_local", + "output": "triton server params local refers to Built-in Triton server command line arguments.: Set Triton server command line arguments passed with --key=value." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton server params local", + "output": "triton server params local refers to Built-in Triton server command line arguments.: Set Triton server command line arguments passed with --key=value." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Built-in Triton server command line arguments.: ", + "output": "triton server params local refers to Built-in Triton server command line arguments.: Set Triton server command line arguments passed with --key=value." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting triton_server_params_local", + "output": "triton server params local refers to Set Triton server command line arguments passed with --key=value." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting triton_server_params_local", + "output": "triton server params local refers to Built-in Triton server command line arguments.: Set Triton server command line arguments passed with --key=value." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_model_repository_dir_local", + "output": "triton model repository dir local refers to Path to model repository (relative to data_directory) for local Triton inference server built-in to Driverless AI. All Triton deployments for all users are stored in this directory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_model_repository_dir_local", + "output": "triton model repository dir local refers to Path to Triton model repository.: Path to model repository (relative to data_directory) for local Triton inference server built-in to Driverless AI. All Triton deployments for all users are stored in this directory." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton model repository dir local", + "output": "triton model repository dir local refers to Path to Triton model repository.: Path to model repository (relative to data_directory) for local Triton inference server built-in to Driverless AI. All Triton deployments for all users are stored in this directory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Path to Triton model repository.: ", + "output": "triton model repository dir local refers to Path to Triton model repository.: Path to model repository (relative to data_directory) for local Triton inference server built-in to Driverless AI. All Triton deployments for all users are stored in this directory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting triton_model_repository_dir_local", + "output": "triton model repository dir local refers to Path to model repository (relative to data_directory) for local Triton inference server built-in to Driverless AI. All Triton deployments for all users are stored in this directory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting triton_model_repository_dir_local", + "output": "triton model repository dir local refers to Path to Triton model repository.: Path to model repository (relative to data_directory) for local Triton inference server built-in to Driverless AI. All Triton deployments for all users are stored in this directory." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_server_core_chunk_size_local", + "output": "triton server core chunk size local refers to Number of cores to specify as resource, so that C++ MOJO can use its own multi-threaded parallel row batching to save memory and increase performance. A value of 1 is most portable across any Triton server, and is the most efficient use of resources for small (e.g. 1) batch sizes, while 4 is reasonable default assuming requests are batched." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_server_core_chunk_size_local", + "output": "triton server core chunk size local refers to Number of cores to use for each model.: Number of cores to specify as resource, so that C++ MOJO can use its own multi-threaded parallel row batching to save memory and increase performance. A value of 1 is most portable across any Triton server, and is the most efficient use of resources for small (e.g. 1) batch sizes, while 4 is reasonable default assuming requests are batched." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton server core chunk size local", + "output": "triton server core chunk size local refers to Number of cores to use for each model.: Number of cores to specify as resource, so that C++ MOJO can use its own multi-threaded parallel row batching to save memory and increase performance. A value of 1 is most portable across any Triton server, and is the most efficient use of resources for small (e.g. 1) batch sizes, while 4 is reasonable default assuming requests are batched." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of cores to use for each model.: ", + "output": "triton server core chunk size local refers to Number of cores to use for each model.: Number of cores to specify as resource, so that C++ MOJO can use its own multi-threaded parallel row batching to save memory and increase performance. A value of 1 is most portable across any Triton server, and is the most efficient use of resources for small (e.g. 1) batch sizes, while 4 is reasonable default assuming requests are batched." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting triton_server_core_chunk_size_local", + "output": "triton server core chunk size local refers to Number of cores to specify as resource, so that C++ MOJO can use its own multi-threaded parallel row batching to save memory and increase performance. A value of 1 is most portable across any Triton server, and is the most efficient use of resources for small (e.g. 1) batch sizes, while 4 is reasonable default assuming requests are batched." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting triton_server_core_chunk_size_local", + "output": "triton server core chunk size local refers to Number of cores to use for each model.: Number of cores to specify as resource, so that C++ MOJO can use its own multi-threaded parallel row batching to save memory and increase performance. A value of 1 is most portable across any Triton server, and is the most efficient use of resources for small (e.g. 1) batch sizes, while 4 is reasonable default assuming requests are batched." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_host_remote", + "output": "triton host remote refers to Hostname (or IP address) of remote Triton inference service (outside of DAI), to be used when auto_deploy_triton_scoring_pipeline and make_triton_scoring_pipeline are not disabled. If set, check triton_model_repository_dir_remote and triton_server_params_remote as well. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_host_remote", + "output": "triton host remote refers to Hostname of remote Triton inference server.: Hostname (or IP address) of remote Triton inference service (outside of DAI), to be used when auto_deploy_triton_scoring_pipeline and make_triton_scoring_pipeline are not disabled. If set, check triton_model_repository_dir_remote and triton_server_params_remote as well. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton host remote", + "output": "triton host remote refers to Hostname of remote Triton inference server.: Hostname (or IP address) of remote Triton inference service (outside of DAI), to be used when auto_deploy_triton_scoring_pipeline and make_triton_scoring_pipeline are not disabled. If set, check triton_model_repository_dir_remote and triton_server_params_remote as well. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Hostname of remote Triton inference server.: ", + "output": "triton host remote refers to Hostname of remote Triton inference server.: Hostname (or IP address) of remote Triton inference service (outside of DAI), to be used when auto_deploy_triton_scoring_pipeline and make_triton_scoring_pipeline are not disabled. 
If set, check triton_model_repository_dir_remote and triton_server_params_remote as well. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting triton_host_remote", + "output": "triton host remote refers to Hostname (or IP address) of remote Triton inference service (outside of DAI), to be used when auto_deploy_triton_scoring_pipeline and make_triton_scoring_pipeline are not disabled. If set, check triton_model_repository_dir_remote and triton_server_params_remote as well. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting triton_host_remote", + "output": "triton host remote refers to Hostname of remote Triton inference server.: Hostname (or IP address) of remote Triton inference service (outside of DAI), to be used when auto_deploy_triton_scoring_pipeline and make_triton_scoring_pipeline are not disabled. If set, check triton_model_repository_dir_remote and triton_server_params_remote as well. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_model_repository_dir_remote", + "output": "triton model repository dir remote refers to Path to model repository directory for remote Triton inference server outside of Driverless AI. All Triton deployments for all users are stored in this directory. Requires write access to this directory from Driverless AI (shared file system). This setting is optional. If not provided, will upload each model deployment over gRPC protocol." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_model_repository_dir_remote", + "output": "triton model repository dir remote refers to Path to model repository directory for remote Triton inference server outside of Driverless AI. All Triton deployments for all users are stored in this directory. 
Requires write access to this directory from Driverless AI (shared file system). This setting is optional. If not provided, will upload each model deployment over gRPC protocol." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton model repository dir remote", + "output": "triton model repository dir remote refers to Path to model repository directory for remote Triton inference server outside of Driverless AI. All Triton deployments for all users are stored in this directory. Requires write access to this directory from Driverless AI (shared file system). This setting is optional. If not provided, will upload each model deployment over gRPC protocol." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "triton model repository dir remote refers to Path to model repository directory for remote Triton inference server outside of Driverless AI. All Triton deployments for all users are stored in this directory. Requires write access to this directory from Driverless AI (shared file system). This setting is optional. If not provided, will upload each model deployment over gRPC protocol." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting triton_model_repository_dir_remote", + "output": "triton model repository dir remote refers to Path to model repository directory for remote Triton inference server outside of Driverless AI. All Triton deployments for all users are stored in this directory. Requires write access to this directory from Driverless AI (shared file system). This setting is optional. If not provided, will upload each model deployment over gRPC protocol." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting triton_model_repository_dir_remote", + "output": "triton model repository dir remote refers to Path to model repository directory for remote Triton inference server outside of Driverless AI. All Triton deployments for all users are stored in this directory. Requires write access to this directory from Driverless AI (shared file system). This setting is optional. If not provided, will upload each model deployment over gRPC protocol." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_server_params_remote", + "output": "triton server params remote refers to Parameters to connect to remote Triton server, only used if triton_host_remote and triton_model_repository_dir_remote are set. ." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton_server_params_remote", + "output": "triton server params remote refers to Remote Triton server parameters, used to connect via tritonclient: Parameters to connect to remote Triton server, only used if triton_host_remote and triton_model_repository_dir_remote are set. ." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "triton server params remote", + "output": "triton server params remote refers to Remote Triton server parameters, used to connect via tritonclient: Parameters to connect to remote Triton server, only used if triton_host_remote and triton_model_repository_dir_remote are set. ." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Remote Triton server parameters, used to connect via tritonclient: ", + "output": "triton server params remote refers to Remote Triton server parameters, used to connect via tritonclient: Parameters to connect to remote Triton server, only used if triton_host_remote and triton_model_repository_dir_remote are set. ." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting triton_server_params_remote", + "output": "triton server params remote refers to Parameters to connect to remote Triton server, only used if triton_host_remote and triton_model_repository_dir_remote are set. ." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting triton_server_params_remote", + "output": "triton server params remote refers to Remote Triton server parameters, used to connect via tritonclient: Parameters to connect to remote Triton server, only used if triton_host_remote and triton_model_repository_dir_remote are set. ." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode_enable_strict_queue_policy", + "output": "multinode enable strict queue policy refers to When set to true, CPU executors will strictly run just CPU tasks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode_enable_strict_queue_policy", + "output": "multinode enable strict queue policy refers to When set to true, CPU executors will strictly run just CPU tasks." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode enable strict queue policy", + "output": "multinode enable strict queue policy refers to When set to true, CPU executors will strictly run just CPU tasks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "multinode enable strict queue policy refers to When set to true, CPU executors will strictly run just CPU tasks." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting multinode_enable_strict_queue_policy", + "output": "multinode enable strict queue policy refers to When set to true, CPU executors will strictly run just CPU tasks." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting multinode_enable_strict_queue_policy", + "output": "multinode enable strict queue policy refers to When set to true, CPU executors will strictly run just CPU tasks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode_enable_cpu_tasks_on_gpu_machines", + "output": "multinode enable cpu tasks on gpu machines refers to Controls whether CPU tasks can run on GPU machines." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode_enable_cpu_tasks_on_gpu_machines", + "output": "multinode enable cpu tasks on gpu machines refers to Controls whether CPU tasks can run on GPU machines." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode enable cpu tasks on gpu machines", + "output": "multinode enable cpu tasks on gpu machines refers to Controls whether CPU tasks can run on GPU machines." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "multinode enable cpu tasks on gpu machines refers to Controls whether CPU tasks can run on GPU machines." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting multinode_enable_cpu_tasks_on_gpu_machines", + "output": "multinode enable cpu tasks on gpu machines refers to Controls whether CPU tasks can run on GPU machines." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting multinode_enable_cpu_tasks_on_gpu_machines", + "output": "multinode enable cpu tasks on gpu machines refers to Controls whether CPU tasks can run on GPU machines." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode_storage_medium", + "output": "multinode storage medium refers to Storage medium to be used to exchange data between main server and remote worker nodes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode_storage_medium", + "output": "multinode storage medium refers to Storage medium to be used to exchange data between main server and remote worker nodes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode storage medium", + "output": "multinode storage medium refers to Storage medium to be used to exchange data between main server and remote worker nodes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "multinode storage medium refers to Storage medium to be used to exchange data between main server and remote worker nodes." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting multinode_storage_medium", + "output": "multinode storage medium refers to Storage medium to be used to exchange data between main server and remote worker nodes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting multinode_storage_medium", + "output": "multinode storage medium refers to Storage medium to be used to exchange data between main server and remote worker nodes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_mode", + "output": "worker mode refers to How the long running tasks are scheduled. multiprocessing: forks the current process immediately. singlenode: shares the task through redis and needs a worker running. multinode: same as singlenode and also shares the data through minio and allows worker to run on the different machine. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_mode", + "output": "worker mode refers to How the long running tasks are scheduled. multiprocessing: forks the current process immediately. singlenode: shares the task through redis and needs a worker running. multinode: same as singlenode and also shares the data through minio and allows worker to run on the different machine. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker mode", + "output": "worker mode refers to How the long running tasks are scheduled. multiprocessing: forks the current process immediately. singlenode: shares the task through redis and needs a worker running. multinode: same as singlenode and also shares the data through minio and allows worker to run on the different machine. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "worker mode refers to How the long running tasks are scheduled. multiprocessing: forks the current process immediately. singlenode: shares the task through redis and needs a worker running. multinode: same as singlenode and also shares the data through minio and allows worker to run on the different machine. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting worker_mode", + "output": "worker mode refers to How the long running tasks are scheduled. multiprocessing: forks the current process immediately. singlenode: shares the task through redis and needs a worker running. multinode: same as singlenode and also shares the data through minio and allows worker to run on the different machine. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting worker_mode", + "output": "worker mode refers to How the long running tasks are scheduled. multiprocessing: forks the current process immediately. singlenode: shares the task through redis and needs a worker running. multinode: same as singlenode and also shares the data through minio and allows worker to run on the different machine. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis_ip", + "output": "redis ip refers to Redis settings" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis_ip", + "output": "redis ip refers to Redis settings" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis ip", + "output": "redis ip refers to Redis settings" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "redis ip refers to Redis settings" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting redis_ip", + "output": "redis ip refers to Redis settings" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting redis_ip", + "output": "redis ip refers to Redis settings" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis_port", + "output": "redis port refers to Redis settings" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis_port", + "output": "redis port refers to Redis settings" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis port", + "output": "redis port refers to Redis settings" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "redis port refers to Redis settings" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting redis_port", + "output": "redis port refers to Redis settings" + }, + { + 
"prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting redis_port", + "output": "redis port refers to Redis settings" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis_db", + "output": "redis db refers to Redis database. Each DAI instance running on the redis server should have unique integer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis_db", + "output": "redis db refers to Redis database. Each DAI instance running on the redis server should have unique integer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis db", + "output": "redis db refers to Redis database. Each DAI instance running on the redis server should have unique integer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "redis db refers to Redis database. Each DAI instance running on the redis server should have unique integer." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting redis_db", + "output": "redis db refers to Redis database. Each DAI instance running on the redis server should have unique integer." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting redis_db", + "output": "redis db refers to Redis database. Each DAI instance running on the redis server should have unique integer." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_redis_password", + "output": "main server redis password refers to Redis password. 
Will be randomly generated at main server startup, and by default it will show up in config file uncommented. If you are running more than one DriverlessAI instance per system, make sure each and every instance is connected to its own redis queue." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_redis_password", + "output": "main server redis password refers to Redis password. Will be randomly generated at main server startup, and by default it will show up in config file uncommented. If you are running more than one DriverlessAI instance per system, make sure each and every instance is connected to its own redis queue." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main server redis password", + "output": "main server redis password refers to Redis password. Will be randomly generated at main server startup, and by default it will show up in config file uncommented. If you are running more than one DriverlessAI instance per system, make sure each and every instance is connected to its own redis queue." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "main server redis password refers to Redis password. Will be randomly generated at main server startup, and by default it will show up in config file uncommented. If you are running more than one DriverlessAI instance per system, make sure each and every instance is connected to its own redis queue." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting main_server_redis_password", + "output": "main server redis password refers to Redis password. 
Will be randomly generated at main server startup, and by default it will show up in config file uncommented. If you are running more than one DriverlessAI instance per system, make sure each and every instance is connected to its own redis queue." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting main_server_redis_password", + "output": "main server redis password refers to Redis password. Will be randomly generated at main server startup, and by default it will show up in config file uncommented. If you are running more than one DriverlessAI instance per system, make sure each and every instance is connected to its own redis queue." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis_encrypt_config", + "output": "redis encrypt config refers to If set to true, the config will get encrypted before it gets saved into the Redis database." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis_encrypt_config", + "output": "redis encrypt config refers to If set to true, the config will get encrypted before it gets saved into the Redis database." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis encrypt config", + "output": "redis encrypt config refers to If set to true, the config will get encrypted before it gets saved into the Redis database." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "redis encrypt config refers to If set to true, the config will get encrypted before it gets saved into the Redis database." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting redis_encrypt_config", + "output": "redis encrypt config refers to If set to true, the config will get encrypted before it gets saved into the Redis database." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting redis_encrypt_config", + "output": "redis encrypt config refers to If set to true, the config will get encrypted before it gets saved into the Redis database." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "local_minio_port", + "output": "local minio port refers to The port that Minio will listen on, this only takes effect if the current system is a multinode main server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "local_minio_port", + "output": "local minio port refers to The port that Minio will listen on, this only takes effect if the current system is a multinode main server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "local minio port", + "output": "local minio port refers to The port that Minio will listen on, this only takes effect if the current system is a multinode main server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "local minio port refers to The port that Minio will listen on, this only takes effect if the current system is a multinode main server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting local_minio_port", + "output": "local minio port refers to The port that Minio will listen on, this only takes effect if the current system is a multinode main server." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting local_minio_port", + "output": "local minio port refers to The port that Minio will listen on, this only takes effect if the current system is a multinode main server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_minio_address", + "output": "main server minio address refers to Location of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_minio_address", + "output": "main server minio address refers to Location of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main server minio address", + "output": "main server minio address refers to Location of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "main server minio address refers to Location of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting main_server_minio_address", + "output": "main server minio address refers to Location of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting main_server_minio_address", + "output": "main server minio address refers to Location of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_minio_access_key_id", + "output": "main server minio access key id refers to Access key of main server's minio server." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_minio_access_key_id", + "output": "main server minio access key id refers to Access key of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main server minio access key id", + "output": "main server minio access key id refers to Access key of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "main server minio access key id refers to Access key of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting main_server_minio_access_key_id", + "output": "main server minio access key id refers to Access key of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting main_server_minio_access_key_id", + "output": "main server minio access key id refers to Access key of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_minio_secret_access_key", + "output": "main server minio secret access key refers to Secret access key of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_minio_secret_access_key", + "output": "main server minio secret access key refers to Secret access key of main server's minio server." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main server minio secret access key", + "output": "main server minio secret access key refers to Secret access key of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "main server minio secret access key refers to Secret access key of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting main_server_minio_secret_access_key", + "output": "main server minio secret access key refers to Secret access key of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting main_server_minio_secret_access_key", + "output": "main server minio secret access key refers to Secret access key of main server's minio server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_minio_bucket", + "output": "main server minio bucket refers to Name of minio bucket used for file synchronization." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_minio_bucket", + "output": "main server minio bucket refers to Name of minio bucket used for file synchronization." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main server minio bucket", + "output": "main server minio bucket refers to Name of minio bucket used for file synchronization." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "main server minio bucket refers to Name of minio bucket used for file synchronization." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting main_server_minio_bucket", + "output": "main server minio bucket refers to Name of minio bucket used for file synchronization." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting main_server_minio_bucket", + "output": "main server minio bucket refers to Name of minio bucket used for file synchronization." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_s3_access_key_id", + "output": "main server s3 access key id refers to S3 global access key." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_s3_access_key_id", + "output": "main server s3 access key id refers to S3 global access key." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main server s3 access key id", + "output": "main server s3 access key id refers to S3 global access key." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "main server s3 access key id refers to S3 global access key." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting main_server_s3_access_key_id", + "output": "main server s3 access key id refers to S3 global access key." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting main_server_s3_access_key_id", + "output": "main server s3 access key id refers to S3 global access key." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_s3_secret_access_key", + "output": "main server s3 secret access key refers to S3 global secret access key" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_s3_secret_access_key", + "output": "main server s3 secret access key refers to S3 global secret access key" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main server s3 secret access key", + "output": "main server s3 secret access key refers to S3 global secret access key" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "main server s3 secret access key refers to S3 global secret access key" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting main_server_s3_secret_access_key", + "output": "main server s3 secret access key refers to S3 global secret access key" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting main_server_s3_secret_access_key", + "output": "main server s3 secret access key refers to S3 global secret access key" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_s3_bucket", + "output": "main server s3 bucket refers to S3 bucket." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_s3_bucket", + "output": "main server s3 bucket refers to S3 bucket." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main server s3 bucket", + "output": "main server s3 bucket refers to S3 bucket." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "main server s3 bucket refers to S3 bucket." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting main_server_s3_bucket", + "output": "main server s3 bucket refers to S3 bucket." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting main_server_s3_bucket", + "output": "main server s3 bucket refers to S3 bucket." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_local_processors", + "output": "worker local processors refers to Maximum number of local tasks processed at once, limited to no more than total number of physical (not virtual) cores divided by two (minimum of 1)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_local_processors", + "output": "worker local processors refers to Maximum number of local tasks processed at once, limited to no more than total number of physical (not virtual) cores divided by two (minimum of 1)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker local processors", + "output": "worker local processors refers to Maximum number of local tasks processed at once, limited to no more than total number of physical (not virtual) cores divided by two (minimum of 1)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "worker local processors refers to Maximum number of local tasks processed at once, limited to no more than total number of physical (not virtual) cores divided by two (minimum of 1)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting worker_local_processors", + "output": "worker local processors refers to Maximum number of local tasks processed at once, limited to no more than total number of physical (not virtual) cores divided by two (minimum of 1)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting worker_local_processors", + "output": "worker local processors refers to Maximum number of local tasks processed at once, limited to no more than total number of physical (not virtual) cores divided by two (minimum of 1)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_priority_queues_processors", + "output": "worker priority queues processors refers to A concurrency limit for the 3 priority queues, only enabled when worker_remote_processors is greater than 0." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_priority_queues_processors", + "output": "worker priority queues processors refers to A concurrency limit for the 3 priority queues, only enabled when worker_remote_processors is greater than 0." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker priority queues processors", + "output": "worker priority queues processors refers to A concurrency limit for the 3 priority queues, only enabled when worker_remote_processors is greater than 0." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "worker priority queues processors refers to A concurrency limit for the 3 priority queues, only enabled when worker_remote_processors is greater than 0." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting worker_priority_queues_processors", + "output": "worker priority queues processors refers to A concurrency limit for the 3 priority queues, only enabled when worker_remote_processors is greater than 0." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting worker_priority_queues_processors", + "output": "worker priority queues processors refers to A concurrency limit for the 3 priority queues, only enabled when worker_remote_processors is greater than 0." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_priority_queues_time_check", + "output": "worker priority queues time check refers to A timeout before which a scheduled task is bumped up in priority" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_priority_queues_time_check", + "output": "worker priority queues time check refers to A timeout before which a scheduled task is bumped up in priority" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker priority queues time check", + "output": "worker priority queues time check refers to A timeout before which a scheduled task is bumped up in priority" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "worker priority queues time check refers to A timeout before which a scheduled task is bumped up in priority" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting worker_priority_queues_time_check", + "output": "worker priority queues time check refers to A timeout before which a scheduled task is bumped up in priority" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting worker_priority_queues_time_check", + "output": "worker priority queues time check refers to A timeout before which a scheduled task is bumped up in priority" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_remote_processors", + "output": "worker remote processors refers to Maximum number of remote tasks processed at once, if value is set to -1 the system will automatically pick a reasonable limit depending on the number of 
available virtual CPU cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_remote_processors", + "output": "worker remote processors refers to Maximum number of remote tasks processed at once, if value is set to -1 the system will automatically pick a reasonable limit depending on the number of available virtual CPU cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker remote processors", + "output": "worker remote processors refers to Maximum number of remote tasks processed at once, if value is set to -1 the system will automatically pick a reasonable limit depending on the number of available virtual CPU cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "worker remote processors refers to Maximum number of remote tasks processed at once, if value is set to -1 the system will automatically pick a reasonable limit depending on the number of available virtual CPU cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting worker_remote_processors", + "output": "worker remote processors refers to Maximum number of remote tasks processed at once, if value is set to -1 the system will automatically pick a reasonable limit depending on the number of available virtual CPU cores." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting worker_remote_processors", + "output": "worker remote processors refers to Maximum number of remote tasks processed at once, if value is set to -1 the system will automatically pick a reasonable limit depending on the number of available virtual CPU cores." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_remote_processors_max_threads_reduction_factor", + "output": "worker remote processors max threads reduction factor refers to If worker_remote_processors >= 3, factor by which each task reduces threads, used by various packages like datatable, lightgbm, xgboost, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_remote_processors_max_threads_reduction_factor", + "output": "worker remote processors max threads reduction factor refers to If worker_remote_processors >= 3, factor by which each task reduces threads, used by various packages like datatable, lightgbm, xgboost, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker remote processors max threads reduction factor", + "output": "worker remote processors max threads reduction factor refers to If worker_remote_processors >= 3, factor by which each task reduces threads, used by various packages like datatable, lightgbm, xgboost, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "worker remote processors max threads reduction factor refers to If worker_remote_processors >= 3, factor by which each task reduces threads, used by various packages like datatable, lightgbm, xgboost, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting worker_remote_processors_max_threads_reduction_factor", + "output": "worker remote processors max threads reduction factor refers to If worker_remote_processors >= 3, factor by which each task reduces threads, used by various packages like datatable, lightgbm, xgboost, etc." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting worker_remote_processors_max_threads_reduction_factor", + "output": "worker remote processors max threads reduction factor refers to If worker_remote_processors >= 3, factor by which each task reduces threads, used by various packages like datatable, lightgbm, xgboost, etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode_tmpfs", + "output": "multinode tmpfs refers to Temporary file system location for multinode data transfer. This has to be an absolute path with equivalent configuration on both the main server and remote workers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode_tmpfs", + "output": "multinode tmpfs refers to Temporary file system location for multinode data transfer. This has to be an absolute path with equivalent configuration on both the main server and remote workers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode tmpfs", + "output": "multinode tmpfs refers to Temporary file system location for multinode data transfer. This has to be an absolute path with equivalent configuration on both the main server and remote workers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "multinode tmpfs refers to Temporary file system location for multinode data transfer. This has to be an absolute path with equivalent configuration on both the main server and remote workers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting multinode_tmpfs", + "output": "multinode tmpfs refers to Temporary file system location for multinode data transfer. 
This has to be an absolute path with equivalent configuration on both the main server and remote workers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting multinode_tmpfs", + "output": "multinode tmpfs refers to Temporary file system location for multinode data transfer. This has to be an absolute path with equivalent configuration on both the main server and remote workers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode_store_datasets_in_tmpfs", + "output": "multinode store datasets in tmpfs refers to When set to true, will use the 'multinode_tmpfs' as datasets store." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode_store_datasets_in_tmpfs", + "output": "multinode store datasets in tmpfs refers to When set to true, will use the 'multinode_tmpfs' as datasets store." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "multinode store datasets in tmpfs", + "output": "multinode store datasets in tmpfs refers to When set to true, will use the 'multinode_tmpfs' as datasets store." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "multinode store datasets in tmpfs refers to When set to true, will use the 'multinode_tmpfs' as datasets store." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting multinode_store_datasets_in_tmpfs", + "output": "multinode store datasets in tmpfs refers to When set to true, will use the 'multinode_tmpfs' as datasets store." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting multinode_store_datasets_in_tmpfs", + "output": "multinode store datasets in tmpfs refers to When set to true, will use the 'multinode_tmpfs' as datasets store." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis_result_queue_polling_interval", + "output": "redis result queue polling interval refers to How often the server should extract results from redis queue in milliseconds." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis_result_queue_polling_interval", + "output": "redis result queue polling interval refers to How often the server should extract results from redis queue in milliseconds." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "redis result queue polling interval", + "output": "redis result queue polling interval refers to How often the server should extract results from redis queue in milliseconds." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "redis result queue polling interval refers to How often the server should extract results from redis queue in milliseconds." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting redis_result_queue_polling_interval", + "output": "redis result queue polling interval refers to How often the server should extract results from redis queue in milliseconds." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting redis_result_queue_polling_interval", + "output": "redis result queue polling interval refers to How often the server should extract results from redis queue in milliseconds." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_sleep", + "output": "worker sleep refers to Sleep time for worker loop." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_sleep", + "output": "worker sleep refers to Sleep time for worker loop." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker sleep", + "output": "worker sleep refers to Sleep time for worker loop." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "worker sleep refers to Sleep time for worker loop." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting worker_sleep", + "output": "worker sleep refers to Sleep time for worker loop." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting worker_sleep", + "output": "worker sleep refers to Sleep time for worker loop." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_minio_bucket_ping_timeout", + "output": "main server minio bucket ping timeout refers to For how many seconds worker should wait for main server minio bucket before it fails" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_minio_bucket_ping_timeout", + "output": "main server minio bucket ping timeout refers to For how many seconds worker should wait for main server minio bucket before it fails" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main server minio bucket ping timeout", + "output": "main server minio bucket ping timeout refers to For how many seconds worker should wait for main server minio bucket before it fails" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "main server minio bucket ping timeout refers to For how many seconds worker should wait for main server minio bucket before it fails" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting main_server_minio_bucket_ping_timeout", + "output": "main server minio bucket ping timeout refers to For how many seconds worker should wait for main server minio bucket before it fails" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting main_server_minio_bucket_ping_timeout", + "output": "main server minio bucket ping timeout refers to For how many seconds worker should wait for main server minio bucket before it fails" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_start_timeout", + "output": "worker start timeout refers to 
How long the worker should wait on redis db initialization in seconds." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_start_timeout", + "output": "worker start timeout refers to How long the worker should wait on redis db initialization in seconds." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker start timeout", + "output": "worker start timeout refers to How long the worker should wait on redis db initialization in seconds." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "worker start timeout refers to How long the worker should wait on redis db initialization in seconds." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting worker_start_timeout", + "output": "worker start timeout refers to How long the worker should wait on redis db initialization in seconds." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting worker_start_timeout", + "output": "worker start timeout refers to How long the worker should wait on redis db initialization in seconds." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_healthy_response_period", + "output": "worker healthy response period refers to For how many seconds the worker shouldn't respond to be marked unhealthy." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker_healthy_response_period", + "output": "worker healthy response period refers to For how many seconds the worker shouldn't respond to be marked unhealthy." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "worker healthy response period", + "output": "worker healthy response period refers to For how many seconds the worker shouldn't respond to be marked unhealthy." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "worker healthy response period refers to For how many seconds the worker shouldn't respond to be marked unhealthy." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting worker_healthy_response_period", + "output": "worker healthy response period refers to For how many seconds the worker shouldn't respond to be marked unhealthy." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting worker_healthy_response_period", + "output": "worker healthy response period refers to For how many seconds the worker shouldn't respond to be marked unhealthy." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "expose_server_version", + "output": "expose server version refers to Exposes the DriverlessAI base version when enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "expose_server_version", + "output": "expose server version refers to Exposes the DriverlessAI base version when enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "expose server version", + "output": "expose server version refers to Exposes the DriverlessAI base version when enabled." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "expose server version refers to Exposes the DriverlessAI base version when enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting expose_server_version", + "output": "expose server version refers to Exposes the DriverlessAI base version when enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting expose_server_version", + "output": "expose server version refers to Exposes the DriverlessAI base version when enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_https", + "output": "enable https refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_https", + "output": "enable https refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable https", + "output": "enable https refers to https settings You can make a self-signed certificate for testing with the following commands: 
sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable https refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_https", + "output": "enable https refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_https", + "output": "enable https refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_key_file", + "output": "ssl key file refers to https settings You can make a self-signed certificate for 
testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_key_file", + "output": "ssl key file refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl key file", + "output": "ssl key file refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ssl key file refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ssl_key_file", + "output": "ssl key file refers to https 
settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ssl_key_file", + "output": "ssl key file refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_crt_file", + "output": "ssl crt file refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_crt_file", + "output": "ssl crt file refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + 
"input": "ssl crt file", + "output": "ssl crt file refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ssl crt file refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ssl_crt_file", + "output": "ssl crt file refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ssl_crt_file", + "output": "ssl crt file refers to https settings You can make a self-signed certificate for testing with the following commands: sudo openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 3650 -nodes -subj '/O=Driverless AI' sudo chown dai:dai cert.pem private_key.pem sudo chmod 600 cert.pem private_key.pem sudo mv cert.pem private_key.pem /etc/dai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following 
expert setting for Driverless AI", + "input": "ssl_no_sslv2", + "output": "ssl no sslv2 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_no_sslv2", + "output": "ssl no sslv2 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl no sslv2", + "output": "ssl no sslv2 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ssl no sslv2 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ssl_no_sslv2", + "output": "ssl no sslv2 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ssl_no_sslv2", + "output": "ssl no sslv2 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_no_sslv3", + "output": "ssl no sslv3 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_no_sslv3", + "output": "ssl no sslv3 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl no sslv3", + "output": "ssl no sslv3 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ssl no sslv3 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ssl_no_sslv3", + "output": "ssl no sslv3 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting 
ssl_no_sslv3", + "output": "ssl no sslv3 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_no_tlsv1", + "output": "ssl no tlsv1 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_no_tlsv1", + "output": "ssl no tlsv1 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl no tlsv1", + "output": "ssl no tlsv1 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ssl no tlsv1 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ssl_no_tlsv1", + "output": "ssl no tlsv1 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ssl_no_tlsv1", + "output": "ssl no tlsv1 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_no_tlsv1_1", + "output": "ssl no tlsv1 1 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_no_tlsv1_1", + "output": "ssl no tlsv1 1 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl no tlsv1 1", + "output": "ssl no tlsv1 1 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ssl no tlsv1 1 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ssl_no_tlsv1_1", 
+ "output": "ssl no tlsv1 1 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ssl_no_tlsv1_1", + "output": "ssl no tlsv1 1 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_no_tlsv1_2", + "output": "ssl no tlsv1 2 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_no_tlsv1_2", + "output": "ssl no tlsv1 2 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl no tlsv1 2", + "output": "ssl no tlsv1 2 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ssl no tlsv1 2 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ssl_no_tlsv1_2", + "output": "ssl no tlsv1 2 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ssl_no_tlsv1_2", + "output": "ssl no tlsv1 2 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_no_tlsv1_3", + "output": "ssl no tlsv1 3 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_no_tlsv1_3", + "output": "ssl no tlsv1 3 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl no tlsv1 3", + "output": "ssl no tlsv1 3 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + 
"input": "", + "output": "ssl no tlsv1 3 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ssl_no_tlsv1_3", + "output": "ssl no tlsv1 3 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ssl_no_tlsv1_3", + "output": "ssl no tlsv1 3 refers to SSL TLS" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_client_verify_mode", + "output": "ssl client verify mode refers to https settings Sets the client verification mode. CERT_NONE: Client does not need to provide the certificate and if it does any verification errors are ignored. CERT_OPTIONAL: Client does not need to provide the certificate and if it does certificate is verified against set up CA chains. CERT_REQUIRED: Client needs to provide a certificate and certificate is verified. You'll need to set 'ssl_client_key_file' and 'ssl_client_crt_file' When this mode is selected for Driverless to be able to verify it's own callback requests. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_client_verify_mode", + "output": "ssl client verify mode refers to https settings Sets the client verification mode. CERT_NONE: Client does not need to provide the certificate and if it does any verification errors are ignored. CERT_OPTIONAL: Client does not need to provide the certificate and if it does certificate is verified against set up CA chains. CERT_REQUIRED: Client needs to provide a certificate and certificate is verified. You'll need to set 'ssl_client_key_file' and 'ssl_client_crt_file' When this mode is selected for Driverless to be able to verify it's own callback requests. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl client verify mode", + "output": "ssl client verify mode refers to https settings Sets the client verification mode. CERT_NONE: Client does not need to provide the certificate and if it does any verification errors are ignored. CERT_OPTIONAL: Client does not need to provide the certificate and if it does certificate is verified against set up CA chains. CERT_REQUIRED: Client needs to provide a certificate and certificate is verified. You'll need to set 'ssl_client_key_file' and 'ssl_client_crt_file' When this mode is selected for Driverless to be able to verify it's own callback requests. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ssl client verify mode refers to https settings Sets the client verification mode. CERT_NONE: Client does not need to provide the certificate and if it does any verification errors are ignored. CERT_OPTIONAL: Client does not need to provide the certificate and if it does certificate is verified against set up CA chains. CERT_REQUIRED: Client needs to provide a certificate and certificate is verified. You'll need to set 'ssl_client_key_file' and 'ssl_client_crt_file' When this mode is selected for Driverless to be able to verify it's own callback requests. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ssl_client_verify_mode", + "output": "ssl client verify mode refers to https settings Sets the client verification mode. CERT_NONE: Client does not need to provide the certificate and if it does any verification errors are ignored. CERT_OPTIONAL: Client does not need to provide the certificate and if it does certificate is verified against set up CA chains. CERT_REQUIRED: Client needs to provide a certificate and certificate is verified. 
You'll need to set 'ssl_client_key_file' and 'ssl_client_crt_file' When this mode is selected for Driverless to be able to verify it's own callback requests. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ssl_client_verify_mode", + "output": "ssl client verify mode refers to https settings Sets the client verification mode. CERT_NONE: Client does not need to provide the certificate and if it does any verification errors are ignored. CERT_OPTIONAL: Client does not need to provide the certificate and if it does certificate is verified against set up CA chains. CERT_REQUIRED: Client needs to provide a certificate and certificate is verified. You'll need to set 'ssl_client_key_file' and 'ssl_client_crt_file' When this mode is selected for Driverless to be able to verify it's own callback requests. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_ca_file", + "output": "ssl ca file refers to https settings Path to the Certification Authority certificate file. This certificate will be used when to verify client certificate when client authentication is turned on. If this is not set, clients are verified using default system certificates. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_ca_file", + "output": "ssl ca file refers to https settings Path to the Certification Authority certificate file. This certificate will be used when to verify client certificate when client authentication is turned on. If this is not set, clients are verified using default system certificates. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl ca file", + "output": "ssl ca file refers to https settings Path to the Certification Authority certificate file. 
This certificate will be used when to verify client certificate when client authentication is turned on. If this is not set, clients are verified using default system certificates. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ssl ca file refers to https settings Path to the Certification Authority certificate file. This certificate will be used when to verify client certificate when client authentication is turned on. If this is not set, clients are verified using default system certificates. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ssl_ca_file", + "output": "ssl ca file refers to https settings Path to the Certification Authority certificate file. This certificate will be used when to verify client certificate when client authentication is turned on. If this is not set, clients are verified using default system certificates. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ssl_ca_file", + "output": "ssl ca file refers to https settings Path to the Certification Authority certificate file. This certificate will be used when to verify client certificate when client authentication is turned on. If this is not set, clients are verified using default system certificates. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_client_key_file", + "output": "ssl client key file refers to https settings path to the private key that Driverless will use to authenticate itself when CERT_REQUIRED mode is set. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_client_key_file", + "output": "ssl client key file refers to https settings path to the private key that Driverless will use to authenticate itself when CERT_REQUIRED mode is set. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl client key file", + "output": "ssl client key file refers to https settings path to the private key that Driverless will use to authenticate itself when CERT_REQUIRED mode is set. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ssl client key file refers to https settings path to the private key that Driverless will use to authenticate itself when CERT_REQUIRED mode is set. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ssl_client_key_file", + "output": "ssl client key file refers to https settings path to the private key that Driverless will use to authenticate itself when CERT_REQUIRED mode is set. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ssl_client_key_file", + "output": "ssl client key file refers to https settings path to the private key that Driverless will use to authenticate itself when CERT_REQUIRED mode is set. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_client_crt_file", + "output": "ssl client crt file refers to https settings path to the client certificate that Driverless will use to authenticate itself when CERT_REQUIRED mode is set. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl_client_crt_file", + "output": "ssl client crt file refers to https settings path to the client certificate that Driverless will use to authenticate itself when CERT_REQUIRED mode is set. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ssl client crt file", + "output": "ssl client crt file refers to https settings path to the client certificate that Driverless will use to authenticate itself when CERT_REQUIRED mode is set. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ssl client crt file refers to https settings path to the client certificate that Driverless will use to authenticate itself when CERT_REQUIRED mode is set. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ssl_client_crt_file", + "output": "ssl client crt file refers to https settings path to the client certificate that Driverless will use to authenticate itself when CERT_REQUIRED mode is set. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ssl_client_crt_file", + "output": "ssl client crt file refers to https settings path to the client certificate that Driverless will use to authenticate itself when CERT_REQUIRED mode is set. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xsrf_protection", + "output": "enable xsrf protection refers to If enabled, webserver will serve xsrf cookies and verify their validity upon every POST request" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_xsrf_protection", + "output": "enable xsrf protection refers to Enable XSRF Webserver protection: If enabled, webserver will serve xsrf cookies and verify their validity upon every POST request" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable xsrf protection", + "output": "enable xsrf protection refers to Enable XSRF Webserver protection: If enabled, webserver will serve xsrf cookies and verify their validity upon every POST request" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable XSRF Webserver protection: ", + "output": "enable xsrf protection refers to Enable XSRF Webserver protection: If enabled, webserver will serve xsrf cookies and verify their validity upon every POST request" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_xsrf_protection", + "output": "enable xsrf protection refers to If enabled, webserver will serve xsrf cookies and verify their validity upon every POST request" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_xsrf_protection", + "output": "enable xsrf protection refers to Enable XSRF Webserver protection: If enabled, webserver will serve xsrf cookies and verify their validity upon every POST request" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + 
"input": "enable_secure_cookies", + "output": "enable secure cookies refers to Enable secure flag on HTTP cookies: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_secure_cookies", + "output": "enable secure cookies refers to Enable secure flag on HTTP cookies: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable secure cookies", + "output": "enable secure cookies refers to Enable secure flag on HTTP cookies: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable secure flag on HTTP cookies: ", + "output": "enable secure cookies refers to Enable secure flag on HTTP cookies: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_secure_cookies", + "output": "enable secure cookies refers to Enable secure flag on HTTP cookies: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_secure_cookies", + "output": "enable secure cookies refers to Enable secure flag on HTTP cookies: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "verify_session_ip", + "output": "verify session ip refers to When enabled each authenticated access will be verified comparing IP address of initiator of session and current request IP" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "verify_session_ip", + "output": "verify session ip refers to When enabled, webserver verifies session and request IP address: When enabled each authenticated access will be verified comparing IP address of initiator of session and current request IP" + }, + { + "prompt_type": "human_bot", + 
"instruction": "Explain the following expert setting for Driverless AI", + "input": "verify session ip", + "output": "verify session ip refers to When enabled, webserver verifies session and request IP address: When enabled each authenticated access will be verified comparing IP address of initiator of session and current request IP" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "When enabled, webserver verifies session and request IP address: ", + "output": "verify session ip refers to When enabled, webserver verifies session and request IP address: When enabled each authenticated access will be verified comparing IP address of initiator of session and current request IP" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting verify_session_ip", + "output": "verify session ip refers to When enabled each authenticated access will be verified comparing IP address of initiator of session and current request IP" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting verify_session_ip", + "output": "verify session ip refers to When enabled, webserver verifies session and request IP address: When enabled each authenticated access will be verified comparing IP address of initiator of session and current request IP" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipe_security_analysis_enabled", + "output": "custom recipe security analysis enabled refers to Enables automatic detection for forbidden/dangerous constructs in custom recipe" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipe_security_analysis_enabled", + "output": "custom recipe security analysis enabled refers to Enables automatic detection for 
forbidden/dangerous constructs in custom recipe" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom recipe security analysis enabled", + "output": "custom recipe security analysis enabled refers to Enables automatic detection for forbidden/dangerous constructs in custom recipe" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "custom recipe security analysis enabled refers to Enables automatic detection for forbidden/dangerous constructs in custom recipe" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting custom_recipe_security_analysis_enabled", + "output": "custom recipe security analysis enabled refers to Enables automatic detection for forbidden/dangerous constructs in custom recipe" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting custom_recipe_security_analysis_enabled", + "output": "custom recipe security analysis enabled refers to Enables automatic detection for forbidden/dangerous constructs in custom recipe" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipe_import_allowlist", + "output": "custom recipe import allowlist refers to List of modules that can be imported in custom recipes. Default empty list means all modules are allowed except for banlisted ones" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipe_import_allowlist", + "output": "custom recipe import allowlist refers to List of modules that can be imported in custom recipes. 
Default empty list means all modules are allowed except for banlisted ones" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom recipe import allowlist", + "output": "custom recipe import allowlist refers to List of modules that can be imported in custom recipes. Default empty list means all modules are allowed except for banlisted ones" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "custom recipe import allowlist refers to List of modules that can be imported in custom recipes. Default empty list means all modules are allowed except for banlisted ones" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting custom_recipe_import_allowlist", + "output": "custom recipe import allowlist refers to List of modules that can be imported in custom recipes. Default empty list means all modules are allowed except for banlisted ones" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting custom_recipe_import_allowlist", + "output": "custom recipe import allowlist refers to List of modules that can be imported in custom recipes. 
Default empty list means all modules are allowed except for banlisted ones" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipe_import_banlist", + "output": "custom recipe import banlist refers to List of modules that cannot be imported in custom recipes" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipe_import_banlist", + "output": "custom recipe import banlist refers to List of modules that cannot be imported in custom recipes" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom recipe import banlist", + "output": "custom recipe import banlist refers to List of modules that cannot be imported in custom recipes" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "custom recipe import banlist refers to List of modules that cannot be imported in custom recipes" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting custom_recipe_import_banlist", + "output": "custom recipe import banlist refers to List of modules that cannot be imported in custom recipes" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting custom_recipe_import_banlist", + "output": "custom recipe import banlist refers to List of modules that cannot be imported in custom recipes" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipe_method_call_allowlist", + "output": "custom recipe method call allowlist refers to Regex pattern list of calls which are allowed in custom recipes. Empty list means everything (except for banlist) is allowed. E.g. 
if only `os.path.*` is in allowlist, custom recipe can only call methods from `os.path` module and the built in ones " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipe_method_call_allowlist", + "output": "custom recipe method call allowlist refers to Regex pattern list of calls which are allowed in custom recipes. Empty list means everything (except for banlist) is allowed. E.g. if only `os.path.*` is in allowlist, custom recipe can only call methods from `os.path` module and the built in ones " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom recipe method call allowlist", + "output": "custom recipe method call allowlist refers to Regex pattern list of calls which are allowed in custom recipes. Empty list means everything (except for banlist) is allowed. E.g. if only `os.path.*` is in allowlist, custom recipe can only call methods from `os.path` module and the built in ones " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "custom recipe method call allowlist refers to Regex pattern list of calls which are allowed in custom recipes. Empty list means everything (except for banlist) is allowed. E.g. if only `os.path.*` is in allowlist, custom recipe can only call methods from `os.path` module and the built in ones " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting custom_recipe_method_call_allowlist", + "output": "custom recipe method call allowlist refers to Regex pattern list of calls which are allowed in custom recipes. Empty list means everything (except for banlist) is allowed. E.g. 
if only `os.path.*` is in allowlist, custom recipe can only call methods from `os.path` module and the built in ones " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting custom_recipe_method_call_allowlist", + "output": "custom recipe method call allowlist refers to Regex pattern list of calls which are allowed in custom recipes. Empty list means everything (except for banlist) is allowed. E.g. if only `os.path.*` is in allowlist, custom recipe can only call methods from `os.path` module and the built in ones " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipe_method_call_banlist", + "output": "custom recipe method call banlist refers to Regex pattern list of calls which need to be rejected in custom recipes. E.g. if `os.system` in banlist, custom recipe cannot call `os.system()`. If `socket.*` in banlist, recipe cannot call any method of socket module such as `socket.socket()` or any `socket.a.b.c()` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipe_method_call_banlist", + "output": "custom recipe method call banlist refers to Regex pattern list of calls which need to be rejected in custom recipes. E.g. if `os.system` in banlist, custom recipe cannot call `os.system()`. If `socket.*` in banlist, recipe cannot call any method of socket module such as `socket.socket()` or any `socket.a.b.c()` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom recipe method call banlist", + "output": "custom recipe method call banlist refers to Regex pattern list of calls which need to be rejected in custom recipes. E.g. if `os.system` in banlist, custom recipe cannot call `os.system()`. 
If `socket.*` in banlist, recipe cannot call any method of socket module such as `socket.socket()` or any `socket.a.b.c()` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "custom recipe method call banlist refers to Regex pattern list of calls which need to be rejected in custom recipes. E.g. if `os.system` in banlist, custom recipe cannot call `os.system()`. If `socket.*` in banlist, recipe cannot call any method of socket module such as `socket.socket()` or any `socket.a.b.c()` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting custom_recipe_method_call_banlist", + "output": "custom recipe method call banlist refers to Regex pattern list of calls which need to be rejected in custom recipes. E.g. if `os.system` in banlist, custom recipe cannot call `os.system()`. If `socket.*` in banlist, recipe cannot call any method of socket module such as `socket.socket()` or any `socket.a.b.c()` " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting custom_recipe_method_call_banlist", + "output": "custom recipe method call banlist refers to Regex pattern list of calls which need to be rejected in custom recipes. E.g. if `os.system` in banlist, custom recipe cannot call `os.system()`. 
If `socket.*` in banlist, recipe cannot call any method of socket module such as `socket.socket()` or any `socket.a.b.c()` " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipe_dangerous_patterns", + "output": "custom recipe dangerous patterns refers to List of regex patterns representing dangerous sequences/constructs which could be harmful to whole system and should be banned from code " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom_recipe_dangerous_patterns", + "output": "custom recipe dangerous patterns refers to List of regex patterns representing dangerous sequences/constructs which could be harmful to whole system and should be banned from code " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "custom recipe dangerous patterns", + "output": "custom recipe dangerous patterns refers to List of regex patterns representing dangerous sequences/constructs which could be harmful to whole system and should be banned from code " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "custom recipe dangerous patterns refers to List of regex patterns representing dangerous sequences/constructs which could be harmful to whole system and should be banned from code " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting custom_recipe_dangerous_patterns", + "output": "custom recipe dangerous patterns refers to List of regex patterns representing dangerous sequences/constructs which could be harmful to whole system and should be banned from code " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting custom_recipe_dangerous_patterns", 
+ "output": "custom recipe dangerous patterns refers to List of regex patterns representing dangerous sequences/constructs which could be harmful to whole system and should be banned from code " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_concurrent_sessions", + "output": "allow concurrent sessions refers to If enabled, user can log in from 2 browsers (scripts) at the same time" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_concurrent_sessions", + "output": "allow concurrent sessions refers to Enable concurrent session for same user: If enabled, user can log in from 2 browsers (scripts) at the same time" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow concurrent sessions", + "output": "allow concurrent sessions refers to Enable concurrent session for same user: If enabled, user can log in from 2 browsers (scripts) at the same time" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable concurrent session for same user: ", + "output": "allow concurrent sessions refers to Enable concurrent session for same user: If enabled, user can log in from 2 browsers (scripts) at the same time" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_concurrent_sessions", + "output": "allow concurrent sessions refers to If enabled, user can log in from 2 browsers (scripts) at the same time" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_concurrent_sessions", + "output": "allow concurrent sessions refers to Enable concurrent session for same user: If enabled, user can log in from 2 browsers (scripts) at the same time" + }, + { 
+ "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "extra_http_headers", + "output": "extra http headers refers to Extra HTTP headers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "extra_http_headers", + "output": "extra http headers refers to Extra HTTP headers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "extra http headers", + "output": "extra http headers refers to Extra HTTP headers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "extra http headers refers to Extra HTTP headers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting extra_http_headers", + "output": "extra http headers refers to Extra HTTP headers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting extra_http_headers", + "output": "extra http headers refers to Extra HTTP headers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "http_cookie_attributes", + "output": "http cookie attributes refers to By default DriverlessAI issues cookies with HTTPOnly and Secure attributes (morsels) enabled. In addition to that, SameSite attribute is set to 'Lax', as it's a default in modern browsers. The config overrides the default key/value (morsels)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "http_cookie_attributes", + "output": "http cookie attributes refers to Extra HTTP cookie flags: By default DriverlessAI issues cookies with HTTPOnly and Secure attributes (morsels) enabled. 
In addition to that, SameSite attribute is set to 'Lax', as it's a default in modern browsers. The config overrides the default key/value (morsels)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "http cookie attributes", + "output": "http cookie attributes refers to Extra HTTP cookie flags: By default DriverlessAI issues cookies with HTTPOnly and Secure attributes (morsels) enabled. In addition to that, SameSite attribute is set to 'Lax', as it's a default in modern browsers. The config overrides the default key/value (morsels)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Extra HTTP cookie flags: ", + "output": "http cookie attributes refers to Extra HTTP cookie flags: By default DriverlessAI issues cookies with HTTPOnly and Secure attributes (morsels) enabled. In addition to that, SameSite attribute is set to 'Lax', as it's a default in modern browsers. The config overrides the default key/value (morsels)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting http_cookie_attributes", + "output": "http cookie attributes refers to By default DriverlessAI issues cookies with HTTPOnly and Secure attributes (morsels) enabled. In addition to that, SameSite attribute is set to 'Lax', as it's a default in modern browsers. The config overrides the default key/value (morsels)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting http_cookie_attributes", + "output": "http cookie attributes refers to Extra HTTP cookie flags: By default DriverlessAI issues cookies with HTTPOnly and Secure attributes (morsels) enabled. In addition to that, SameSite attribute is set to 'Lax', as it's a default in modern browsers. The config overrides the default key/value (morsels)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_imputation", + "output": "enable imputation refers to Enable column imputation" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_imputation", + "output": "enable imputation refers to Enabling imputation adds new picker to EXPT setup GUI and triggers imputation functionality in Transformers : Enable column imputation" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable imputation", + "output": "enable imputation refers to Enabling imputation adds new picker to EXPT setup GUI and triggers imputation functionality in Transformers : Enable column imputation" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "\n Enabling imputation adds new picker to EXPT setup GUI\n and triggers imputation functionality in Transformers\n : ", + "output": "enable imputation refers to Enabling imputation adds new picker to EXPT setup GUI and triggers imputation functionality in Transformers : Enable column imputation" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_imputation", + "output": "enable imputation refers to Enable column imputation" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_imputation", + "output": "enable imputation refers to Enabling imputation adds new picker to EXPT setup GUI and triggers imputation functionality in Transformers : Enable column imputation" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_advanced_features_experiment", + "output": "enable advanced features experiment 
refers to Adds advanced settings panel to experiment setup, which allows creating custom features and more. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_advanced_features_experiment", + "output": "enable advanced features experiment refers to Reveal advanced settings panel in experiment setup: Adds advanced settings panel to experiment setup, which allows creating custom features and more. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable advanced features experiment", + "output": "enable advanced features experiment refers to Reveal advanced settings panel in experiment setup: Adds advanced settings panel to experiment setup, which allows creating custom features and more. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Reveal advanced settings panel in experiment setup: ", + "output": "enable advanced features experiment refers to Reveal advanced settings panel in experiment setup: Adds advanced settings panel to experiment setup, which allows creating custom features and more. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_advanced_features_experiment", + "output": "enable advanced features experiment refers to Adds advanced settings panel to experiment setup, which allows creating custom features and more. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_advanced_features_experiment", + "output": "enable advanced features experiment refers to Reveal advanced settings panel in experiment setup: Adds advanced settings panel to experiment setup, which allows creating custom features and more. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_address", + "output": "h2o storage address refers to Address of the H2O Storage endpoint. Keep empty to use the local storage only." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_address", + "output": "h2o storage address refers to Address of the H2O Storage endpoint. Keep empty to use the local storage only." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o storage address", + "output": "h2o storage address refers to Address of the H2O Storage endpoint. Keep empty to use the local storage only." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o storage address refers to Address of the H2O Storage endpoint. Keep empty to use the local storage only." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_storage_address", + "output": "h2o storage address refers to Address of the H2O Storage endpoint. Keep empty to use the local storage only." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_storage_address", + "output": "h2o storage address refers to Address of the H2O Storage endpoint. Keep empty to use the local storage only." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_projects_enabled", + "output": "h2o storage projects enabled refers to Whether to use remote projects stored in H2O Storage instead of local projects." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_projects_enabled", + "output": "h2o storage projects enabled refers to Whether to use remote projects stored in H2O Storage instead of local projects." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o storage projects enabled", + "output": "h2o storage projects enabled refers to Whether to use remote projects stored in H2O Storage instead of local projects." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o storage projects enabled refers to Whether to use remote projects stored in H2O Storage instead of local projects." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_storage_projects_enabled", + "output": "h2o storage projects enabled refers to Whether to use remote projects stored in H2O Storage instead of local projects." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_storage_projects_enabled", + "output": "h2o storage projects enabled refers to Whether to use remote projects stored in H2O Storage instead of local projects." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_tls_enabled", + "output": "h2o storage tls enabled refers to Whether the channel to the storage should be encrypted." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_tls_enabled", + "output": "h2o storage tls enabled refers to Whether the channel to the storage should be encrypted." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o storage tls enabled", + "output": "h2o storage tls enabled refers to Whether the channel to the storage should be encrypted." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o storage tls enabled refers to Whether the channel to the storage should be encrypted." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_storage_tls_enabled", + "output": "h2o storage tls enabled refers to Whether the channel to the storage should be encrypted." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_storage_tls_enabled", + "output": "h2o storage tls enabled refers to Whether the channel to the storage should be encrypted." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_tls_ca_path", + "output": "h2o storage tls ca path refers to Path to the certification authority certificate that H2O Storage server identity will be checked against." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_tls_ca_path", + "output": "h2o storage tls ca path refers to Path to the certification authority certificate that H2O Storage server identity will be checked against." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o storage tls ca path", + "output": "h2o storage tls ca path refers to Path to the certification authority certificate that H2O Storage server identity will be checked against." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o storage tls ca path refers to Path to the certification authority certificate that H2O Storage server identity will be checked against." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_storage_tls_ca_path", + "output": "h2o storage tls ca path refers to Path to the certification authority certificate that H2O Storage server identity will be checked against." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_storage_tls_ca_path", + "output": "h2o storage tls ca path refers to Path to the certification authority certificate that H2O Storage server identity will be checked against." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_tls_cert_path", + "output": "h2o storage tls cert path refers to Path to the client certificate to authenticate with H2O Storage server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_tls_cert_path", + "output": "h2o storage tls cert path refers to Path to the client certificate to authenticate with H2O Storage server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o storage tls cert path", + "output": "h2o storage tls cert path refers to Path to the client certificate to authenticate with H2O Storage server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o storage tls cert path refers to Path to the client certificate to authenticate with H2O Storage server" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide 
a short explanation of the expert setting h2o_storage_tls_cert_path", + "output": "h2o storage tls cert path refers to Path to the client certificate to authenticate with H2O Storage server" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_storage_tls_cert_path", + "output": "h2o storage tls cert path refers to Path to the client certificate to authenticate with H2O Storage server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_tls_key_path", + "output": "h2o storage tls key path refers to Path to the client key to authenticate with H2O Storage server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_tls_key_path", + "output": "h2o storage tls key path refers to Path to the client key to authenticate with H2O Storage server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o storage tls key path", + "output": "h2o storage tls key path refers to Path to the client key to authenticate with H2O Storage server" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o storage tls key path refers to Path to the client key to authenticate with H2O Storage server" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_storage_tls_key_path", + "output": "h2o storage tls key path refers to Path to the client key to authenticate with H2O Storage server" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_storage_tls_key_path", + "output": "h2o storage tls key path refers to Path to the client key to authenticate with H2O Storage server" + }, + { + 
"prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_internal_default_project_id", + "output": "h2o storage internal default project id refers to UUID of a Storage project to use instead of the remote HOME folder." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_internal_default_project_id", + "output": "h2o storage internal default project id refers to UUID of a Storage project to use instead of the remote HOME folder." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o storage internal default project id", + "output": "h2o storage internal default project id refers to UUID of a Storage project to use instead of the remote HOME folder." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o storage internal default project id refers to UUID of a Storage project to use instead of the remote HOME folder." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_storage_internal_default_project_id", + "output": "h2o storage internal default project id refers to UUID of a Storage project to use instead of the remote HOME folder." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_storage_internal_default_project_id", + "output": "h2o storage internal default project id refers to UUID of a Storage project to use instead of the remote HOME folder." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_rpc_deadline_seconds", + "output": "h2o storage rpc deadline seconds refers to Deadline for RPC calls with H2O Storage in seconds. 
Sets maximum number of seconds that Driverless waits for RPC call to complete before it cancels it." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_rpc_deadline_seconds", + "output": "h2o storage rpc deadline seconds refers to Deadline for RPC calls with H2O Storage in seconds. Sets maximum number of seconds that Driverless waits for RPC call to complete before it cancels it." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o storage rpc deadline seconds", + "output": "h2o storage rpc deadline seconds refers to Deadline for RPC calls with H2O Storage in seconds. Sets maximum number of seconds that Driverless waits for RPC call to complete before it cancels it." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o storage rpc deadline seconds refers to Deadline for RPC calls with H2O Storage in seconds. Sets maximum number of seconds that Driverless waits for RPC call to complete before it cancels it." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_storage_rpc_deadline_seconds", + "output": "h2o storage rpc deadline seconds refers to Deadline for RPC calls with H2O Storage in seconds. Sets maximum number of seconds that Driverless waits for RPC call to complete before it cancels it." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_storage_rpc_deadline_seconds", + "output": "h2o storage rpc deadline seconds refers to Deadline for RPC calls with H2O Storage in seconds. Sets maximum number of seconds that Driverless waits for RPC call to complete before it cancels it." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_rpc_bytestream_deadline_seconds", + "output": "h2o storage rpc bytestream deadline seconds refers to Deadline for RPC bytestream calls with H2O Storage in seconds. Sets maximum number of seconds that Driverless waits for RPC call to complete before it cancels it. This value is used for uploading and downloading artifacts." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_rpc_bytestream_deadline_seconds", + "output": "h2o storage rpc bytestream deadline seconds refers to Deadline for RPC bytestream calls with H2O Storage in seconds. Sets maximum number of seconds that Driverless waits for RPC call to complete before it cancels it. This value is used for uploading and downloading artifacts." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o storage rpc bytestream deadline seconds", + "output": "h2o storage rpc bytestream deadline seconds refers to Deadline for RPC bytestream calls with H2O Storage in seconds. Sets maximum number of seconds that Driverless waits for RPC call to complete before it cancels it. This value is used for uploading and downloading artifacts." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o storage rpc bytestream deadline seconds refers to Deadline for RPC bytestream calls with H2O Storage in seconds. Sets maximum number of seconds that Driverless waits for RPC call to complete before it cancels it. This value is used for uploading and downloading artifacts."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_storage_rpc_bytestream_deadline_seconds", + "output": "h2o storage rpc bytestream deadline seconds refers to Deadline for RPC bytestream calls with H2O Storage in seconds. Sets maximum number of seconds that Driverless waits for RPC call to complete before it cancels it. This value is used for uploading and downloading artifacts." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_storage_rpc_bytestream_deadline_seconds", + "output": "h2o storage rpc bytestream deadline seconds refers to Deadline for RPC bytestream calls with H2O Storage in seconds. Sets maximum number of seconds that Driverless waits for RPC call to complete before it cancels it. This value is used for uploading and downloading artifacts." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_oauth2_scopes", + "output": "h2o storage oauth2 scopes refers to Storage client manages its own access tokens derived from the refresh token received on the user login. When this option is set access token with the scopes defined here is requested. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_oauth2_scopes", + "output": "h2o storage oauth2 scopes refers to Storage client manages its own access tokens derived from the refresh token received on the user login. When this option is set access token with the scopes defined here is requested.
(space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o storage oauth2 scopes", + "output": "h2o storage oauth2 scopes refers to Storage client manages its own access tokens derived from the refresh token received on the user login. When this option is set access token with the scopes defined here is requested. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o storage oauth2 scopes refers to Storage client manages its own access tokens derived from the refresh token received on the user login. When this option is set access token with the scopes defined here is requested. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_storage_oauth2_scopes", + "output": "h2o storage oauth2 scopes refers to Storage client manages its own access tokens derived from the refresh token received on the user login. When this option is set access token with the scopes defined here is requested. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_storage_oauth2_scopes", + "output": "h2o storage oauth2 scopes refers to Storage client manages its own access tokens derived from the refresh token received on the user login. When this option is set access token with the scopes defined here is requested. (space separated list)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_message_size_limit", + "output": "h2o storage message size limit refers to Maximum message size of an RPC request in bytes. Requests larger than this limit will fail."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_storage_message_size_limit", + "output": "h2o storage message size limit refers to Maximum message size of an RPC request in bytes. Requests larger than this limit will fail." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o storage message size limit", + "output": "h2o storage message size limit refers to Maximum message size of an RPC request in bytes. Requests larger than this limit will fail." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o storage message size limit refers to Maximum message size of an RPC request in bytes. Requests larger than this limit will fail." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_storage_message_size_limit", + "output": "h2o storage message size limit refers to Maximum message size of an RPC request in bytes. Requests larger than this limit will fail." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_storage_message_size_limit", + "output": "h2o storage message size limit refers to Maximum message size of an RPC request in bytes. Requests larger than this limit will fail." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_mlops_ui_url", + "output": "h2o mlops ui url refers to If the `h2o_mlops_ui_url` is provided alongside the `enable_storage`, DAI is able to redirect user to the MLOps app upon clicking the Deploy button."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_mlops_ui_url", + "output": "h2o mlops ui url refers to MLOps UI URL address: If the `h2o_mlops_ui_url` is provided alongside the `enable_storage`, DAI is able to redirect user to the MLOps app upon clicking the Deploy button." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o mlops ui url", + "output": "h2o mlops ui url refers to MLOps UI URL address: If the `h2o_mlops_ui_url` is provided alongside the `enable_storage`, DAI is able to redirect user to the MLOps app upon clicking the Deploy button." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "MLOps UI URL address: ", + "output": "h2o mlops ui url refers to MLOps UI URL address: If the `h2o_mlops_ui_url` is provided alongside the `enable_storage`, DAI is able to redirect user to the MLOps app upon clicking the Deploy button." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_mlops_ui_url", + "output": "h2o mlops ui url refers to If the `h2o_mlops_ui_url` is provided alongside the `enable_storage`, DAI is able to redirect user to the MLOps app upon clicking the Deploy button." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_mlops_ui_url", + "output": "h2o mlops ui url refers to MLOps UI URL address: If the `h2o_mlops_ui_url` is provided alongside the `enable_storage`, DAI is able to redirect user to the MLOps app upon clicking the Deploy button." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "keystore_file", + "output": "keystore file refers to Keystore file that contains secure config.toml items like passwords, secret keys etc. 
Keystore is managed by h2oai.keystore tool." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "keystore_file", + "output": "keystore file refers to Keystore file that contains secure config.toml items like passwords, secret keys etc. Keystore is managed by h2oai.keystore tool." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "keystore file", + "output": "keystore file refers to Keystore file that contains secure config.toml items like passwords, secret keys etc. Keystore is managed by h2oai.keystore tool." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "keystore file refers to Keystore file that contains secure config.toml items like passwords, secret keys etc. Keystore is managed by h2oai.keystore tool." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting keystore_file", + "output": "keystore file refers to Keystore file that contains secure config.toml items like passwords, secret keys etc. Keystore is managed by h2oai.keystore tool." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting keystore_file", + "output": "keystore file refers to Keystore file that contains secure config.toml items like passwords, secret keys etc. Keystore is managed by h2oai.keystore tool." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "log_level", + "output": "log level refers to Verbosity of logging 0: quiet (CRITICAL, ERROR, WARNING) 1: default (CRITICAL, ERROR, WARNING, INFO, DATA) 2: verbose (CRITICAL, ERROR, WARNING, INFO, DATA, DEBUG) Affects server and all experiments" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "log_level", + "output": "log level refers to Verbosity of logging 0: quiet (CRITICAL, ERROR, WARNING) 1: default (CRITICAL, ERROR, WARNING, INFO, DATA) 2: verbose (CRITICAL, ERROR, WARNING, INFO, DATA, DEBUG) Affects server and all experiments" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "log level", + "output": "log level refers to Verbosity of logging 0: quiet (CRITICAL, ERROR, WARNING) 1: default (CRITICAL, ERROR, WARNING, INFO, DATA) 2: verbose (CRITICAL, ERROR, WARNING, INFO, DATA, DEBUG) Affects server and all experiments" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "log level refers to Verbosity of logging 0: quiet (CRITICAL, ERROR, WARNING) 1: default (CRITICAL, ERROR, WARNING, INFO, DATA) 2: verbose (CRITICAL, ERROR, WARNING, INFO, DATA, DEBUG) Affects server and all experiments" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting log_level", + "output": "log level refers to Verbosity of logging 0: quiet (CRITICAL, ERROR, WARNING) 1: default (CRITICAL, ERROR, WARNING, INFO, DATA) 2: verbose (CRITICAL, ERROR, WARNING, INFO, DATA, DEBUG) Affects server and all experiments" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting log_level", + "output": "log level refers to Verbosity of 
logging 0: quiet (CRITICAL, ERROR, WARNING) 1: default (CRITICAL, ERROR, WARNING, INFO, DATA) 2: verbose (CRITICAL, ERROR, WARNING, INFO, DATA, DEBUG) Affects server and all experiments" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "collect_server_logs_in_experiment_logs", + "output": "collect server logs in experiment logs refers to Whether to collect relevant server logs (h2oai_server.log, dai.log from systemctl or docker, and h2o log) Useful for when sending logs to H2O.ai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "collect_server_logs_in_experiment_logs", + "output": "collect server logs in experiment logs refers to Whether to collect relevant server logs (h2oai_server.log, dai.log from systemctl or docker, and h2o log) Useful for when sending logs to H2O.ai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "collect server logs in experiment logs", + "output": "collect server logs in experiment logs refers to Whether to collect relevant server logs (h2oai_server.log, dai.log from systemctl or docker, and h2o log) Useful for when sending logs to H2O.ai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "collect server logs in experiment logs refers to Whether to collect relevant server logs (h2oai_server.log, dai.log from systemctl or docker, and h2o log) Useful for when sending logs to H2O.ai" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting collect_server_logs_in_experiment_logs", + "output": "collect server logs in experiment logs refers to Whether to collect relevant server logs (h2oai_server.log, dai.log from systemctl or docker, and h2o log) Useful for when sending logs to 
H2O.ai" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting collect_server_logs_in_experiment_logs", + "output": "collect server logs in experiment logs refers to Whether to collect relevant server logs (h2oai_server.log, dai.log from systemctl or docker, and h2o log) Useful for when sending logs to H2O.ai" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "migrate_all_entities_to_user", + "output": "migrate all entities to user refers to When set, will migrate all user entities to the defined user upon startup, this is mostly useful during instance migration via H2O's AIEM/Steam." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "migrate_all_entities_to_user", + "output": "migrate all entities to user refers to When set, will migrate all user entities to the defined user upon startup, this is mostly useful during instance migration via H2O's AIEM/Steam." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "migrate all entities to user", + "output": "migrate all entities to user refers to When set, will migrate all user entities to the defined user upon startup, this is mostly useful during instance migration via H2O's AIEM/Steam." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "migrate all entities to user refers to When set, will migrate all user entities to the defined user upon startup, this is mostly useful during instance migration via H2O's AIEM/Steam." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting migrate_all_entities_to_user", + "output": "migrate all entities to user refers to When set, will migrate all user entities to the defined user upon startup, this is mostly useful during instance migration via H2O's AIEM/Steam." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting migrate_all_entities_to_user", + "output": "migrate all entities to user refers to When set, will migrate all user entities to the defined user upon startup, this is mostly useful during instance migration via H2O's AIEM/Steam." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "per_user_directories", + "output": "per user directories refers to Whether to have all user content isolated into a directory for each user. If set to False, all users content is common to single directory, recipes are shared, and brain folder for restart/refit is shared. If set to True, each user has separate folder for all user tasks, recipes are isolated to each user, and brain folder for restart/refit is only for the specific user. Migration from False to True or back to False is allowed for all experiment content accessible by GUI or python client, all recipes, and starting experiment with same settings, restart, or refit. However, if switch to per-user mode, the common brain folder is no longer used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "per_user_directories", + "output": "per user directories refers to Whether to have all user content isolated into a directory for each user. If set to False, all users content is common to single directory, recipes are shared, and brain folder for restart/refit is shared. 
If set to True, each user has separate folder for all user tasks, recipes are isolated to each user, and brain folder for restart/refit is only for the specific user. Migration from False to True or back to False is allowed for all experiment content accessible by GUI or python client, all recipes, and starting experiment with same settings, restart, or refit. However, if switch to per-user mode, the common brain folder is no longer used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "per user directories", + "output": "per user directories refers to Whether to have all user content isolated into a directory for each user. If set to False, all users content is common to single directory, recipes are shared, and brain folder for restart/refit is shared. If set to True, each user has separate folder for all user tasks, recipes are isolated to each user, and brain folder for restart/refit is only for the specific user. Migration from False to True or back to False is allowed for all experiment content accessible by GUI or python client, all recipes, and starting experiment with same settings, restart, or refit. However, if switch to per-user mode, the common brain folder is no longer used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "per user directories refers to Whether to have all user content isolated into a directory for each user. If set to False, all users content is common to single directory, recipes are shared, and brain folder for restart/refit is shared. If set to True, each user has separate folder for all user tasks, recipes are isolated to each user, and brain folder for restart/refit is only for the specific user. 
Migration from False to True or back to False is allowed for all experiment content accessible by GUI or python client, all recipes, and starting experiment with same settings, restart, or refit. However, if switch to per-user mode, the common brain folder is no longer used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting per_user_directories", + "output": "per user directories refers to Whether to have all user content isolated into a directory for each user. If set to False, all users content is common to single directory, recipes are shared, and brain folder for restart/refit is shared. If set to True, each user has separate folder for all user tasks, recipes are isolated to each user, and brain folder for restart/refit is only for the specific user. Migration from False to True or back to False is allowed for all experiment content accessible by GUI or python client, all recipes, and starting experiment with same settings, restart, or refit. However, if switch to per-user mode, the common brain folder is no longer used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting per_user_directories", + "output": "per user directories refers to Whether to have all user content isolated into a directory for each user. If set to False, all users content is common to single directory, recipes are shared, and brain folder for restart/refit is shared. If set to True, each user has separate folder for all user tasks, recipes are isolated to each user, and brain folder for restart/refit is only for the specific user. Migration from False to True or back to False is allowed for all experiment content accessible by GUI or python client, all recipes, and starting experiment with same settings, restart, or refit. However, if switch to per-user mode, the common brain folder is no longer used. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_import_ignore_file_names", + "output": "data import ignore file names refers to List of file names to ignore during dataset import. Any files with names listed above will be skipped when DAI creates a dataset. Example, directory contains 3 files: [data_1.csv, data_2.csv, _SUCCESS] DAI will only attempt to create a dataset using files data_1.csv and data_2.csv, and _SUCCESS file will be ignored. Default is to ignore _SUCCESS files which are commonly created in exporting data from Hadoop " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_import_ignore_file_names", + "output": "data import ignore file names refers to List of file names to ignore during dataset import. Any files with names listed above will be skipped when DAI creates a dataset. Example, directory contains 3 files: [data_1.csv, data_2.csv, _SUCCESS] DAI will only attempt to create a dataset using files data_1.csv and data_2.csv, and _SUCCESS file will be ignored. Default is to ignore _SUCCESS files which are commonly created in exporting data from Hadoop " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data import ignore file names", + "output": "data import ignore file names refers to List of file names to ignore during dataset import. Any files with names listed above will be skipped when DAI creates a dataset. Example, directory contains 3 files: [data_1.csv, data_2.csv, _SUCCESS] DAI will only attempt to create a dataset using files data_1.csv and data_2.csv, and _SUCCESS file will be ignored. 
Default is to ignore _SUCCESS files which are commonly created in exporting data from Hadoop " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "data import ignore file names refers to List of file names to ignore during dataset import. Any files with names listed above will be skipped when DAI creates a dataset. Example, directory contains 3 files: [data_1.csv, data_2.csv, _SUCCESS] DAI will only attempt to create a dataset using files data_1.csv and data_2.csv, and _SUCCESS file will be ignored. Default is to ignore _SUCCESS files which are commonly created in exporting data from Hadoop " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting data_import_ignore_file_names", + "output": "data import ignore file names refers to List of file names to ignore during dataset import. Any files with names listed above will be skipped when DAI creates a dataset. Example, directory contains 3 files: [data_1.csv, data_2.csv, _SUCCESS] DAI will only attempt to create a dataset using files data_1.csv and data_2.csv, and _SUCCESS file will be ignored. Default is to ignore _SUCCESS files which are commonly created in exporting data from Hadoop " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting data_import_ignore_file_names", + "output": "data import ignore file names refers to List of file names to ignore during dataset import. Any files with names listed above will be skipped when DAI creates a dataset. Example, directory contains 3 files: [data_1.csv, data_2.csv, _SUCCESS] DAI will only attempt to create a dataset using files data_1.csv and data_2.csv, and _SUCCESS file will be ignored. 
Default is to ignore _SUCCESS files which are commonly created in exporting data from Hadoop " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_import_upcast_multi_file", + "output": "data import upcast multi file refers to For data import from a directory (multiple files), allow column types to differ and perform upcast during import." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_import_upcast_multi_file", + "output": "data import upcast multi file refers to For data import from a directory (multiple files), allow column types to differ and perform upcast during import." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data import upcast multi file", + "output": "data import upcast multi file refers to For data import from a directory (multiple files), allow column types to differ and perform upcast during import." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "data import upcast multi file refers to For data import from a directory (multiple files), allow column types to differ and perform upcast during import." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting data_import_upcast_multi_file", + "output": "data import upcast multi file refers to For data import from a directory (multiple files), allow column types to differ and perform upcast during import." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting data_import_upcast_multi_file", + "output": "data import upcast multi file refers to For data import from a directory (multiple files), allow column types to differ and perform upcast during import." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_import_explode_list_type_columns_in_parquet", + "output": "data import explode list type columns in parquet refers to If set to true, will explode columns with list data type when importing parquet files." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_import_explode_list_type_columns_in_parquet", + "output": "data import explode list type columns in parquet refers to If set to true, will explode columns with list data type when importing parquet files." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data import explode list type columns in parquet", + "output": "data import explode list type columns in parquet refers to If set to true, will explode columns with list data type when importing parquet files." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "data import explode list type columns in parquet refers to If set to true, will explode columns with list data type when importing parquet files." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting data_import_explode_list_type_columns_in_parquet", + "output": "data import explode list type columns in parquet refers to If set to true, will explode columns with list data type when importing parquet files." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting data_import_explode_list_type_columns_in_parquet", + "output": "data import explode list type columns in parquet refers to If set to true, will explode columns with list data type when importing parquet files." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "files_without_extensions_expected_types", + "output": "files without extensions expected types refers to List of file types that Driverless AI should attempt to import data as IF no file extension exists in the file name If no file extension is provided, Driverless AI will attempt to import the data starting with first type in the defined list. Default [\"parquet\", \"orc\"] Example: 'test.csv' (file extension exists) vs 'test' (file extension DOES NOT exist) NOTE: see supported_file_types configuration option for more details on supported file types " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "files_without_extensions_expected_types", + "output": "files without extensions expected types refers to List of file types that Driverless AI should attempt to import data as IF no file extension exists in the file name If no file extension is provided, Driverless AI will attempt to import the data starting with first type in the defined list. Default [\"parquet\", \"orc\"] Example: 'test.csv' (file extension exists) vs 'test' (file extension DOES NOT exist) NOTE: see supported_file_types configuration option for more details on supported file types " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "files without extensions expected types", + "output": "files without extensions expected types refers to List of file types that Driverless AI should attempt to import data as IF no file extension exists in the file name If no file extension is provided, Driverless AI will attempt to import the data starting with first type in the defined list. 
Default [\"parquet\", \"orc\"] Example: 'test.csv' (file extension exists) vs 'test' (file extension DOES NOT exist) NOTE: see supported_file_types configuration option for more details on supported file types " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "files without extensions expected types refers to List of file types that Driverless AI should attempt to import data as IF no file extension exists in the file name If no file extension is provided, Driverless AI will attempt to import the data starting with first type in the defined list. Default [\"parquet\", \"orc\"] Example: 'test.csv' (file extension exists) vs 'test' (file extension DOES NOT exist) NOTE: see supported_file_types configuration option for more details on supported file types " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting files_without_extensions_expected_types", + "output": "files without extensions expected types refers to List of file types that Driverless AI should attempt to import data as IF no file extension exists in the file name If no file extension is provided, Driverless AI will attempt to import the data starting with first type in the defined list. Default [\"parquet\", \"orc\"] Example: 'test.csv' (file extension exists) vs 'test' (file extension DOES NOT exist) NOTE: see supported_file_types configuration option for more details on supported file types " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting files_without_extensions_expected_types", + "output": "files without extensions expected types refers to List of file types that Driverless AI should attempt to import data as IF no file extension exists in the file name If no file extension is provided, Driverless AI will attempt to import the data starting with first type in the defined list. 
Default [\"parquet\", \"orc\"] Example: 'test.csv' (file extension exists) vs 'test' (file extension DOES NOT exist) NOTE: see supported_file_types configuration option for more details on supported file types " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "do_not_log_list", + "output": "do not log list refers to do_not_log_list : add configurations that you do not wish to be recorded in logs here.They will still be stored in experiment information so child experiments can behave consistently." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "do_not_log_list", + "output": "do not log list refers to do_not_log_list : add configurations that you do not wish to be recorded in logs here.They will still be stored in experiment information so child experiments can behave consistently." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "do not log list", + "output": "do not log list refers to do_not_log_list : add configurations that you do not wish to be recorded in logs here.They will still be stored in experiment information so child experiments can behave consistently." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "do not log list refers to do_not_log_list : add configurations that you do not wish to be recorded in logs here.They will still be stored in experiment information so child experiments can behave consistently." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting do_not_log_list", + "output": "do not log list refers to do_not_log_list : add configurations that you do not wish to be recorded in logs here.They will still be stored in experiment information so child experiments can behave consistently." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting do_not_log_list", + "output": "do not log list refers to do_not_log_list : add configurations that you do not wish to be recorded in logs here.They will still be stored in experiment information so child experiments can behave consistently." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "do_not_store_list", + "output": "do not store list refers to do_not_store_list : add configurations that you do not wish to be stored at all here.Will not be remembered across experiments, so not applicable to data science related itemsthat could be controlled by a user. These items are automatically not logged." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "do_not_store_list", + "output": "do not store list refers to do_not_store_list : add configurations that you do not wish to be stored at all here.Will not be remembered across experiments, so not applicable to data science related itemsthat could be controlled by a user. These items are automatically not logged." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "do not store list", + "output": "do not store list refers to do_not_store_list : add configurations that you do not wish to be stored at all here.Will not be remembered across experiments, so not applicable to data science related itemsthat could be controlled by a user. 
These items are automatically not logged." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "do not store list refers to do_not_store_list : add configurations that you do not wish to be stored at all here.Will not be remembered across experiments, so not applicable to data science related itemsthat could be controlled by a user. These items are automatically not logged." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting do_not_store_list", + "output": "do not store list refers to do_not_store_list : add configurations that you do not wish to be stored at all here.Will not be remembered across experiments, so not applicable to data science related itemsthat could be controlled by a user. These items are automatically not logged." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting do_not_store_list", + "output": "do not store list refers to do_not_store_list : add configurations that you do not wish to be stored at all here.Will not be remembered across experiments, so not applicable to data science related itemsthat could be controlled by a user. These items are automatically not logged." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datatable_parse_max_memory_bytes", + "output": "datatable parse max memory bytes refers to Memory limit in bytes for datatable to use during parsing of CSV files. -1 for unlimited. 0 for automatic. >0 for constraint." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datatable_parse_max_memory_bytes", + "output": "datatable parse max memory bytes refers to Memory limit in bytes for datatable to use during parsing of CSV files. -1 for unlimited. 0 for automatic. 
>0 for constraint." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datatable parse max memory bytes", + "output": "datatable parse max memory bytes refers to Memory limit in bytes for datatable to use during parsing of CSV files. -1 for unlimited. 0 for automatic. >0 for constraint." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "datatable parse max memory bytes refers to Memory limit in bytes for datatable to use during parsing of CSV files. -1 for unlimited. 0 for automatic. >0 for constraint." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting datatable_parse_max_memory_bytes", + "output": "datatable parse max memory bytes refers to Memory limit in bytes for datatable to use during parsing of CSV files. -1 for unlimited. 0 for automatic. >0 for constraint." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting datatable_parse_max_memory_bytes", + "output": "datatable parse max memory bytes refers to Memory limit in bytes for datatable to use during parsing of CSV files. -1 for unlimited. 0 for automatic. >0 for constraint." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datatable_separator", + "output": "datatable separator refers to Delimiter/Separator to use when parsing tabular text files like CSV. Automatic if empty. Must be provided at system start." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datatable_separator", + "output": "datatable separator refers to Delimiter/Separator to use when parsing tabular text files like CSV. Automatic if empty. Must be provided at system start." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datatable separator", + "output": "datatable separator refers to Delimiter/Separator to use when parsing tabular text files like CSV. Automatic if empty. Must be provided at system start." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "datatable separator refers to Delimiter/Separator to use when parsing tabular text files like CSV. Automatic if empty. Must be provided at system start." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting datatable_separator", + "output": "datatable separator refers to Delimiter/Separator to use when parsing tabular text files like CSV. Automatic if empty. Must be provided at system start." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting datatable_separator", + "output": "datatable separator refers to Delimiter/Separator to use when parsing tabular text files like CSV. Automatic if empty. Must be provided at system start." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ping_load_data_file", + "output": "ping load data file refers to Whether to enable ping of system status during DAI data ingestion." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ping_load_data_file", + "output": "ping load data file refers to Whether to enable ping of system status during DAI data ingestion.: Whether to enable ping of system status during DAI data ingestion." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ping load data file", + "output": "ping load data file refers to Whether to enable ping of system status during DAI data ingestion.: Whether to enable ping of system status during DAI data ingestion." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to enable ping of system status during DAI data ingestion.: ", + "output": "ping load data file refers to Whether to enable ping of system status during DAI data ingestion.: Whether to enable ping of system status during DAI data ingestion." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ping_load_data_file", + "output": "ping load data file refers to Whether to enable ping of system status during DAI data ingestion." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ping_load_data_file", + "output": "ping load data file refers to Whether to enable ping of system status during DAI data ingestion.: Whether to enable ping of system status during DAI data ingestion." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ping_sleep_period", + "output": "ping sleep period refers to Period between checking DAI status. Should be small enough to avoid slowing parent who stops ping process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ping_sleep_period", + "output": "ping sleep period refers to Period between checking DAI status. Should be small enough to avoid slowing parent who stops ping process." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ping sleep period", + "output": "ping sleep period refers to Period between checking DAI status. Should be small enough to avoid slowing parent who stops ping process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ping sleep period refers to Period between checking DAI status. Should be small enough to avoid slowing parent who stops ping process." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ping_sleep_period", + "output": "ping sleep period refers to Period between checking DAI status. Should be small enough to avoid slowing parent who stops ping process." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ping_sleep_period", + "output": "ping sleep period refers to Period between checking DAI status. Should be small enough to avoid slowing parent who stops ping process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_precision", + "output": "data precision refers to Precision of how data is stored 'datatable' keeps original datatable storage types (i.e. bool, int, float32, float64) (experimental) 'float32' best for speed, 'float64' best for accuracy or very large input values, \"datatable\" best for memory 'float32' allows numbers up to about +-3E38 with relative error of about 1E-7 'float64' allows numbers up to about +-1E308 with relative error of about 1E-16 Some calculations, like the GLM standardization, can only handle up to sqrt() of these maximums for data values, So GLM with 32-bit precision can only handle up to about a value of 1E19 before standardization generates inf values. 
If you see \"Best individual has invalid score\" you may require higher precision." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_precision", + "output": "data precision refers to Precision of how data is stored 'datatable' keeps original datatable storage types (i.e. bool, int, float32, float64) (experimental) 'float32' best for speed, 'float64' best for accuracy or very large input values, \"datatable\" best for memory 'float32' allows numbers up to about +-3E38 with relative error of about 1E-7 'float64' allows numbers up to about +-1E308 with relative error of about 1E-16 Some calculations, like the GLM standardization, can only handle up to sqrt() of these maximums for data values, So GLM with 32-bit precision can only handle up to about a value of 1E19 before standardization generates inf values. If you see \"Best individual has invalid score\" you may require higher precision." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data precision", + "output": "data precision refers to Precision of how data is stored 'datatable' keeps original datatable storage types (i.e. bool, int, float32, float64) (experimental) 'float32' best for speed, 'float64' best for accuracy or very large input values, \"datatable\" best for memory 'float32' allows numbers up to about +-3E38 with relative error of about 1E-7 'float64' allows numbers up to about +-1E308 with relative error of about 1E-16 Some calculations, like the GLM standardization, can only handle up to sqrt() of these maximums for data values, So GLM with 32-bit precision can only handle up to about a value of 1E19 before standardization generates inf values. If you see \"Best individual has invalid score\" you may require higher precision." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "data precision refers to Precision of how data is stored 'datatable' keeps original datatable storage types (i.e. bool, int, float32, float64) (experimental) 'float32' best for speed, 'float64' best for accuracy or very large input values, \"datatable\" best for memory 'float32' allows numbers up to about +-3E38 with relative error of about 1E-7 'float64' allows numbers up to about +-1E308 with relative error of about 1E-16 Some calculations, like the GLM standardization, can only handle up to sqrt() of these maximums for data values, So GLM with 32-bit precision can only handle up to about a value of 1E19 before standardization generates inf values. If you see \"Best individual has invalid score\" you may require higher precision." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting data_precision", + "output": "data precision refers to Precision of how data is stored 'datatable' keeps original datatable storage types (i.e. bool, int, float32, float64) (experimental) 'float32' best for speed, 'float64' best for accuracy or very large input values, \"datatable\" best for memory 'float32' allows numbers up to about +-3E38 with relative error of about 1E-7 'float64' allows numbers up to about +-1E308 with relative error of about 1E-16 Some calculations, like the GLM standardization, can only handle up to sqrt() of these maximums for data values, So GLM with 32-bit precision can only handle up to about a value of 1E19 before standardization generates inf values. If you see \"Best individual has invalid score\" you may require higher precision." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting data_precision", + "output": "data precision refers to Precision of how data is stored 'datatable' keeps original datatable storage types (i.e. bool, int, float32, float64) (experimental) 'float32' best for speed, 'float64' best for accuracy or very large input values, \"datatable\" best for memory 'float32' allows numbers up to about +-3E38 with relative error of about 1E-7 'float64' allows numbers up to about +-1E308 with relative error of about 1E-16 Some calculations, like the GLM standardization, can only handle up to sqrt() of these maximums for data values, So GLM with 32-bit precision can only handle up to about a value of 1E19 before standardization generates inf values. If you see \"Best individual has invalid score\" you may require higher precision." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "transformer_precision", + "output": "transformer precision refers to Precision of most data transformers (same options and notes as data_precision). Useful for higher precision in transformers with numerous operations that can accumulate error. Also useful if want faster performance for transformers but otherwise want data stored in high precision." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "transformer_precision", + "output": "transformer precision refers to Precision of most data transformers (same options and notes as data_precision). Useful for higher precision in transformers with numerous operations that can accumulate error. Also useful if want faster performance for transformers but otherwise want data stored in high precision." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "transformer precision", + "output": "transformer precision refers to Precision of most data transformers (same options and notes as data_precision). Useful for higher precision in transformers with numerous operations that can accumulate error. Also useful if want faster performance for transformers but otherwise want data stored in high precision." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "transformer precision refers to Precision of most data transformers (same options and notes as data_precision). Useful for higher precision in transformers with numerous operations that can accumulate error. Also useful if want faster performance for transformers but otherwise want data stored in high precision." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting transformer_precision", + "output": "transformer precision refers to Precision of most data transformers (same options and notes as data_precision). Useful for higher precision in transformers with numerous operations that can accumulate error. Also useful if want faster performance for transformers but otherwise want data stored in high precision." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting transformer_precision", + "output": "transformer precision refers to Precision of most data transformers (same options and notes as data_precision). Useful for higher precision in transformers with numerous operations that can accumulate error. Also useful if want faster performance for transformers but otherwise want data stored in high precision." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ulimit_up_to_hard_limit", + "output": "ulimit up to hard limit refers to Whether to change ulimit soft limits up to hard limits (for DAI server app, which is not a generic user app). Prevents resource limit problems in some cases. Restricted to no more than limit_nofile and limit_nproc for those resources." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ulimit_up_to_hard_limit", + "output": "ulimit up to hard limit refers to Whether to change ulimit soft limits up to hard limits (for DAI server app, which is not a generic user app). Prevents resource limit problems in some cases. Restricted to no more than limit_nofile and limit_nproc for those resources." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ulimit up to hard limit", + "output": "ulimit up to hard limit refers to Whether to change ulimit soft limits up to hard limits (for DAI server app, which is not a generic user app). Prevents resource limit problems in some cases. Restricted to no more than limit_nofile and limit_nproc for those resources." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ulimit up to hard limit refers to Whether to change ulimit soft limits up to hard limits (for DAI server app, which is not a generic user app). Prevents resource limit problems in some cases. Restricted to no more than limit_nofile and limit_nproc for those resources." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ulimit_up_to_hard_limit", + "output": "ulimit up to hard limit refers to Whether to change ulimit soft limits up to hard limits (for DAI server app, which is not a generic user app). Prevents resource limit problems in some cases. Restricted to no more than limit_nofile and limit_nproc for those resources." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ulimit_up_to_hard_limit", + "output": "ulimit up to hard limit refers to Whether to change ulimit soft limits up to hard limits (for DAI server app, which is not a generic user app). Prevents resource limit problems in some cases. Restricted to no more than limit_nofile and limit_nproc for those resources." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "disable_core_files", + "output": "disable core files refers to Whether to disable core files if debug_log=true. If debug_log=false, core file creation is always disabled.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "disable_core_files", + "output": "disable core files refers to Whether to disable core files if debug_log=true. If debug_log=false, core file creation is always disabled.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "disable core files", + "output": "disable core files refers to Whether to disable core files if debug_log=true. If debug_log=false, core file creation is always disabled.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to disable core files if debug_log=true. 
If debug_log=false, core file creation is always disabled.: ", + "output": "disable core files refers to Whether to disable core files if debug_log=true. If debug_log=false, core file creation is always disabled.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting disable_core_files", + "output": "disable core files refers to Whether to disable core files if debug_log=true. If debug_log=false, core file creation is always disabled.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting disable_core_files", + "output": "disable core files refers to Whether to disable core files if debug_log=true. If debug_log=false, core file creation is always disabled.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "limit_nofile", + "output": "limit nofile refers to number of file limit Below should be consistent with start-dai.sh" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "limit_nofile", + "output": "limit nofile refers to number of file limit Below should be consistent with start-dai.sh" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "limit nofile", + "output": "limit nofile refers to number of file limit Below should be consistent with start-dai.sh" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "limit nofile refers to number of file limit Below should be consistent with start-dai.sh" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting limit_nofile", + "output": "limit nofile refers to number of file limit Below should be consistent with start-dai.sh" + }, + { + "prompt_type": 
"human_bot", + "instruction": "Provide a detailed explanation of the expert setting limit_nofile", + "output": "limit nofile refers to number of file limit Below should be consistent with start-dai.sh" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "limit_nproc", + "output": "limit nproc refers to number of threads limit Below should be consistent with start-dai.sh" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "limit_nproc", + "output": "limit nproc refers to number of threads limit Below should be consistent with start-dai.sh" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "limit nproc", + "output": "limit nproc refers to number of threads limit Below should be consistent with start-dai.sh" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "limit nproc refers to number of threads limit Below should be consistent with start-dai.sh" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting limit_nproc", + "output": "limit nproc refers to number of threads limit Below should be consistent with start-dai.sh" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting limit_nproc", + "output": "limit nproc refers to number of threads limit Below should be consistent with start-dai.sh" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "compute_correlation", + "output": "compute correlation refers to ' Whether to compute training, validation, and test correlation matrix (table and heatmap pdf) and save to disk alpha: WARNING: currently single threaded and quadratically slow for 
many columns" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "compute_correlation", + "output": "compute correlation refers to Compute correlation matrix: ' Whether to compute training, validation, and test correlation matrix (table and heatmap pdf) and save to disk alpha: WARNING: currently single threaded and quadratically slow for many columns" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "compute correlation", + "output": "compute correlation refers to Compute correlation matrix: ' Whether to compute training, validation, and test correlation matrix (table and heatmap pdf) and save to disk alpha: WARNING: currently single threaded and quadratically slow for many columns" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Compute correlation matrix: ", + "output": "compute correlation refers to Compute correlation matrix: ' Whether to compute training, validation, and test correlation matrix (table and heatmap pdf) and save to disk alpha: WARNING: currently single threaded and quadratically slow for many columns" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting compute_correlation", + "output": "compute correlation refers to ' Whether to compute training, validation, and test correlation matrix (table and heatmap pdf) and save to disk alpha: WARNING: currently single threaded and quadratically slow for many columns" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting compute_correlation", + "output": "compute correlation refers to Compute correlation matrix: ' Whether to compute training, validation, and test correlation matrix (table and heatmap pdf) and save to disk alpha: WARNING: currently single threaded and 
quadratically slow for many columns" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "produce_correlation_heatmap", + "output": "produce correlation heatmap refers to Whether to dump to disk a correlation heatmap" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "produce_correlation_heatmap", + "output": "produce correlation heatmap refers to Whether to dump to disk a correlation heatmap" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "produce correlation heatmap", + "output": "produce correlation heatmap refers to Whether to dump to disk a correlation heatmap" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "produce correlation heatmap refers to Whether to dump to disk a correlation heatmap" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting produce_correlation_heatmap", + "output": "produce correlation heatmap refers to Whether to dump to disk a correlation heatmap" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting produce_correlation_heatmap", + "output": "produce correlation heatmap refers to Whether to dump to disk a correlation heatmap" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "high_correlation_value_to_report", + "output": "high correlation value to report refers to Value to report high correlation between original features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "high_correlation_value_to_report", + "output": "high correlation value to report refers to 
Threshold for reporting high correlation: Value to report high correlation between original features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "high correlation value to report", + "output": "high correlation value to report refers to Threshold for reporting high correlation: Value to report high correlation between original features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Threshold for reporting high correlation: ", + "output": "high correlation value to report refers to Threshold for reporting high correlation: Value to report high correlation between original features" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting high_correlation_value_to_report", + "output": "high correlation value to report refers to Value to report high correlation between original features" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting high_correlation_value_to_report", + "output": "high correlation value to report refers to Threshold for reporting high correlation: Value to report high correlation between original features" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "restart_experiments_after_shutdown", + "output": "restart experiments after shutdown refers to If True, experiments aborted by server restart will automatically restart and continue upon user login" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "restart_experiments_after_shutdown", + "output": "restart experiments after shutdown refers to If True, experiments aborted by server restart will automatically restart and continue upon user login" + }, + { + "prompt_type": "human_bot", 
+ "instruction": "Explain the following expert setting for Driverless AI", + "input": "restart experiments after shutdown", + "output": "restart experiments after shutdown refers to If True, experiments aborted by server restart will automatically restart and continue upon user login" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "restart experiments after shutdown refers to If True, experiments aborted by server restart will automatically restart and continue upon user login" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting restart_experiments_after_shutdown", + "output": "restart experiments after shutdown refers to If True, experiments aborted by server restart will automatically restart and continue upon user login" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting restart_experiments_after_shutdown", + "output": "restart experiments after shutdown refers to If True, experiments aborted by server restart will automatically restart and continue upon user login" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "any_env_overrides", + "output": "any env overrides refers to When environment variable is set to toml value, consider that an override of any toml value. Experiment's remember toml values for scoring, and this treats any environment set as equivalent to putting OVERRIDE_ in front of the environment key." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "any_env_overrides", + "output": "any env overrides refers to When environment variable is set to toml value, consider that an override of any toml value. 
Experiment's remember toml values for scoring, and this treats any environment set as equivalent to putting OVERRIDE_ in front of the environment key." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "any env overrides", + "output": "any env overrides refers to When environment variable is set to toml value, consider that an override of any toml value. Experiment's remember toml values for scoring, and this treats any environment set as equivalent to putting OVERRIDE_ in front of the environment key." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "any env overrides refers to When environment variable is set to toml value, consider that an override of any toml value. Experiment's remember toml values for scoring, and this treats any environment set as equivalent to putting OVERRIDE_ in front of the environment key." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting any_env_overrides", + "output": "any env overrides refers to When environment variable is set to toml value, consider that an override of any toml value. Experiment's remember toml values for scoring, and this treats any environment set as equivalent to putting OVERRIDE_ in front of the environment key." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting any_env_overrides", + "output": "any env overrides refers to When environment variable is set to toml value, consider that an override of any toml value. Experiment's remember toml values for scoring, and this treats any environment set as equivalent to putting OVERRIDE_ in front of the environment key." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datatable_bom_csv", + "output": "datatable bom csv refers to Include byte order mark (BOM) when writing CSV files. Required to support UTF-8 encoding in Excel." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datatable_bom_csv", + "output": "datatable bom csv refers to Include byte order mark (BOM) when writing CSV files. Required to support UTF-8 encoding in Excel." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datatable bom csv", + "output": "datatable bom csv refers to Include byte order mark (BOM) when writing CSV files. Required to support UTF-8 encoding in Excel." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "datatable bom csv refers to Include byte order mark (BOM) when writing CSV files. Required to support UTF-8 encoding in Excel." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting datatable_bom_csv", + "output": "datatable bom csv refers to Include byte order mark (BOM) when writing CSV files. Required to support UTF-8 encoding in Excel." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting datatable_bom_csv", + "output": "datatable bom csv refers to Include byte order mark (BOM) when writing CSV files. Required to support UTF-8 encoding in Excel." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "debug_print", + "output": "debug print refers to Whether to enable debug prints (to console/stdout/stderr), e.g. showing up in dai*.log or dai*.txt type files." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "debug_print", + "output": "debug print refers to Enable debug prints to console: Whether to enable debug prints (to console/stdout/stderr), e.g. showing up in dai*.log or dai*.txt type files." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "debug print", + "output": "debug print refers to Enable debug prints to console: Whether to enable debug prints (to console/stdout/stderr), e.g. showing up in dai*.log or dai*.txt type files." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable debug prints to console: ", + "output": "debug print refers to Enable debug prints to console: Whether to enable debug prints (to console/stdout/stderr), e.g. showing up in dai*.log or dai*.txt type files." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting debug_print", + "output": "debug print refers to Whether to enable debug prints (to console/stdout/stderr), e.g. showing up in dai*.log or dai*.txt type files." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting debug_print", + "output": "debug print refers to Enable debug prints to console: Whether to enable debug prints (to console/stdout/stderr), e.g. showing up in dai*.log or dai*.txt type files." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "debug_print_level", + "output": "debug print level refers to Level (0-4) for debug prints (to console/stdout/stderr), e.g. showing up in dai*.log or dai*.txt type files. 1-2 is normal, 4 would lead to highly excessive debug and is not recommended in production." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "debug_print_level", + "output": "debug print level refers to Level of debug to print: Level (0-4) for debug prints (to console/stdout/stderr), e.g. showing up in dai*.log or dai*.txt type files. 1-2 is normal, 4 would lead to highly excessive debug and is not recommended in production." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "debug print level", + "output": "debug print level refers to Level of debug to print: Level (0-4) for debug prints (to console/stdout/stderr), e.g. showing up in dai*.log or dai*.txt type files. 1-2 is normal, 4 would lead to highly excessive debug and is not recommended in production." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Level of debug to print: ", + "output": "debug print level refers to Level of debug to print: Level (0-4) for debug prints (to console/stdout/stderr), e.g. showing up in dai*.log or dai*.txt type files. 1-2 is normal, 4 would lead to highly excessive debug and is not recommended in production." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting debug_print_level", + "output": "debug print level refers to Level (0-4) for debug prints (to console/stdout/stderr), e.g. showing up in dai*.log or dai*.txt type files. 1-2 is normal, 4 would lead to highly excessive debug and is not recommended in production." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting debug_print_level", + "output": "debug print level refers to Level of debug to print: Level (0-4) for debug prints (to console/stdout/stderr), e.g. showing up in dai*.log or dai*.txt type files. 
1-2 is normal, 4 would lead to highly excessive debug and is not recommended in production." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_invalid_config_toml_keys", + "output": "check invalid config toml keys refers to Whether to check if config.toml keys are valid and fail if not valid" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_invalid_config_toml_keys", + "output": "check invalid config toml keys refers to Whether to check if config.toml keys are valid and fail if not valid" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check invalid config toml keys", + "output": "check invalid config toml keys refers to Whether to check if config.toml keys are valid and fail if not valid" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "check invalid config toml keys refers to Whether to check if config.toml keys are valid and fail if not valid" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting check_invalid_config_toml_keys", + "output": "check invalid config toml keys refers to Whether to check if config.toml keys are valid and fail if not valid" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting check_invalid_config_toml_keys", + "output": "check invalid config toml keys refers to Whether to check if config.toml keys are valid and fail if not valid" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_no_pid_host", + "output": "allow no pid host refers to Whether to allow no --pid=host setting. 
Some GPU info from within docker will not be correct.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_no_pid_host", + "output": "allow no pid host refers to Whether to allow no --pid=host setting. Some GPU info from within docker will not be correct.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow no pid host", + "output": "allow no pid host refers to Whether to allow no --pid=host setting. Some GPU info from within docker will not be correct.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to allow no --pid=host setting. Some GPU info from within docker will not be correct.: ", + "output": "allow no pid host refers to Whether to allow no --pid=host setting. Some GPU info from within docker will not be correct.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_no_pid_host", + "output": "allow no pid host refers to Whether to allow no --pid=host setting. Some GPU info from within docker will not be correct.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_no_pid_host", + "output": "allow no pid host refers to Whether to allow no --pid=host setting. 
Some GPU info from within docker will not be correct.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "final_munging_memory_reduction_factor", + "output": "final munging memory reduction factor refers to Reduce memory usage during final ensemble feature engineering (1 uses most memory, larger values use less memory)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "final_munging_memory_reduction_factor", + "output": "final munging memory reduction factor refers to Factor to reduce estimated memory usage by: Reduce memory usage during final ensemble feature engineering (1 uses most memory, larger values use less memory)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "final munging memory reduction factor", + "output": "final munging memory reduction factor refers to Factor to reduce estimated memory usage by: Reduce memory usage during final ensemble feature engineering (1 uses most memory, larger values use less memory)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Factor to reduce estimated memory usage by: ", + "output": "final munging memory reduction factor refers to Factor to reduce estimated memory usage by: Reduce memory usage during final ensemble feature engineering (1 uses most memory, larger values use less memory)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting final_munging_memory_reduction_factor", + "output": "final munging memory reduction factor refers to Reduce memory usage during final ensemble feature engineering (1 uses most memory, larger values use less memory)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting 
final_munging_memory_reduction_factor", + "output": "final munging memory reduction factor refers to Factor to reduce estimated memory usage by: Reduce memory usage during final ensemble feature engineering (1 uses most memory, larger values use less memory)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "munging_memory_overhead_factor", + "output": "munging memory overhead factor refers to How much more memory a typical transformer needs than the input data. Can be increased if, e.g., final model munging uses too much memory due to parallel operations." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "munging_memory_overhead_factor", + "output": "munging memory overhead factor refers to Memory use per transformer per input data size: How much more memory a typical transformer needs than the input data. Can be increased if, e.g., final model munging uses too much memory due to parallel operations." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "munging memory overhead factor", + "output": "munging memory overhead factor refers to Memory use per transformer per input data size: How much more memory a typical transformer needs than the input data. Can be increased if, e.g., final model munging uses too much memory due to parallel operations." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Memory use per transformer per input data size: ", + "output": "munging memory overhead factor refers to Memory use per transformer per input data size: How much more memory a typical transformer needs than the input data. Can be increased if, e.g., final model munging uses too much memory due to parallel operations." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting munging_memory_overhead_factor", + "output": "munging memory overhead factor refers to How much more memory a typical transformer needs than the input data. Can be increased if, e.g., final model munging uses too much memory due to parallel operations." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting munging_memory_overhead_factor", + "output": "munging memory overhead factor refers to Memory use per transformer per input data size: How much more memory a typical transformer needs than the input data. Can be increased if, e.g., final model munging uses too much memory due to parallel operations." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "per_transformer_segfault_protection_ga", + "output": "per transformer segfault protection ga refers to Whether to have per-transformer segfault protection when munging data into transformed features during tuning and evolution. Can lead to significant slowdown for cases when large data but data is sampled, leaving large objects in parent fork, leading to slow fork time for each transformer.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "per_transformer_segfault_protection_ga", + "output": "per transformer segfault protection ga refers to Whether to have per-transformer segfault protection when munging data into transformed features during tuning and evolution. 
Can lead to significant slowdown for cases when large data but data is sampled, leaving large objects in parent fork, leading to slow fork time for each transformer.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "per transformer segfault protection ga", + "output": "per transformer segfault protection ga refers to Whether to have per-transformer segfault protection when munging data into transformed features during tuning and evolution. Can lead to significant slowdown for cases when large data but data is sampled, leaving large objects in parent fork, leading to slow fork time for each transformer.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to have per-transformer segfault protection when munging data into transformed features during tuning and evolution. Can lead to significant slowdown for cases when large data but data is sampled, leaving large objects in parent fork, leading to slow fork time for each transformer.: ", + "output": "per transformer segfault protection ga refers to Whether to have per-transformer segfault protection when munging data into transformed features during tuning and evolution. Can lead to significant slowdown for cases when large data but data is sampled, leaving large objects in parent fork, leading to slow fork time for each transformer.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting per_transformer_segfault_protection_ga", + "output": "per transformer segfault protection ga refers to Whether to have per-transformer segfault protection when munging data into transformed features during tuning and evolution. 
Can lead to significant slowdown for cases when large data but data is sampled, leaving large objects in parent fork, leading to slow fork time for each transformer.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting per_transformer_segfault_protection_ga", + "output": "per transformer segfault protection ga refers to Whether to have per-transformer segfault protection when munging data into transformed features during tuning and evolution. Can lead to significant slowdown for cases when large data but data is sampled, leaving large objects in parent fork, leading to slow fork time for each transformer.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "per_transformer_segfault_protection_final", + "output": "per transformer segfault protection final refers to Whether to have per-transformer segfault protection when munging data into transformed features during final model fitting and scoring. Can lead to significant slowdown for cases when large data but data is sampled, leaving large objects in parent fork, leading to slow fork time for each transformer.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "per_transformer_segfault_protection_final", + "output": "per transformer segfault protection final refers to Whether to have per-transformer segfault protection when munging data into transformed features during final model fitting and scoring. 
Can lead to significant slowdown for cases when large data but data is sampled, leaving large objects in parent fork, leading to slow fork time for each transformer.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "per transformer segfault protection final", + "output": "per transformer segfault protection final refers to Whether to have per-transformer segfault protection when munging data into transformed features during final model fitting and scoring. Can lead to significant slowdown for cases when large data but data is sampled, leaving large objects in parent fork, leading to slow fork time for each transformer.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to have per-transformer segfault protection when munging data into transformed features during final model fitting and scoring. Can lead to significant slowdown for cases when large data but data is sampled, leaving large objects in parent fork, leading to slow fork time for each transformer.: ", + "output": "per transformer segfault protection final refers to Whether to have per-transformer segfault protection when munging data into transformed features during final model fitting and scoring. Can lead to significant slowdown for cases when large data but data is sampled, leaving large objects in parent fork, leading to slow fork time for each transformer.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting per_transformer_segfault_protection_final", + "output": "per transformer segfault protection final refers to Whether to have per-transformer segfault protection when munging data into transformed features during final model fitting and scoring. 
Can lead to significant slowdown for cases when large data but data is sampled, leaving large objects in parent fork, leading to slow fork time for each transformer.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting per_transformer_segfault_protection_final", + "output": "per transformer segfault protection final refers to Whether to have per-transformer segfault protection when munging data into transformed features during final model fitting and scoring. Can lead to significant slowdown for cases when large data but data is sampled, leaving large objects in parent fork, leading to slow fork time for each transformer.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "submit_resource_wait_period", + "output": "submit resource wait period refers to How often to check resources (disk, memory, cpu) to see if need to stall submission." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "submit_resource_wait_period", + "output": "submit resource wait period refers to How often to check resources (disk, memory, cpu) to see if need to stall submission." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "submit resource wait period", + "output": "submit resource wait period refers to How often to check resources (disk, memory, cpu) to see if need to stall submission." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "submit resource wait period refers to How often to check resources (disk, memory, cpu) to see if need to stall submission." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting submit_resource_wait_period", + "output": "submit resource wait period refers to How often to check resources (disk, memory, cpu) to see if need to stall submission." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting submit_resource_wait_period", + "output": "submit resource wait period refers to How often to check resources (disk, memory, cpu) to see if need to stall submission." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall_subprocess_submission_cpu_threshold_pct", + "output": "stall subprocess submission cpu threshold pct refers to Stall submission of subprocesses if system CPU usage is higher than this threshold in percent (set to 100 to disable). A reasonable number is 90.0 if activated" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall_subprocess_submission_cpu_threshold_pct", + "output": "stall subprocess submission cpu threshold pct refers to Stall submission of subprocesses if system CPU usage is higher than this threshold in percent (set to 100 to disable). A reasonable number is 90.0 if activated" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall subprocess submission cpu threshold pct", + "output": "stall subprocess submission cpu threshold pct refers to Stall submission of subprocesses if system CPU usage is higher than this threshold in percent (set to 100 to disable). 
A reasonable number is 90.0 if activated" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "stall subprocess submission cpu threshold pct refers to Stall submission of subprocesses if system CPU usage is higher than this threshold in percent (set to 100 to disable). A reasonable number is 90.0 if activated" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting stall_subprocess_submission_cpu_threshold_pct", + "output": "stall subprocess submission cpu threshold pct refers to Stall submission of subprocesses if system CPU usage is higher than this threshold in percent (set to 100 to disable). A reasonable number is 90.0 if activated" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting stall_subprocess_submission_cpu_threshold_pct", + "output": "stall subprocess submission cpu threshold pct refers to Stall submission of subprocesses if system CPU usage is higher than this threshold in percent (set to 100 to disable). A reasonable number is 90.0 if activated" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall_subprocess_submission_dai_fork_threshold_pct", + "output": "stall subprocess submission dai fork threshold pct refers to Restrict/Stall submission of subprocesses if DAI fork count (across all experiments) per unit ulimit nproc soft limit is higher than this threshold in percent (set to -1 to disable, 0 for minimal forking). 
A reasonable number is 90.0 if activated" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall_subprocess_submission_dai_fork_threshold_pct", + "output": "stall subprocess submission dai fork threshold pct refers to Restrict/Stall submission of subprocesses if DAI fork count (across all experiments) per unit ulimit nproc soft limit is higher than this threshold in percent (set to -1 to disable, 0 for minimal forking). A reasonable number is 90.0 if activated" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall subprocess submission dai fork threshold pct", + "output": "stall subprocess submission dai fork threshold pct refers to Restrict/Stall submission of subprocesses if DAI fork count (across all experiments) per unit ulimit nproc soft limit is higher than this threshold in percent (set to -1 to disable, 0 for minimal forking). A reasonable number is 90.0 if activated" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "stall subprocess submission dai fork threshold pct refers to Restrict/Stall submission of subprocesses if DAI fork count (across all experiments) per unit ulimit nproc soft limit is higher than this threshold in percent (set to -1 to disable, 0 for minimal forking). A reasonable number is 90.0 if activated" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting stall_subprocess_submission_dai_fork_threshold_pct", + "output": "stall subprocess submission dai fork threshold pct refers to Restrict/Stall submission of subprocesses if DAI fork count (across all experiments) per unit ulimit nproc soft limit is higher than this threshold in percent (set to -1 to disable, 0 for minimal forking). 
A reasonable number is 90.0 if activated" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting stall_subprocess_submission_dai_fork_threshold_pct", + "output": "stall subprocess submission dai fork threshold pct refers to Restrict/Stall submission of subprocesses if DAI fork count (across all experiments) per unit ulimit nproc soft limit is higher than this threshold in percent (set to -1 to disable, 0 for minimal forking). A reasonable number is 90.0 if activated" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall_subprocess_submission_experiment_fork_threshold_pct", + "output": "stall subprocess submission experiment fork threshold pct refers to Restrict/Stall submission of subprocesses if experiment fork count (across all experiments) per unit ulimit nproc soft limit is higher than this threshold in percent (set to -1 to disable, 0 for minimal forking). A reasonable number is 90.0 if activated. For small data leads to overhead of about 0.1s per task submitted due to checks, so for scoring can slow things down for tests." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall_subprocess_submission_experiment_fork_threshold_pct", + "output": "stall subprocess submission experiment fork threshold pct refers to Restrict/Stall submission of subprocesses if experiment fork count (across all experiments) per unit ulimit nproc soft limit is higher than this threshold in percent (set to -1 to disable, 0 for minimal forking). A reasonable number is 90.0 if activated. For small data leads to overhead of about 0.1s per task submitted due to checks, so for scoring can slow things down for tests."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "stall subprocess submission experiment fork threshold pct", + "output": "stall subprocess submission experiment fork threshold pct refers to Restrict/Stall submission of subprocesses if experiment fork count (across all experiments) per unit ulimit nproc soft limit is higher than this threshold in percent (set to -1 to disable, 0 for minimal forking). A reasonable number is 90.0 if activated. For small data leads to overhead of about 0.1s per task submitted due to checks, so for scoring can slow things down for tests." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "stall subprocess submission experiment fork threshold pct refers to Restrict/Stall submission of subprocesses if experiment fork count (across all experiments) per unit ulimit nproc soft limit is higher than this threshold in percent (set to -1 to disable, 0 for minimal forking). A reasonable number is 90.0 if activated. For small data leads to overhead of about 0.1s per task submitted due to checks, so for scoring can slow things down for tests." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting stall_subprocess_submission_experiment_fork_threshold_pct", + "output": "stall subprocess submission experiment fork threshold pct refers to Restrict/Stall submission of subprocesses if experiment fork count (across all experiments) per unit ulimit nproc soft limit is higher than this threshold in percent (set to -1 to disable, 0 for minimal forking). A reasonable number is 90.0 if activated. For small data leads to overhead of about 0.1s per task submitted due to checks, so for scoring can slow things down for tests." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting stall_subprocess_submission_experiment_fork_threshold_pct", + "output": "stall subprocess submission experiment fork threshold pct refers to Restrict/Stall submission of subprocesses if experiment fork count (across all experiments) per unit ulimit nproc soft limit is higher than this threshold in percent (set to -1 to disable, 0 for minimal forking). A reasonable number is 90.0 if activated. For small data leads to overhead of about 0.1s per task submitted due to checks, so for scoring can slow things down for tests." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "restrict_initpool_by_memory", + "output": "restrict initpool by memory refers to Whether to restrict pool workers even if not used, by reducing number of pool workers available. Good if really huge number of experiments, but otherwise, best to have all pool workers ready and only stall submission of tasks so can be dynamic to multi-experiment environment" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "restrict_initpool_by_memory", + "output": "restrict initpool by memory refers to Whether to restrict pool workers even if not used, by reducing number of pool workers available. Good if really huge number of experiments, but otherwise, best to have all pool workers ready and only stall submission of tasks so can be dynamic to multi-experiment environment" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "restrict initpool by memory", + "output": "restrict initpool by memory refers to Whether to restrict pool workers even if not used, by reducing number of pool workers available. 
Good if really huge number of experiments, but otherwise, best to have all pool workers ready and only stall submission of tasks so can be dynamic to multi-experiment environment" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "restrict initpool by memory refers to Whether to restrict pool workers even if not used, by reducing number of pool workers available. Good if really huge number of experiments, but otherwise, best to have all pool workers ready and only stall submission of tasks so can be dynamic to multi-experiment environment" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting restrict_initpool_by_memory", + "output": "restrict initpool by memory refers to Whether to restrict pool workers even if not used, by reducing number of pool workers available. Good if really huge number of experiments, but otherwise, best to have all pool workers ready and only stall submission of tasks so can be dynamic to multi-experiment environment" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting restrict_initpool_by_memory", + "output": "restrict initpool by memory refers to Whether to restrict pool workers even if not used, by reducing number of pool workers available. 
Good if really huge number of experiments, but otherwise, best to have all pool workers ready and only stall submission of tasks so can be dynamic to multi-experiment environment" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "terminate_experiment_if_memory_low", + "output": "terminate experiment if memory low refers to Whether to terminate experiments if the system memory available falls below memory_limit_gb_terminate" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "terminate_experiment_if_memory_low", + "output": "terminate experiment if memory low refers to Whether to terminate experiments if the system memory available falls below memory_limit_gb_terminate" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "terminate experiment if memory low", + "output": "terminate experiment if memory low refers to Whether to terminate experiments if the system memory available falls below memory_limit_gb_terminate" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "terminate experiment if memory low refers to Whether to terminate experiments if the system memory available falls below memory_limit_gb_terminate" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting terminate_experiment_if_memory_low", + "output": "terminate experiment if memory low refers to Whether to terminate experiments if the system memory available falls below memory_limit_gb_terminate" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting terminate_experiment_if_memory_low", + "output": "terminate experiment if memory low refers to Whether to terminate experiments if the system 
memory available falls below memory_limit_gb_terminate" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "memory_limit_gb_terminate", + "output": "memory limit gb terminate refers to Memory in GB beyond which will terminate experiment if terminate_experiment_if_memory_low=true." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "memory_limit_gb_terminate", + "output": "memory limit gb terminate refers to Memory in GB beyond which will terminate experiment if terminate_experiment_if_memory_low=true." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "memory limit gb terminate", + "output": "memory limit gb terminate refers to Memory in GB beyond which will terminate experiment if terminate_experiment_if_memory_low=true." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "memory limit gb terminate refers to Memory in GB beyond which will terminate experiment if terminate_experiment_if_memory_low=true." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting memory_limit_gb_terminate", + "output": "memory limit gb terminate refers to Memory in GB beyond which will terminate experiment if terminate_experiment_if_memory_low=true." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting memory_limit_gb_terminate", + "output": "memory limit gb terminate refers to Memory in GB beyond which will terminate experiment if terminate_experiment_if_memory_low=true." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "scoring_data_directory", + "output": "scoring data directory refers to Path to use for scoring directory path relative to run path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "scoring_data_directory", + "output": "scoring data directory refers to Path to use for scoring directory path relative to run path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "scoring data directory", + "output": "scoring data directory refers to Path to use for scoring directory path relative to run path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "scoring data directory refers to Path to use for scoring directory path relative to run path" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting scoring_data_directory", + "output": "scoring data directory refers to Path to use for scoring directory path relative to run path" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting scoring_data_directory", + "output": "scoring data directory refers to Path to use for scoring directory path relative to run path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "last_exclusive_mode", + "output": "last exclusive mode refers to Internal helper to allow memory of if changed exclusive mode" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "last_exclusive_mode", + "output": "last exclusive mode refers to Internal helper to allow memory of if changed exclusive mode" + 
}, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "last exclusive mode", + "output": "last exclusive mode refers to Internal helper to allow memory of if changed exclusive mode" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "last exclusive mode refers to Internal helper to allow memory of if changed exclusive mode" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting last_exclusive_mode", + "output": "last exclusive mode refers to Internal helper to allow memory of if changed exclusive mode" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting last_exclusive_mode", + "output": "last exclusive mode refers to Internal helper to allow memory of if changed exclusive mode" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_acceptance_test_mojo_types", + "output": "mojo acceptance test mojo types refers to Which MOJO runtimes should be tested as part of the mini acceptance tests" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_acceptance_test_mojo_types", + "output": "mojo acceptance test mojo types refers to MOJO types to test at end of experiment: Which MOJO runtimes should be tested as part of the mini acceptance tests" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo acceptance test mojo types", + "output": "mojo acceptance test mojo types refers to MOJO types to test at end of experiment: Which MOJO runtimes should be tested as part of the mini acceptance tests" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert 
setting for Driverless AI", + "input": "MOJO types to test at end of experiment: ", + "output": "mojo acceptance test mojo types refers to MOJO types to test at end of experiment: Which MOJO runtimes should be tested as part of the mini acceptance tests" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_acceptance_test_mojo_types", + "output": "mojo acceptance test mojo types refers to Which MOJO runtimes should be tested as part of the mini acceptance tests" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_acceptance_test_mojo_types", + "output": "mojo acceptance test mojo types refers to MOJO types to test at end of experiment: Which MOJO runtimes should be tested as part of the mini acceptance tests" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_mojo_scoring_pipeline_for_features_only", + "output": "make mojo scoring pipeline for features only refers to Create MOJO for feature engineering pipeline only (no predictions)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make_mojo_scoring_pipeline_for_features_only", + "output": "make mojo scoring pipeline for features only refers to Create MOJO for feature engineering pipeline only (no predictions): Create MOJO for feature engineering pipeline only (no predictions)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "make mojo scoring pipeline for features only", + "output": "make mojo scoring pipeline for features only refers to Create MOJO for feature engineering pipeline only (no predictions): Create MOJO for feature engineering pipeline only (no predictions)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting 
for Driverless AI", + "input": "Create MOJO for feature engineering pipeline only (no predictions): ", + "output": "make mojo scoring pipeline for features only refers to Create MOJO for feature engineering pipeline only (no predictions): Create MOJO for feature engineering pipeline only (no predictions)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting make_mojo_scoring_pipeline_for_features_only", + "output": "make mojo scoring pipeline for features only refers to Create MOJO for feature engineering pipeline only (no predictions)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting make_mojo_scoring_pipeline_for_features_only", + "output": "make mojo scoring pipeline for features only refers to Create MOJO for feature engineering pipeline only (no predictions): Create MOJO for feature engineering pipeline only (no predictions)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_replace_target_encoding_with_grouped_input_cols", + "output": "mojo replace target encoding with grouped input cols refers to Replaces target encoding features by their input columns. Instead of CVTE_Age:Income:Zip, this will create Age:Income:Zip. Only when make_mojo_scoring_pipeline_for_features_only is enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo_replace_target_encoding_with_grouped_input_cols", + "output": "mojo replace target encoding with grouped input cols refers to Replaces target encoding features with concatenated input features.: Replaces target encoding features by their input columns. Instead of CVTE_Age:Income:Zip, this will create Age:Income:Zip. Only when make_mojo_scoring_pipeline_for_features_only is enabled." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mojo replace target encoding with grouped input cols", + "output": "mojo replace target encoding with grouped input cols refers to Replaces target encoding features with concatenated input features.: Replaces target encoding features by their input columns. Instead of CVTE_Age:Income:Zip, this will create Age:Income:Zip. Only when make_mojo_scoring_pipeline_for_features_only is enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Replaces target encoding features with concatenated input features.: ", + "output": "mojo replace target encoding with grouped input cols refers to Replaces target encoding features with concatenated input features.: Replaces target encoding features by their input columns. Instead of CVTE_Age:Income:Zip, this will create Age:Income:Zip. Only when make_mojo_scoring_pipeline_for_features_only is enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mojo_replace_target_encoding_with_grouped_input_cols", + "output": "mojo replace target encoding with grouped input cols refers to Replaces target encoding features by their input columns. Instead of CVTE_Age:Income:Zip, this will create Age:Income:Zip. Only when make_mojo_scoring_pipeline_for_features_only is enabled." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mojo_replace_target_encoding_with_grouped_input_cols", + "output": "mojo replace target encoding with grouped input cols refers to Replaces target encoding features with concatenated input features.: Replaces target encoding features by their input columns. Instead of CVTE_Age:Income:Zip, this will create Age:Income:Zip. Only when make_mojo_scoring_pipeline_for_features_only is enabled." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "predictions_as_transform_only", + "output": "predictions as transform only refers to Use pipeline to generate transformed features, when making predictions, bypassing the model that usually converts transformed features into predictions." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "predictions_as_transform_only", + "output": "predictions as transform only refers to Generate transformation when making predictions: Use pipeline to generate transformed features, when making predictions, bypassing the model that usually converts transformed features into predictions." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "predictions as transform only", + "output": "predictions as transform only refers to Generate transformation when making predictions: Use pipeline to generate transformed features, when making predictions, bypassing the model that usually converts transformed features into predictions." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Generate transformation when making predictions: ", + "output": "predictions as transform only refers to Generate transformation when making predictions: Use pipeline to generate transformed features, when making predictions, bypassing the model that usually converts transformed features into predictions." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting predictions_as_transform_only", + "output": "predictions as transform only refers to Use pipeline to generate transformed features, when making predictions, bypassing the model that usually converts transformed features into predictions." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting predictions_as_transform_only", + "output": "predictions as transform only refers to Generate transformation when making predictions: Use pipeline to generate transformed features, when making predictions, bypassing the model that usually converts transformed features into predictions." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_single_instance_db_access", + "output": "enable single instance db access refers to If set to true, will make sure only current instance can access its database" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_single_instance_db_access", + "output": "enable single instance db access refers to If set to true, will make sure only current instance can access its database" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable single instance db access", + "output": "enable single instance db access refers to If set to true, will make sure only current instance can access its database" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable single instance db access refers to If set to true, will make sure only current instance can access its database" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_single_instance_db_access", + "output": "enable single instance db access refers to If set to true, will make sure only current instance can access its database" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_single_instance_db_access", + "output": "enable single 
instance db access refers to If set to true, will make sure only current instance can access its database" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_pytorch_nlp", + "output": "enable pytorch nlp refers to Deprecated - maps to enable_pytorch_nlp_transformer and enable_pytorch_nlp_model in 1.10.2+" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_pytorch_nlp", + "output": "enable pytorch nlp refers to Deprecated - maps to enable_pytorch_nlp_transformer and enable_pytorch_nlp_model in 1.10.2+" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable pytorch nlp", + "output": "enable pytorch nlp refers to Deprecated - maps to enable_pytorch_nlp_transformer and enable_pytorch_nlp_model in 1.10.2+" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable pytorch nlp refers to Deprecated - maps to enable_pytorch_nlp_transformer and enable_pytorch_nlp_model in 1.10.2+" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_pytorch_nlp", + "output": "enable pytorch nlp refers to Deprecated - maps to enable_pytorch_nlp_transformer and enable_pytorch_nlp_model in 1.10.2+" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_pytorch_nlp", + "output": "enable pytorch nlp refers to Deprecated - maps to enable_pytorch_nlp_transformer and enable_pytorch_nlp_model in 1.10.2+" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_timeout_per_gpu", + "output": "check timeout per gpu refers to How long to wait per GPU for tensorflow/torch to run 
during system checks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check_timeout_per_gpu", + "output": "check timeout per gpu refers to How long to wait per GPU for tensorflow/torch to run during system checks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "check timeout per gpu", + "output": "check timeout per gpu refers to How long to wait per GPU for tensorflow/torch to run during system checks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "check timeout per gpu refers to How long to wait per GPU for tensorflow/torch to run during system checks." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting check_timeout_per_gpu", + "output": "check timeout per gpu refers to How long to wait per GPU for tensorflow/torch to run during system checks." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting check_timeout_per_gpu", + "output": "check timeout per gpu refers to How long to wait per GPU for tensorflow/torch to run during system checks." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu_exit_if_fails", + "output": "gpu exit if fails refers to Whether to fail start-up if cannot successfully run GPU checks" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu_exit_if_fails", + "output": "gpu exit if fails refers to Whether to fail start-up if cannot successfully run GPU checks" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "gpu exit if fails", + "output": "gpu exit if fails refers to Whether to fail start-up if cannot successfully run GPU checks" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "gpu exit if fails refers to Whether to fail start-up if cannot successfully run GPU checks" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting gpu_exit_if_fails", + "output": "gpu exit if fails refers to Whether to fail start-up if cannot successfully run GPU checks" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting gpu_exit_if_fails", + "output": "gpu exit if fails refers to Whether to fail start-up if cannot successfully run GPU checks" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_recipe", + "output": "time series recipe refers to Enable time series lag-based recipe with lag transformers. If disabled, the same train-test gap and periods are used, but no lag transformers are enabled. 
If disabled, the set of feature transformations is quite limited without lag transformers, so consider setting enable_time_unaware_transformers to true in order to treat the problem as more like an IID type problem." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_recipe", + "output": "time series recipe refers to Time-series lag-based recipe: Enable time series lag-based recipe with lag transformers. If disabled, the same train-test gap and periods are used, but no lag transformers are enabled. If disabled, the set of feature transformations is quite limited without lag transformers, so consider setting enable_time_unaware_transformers to true in order to treat the problem as more like an IID type problem." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time series recipe", + "output": "time series recipe refers to Time-series lag-based recipe: Enable time series lag-based recipe with lag transformers. If disabled, the same train-test gap and periods are used, but no lag transformers are enabled. If disabled, the set of feature transformations is quite limited without lag transformers, so consider setting enable_time_unaware_transformers to true in order to treat the problem as more like an IID type problem." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Time-series lag-based recipe: ", + "output": "time series recipe refers to Time-series lag-based recipe: Enable time series lag-based recipe with lag transformers. If disabled, the same train-test gap and periods are used, but no lag transformers are enabled. 
If disabled, the set of feature transformations is quite limited without lag transformers, so consider setting enable_time_unaware_transformers to true in order to treat the problem as more like an IID type problem." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting time_series_recipe", + "output": "time series recipe refers to Enable time series lag-based recipe with lag transformers. If disabled, the same train-test gap and periods are used, but no lag transformers are enabled. If disabled, the set of feature transformations is quite limited without lag transformers, so consider setting enable_time_unaware_transformers to true in order to treat the problem as more like an IID type problem." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting time_series_recipe", + "output": "time series recipe refers to Time-series lag-based recipe: Enable time series lag-based recipe with lag transformers. If disabled, the same train-test gap and periods are used, but no lag transformers are enabled. If disabled, the set of feature transformations is quite limited without lag transformers, so consider setting enable_time_unaware_transformers to true in order to treat the problem as more like an IID type problem." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_causal_split_recipe", + "output": "time series causal split recipe refers to Whether causal splits are used when time_series_recipe is false or whether to use same train-gap-test splits when lag transformers are disabled (default behavior). For train-test gap, period, etc. to be used when lag-based recipe is disabled, this must be false."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_causal_split_recipe", + "output": "time series causal split recipe refers to Whether causal recipe is used for non-lag-based recipe: Whether causal splits are used when time_series_recipe is false or whether to use same train-gap-test splits when lag transformers are disabled (default behavior). For train-test gap, period, etc. to be used when lag-based recipe is disabled, this must be false." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time series causal split recipe", + "output": "time series causal split recipe refers to Whether causal recipe is used for non-lag-based recipe: Whether causal splits are used when time_series_recipe is false or whether to use same train-gap-test splits when lag transformers are disabled (default behavior). For train-test gap, period, etc. to be used when lag-based recipe is disabled, this must be false." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether causal recipe is used for non-lag-based recipe: ", + "output": "time series causal split recipe refers to Whether causal recipe is used for non-lag-based recipe: Whether causal splits are used when time_series_recipe is false or whether to use same train-gap-test splits when lag transformers are disabled (default behavior). For train-test gap, period, etc. to be used when lag-based recipe is disabled, this must be false."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting time_series_causal_split_recipe", + "output": "time series causal split recipe refers to Whether causal splits are used when time_series_recipe is false or whether to use same train-gap-test splits when lag transformers are disabled (default behavior). For train-test gap, period, etc. to be used when lag-based recipe is disabled, this must be false." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting time_series_causal_split_recipe", + "output": "time series causal split recipe refers to Whether causal recipe is used for non-lag-based recipe: Whether causal splits are used when time_series_recipe is false or whether to use same train-gap-test splits when lag transformers are disabled (default behavior). For train-test gap, period, etc. to be used when lag-based recipe is disabled, this must be false." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_lags_if_causal_recipe", + "output": "use lags if causal recipe refers to Whether to use lag transformers when using causal-split for validation (as occurs when not using time-based lag recipe). If no time groups columns, lag transformers will still use time-column as sole time group column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_lags_if_causal_recipe", + "output": "use lags if causal recipe refers to Use lag transformers when using causal time-series recipe: Whether to use lag transformers when using causal-split for validation (as occurs when not using time-based lag recipe). If no time groups columns, lag transformers will still use time-column as sole time group column.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use lags if causal recipe", + "output": "use lags if causal recipe refers to Use lag transformers when using causal time-series recipe: Whether to use lag transformers when using causal-split for validation (as occurs when not using time-based lag recipe). If no time groups columns, lag transformers will still use time-column as sole time group column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Use lag transformers when using causal time-series recipe: ", + "output": "use lags if causal recipe refers to Use lag transformers when using causal time-series recipe: Whether to use lag transformers when using causal-split for validation (as occurs when not using time-based lag recipe). If no time groups columns, lag transformers will still use time-column as sole time group column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting use_lags_if_causal_recipe", + "output": "use lags if causal recipe refers to Whether to use lag transformers when using causal-split for validation (as occurs when not using time-based lag recipe). If no time groups columns, lag transformers will still use time-column as sole time group column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting use_lags_if_causal_recipe", + "output": "use lags if causal recipe refers to Use lag transformers when using causal time-series recipe: Whether to use lag transformers when using causal-split for validation (as occurs when not using time-based lag recipe). If no time groups columns, lag transformers will still use time-column as sole time group column.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_leaderboard_mode", + "output": "time series leaderboard mode refers to 'diverse': explore a diverse set of models built using various expert settings. Note that it's possible to rerun another such diverse leaderboard on top of the best-performing model(s), which will effectively help you compose these expert settings. 'sliding_window': If the forecast horizon is N periods, create a separate model for each of the (gap, horizon) pairs of (0,n), (n,n), (2*n,n), ..., (2*N-1, n) in units of time periods. The number of periods to predict per model n is controlled by the expert setting 'time_series_leaderboard_periods_per_model', which defaults to 1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_leaderboard_mode", + "output": "time series leaderboard mode refers to Control the automatic time-series leaderboard mode: 'diverse': explore a diverse set of models built using various expert settings. Note that it's possible to rerun another such diverse leaderboard on top of the best-performing model(s), which will effectively help you compose these expert settings. 'sliding_window': If the forecast horizon is N periods, create a separate model for each of the (gap, horizon) pairs of (0,n), (n,n), (2*n,n), ..., (2*N-1, n) in units of time periods. The number of periods to predict per model n is controlled by the expert setting 'time_series_leaderboard_periods_per_model', which defaults to 1." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time series leaderboard mode", + "output": "time series leaderboard mode refers to Control the automatic time-series leaderboard mode: 'diverse': explore a diverse set of models built using various expert settings.
Note that it's possible to rerun another such diverse leaderboard on top of the best-performing model(s), which will effectively help you compose these expert settings. 'sliding_window': If the forecast horizon is N periods, create a separate model for each of the (gap, horizon) pairs of (0,n), (n,n), (2*n,n), ..., (2*N-1, n) in units of time periods. The number of periods to predict per model n is controlled by the expert setting 'time_series_leaderboard_periods_per_model', which defaults to 1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Control the automatic time-series leaderboard mode: ", + "output": "time series leaderboard mode refers to Control the automatic time-series leaderboard mode: 'diverse': explore a diverse set of models built using various expert settings. Note that it's possible to rerun another such diverse leaderboard on top of the best-performing model(s), which will effectively help you compose these expert settings. 'sliding_window': If the forecast horizon is N periods, create a separate model for each of the (gap, horizon) pairs of (0,n), (n,n), (2*n,n), ..., (2*N-1, n) in units of time periods. The number of periods to predict per model n is controlled by the expert setting 'time_series_leaderboard_periods_per_model', which defaults to 1." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting time_series_leaderboard_mode", + "output": "time series leaderboard mode refers to 'diverse': explore a diverse set of models built using various expert settings.
Note that it's possible to rerun another such diverse leaderboard on top of the best-performing model(s), which will effectively help you compose these expert settings. 'sliding_window': If the forecast horizon is N periods, create a separate model for each of the (gap, horizon) pairs of (0,n), (n,n), (2*n,n), ..., (2*N-1, n) in units of time periods. The number of periods to predict per model n is controlled by the expert setting 'time_series_leaderboard_periods_per_model', which defaults to 1." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting time_series_leaderboard_mode", + "output": "time series leaderboard mode refers to Control the automatic time-series leaderboard mode: 'diverse': explore a diverse set of models built using various expert settings. Note that it's possible to rerun another such diverse leaderboard on top of the best-performing model(s), which will effectively help you compose these expert settings. 'sliding_window': If the forecast horizon is N periods, create a separate model for each of the (gap, horizon) pairs of (0,n), (n,n), (2*n,n), ..., (2*N-1, n) in units of time periods. The number of periods to predict per model n is controlled by the expert setting 'time_series_leaderboard_periods_per_model', which defaults to 1." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_leaderboard_periods_per_model", + "output": "time series leaderboard periods per model refers to Fine-control to limit the number of models built in the 'sliding_window' mode. Larger values lead to fewer models."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_leaderboard_periods_per_model", + "output": "time series leaderboard periods per model refers to Number of periods per model if time_series_leaderboard_mode is 'sliding_window'.: Fine-control to limit the number of models built in the 'sliding_window' mode. Larger values lead to fewer models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time series leaderboard periods per model", + "output": "time series leaderboard periods per model refers to Number of periods per model if time_series_leaderboard_mode is 'sliding_window'.: Fine-control to limit the number of models built in the 'sliding_window' mode. Larger values lead to fewer models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of periods per model if time_series_leaderboard_mode is 'sliding_window'.: ", + "output": "time series leaderboard periods per model refers to Number of periods per model if time_series_leaderboard_mode is 'sliding_window'.: Fine-control to limit the number of models built in the 'sliding_window' mode. Larger values lead to fewer models." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting time_series_leaderboard_periods_per_model", + "output": "time series leaderboard periods per model refers to Fine-control to limit the number of models built in the 'sliding_window' mode. Larger values lead to fewer models." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting time_series_leaderboard_periods_per_model", + "output": "time series leaderboard periods per model refers to Number of periods per model if time_series_leaderboard_mode is 'sliding_window'.: Fine-control to limit the number of models built in the 'sliding_window' mode. Larger values lead to fewer models." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_merge_splits", + "output": "time series merge splits refers to Whether to create larger validation splits that are not bound to the length of the forecast horizon." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_merge_splits", + "output": "time series merge splits refers to Larger validation splits for lag-based recipe: Whether to create larger validation splits that are not bound to the length of the forecast horizon." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time series merge splits", + "output": "time series merge splits refers to Larger validation splits for lag-based recipe: Whether to create larger validation splits that are not bound to the length of the forecast horizon." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Larger validation splits for lag-based recipe: ", + "output": "time series merge splits refers to Larger validation splits for lag-based recipe: Whether to create larger validation splits that are not bound to the length of the forecast horizon." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting time_series_merge_splits", + "output": "time series merge splits refers to Whether to create larger validation splits that are not bound to the length of the forecast horizon." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting time_series_merge_splits", + "output": "time series merge splits refers to Larger validation splits for lag-based recipe: Whether to create larger validation splits that are not bound to the length of the forecast horizon." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "merge_splits_max_valid_ratio", + "output": "merge splits max valid ratio refers to Maximum ratio of training data samples used for validation across splits when larger validation splits are created." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "merge_splits_max_valid_ratio", + "output": "merge splits max valid ratio refers to Maximum ratio of training data samples used for validation (-1 = auto): Maximum ratio of training data samples used for validation across splits when larger validation splits are created." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "merge splits max valid ratio", + "output": "merge splits max valid ratio refers to Maximum ratio of training data samples used for validation (-1 = auto): Maximum ratio of training data samples used for validation across splits when larger validation splits are created." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum ratio of training data samples used for validation (-1 = auto): ", + "output": "merge splits max valid ratio refers to Maximum ratio of training data samples used for validation (-1 = auto): Maximum ratio of training data samples used for validation across splits when larger validation splits are created." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting merge_splits_max_valid_ratio", + "output": "merge splits max valid ratio refers to Maximum ratio of training data samples used for validation across splits when larger validation splits are created." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting merge_splits_max_valid_ratio", + "output": "merge splits max valid ratio refers to Maximum ratio of training data samples used for validation (-1 = auto): Maximum ratio of training data samples used for validation across splits when larger validation splits are created." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_size_train_timespan", + "output": "fixed size train timespan refers to Whether to keep a fixed-size train timespan across time-based splits. That leads to roughly the same amount of train samples in every split. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed_size_train_timespan", + "output": "fixed size train timespan refers to Fixed-size train timespan across splits: Whether to keep a fixed-size train timespan across time-based splits. That leads to roughly the same amount of train samples in every split. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fixed size train timespan", + "output": "fixed size train timespan refers to Fixed-size train timespan across splits: Whether to keep a fixed-size train timespan across time-based splits. That leads to roughly the same amount of train samples in every split. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Fixed-size train timespan across splits: ", + "output": "fixed size train timespan refers to Fixed-size train timespan across splits: Whether to keep a fixed-size train timespan across time-based splits. That leads to roughly the same amount of train samples in every split. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fixed_size_train_timespan", + "output": "fixed size train timespan refers to Whether to keep a fixed-size train timespan across time-based splits. That leads to roughly the same amount of train samples in every split. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fixed_size_train_timespan", + "output": "fixed size train timespan refers to Fixed-size train timespan across splits: Whether to keep a fixed-size train timespan across time-based splits. That leads to roughly the same amount of train samples in every split. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_validation_fold_split_datetime_boundaries", + "output": "time series validation fold split datetime boundaries refers to Provide date or datetime timestamps (in same format as the time column) for custom training and validation splits like this: \"tr_start1, tr_end1, va_start1, va_end1, ..., tr_startN, tr_endN, va_startN, va_endN\"" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_validation_fold_split_datetime_boundaries", + "output": "time series validation fold split datetime boundaries refers to Custom validation splits for time-series experiments: Provide date or datetime timestamps (in same format as the time column) for custom training and validation splits like this: \"tr_start1, tr_end1, va_start1, va_end1, ..., tr_startN, tr_endN, va_startN, va_endN\"" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time series validation fold split datetime boundaries", + "output": "time series validation fold split datetime boundaries refers to Custom validation splits for time-series experiments: Provide date or datetime timestamps (in same format as the time column) for custom training and validation splits like this: \"tr_start1, tr_end1, va_start1, va_end1, ..., tr_startN, tr_endN, va_startN, va_endN\"" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Custom validation splits for time-series experiments: ", + "output": "time series validation fold split datetime boundaries refers to Custom validation splits for time-series experiments: Provide date or datetime timestamps (in same format as the time column) for custom training and validation splits like this: \"tr_start1, tr_end1, 
va_start1, va_end1, ..., tr_startN, tr_endN, va_startN, va_endN\"" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting time_series_validation_fold_split_datetime_boundaries", + "output": "time series validation fold split datetime boundaries refers to Provide date or datetime timestamps (in same format as the time column) for custom training and validation splits like this: \"tr_start1, tr_end1, va_start1, va_end1, ..., tr_startN, tr_endN, va_startN, va_endN\"" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting time_series_validation_fold_split_datetime_boundaries", + "output": "time series validation fold split datetime boundaries refers to Custom validation splits for time-series experiments: Provide date or datetime timestamps (in same format as the time column) for custom training and validation splits like this: \"tr_start1, tr_end1, va_start1, va_end1, ..., tr_startN, tr_endN, va_startN, va_endN\"" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_validation_splits", + "output": "time series validation splits refers to Set fixed number of time-based splits for internal model validation (actual number of splits allowed can be less and is determined at experiment run-time)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_validation_splits", + "output": "time series validation splits refers to Number of time-based splits for internal model validation (-1 = auto): Set fixed number of time-based splits for internal model validation (actual number of splits allowed can be less and is determined at experiment run-time)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time series validation splits", + "output": "time series validation splits refers to Number of time-based splits for internal model validation (-1 = auto): Set fixed number of time-based splits for internal model validation (actual number of splits allowed can be less and is determined at experiment run-time)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of time-based splits for internal model validation (-1 = auto): ", + "output": "time series validation splits refers to Number of time-based splits for internal model validation (-1 = auto): Set fixed number of time-based splits for internal model validation (actual number of splits allowed can be less and is determined at experiment run-time)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting time_series_validation_splits", + "output": "time series validation splits refers to Set fixed number of time-based splits for internal model validation (actual number of splits allowed can be less and is determined at experiment run-time)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting time_series_validation_splits", + "output": "time series validation splits refers to Number of time-based splits for internal model validation (-1 = auto): Set fixed number of time-based splits for internal model validation (actual number of splits allowed can be less and is determined at experiment run-time)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_splits_max_overlap", + "output": "time series splits max overlap refers to Maximum overlap between two time-based splits. 
Higher values increase the amount of possible splits." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_splits_max_overlap", + "output": "time series splits max overlap refers to Maximum overlap between two time-based splits.: Maximum overlap between two time-based splits. Higher values increase the amount of possible splits." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time series splits max overlap", + "output": "time series splits max overlap refers to Maximum overlap between two time-based splits.: Maximum overlap between two time-based splits. Higher values increase the amount of possible splits." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum overlap between two time-based splits.: ", + "output": "time series splits max overlap refers to Maximum overlap between two time-based splits.: Maximum overlap between two time-based splits. Higher values increase the amount of possible splits." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting time_series_splits_max_overlap", + "output": "time series splits max overlap refers to Maximum overlap between two time-based splits. Higher values increase the amount of possible splits." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting time_series_splits_max_overlap", + "output": "time series splits max overlap refers to Maximum overlap between two time-based splits.: Maximum overlap between two time-based splits. Higher values increase the amount of possible splits." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_ymd_timestamp", + "output": "min ymd timestamp refers to Earliest allowed datetime (in %Y%m%d format) for which to allow automatic conversion of integers to a time column during parsing. For example, 2010 or 201004 or 20100402 or 201004022312 can be converted to a valid date/datetime, but 1000 or 100004 or 10000402 or 10004022313 can not, and neither can 201000 or 20100500 etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_ymd_timestamp", + "output": "min ymd timestamp refers to Earliest allowed datetime (in %Y%m%d format) for which to allow automatic conversion of integers to a time column during parsing. For example, 2010 or 201004 or 20100402 or 201004022312 can be converted to a valid date/datetime, but 1000 or 100004 or 10000402 or 10004022313 can not, and neither can 201000 or 20100500 etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min ymd timestamp", + "output": "min ymd timestamp refers to Earliest allowed datetime (in %Y%m%d format) for which to allow automatic conversion of integers to a time column during parsing. For example, 2010 or 201004 or 20100402 or 201004022312 can be converted to a valid date/datetime, but 1000 or 100004 or 10000402 or 10004022313 can not, and neither can 201000 or 20100500 etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "min ymd timestamp refers to Earliest allowed datetime (in %Y%m%d format) for which to allow automatic conversion of integers to a time column during parsing. 
For example, 2010 or 201004 or 20100402 or 201004022312 can be converted to a valid date/datetime, but 1000 or 100004 or 10000402 or 10004022313 can not, and neither can 201000 or 20100500 etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_ymd_timestamp", + "output": "min ymd timestamp refers to Earliest allowed datetime (in %Y%m%d format) for which to allow automatic conversion of integers to a time column during parsing. For example, 2010 or 201004 or 20100402 or 201004022312 can be converted to a valid date/datetime, but 1000 or 100004 or 10000402 or 10004022313 can not, and neither can 201000 or 20100500 etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_ymd_timestamp", + "output": "min ymd timestamp refers to Earliest allowed datetime (in %Y%m%d format) for which to allow automatic conversion of integers to a time column during parsing. For example, 2010 or 201004 or 20100402 or 201004022312 can be converted to a valid date/datetime, but 1000 or 100004 or 10000402 or 10004022313 can not, and neither can 201000 or 20100500 etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_ymd_timestamp", + "output": "max ymd timestamp refers to Latest allowed datetime (in %Y%m%d format) for which to allow automatic conversion of integers to a time column during parsing. For example, 2010 or 201004 or 20100402 can be converted to a valid date/datetime, but 3000 or 300004 or 30000402 or 30004022313 can not, and neither can 201000 or 20100500 etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_ymd_timestamp", + "output": "max ymd timestamp refers to Latest allowed datetime (in %Y%m%d format) for which to allow automatic conversion of integers to a time column during parsing. 
For example, 2010 or 201004 or 20100402 can be converted to a valid date/datetime, but 3000 or 300004 or 30000402 or 30004022313 can not, and neither can 201000 or 20100500 etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max ymd timestamp", + "output": "max ymd timestamp refers to Latest allowed datetime (in %Y%m%d format) for which to allow automatic conversion of integers to a time column during parsing. For example, 2010 or 201004 or 20100402 can be converted to a valid date/datetime, but 3000 or 300004 or 30000402 or 30004022313 can not, and neither can 201000 or 20100500 etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max ymd timestamp refers to Latest allowed datetime (in %Y%m%d format) for which to allow automatic conversion of integers to a time column during parsing. For example, 2010 or 201004 or 20100402 can be converted to a valid date/datetime, but 3000 or 300004 or 30000402 or 30004022313 can not, and neither can 201000 or 20100500 etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_ymd_timestamp", + "output": "max ymd timestamp refers to Latest allowed datetime (in %Y%m%d format) for which to allow automatic conversion of integers to a time column during parsing. For example, 2010 or 201004 or 20100402 can be converted to a valid date/datetime, but 3000 or 300004 or 30000402 or 30004022313 can not, and neither can 201000 or 20100500 etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_ymd_timestamp", + "output": "max ymd timestamp refers to Latest allowed datetime (in %Y%m%d format) for which to allow automatic conversion of integers to a time column during parsing. 
For example, 2010 or 201004 or 20100402 can be converted to a valid date/datetime, but 3000 or 300004 or 30000402 or 30004022313 can not, and neither can 201000 or 20100500 etc." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_datetime_format_detection", + "output": "max rows datetime format detection refers to maximum number of data samples (randomly selected rows) for date/datetime format detection" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_rows_datetime_format_detection", + "output": "max rows datetime format detection refers to maximum number of data samples (randomly selected rows) for date/datetime format detection" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max rows datetime format detection", + "output": "max rows datetime format detection refers to maximum number of data samples (randomly selected rows) for date/datetime format detection" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max rows datetime format detection refers to maximum number of data samples (randomly selected rows) for date/datetime format detection" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_rows_datetime_format_detection", + "output": "max rows datetime format detection refers to maximum number of data samples (randomly selected rows) for date/datetime format detection" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_rows_datetime_format_detection", + "output": "max rows datetime format detection refers to maximum number of data samples (randomly selected rows) for date/datetime format detection" + }, + { + 
"prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "disallowed_datetime_formats", + "output": "disallowed datetime formats refers to Manually disables certain datetime formats during data ingest and experiments. For example, ['%y'] will avoid parsing columns that contain '00', '01', '02' string values as a date column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "disallowed_datetime_formats", + "output": "disallowed datetime formats refers to List of disallowed datetime formats.: Manually disables certain datetime formats during data ingest and experiments. For example, ['%y'] will avoid parsing columns that contain '00', '01', '02' string values as a date column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "disallowed datetime formats", + "output": "disallowed datetime formats refers to List of disallowed datetime formats.: Manually disables certain datetime formats during data ingest and experiments. For example, ['%y'] will avoid parsing columns that contain '00', '01', '02' string values as a date column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "List of disallowed datetime formats.: ", + "output": "disallowed datetime formats refers to List of disallowed datetime formats.: Manually disables certain datetime formats during data ingest and experiments. For example, ['%y'] will avoid parsing columns that contain '00', '01', '02' string values as a date column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting disallowed_datetime_formats", + "output": "disallowed datetime formats refers to Manually disables certain datetime formats during data ingest and experiments. 
For example, ['%y'] will avoid parsing columns that contain '00', '01', '02' string values as a date column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting disallowed_datetime_formats", + "output": "disallowed datetime formats refers to List of disallowed datetime formats.: Manually disables certain datetime formats during data ingest and experiments. For example, ['%y'] will avoid parsing columns that contain '00', '01', '02' string values as a date column. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_datetime_cache", + "output": "use datetime cache refers to Whether to use datetime cache" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use_datetime_cache", + "output": "use datetime cache refers to Whether to use datetime cache" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "use datetime cache", + "output": "use datetime cache refers to Whether to use datetime cache" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "use datetime cache refers to Whether to use datetime cache" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting use_datetime_cache", + "output": "use datetime cache refers to Whether to use datetime cache" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting use_datetime_cache", + "output": "use datetime cache refers to Whether to use datetime cache" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datetime_cache_min_rows", + "output": "datetime cache min rows refers to 
Minimum amount of rows required to utilize datetime cache" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datetime_cache_min_rows", + "output": "datetime cache min rows refers to Minimum amount of rows required to utilize datetime cache" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datetime cache min rows", + "output": "datetime cache min rows refers to Minimum amount of rows required to utilize datetime cache" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "datetime cache min rows refers to Minimum amount of rows required to utilize datetime cache" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting datetime_cache_min_rows", + "output": "datetime cache min rows refers to Minimum amount of rows required to utilize datetime cache" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting datetime_cache_min_rows", + "output": "datetime cache min rows refers to Minimum amount of rows required to utilize datetime cache" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "holiday_features", + "output": "holiday features refers to Automatically generate is-holiday features from date columns" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "holiday_features", + "output": "holiday features refers to Generate holiday features: Automatically generate is-holiday features from date columns" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "holiday features", + "output": "holiday features refers to 
Generate holiday features: Automatically generate is-holiday features from date columns" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Generate holiday features: ", + "output": "holiday features refers to Generate holiday features: Automatically generate is-holiday features from date columns" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting holiday_features", + "output": "holiday features refers to Automatically generate is-holiday features from date columns" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting holiday_features", + "output": "holiday features refers to Generate holiday features: Automatically generate is-holiday features from date columns" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "holiday_countries", + "output": "holiday countries refers to List of countries for which to look up holiday calendar and to generate is-Holiday features for" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "holiday_countries", + "output": "holiday countries refers to Country code(s) for holiday features: List of countries for which to look up holiday calendar and to generate is-Holiday features for" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "holiday countries", + "output": "holiday countries refers to Country code(s) for holiday features: List of countries for which to look up holiday calendar and to generate is-Holiday features for" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Country code(s) for holiday features: ", + "output": "holiday countries refers to 
Country code(s) for holiday features: List of countries for which to look up holiday calendar and to generate is-Holiday features for" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting holiday_countries", + "output": "holiday countries refers to List of countries for which to look up holiday calendar and to generate is-Holiday features for" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting holiday_countries", + "output": "holiday countries refers to Country code(s) for holiday features: List of countries for which to look up holiday calendar and to generate is-Holiday features for" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_time_series_properties_sample_size", + "output": "max time series properties sample size refers to Max. sample size for automatic determination of time series train/valid split properties, only if time column is selected" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_time_series_properties_sample_size", + "output": "max time series properties sample size refers to Max. sample size for automatic determination of time series train/valid split properties, only if time column is selected" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max time series properties sample size", + "output": "max time series properties sample size refers to Max. sample size for automatic determination of time series train/valid split properties, only if time column is selected" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max time series properties sample size refers to Max. 
sample size for automatic determination of time series train/valid split properties, only if time column is selected" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_time_series_properties_sample_size", + "output": "max time series properties sample size refers to Max. sample size for automatic determination of time series train/valid split properties, only if time column is selected" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_time_series_properties_sample_size", + "output": "max time series properties sample size refers to Max. sample size for automatic determination of time series train/valid split properties, only if time column is selected" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_lag_sizes", + "output": "max lag sizes refers to Maximum number of lag sizes to use for lags-based time-series experiments. are sampled from if sample_lag_sizes==True, else all are taken (-1 == automatic)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_lag_sizes", + "output": "max lag sizes refers to Maximum number of lag sizes to use for lags-based time-series experiments. are sampled from if sample_lag_sizes==True, else all are taken (-1 == automatic)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max lag sizes", + "output": "max lag sizes refers to Maximum number of lag sizes to use for lags-based time-series experiments. 
are sampled from if sample_lag_sizes==True, else all are taken (-1 == automatic)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max lag sizes refers to Maximum number of lag sizes to use for lags-based time-series experiments. are sampled from if sample_lag_sizes==True, else all are taken (-1 == automatic)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_lag_sizes", + "output": "max lag sizes refers to Maximum number of lag sizes to use for lags-based time-series experiments. are sampled from if sample_lag_sizes==True, else all are taken (-1 == automatic)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_lag_sizes", + "output": "max lag sizes refers to Maximum number of lag sizes to use for lags-based time-series experiments. are sampled from if sample_lag_sizes==True, else all are taken (-1 == automatic)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_lag_autocorrelation", + "output": "min lag autocorrelation refers to Minimum required autocorrelation threshold for a lag to be considered for feature engineering" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_lag_autocorrelation", + "output": "min lag autocorrelation refers to Minimum required autocorrelation threshold for a lag to be considered for feature engineering" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min lag autocorrelation", + "output": "min lag autocorrelation refers to Minimum required autocorrelation threshold for a lag to be considered for feature engineering" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the 
following expert setting for Driverless AI", + "input": "", + "output": "min lag autocorrelation refers to Minimum required autocorrelation threshold for a lag to be considered for feature engineering" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_lag_autocorrelation", + "output": "min lag autocorrelation refers to Minimum required autocorrelation threshold for a lag to be considered for feature engineering" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_lag_autocorrelation", + "output": "min lag autocorrelation refers to Minimum required autocorrelation threshold for a lag to be considered for feature engineering" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_signal_lag_sizes", + "output": "max signal lag sizes refers to How many samples of lag sizes to use for a single time group (single time series signal)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_signal_lag_sizes", + "output": "max signal lag sizes refers to How many samples of lag sizes to use for a single time group (single time series signal)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max signal lag sizes", + "output": "max signal lag sizes refers to How many samples of lag sizes to use for a single time group (single time series signal)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max signal lag sizes refers to How many samples of lag sizes to use for a single time group (single time series signal)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_signal_lag_sizes", + 
"output": "max signal lag sizes refers to How many samples of lag sizes to use for a single time group (single time series signal)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_signal_lag_sizes", + "output": "max signal lag sizes refers to How many samples of lag sizes to use for a single time group (single time series signal)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "sample_lag_sizes", + "output": "sample lag sizes refers to If enabled, sample from a set of possible lag sizes (e.g., lags=[1, 4, 8]) for each lag-based transformer, to no more than max_sampled_lag_sizes lags. Can help reduce overall model complexity and size, esp. when many unavailable columns for prediction." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "sample_lag_sizes", + "output": "sample lag sizes refers to Whether to sample lag sizes: If enabled, sample from a set of possible lag sizes (e.g., lags=[1, 4, 8]) for each lag-based transformer, to no more than max_sampled_lag_sizes lags. Can help reduce overall model complexity and size, esp. when many unavailable columns for prediction." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "sample lag sizes", + "output": "sample lag sizes refers to Whether to sample lag sizes: If enabled, sample from a set of possible lag sizes (e.g., lags=[1, 4, 8]) for each lag-based transformer, to no more than max_sampled_lag_sizes lags. Can help reduce overall model complexity and size, esp. when many unavailable columns for prediction." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to sample lag sizes: ", + "output": "sample lag sizes refers to Whether to sample lag sizes: If enabled, sample from a set of possible lag sizes (e.g., lags=[1, 4, 8]) for each lag-based transformer, to no more than max_sampled_lag_sizes lags. Can help reduce overall model complexity and size, esp. when many unavailable columns for prediction." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting sample_lag_sizes", + "output": "sample lag sizes refers to If enabled, sample from a set of possible lag sizes (e.g., lags=[1, 4, 8]) for each lag-based transformer, to no more than max_sampled_lag_sizes lags. Can help reduce overall model complexity and size, esp. when many unavailable columns for prediction." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting sample_lag_sizes", + "output": "sample lag sizes refers to Whether to sample lag sizes: If enabled, sample from a set of possible lag sizes (e.g., lags=[1, 4, 8]) for each lag-based transformer, to no more than max_sampled_lag_sizes lags. Can help reduce overall model complexity and size, esp. when many unavailable columns for prediction." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_sampled_lag_sizes", + "output": "max sampled lag sizes refers to If sample_lag_sizes is enabled, sample from a set of possible lag sizes (e.g., lags=[1, 4, 8]) for each lag-based transformer, to no more than max_sampled_lag_sizes lags. Can help reduce overall model complexity and size. Defaults to -1 (auto), in which case it's the same as the feature interaction depth controlled by max_feature_interaction_depth." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_sampled_lag_sizes", + "output": "max sampled lag sizes refers to Number of sampled lag sizes. -1 for auto.: If sample_lag_sizes is enabled, sample from a set of possible lag sizes (e.g., lags=[1, 4, 8]) for each lag-based transformer, to no more than max_sampled_lag_sizes lags. Can help reduce overall model complexity and size. Defaults to -1 (auto), in which case it's the same as the feature interaction depth controlled by max_feature_interaction_depth." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max sampled lag sizes", + "output": "max sampled lag sizes refers to Number of sampled lag sizes. -1 for auto.: If sample_lag_sizes is enabled, sample from a set of possible lag sizes (e.g., lags=[1, 4, 8]) for each lag-based transformer, to no more than max_sampled_lag_sizes lags. Can help reduce overall model complexity and size. Defaults to -1 (auto), in which case it's the same as the feature interaction depth controlled by max_feature_interaction_depth." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Number of sampled lag sizes. -1 for auto.: ", + "output": "max sampled lag sizes refers to Number of sampled lag sizes. -1 for auto.: If sample_lag_sizes is enabled, sample from a set of possible lag sizes (e.g., lags=[1, 4, 8]) for each lag-based transformer, to no more than max_sampled_lag_sizes lags. Can help reduce overall model complexity and size. Defaults to -1 (auto), in which case it's the same as the feature interaction depth controlled by max_feature_interaction_depth." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_sampled_lag_sizes", + "output": "max sampled lag sizes refers to If sample_lag_sizes is enabled, sample from a set of possible lag sizes (e.g., lags=[1, 4, 8]) for each lag-based transformer, to no more than max_sampled_lag_sizes lags. Can help reduce overall model complexity and size. Defaults to -1 (auto), in which case it's the same as the feature interaction depth controlled by max_feature_interaction_depth." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_sampled_lag_sizes", + "output": "max sampled lag sizes refers to Number of sampled lag sizes. -1 for auto.: If sample_lag_sizes is enabled, sample from a set of possible lag sizes (e.g., lags=[1, 4, 8]) for each lag-based transformer, to no more than max_sampled_lag_sizes lags. Can help reduce overall model complexity and size. Defaults to -1 (auto), in which case it's the same as the feature interaction depth controlled by max_feature_interaction_depth." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override_lag_sizes", + "output": "override lag sizes refers to Override lags to be usede.g. [7, 14, 21] # this exact liste.g. 21 # produce from 1 to 21e.g. 21:3 produce from 1 to 21 in step of 3e.g. 5-21 produce from 5 to 21e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override_lag_sizes", + "output": "override lag sizes refers to Time-series lags override, e.g. [7, 14, 21]: Override lags to be usede.g. [7, 14, 21] # this exact liste.g. 21 # produce from 1 to 21e.g. 21:3 produce from 1 to 21 in step of 3e.g. 5-21 produce from 5 to 21e.g. 
5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override lag sizes", + "output": "override lag sizes refers to Time-series lags override, e.g. [7, 14, 21]: Override lags to be used e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Time-series lags override, e.g. [7, 14, 21]: ", + "output": "override lag sizes refers to Time-series lags override, e.g. [7, 14, 21]: Override lags to be used e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting override_lag_sizes", + "output": "override lag sizes refers to Override lags to be used e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting override_lag_sizes", + "output": "override lag sizes refers to Time-series lags override, e.g. [7, 14, 21]: Override lags to be used e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 
5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override_ufapt_lag_sizes", + "output": "override ufapt lag sizes refers to Override lags to be used for features that are not known ahead of time e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override_ufapt_lag_sizes", + "output": "override ufapt lag sizes refers to Lags override for features that are not known ahead of time: Override lags to be used for features that are not known ahead of time e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override ufapt lag sizes", + "output": "override ufapt lag sizes refers to Lags override for features that are not known ahead of time: Override lags to be used for features that are not known ahead of time e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Lags override for features that are not known ahead of time: ", + "output": "override ufapt lag sizes refers to Lags override for features that are not known ahead of time: Override lags to be used for features that are not known ahead of time e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 
21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting override_ufapt_lag_sizes", + "output": "override ufapt lag sizes refers to Override lags to be used for features that are not known ahead of time e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting override_ufapt_lag_sizes", + "output": "override ufapt lag sizes refers to Lags override for features that are not known ahead of time: Override lags to be used for features that are not known ahead of time e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override_non_ufapt_lag_sizes", + "output": "override non ufapt lag sizes refers to Override lags to be used for features that are known ahead of time e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override_non_ufapt_lag_sizes", + "output": "override non ufapt lag sizes refers to Lags override for features that are known ahead of time: Override lags to be used for features that are known ahead of time e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 
5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "override non ufapt lag sizes", + "output": "override non ufapt lag sizes refers to Lags override for features that are known ahead of time: Override lags to be used for features that are known ahead of time e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Lags override for features that are known ahead of time: ", + "output": "override non ufapt lag sizes refers to Lags override for features that are known ahead of time: Override lags to be used for features that are known ahead of time e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting override_non_ufapt_lag_sizes", + "output": "override non ufapt lag sizes refers to Override lags to be used for features that are known ahead of time e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 5-21 produce from 5 to 21 e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting override_non_ufapt_lag_sizes", + "output": "override non ufapt lag sizes refers to Lags override for features that are known ahead of time: Override lags to be used for features that are known ahead of time e.g. [7, 14, 21] # this exact list e.g. 21 # produce from 1 to 21 e.g. 21:3 produce from 1 to 21 in step of 3 e.g. 
5-21 produce from 5 to 21 e.g. 5-21:3 produce from 5 to 21 in step of 3 " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_lag_size", + "output": "min lag size refers to Smallest considered lag size" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min_lag_size", + "output": "min lag size refers to Smallest considered lag size (-1 = auto): Smallest considered lag size" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "min lag size", + "output": "min lag size refers to Smallest considered lag size (-1 = auto): Smallest considered lag size" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Smallest considered lag size (-1 = auto): ", + "output": "min lag size refers to Smallest considered lag size (-1 = auto): Smallest considered lag size" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting min_lag_size", + "output": "min lag size refers to Smallest considered lag size" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting min_lag_size", + "output": "min lag size refers to Smallest considered lag size (-1 = auto): Smallest considered lag size" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_time_column_as_feature", + "output": "allow time column as feature refers to Whether to enable feature engineering based on selected time column, e.g. Date~weekday."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_time_column_as_feature", + "output": "allow time column as feature refers to Enable feature engineering from time column: Whether to enable feature engineering based on selected time column, e.g. Date~weekday." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow time column as feature", + "output": "allow time column as feature refers to Enable feature engineering from time column: Whether to enable feature engineering based on selected time column, e.g. Date~weekday." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable feature engineering from time column: ", + "output": "allow time column as feature refers to Enable feature engineering from time column: Whether to enable feature engineering based on selected time column, e.g. Date~weekday." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_time_column_as_feature", + "output": "allow time column as feature refers to Whether to enable feature engineering based on selected time column, e.g. Date~weekday." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_time_column_as_feature", + "output": "allow time column as feature refers to Enable feature engineering from time column: Whether to enable feature engineering based on selected time column, e.g. Date~weekday." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_time_column_as_numeric_feature", + "output": "allow time column as numeric feature refers to Whether to enable integer time column to be used as a numeric feature. If using time series recipe, using time column (numeric time stamps) as input features can lead to model that memorizes the actual time stamps instead of features that generalize to the future. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_time_column_as_numeric_feature", + "output": "allow time column as numeric feature refers to Allow integer time column as numeric feature: Whether to enable integer time column to be used as a numeric feature. If using time series recipe, using time column (numeric time stamps) as input features can lead to model that memorizes the actual time stamps instead of features that generalize to the future. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow time column as numeric feature", + "output": "allow time column as numeric feature refers to Allow integer time column as numeric feature: Whether to enable integer time column to be used as a numeric feature. If using time series recipe, using time column (numeric time stamps) as input features can lead to model that memorizes the actual time stamps instead of features that generalize to the future. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Allow integer time column as numeric feature: ", + "output": "allow time column as numeric feature refers to Allow integer time column as numeric feature: Whether to enable integer time column to be used as a numeric feature. If using time series recipe, using time column (numeric time stamps) as input features can lead to model that memorizes the actual time stamps instead of features that generalize to the future. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_time_column_as_numeric_feature", + "output": "allow time column as numeric feature refers to Whether to enable integer time column to be used as a numeric feature. If using time series recipe, using time column (numeric time stamps) as input features can lead to model that memorizes the actual time stamps instead of features that generalize to the future. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_time_column_as_numeric_feature", + "output": "allow time column as numeric feature refers to Allow integer time column as numeric feature: Whether to enable integer time column to be used as a numeric feature. If using time series recipe, using time column (numeric time stamps) as input features can lead to model that memorizes the actual time stamps instead of features that generalize to the future. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datetime_funcs", + "output": "datetime funcs refers to Allowed date or date-time transformations. Date transformers include: year, quarter, month, week, weekday, day, dayofyear, num. Date transformers also include: hour, minute, second. Features in DAI will show up as get_ + transformation name. E.g. 
num is a direct numeric value representing the floating point value of time, which can lead to over-fitting if used on IID problems. So this is turned off by default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datetime_funcs", + "output": "datetime funcs refers to Allowed date and date-time transformations: Allowed date or date-time transformations. Date transformers include: year, quarter, month, week, weekday, day, dayofyear, num. Date transformers also include: hour, minute, second. Features in DAI will show up as get_ + transformation name. E.g. num is a direct numeric value representing the floating point value of time, which can lead to over-fitting if used on IID problems. So this is turned off by default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datetime funcs", + "output": "datetime funcs refers to Allowed date and date-time transformations: Allowed date or date-time transformations. Date transformers include: year, quarter, month, week, weekday, day, dayofyear, num. Date transformers also include: hour, minute, second. Features in DAI will show up as get_ + transformation name. E.g. num is a direct numeric value representing the floating point value of time, which can lead to over-fitting if used on IID problems. So this is turned off by default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Allowed date and date-time transformations: ", + "output": "datetime funcs refers to Allowed date and date-time transformations: Allowed date or date-time transformations. Date transformers include: year, quarter, month, week, weekday, day, dayofyear, num. Date transformers also include: hour, minute, second. Features in DAI will show up as get_ + transformation name. E.g. 
num is a direct numeric value representing the floating point value of time, which can lead to over-fitting if used on IID problems. So this is turned off by default." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting datetime_funcs", + "output": "datetime funcs refers to Allowed date or date-time transformations. Date transformers include: year, quarter, month, week, weekday, day, dayofyear, num. Date transformers also include: hour, minute, second. Features in DAI will show up as get_ + transformation name. E.g. num is a direct numeric value representing the floating point value of time, which can lead to over-fitting if used on IID problems. So this is turned off by default." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting datetime_funcs", + "output": "datetime funcs refers to Allowed date and date-time transformations: Allowed date or date-time transformations. Date transformers include: year, quarter, month, week, weekday, day, dayofyear, num. Date transformers also include: hour, minute, second. Features in DAI will show up as get_ + transformation name. E.g. num is a direct numeric value representing the floating point value of time, which can lead to over-fitting if used on IID problems. So this is turned off by default." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "filter_datetime_funcs", + "output": "filter datetime funcs refers to Whether to filter out date and date-time transformations that lead to unseen values in the future. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "filter_datetime_funcs", + "output": "filter datetime funcs refers to Auto filtering of date and date-time transformations: Whether to filter out date and date-time transformations that lead to unseen values in the future. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "filter datetime funcs", + "output": "filter datetime funcs refers to Auto filtering of date and date-time transformations: Whether to filter out date and date-time transformations that lead to unseen values in the future. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Auto filtering of date and date-time transformations: ", + "output": "filter datetime funcs refers to Auto filtering of date and date-time transformations: Whether to filter out date and date-time transformations that lead to unseen values in the future. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting filter_datetime_funcs", + "output": "filter datetime funcs refers to Whether to filter out date and date-time transformations that lead to unseen values in the future. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting filter_datetime_funcs", + "output": "filter datetime funcs refers to Auto filtering of date and date-time transformations: Whether to filter out date and date-time transformations that lead to unseen values in the future. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_tgc_as_features", + "output": "allow tgc as features refers to Whether to consider time groups columns (tgc) as standalone features. 
Note that 'time_column' is treated separately via 'Allow to engineer features from time column'. Note that tgc_allow_target_encoding independently controls if time column groups are target encoded. Use allowed_coltypes_for_tgc_as_features for control per feature type. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow_tgc_as_features", + "output": "allow tgc as features refers to Consider time groups columns as standalone features: Whether to consider time groups columns (tgc) as standalone features. Note that 'time_column' is treated separately via 'Allow to engineer features from time column'. Note that tgc_allow_target_encoding independently controls if time column groups are target encoded. Use allowed_coltypes_for_tgc_as_features for control per feature type. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allow tgc as features", + "output": "allow tgc as features refers to Consider time groups columns as standalone features: Whether to consider time groups columns (tgc) as standalone features. Note that 'time_column' is treated separately via 'Allow to engineer features from time column'. Note that tgc_allow_target_encoding independently controls if time column groups are target encoded. Use allowed_coltypes_for_tgc_as_features for control per feature type. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Consider time groups columns as standalone features: ", + "output": "allow tgc as features refers to Consider time groups columns as standalone features: Whether to consider time groups columns (tgc) as standalone features. Note that 'time_column' is treated separately via 'Allow to engineer features from time column'. Note that tgc_allow_target_encoding independently controls if time column groups are target encoded. 
Use allowed_coltypes_for_tgc_as_features for control per feature type. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allow_tgc_as_features", + "output": "allow tgc as features refers to Whether to consider time groups columns (tgc) as standalone features. Note that 'time_column' is treated separately via 'Allow to engineer features from time column'. Note that tgc_allow_target_encoding independently controls if time column groups are target encoded. Use allowed_coltypes_for_tgc_as_features for control per feature type. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allow_tgc_as_features", + "output": "allow tgc as features refers to Consider time groups columns as standalone features: Whether to consider time groups columns (tgc) as standalone features. Note that 'time_column' is treated separately via 'Allow to engineer features from time column'. Note that tgc_allow_target_encoding independently controls if time column groups are target encoded. Use allowed_coltypes_for_tgc_as_features for control per feature type. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allowed_coltypes_for_tgc_as_features", + "output": "allowed coltypes for tgc as features refers to Which time groups columns (tgc) feature types to consider as standalone features, if the corresponding flag \"Consider time groups columns as standalone features\" is set to true. E.g. all column types would be [\"numeric\", \"categorical\", \"ohe_categorical\", \"datetime\", \"date\", \"text\"]. Note that 'time_column' is treated separately via 'Allow to engineer features from time column'. Note that if lag-based time series recipe is disabled, then all tgc are allowed features. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allowed_coltypes_for_tgc_as_features", + "output": "allowed coltypes for tgc as features refers to Which tgc feature types to consider as standalone features: Which time groups columns (tgc) feature types to consider as standalone features, if the corresponding flag \"Consider time groups columns as standalone features\" is set to true. E.g. all column types would be [\"numeric\", \"categorical\", \"ohe_categorical\", \"datetime\", \"date\", \"text\"]. Note that 'time_column' is treated separately via 'Allow to engineer features from time column'. Note that if lag-based time series recipe is disabled, then all tgc are allowed features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "allowed coltypes for tgc as features", + "output": "allowed coltypes for tgc as features refers to Which tgc feature types to consider as standalone features: Which time groups columns (tgc) feature types to consider as standalone features, if the corresponding flag \"Consider time groups columns as standalone features\" is set to true. E.g. all column types would be [\"numeric\", \"categorical\", \"ohe_categorical\", \"datetime\", \"date\", \"text\"]. Note that 'time_column' is treated separately via 'Allow to engineer features from time column'. Note that if lag-based time series recipe is disabled, then all tgc are allowed features. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Which tgc feature types to consider as standalone features: ", + "output": "allowed coltypes for tgc as features refers to Which tgc feature types to consider as standalone features: Which time groups columns (tgc) feature types to consider as standalone features, if the corresponding flag \"Consider time groups columns as standalone features\" is set to true. E.g. all column types would be [\"numeric\", \"categorical\", \"ohe_categorical\", \"datetime\", \"date\", \"text\"]. Note that 'time_column' is treated separately via 'Allow to engineer features from time column'. Note that if lag-based time series recipe is disabled, then all tgc are allowed features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting allowed_coltypes_for_tgc_as_features", + "output": "allowed coltypes for tgc as features refers to Which time groups columns (tgc) feature types to consider as standalone features, if the corresponding flag \"Consider time groups columns as standalone features\" is set to true. E.g. all column types would be [\"numeric\", \"categorical\", \"ohe_categorical\", \"datetime\", \"date\", \"text\"]. Note that 'time_column' is treated separately via 'Allow to engineer features from time column'. Note that if lag-based time series recipe is disabled, then all tgc are allowed features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting allowed_coltypes_for_tgc_as_features", + "output": "allowed coltypes for tgc as features refers to Which tgc feature types to consider as standalone features: Which time groups columns (tgc) feature types to consider as standalone features, if the corresponding flag \"Consider time groups columns as standalone features\" is set to true. E.g. 
all column types would be [\"numeric\", \"categorical\", \"ohe_categorical\", \"datetime\", \"date\", \"text\"]. Note that 'time_column' is treated separately via 'Allow to engineer features from time column'. Note that if lag-based time series recipe is disabled, then all tgc are allowed features. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_time_unaware_transformers", + "output": "enable time unaware transformers refers to Whether various transformers (clustering, truncated SVD) are enabled, that otherwise would be disabled for time series due to potential to overfit by leaking across time within the fit of each fold. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_time_unaware_transformers", + "output": "enable time unaware transformers refers to Enable time unaware transformers: Whether various transformers (clustering, truncated SVD) are enabled, that otherwise would be disabled for time series due to potential to overfit by leaking across time within the fit of each fold. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable time unaware transformers", + "output": "enable time unaware transformers refers to Enable time unaware transformers: Whether various transformers (clustering, truncated SVD) are enabled, that otherwise would be disabled for time series due to potential to overfit by leaking across time within the fit of each fold. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable time unaware transformers: ", + "output": "enable time unaware transformers refers to Enable time unaware transformers: Whether various transformers (clustering, truncated SVD) are enabled, that otherwise would be disabled for time series due to potential to overfit by leaking across time within the fit of each fold. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_time_unaware_transformers", + "output": "enable time unaware transformers refers to Whether various transformers (clustering, truncated SVD) are enabled, that otherwise would be disabled for time series due to potential to overfit by leaking across time within the fit of each fold. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_time_unaware_transformers", + "output": "enable time unaware transformers refers to Enable time unaware transformers: Whether various transformers (clustering, truncated SVD) are enabled, that otherwise would be disabled for time series due to potential to overfit by leaking across time within the fit of each fold. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc_only_use_all_groups", + "output": "tgc only use all groups refers to Whether to group by all time groups columns for creating lag features, instead of sampling from them" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc_only_use_all_groups", + "output": "tgc only use all groups refers to Always group by all time groups columns for creating lag features: Whether to group by all time groups columns for creating lag features, instead of sampling from them" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc only use all groups", + "output": "tgc only use all groups refers to Always group by all time groups columns for creating lag features: Whether to group by all time groups columns for creating lag features, instead of sampling from them" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Always group by all time groups columns for creating lag features: ", + "output": "tgc only use all groups refers to Always group by all time groups columns for creating lag features: Whether to group by all time groups columns for creating lag features, instead of sampling from them" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tgc_only_use_all_groups", + "output": "tgc only use all groups refers to Whether to group by all time groups columns for creating lag features, instead of sampling from them" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tgc_only_use_all_groups", + "output": "tgc only use all groups refers to Always group by all time groups columns for creating lag features: Whether to group 
by all time groups columns for creating lag features, instead of sampling from them" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc_allow_target_encoding", + "output": "tgc allow target encoding refers to Whether to allow target encoding of time groups. This can be useful if there are many groups. Note that allow_tgc_as_features independently controls if tgc are treated as normal features. 'auto': Choose CV by default. 'CV': Enable out-of-fold and CV-in-CV (if enabled) encoding 'simple': Simple memorized targets per group. 'off': Disable. Only relevant for time series experiments that have at least one time column group apart from the time column." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc_allow_target_encoding", + "output": "tgc allow target encoding refers to Target encoding of time groups: Whether to allow target encoding of time groups. This can be useful if there are many groups. Note that allow_tgc_as_features independently controls if tgc are treated as normal features. 'auto': Choose CV by default. 'CV': Enable out-of-fold and CV-in-CV (if enabled) encoding 'simple': Simple memorized targets per group. 'off': Disable. Only relevant for time series experiments that have at least one time column group apart from the time column." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc allow target encoding", + "output": "tgc allow target encoding refers to Target encoding of time groups: Whether to allow target encoding of time groups. This can be useful if there are many groups. Note that allow_tgc_as_features independently controls if tgc are treated as normal features. 'auto': Choose CV by default. 'CV': Enable out-of-fold and CV-in-CV (if enabled) encoding 'simple': Simple memorized targets per group. 
'off': Disable. Only relevant for time series experiments that have at least one time column group apart from the time column." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Target encoding of time groups: ", + "output": "tgc allow target encoding refers to Target encoding of time groups: Whether to allow target encoding of time groups. This can be useful if there are many groups. Note that allow_tgc_as_features independently controls if tgc are treated as normal features. 'auto': Choose CV by default. 'CV': Enable out-of-fold and CV-in-CV (if enabled) encoding 'simple': Simple memorized targets per group. 'off': Disable. Only relevant for time series experiments that have at least one time column group apart from the time column." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tgc_allow_target_encoding", + "output": "tgc allow target encoding refers to Whether to allow target encoding of time groups. This can be useful if there are many groups. Note that allow_tgc_as_features independently controls if tgc are treated as normal features. 'auto': Choose CV by default. 'CV': Enable out-of-fold and CV-in-CV (if enabled) encoding 'simple': Simple memorized targets per group. 'off': Disable. Only relevant for time series experiments that have at least one time column group apart from the time column." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tgc_allow_target_encoding", + "output": "tgc allow target encoding refers to Target encoding of time groups: Whether to allow target encoding of time groups. This can be useful if there are many groups. Note that allow_tgc_as_features independently controls if tgc are treated as normal features. 'auto': Choose CV by default. 
'CV': Enable out-of-fold and CV-in-CV (if enabled) encoding 'simple': Simple memorized targets per group. 'off': Disable. Only relevant for time series experiments that have at least one time column group apart from the time column." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc_allow_features_and_target_encoding_auto_tune", + "output": "tgc allow features and target encoding auto tune refers to if allow_tgc_as_features is true or tgc_allow_target_encoding is true, whether to try both possibilities to see which does better during tuning. Safer than forcing one way or the other." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc_allow_features_and_target_encoding_auto_tune", + "output": "tgc allow features and target encoding auto tune refers to Auto-Tune time column groups as features and target encoding: if allow_tgc_as_features is true or tgc_allow_target_encoding is true, whether to try both possibilities to see which does better during tuning. Safer than forcing one way or the other." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc allow features and target encoding auto tune", + "output": "tgc allow features and target encoding auto tune refers to Auto-Tune time column groups as features and target encoding: if allow_tgc_as_features is true or tgc_allow_target_encoding is true, whether to try both possibilities to see which does better during tuning. Safer than forcing one way or the other." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Auto-Tune time column groups as features and target encoding: ", + "output": "tgc allow features and target encoding auto tune refers to Auto-Tune time column groups as features and target encoding: if allow_tgc_as_features is true or tgc_allow_target_encoding is true, whether to try both possibilities to see which does better during tuning. Safer than forcing one way or the other." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tgc_allow_features_and_target_encoding_auto_tune", + "output": "tgc allow features and target encoding auto tune refers to if allow_tgc_as_features is true or tgc_allow_target_encoding is true, whether to try both possibilities to see which does better during tuning. Safer than forcing one way or the other." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tgc_allow_features_and_target_encoding_auto_tune", + "output": "tgc allow features and target encoding auto tune refers to Auto-Tune time column groups as features and target encoding: if allow_tgc_as_features is true or tgc_allow_target_encoding is true, whether to try both possibilities to see which does better during tuning. Safer than forcing one way or the other." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_holdout_preds", + "output": "time series holdout preds refers to Enable creation of holdout predictions on training data using moving windows (useful for MLI, but can be slow)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_holdout_preds", + "output": "time series holdout preds refers to Generate Time-Series Holdout Predictions: Enable creation of holdout predictions on training data using moving windows (useful for MLI, but can be slow)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time series holdout preds", + "output": "time series holdout preds refers to Generate Time-Series Holdout Predictions: Enable creation of holdout predictions on training data using moving windows (useful for MLI, but can be slow)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Generate Time-Series Holdout Predictions: ", + "output": "time series holdout preds refers to Generate Time-Series Holdout Predictions: Enable creation of holdout predictions on training data using moving windows (useful for MLI, but can be slow)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting time_series_holdout_preds", + "output": "time series holdout preds refers to Enable creation of holdout predictions on training data using moving windows (useful for MLI, but can be slow)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting time_series_holdout_preds", + "output": "time series holdout preds refers to Generate Time-Series Holdout Predictions: Enable creation of holdout predictions on training data using moving 
windows (useful for MLI, but can be slow)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_max_holdout_splits", + "output": "time series max holdout splits refers to Max number of splits used for creating final time-series model's holdout/backtesting predictions. With the default value '-1' the same amount of splits as during model validation will be used. Use 'time_series_validation_splits' to control amount of time-based splits used for model validation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_max_holdout_splits", + "output": "time series max holdout splits refers to Maximum number of splits used for creating final time-series model's holdout predictions: Max number of splits used for creating final time-series model's holdout/backtesting predictions. With the default value '-1' the same amount of splits as during model validation will be used. Use 'time_series_validation_splits' to control amount of time-based splits used for model validation." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time series max holdout splits", + "output": "time series max holdout splits refers to Maximum number of splits used for creating final time-series model's holdout predictions: Max number of splits used for creating final time-series model's holdout/backtesting predictions. With the default value '-1' the same amount of splits as during model validation will be used. Use 'time_series_validation_splits' to control amount of time-based splits used for model validation." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Maximum number of splits used for creating final time-series model's holdout predictions: ", + "output": "time series max holdout splits refers to Maximum number of splits used for creating final time-series model's holdout predictions: Max number of splits used for creating final time-series model's holdout/backtesting predictions. With the default value '-1' the same amount of splits as during model validation will be used. Use 'time_series_validation_splits' to control amount of time-based splits used for model validation." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting time_series_max_holdout_splits", + "output": "time series max holdout splits refers to Max number of splits used for creating final time-series model's holdout/backtesting predictions. With the default value '-1' the same amount of splits as during model validation will be used. Use 'time_series_validation_splits' to control amount of time-based splits used for model validation." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting time_series_max_holdout_splits", + "output": "time series max holdout splits refers to Maximum number of splits used for creating final time-series model's holdout predictions: Max number of splits used for creating final time-series model's holdout/backtesting predictions. With the default value '-1' the same amount of splits as during model validation will be used. Use 'time_series_validation_splits' to control amount of time-based splits used for model validation." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "blend_in_link_space", + "output": "blend in link space refers to Whether to blend ensembles in link space, so that can apply inverse link function to get predictions after blending. This allows to get Shapley values to sum up to final predictions, after applying inverse link function: preds = inverse_link( (blend(base learner predictions in link space ))) = inverse_link(sum(blend(base learner shapley values in link space))) = inverse_link(sum( ensemble shapley values in link space ))For binary classification, this is only supported if inverse_link = logistic = 1/(1+exp(-x))For multiclass classification, this is only supported if inverse_link = softmax = exp(x)/sum(exp(x))For regression, this behavior happens naturally if all base learners use the identity link function, otherwise not possible" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "blend_in_link_space", + "output": "blend in link space refers to Whether to blend ensembles in link space (applies to classification only): Whether to blend ensembles in link space, so that can apply inverse link function to get predictions after blending. 
This allows to get Shapley values to sum up to final predictions, after applying inverse link function: preds = inverse_link( (blend(base learner predictions in link space ))) = inverse_link(sum(blend(base learner shapley values in link space))) = inverse_link(sum( ensemble shapley values in link space ))For binary classification, this is only supported if inverse_link = logistic = 1/(1+exp(-x))For multiclass classification, this is only supported if inverse_link = softmax = exp(x)/sum(exp(x))For regression, this behavior happens naturally if all base learners use the identity link function, otherwise not possible" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "blend in link space", + "output": "blend in link space refers to Whether to blend ensembles in link space (applies to classification only): Whether to blend ensembles in link space, so that can apply inverse link function to get predictions after blending. 
This allows to get Shapley values to sum up to final predictions, after applying inverse link function: preds = inverse_link( (blend(base learner predictions in link space ))) = inverse_link(sum(blend(base learner shapley values in link space))) = inverse_link(sum( ensemble shapley values in link space ))For binary classification, this is only supported if inverse_link = logistic = 1/(1+exp(-x))For multiclass classification, this is only supported if inverse_link = softmax = exp(x)/sum(exp(x))For regression, this behavior happens naturally if all base learners use the identity link function, otherwise not possible" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to blend ensembles in link space (applies to classification only): ", + "output": "blend in link space refers to Whether to blend ensembles in link space (applies to classification only): Whether to blend ensembles in link space, so that can apply inverse link function to get predictions after blending. This allows to get Shapley values to sum up to final predictions, after applying inverse link function: preds = inverse_link( (blend(base learner predictions in link space ))) = inverse_link(sum(blend(base learner shapley values in link space))) = inverse_link(sum( ensemble shapley values in link space ))For binary classification, this is only supported if inverse_link = logistic = 1/(1+exp(-x))For multiclass classification, this is only supported if inverse_link = softmax = exp(x)/sum(exp(x))For regression, this behavior happens naturally if all base learners use the identity link function, otherwise not possible" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting blend_in_link_space", + "output": "blend in link space refers to Whether to blend ensembles in link space, so that can apply inverse link function to get predictions after blending. 
This allows to get Shapley values to sum up to final predictions, after applying inverse link function: preds = inverse_link( (blend(base learner predictions in link space ))) = inverse_link(sum(blend(base learner shapley values in link space))) = inverse_link(sum( ensemble shapley values in link space ))For binary classification, this is only supported if inverse_link = logistic = 1/(1+exp(-x))For multiclass classification, this is only supported if inverse_link = softmax = exp(x)/sum(exp(x))For regression, this behavior happens naturally if all base learners use the identity link function, otherwise not possible" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting blend_in_link_space", + "output": "blend in link space refers to Whether to blend ensembles in link space (applies to classification only): Whether to blend ensembles in link space, so that can apply inverse link function to get predictions after blending. This allows to get Shapley values to sum up to final predictions, after applying inverse link function: preds = inverse_link( (blend(base learner predictions in link space ))) = inverse_link(sum(blend(base learner shapley values in link space))) = inverse_link(sum( ensemble shapley values in link space ))For binary classification, this is only supported if inverse_link = logistic = 1/(1+exp(-x))For multiclass classification, this is only supported if inverse_link = softmax = exp(x)/sum(exp(x))For regression, this behavior happens naturally if all base learners use the identity link function, otherwise not possible" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_ts_fast_approx", + "output": "mli ts fast approx refers to Whether to speed up time-series holdout predictions for back-testing on training data (used for MLI and metrics calculation). Can be slightly less accurate." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_ts_fast_approx", + "output": "mli ts fast approx refers to Whether to speed up calculation of Time-Series Holdout Predictions: Whether to speed up time-series holdout predictions for back-testing on training data (used for MLI and metrics calculation). Can be slightly less accurate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli ts fast approx", + "output": "mli ts fast approx refers to Whether to speed up calculation of Time-Series Holdout Predictions: Whether to speed up time-series holdout predictions for back-testing on training data (used for MLI and metrics calculation). Can be slightly less accurate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to speed up calculation of Time-Series Holdout Predictions: ", + "output": "mli ts fast approx refers to Whether to speed up calculation of Time-Series Holdout Predictions: Whether to speed up time-series holdout predictions for back-testing on training data (used for MLI and metrics calculation). Can be slightly less accurate." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_ts_fast_approx", + "output": "mli ts fast approx refers to Whether to speed up time-series holdout predictions for back-testing on training data (used for MLI and metrics calculation). Can be slightly less accurate." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_ts_fast_approx", + "output": "mli ts fast approx refers to Whether to speed up calculation of Time-Series Holdout Predictions: Whether to speed up time-series holdout predictions for back-testing on training data (used for MLI and metrics calculation). 
Can be slightly less accurate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_ts_fast_approx_contribs", + "output": "mli ts fast approx contribs refers to Whether to speed up Shapley values for time-series holdout predictions for back-testing on training data (used for MLI). Can be slightly less accurate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_ts_fast_approx_contribs", + "output": "mli ts fast approx contribs refers to Whether to speed up calculation of Shapley values for Time-Series Holdout Predictions: Whether to speed up Shapley values for time-series holdout predictions for back-testing on training data (used for MLI). Can be slightly less accurate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli ts fast approx contribs", + "output": "mli ts fast approx contribs refers to Whether to speed up calculation of Shapley values for Time-Series Holdout Predictions: Whether to speed up Shapley values for time-series holdout predictions for back-testing on training data (used for MLI). Can be slightly less accurate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Whether to speed up calculation of Shapley values for Time-Series Holdout Predictions: ", + "output": "mli ts fast approx contribs refers to Whether to speed up calculation of Shapley values for Time-Series Holdout Predictions: Whether to speed up Shapley values for time-series holdout predictions for back-testing on training data (used for MLI). Can be slightly less accurate." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_ts_fast_approx_contribs", + "output": "mli ts fast approx contribs refers to Whether to speed up Shapley values for time-series holdout predictions for back-testing on training data (used for MLI). Can be slightly less accurate." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_ts_fast_approx_contribs", + "output": "mli ts fast approx contribs refers to Whether to speed up calculation of Shapley values for Time-Series Holdout Predictions: Whether to speed up Shapley values for time-series holdout predictions for back-testing on training data (used for MLI). Can be slightly less accurate." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_ts_holdout_contribs", + "output": "mli ts holdout contribs refers to Enable creation of Shapley values for holdout predictions on training data using moving windows (useful for MLI, but can be slow), at the time of the experiment. If disabled, MLI will generate Shapley values on demand." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli_ts_holdout_contribs", + "output": "mli ts holdout contribs refers to Generate Shapley values for Time-Series Holdout Predictions at the time of experiment: Enable creation of Shapley values for holdout predictions on training data using moving windows (useful for MLI, but can be slow), at the time of the experiment. If disabled, MLI will generate Shapley values on demand." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mli ts holdout contribs", + "output": "mli ts holdout contribs refers to Generate Shapley values for Time-Series Holdout Predictions at the time of experiment: Enable creation of Shapley values for holdout predictions on training data using moving windows (useful for MLI, but can be slow), at the time of the experiment. If disabled, MLI will generate Shapley values on demand." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Generate Shapley values for Time-Series Holdout Predictions at the time of experiment: ", + "output": "mli ts holdout contribs refers to Generate Shapley values for Time-Series Holdout Predictions at the time of experiment: Enable creation of Shapley values for holdout predictions on training data using moving windows (useful for MLI, but can be slow), at the time of the experiment. If disabled, MLI will generate Shapley values on demand." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mli_ts_holdout_contribs", + "output": "mli ts holdout contribs refers to Enable creation of Shapley values for holdout predictions on training data using moving windows (useful for MLI, but can be slow), at the time of the experiment. If disabled, MLI will generate Shapley values on demand." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mli_ts_holdout_contribs", + "output": "mli ts holdout contribs refers to Generate Shapley values for Time-Series Holdout Predictions at the time of experiment: Enable creation of Shapley values for holdout predictions on training data using moving windows (useful for MLI, but can be slow), at the time of the experiment. If disabled, MLI will generate Shapley values on demand." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_min_interpretability", + "output": "time series min interpretability refers to Values of 5 or more can improve generalization by more aggressive dropping of least important features. Set to 1 to disable." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time_series_min_interpretability", + "output": "time series min interpretability refers to Lower limit on interpretability setting for time-series experiments, implicitly enforced.: Values of 5 or more can improve generalization by more aggressive dropping of least important features. Set to 1 to disable." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "time series min interpretability", + "output": "time series min interpretability refers to Lower limit on interpretability setting for time-series experiments, implicitly enforced.: Values of 5 or more can improve generalization by more aggressive dropping of least important features. Set to 1 to disable." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Lower limit on interpretability setting for time-series experiments, implicitly enforced.: ", + "output": "time series min interpretability refers to Lower limit on interpretability setting for time-series experiments, implicitly enforced.: Values of 5 or more can improve generalization by more aggressive dropping of least important features. Set to 1 to disable." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting time_series_min_interpretability", + "output": "time series min interpretability refers to Values of 5 or more can improve generalization by more aggressive dropping of least important features. Set to 1 to disable." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting time_series_min_interpretability", + "output": "time series min interpretability refers to Lower limit on interpretability setting for time-series experiments, implicitly enforced.: Values of 5 or more can improve generalization by more aggressive dropping of least important features. Set to 1 to disable." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lags_dropout", + "output": "lags dropout refers to Dropout mode for lag features in order to achieve an equal n.a.-ratio between train and validation/test. The independent mode performs a simple feature-wise dropout, whereas the dependent one takes lag-size dependencies per sample/row into account." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lags_dropout", + "output": "lags dropout refers to Dropout mode for lag features: Dropout mode for lag features in order to achieve an equal n.a.-ratio between train and validation/test. The independent mode performs a simple feature-wise dropout, whereas the dependent one takes lag-size dependencies per sample/row into account." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "lags dropout", + "output": "lags dropout refers to Dropout mode for lag features: Dropout mode for lag features in order to achieve an equal n.a.-ratio between train and validation/test. 
The independent mode performs a simple feature-wise dropout, whereas the dependent one takes lag-size dependencies per sample/row into account." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Dropout mode for lag features: ", + "output": "lags dropout refers to Dropout mode for lag features: Dropout mode for lag features in order to achieve an equal n.a.-ratio between train and validation/test. The independent mode performs a simple feature-wise dropout, whereas the dependent one takes lag-size dependencies per sample/row into account." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting lags_dropout", + "output": "lags dropout refers to Dropout mode for lag features in order to achieve an equal n.a.-ratio between train and validation/test. The independent mode performs a simple feature-wise dropout, whereas the dependent one takes lag-size dependencies per sample/row into account." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting lags_dropout", + "output": "lags dropout refers to Dropout mode for lag features: Dropout mode for lag features in order to achieve an equal n.a.-ratio between train and validation/test. The independent mode performs a simple feature-wise dropout, whereas the dependent one takes lag-size dependencies per sample/row into account." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_lag_non_targets", + "output": "prob lag non targets refers to Normalized probability of choosing to lag non-targets relative to targets (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_lag_non_targets", + "output": "prob lag non targets refers to Probability to create non-target lag features (-1.0 = auto): Normalized probability of choosing to lag non-targets relative to targets (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob lag non targets", + "output": "prob lag non targets refers to Probability to create non-target lag features (-1.0 = auto): Normalized probability of choosing to lag non-targets relative to targets (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Probability to create non-target lag features (-1.0 = auto): ", + "output": "prob lag non targets refers to Probability to create non-target lag features (-1.0 = auto): Normalized probability of choosing to lag non-targets relative to targets (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prob_lag_non_targets", + "output": "prob lag non targets refers to Normalized probability of choosing to lag non-targets relative to targets (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prob_lag_non_targets", + "output": "prob lag non targets refers to Probability to create non-target lag features (-1.0 = auto): Normalized probability of choosing to lag non-targets relative to targets (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": 
"Explain the following expert setting for Driverless AI", + "input": "rolling_test_method", + "output": "rolling test method refers to Method to create rolling test set predictions, if the forecast horizon is shorter than the time span of the test set. One can choose between test time augmentation (TTA) and a successive refitting of the final pipeline." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "rolling_test_method", + "output": "rolling test method refers to Method to create rolling test set predictions: Method to create rolling test set predictions, if the forecast horizon is shorter than the time span of the test set. One can choose between test time augmentation (TTA) and a successive refitting of the final pipeline." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "rolling test method", + "output": "rolling test method refers to Method to create rolling test set predictions: Method to create rolling test set predictions, if the forecast horizon is shorter than the time span of the test set. One can choose between test time augmentation (TTA) and a successive refitting of the final pipeline." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Method to create rolling test set predictions: ", + "output": "rolling test method refers to Method to create rolling test set predictions: Method to create rolling test set predictions, if the forecast horizon is shorter than the time span of the test set. One can choose between test time augmentation (TTA) and a successive refitting of the final pipeline." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting rolling_test_method", + "output": "rolling test method refers to Method to create rolling test set predictions, if the forecast horizon is shorter than the time span of the test set. One can choose between test time augmentation (TTA) and a successive refitting of the final pipeline." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting rolling_test_method", + "output": "rolling test method refers to Method to create rolling test set predictions: Method to create rolling test set predictions, if the forecast horizon is shorter than the time span of the test set. One can choose between test time augmentation (TTA) and a successive refitting of the final pipeline." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "rolling_test_method_max_splits", + "output": "rolling test method max splits refers to Max number of splits for 'refit' method to avoid OOM/slowness, both for GA and final refit. In GA, will fall back to fast_tta, in final will fail with error msg.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "rolling_test_method_max_splits", + "output": "rolling test method max splits refers to Max number of splits for 'refit' method to avoid OOM/slowness, both for GA and final refit. In GA, will fall back to fast_tta, in final will fail with error msg.: " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "rolling test method max splits", + "output": "rolling test method max splits refers to Max number of splits for 'refit' method to avoid OOM/slowness, both for GA and final refit. 
In GA, will fall back to fast_tta, in final will fail with error msg." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Max number of splits for 'refit' method to avoid OOM/slowness, both for GA and final refit. In GA, will fall back to fast_tta, in final will fail with error msg.: ", + "output": "rolling test method max splits refers to Max number of splits for 'refit' method to avoid OOM/slowness, both for GA and final refit. In GA, will fall back to fast_tta, in final will fail with error msg." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting rolling_test_method_max_splits", + "output": "rolling test method max splits refers to Max number of splits for 'refit' method to avoid OOM/slowness, both for GA and final refit. In GA, will fall back to fast_tta, in final will fail with error msg." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting rolling_test_method_max_splits", + "output": "rolling test method max splits refers to Max number of splits for 'refit' method to avoid OOM/slowness, both for GA and final refit. In GA, will fall back to fast_tta, in final will fail with error msg." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_tta_internal", + "output": "fast tta internal refers to Apply TTA in one pass instead of using rolling windows for internal validation split predictions. Note: Setting this to 'False' leads to significantly longer runtimes."
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_tta_internal", + "output": "fast tta internal refers to Fast TTA for internal validation (feature evolution and holdout predictions): Apply TTA in one pass instead of using rolling windows for internal validation split predictions. Note: Setting this to 'False' leads to significantly longer runtimes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast tta internal", + "output": "fast tta internal refers to Fast TTA for internal validation (feature evolution and holdout predictions): Apply TTA in one pass instead of using rolling windows for internal validation split predictions. Note: Setting this to 'False' leads to significantly longer runtimes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Fast TTA for internal validation (feature evolution and holdout predictions): ", + "output": "fast tta internal refers to Fast TTA for internal validation (feature evolution and holdout predictions): Apply TTA in one pass instead of using rolling windows for internal validation split predictions. Note: Setting this to 'False' leads to significantly longer runtimes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fast_tta_internal", + "output": "fast tta internal refers to Apply TTA in one pass instead of using rolling windows for internal validation split predictions. Note: Setting this to 'False' leads to significantly longer runtimes." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fast_tta_internal", + "output": "fast tta internal refers to Fast TTA for internal validation (feature evolution and holdout predictions): Apply TTA in one pass instead of using rolling windows for internal validation split predictions. Note: Setting this to 'False' leads to significantly longer runtimes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_tta_test", + "output": "fast tta test refers to Apply TTA in one pass instead of using rolling windows for test set predictions. This only applies if the forecast horizon is shorter than the time span of the test set. Note: Setting this to 'False' leads to significantly longer runtimes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast_tta_test", + "output": "fast tta test refers to Fast TTA for test set predictions: Apply TTA in one pass instead of using rolling windows for test set predictions. This only applies if the forecast horizon is shorter than the time span of the test set. Note: Setting this to 'False' leads to significantly longer runtimes." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "fast tta test", + "output": "fast tta test refers to Fast TTA for test set predictions: Apply TTA in one pass instead of using rolling windows for test set predictions. This only applies if the forecast horizon is shorter than the time span of the test set. Note: Setting this to 'False' leads to significantly longer runtimes." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Fast TTA for test set predictions: ", + "output": "fast tta test refers to Fast TTA for test set predictions: Apply TTA in one pass instead of using rolling windows for test set predictions. This only applies if the forecast horizon is shorter than the time span of the test set. Note: Setting this to 'False' leads to significantly longer runtimes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting fast_tta_test", + "output": "fast tta test refers to Apply TTA in one pass instead of using rolling windows for test set predictions. This only applies if the forecast horizon is shorter than the time span of the test set. Note: Setting this to 'False' leads to significantly longer runtimes." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting fast_tta_test", + "output": "fast tta test refers to Fast TTA for test set predictions: Apply TTA in one pass instead of using rolling windows for test set predictions. This only applies if the forecast horizon is shorter than the time span of the test set. Note: Setting this to 'False' leads to significantly longer runtimes." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_default_lags", + "output": "prob default lags refers to Probability for new Lags/EWMA gene to use default lags (determined by frequency/gap/horizon, independent of data) (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_default_lags", + "output": "prob default lags refers to Probability for new time-series transformers to use default lags (-1.0 = auto): Probability for new Lags/EWMA gene to use default lags (determined by frequency/gap/horizon, independent of data) (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob default lags", + "output": "prob default lags refers to Probability for new time-series transformers to use default lags (-1.0 = auto): Probability for new Lags/EWMA gene to use default lags (determined by frequency/gap/horizon, independent of data) (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Probability for new time-series transformers to use default lags (-1.0 = auto): ", + "output": "prob default lags refers to Probability for new time-series transformers to use default lags (-1.0 = auto): Probability for new Lags/EWMA gene to use default lags (determined by frequency/gap/horizon, independent of data) (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prob_default_lags", + "output": "prob default lags refers to Probability for new Lags/EWMA gene to use default lags (determined by frequency/gap/horizon, independent of data) (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prob_default_lags", + 
"output": "prob default lags refers to Probability for new time-series transformers to use default lags (-1.0 = auto): Probability for new Lags/EWMA gene to use default lags (determined by frequency/gap/horizon, independent of data) (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_lagsinteraction", + "output": "prob lagsinteraction refers to Unnormalized probability of choosing other lag time-series transformers based on interactions (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_lagsinteraction", + "output": "prob lagsinteraction refers to Probability of exploring interaction-based lag transformers (-1.0 = auto): Unnormalized probability of choosing other lag time-series transformers based on interactions (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob lagsinteraction", + "output": "prob lagsinteraction refers to Probability of exploring interaction-based lag transformers (-1.0 = auto): Unnormalized probability of choosing other lag time-series transformers based on interactions (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Probability of exploring interaction-based lag transformers (-1.0 = auto): ", + "output": "prob lagsinteraction refers to Probability of exploring interaction-based lag transformers (-1.0 = auto): Unnormalized probability of choosing other lag time-series transformers based on interactions (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prob_lagsinteraction", + "output": "prob lagsinteraction refers to Unnormalized probability of choosing other lag time-series transformers based on 
interactions (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prob_lagsinteraction", + "output": "prob lagsinteraction refers to Probability of exploring interaction-based lag transformers (-1.0 = auto): Unnormalized probability of choosing other lag time-series transformers based on interactions (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_lagsaggregates", + "output": "prob lagsaggregates refers to Unnormalized probability of choosing other lag time-series transformers based on aggregations (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob_lagsaggregates", + "output": "prob lagsaggregates refers to Probability of exploring aggregation-based lag transformers (-1.0 = auto): Unnormalized probability of choosing other lag time-series transformers based on aggregations (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "prob lagsaggregates", + "output": "prob lagsaggregates refers to Probability of exploring aggregation-based lag transformers (-1.0 = auto): Unnormalized probability of choosing other lag time-series transformers based on aggregations (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Probability of exploring aggregation-based lag transformers (-1.0 = auto): ", + "output": "prob lagsaggregates refers to Probability of exploring aggregation-based lag transformers (-1.0 = auto): Unnormalized probability of choosing other lag time-series transformers based on aggregations (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting prob_lagsaggregates", 
+ "output": "prob lagsaggregates refers to Unnormalized probability of choosing other lag time-series transformers based on aggregations (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting prob_lagsaggregates", + "output": "prob lagsaggregates refers to Probability of exploring aggregation-based lag transformers (-1.0 = auto): Unnormalized probability of choosing other lag time-series transformers based on aggregations (-1.0 = auto)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ts_target_trafo", + "output": "ts target trafo refers to Time series centering or detrending transformation. The free parameter(s) of the trend model are fitted and the trend is removed from the target signal, and the pipeline is fitted on the residuals. Predictions are made by adding back the trend. Note: Can be cascaded with 'Time series lag-based target transformation', but is mutually exclusive with regular target transformations. The robust centering or linear detrending variants use RANSAC to achieve a higher tolerance w.r.t. outliers. The Epidemic target transformer uses the SEIR model: https://en.wikipedia.org/wiki/Compartmental_models_in_epidemiology#The_SEIR_model" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ts_target_trafo", + "output": "ts target trafo refers to Time series centering or detrending transformation: Time series centering or detrending transformation. The free parameter(s) of the trend model are fitted and the trend is removed from the target signal, and the pipeline is fitted on the residuals. Predictions are made by adding back the trend. Note: Can be cascaded with 'Time series lag-based target transformation', but is mutually exclusive with regular target transformations. 
The robust centering or linear detrending variants use RANSAC to achieve a higher tolerance w.r.t. outliers. The Epidemic target transformer uses the SEIR model: https://en.wikipedia.org/wiki/Compartmental_models_in_epidemiology#The_SEIR_model" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ts target trafo", + "output": "ts target trafo refers to Time series centering or detrending transformation: Time series centering or detrending transformation. The free parameter(s) of the trend model are fitted and the trend is removed from the target signal, and the pipeline is fitted on the residuals. Predictions are made by adding back the trend. Note: Can be cascaded with 'Time series lag-based target transformation', but is mutually exclusive with regular target transformations. The robust centering or linear detrending variants use RANSAC to achieve a higher tolerance w.r.t. outliers. The Epidemic target transformer uses the SEIR model: https://en.wikipedia.org/wiki/Compartmental_models_in_epidemiology#The_SEIR_model" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Time series centering or detrending transformation: ", + "output": "ts target trafo refers to Time series centering or detrending transformation: Time series centering or detrending transformation. The free parameter(s) of the trend model are fitted and the trend is removed from the target signal, and the pipeline is fitted on the residuals. Predictions are made by adding back the trend. Note: Can be cascaded with 'Time series lag-based target transformation', but is mutually exclusive with regular target transformations. The robust centering or linear detrending variants use RANSAC to achieve a higher tolerance w.r.t. outliers. 
The Epidemic target transformer uses the SEIR model: https://en.wikipedia.org/wiki/Compartmental_models_in_epidemiology#The_SEIR_model" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ts_target_trafo", + "output": "ts target trafo refers to Time series centering or detrending transformation. The free parameter(s) of the trend model are fitted and the trend is removed from the target signal, and the pipeline is fitted on the residuals. Predictions are made by adding back the trend. Note: Can be cascaded with 'Time series lag-based target transformation', but is mutually exclusive with regular target transformations. The robust centering or linear detrending variants use RANSAC to achieve a higher tolerance w.r.t. outliers. The Epidemic target transformer uses the SEIR model: https://en.wikipedia.org/wiki/Compartmental_models_in_epidemiology#The_SEIR_model" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ts_target_trafo", + "output": "ts target trafo refers to Time series centering or detrending transformation: Time series centering or detrending transformation. The free parameter(s) of the trend model are fitted and the trend is removed from the target signal, and the pipeline is fitted on the residuals. Predictions are made by adding back the trend. Note: Can be cascaded with 'Time series lag-based target transformation', but is mutually exclusive with regular target transformations. The robust centering or linear detrending variants use RANSAC to achieve a higher tolerance w.r.t. outliers. 
The Epidemic target transformer uses the SEIR model: https://en.wikipedia.org/wiki/Compartmental_models_in_epidemiology#The_SEIR_model" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ts_target_trafo_epidemic_params_dict", + "output": "ts target trafo epidemic params dict refers to Dictionary to control Epidemic SEIRD model for de-trending of target per time series group. Note: The target column must correspond to I(t), the infected cases as a function of time. For each training split and time series group, the SEIRD model is fitted to the target signal (by optimizing the free parameters shown below for each time series group). Then, the SEIRD model's value is subtracted from the training response, and the residuals are passed to the feature engineering and modeling pipeline. For predictions, the SEIRD model's value is added to the residual predictions from the pipeline, for each time series group. Note: Careful selection of the bounds for the free parameters N, beta, gamma, delta, alpha, rho, lockdown, beta_decay, beta_decay_rate is extremely important for good results.\n- S(t) : susceptible/healthy/not immune\n- E(t) : exposed/not yet infectious\n- I(t) : infectious/active <= target column\n- R(t) : recovered/immune\n- D(t) : deceased\n### Free parameters:\n- N : total population, N=S+E+I+R+D\n- beta : rate of exposure (S -> E)\n- gamma : rate of recovering (I -> R)\n- delta : incubation period\n- alpha : fatality rate\n- rho : rate at which people die\n- lockdown : day of lockdown (-1 => no lockdown)\n- beta_decay : beta decay due to lockdown\n- beta_decay_rate : speed of beta decay\n### Dynamics:\nif lockdown >= 0:\n    beta_min = beta * (1 - beta_decay)\n    beta = (beta - beta_min) / (1 + np.exp(-beta_decay_rate * (-t + lockdown))) + beta_min\ndSdt = -beta * S * I / N\ndEdt = beta * S * I / N - delta * E\ndIdt = delta * E - (1 - alpha) * gamma * I - alpha * rho * I\ndRdt = (1 - alpha) * gamma * I\ndDdt = alpha * rho * I\nProvide lower/upper
bounds for each parameter you want to control the bounds for. Valid parameters are: N_min, N_max, beta_min, beta_max, gamma_min, gamma_max, delta_min, delta_max, alpha_min, alpha_max, rho_min, rho_max, lockdown_min, lockdown_max, beta_decay_min, beta_decay_max, beta_decay_rate_min, beta_decay_rate_max. You can change any subset of parameters, e.g., ts_target_trafo_epidemic_params_dict=\"{'N_min': 1000, 'beta_max': 0.2}\" To get SEIR model (in cases where death rates are very low, can speed up calculations significantly): set alpha_min=alpha_max=rho_min=rho_max=beta_decay_rate_min=beta_decay_rate_max=0, lockdown_min=lockdown_max=-1. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ts_target_trafo_epidemic_params_dict", + "output": "ts target trafo epidemic params dict refers to Custom bounds for SEIRD epidemic model parameters: Dictionary to control Epidemic SEIRD model for de-trending of target per time series group. Note: The target column must correspond to I(t), the infected cases as a function of time. For each training split and time series group, the SEIRD model is fitted to the target signal (by optimizing the free parameters shown below for each time series group). Then, the SEIRD model's value is subtracted from the training response, and the residuals are passed to the feature engineering and modeling pipeline.
For predictions, the SEIRD model's value is added to the residual predictions from the pipeline, for each time series group. Note: Careful selection of the bounds for the free parameters N, beta, gamma, delta, alpha, rho, lockdown, beta_decay, beta_decay_rate is extremely important for good results.\n- S(t) : susceptible/healthy/not immune\n- E(t) : exposed/not yet infectious\n- I(t) : infectious/active <= target column\n- R(t) : recovered/immune\n- D(t) : deceased\n### Free parameters:\n- N : total population, N=S+E+I+R+D\n- beta : rate of exposure (S -> E)\n- gamma : rate of recovering (I -> R)\n- delta : incubation period\n- alpha : fatality rate\n- rho : rate at which people die\n- lockdown : day of lockdown (-1 => no lockdown)\n- beta_decay : beta decay due to lockdown\n- beta_decay_rate : speed of beta decay\n### Dynamics:\nif lockdown >= 0:\n    beta_min = beta * (1 - beta_decay)\n    beta = (beta - beta_min) / (1 + np.exp(-beta_decay_rate * (-t + lockdown))) + beta_min\ndSdt = -beta * S * I / N\ndEdt = beta * S * I / N - delta * E\ndIdt = delta * E - (1 - alpha) * gamma * I - alpha * rho * I\ndRdt = (1 - alpha) * gamma * I\ndDdt = alpha * rho * I\nProvide lower/upper bounds for each parameter you want to control the bounds for. Valid parameters are: N_min, N_max, beta_min, beta_max, gamma_min, gamma_max, delta_min, delta_max, alpha_min, alpha_max, rho_min, rho_max, lockdown_min, lockdown_max, beta_decay_min, beta_decay_max, beta_decay_rate_min, beta_decay_rate_max. You can change any subset of parameters, e.g., ts_target_trafo_epidemic_params_dict=\"{'N_min': 1000, 'beta_max': 0.2}\" To get SEIR model (in cases where death rates are very low, can speed up calculations significantly): set alpha_min=alpha_max=rho_min=rho_max=beta_decay_rate_min=beta_decay_rate_max=0, lockdown_min=lockdown_max=-1.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ts target trafo epidemic params dict", + "output": "ts target trafo epidemic params dict refers to Custom bounds for SEIRD epidemic model parameters: Dictionary to control Epidemic SEIRD model for de-trending of target per time series group. Note: The target column must correspond to I(t), the infected cases as a function of time. For each training split and time series group, the SEIRD model is fitted to the target signal (by optimizing the free parameters shown below for each time series group). Then, the SEIRD model's value is subtracted from the training response, and the residuals are passed to the feature engineering and modeling pipeline. For predictions, the SEIRD model's value is added to the residual predictions from the pipeline, for each time series group. Note: Careful selection of the bounds for the free parameters N, beta, gamma, delta, alpha, rho, lockdown, beta_decay, beta_decay_rate is extremely important for good results.\n- S(t) : susceptible/healthy/not immune\n- E(t) : exposed/not yet infectious\n- I(t) : infectious/active <= target column\n- R(t) : recovered/immune\n- D(t) : deceased\n### Free parameters:\n- N : total population, N=S+E+I+R+D\n- beta : rate of exposure (S -> E)\n- gamma : rate of recovering (I -> R)\n- delta : incubation period\n- alpha : fatality rate\n- rho : rate at which people die\n- lockdown : day of lockdown (-1 => no lockdown)\n- beta_decay : beta decay due to lockdown\n- beta_decay_rate : speed of beta decay\n### Dynamics:\nif lockdown >= 0:\n    beta_min = beta * (1 - beta_decay)\n    beta = (beta - beta_min) / (1 + np.exp(-beta_decay_rate * (-t + lockdown))) + beta_min\ndSdt = -beta * S * I / N\ndEdt = beta * S * I / N - delta * E\ndIdt = delta * E - (1 - alpha) * gamma * I - alpha * rho * I\ndRdt = (1 - alpha) * gamma * I\ndDdt = alpha * rho * I\nProvide lower/upper bounds for each parameter you want to control the bounds for.
Valid parameters are: N_min, N_max, beta_min, beta_max, gamma_min, gamma_max, delta_min, delta_max, alpha_min, alpha_max, rho_min, rho_max, lockdown_min, lockdown_max, beta_decay_min, beta_decay_max, beta_decay_rate_min, beta_decay_rate_max. You can change any subset of parameters, e.g., ts_target_trafo_epidemic_params_dict=\"{'N_min': 1000, 'beta_max': 0.2}\" To get SEIR model (in cases where death rates are very low, can speed up calculations significantly): set alpha_min=alpha_max=rho_min=rho_max=beta_decay_rate_min=beta_decay_rate_max=0, lockdown_min=lockdown_max=-1. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Custom bounds for SEIRD epidemic model parameters: ", + "output": "ts target trafo epidemic params dict refers to Custom bounds for SEIRD epidemic model parameters: Dictionary to control Epidemic SEIRD model for de-trending of target per time series group. Note: The target column must correspond to I(t), the infected cases as a function of time. For each training split and time series group, the SEIRD model is fitted to the target signal (by optimizing the free parameters shown below for each time series group). Then, the SEIRD model's value is subtracted from the training response, and the residuals are passed to the feature engineering and modeling pipeline.
For predictions, the SEIRD model's value is added to the residual predictions from the pipeline, for each time series group. Note: Careful selection of the bounds for the free parameters N, beta, gamma, delta, alpha, rho, lockdown, beta_decay, beta_decay_rate is extremely important for good results.\n- S(t) : susceptible/healthy/not immune\n- E(t) : exposed/not yet infectious\n- I(t) : infectious/active <= target column\n- R(t) : recovered/immune\n- D(t) : deceased\n### Free parameters:\n- N : total population, N=S+E+I+R+D\n- beta : rate of exposure (S -> E)\n- gamma : rate of recovering (I -> R)\n- delta : incubation period\n- alpha : fatality rate\n- rho : rate at which people die\n- lockdown : day of lockdown (-1 => no lockdown)\n- beta_decay : beta decay due to lockdown\n- beta_decay_rate : speed of beta decay\n### Dynamics:\nif lockdown >= 0:\n    beta_min = beta * (1 - beta_decay)\n    beta = (beta - beta_min) / (1 + np.exp(-beta_decay_rate * (-t + lockdown))) + beta_min\ndSdt = -beta * S * I / N\ndEdt = beta * S * I / N - delta * E\ndIdt = delta * E - (1 - alpha) * gamma * I - alpha * rho * I\ndRdt = (1 - alpha) * gamma * I\ndDdt = alpha * rho * I\nProvide lower/upper bounds for each parameter you want to control the bounds for. Valid parameters are: N_min, N_max, beta_min, beta_max, gamma_min, gamma_max, delta_min, delta_max, alpha_min, alpha_max, rho_min, rho_max, lockdown_min, lockdown_max, beta_decay_min, beta_decay_max, beta_decay_rate_min, beta_decay_rate_max. You can change any subset of parameters, e.g., ts_target_trafo_epidemic_params_dict=\"{'N_min': 1000, 'beta_max': 0.2}\" To get SEIR model (in cases where death rates are very low, can speed up calculations significantly): set alpha_min=alpha_max=rho_min=rho_max=beta_decay_rate_min=beta_decay_rate_max=0, lockdown_min=lockdown_max=-1.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ts_target_trafo_epidemic_params_dict", + "output": "ts target trafo epidemic params dict refers to Dictionary to control Epidemic SEIRD model for de-trending of target per time series group. Note: The target column must correspond to I(t), the infected cases as a function of time. For each training split and time series group, the SEIRD model is fitted to the target signal (by optimizing the free parameters shown below for each time series group). Then, the SEIRD model's value is subtracted from the training response, and the residuals are passed to the feature engineering and modeling pipeline. For predictions, the SEIRD model's value is added to the residual predictions from the pipeline, for each time series group. Note: Careful selection of the bounds for the free parameters N, beta, gamma, delta, alpha, rho, lockdown, beta_decay, beta_decay_rate is extremely important for good results.\n- S(t) : susceptible/healthy/not immune\n- E(t) : exposed/not yet infectious\n- I(t) : infectious/active <= target column\n- R(t) : recovered/immune\n- D(t) : deceased\n### Free parameters:\n- N : total population, N=S+E+I+R+D\n- beta : rate of exposure (S -> E)\n- gamma : rate of recovering (I -> R)\n- delta : incubation period\n- alpha : fatality rate\n- rho : rate at which people die\n- lockdown : day of lockdown (-1 => no lockdown)\n- beta_decay : beta decay due to lockdown\n- beta_decay_rate : speed of beta decay\n### Dynamics:\nif lockdown >= 0:\n    beta_min = beta * (1 - beta_decay)\n    beta = (beta - beta_min) / (1 + np.exp(-beta_decay_rate * (-t + lockdown))) + beta_min\ndSdt = -beta * S * I / N\ndEdt = beta * S * I / N - delta * E\ndIdt = delta * E - (1 - alpha) * gamma * I - alpha * rho * I\ndRdt = (1 - alpha) * gamma * I\ndDdt = alpha * rho * I\nProvide lower/upper bounds for each parameter you want to control the bounds for.
Valid parameters are: N_min, N_max, beta_min, beta_max, gamma_min, gamma_max, delta_min, delta_max, alpha_min, alpha_max, rho_min, rho_max, lockdown_min, lockdown_max, beta_decay_min, beta_decay_max, beta_decay_rate_min, beta_decay_rate_max. You can change any subset of parameters, e.g., ts_target_trafo_epidemic_params_dict=\"{'N_min': 1000, 'beta_max': 0.2}\" To get SEIR model (in cases where death rates are very low, can speed up calculations significantly): set alpha_min=alpha_max=rho_min=rho_max=beta_decay_rate_min=beta_decay_rate_max=0, lockdown_min=lockdown_max=-1. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ts_target_trafo_epidemic_params_dict", + "output": "ts target trafo epidemic params dict refers to Custom bounds for SEIRD epidemic model parameters: Dictionary to control Epidemic SEIRD model for de-trending of target per time series group. Note: The target column must correspond to I(t), the infected cases as a function of time. For each training split and time series group, the SEIRD model is fitted to the target signal (by optimizing the free parameters shown below for each time series group). Then, the SEIRD model's value is subtracted from the training response, and the residuals are passed to the feature engineering and modeling pipeline.
For predictions, the SEIRD model's value is added to the residual predictions from the pipeline, for each time series group. Note: Careful selection of the bounds for the free parameters N, beta, gamma, delta, alpha, rho, lockdown, beta_decay, beta_decay_rate is extremely important for good results.\n- S(t) : susceptible/healthy/not immune\n- E(t) : exposed/not yet infectious\n- I(t) : infectious/active <= target column\n- R(t) : recovered/immune\n- D(t) : deceased\n### Free parameters:\n- N : total population, N=S+E+I+R+D\n- beta : rate of exposure (S -> E)\n- gamma : rate of recovering (I -> R)\n- delta : incubation period\n- alpha : fatality rate\n- rho : rate at which people die\n- lockdown : day of lockdown (-1 => no lockdown)\n- beta_decay : beta decay due to lockdown\n- beta_decay_rate : speed of beta decay\n### Dynamics:\nif lockdown >= 0:\n    beta_min = beta * (1 - beta_decay)\n    beta = (beta - beta_min) / (1 + np.exp(-beta_decay_rate * (-t + lockdown))) + beta_min\ndSdt = -beta * S * I / N\ndEdt = beta * S * I / N - delta * E\ndIdt = delta * E - (1 - alpha) * gamma * I - alpha * rho * I\ndRdt = (1 - alpha) * gamma * I\ndDdt = alpha * rho * I\nProvide lower/upper bounds for each parameter you want to control the bounds for. Valid parameters are: N_min, N_max, beta_min, beta_max, gamma_min, gamma_max, delta_min, delta_max, alpha_min, alpha_max, rho_min, rho_max, lockdown_min, lockdown_max, beta_decay_min, beta_decay_max, beta_decay_rate_min, beta_decay_rate_max. You can change any subset of parameters, e.g., ts_target_trafo_epidemic_params_dict=\"{'N_min': 1000, 'beta_max': 0.2}\" To get SEIR model (in cases where death rates are very low, can speed up calculations significantly): set alpha_min=alpha_max=rho_min=rho_max=beta_decay_rate_min=beta_decay_rate_max=0, lockdown_min=lockdown_max=-1.
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ts_target_trafo_epidemic_target", + "output": "ts target trafo epidemic target refers to Which SEIRD model component the target column corresponds to: I: Infected, R: Recovered, D: Deceased." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ts_target_trafo_epidemic_target", + "output": "ts target trafo epidemic target refers to Which SEIRD model component the target column corresponds to: I: Infected, R: Recovered, D: Deceased." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ts target trafo epidemic target", + "output": "ts target trafo epidemic target refers to Which SEIRD model component the target column corresponds to: I: Infected, R: Recovered, D: Deceased." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Which SEIRD model component the target column corresponds to: I: Infected, R: Recovered, D: Deceased.: ", + "output": "ts target trafo epidemic target refers to Which SEIRD model component the target column corresponds to: I: Infected, R: Recovered, D: Deceased." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ts_target_trafo_epidemic_target", + "output": "ts target trafo epidemic target refers to Which SEIRD model component the target column corresponds to: I: Infected, R: Recovered, D: Deceased." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ts_target_trafo_epidemic_target", + "output": "ts target trafo epidemic target refers to Which SEIRD model component the target column corresponds to: I: Infected, R: Recovered, D: Deceased." + }, + { + "prompt_type": "human_bot", +
"instruction": "Explain the following expert setting for Driverless AI", + "input": "ts_lag_target_trafo", + "output": "ts lag target trafo refers to Time series lag-based target transformation. One can choose between difference and ratio of the current and a lagged target. The corresponding lag size can be set via 'Target transformation lag size'. Note: Can be cascaded with 'Time series target transformation', but is mutually exclusive with regular target transformations." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ts_lag_target_trafo", + "output": "ts lag target trafo refers to Time series lag-based target transformation: Time series lag-based target transformation. One can choose between difference and ratio of the current and a lagged target. The corresponding lag size can be set via 'Target transformation lag size'. Note: Can be cascaded with 'Time series target transformation', but is mutually exclusive with regular target transformations." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ts lag target trafo", + "output": "ts lag target trafo refers to Time series lag-based target transformation: Time series lag-based target transformation. One can choose between difference and ratio of the current and a lagged target. The corresponding lag size can be set via 'Target transformation lag size'. Note: Can be cascaded with 'Time series target transformation', but is mutually exclusive with regular target transformations." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Time series lag-based target transformation: ", + "output": "ts lag target trafo refers to Time series lag-based target transformation: Time series lag-based target transformation. One can choose between difference and ratio of the current and a lagged target. 
The corresponding lag size can be set via 'Target transformation lag size'. Note: Can be cascaded with 'Time series target transformation', but is mutually exclusive with regular target transformations." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ts_lag_target_trafo", + "output": "ts lag target trafo refers to Time series lag-based target transformation. One can choose between difference and ratio of the current and a lagged target. The corresponding lag size can be set via 'Target transformation lag size'. Note: Can be cascaded with 'Time series target transformation', but is mutually exclusive with regular target transformations." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ts_lag_target_trafo", + "output": "ts lag target trafo refers to Time series lag-based target transformation: Time series lag-based target transformation. One can choose between difference and ratio of the current and a lagged target. The corresponding lag size can be set via 'Target transformation lag size'. Note: Can be cascaded with 'Time series target transformation', but is mutually exclusive with regular target transformations." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ts_target_trafo_lag_size", + "output": "ts target trafo lag size refers to Lag size used for time series target transformation. See setting 'Time series lag-based target transformation'. -1 => smallest valid value = prediction periods + gap (automatically adjusted by DAI if too small)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ts_target_trafo_lag_size", + "output": "ts target trafo lag size refers to Lag size used for time series target transformation: Lag size used for time series target transformation. 
See setting 'Time series lag-based target transformation'. -1 => smallest valid value = prediction periods + gap (automatically adjusted by DAI if too small)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ts target trafo lag size", + "output": "ts target trafo lag size refers to Lag size used for time series target transformation: Lag size used for time series target transformation. See setting 'Time series lag-based target transformation'. -1 => smallest valid value = prediction periods + gap (automatically adjusted by DAI if too small)." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Lag size used for time series target transformation: ", + "output": "ts target trafo lag size refers to Lag size used for time series target transformation: Lag size used for time series target transformation. See setting 'Time series lag-based target transformation'. -1 => smallest valid value = prediction periods + gap (automatically adjusted by DAI if too small)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ts_target_trafo_lag_size", + "output": "ts target trafo lag size refers to Lag size used for time series target transformation. See setting 'Time series lag-based target transformation'. -1 => smallest valid value = prediction periods + gap (automatically adjusted by DAI if too small)." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ts_target_trafo_lag_size", + "output": "ts target trafo lag size refers to Lag size used for time series target transformation: Lag size used for time series target transformation. See setting 'Time series lag-based target transformation'. -1 => smallest valid value = prediction periods + gap (automatically adjusted by DAI if too small)." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc_via_ui_max_ncols", + "output": "tgc via ui max ncols refers to Maximum amount of columns send from UI to backend in order to auto-detect TGC" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc_via_ui_max_ncols", + "output": "tgc via ui max ncols refers to Maximum amount of columns send from UI to backend in order to auto-detect TGC" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc via ui max ncols", + "output": "tgc via ui max ncols refers to Maximum amount of columns send from UI to backend in order to auto-detect TGC" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tgc via ui max ncols refers to Maximum amount of columns send from UI to backend in order to auto-detect TGC" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tgc_via_ui_max_ncols", + "output": "tgc via ui max ncols refers to Maximum amount of columns send from UI to backend in order to auto-detect TGC" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tgc_via_ui_max_ncols", + "output": "tgc via ui max ncols refers to Maximum amount of columns send from UI to backend in order to auto-detect TGC" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc_dup_tolerance", + "output": "tgc dup tolerance refers to Maximum frequency of duplicated timestamps for TGC detection" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc_dup_tolerance", + "output": "tgc dup 
tolerance refers to Maximum frequency of duplicated timestamps for TGC detection" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "tgc dup tolerance", + "output": "tgc dup tolerance refers to Maximum frequency of duplicated timestamps for TGC detection" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "tgc dup tolerance refers to Maximum frequency of duplicated timestamps for TGC detection" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting tgc_dup_tolerance", + "output": "tgc dup tolerance refers to Maximum frequency of duplicated timestamps for TGC detection" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting tgc_dup_tolerance", + "output": "tgc dup tolerance refers to Maximum frequency of duplicated timestamps for TGC detection" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "timeseries_split_suggestion_timeout", + "output": "timeseries split suggestion timeout refers to Timeout in seconds for time-series properties detection in UI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "timeseries_split_suggestion_timeout", + "output": "timeseries split suggestion timeout refers to Timeout in seconds for time-series properties detection in UI.: Timeout in seconds for time-series properties detection in UI." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "timeseries split suggestion timeout", + "output": "timeseries split suggestion timeout refers to Timeout in seconds for time-series properties detection in UI.: Timeout in seconds for time-series properties detection in UI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Timeout in seconds for time-series properties detection in UI.: ", + "output": "timeseries split suggestion timeout refers to Timeout in seconds for time-series properties detection in UI.: Timeout in seconds for time-series properties detection in UI." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting timeseries_split_suggestion_timeout", + "output": "timeseries split suggestion timeout refers to Timeout in seconds for time-series properties detection in UI." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting timeseries_split_suggestion_timeout", + "output": "timeseries split suggestion timeout refers to Timeout in seconds for time-series properties detection in UI.: Timeout in seconds for time-series properties detection in UI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "timeseries_recency_weight_power", + "output": "timeseries recency weight power refers to Weight TS models scores as split number to this power. E.g. Use 1.0 to weight split closest to horizon by a factor that is number of splits larger than oldest split. Applies to tuning models and final back-testing models. If 0.0 (default) is used, median function is used, else mean is used. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "timeseries_recency_weight_power", + "output": "timeseries recency weight power refers to Power of recency weight for TS splits: Weight TS models scores as split number to this power. E.g. Use 1.0 to weight split closest to horizon by a factor that is number of splits larger than oldest split. Applies to tuning models and final back-testing models. If 0.0 (default) is used, median function is used, else mean is used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "timeseries recency weight power", + "output": "timeseries recency weight power refers to Power of recency weight for TS splits: Weight TS models scores as split number to this power. E.g. Use 1.0 to weight split closest to horizon by a factor that is number of splits larger than oldest split. Applies to tuning models and final back-testing models. If 0.0 (default) is used, median function is used, else mean is used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Power of recency weight for TS splits: ", + "output": "timeseries recency weight power refers to Power of recency weight for TS splits: Weight TS models scores as split number to this power. E.g. Use 1.0 to weight split closest to horizon by a factor that is number of splits larger than oldest split. Applies to tuning models and final back-testing models. If 0.0 (default) is used, median function is used, else mean is used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting timeseries_recency_weight_power", + "output": "timeseries recency weight power refers to Weight TS models scores as split number to this power. E.g. 
Use 1.0 to weight split closest to horizon by a factor that is number of splits larger than oldest split. Applies to tuning models and final back-testing models. If 0.0 (default) is used, median function is used, else mean is used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting timeseries_recency_weight_power", + "output": "timeseries recency weight power refers to Power of recency weight for TS splits: Weight TS models scores as split number to this power. E.g. Use 1.0 to weight split closest to horizon by a factor that is number of splits larger than oldest split. Applies to tuning models and final back-testing models. If 0.0 (default) is used, median function is used, else mean is used. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "user_config_directory", + "output": "user config directory refers to Every *.toml file is read from this directory and process the same way as main config file." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "user_config_directory", + "output": "user config directory refers to Every *.toml file is read from this directory and process the same way as main config file." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "user config directory", + "output": "user config directory refers to Every *.toml file is read from this directory and process the same way as main config file." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "user config directory refers to Every *.toml file is read from this directory and process the same way as main config file." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting user_config_directory", + "output": "user config directory refers to Every *.toml file is read from this directory and process the same way as main config file." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting user_config_directory", + "output": "user config directory refers to Every *.toml file is read from this directory and process the same way as main config file." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "procsy_ip", + "output": "procsy ip refers to IP address and port of procsy process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "procsy_ip", + "output": "procsy ip refers to IP address and port of procsy process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "procsy ip", + "output": "procsy ip refers to IP address and port of procsy process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "procsy ip refers to IP address and port of procsy process." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting procsy_ip", + "output": "procsy ip refers to IP address and port of procsy process." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting procsy_ip", + "output": "procsy ip refers to IP address and port of procsy process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "procsy_port", + "output": "procsy port refers to IP address and port of procsy process." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "procsy_port", + "output": "procsy port refers to IP address and port of procsy process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "procsy port", + "output": "procsy port refers to IP address and port of procsy process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "procsy port refers to IP address and port of procsy process." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting procsy_port", + "output": "procsy port refers to IP address and port of procsy process." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting procsy_port", + "output": "procsy port refers to IP address and port of procsy process." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_ip", + "output": "h2o ip refers to IP address for use by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_ip", + "output": "h2o ip refers to IP address for use by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o ip", + "output": "h2o ip refers to IP address for use by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o ip refers to IP address for use by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_ip", + "output": "h2o ip refers to IP address for use by MLI." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_ip", + "output": "h2o ip refers to IP address for use by MLI." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_port", + "output": "h2o port refers to Port of H2O instance for use by MLI. Each H2O node has an internal port (web port+1, so by default port 12349) for internal node-to-node communication" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o_port", + "output": "h2o port refers to Port of H2O instance for use by MLI. Each H2O node has an internal port (web port+1, so by default port 12349) for internal node-to-node communication" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "h2o port", + "output": "h2o port refers to Port of H2O instance for use by MLI. Each H2O node has an internal port (web port+1, so by default port 12349) for internal node-to-node communication" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "h2o port refers to Port of H2O instance for use by MLI. Each H2O node has an internal port (web port+1, so by default port 12349) for internal node-to-node communication" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting h2o_port", + "output": "h2o port refers to Port of H2O instance for use by MLI. Each H2O node has an internal port (web port+1, so by default port 12349) for internal node-to-node communication" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting h2o_port", + "output": "h2o port refers to Port of H2O instance for use by MLI. 
Each H2O node has an internal port (web port+1, so by default port 12349) for internal node-to-node communication" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ip", + "output": "ip refers to IP address and port for Driverless AI HTTP server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ip", + "output": "ip refers to IP address and port for Driverless AI HTTP server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "ip", + "output": "ip refers to IP address and port for Driverless AI HTTP server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "ip refers to IP address and port for Driverless AI HTTP server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting ip", + "output": "ip refers to IP address and port for Driverless AI HTTP server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting ip", + "output": "ip refers to IP address and port for Driverless AI HTTP server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "port", + "output": "port refers to IP address and port for Driverless AI HTTP server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "port", + "output": "port refers to IP address and port for Driverless AI HTTP server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "port", + "output": "port refers to IP address and port for Driverless AI HTTP server." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "port refers to IP address and port for Driverless AI HTTP server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting port", + "output": "port refers to IP address and port for Driverless AI HTTP server." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting port", + "output": "port refers to IP address and port for Driverless AI HTTP server." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "port_range", + "output": "port range refers to A list of two integers indicating the port range to search over, and dynamically find an open port to bind to (e.g., [11111,20000])." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "port_range", + "output": "port range refers to A list of two integers indicating the port range to search over, and dynamically find an open port to bind to (e.g., [11111,20000])." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "port range", + "output": "port range refers to A list of two integers indicating the port range to search over, and dynamically find an open port to bind to (e.g., [11111,20000])." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "port range refers to A list of two integers indicating the port range to search over, and dynamically find an open port to bind to (e.g., [11111,20000])." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting port_range", + "output": "port range refers to A list of two integers indicating the port range to search over, and dynamically find an open port to bind to (e.g., [11111,20000])." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting port_range", + "output": "port range refers to A list of two integers indicating the port range to search over, and dynamically find an open port to bind to (e.g., [11111,20000])." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "strict_version_check", + "output": "strict version check refers to Strict version check for DAI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "strict_version_check", + "output": "strict version check refers to Strict version check for DAI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "strict version check", + "output": "strict version check refers to Strict version check for DAI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "strict version check refers to Strict version check for DAI" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting strict_version_check", + "output": "strict version check refers to Strict version check for DAI" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting strict_version_check", + "output": "strict version check refers to Strict version check for DAI" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": 
"max_file_upload_size", + "output": "max file upload size refers to File upload limit (default 100GB)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max_file_upload_size", + "output": "max file upload size refers to File upload limit (default 100GB)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "max file upload size", + "output": "max file upload size refers to File upload limit (default 100GB)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "max file upload size refers to File upload limit (default 100GB)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting max_file_upload_size", + "output": "max file upload size refers to File upload limit (default 100GB)" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting max_file_upload_size", + "output": "max file upload size refers to File upload limit (default 100GB)" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_directory", + "output": "data directory refers to Data directory. All application data and files related datasets and experiments are stored in this directory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_directory", + "output": "data directory refers to Data directory. All application data and files related datasets and experiments are stored in this directory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data directory", + "output": "data directory refers to Data directory. 
All application data and files related datasets and experiments are stored in this directory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "data directory refers to Data directory. All application data and files related datasets and experiments are stored in this directory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting data_directory", + "output": "data directory refers to Data directory. All application data and files related datasets and experiments are stored in this directory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting data_directory", + "output": "data directory refers to Data directory. All application data and files related datasets and experiments are stored in this directory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datasets_directory", + "output": "datasets directory refers to Datasets directory. If set, it will denote the location from which all datasets will be read from and written into, typically this location shall be configured to be on an external file system to allow for a more granular control to just the datasets volume. If empty then will default to data_directory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datasets_directory", + "output": "datasets directory refers to Datasets directory. If set, it will denote the location from which all datasets will be read from and written into, typically this location shall be configured to be on an external file system to allow for a more granular control to just the datasets volume. If empty then will default to data_directory." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "datasets directory", + "output": "datasets directory refers to Datasets directory. If set, it will denote the location from which all datasets will be read from and written into, typically this location shall be configured to be on an external file system to allow for a more granular control to just the datasets volume. If empty then will default to data_directory." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "datasets directory refers to Datasets directory. If set, it will denote the location from which all datasets will be read from and written into, typically this location shall be configured to be on an external file system to allow for a more granular control to just the datasets volume. If empty then will default to data_directory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting datasets_directory", + "output": "datasets directory refers to Datasets directory. If set, it will denote the location from which all datasets will be read from and written into, typically this location shall be configured to be on an external file system to allow for a more granular control to just the datasets volume. If empty then will default to data_directory." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting datasets_directory", + "output": "datasets directory refers to Datasets directory. If set, it will denote the location from which all datasets will be read from and written into, typically this location shall be configured to be on an external file system to allow for a more granular control to just the datasets volume. If empty then will default to data_directory." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_connectors_logs_directory", + "output": "data connectors logs directory refers to Path to the directory where the logs of HDFS, Hive, JDBC, and KDB+ data connectors will be saved." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data_connectors_logs_directory", + "output": "data connectors logs directory refers to Path to the directory where the logs of HDFS, Hive, JDBC, and KDB+ data connectors will be saved." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "data connectors logs directory", + "output": "data connectors logs directory refers to Path to the directory where the logs of HDFS, Hive, JDBC, and KDB+ data connectors will be saved." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "data connectors logs directory refers to Path to the directory where the logs of HDFS, Hive, JDBC, and KDB+ data connectors will be saved." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting data_connectors_logs_directory", + "output": "data connectors logs directory refers to Path to the directory where the logs of HDFS, Hive, JDBC, and KDB+ data connectors will be saved." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting data_connectors_logs_directory", + "output": "data connectors logs directory refers to Path to the directory where the logs of HDFS, Hive, JDBC, and KDB+ data connectors will be saved." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "server_logs_sub_directory", + "output": "server logs sub directory refers to Subdirectory within data_directory to store server logs." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "server_logs_sub_directory", + "output": "server logs sub directory refers to Subdirectory within data_directory to store server logs." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "server logs sub directory", + "output": "server logs sub directory refers to Subdirectory within data_directory to store server logs." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "server logs sub directory refers to Subdirectory within data_directory to store server logs." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting server_logs_sub_directory", + "output": "server logs sub directory refers to Subdirectory within data_directory to store server logs." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting server_logs_sub_directory", + "output": "server logs sub directory refers to Subdirectory within data_directory to store server logs." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pid_sub_directory", + "output": "pid sub directory refers to Subdirectory within data_directory to store pid files for controlling kill/stop of DAI servers." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pid_sub_directory", + "output": "pid sub directory refers to Subdirectory within data_directory to store pid files for controlling kill/stop of DAI servers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "pid sub directory", + "output": "pid sub directory refers to Subdirectory within data_directory to store pid files for controlling kill/stop of DAI servers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "pid sub directory refers to Subdirectory within data_directory to store pid files for controlling kill/stop of DAI servers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting pid_sub_directory", + "output": "pid sub directory refers to Subdirectory within data_directory to store pid files for controlling kill/stop of DAI servers." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting pid_sub_directory", + "output": "pid sub directory refers to Subdirectory within data_directory to store pid files for controlling kill/stop of DAI servers." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mapr_tickets_directory", + "output": "mapr tickets directory refers to Path to the directory which will be use to save MapR tickets when MapR multi-user mode is enabled. This is applicable only when enable_mapr_multi_user_mode is set to true. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mapr_tickets_directory", + "output": "mapr tickets directory refers to Path to the directory which will be use to save MapR tickets when MapR multi-user mode is enabled. This is applicable only when enable_mapr_multi_user_mode is set to true. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mapr tickets directory", + "output": "mapr tickets directory refers to Path to the directory which will be use to save MapR tickets when MapR multi-user mode is enabled. This is applicable only when enable_mapr_multi_user_mode is set to true. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mapr tickets directory refers to Path to the directory which will be use to save MapR tickets when MapR multi-user mode is enabled. This is applicable only when enable_mapr_multi_user_mode is set to true. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mapr_tickets_directory", + "output": "mapr tickets directory refers to Path to the directory which will be use to save MapR tickets when MapR multi-user mode is enabled. This is applicable only when enable_mapr_multi_user_mode is set to true. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mapr_tickets_directory", + "output": "mapr tickets directory refers to Path to the directory which will be use to save MapR tickets when MapR multi-user mode is enabled. This is applicable only when enable_mapr_multi_user_mode is set to true. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mapr_tickets_duration_minutes", + "output": "mapr tickets duration minutes refers to MapR tickets duration in minutes, if set to -1, it will use the default value (not specified in maprlogin command), otherwise will be the specified configuration value but no less than one day. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mapr_tickets_duration_minutes", + "output": "mapr tickets duration minutes refers to MapR tickets duration in minutes, if set to -1, it will use the default value (not specified in maprlogin command), otherwise will be the specified configuration value but no less than one day. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "mapr tickets duration minutes", + "output": "mapr tickets duration minutes refers to MapR tickets duration in minutes, if set to -1, it will use the default value (not specified in maprlogin command), otherwise will be the specified configuration value but no less than one day. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "mapr tickets duration minutes refers to MapR tickets duration in minutes, if set to -1, it will use the default value (not specified in maprlogin command), otherwise will be the specified configuration value but no less than one day. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting mapr_tickets_duration_minutes", + "output": "mapr tickets duration minutes refers to MapR tickets duration in minutes, if set to -1, it will use the default value (not specified in maprlogin command), otherwise will be the specified configuration value but no less than one day. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting mapr_tickets_duration_minutes", + "output": "mapr tickets duration minutes refers to MapR tickets duration in minutes, if set to -1, it will use the default value (not specified in maprlogin command), otherwise will be the specified configuration value but no less than one day. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "remove_uploads_temp_files_server_start", + "output": "remove uploads temp files server start refers to Whether at server start to delete all temporary uploaded files, left over from failed uploads. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "remove_uploads_temp_files_server_start", + "output": "remove uploads temp files server start refers to Whether at server start to delete all temporary uploaded files, left over from failed uploads. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "remove uploads temp files server start", + "output": "remove uploads temp files server start refers to Whether at server start to delete all temporary uploaded files, left over from failed uploads. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "remove uploads temp files server start refers to Whether at server start to delete all temporary uploaded files, left over from failed uploads. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting remove_uploads_temp_files_server_start", + "output": "remove uploads temp files server start refers to Whether at server start to delete all temporary uploaded files, left over from failed uploads. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting remove_uploads_temp_files_server_start", + "output": "remove uploads temp files server start refers to Whether at server start to delete all temporary uploaded files, left over from failed uploads. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "remove_temp_files_server_start", + "output": "remove temp files server start refers to Whether to run through entire data directory and remove all temporary files. Can lead to slow start-up time if have large number (much greater than 100) of experiments. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "remove_temp_files_server_start", + "output": "remove temp files server start refers to Whether to run through entire data directory and remove all temporary files. Can lead to slow start-up time if have large number (much greater than 100) of experiments. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "remove temp files server start", + "output": "remove temp files server start refers to Whether to run through entire data directory and remove all temporary files. Can lead to slow start-up time if have large number (much greater than 100) of experiments. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "remove temp files server start refers to Whether to run through entire data directory and remove all temporary files. Can lead to slow start-up time if have large number (much greater than 100) of experiments. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting remove_temp_files_server_start", + "output": "remove temp files server start refers to Whether to run through entire data directory and remove all temporary files. Can lead to slow start-up time if have large number (much greater than 100) of experiments. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting remove_temp_files_server_start", + "output": "remove temp files server start refers to Whether to run through entire data directory and remove all temporary files. Can lead to slow start-up time if have large number (much greater than 100) of experiments. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "remove_temp_files_aborted_experiments", + "output": "remove temp files aborted experiments refers to Whether to delete temporary files after experiment is aborted/cancelled. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "remove_temp_files_aborted_experiments", + "output": "remove temp files aborted experiments refers to Whether to delete temporary files after experiment is aborted/cancelled. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "remove temp files aborted experiments", + "output": "remove temp files aborted experiments refers to Whether to delete temporary files after experiment is aborted/cancelled. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "remove temp files aborted experiments refers to Whether to delete temporary files after experiment is aborted/cancelled. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting remove_temp_files_aborted_experiments", + "output": "remove temp files aborted experiments refers to Whether to delete temporary files after experiment is aborted/cancelled. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting remove_temp_files_aborted_experiments", + "output": "remove temp files aborted experiments refers to Whether to delete temporary files after experiment is aborted/cancelled. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "usage_stats_opt_in", + "output": "usage stats opt in refers to Whether to opt in to usage statistics and bug reporting" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "usage_stats_opt_in", + "output": "usage stats opt in refers to Whether to opt in to usage statistics and bug reporting" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "usage stats opt in", + "output": "usage stats opt in refers to Whether to opt in to usage statistics and bug reporting" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "usage stats opt in refers to Whether to opt in to usage statistics and bug reporting" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting usage_stats_opt_in", + "output": "usage stats opt in refers to Whether to opt in to usage statistics and bug reporting" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting usage_stats_opt_in", + "output": "usage stats opt in refers to Whether to opt in to usage statistics and bug reporting" + }, 
+ { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "core_site_xml_path", + "output": "core site xml path refers to Configurations for a HDFS data source Path of hdfs coresite.xml core_site_xml_path is deprecated, please use hdfs_config_path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "core_site_xml_path", + "output": "core site xml path refers to Configurations for a HDFS data source Path of hdfs coresite.xml core_site_xml_path is deprecated, please use hdfs_config_path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "core site xml path", + "output": "core site xml path refers to Configurations for a HDFS data source Path of hdfs coresite.xml core_site_xml_path is deprecated, please use hdfs_config_path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "core site xml path refers to Configurations for a HDFS data source Path of hdfs coresite.xml core_site_xml_path is deprecated, please use hdfs_config_path" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting core_site_xml_path", + "output": "core site xml path refers to Configurations for a HDFS data source Path of hdfs coresite.xml core_site_xml_path is deprecated, please use hdfs_config_path" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting core_site_xml_path", + "output": "core site xml path refers to Configurations for a HDFS data source Path of hdfs coresite.xml core_site_xml_path is deprecated, please use hdfs_config_path" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_config_path", + "output": 
"hdfs config path refers to (Required) HDFS config folder path. Can contain multiple config files." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_config_path", + "output": "hdfs config path refers to (Required) HDFS config folder path. Can contain multiple config files." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs config path", + "output": "hdfs config path refers to (Required) HDFS config folder path. Can contain multiple config files." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hdfs config path refers to (Required) HDFS config folder path. Can contain multiple config files." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hdfs_config_path", + "output": "hdfs config path refers to (Required) HDFS config folder path. Can contain multiple config files." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hdfs_config_path", + "output": "hdfs config path refers to (Required) HDFS config folder path. Can contain multiple config files." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "key_tab_path", + "output": "key tab path refers to Path of the principal key tab file. Required when hdfs_auth_type='principal'. key_tab_path is deprecated, please use hdfs_keytab_path " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "key_tab_path", + "output": "key tab path refers to Path of the principal key tab file. Required when hdfs_auth_type='principal'. 
key_tab_path is deprecated, please use hdfs_keytab_path " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "key tab path", + "output": "key tab path refers to Path of the principal key tab file. Required when hdfs_auth_type='principal'. key_tab_path is deprecated, please use hdfs_keytab_path " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "key tab path refers to Path of the principal key tab file. Required when hdfs_auth_type='principal'. key_tab_path is deprecated, please use hdfs_keytab_path " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting key_tab_path", + "output": "key tab path refers to Path of the principal key tab file. Required when hdfs_auth_type='principal'. key_tab_path is deprecated, please use hdfs_keytab_path " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting key_tab_path", + "output": "key tab path refers to Path of the principal key tab file. Required when hdfs_auth_type='principal'. key_tab_path is deprecated, please use hdfs_keytab_path " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_keytab_path", + "output": "hdfs keytab path refers to Path of the principal key tab file. Required when hdfs_auth_type='principal'. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs_keytab_path", + "output": "hdfs keytab path refers to Path of the principal key tab file. Required when hdfs_auth_type='principal'. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "hdfs keytab path", + "output": "hdfs keytab path refers to Path of the principal key tab file. Required when hdfs_auth_type='principal'. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "hdfs keytab path refers to Path of the principal key tab file. Required when hdfs_auth_type='principal'. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting hdfs_keytab_path", + "output": "hdfs keytab path refers to Path of the principal key tab file. Required when hdfs_auth_type='principal'. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting hdfs_keytab_path", + "output": "hdfs keytab path refers to Path of the principal key tab file. Required when hdfs_auth_type='principal'. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "preview_cache_upon_server_exit", + "output": "preview cache upon server exit refers to Whether to delete preview cache on server exit" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "preview_cache_upon_server_exit", + "output": "preview cache upon server exit refers to Whether to delete preview cache on server exit" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "preview cache upon server exit", + "output": "preview cache upon server exit refers to Whether to delete preview cache on server exit" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "preview cache upon server exit refers to Whether to delete preview cache on server exit" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting preview_cache_upon_server_exit", + "output": "preview cache upon server exit refers to Whether to delete preview cache on server exit" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting preview_cache_upon_server_exit", + "output": "preview cache upon server exit refers to Whether to delete preview cache on server exit" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "all_tasks_visible_to_users", + "output": "all tasks visible to users refers to When this setting is enabled, any user can see all tasks running in the system, including their owner and an identification key. If this setting is turned off, user can see only their own tasks." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "all_tasks_visible_to_users", + "output": "all tasks visible to users refers to Enable users to see all tasks in task manager: When this setting is enabled, any user can see all tasks running in the system, including their owner and an identification key. If this setting is turned off, user can see only their own tasks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "all tasks visible to users", + "output": "all tasks visible to users refers to Enable users to see all tasks in task manager: When this setting is enabled, any user can see all tasks running in the system, including their owner and an identification key. If this setting is turned off, user can see only their own tasks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable users to see all tasks in task manager: ", + "output": "all tasks visible to users refers to Enable users to see all tasks in task manager: When this setting is enabled, any user can see all tasks running in the system, including their owner and an identification key. If this setting is turned off, user can see only their own tasks." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting all_tasks_visible_to_users", + "output": "all tasks visible to users refers to When this setting is enabled, any user can see all tasks running in the system, including their owner and an identification key. If this setting is turned off, user can see only their own tasks." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting all_tasks_visible_to_users", + "output": "all tasks visible to users refers to Enable users to see all tasks in task manager: When this setting is enabled, any user can see all tasks running in the system, including their owner and an identification key. If this setting is turned off, user can see only their own tasks." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_health_api", + "output": "enable health api refers to When enabled, server exposes Health API at /apis/health/v1, which provides system overview and utilization statistics" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_health_api", + "output": "enable health api refers to Enable Health API: When enabled, server exposes Health API at /apis/health/v1, which provides system overview and utilization statistics" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable health api", + "output": "enable health api refers to Enable Health API: When enabled, server exposes Health API at /apis/health/v1, which provides system overview and utilization statistics" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "Enable Health API: ", + "output": "enable health api refers to Enable Health API: When enabled, server exposes Health API at /apis/health/v1, which provides system overview and utilization statistics" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_health_api", + "output": "enable health api refers to When enabled, server exposes Health API at /apis/health/v1, which provides system overview and utilization 
statistics" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_health_api", + "output": "enable health api refers to Enable Health API: When enabled, server exposes Health API at /apis/health/v1, which provides system overview and utilization statistics" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners_inherit_env_variables", + "output": "listeners inherit env variables refers to When enabled, the notification scripts will inherit the parent's process (DriverlessAI) environment variables. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners_inherit_env_variables", + "output": "listeners inherit env variables refers to When enabled, the notification scripts will inherit the parent's process (DriverlessAI) environment variables. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners inherit env variables", + "output": "listeners inherit env variables refers to When enabled, the notification scripts will inherit the parent's process (DriverlessAI) environment variables. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "listeners inherit env variables refers to When enabled, the notification scripts will inherit the parent's process (DriverlessAI) environment variables. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting listeners_inherit_env_variables", + "output": "listeners inherit env variables refers to When enabled, the notification scripts will inherit the parent's process (DriverlessAI) environment variables. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting listeners_inherit_env_variables", + "output": "listeners inherit env variables refers to When enabled, the notification scripts will inherit the parent's process (DriverlessAI) environment variables. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners_experiment_start", + "output": "listeners experiment start refers to Notification scripts - the variable points to a location of script which is executed at given event in experiment lifecycle - the script should have executable flag enabled - use of absolute path is suggested The on experiment start notification script location" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners_experiment_start", + "output": "listeners experiment start refers to Notification scripts - the variable points to a location of script which is executed at given event in experiment lifecycle - the script should have executable flag enabled - use of absolute path is suggested The on experiment start notification script location" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners experiment start", + "output": "listeners experiment start refers to Notification scripts - the variable points to a location of script which is executed at given event in experiment lifecycle - the script should have executable flag enabled - use of absolute path is suggested The on experiment start notification script location" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "listeners experiment start refers to Notification scripts - the variable points to a location of script which is executed at given event in 
experiment lifecycle - the script should have executable flag enabled - use of absolute path is suggested The on experiment start notification script location" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting listeners_experiment_start", + "output": "listeners experiment start refers to Notification scripts - the variable points to a location of script which is executed at given event in experiment lifecycle - the script should have executable flag enabled - use of absolute path is suggested The on experiment start notification script location" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting listeners_experiment_start", + "output": "listeners experiment start refers to Notification scripts - the variable points to a location of script which is executed at given event in experiment lifecycle - the script should have executable flag enabled - use of absolute path is suggested The on experiment start notification script location" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners_experiment_done", + "output": "listeners experiment done refers to The on experiment finished notification script location" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners_experiment_done", + "output": "listeners experiment done refers to The on experiment finished notification script location" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners experiment done", + "output": "listeners experiment done refers to The on experiment finished notification script location" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "listeners 
experiment done refers to The on experiment finished notification script location" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting listeners_experiment_done", + "output": "listeners experiment done refers to The on experiment finished notification script location" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting listeners_experiment_done", + "output": "listeners experiment done refers to The on experiment finished notification script location" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners_mojo_done", + "output": "listeners mojo done refers to Notification script triggered when building of MOJO pipeline for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners_mojo_done", + "output": "listeners mojo done refers to Notification script triggered when building of MOJO pipeline for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners mojo done", + "output": "listeners mojo done refers to Notification script triggered when building of MOJO pipeline for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "listeners mojo done refers to Notification script triggered when building of MOJO pipeline for experiment is finished. The value should be an absolute path to executable script. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting listeners_mojo_done", + "output": "listeners mojo done refers to Notification script triggered when building of MOJO pipeline for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting listeners_mojo_done", + "output": "listeners mojo done refers to Notification script triggered when building of MOJO pipeline for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners_autodoc_done", + "output": "listeners autodoc done refers to Notification script triggered when rendering of AutoDoc for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners_autodoc_done", + "output": "listeners autodoc done refers to Notification script triggered when rendering of AutoDoc for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners autodoc done", + "output": "listeners autodoc done refers to Notification script triggered when rendering of AutoDoc for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "listeners autodoc done refers to Notification script triggered when rendering of AutoDoc for experiment is finished. The value should be an absolute path to executable script. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting listeners_autodoc_done", + "output": "listeners autodoc done refers to Notification script triggered when rendering of AutoDoc for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting listeners_autodoc_done", + "output": "listeners autodoc done refers to Notification script triggered when rendering of AutoDoc for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners_scoring_pipeline_done", + "output": "listeners scoring pipeline done refers to Notification script triggered when building of python scoring pipeline for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners_scoring_pipeline_done", + "output": "listeners scoring pipeline done refers to Notification script triggered when building of python scoring pipeline for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners scoring pipeline done", + "output": "listeners scoring pipeline done refers to Notification script triggered when building of python scoring pipeline for experiment is finished. The value should be an absolute path to executable script. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "listeners scoring pipeline done refers to Notification script triggered when building of python scoring pipeline for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting listeners_scoring_pipeline_done", + "output": "listeners scoring pipeline done refers to Notification script triggered when building of python scoring pipeline for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting listeners_scoring_pipeline_done", + "output": "listeners scoring pipeline done refers to Notification script triggered when building of python scoring pipeline for experiment is finished. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners_experiment_artifacts_done", + "output": "listeners experiment artifacts done refers to Notification script triggered when experiment and all its artifacts selected at the beginning of experiment are finished building. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners_experiment_artifacts_done", + "output": "listeners experiment artifacts done refers to Notification script triggered when experiment and all its artifacts selected at the beginning of experiment are finished building. The value should be an absolute path to executable script. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "listeners experiment artifacts done", + "output": "listeners experiment artifacts done refers to Notification script triggered when experiment and all its artifacts selected at the beginning of experiment are finished building. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "listeners experiment artifacts done refers to Notification script triggered when experiment and all its artifacts selected at the beginning of experiment are finished building. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting listeners_experiment_artifacts_done", + "output": "listeners experiment artifacts done refers to Notification script triggered when experiment and all its artifacts selected at the beginning of experiment are finished building. The value should be an absolute path to executable script. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting listeners_experiment_artifacts_done", + "output": "listeners experiment artifacts done refers to Notification script triggered when experiment and all its artifacts selected at the beginning of experiment are finished building. The value should be an absolute path to executable script. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_quick_benchmark", + "output": "enable quick benchmark refers to Whether to run quick performance benchmark at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_quick_benchmark", + "output": "enable quick benchmark refers to Whether to run quick performance benchmark at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable quick benchmark", + "output": "enable quick benchmark refers to Whether to run quick performance benchmark at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable quick benchmark refers to Whether to run quick performance benchmark at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_quick_benchmark", + "output": "enable quick benchmark refers to Whether to run quick performance benchmark at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_quick_benchmark", + "output": "enable quick benchmark refers to Whether to run quick performance benchmark at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_extended_benchmark", + "output": "enable extended benchmark refers to Whether to run extended performance benchmark at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_extended_benchmark", + "output": "enable extended 
benchmark refers to Whether to run extended performance benchmark at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable extended benchmark", + "output": "enable extended benchmark refers to Whether to run extended performance benchmark at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable extended benchmark refers to Whether to run extended performance benchmark at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_extended_benchmark", + "output": "enable extended benchmark refers to Whether to run extended performance benchmark at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_extended_benchmark", + "output": "enable extended benchmark refers to Whether to run extended performance benchmark at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "extended_benchmark_scale_num_rows", + "output": "extended benchmark scale num rows refers to Scaling factor for number of rows for extended performance benchmark. For rigorous performance benchmarking, values of 1 or larger are recommended." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "extended_benchmark_scale_num_rows", + "output": "extended benchmark scale num rows refers to Scaling factor for number of rows for extended performance benchmark. For rigorous performance benchmarking, values of 1 or larger are recommended." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "extended benchmark scale num rows", + "output": "extended benchmark scale num rows refers to Scaling factor for number of rows for extended performance benchmark. For rigorous performance benchmarking, values of 1 or larger are recommended." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "extended benchmark scale num rows refers to Scaling factor for number of rows for extended performance benchmark. For rigorous performance benchmarking, values of 1 or larger are recommended." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting extended_benchmark_scale_num_rows", + "output": "extended benchmark scale num rows refers to Scaling factor for number of rows for extended performance benchmark. For rigorous performance benchmarking, values of 1 or larger are recommended." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting extended_benchmark_scale_num_rows", + "output": "extended benchmark scale num rows refers to Scaling factor for number of rows for extended performance benchmark. For rigorous performance benchmarking, values of 1 or larger are recommended." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "extended_benchmark_num_cols", + "output": "extended benchmark num cols refers to Number of columns for extended performance benchmark." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "extended_benchmark_num_cols", + "output": "extended benchmark num cols refers to Number of columns for extended performance benchmark." 
+ }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "extended benchmark num cols", + "output": "extended benchmark num cols refers to Number of columns for extended performance benchmark." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "extended benchmark num cols refers to Number of columns for extended performance benchmark." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting extended_benchmark_num_cols", + "output": "extended benchmark num cols refers to Number of columns for extended performance benchmark." + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting extended_benchmark_num_cols", + "output": "extended benchmark num cols refers to Number of columns for extended performance benchmark." + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark_memory_timeout", + "output": "benchmark memory timeout refers to Seconds to allow for testing memory bandwidth by generating numpy frames" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark_memory_timeout", + "output": "benchmark memory timeout refers to Seconds to allow for testing memory bandwidth by generating numpy frames" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark memory timeout", + "output": "benchmark memory timeout refers to Seconds to allow for testing memory bandwidth by generating numpy frames" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "benchmark memory timeout refers to Seconds to 
allow for testing memory bandwidth by generating numpy frames" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting benchmark_memory_timeout", + "output": "benchmark memory timeout refers to Seconds to allow for testing memory bandwidth by generating numpy frames" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting benchmark_memory_timeout", + "output": "benchmark memory timeout refers to Seconds to allow for testing memory bandwidth by generating numpy frames" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark_memory_vm_fraction", + "output": "benchmark memory vm fraction refers to Maximum portion of vm total to use for numpy memory benchmark" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark_memory_vm_fraction", + "output": "benchmark memory vm fraction refers to Maximum portion of vm total to use for numpy memory benchmark" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark memory vm fraction", + "output": "benchmark memory vm fraction refers to Maximum portion of vm total to use for numpy memory benchmark" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "benchmark memory vm fraction refers to Maximum portion of vm total to use for numpy memory benchmark" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting benchmark_memory_vm_fraction", + "output": "benchmark memory vm fraction refers to Maximum portion of vm total to use for numpy memory benchmark" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert 
setting benchmark_memory_vm_fraction", + "output": "benchmark memory vm fraction refers to Maximum portion of vm total to use for numpy memory benchmark" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark_memory_max_cols", + "output": "benchmark memory max cols refers to Maximum number of columns to use for numpy memory benchmark" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark_memory_max_cols", + "output": "benchmark memory max cols refers to Maximum number of columns to use for numpy memory benchmark" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "benchmark memory max cols", + "output": "benchmark memory max cols refers to Maximum number of columns to use for numpy memory benchmark" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "benchmark memory max cols refers to Maximum number of columns to use for numpy memory benchmark" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting benchmark_memory_max_cols", + "output": "benchmark memory max cols refers to Maximum number of columns to use for numpy memory benchmark" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting benchmark_memory_max_cols", + "output": "benchmark memory max cols refers to Maximum number of columns to use for numpy memory benchmark" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable_startup_checks", + "output": "enable startup checks refers to Whether to run quick startup checks at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain 
the following expert setting for Driverless AI", + "input": "enable_startup_checks", + "output": "enable startup checks refers to Whether to run quick startup checks at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "enable startup checks", + "output": "enable startup checks refers to Whether to run quick startup checks at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "enable startup checks refers to Whether to run quick startup checks at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting enable_startup_checks", + "output": "enable startup checks refers to Whether to run quick startup checks at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting enable_startup_checks", + "output": "enable startup checks refers to Whether to run quick startup checks at start of application" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "application_id", + "output": "application id refers to Application ID override, which should uniquely identify the instance" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "application_id", + "output": "application id refers to Application ID override, which should uniquely identify the instance" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "application id", + "output": "application id refers to Application ID override, which should uniquely identify the instance" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert 
setting for Driverless AI", + "input": "", + "output": "application id refers to Application ID override, which should uniquely identify the instance" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting application_id", + "output": "application id refers to Application ID override, which should uniquely identify the instance" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting application_id", + "output": "application id refers to Application ID override, which should uniquely identify the instance" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "db_backend", + "output": "db backend refers to Specifies the DB backend which application uses. Possible options are: - *legacy* - Uses legacy SQLite with entity JSON blobs - *sqlite* - Uses relational SQLite separate entity tables" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "db_backend", + "output": "db backend refers to Specifies the DB backend which application uses. Possible options are: - *legacy* - Uses legacy SQLite with entity JSON blobs - *sqlite* - Uses relational SQLite separate entity tables" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "db backend", + "output": "db backend refers to Specifies the DB backend which application uses. Possible options are: - *legacy* - Uses legacy SQLite with entity JSON blobs - *sqlite* - Uses relational SQLite separate entity tables" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "db backend refers to Specifies the DB backend which application uses. 
Possible options are: - *legacy* - Uses legacy SQLite with entity JSON blobs - *sqlite* - Uses relational SQLite separate entity tables" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting db_backend", + "output": "db backend refers to Specifies the DB backend which application uses. Possible options are: - *legacy* - Uses legacy SQLite with entity JSON blobs - *sqlite* - Uses relational SQLite separate entity tables" + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting db_backend", + "output": "db backend refers to Specifies the DB backend which application uses. Possible options are: - *legacy* - Uses legacy SQLite with entity JSON blobs - *sqlite* - Uses relational SQLite separate entity tables" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_fork_timeout", + "output": "main server fork timeout refers to After how many seconds to abort MLI recipe execution plan or recipe compatibility checks. Blocks main server from all activities, so long timeout is not desired, esp. in case of hanging processes, while a short timeout can too often lead to abortions on busy system. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main_server_fork_timeout", + "output": "main server fork timeout refers to After how many seconds to abort MLI recipe execution plan or recipe compatibility checks. Blocks main server from all activities, so long timeout is not desired, esp. in case of hanging processes, while a short timeout can too often lead to abortions on busy system. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "main server fork timeout", + "output": "main server fork timeout refers to After how many seconds to abort MLI recipe execution plan or recipe compatibility checks. Blocks main server from all activities, so long timeout is not desired, esp. in case of hanging processes, while a short timeout can too often lead to abortions on busy system. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "main server fork timeout refers to After how many seconds to abort MLI recipe execution plan or recipe compatibility checks. Blocks main server from all activities, so long timeout is not desired, esp. in case of hanging processes, while a short timeout can too often lead to abortions on busy system. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting main_server_fork_timeout", + "output": "main server fork timeout refers to After how many seconds to abort MLI recipe execution plan or recipe compatibility checks. Blocks main server from all activities, so long timeout is not desired, esp. in case of hanging processes, while a short timeout can too often lead to abortions on busy system. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting main_server_fork_timeout", + "output": "main server fork timeout refers to After how many seconds to abort MLI recipe execution plan or recipe compatibility checks. Blocks main server from all activities, so long timeout is not desired, esp. in case of hanging processes, while a short timeout can too often lead to abortions on busy system. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "audit_log_retention_period", + "output": "audit log retention period refers to After how many days the audit log records are removed. Set equal to 0 to disable removal of old records. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "audit_log_retention_period", + "output": "audit log retention period refers to After how many days the audit log records are removed. Set equal to 0 to disable removal of old records. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "audit log retention period", + "output": "audit log retention period refers to After how many days the audit log records are removed. Set equal to 0 to disable removal of old records. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "audit log retention period refers to After how many days the audit log records are removed. Set equal to 0 to disable removal of old records. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting audit_log_retention_period", + "output": "audit log retention period refers to After how many days the audit log records are removed. Set equal to 0 to disable removal of old records. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting audit_log_retention_period", + "output": "audit log retention period refers to After how many days the audit log records are removed. Set equal to 0 to disable removal of old records. 
" + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dataset_tmp_upload_file_retention_time_min", + "output": "dataset tmp upload file retention time min refers to Time to wait after performing a cleanup of temporary files for in-browser dataset upload. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dataset_tmp_upload_file_retention_time_min", + "output": "dataset tmp upload file retention time min refers to Time to wait after performing a cleanup of temporary files for in-browser dataset upload. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "dataset tmp upload file retention time min", + "output": "dataset tmp upload file retention time min refers to Time to wait after performing a cleanup of temporary files for in-browser dataset upload. " + }, + { + "prompt_type": "human_bot", + "instruction": "Explain the following expert setting for Driverless AI", + "input": "", + "output": "dataset tmp upload file retention time min refers to Time to wait after performing a cleanup of temporary files for in-browser dataset upload. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a short explanation of the expert setting dataset_tmp_upload_file_retention_time_min", + "output": "dataset tmp upload file retention time min refers to Time to wait after performing a cleanup of temporary files for in-browser dataset upload. " + }, + { + "prompt_type": "human_bot", + "instruction": "Provide a detailed explanation of the expert setting dataset_tmp_upload_file_retention_time_min", + "output": "dataset tmp upload file retention time min refers to Time to wait after performing a cleanup of temporary files for in-browser dataset upload. 
" + }, + { + "output": "Monotonicity Constraints\nMonotonicity can be enforced for the feature engineering pipeline, the\nfitted model(s), or the entire modeling pipeline. Monotonicity constraints enforce a monotonic relationship between a\nspecified feature and the target prediction. For example, given a model\ntrained to predict housing prices, you may want to enforce that the\nmodel predicts higher housing prices with increasing lot size and lower\nhousing prices with increasing neighborhood crime rate. When monotonicity constraints are enabled, Driverless AI automatically\ndetermines if monotonicity is present and then enforces it through all\nor part of the modeling pipelines. Depending on the level of correlation\nbetween a feature and the target, Driverless AI assigns positive,\nnegative, or no monotonicity constraints. Specifically, monotonicity is\nenforced if the absolute correlation is greater than a specific\nthreshold (default 0.1). To build an entire monotonic gbm modeling pipeline with a single click,\nuser can select the monotonic_gbm recipe from\nthe Experiment settings of the expert panel.", + "prompt_type": "plain" + }, + { + "output": "For details see\nMonotonic GBM in pipeline building recipe\nunder experiment expert settings. For more granular control, over thresholds, manual override of\nmonotonicity constraints etc, refer to\nthese settings under feature settings of the expert\npanel of an experiment. To build monotonic fitted models, ensure that:\n- The Interpretability setting for the experiment must be greater than\n or equal to the\n monotonicity_constraints_interpretability_switch ,\n that has a default value of 7). So Interpretability setting for the\n experiment and/or monotonicity_constraints_interpretability_switch\n can be toggled to achieve this. - The final model must be linear (for example, GLMModel) or otherwise\n support monotonic constraints (LightGBMModel, XGBoostGBMModel,\n XGBoostDartModel or Decision Tree models). 
These can be set to 'ON'\n from the Model settings of the expert panel. The ensemble level can\n be toggled by setting fixed_ensemble_level \n level.", + "prompt_type": "plain" + }, + { + "output": "- Drop features with low correlation to the target. See\n monotonicity constraints drop low correlation features . - For regression case, make sure the\n target_transformer is monotonic like 'identity'\n or 'identity_noclip'. This can be toggled under experiment settings\n of the expert panel. and for monotonic feature engineering:\n- Disable features engineered from multi-feature interaction i.e set\n max_feature_interaction_depth to 1\n in feature settings under expert settings panel. - Disable numerical to categorical feature transformations i.e set\n num_as_cat to False in the feature settings under\n expert settings panel. - For numeric features, allow only monotonic transformations i.e set\n included_transformers to\n ['OriginalTransformer'] only under recipe settings of the expert\n panel. The following table lists an example of settings to create a monotonic\nDriverless AI modeling pipeline.", + "prompt_type": "plain" + }, + { + "output": "Before You Begin\n\ndata-sampling missing-values-handling imputation-in-dai reproducibility\ntransformations internal-validation ensemble-learning\nmonotonicity-constraints leakage-shift-detection vi imbalanced-modeling\nwide gpu-dai queuing dai-free-space ts_bestpractices tips-n-tricks\nsimple_configs", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Logs\nDriverless AI provides several logs that can be viewed and/or retrieved\nwhen performing different tasks. All content in the logs are labeled\nwith INFO, DATA, WARNING and ERROR tags. Driverless AI Modeling and MLI\nexperiments also provide access to anonymized logs that do not contain\ncontents from the DATA tag. - logs-available\n- logs-sending\n- Obtaining System Log Files \nAvailable Log Files\nThe following is a list of available Driverless AI log files. 
- dai_log\n - exp_log\n - mli_log\n - auto_viz_log\n - h2oai_server_log\n - audit_log\ndai.log\ndai.log are part of Driverless AI System Logs . They are\ngenerated as part of stderr/stdout and are useful for debugging or\ndetailed support in case of issues. If needed, the verbosity or logging\nlevel of this log file can be toggled using config.toml settings. Admin access to Driverless AI installation location is required to\nobtain these logs. See System Logs section on steps to\nobtain them.", + "prompt_type": "plain" + }, + { + "output": "It helps with understanding the run details and\ndebugging experiment related issues. The log file naming convention is\nh2oai_experiment_{experiment_ID}.log and the content is labeled with\nINFO, DATA, WARNING and ERROR tags. Users can download these log directly from the experiment page of the\nDriverless AI GUI. For an experiment in progress, logs can be accessed\nfrom under the Log tab to the right. For completed experiments, the logs\nreside with the summary zip file. []\nThe zip also contains an anonymized version of experiment logs that does\nnot report any information relating to the data used in the experiment\n(i.e no DATA label), such as column names and individual data points. And a details folder that comprises of error stack traces that may help\nwith debugging. []\nMLI Logs\nThese logs cover the model interpretation \nprocess runs for surrogate models and explainer/recipe runs for\nDriverless AI Machine Learning Interpretability jobs. MLI surrogate model run logs can be downloaded from the Action button on\nthe MLI GUI page.", + "prompt_type": "plain" + }, + { + "output": "It contains three files, the\nstdout/stderr log for full MLI process run, an anonymized copy (i.e no\nDATA label) of the same log file and surrogate model run logs. []\nThe explainer or recipe logs are accessible from the task run button. []\nMLI uses H2O_3 (Java backend) to build surrogate models. 
Admins can\naccess the h2o_3 server logs using System Logs commands in\ncase of issues with starting the MLI server. The /tmp folder of DAI\ncontains h2o_mli.log, that keeps track of rolling mli logs and are also\nadmin accessible. Auto Visualization Logs\nThis log store run information for automatic data visualization in\nDriverless AI. Users can obtain them from the Autoviz page of DAI GUI. []\nAdmins can access the viz-server logs using System Logs \ncommands in case of issues with starting of Viz server. The failure logs\nrelating to data visualization are also available from the /tmp folder\nas h2oai_server.log and requires admin access. h2oai_server Log\nThese logs register all issues relating to datasets like Adding Datasets\nor viewing Dataset Details or Auto Visualization of datasets.", + "prompt_type": "plain" + }, + { + "output": "An anonymized copy (i.e no\nDATA label) of this log file is also available in the same folder. Accessing h2oai_server log requires admin access to Driverless AI. Audit Logs\nAudit logs register all user interactions with the Driverless AI system\nlike login/logout, downloads/uploads, experiment creation/deletion etc. Admins can access them from /tmp folder of Driverless AI. Sending Logs to support@H2O.ai\nThis section describes what logs to send in the event of failures when\nrunning Driverless AI. All content in the logs are labeled with INFO,\nDATA, WARNING and ERROR tags. Driverless AI Modeling and MLI experiments\nalso provides access to anonymized logs that do not contain contents\nfrom the DATA tag. - Driverless AI starting Failures: This requires inspection of\n System Logs like dai.log file. 
- Dataset Failures: A simple error stack trace is displayed on the GUI\n in case of datasets failures like Adding Datasets or viewing Dataset\n Details and detailed logs are registered as\n h2oai_server logs that requires admin access.", + "prompt_type": "plain" + }, + { + "output": "A full detailed stacktrace is also available in the\n h2oai_server.log file in ./tmp folder of DAI that\n requires admin access. - Experiment Failures: User needs to send the\n experiment logs . In some cases, for in depth analysis,\n support@h2o.ai may request dai.logs that requires admin\n access to retrieve. - MLI Failures: See MLI Logs for details. - Custom Recipes Failures: If a Custom Recipe is producing errors, the\n entire zip file obtained by clicking on the Download Summary & Logs\n button on the experiment page, can be sent for\n troubleshooting. Note that these files may contain information that\n is not anonymized. System Logs\nSystem logs include useful information about Driverless AI. Driverless\nAI solution needs following set of services to work-\n- Driverless AI server: This is a python code, that internally starts\n a local worker to start a web server for UI pages (DAI GUI) and runs\n the actual experiment work.", + "prompt_type": "plain" + }, + { + "output": "- procsy: This handles the communication between the DAI server\n (python code) and other binaries or java jar files, like data\n connectors or the vis-server. - vis-server: This is needed for Auto visualization of Datasets, DAI\n sends a request to procsy, which in turn will query the vis-server\n to make the computations necessary for autoviz. - redis-server: It is used as a communication bus between the backend\n (DAI) server and the local worker or remote workers (in case of DAI\n multinode set up). 
- minio: This is needed in multinode setup, and is used for data\n storage, for example, when running an experiment on a remote node,\n the remote worker gets the experiment configuration details via\n redis, and the actual dataset, is pushed to minio and the remote\n worker is instructed to fetch it. When experiment finishes, the\n model is sent back to the main server from the remote node via minio\n (upload and download). Each of these services creates a log file.", + "prompt_type": "plain" + }, + { + "output": "Transforming datasets\nWhen a training dataset is used in an experiment, Driverless AI\ntransforms the data into an improved, feature engineered dataset. (For\nmore information on the transformations that are provided in Driverless\nAI, see Transformations.) But what happens when new rows are added to\nyour dataset? In this case, you can specify to transform the new dataset\nafter adding it to Driverless AI, and the same transformations that\nDriverless AI applied to the original dataset are applied to these new\nrows. The following sections describe the two options for transforming\ndatasets that are available in Driverless AI:\n- transform_dataset\n- fit_and_transform_dataset\nNotes:\n- To avoid leakage, the result of transformations should not be used\n for training unless enable_target_encoding='off'. []\nTransform dataset\nThe following steps describe how to transform a dataset with the\nTransform dataset option, which transforms the dataset without fitting. Notes:\n- This transformation uses the experiment's full model pipeline,\n except instead of generating predictions, it generates the\n transformation before the model is applied.", + "prompt_type": "plain" + }, + { + "output": "1. Select the dataset that you want to transform. 2. Select the columns you want to include in the transformation frame. To confirm your selection, click Done. The dataset transformation\n job is added to the pending jobs queue. 3. 
When the transformed dataset is ready, click Download transformed\n dataset. Specify a filename for the dataset, then click the Download\n button to download the transformed dataset. Fit and transform dataset\nThe following steps describe how to transform a dataset with the Fit &\nTransform dataset option, which both fits and transforms the dataset. Notes:\n- This functionality is not available for Time Series experiments when\n time_series_recipe=true. (That is, when the lag-based recipe is\n used.) - This functionality provides the pipeline (engineered features) of\n the best individual model of the experiment, not the full pipeline\n of all models and folds. 1. On the completed experiment page for the original dataset, click\n Model Actions -> Fit & Transform Dataset.", + "prompt_type": "plain" + }, + { + "output": "Select the new training dataset that you want to transform. Note\n that this must have the same number of columns as the original\n dataset. 3. Select one of the following options:\n - Default: The validation split ratio is set to 0. - With validation dataset: Specify a validation dataset to use\n with this dataset. The validation split ratio is set to 0.2. - With training data split: Split the training data. The\n validation split ratio is set to 0.2. Note: To ensure that the transformed dataset respects the row\n order, choose a validation dataset instead of splitting the\n training data. Splitting the training data results in a shuffling\n of the row order. 4. Optionally specify a test dataset. If specified, then the output\n also includes the final test dataset for final scoring. 5. Click Launch Transformation. 
[]\nThe following datasets are made available for download upon successful\ncompletion:\n- Training dataset (not for cross validation)\n- Validation dataset for parameter tuning\n- Test dataset for final scoring.", + "prompt_type": "plain" + }, + { + "output": "Native Installation\n\nThis section provides instructions for installing Driverless AI in\nnative Linux environments.\n\ninstall/x86-64\n\nFor instructions on installing the Driverless AI Docker image, refer to\ndocker_installs.", + "prompt_type": "plain" + }, + { + "output": "HDFS Setup\n\nDriverless AI lets you explore HDFS data sources from within the\nDriverless AI application. This section provides instructions for\nconfiguring Driverless AI to work with HDFS.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run\n--runtime=nvidia (>= Docker 19.03) or nvidia-docker (< Docker 19.03) command when starting the Driverless AI Docker image. Use docker\nversion to check which version of Docker you are using. Description of Configuration Attributes --------------------------------------- - hdfs_config_path (Required): The location of the HDFS config folder. This folder can contain multiple config files. - hdfs_auth_type (Required): Specifies the HDFS authentication. Available values are: - principal: Authenticate with HDFS with a principal user. - keytab: Authenticate with a keytab (recommended). If running DAI as a service, then the Kerberos keytab needs to be owned by the DAI user. - keytabimpersonation: Login with impersonation using a keytab. - noauth: No authentication needed. - key_tab_path: The path of the principal key tab file. This is required when hdfs_auth_type='principal'. - hdfs_app_principal_user: The Kerberos application principal user. This is required when hdfs_auth_type='keytab'.", + "prompt_type": "plain" + }, + { + "output": "Separate each argument with spaces. 
--Djava.security.krb5.conf--Dsun.security.krb5.debug--Dlog4j.configuration-hdfs_app_classpath: The HDFS classpath. -hdfs_app_supported_schemes: The list of DFS schemas that is used to check whether a valid input to the connector has been established. For example: :: hdfs_app_supported_schemes = ['hdfs://', 'maprfs://', 'custom://'] The following are the default values for this option. Additional schemas can be supported by adding values that are not selected by default to the list. -hdfs://-maprfs://-swift://-hdfs_max_files_listed: Specifies the maximum number of files that are viewable in the connector UI. Defaults to 100 files. To view more files, increase the default value. -hdfs_init_path: Specifies the starting HDFS path displayed in the UI of the HDFS browser. -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. Example 1: Enable HDFS with No Authentication --------------------------------------------- .. container:: tabs .. group-tab:: Docker Image Installs This example enables the HDFS data connector and disables HDFS authentication.", + "prompt_type": "plain" + }, + { + "output": "This lets you reference data stored in HDFS directly using name node address, for example:hdfs://name.node/datasets/iris.csv. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,hdfs\" \\ -e DRIVERLESS_AI_HDFS_AUTH_TYPE='noauth' \\ -e DRIVERLESS_AI_PROCSY_PORT=8080 \\ -p 12345:12345 \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. 
container:: group-tab Docker Image with the config.toml This example shows how to configure HDFS options in the config.toml file, and then specify that file when starting Driverless AI in Docker. Note that this example enables HDFS with no authentication.", + "prompt_type": "plain" + }, + { + "output": "Configure the Driverless AI config.toml file. Set the following configuration options. Note that the procsy port, which defaults to 12347, also has to be changed. .. - enabled_file_systems\n= \"file, upload, hdfs\" - procsy_ip = \"127.0.0.1\" - procsy_port =\n8080 2. Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example enables the HDFS data connector and disables HDFS authentication in the config.toml file.", + "prompt_type": "plain" + }, + { + "output": "1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. Note that the procsy port, which defaults to 12347, also has to be changed. .. :: # IP address and port of procsy process. 
procsy_ip = \"127.0.0.1\" procsy_port = 8080 # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, hdfs\" 3. Save the changes when you are done, then stop/restart Driverless AI. Example 2: Enable HDFS with Keytab-Based Authentication ------------------------------------------------------- **Notes**: - If using Kerberos Authentication, then the time on the Driverless AI server must be in sync with Kerberos server. If the time difference between clients and DCs are 5 minutes or higher, there will be Kerberos failures. - If running Driverless AI as a service, then the Kerberos keytab needs to be owned by the Driverless AI user; otherwise Driverless AI will not be able to read/access the Keytab and will result in a fallback to simple authentication and, hence, fail. .. container:: tabs .. 
group-tab:: Docker Image Installs This example: - Places keytabs in the /tmp/dtmp folder on your machine and provides the file path as described below.", + "prompt_type": "plain" + }, + { + "output": ".. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,hdfs\" \\ -e DRIVERLESS_AI_HDFS_AUTH_TYPE='keytab' \\ -e DRIVERLESS_AI_KEY_TAB_PATH='tmp/<>' \\ -e DRIVERLESS_AI_HDFS_APP_PRINCIPAL_USER='<>' \\ -e DRIVERLESS_AI_PROCSY_PORT=8080 \\ -p 12345:12345 \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example: - Places keytabs in the /tmp/dtmp folder on your machine and provides the file path as described below. - Configures the option hdfs_app_principal_user to reference a user for whom the keytab was created (usually in the form of user@realm).", + "prompt_type": "plain" + }, + { + "output": "Configure the Driverless AI config.toml file. Set the following configuration options. Note that the procsy port, which defaults to 12347, also has to be changed. .. - enabled_file_systems\n= \"file, upload, hdfs\" - procsy_ip = \"127.0.0.1\" - procsy_port =\n8080 - hdfs_auth_type = \"keytab\" - key_tab_path =\n\"/tmp/\" - hdfs_app_principal_user =\n\"\" 2. Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. 
container:: group-tab Native Installs This example: - Places keytabs in the/tmp/dtmpfolder on your machine and provides the file path as described below.", + "prompt_type": "plain" + }, + { + "output": "1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. .. :: # IP address and port of procsy process. procsy_ip = \"127.0.0.1\" procsy_port = 8080 # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, hdfs\" # HDFS connector # Auth type can be Principal/keytab/keytabPrincipal # Specify HDFS Auth Type, allowed options are: # noauth : 
No authentication needed # principal : Authenticate with HDFS with a principal user # keytab : Authenticate with a Key tab (recommended) # keytabimpersonation : Login with impersonation using a keytab hdfs_auth_type = \"keytab\" # Path of the principal key tab file key_tab_path = \"/tmp/\" # Kerberos app principal user (recommended) hdfs_app_principal_user = \"\" 3. Save the changes when you are done, then stop/restart Driverless AI. Example 3: Enable HDFS with Keytab-Based Impersonation ------------------------------------------------------ **Notes**: - If using Kerberos, be sure that the Driverless AI time is synched with the Kerberos server.", + "prompt_type": "plain" + }, + { + "output": "- Logins are case sensitive when keytab-based impersonation is configured. .. container:: tabs .. group-tab:: Docker Image Installs The example: - Sets the authentication type tokeytabimpersonation. - Places keytabs in the/tmp/dtmpfolder on your machine and provides the file path as described below. - Configures theDRIVERLESS_AI_HDFS_APP_PRINCIPAL_USERvariable, which references a user for whom the keytab was created (usually in the form of user@realm). .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,hdfs\" \\ -e DRIVERLESS_AI_HDFS_AUTH_TYPE='keytabimpersonation' \\ -e DRIVERLESS_AI_KEY_TAB_PATH='/tmp/<>' \\ -e DRIVERLESS_AI_HDFS_APP_PRINCIPAL_USER='<>' \\ -e DRIVERLESS_AI_PROCSY_PORT=8080 \\ -p 12345:12345 \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. 
container:: group-tab Docker Image with the config.toml This example: - Sets the authentication type tokeytabimpersonation.", + "prompt_type": "plain" + }, + { + "output": "- Configures thehdfs_app_principal_uservariable, which references a user for whom the keytab was created (usually in the form of user@realm). 1. Configure the Driverless AI config.toml file. Set the following configuration options. Note that the procsy port, which defaults to 12347, also has to be changed. .. -enabled_file_systems\n= \"file, upload, hdfs\"-procsy_ip = \"127.0.0.1\"-procsy_port =\n8080-hdfs_auth_type = \"keytabimpersonation\"-key_tab_path =\n\"/tmp/\"-hdfs_app_principal_user =\n\"\"2. Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example: - Sets the authentication type tokeytabimpersonation.", + "prompt_type": "plain" + }, + { + "output": "- Configures thehdfs_app_principal_uservariable, which references a user for whom the keytab was created (usually in the form of user@realm). 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. .. :: # IP address and port of procsy process. 
procsy_ip = \"127.0.0.1\" procsy_port = 8080 # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, hdfs\" # HDFS connector # Auth type can be Principal/keytab/keytabPrincipal # Specify HDFS Auth Type, allowed options are: # noauth : No authentication needed # principal : Authenticate with HDFS with a principal user # keytab : Authenticate with a Key tab (recommended) # keytabimpersonation : Login with impersonation using a keytab hdfs_auth_type = \"keytabimpersonation\" # Path of the principal key tab file key_tab_path = \"/tmp/\" # Kerberos app principal user (recommended) hdfs_app_principal_user = \"\" 3. Save the changes when you are done, then stop/restart Driverless AI. 
Specifying a Hadoop Platform ---------------------------- The following example shows how to build an H2O-3 Hadoop image and run Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Change the H2O_TARGET to specify a different platform. 1. Clone and then build H2O-3 for CDH 6.0. .. .. code:: bash git clone https://github.com/h2oai/h2o-3.git cd h2o-3 ./gradlew clean build -x test export H2O_TARGET=cdh6.0 export BUILD_HADOOP=true ./gradlew clean build -x test 2. Start H2O. .. .. code:: bash docker run -it --rm \\ -v `pwd`:`pwd` \\ -w `pwd` \\ --entrypoint bash \\ --network=host \\ -p 8020:8020 \\ docker.h2o.ai/cdh-6-w-hive \\ -c 'sudo -E startup.sh && \\ source /envs/h2o_env_python3.8/bin/activate && \\ hadoop jar h2o-hadoop-3/h2o-cdh6.0-assembly/build/libs/h2odriver.jar -libjars \"$(cat /opt/hive-jars/hive-libjars)\" -n 1 -mapperXmx 2g -baseport 54445 -notify h2o_one_node -ea -disown && \\ export CLOUD_IP=localhost && \\ export CLOUD_PORT=54445 && \\ make -f scripts/jenkins/Makefile.jenkins test-hadoop-smoke; \\ bash' 3.", + "prompt_type": "plain" + }, + { + "output": "Key Features\nBelow are some of the key features available in Driverless AI. Flexibility of Data and Deployment\nDriverless AI works across a variety of data sources, including Hadoop\nHDFS, Amazon S3, and more. Driverless AI can be deployed everywhere,\nincluding all clouds (Microsoft Azure, AWS, and Google Cloud),\non-premises, and can run on machines with only CPUs or machines with\nCPUs and GPUs. NVIDIA GPU Acceleration\nDriverless AI is optimized to take advantage of GPU acceleration to\nachieve up to 40X speedups for automatic machine learning. It includes\nmulti-GPU algorithms for XGBoost, GLM, K-Means, and more. GPUs allow for\nthousands of iterations of model features and optimizations and give\nsignificant speedups for use cases involving images and/or text. For\nmore information, see gpu_in_dai.
Automatic Data Visualization\nFor datasets, Driverless AI automatically selects data plots based on\nthe most relevant data statistics, generates visualizations, and creates\ndata plots that are most relevant from a statistical perspective based\non the most relevant data statistics.", + "prompt_type": "plain" + }, + { + "output": "They are also useful for understanding the composition\nof very large datasets and for seeing trends or even possible issues,\nsuch as large numbers of missing values or significant outliers that\ncould impact modeling results. For more information, see\nVisualizing Datasets . Automatic Feature Engineering\nFeature engineering is the secret weapon that advanced data scientists\nuse to extract the most accurate results from algorithms. H2O Driverless\nAI employs a library of algorithms and feature transformations to\nautomatically engineer new, high-value features for a given dataset. (See transformations for more information.) Included in the interface is\na variable importance chart that shows the significance of original and\nnewly engineered features. Automatic Model Documentation\nTo explain models to business users and regulators, data scientists and\ndata engineers must document the data, algorithms, and processes used to\ncreate machine learning models. Driverless AI provides an AutoDoc for\neach experiment, relieving the user from the time-consuming task of\ndocumenting and summarizing their workflow used when building machine\nlearning models.", + "prompt_type": "plain" + }, + { + "output": "With this capability in Driverless AI, practitioners can\nfocus more on drawing actionable insights from the models and save weeks\nor even months in development, validation, and deployment. Driverless AI also provides a number of autodoc_ configuration options,\ngiving users even more control over the output of the AutoDoc. (Refer to\nthe sample-configtoml topic for information about these configuration\noptions.) 
Click here to download and view a sample experiment\nreport in Word format. Time Series Forecasting\nTime series forecasting is one of the biggest challenges for data\nscientists. These models address key use cases, including demand\nforecasting, infrastructure monitoring, and predictive maintenance. Driverless AI delivers superior time series capabilities to optimize for\nalmost any prediction time window. Driverless AI incorporates data from\nnumerous predictors, handles structured character data and\nhigh-cardinality categorical variables, and handles gaps in time series\ndata and other missing values.", + "prompt_type": "plain" + }, + { + "output": "NLP with TensorFlow and PyTorch\nText data can contain critical information to inform better predictions. Driverless AI automatically converts text strings into features using\npowerful techniques like TFIDF and Embeddings. With TensorFlow and\nPyTorch, Driverless AI can process large text blocks and build models\nusing all the available data to solve business problems like sentiment\nanalysis, document classification, and content tagging. The Driverless\nAI platform has the ability to support both standalone text and text\nwith other columns as predictive features. For more information, see\nnlp-in-dai. Image Processing with TensorFlow\nDriverless AI can be used to gain insight from digital images. It\nsupports the use of both standalone images and images together with\nother data types as predictive features. For more information, see\nimage-processing-in-dai. 
Machine Learning Interpretability (MLI)\nDriverless AI provides robust interpretability of machine learning\nmodels to explain modeling results in a human-readable format.", + "prompt_type": "plain" + }, + { + "output": "A number of charts are generated automatically (depending on experiment\ntype), including K-LIME, Shapley, Variable Importance, Decision Tree\nSurrogate, Partial Dependence, Individual Conditional Expectation,\nSensitivity Analysis, NLP Tokens, NLP LOCO, and more. Additionally, you\ncan download a CSV of LIME and Shapley reason codes from the MLI page. For more information, see interpreting_a_model. Automatic Reason Codes\nIn regulated industries, an explanation is often required for\nsignificant decisions relating to customers (for example, credit\ndenial). Reason codes show the key positive and negative factors in a\nmodel's scoring decision in simple language. Reason codes are also\nuseful in other industries, such as healthcare, because they can provide\ninsights into model decisions that can drive additional testing or\ninvestigation. For more information, see mli-explanations. Custom Recipe Support\nDriverless AI lets you import custom recipes for MLI algorithms, feature\nengineering (transformers), scorers, and configuration.", + "prompt_type": "plain" + }, + { + "output": "This lets you have greater influence over the Driverless AI\nAutomatic ML pipeline and gives you control over the optimization\nchoices that Driverless AI makes. For more information, see\ncustom-recipes. Automatic Scoring Pipelines\nFor completed experiments, Driverless AI automatically generates both\nPython scoring pipelines and new ultra-low-latency automatic scoring\npipelines (MOJO) for deploying the model to production. The new\nautomatic scoring pipeline is a unique technology that deploys all\nfeature engineering and the winning machine learning model in highly\noptimized, low-latency, production-ready Java or C++ code that can be\ndeployed anywhere.
For more information, see Scoring_Pipeline. Experiment Setup Wizard\nThe Driverless AI Experiment Setup Wizard makes it simple for you to set\nup a Driverless AI experiment and ensure that the experiment's settings\nare optimally configured for your specific use case. The Experiment\nSetup Wizard helps you learn about your data and lets you provide\ninformation about your use case that is used to determine the\nexperiment's settings.", + "prompt_type": "plain" + }, + { + "output": "Introduction to H2O Driverless AI\nH2O Driverless AI is a high-performance, GPU-enabled, client-server\napplication for the rapid development and deployment of state-of-the-art\npredictive analytics models. It reads tabular data from various sources\nand automates data visualization, grand-master level automatic feature\nengineering, model validation (overfitting and leakage prevention),\nmodel parameter tuning, model interpretability, and model deployment. H2O Driverless AI is currently targeting common regression, binomial\nclassification, and multinomial classification applications, including\nloss-given-default, probability of default, customer churn, campaign\nresponse, fraud detection, anti-money-laundering, and predictive asset\nmaintenance models. It also handles time-series problems for individual\nor grouped time-series, such as weekly sales predictions per store and\ndepartment, with time-causal feature engineering and validation schemes. Driverless AI can also handle image and text data (NLP) use cases.", + "prompt_type": "plain" + }, + { + "output": "Visualizing Datasets\nPerform one of the following steps to visualize a dataset:\n- On the Datasets page, select the [Click for Actions] button beside\n the dataset that you want to view, and then click Visualize from the\n submenu that appears. - Click the Autoviz top menu link to go to the Visualizations list\n page, click the New Visualization button, then select or import the\n dataset that you want to visualize.
The Visualization page shows all available graphs for the selected\ndataset. Note that the graphs on the Visualization page can vary based\non the information in your dataset. You can also view and download logs\nthat were generated during the visualization. Autoviz Recommendations\nFor some cases, Autoviz suggests certain recommended transformations to\nthe columns of the dataset. These recommendations can be directly applied to the experiment. This is\ndone internally by using the\nautoviz recommendation transformer . The following is a complete list of available graphs from Driverless AI\nAutoviz.", + "prompt_type": "plain" + }, + { + "output": "All possible scatterplots based on\npairs of features (variables) are examined for correlations. The\ndisplayed plots are ranked according to the correlation. Some of these\nplots may not look like textbook examples of correlation. The only\ncriterion is that they have a large value of squared Pearson's r\n(greater than .95). When modeling with these variables, you may want to\nleave out variables that are perfectly correlated with others. Note that points in the scatterplot can have different sizes. Because\n Driverless AI aggregates the data and does not display all points, the\n bigger the point is, the bigger number of exemplars (aggregated\n points) the plot covers. Spikey Histograms\nSpikey histograms are histograms with huge spikes. This often indicates\nan inordinate number of single values (usually zeros) or highly similar\nvalues. The measure of \"spikeyness\" is a bin frequency that is ten times\nthe average frequency of all the bins. You should be careful when\nmodeling (particularly regression models) with spikey variables.", + "prompt_type": "plain" + }, + { + "output": "The robust measure of skewness is derived from Groeneveld, R.A. and\nMeeden, G. (1984), \"Measuring Skewness and Kurtosis.\" The Statistician,\n33, 391-399. 
Highly skewed variables are often candidates for a\ntransformation (e.g., logging) before use in modeling. The histograms in\nthe output are sorted in descending order of skewness. Varying Boxplots\nVarying boxplots reveal unusual variability in a feature across the\ncategories of a categorical variable. The measure of variability is\ncomputed from a robust one-way analysis of variance (ANOVA). Sufficiently diverse variables are flagged in the ANOVA. A boxplot is a\ngraphical display of the fractiles of a distribution. The center of the\nbox denotes the median, the edges of a box denote the lower and upper\nquartiles, and the ends of the \"whiskers\" denote that range of values. Sometimes outliers occur, in which case the adjacent whisker is\nshortened to the next lower or upper value. For variables (features)\nhaving only a few values, the boxes can be compressed, sometimes into a\nsingle horizontal line at the median.", + "prompt_type": "plain" + }, + { + "output": "Heteroscedasticity is\ncalculated with a Brown-Forsythe test: Brown, M. B. and Forsythe, A. B. (1974), \"Robust tests for equality of variances.\" Journal of the American\nStatistical Association, 69, 364-367. Plots are ranked according to\ntheir heteroscedasticity values. A boxplot is a graphical display of the\nfractiles of a distribution. The center of the box denotes the median,\nthe edges of a box denote the lower and upper quartiles, and the ends of\nthe \"whiskers\" denote that range of values. Sometimes outliers occur, in\nwhich case the adjacent whisker is shortened to the next lower or upper\nvalue. For variables (features) having only a few values, the boxes can\nbe compressed, sometimes into a single horizontal line at the median. Biplots\nA Biplot is an enhanced scatterplot that uses both points and vectors to\nrepresent structure simultaneously for rows and columns of a data\nmatrix. Rows are represented as points (scores), and columns are\nrepresented as vectors (loadings).
The plot is computed from the first\ntwo principal components of the correlation matrix of the variables\n(features).", + "prompt_type": "plain" + }, + { + "output": "And you\nshould look for purple vectors that are well-separated. Overlapping\nvectors can indicate a high degree of correlation between variables. Outliers\nVariables with anomalous or outlying values are displayed as red points\nin a dot plot. Dot plots are constructed using an algorithm in\nWilkinson, L. (1999). \"Dot plots.\" The American Statistician, 53,\n276\u2013281. Not all anomalous points are outliers. Sometimes the algorithm\nwill flag points that lie in an empty region (i.e., they are not near\nany other points). You should inspect outliers to see if they are\nmiscodings or if they are due to some other mistake. Outliers should\nordinarily be eliminated from models only when there is a reasonable\nexplanation for their occurrence. Correlation Graph\nThe correlation network graph is constructed from all pairwise squared\ncorrelations between variables (features). For continuous-continuous\nvariable pairs, the statistic used is the squared Pearson correlation. For continuous-categorical variable pairs, the statistic is based on the\nsquared intraclass correlation (ICC).", + "prompt_type": "plain" + }, + { + "output": "The\nformula is (MSbetween - MSwithin)/(MSbetween + (k - 1)MSwithin), where k\nis the number of categories in the categorical variable. For\ncategorical-categorical pairs, the statistic is computed from Cramer's V\nsquared. If the first variable has k1 categories and the second variable\nhas k2 categories, then a k1 x k2 table is created from the joint\nfrequencies of values. From this table, we compute a chi-square\nstatistic. Cramer's V squared statistic is then (chi-square / n) /\nmin(k1,k2), where n is the total of the joint frequencies in the table. Variables with large values of these respective statistics appear near\neach other in the network diagram. 
The color scale used for the\nconnecting edges runs from low (blue) to high (red). Variables connected\nby short red edges tend to be highly correlated. Parallel Coordinates Plot\nA Parallel Coordinates Plot is a graph used for comparing multiple\nvariables. Each variable has its own vertical axis in the plot. Each\nprofile connects the values on the axes for a single observation.", + "prompt_type": "plain" + }, + { + "output": "Radar Plot\nA Radar Plot is a two-dimensional graph that is used for comparing\nmultiple variables. Each variable has its own axis that starts from the\ncenter of the graph. The data are standardized on each variable between\n0 and 1 so that values can be compared across variables. Each profile,\nwhich usually appears in the form of a star, connects the values on the\naxes for a single observation. Multivariate outliers are represented by\nred profiles. The Radar Plot is the polar version of the popular\nParallel Coordinates plot. The polar layout enables us to represent more\nvariables in a single plot. Data Heatmap\nThe heatmap graphic is constructed from the transposed data matrix. Rows\nof the heatmap represent variables, and columns represent cases\n(instances). The data are standardized before display so that small\nvalues are yellow and large values are red. The rows and columns are\npermuted via a singular value decomposition (SVD) of the data matrix so\nthat similar rows and similar columns are near each other.", + "prompt_type": "plain" + }, + { + "output": "Also implemented\nare extensions of these three transformers that handle negative values,\nwhich are derived from I.K. Yeo and R.A. Johnson, \u201cA new family of power\ntransformations to improve normality or symmetry.\u201d Biometrika, 87(4),\n(2000). For each transformer, transformations are selected by comparing\nthe robust skewness of the transformed column with the robust skewness\nof the original raw column. 
When a transformation leads to a relatively\nlow value of skewness, it is recommended. Missing Values Heatmap\nThe missing values heatmap graphic is constructed from the transposed\ndata matrix. Rows of the heatmap represent variables and columns\nrepresent cases (instances). The data are coded into the values 0\n(missing) and 1 (nonmissing). Missing values are colored red and\nnonmissing values are left blank (white). The rows and columns are\npermuted via a singular value decomposition (SVD) of the data matrix so\nthat similar rows and similar columns are near each other. Gaps Histogram\nThe gaps index is computed using an algorithm of Wainer and Schacht\nbased on work by John Tukey.", + "prompt_type": "plain" + }, + { + "output": "Project Workspace\nDriverless AI provides a Project Workspace for managing datasets and\nexperiments related to a specific business problem or use case. Whether\nyou are trying to detect fraud or predict user retention, datasets and\nexperiments can be stored and saved in the individual projects. A\nLeaderboard on the Projects page lets you easily compare performance and\nresults and identify the best solution for your problem. The following sections describe how to create and manage projects. - create-project\n- link-datasets\n- link-experiments\n- experiments-list\nNote: For information on how to export Driverless AI experiments to H2O\nMLOps from the Projects page, see\nhttps://docs.h2o.ai/mlops-release/latest-stable/docs/userguide/using.html#exporting-experiments-from-driverless-ai-into-mlops. Creating a Project Workspace\nTo create a Project Workspace:\n1. Click the Projects option on the top menu. 2. Click New Project. 3. Specify a name for the project and provide a description.", + "prompt_type": "plain" + }, + { + "output": "Click Create Project. This creates an empty Project page. From the Projects page, you can link datasets and/or experiments, run\nnew experiments, and score experiments on a scoring dataset. 
When you\nlink an existing experiment to a Project, the datasets used for the\nexperiment are automatically linked to the project (if not already\nlinked). Linking Datasets\nAny dataset that has been added to Driverless AI can be linked to a\nproject. In addition, when you link an experiment, the datasets used for\nthat experiment are also automatically linked to the project. To link a dataset:\n1. Click the Link Dataset button, then select the type of dataset you\n want to upload. Choose from Training, Testing, and Validation. 2. Select the dataset(s) that you want to link. 3. (Optional) If there are any completed experiments that are based on\n the selected dataset(s), you can choose to link them as well. 4. (Optional) To filter the list of linked datasets by type, click\n Filter Dataset Type and select the type of dataset you want to view.", + "prompt_type": "plain" + }, + { + "output": "When datasets are linked, the same menu options are available here as on\nthe Datasets page. For more information, refer to Datasets. []\nSelecting Datasets\nIn the Datasets section, you can select a training, validation, or\ntesting dataset. The Experiments section shows experiments in the\nProject that use the selected dataset. Linking Experiments\nExisting experiments can be selected and linked to a Project. Additionally, you can run new experiments or checkpoint existing\nexperiments from this page. Experiments started from the Project page\nare automatically linked to the Project. To link an existing experiment to the project, click Link Experiments\nand select one of the following options:\n- By Selecting Experiments: Select one or more experiments to link to\n the Project. - By Selecting Dataset Used in Experiments: Upload all experiments\n that used the selected dataset as a Training, Testing, or Validation\n dataset. 
For example, if you select By Selecting Dataset Used in\n Experiments > Training and then select the dataset\n example-dataset.csv, all the experiments that used the\n example-dataset.csv as a training dataset are linked.", + "prompt_type": "plain" + }, + { + "output": "1. Click the New Experiment link to begin a new experiment. 2. Select your training data and optionally your validation and/or\n testing data. 3. Specify your desired experiment settings (refer to\n experiment_settings and expert-settings), and then click Launch\n Experiment. As the experiment is running, it will be listed at the top of the\nExperiments Leaderboard until it is completed. It will also be available\non the Experiments page. Checkpointing Experiments\nWhen experiments are linked to a Project, the same checkpointing options\nfor experiments are available here as on the Experiments page. Refer to\ncheckpointing for more information. []\nExperiments List\nWhen attempting to solve a business problem, a normal workflow will\ninclude running multiple experiments, either with different/new data or\nwith a variety of settings, and the optimal solution can vary for\ndifferent users and/or business problems. For some users, the model with\nthe highest accuracy for validation and test data could be the most\noptimal one.", + "prompt_type": "plain" + }, + { + "output": "For some, it could also mean how\nquickly the model could be trained with acceptable levels of accuracy. The Experiments list allows you to find the best solution for your\nbusiness problem. The list is organized based on experiment name. You can change the\nsorting of experiments by selecting the up/down arrows beside a column\nheading in the experiment menu. Hover over the right menu of an experiment to view additional\ninformation about the experiment, including the problem type, datasets\nused, and the target column. Experiment Scoring\nFinished experiments linked to the project show their validation and\ntest scores. 
You can also score experiments on other datasets. To do\nthis, you first need to add a dataset by clicking the Link Dataset\nbutton and choosing Testing from the drop-down menu. After the test\ndataset has been added, click the Score on Scoring Data button and\nchoose the experiment(s) that you want to score along with the test\ndataset to be applied. This triggers a diagnostics job, the results of\nwhich are located on the diagnostics page.", + "prompt_type": "plain" + }, + { + "output": "After the scoring process has completed, the\nresult appears in the Score and Scoring Time columns. The Score column\nshows results for the scorer specified by the Show Results for Scorer\npicker. Notes:\n- If an experiment has already been scored on a dataset, Driverless AI\n cannot score it again. The scoring step is deterministic, so for a\n particular test dataset and experiment combination, the score will\n be the same regardless of how many times you repeat it. - The test dataset must have all the columns that are expected by the\n various experiments you are scoring it on. However, the columns of\n the test dataset need not be exactly the same as input features\n expected by the experiment. There can be additional columns in the\n test dataset. If these columns were not used for training, they will\n be ignored. This feature gives you the ability to train experiments\n on different training datasets (i.e., having different features),\n and if you have an \"uber test dataset\" that includes all these\n feature columns, then you can use the same dataset to score these\n experiments.", + "prompt_type": "plain" + }, + { + "output": "This\n value shows the total time (in seconds) that it took for calculating\n the experiment scores for all applicable scorers for the experiment\n type. This is valuable to users who need to estimate the runtime\n performance of an experiment.
Comparing Experiments\nYou can compare two or three experiments and view side-by-side detailed\ninformation about each. 1. Select either two or three experiments that you want to compare. You\n cannot compare more than three experiments. 2. Click the Compare n Items button. This opens the Compare Experiments page. This page includes the\nexperiment summary and metric plots for each experiment. The metric\nplots vary depending on whether this is a classification or regression\nexperiment. For classification experiments, this page includes:\n - Variable Importance list\n - Confusion Matrix\n - ROC Curve\n - Precision Recall Curve\n - Lift Chart\n - Gains Chart\n - Kolmogorov-Smirnov Chart\nFor regression experiments, this page includes:\n- Variable Importance list\n- Actual vs.", + "prompt_type": "plain" + }, + { + "output": "The datasets and experiments will still be available on\nthe Datasets and Experiments pages. - Unlink a dataset by clicking on the dataset and selecting Unlink\n from the menu. Note: You cannot unlink datasets that are tied to\n experiments in the same project. - Unlink an experiment by selecting the experiment and clicking the\n Unlink Item button. Note that this will not automatically unlink\n datasets that were tied to the experiment. Deleting Projects\nTo delete a project, click the Projects option on the top menu to open\nthe main Projects page. Click the dotted menu in the right-most column, and\nthen select Delete. You will be prompted to confirm the deletion. Note that deleting projects does not delete datasets and experiments\nfrom Driverless AI. Any datasets and experiments from deleted projects\nwill still be available on the Datasets and Experiments pages.
[]\nLeaderboard Wizard: Business value calculator\nFrom the Project page, you can access a business value calculator wizard\nby clicking the Analyze Results button.", + "prompt_type": "plain" + }, + { + "output": "Install the Google Cloud Platform Offering\nThis section describes how to install and start Driverless AI in a\nGoogle Compute environment using the GCP Marketplace. This assumes that\nyou already have a Google Cloud Platform account. If you don't have an\naccount, go to https://console.cloud.google.com/getting-started to\ncreate one. Before You Begin\nIf you are trying GCP for the first time and have just created an\naccount, check your Google Compute Engine (GCE) resource quota limits. By default, GCP allocates a maximum of 8 CPUs and no GPUs. Our default\nrecommendation for launching Driverless AI is 32 CPUs, 120 GB RAM, and 2\nP100 NVIDIA GPUs. You can change these settings to match your quota\nlimit, or you can request more resources from GCP. Refer to\nhttps://cloud.google.com/compute/quotas for more information, including\ninformation on how to check your quota and request additional quota. Installation Procedure\n1. In your browser, log in to the Google Compute Engine Console at\n https://console.cloud.google.com/.", + "prompt_type": "plain" + }, + { + "output": "In the left navigation panel, select Marketplace. 3. On the Marketplace page, search for Driverless and select the H2O.ai\n Driverless AI offering. The following page will display. 4. Click Launch on Compute Engine. (If necessary, refer to Google\n Compute Instance Types for information about machine and GPU types.) 5. A summary page displays when the compute engine is successfully\n deployed. This page includes the instance ID and the username\n (always h2oai) and password that will be required when starting\n Driverless AI. Click on the Instance link to retrieve the external\n IP address for starting Driverless AI. 6. 
In your browser, go to https://[External_IP]:12345 to start\n Driverless AI. 7. Agree to the Terms and Conditions. 8. Log in to Driverless AI using your user name and password. 9. Optionally enable GCS and Big Query access. Upgrading the Google Cloud Platform Offering\nPerform the following steps to upgrade the Driverless AI Google Platform\noffering.", + "prompt_type": "plain" + }, + { + "output": "NLP in Driverless AI\nThis section describes NLP (text) processing capabilities of Driverless\nAI. The Driverless AI platform has the ability to support both\nstandalone text and text with other column types as predictive features. TensorFlow based and PyTorch Transformer Architectures (for example,\nBERT) are used for Feature Engineering and Model Building. For details, see:\n - NLP Feature Engineering and Modeling \n - NLP Expert Settings \n - NLP Feature Naming Convention \n - nlp-explainers\n - An NLP example in Driverless AI \n - NLP Models to Production \nNote\n- NLP and image use cases in Driverless benefit significantly from\nGPU usage . - To download pretrained NLP models, visit\nhttp://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/bert_models.zip. You can use the pytorch_nlp_pretrained_models_dir configuration option\nto specify a path to pretrained PyTorch NLP models. This can be either a\npath in the local file system (/path/on/server/to/bert_models_folder), a\nURL, or an S3 location (s3://).", + "prompt_type": "plain" + }, + { + "output": "- You can use the Driverless AI Experiment Setup Wizard to guide you\nthrough the process of setting up NLP experiments. For more information,\nsee dai_wizard. NLP Feature Engineering and Modeling\n[]\nPretrained PyTorch Models in Driverless AI\n[]\nThe following NLP recipes are available for a text column. A full list\nof NLP Transformers is available here .
- n-gram frequency/TF-IDF followed by Truncated SVD\n - n-gram frequency/TF-IDF followed by Linear/Logistic regression\n - Word embeddings followed by CNN model (TensorFlow)\n - Word embeddings followed by BiGRU model (TensorFlow)\n - Character embeddings followed by CNN model (TensorFlow)\n - BERT/DistilBERT based embeddings for Feature Engineering (PyTorch)\n - Support for multiple Transformer Architectures (eg.BERT) as\n modeling algorithms (PyTorch)\nn-gram\nAn n-gram is a contiguous sequence of n items from a given sample of\ntext or speech. n-gram Frequency\nFrequency-based features represent the count of each word from a given\ntext in the form of vectors.", + "prompt_type": "plain" + }, + { + "output": "For example, a one-gram is equivalent to a single word, a\ntwo-gram is equivalent to two consecutive words paired together, and so\non. Words and n-grams that occur more often will receive a higher\nweightage. The ones that are rare will receive a lower weightage. TF-IDF of n-grams\nFrequency-based features can be multiplied with the inverse document\nfrequency to get term frequency\u2013inverse document frequency (TF-IDF)\nvectors. Doing so also gives importance to the rare terms that occur in\nthe corpus, which may be helpful in certain classification tasks. []\nTruncated SVD Features\nTF-IDF and the frequency of n-grams both result in higher dimensions of\nthe representational vectors. To counteract this, Truncated SVD is\ncommonly used to decompose the vectorized arrays into lower dimensions. []\nLinear Models for TF-IDF Vectors\nLinear models are also available in the Driverless AI NLP recipe. These\ncapture linear dependencies that are crucial to the process of achieving\nhigh accuracy rates and are used as features in the base DAI model.", + "prompt_type": "plain" + }, + { + "output": "Representations are made so that\nwords with similar meanings are placed close to or equidistant from one\nanother. 
For example, the word \"king\" is closely associated with the\nword \"queen\" in this kind of vector representation. []\nTF-IDF and frequency-based models represent counts and significant word\ninformation, but they lack the semantic context for these words. Word\nembedding techniques are used to make up for this lack of semantic\ninformation. CNN Models for Word Embedding\nAlthough Convolutional Neural Network (CNN) models are primarily used on\nimage-level machine learning tasks, their use case on representing text\nas information has proven to be quite efficient and faster compared to\nRNN models. In Driverless AI, we pass word embeddings as input to CNN\nmodels, which return cross validated predictions that can be used as a\nnew set of features. []\nBi-directional GRU Models for Word Embedding\nRecurrent neural networks, like long short-term memory units (LSTM) and\ngated recurrent units (GRU), are state-of-the-art algorithms for NLP\nproblems.", + "prompt_type": "plain" + }, + { + "output": "For example, in the sentence \"John is walking on the golf course,\" a\nunidirectional model would represent states that represent \"golf\" based\non \"John is walking on,\" but would not represent \"course.\" Using a\nbi-directional model, the representation would also account the later\nrepresentations, giving the model more predictive power. In simple terms, a bi-directional GRU model combines two independent RNN\nmodels into a single model. A GRU architecture provides high speeds and\naccuracy rates similar to a LSTM architecture. As with CNN models, we\npass word embeddings as input to these models, which return cross\nvalidated predictions that can be used as a new set of features. []\nCNN Models for Character Embedding\nFor languages like Japanese and Mandarin Chinese, where characters play\na major role, character level embedding is available as an NLP recipe. In character embedding, each character is represented in the form of\nvectors rather than words. 
Driverless AI uses character level embedding\nas the input to CNN models and later extracts class probabilities to\nfeed as features for downstream models.", + "prompt_type": "plain" + }, + { + "output": "These models\ncapture the contextual relation between words by using an attention\nmechanism. Unlike directional models that read text sequentially, a\nTransformer-based model reads the entire sequence of text at once,\nallowing it to learn the context of the word based on all of its\nsurrounding words. The embeddings obtained by these models show improved\nresults in comparison to earlier embedding approaches. []\nBERT and DistilBERT models can be used for generating embeddings for any\ntext columns. These pretrained models are used to get embeddings for the\ntext followed by Linear/Logistic Regression to generate features that\ncan then be used for any downstream models in Driverless AI. Refer to\nnlp-settings in the Expert Settings topic for more information on how to\nenable these models for feature engineering. We recommend using GPU(s)\nto leverage the power of these models and accelerate the feature\nengineering process. PyTorch Transformer Architecture Models (e.g., BERT) as Modeling\nAlgorithms\nStarting with the Driverless AI 1.9 release, the Transformer-based\narchitectures shown in the diagram below are supported as models in\nDriverless AI.", + "prompt_type": "plain" + }, + { + "output": "DistilBERT is a distilled\nversion of BERT that has fewer parameters compared to BERT (40% less)\nand it is faster (60% speedup) while retaining 95% of BERT level\nperformance. The DistilBERT model can be useful when training time and\nmodel size are important. Refer to nlp-settings in the Expert Settings\ntopic for more information on how to enable these models as modeling\nalgorithms. We recommend using GPU(s) to leverage the power of these\nmodels and accelerate the model training time. 
In addition to these techniques, Driverless AI supports\ncustom NLP recipes using, for example, PyTorch or\nFlair. NLP Feature Naming Convention\nThe naming conventions of the NLP features help to understand the type\nof feature that has been created. The syntax for the feature names is as follows:\n[FEAT TYPE]:[COL]. [TARGET_CLASS]\n- [FEAT TYPE] represents one of the following:\n- [COL] represents the name of the text column. - [TARGET_CLASS] represents the target class for which the model\n predictions are made.", + "prompt_type": "plain" + }, + { + "output": "[]\nNLP Explainers\nThe following is a list of available NLP explainers. For more\ninformation, refer to mli_default_recipes and mli-nlp-plots. - NLP LOCO Explainer: The NLP LOCO plot applies a\n leave-one-covariate-out (LOCO) styled approach to NLP models by\n removing a specific token from all text features in a record and\n predicting local importance without that token. The difference\n between the resulting score and the original score (token included)\n is useful when trying to determine how specific changes to text\n features alter the predictions made by the model. - NLP Partial Dependence Plot Explainer: NLP partial dependence\n (yellow) portrays the average prediction behavior of the Driverless\n AI model when an input text token is left in its respective text and\n not included in its respective text along with +/- 1 standard\n deviation bands. ICE (grey) displays the prediction behavior for an\n individual row of data when an input text token is left in its\n respective text and not included in its respective text.", + "prompt_type": "plain" + }, + { + "output": "- NLP Tokenizer Explainer: NLP tokenizer plot shows both the global\n and local importance values of each token in a corpus (a large and\n structured set of texts). The corpus is automatically generated from\n text features used by Driverless AI models prior to the process of\n tokenization. 
Local importance values are calculated by using the\n term frequency-inverse document frequency (TF-IDF) as a weighting\n factor for each token in each row. The TF-IDF increases\n proportionally to the number of times a token appears in a given\n document and is offset by the number of documents in the corpus that\n contain the token. - NLP Vectorizer + Linear Model (VLM) Text Feature Importance\n Explainer: NLP Vectorizer + Linear Model (VLM) text feature\n importance uses TF-IDF of individual words as features from a text\n column of interest and builds a linear model (currently GLM) using\n those features and fits it to either the predicted class (binary\n classification) or the continuous prediction (regression) of the\n Driverless AI model.", + "prompt_type": "plain" + }, + { + "output": "Note that by default, this explainer uses\n the first text column based on alphabetical order. NLP Expert Settings\nA number of configurable settings are available for NLP in Driverless\nAI. For more information, refer to nlp-settings in the Expert Settings\ntopic. Also see nlp model and nlp transformer in\npipeline building recipes under experiment\nsettings. []\nAn NLP Example: Sentiment Analysis\nThe following section provides an NLP example. This information is based\non the Automatic Feature Engineering for Text Analytics blog post. A\nsimilar example using the Python Client is available in python_client. This example uses a classic example of sentiment analysis on tweets\nusing the US Airline Sentiment dataset. Note that the sentiment of each\ntweet has been labeled in advance and that our model will be used to\nlabel new tweets. We can split the dataset into training and test\n(80/20) with the random split in Driverless AI. 
We will use the tweets\nin the \u2018text\u2019 column and the sentiment (positive, negative or neutral)\nin the \u2018airline_sentiment\u2019 column for this demo.", + "prompt_type": "plain" + }, + { + "output": "Similar to other problems in the Driverless AI\nsetup, we need to choose the dataset, and then specify the target column\n(\u2018airline_sentiment\u2019). []\nBecause we don't want to use any other columns in the dataset, we need\nto click on Dropped Cols, and then exclude everything but text as shown\nbelow:\n[]\nNext, we will turn on our TensorFlow NLP recipes. We can go to the\nExpert Settings window, NLP and turn on the following:\nCNN TensorFlow models, BiGRU TensorFlow models, character-based\nTensorFlow models or pretrained PyTorch NLP models. []\nAt this point, we are ready to launch an experiment. Text features will\nbe automatically generated and evaluated during the feature engineering\nprocess. Note that some features such as TextCNN rely on TensorFlow\nmodels. We recommend using GPU(s) to leverage the power of TensorFlow or\nthe PyTorch Transformer models and accelerate the feature engineering\nprocess. []\nOnce the experiment is done, users can make new predictions and download\nthe scoring pipeline just like any other Driverless AI experiments.", + "prompt_type": "plain" + }, + { + "output": "Redis Multinode Training\n\nRedis Multinode training in Driverless AI can be used to run multiple\nexperiments at the same time. It is effective in situations where you\nneed to run and complete many experiments simultaneously in a short\namount of time without having to wait for each individual experiment to\nfinish.\n\nUnderstanding Redis Multinode Training\n\nRedis multinode training uses a load distribution technique in which a\nset of machines (worker nodes) are used to help a main server node\nprocess experiments. 
These machines can be CPU only or CPU + GPU, with\nexperiments being distributed accordingly.\n\n[]\n\nJobs (experiments) within the multinode setup are organized into a\nqueue. Jobs remain in this queue when no processor is\navailable. When a worker's processor becomes available, it asks the job\nqueue service to assign it a new job. By default, each worker node\nprocesses two jobs at a time (configured with the", + "prompt_type": "plain" + }, + { + "output": "worker_remote_processors option in the config.toml file). Each worker can process multiple jobs at the same time, but two workers cannot process the same experiment at the same time. Messaging and data exchange services are also implemented to allow the workers to effectively communicate with the main server node. **Notes**: - Redis multinode training in Driverless AI is currently in a preview stage. If you are interested in using multinode configurations, contact support@h2o.ai. - Redis multinode training requires the transfer of data to several different workers. For example, if an experiment is scheduled to be on a remote worker node, the datasets it is using need to be copied to the worker machine by using the MinIO service. The experiment can take longer to initialize depending on the size of the transferred objects. - The number of jobs that each worker node processes is controlled by the worker_remote_processors\noption in the config.toml file. - Tasks are not distributed to best fit\nworkers.", + "prompt_type": "plain" + }, + { + "output": "- **A single experiment runs entirely on one machine (or\nnode)**. For this reason, using a large amount of commodity-grade\nhardware is not useful in the context of multinode. - For more\ninformation on queuing in Driverless AI, see dai-queuing. Requirements\n- Redis\nRedis Multinode Setup Example\nThe following example configures a two-node Redis Multinode Driverless\nAI cluster on AWS EC2 instances using the bash tar distribution. 
This example\ncan be expanded to multiple worker nodes. This example assumes that you\nhave spun up two EC2 instances (Ubuntu 16.04) within the same VPC on\nAWS. VPC Settings\nIn the VPC settings, enable inbound rules to listen to TCP connections\non port 6379 for Redis and 9000 for MinIO. Install Driverless AI Natively\nInstall Driverless AI on the server node. Refer to one of the following\ndocuments for information on how to perform a native install on Linux\nsystems. - linux-deb\n- linux-rpms\n- linux-tarsh\nEdit the Driverless AI config.toml\nAfter Driverless AI is installed, edit the following configuration\noptions in the config.toml file.", + "prompt_type": "plain" + }, + { + "output": "enable_dask_cluster = false`` would not be done.\n\nStart the Driverless AI Server Node\n\n cd |VERSION-dir|-linux-x86_64\n ./run-dai.sh\n\nInstall the Linux deb/rpm/tar package on the EC2 instance to create a\nDriverless AI worker node. After the installation is complete, edit the\nfollowing in the config.toml.\n\n # Redis settings, point to the dai main server's redis server ip address\n redis_ip = \"\"\n\n # Redis settings\n redis_port = 6379\n\n # Redis settings, point to the dai main server's redis server password\n main_server_redis_password = \"\"\n\n # Location of the dai main server's minio server.\n main_server_minio_address = \":9000\"\n\n enable_dask_cluster = false\n\nTo use the full multinode with both redis and dask support, see the\nexample multinode-example, in which case", + "prompt_type": "plain" + }, + { + "output": "enable_dask_cluster = false`` would not be done.\n\nStart the Driverless AI Worker Node\n\n cd |VERSION-dir|-linux-x86_64\n ./run-dai.sh --worker\n\n # Note that when using rpm/deb you can run the following:\n sudo systemctl start dai-worker\n\nOnce the worker node starts, use the Driverless AI server IP to log into\nDriverless AI. Click on Resources > System Info to confirm that the\nnumber of workers is \"2\" if only one worker is used. 
(By default, each\nworker node processes two jobs at a time. This is configured with the", + "prompt_type": "plain" + }, + { + "output": "worker_remote_processors option in the config.toml file.) .. figure:: images/system_info_view.png :alt: .. _multinode-config-attributes: Description of Configuration Attributes --------------------------------------- - worker_mode: Specifies how the long-running tasks are scheduled. Available options include: - multiprocessing: Forks the current process immediately. - singlenode: Shares the task through Redis and needs a worker running. - multinode: Same as singlenode. Also shares the data through MinIO and allows the worker to run on a different machine. - redis_ip: Redis IP address. Defaults to 127.0.0.1. - redis_port: Redis port. Defaults to 6379. - redis_db: Redis database. Each DAI instance running on the Redis server should have a unique integer. Defaults to 0. - main_server_redis_password: Main Server Redis password. Defaults to an empty string. - local_minio_port: The port that MinIO will listen on. This only takes effect if the current system is a multinode main server.", + "prompt_type": "plain" + }, + { + "output": "check_distribution_shift``\n\nData Distribution Shift Detection\n\nSpecify whether Driverless AI should detect data distribution shifts\nbetween train/valid/test datasets (if provided). When train and test\ndataset differ (or train/valid or valid/test) in terms of distribution\nof data, then a model can be built with high accuracy that tells for\neach row, whether the row is in train or test. Currently, this\ninformation is only presented to the user and not acted upon.\n\nShifted features should either be dropped. 
Or more meaningful aggregate\nfeatures be created by using them as labels or bins.\n\nAlso see\ndrop_features_distribution_shift_threshold_auc \nand check_distribution_shift_drop .", + "prompt_type": "plain" + }, + { + "output": "check_distribution_shift_drop``\n\nData Distribution Shift Detection Drop of Features\n\nSpecify whether to drop high-shift features. This defaults to Auto. Note\nthat Auto for time series experiments turns this feature off.\n\nAlso see\ndrop_features_distribution_shift_threshold_auc \nand check_distribution_shift .", + "prompt_type": "plain" + }, + { + "output": "drop_features_distribution_shift_threshold_auc``\n\nMax Allowed Feature Shift (AUC) Before Dropping Feature\n\nSpecify the maximum allowed AUC value for a feature before dropping the\nfeature.\n\nWhen train and test dataset differ (or train/valid or valid/test) in\nterms of distribution of data, then a model can be built that tells for\neach row, whether the row is in train or test. This model includes an\nAUC value. If this AUC, GINI, or Spearman correlation of the model is\nabove the specified threshold, then Driverless AI will consider it a\nstrong enough shift to drop those features.\n\nThe default AUC threshold is 0.999.", + "prompt_type": "plain" + }, + { + "output": "check_leakage----------------- .. container:: dropdown **Data Leakage Detection** Specify whether to check for data leakage for each feature. Some of the features may contain over predictive power on the target column. This may affect model generalization. Driverless AI runs a model to determine the predictive power of each feature on the target variable. Then, a simple model is built on each feature with significant variable importance. The models with high AUC (for classification) or R2 score (regression) are reported to the user as potential leak. Note that this option is always disabled if the experiment is a time series experiment. This is set to **Auto** by default. 
The equivalent config.toml parameter is check_leakage.\nAlso see drop_features_leakage_threshold_auc.\n", + "prompt_type": "plain" + }, + { + "output": "drop_features_leakage_threshold_auc--------------------------------------- .. container:: dropdown **Data Leakage Detection Dropping AUC/R2 Threshold** If Leakage Detection is enabled, specify the threshold for dropping features. When the AUC (or R2 for regression), GINI, or Spearman correlation is above this value, the feature is dropped. This value defaults to 0.999. The equivalent config.toml parameter is drop_features_leakage_threshold_auc.", + "prompt_type": "plain" + }, + { + "output": "leakage_max_data_size``\n\nMax Rows X Columns for Leakage\n\nSpecify the maximum number of (rows x columns) to trigger sampling for\nleakage checks. This value defaults to 10,000,000.", + "prompt_type": "plain" + }, + { + "output": "max_features_importance``\n\nMax. num. features for variable importance\n\nSpecify the maximum number of features to use and show in importance\ntables. For any interpretability higher than 1, transformed or original\nfeatures with lower importance than the top max_features_importance features\nare always removed. Feature importances of transformed or original\nfeatures correspondingly will be pruned. Higher values can lead to lower\nperformance and larger disk space used for datasets with more than 100k\ncolumns.", + "prompt_type": "plain" + }, + { + "output": "enable_wide_rules--------------------- .. container:: dropdown **Enable Wide Rules** Enable various rules to handle wide datasets (i.e., no. of columns > no. of rows). The default value is \"auto\", which automatically enables the wide rules when it detects that the number of columns is greater than the number of rows. Setting \"on\" forces rules to be enabled regardless of any conditions. 
Enabling wide data rules sets all max_cols, max_origcol, and fs_orig tomls to large values, and enforces monotonicity to be disabled unless monotonicity_constraints_dict is set or the default value of monotonicity_constraints_interpretability_switch is changed. It also disables shift detection and data leakage checks, and enables the XGBoost Random Forest model\n for modeling. To disable wide rules, set enable_wide_rules to \"off\". For mostly or\n entirely numeric datasets, selecting only 'OriginalTransformer' for\n faster speed is recommended (see\n included_transformers).", + "prompt_type": "plain" + }, + { + "output": "orig_features_fs_report``\n\nReport Permutation Importance on Original Features\n\nSpecify whether Driverless AI reports permutation importance on original\nfeatures (represented as normalized change in the chosen metric) in logs\nand the report file. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "max_rows_fs``\n\nMaximum Number of Rows to Perform Permutation-Based Feature Selection\n\nSpecify the maximum number of rows when performing permutation feature\nimportance, reduced by (stratified) random sampling. This value defaults\nto 500,000.", + "prompt_type": "plain" + }, + { + "output": "max_orig_cols_selected``\n\nMax Number of Original Features Used\n\nSpecify the maximum number of columns to be selected from an existing\nset of columns using feature selection. This value defaults to\n10,000,000. For categorical columns, the selection is based upon how well\ntarget encoding (or frequency encoding if not available) on categoricals\nand numerics treated as categoricals helps. This is useful to reduce the\nfinal model complexity. 
First, the best [max_orig_cols_selected] are\nfound through feature selection methods and then these features are used\nin feature evolution (to derive other features) and in modelling.", + "prompt_type": "plain" + }, + { + "output": "max_orig_nonnumeric_cols_selected``\n\nMax Number of Original Non-Numeric Features\n\nMaximum number of non-numeric columns selected, above which Driverless AI will do\nfeature selection on all features and avoid treating numerical as\ncategorical. Same as above (max_orig_numeric_cols_selected), but for\ncategorical columns. Feature selection is performed on all features when\nthis value is exceeded. This value defaults to 300.", + "prompt_type": "plain" + }, + { + "output": "fs_orig_cols_selected``\n\nMax Number of Original Features Used for FS Individual\n\nSpecify the maximum number of features you want to be selected in an\nexperiment. This value defaults to 10,000,000. Additional columns above\nthe specified value add a special individual with original columns\nreduced.", + "prompt_type": "plain" + }, + { + "output": "fs_orig_numeric_cols_selected``\n\nNumber of Original Numeric Features to Trigger Feature Selection Model\nType\n\nThe maximum number of original numeric columns, above which Driverless\nAI will do feature selection. Note that this is applicable only to\nspecial individuals with original columns reduced. A separate individual\nin the genetic algorithm is created by doing feature selection by\npermutation importance on original features. This value defaults to\n10,000,000.", + "prompt_type": "plain" + }, + { + "output": "fs_orig_nonnumeric_cols_selected``\n\nNumber of Original Non-Numeric Features to Trigger Feature Selection\nModel Type\n\nThe maximum number of original non-numeric columns, above which\nDriverless AI will do feature selection on all features. 
Note that this\nis applicable only to special individuals with original columns reduced.\nA separate individual in the genetic algorithm is created by doing\nfeature selection by permutation importance on original features. This\nvalue defaults to 200.", + "prompt_type": "plain" + }, + { + "output": "max_relative_cardinality``\n\nMax Allowed Fraction of Uniques for Integer and Categorical Columns\n\nSpecify the maximum fraction of unique values for integer and\ncategorical columns. If the column has a larger fraction of unique\nvalues than that, it will be considered an ID column and ignored. This\nvalue defaults to 0.95.", + "prompt_type": "plain" + }, + { + "output": "num_as_cat-------------- .. container:: dropdown **Allow Treating Numerical as Categorical** Specify whether to allow some numerical features to be treated as categorical features. This is enabled by default. The equivalent config.toml parameter isnum_as_cat``.", + "prompt_type": "plain" + }, + { + "output": "max_int_as_cat_uniques``\n\nMax Number of Unique Values for Int/Float to be Categoricals\n\nSpecify the number of unique values for integer or real columns to be\ntreated as categoricals. This value defaults to 50.", + "prompt_type": "plain" + }, + { + "output": "max_fraction_invalid_numeric``\n\nMax. fraction of numeric values to be non-numeric (and not missing) for\na column to still be considered numeric\n\nWhen the fraction of non-numeric (and non-missing) values is less or\nequal than this value, consider the column numeric. Can help with minor\ndata quality issues for experimentation, not recommended for production,\nsince type inconsistencies can occur. Note: Replaces non-numeric values\nwith missing values at start of experiment, so some information is lost,\nbut column is now treated as numeric, which can help. Disabled if < 0.", + "prompt_type": "plain" + }, + { + "output": "nfeatures_max----------------- .. 
container:: dropdown **Max Number of Engineered Features** Specify the maximum number of features to be included per model (and in each model within the final model if an ensemble). After each scoring, based on this parameter value, keeps top variable importance features, and prunes away rest of the features. Final ensemble will exclude any pruned-away features and only train on kept features, but may contain a few new features due to fitting on different data view (e.g. new clusters). Final scoring pipeline will exclude any pruned-away features, but may contain a few new features due to fitting on different data view (e.g. new clusters). The default value of **-1** means no restrictions are applied for this parameter except internally-determined memory and interpretability restrictions. Notes: - Ifinterpretability>remove_scored_0gain_genes_in_postprocessing_above_interpretability(see :ref:`config.toml ` for reference), then every GA (:ref:`genetic algorithm `) iteration post-processes features down to this value just after scoring them.", + "prompt_type": "plain" + }, + { + "output": "ngenes_max-------------- .. container:: dropdown **Max Number of Genes** Specify the maximum number of genes (transformer instances) kept per model (and per each model within the final model for ensembles). This controls the number of genes before features are scored, so Driverless AI will just randomly samples genes if pruning occurs. If restriction occurs after scoring features, then aggregated gene importances are used for pruning genes. Instances includes all possible transformers, including original transformer for numeric features. A value of -1 means no restrictions except internally-determined memory and interpretability restriction. The equivalent config.toml parameter isngenes_max``.", + "prompt_type": "plain" + }, + { + "output": "features_allowed_by_interpretability---------------------------------------- .. 
container:: dropdown **Limit Features by Interpretability** Specify whether to limit feature counts with the **Interpretability** training setting as specified by thefeatures_allowed_by_interpretability`\n:ref:`config.toml setting.", + "prompt_type": "plain" + }, + { + "output": "monotonicity_constraints_interpretability_switch``\n\nThreshold for Interpretability Above Which to Enable Automatic\nMonotonicity Constraints for Tree Models\n\nSpecify an Interpretability setting value equal and above which to use\nautomatic monotonicity constraints in XGBoostGBM, LightGBM, or Decision\nTree models. This value defaults to 7.\n\nAlso see monotonic gbm recipe and\nMonotonicity Constraints in Driverless AI for reference.", + "prompt_type": "plain" + }, + { + "output": "monotonicity_constraints_correlation_threshold``\n\nCorrelation Beyond Which to Trigger Monotonicity Constraints (if\nenabled)\n\nSpecify the threshold of Pearson product-moment correlation coefficient\nbetween numerical or encoded transformed feature and target above (below\nnegative for) which to use positive (negative) monotonicity for\nXGBoostGBM, LightGBM and Decision Tree models. This value defaults to\n0.1.\n\nNote: This setting is only enabled when Interpretability is greater than\nor equal to the value specified by the enable-constraints setting and\nwhen the constraints-override setting is not specified.\n\nAlso see monotonic gbm recipe and\nMonotonicity Constraints in Driverless AI for reference.", + "prompt_type": "plain" + }, + { + "output": "monotonicity_constraints_log_level``\n\nControl amount of logging when calculating automatic monotonicity\nconstraints (if enabled)\n\nFor models that support monotonicity constraints, and if enabled, show\nautomatically determined monotonicity constraints for each feature going\ninto the model based on its correlation with the target. 'low' shows\nonly monotonicity constraint direction. 
'medium' shows correlation of\npositively and negatively constrained features. 'high' shows all\ncorrelation values.\n\nAlso see monotonic gbm recipe and\nMonotonicity Constraints in Driverless AI for reference.", + "prompt_type": "plain" + }, + { + "output": "monotonicity_constraints_drop_low_correlation_features``\n\nWhether to drop features that have no monotonicity constraint applied\n(e.g., due to low correlation with target)\n\nIf enabled, only monotonic features with +1/-1 constraints will be\npassed to the model(s), and features without monotonicity constraints\n(0) will be dropped. Otherwise, all features will be in the model. Only\nactive when interpretability >=\nmonotonicity_constraints_interpretability_switch or\nmonotonicity_constraints_dict is provided.\n\nAlso see monotonic gbm recipe and\nMonotonicity Constraints in Driverless AI for reference.", + "prompt_type": "plain" + }, + { + "output": "monotonicity_constraints_dict``\n\nManual Override for Monotonicity Constraints\n\nSpecify a list of features for which\nmonotonicity constraints are applied. Original numeric features are\nmapped to the desired constraint:\n\n- 1: Positive constraint\n- -1: Negative constraint\n- 0: Constraint disabled\n\nConstraint is automatically disabled (set to 0) for features that are\nnot in this list.\n\nThe following is an example of how this list can be specified:\n\n \"{'PAY_0': -1, 'PAY_2': -1, 'AGE': -1, 'BILL_AMT1': 1, 'PAY_AMT1': -1}\"\n\nNote: If a list is not provided, then the automatic correlation-based\nmethod is used when monotonicity constraints are enabled at high enough\ninterpretability settings.\n\nSee Monotonicity Constraints in Driverless AI for reference.", + "prompt_type": "plain" + }, + { + "output": "max_feature_interaction_depth--------------------------------- .. 
container:: dropdown **Max Feature Interaction Depth** Specify the maximum number of features to use for interaction features like grouping for target encoding, weight of evidence, and other likelihood estimates. Exploring feature interactions can be important in gaining better predictive performance. The interaction can take multiple forms (i.e. feature1 + feature2 or feature1 \\* feature2 + \u2026 featureN). Although certain machine learning algorithms (like tree-based methods) can do well in capturing these interactions as part of their training process, still generating them may help them (or other algorithms) yield better performance. The depth of the interaction level (as in \"up to\" how many features may be combined at once to create one single feature) can be specified to control the complexity of the feature engineering process. Higher values might be able to make more predictive models at the expense of time.", + "prompt_type": "plain" + }, + { + "output": "fixed_feature_interaction_depth``\n\nFixed Feature Interaction Depth\n\nSpecify a fixed non-zero number of features to use for interaction\nfeatures like grouping for target encoding, weight of evidence, and\nother likelihood estimates. To use all features for each transformer,\nset this to be equal to the number of columns. To do a 50/50 sample and\na fixed feature interaction depth of n features, set this to -n.", + "prompt_type": "plain" + }, + { + "output": "enable_target_encoding``\n\nEnable Target Encoding\n\nSpecify whether to use Target Encoding when building the model. Target\nencoding refers to several different feature transformations (primarily\nfocused on categorical data) that aim to represent the feature using\ninformation of the actual target variable. A simple example can be to\nuse the mean of the target to replace each unique category of a\ncategorical feature. 
These types of features can be very predictive but\nare prone to overfitting and require more memory as they need to store\nmappings of the unique categories and the target values.", + "prompt_type": "plain" + }, + { + "output": "cvte_cv_in_cv----------------- .. container:: dropdown **Enable Outer CV for Target Encoding** For target encoding, specify whether an outer level of cross-fold validation is performed in cases where GINI is detected to flip sign or have an inconsistent sign for weight of evidence between fit_transform (on training data) and transform (on training\n\n and validation data). The degree to which GINI is inaccurate is also\n used to perform fold-averaging of look-up tables instead of using\n global look-up tables. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "enable_lexilabel_encoding``\n\nEnable Lexicographical Label Encoding\n\nSpecify whether to enable lexicographical label encoding. This is\ndisabled by default.", + "prompt_type": "plain" + }, + { + "output": "enable_isolation_forest``\n\nEnable Isolation Forest Anomaly Score Encoding\n\nIsolation Forest is useful for identifying anomalies or outliers in\ndata. Isolation Forest isolates observations by randomly selecting a\nfeature and then randomly selecting a split value between the maximum\nand minimum values of that selected feature. This split depends on how\nlong it takes to separate the points. Random partitioning produces\nnoticeably shorter paths for anomalies. When a forest of random trees\ncollectively produces shorter path lengths for particular samples, they\nare highly likely to be anomalies.\n\nThis option lets you specify whether to return the anomaly score of each\nsample. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "enable_one_hot_encoding``\n\nEnable One HotEncoding\n\nSpecify whether one-hot encoding is enabled. 
The default Auto setting is\nonly applicable for small datasets and GLMs.", + "prompt_type": "plain" + }, + { + "output": "isolation_forest_nestimators``\n\nNumber of Estimators for Isolation Forest Encoding\n\nSpecify the number of estimators for Isolation Forest encoding. This\nvalue defaults to 200.", + "prompt_type": "plain" + }, + { + "output": "drop_constant_columns``\n\nDrop Constant Columns\n\nSpecify whether to drop columns with constant values. This is enabled by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "drop_id_columns``\n\nDrop ID Columns\n\nSpecify whether to drop columns that appear to be an ID. This is enabled\nby default.", + "prompt_type": "plain" + }, + { + "output": "no_drop_features``\n\nDon't Drop Any Columns\n\nSpecify whether to avoid dropping any columns (original or derived).\nThis is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "cols_to_drop``\n\nFeatures to Drop\n\nSpecify which features to drop. This setting allows you to select many\nfeatures at once by copying and pasting a list of column names (in\nquotes) separated by commas.", + "prompt_type": "plain" + }, + { + "output": "cols_to_force_in``\n\nFeatures to always keep or force in, e.g. \"G1\", \"G2\", \"G3\"\n\nControl over columns to force-in. Forced-in features are handled by the\nmost interpretable transformers allowed by the experiment options, and\nthey are never removed (even if the model assigns 0 importance to them).\nTransformers used by default includes:\n\n - OriginalTransformer for numeric,\n - CatOriginalTransformer or FrequencyTransformer for categorical,\n - TextOriginalTransformer for text,\n - DateTimeOriginalTransformer for date-times,\n - DateOriginalTransformer for dates,\n - ImageOriginalTransformer or ImageVectorizerTransformer for images,\n etc", + "prompt_type": "plain" + }, + { + "output": "cols_to_group_by``\n\nFeatures to Group By\n\nSpecify which features to group columns by. 
When this field is left\nempty (default), Driverless AI automatically searches all columns\n(either at random or based on which columns have high variable\nimportance).", + "prompt_type": "plain" + }, + { + "output": "sample_cols_to_group_by``\n\nSample from Features to Group By\n\nSpecify whether to sample from given features to group by or to always\ngroup all features. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "agg_funcs_for_group_by``\n\nAggregation Functions (Non-Time-Series) for Group By Operations\n\nSpecify whether to enable aggregation functions to use for group by\noperations. Choose from the following (all are selected by default):\n\n- mean\n- sd\n- min\n- max\n- count", + "prompt_type": "plain" + }, + { + "output": "folds_for_group_by``\n\nNumber of Folds to Obtain Aggregation When Grouping\n\nSpecify the number of folds to obtain aggregation when grouping.\nOut-of-fold aggregations will result in less overfitting, but they\nanalyze less data in each fold. The default value is 5.", + "prompt_type": "plain" + }, + { + "output": "mutation_mode``\n\nType of Mutation Strategy\n\nSpecify which strategy to apply when performing mutations on\ntransformers. Select from the following:\n\n- sample: Sample transformer parameters (Default)\n- batched: Perform multiple types of the same transformation together\n- full: Perform more types of the same transformation together than\n the above strategy", + "prompt_type": "plain" + }, + { + "output": "dump_varimp_every_scored_indiv``\n\nEnable Detailed Scored Features Info\n\nSpecify whether to dump every scored individual's variable importance\n(both derived and original) to a csv/tabulated/json file. If enabled,\nDriverless AI produces files such as\n\"individual_scored_id%d.iter%d*features*\". 
This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "dump_trans_timings``\n\nEnable Detailed Logs for Timing and Types of Features Produced\n\nSpecify whether to dump every scored fold's timing and feature info to a\ntimings.txt file. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "compute_correlation``\n\nCompute Correlation Matrix\n\nSpecify whether to compute training, validation, and test correlation\nmatrixes. When enabled, this setting creates table and heatmap PDF files\nthat are saved to disk. Note that this setting is currently a single\nthreaded process that may be slow for experiments with many columns.\nThis is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "interaction_finder_gini_rel_improvement_threshold``\n\nRequired GINI Relative Improvement for Interactions\n\nSpecify the required GINI relative improvement value for the\nInteractionTransformer. If the GINI coefficient is not better than the\nspecified relative improvement value in comparison to the original\nfeatures considered in the interaction, then the interaction is not\nreturned. If the data is noisy and there is no clear signal in\ninteractions, this value can be decreased to return interactions. This\nvalue defaults to 0.5.", + "prompt_type": "plain" + }, + { + "output": "interaction_finder_return_limit``\n\nNumber of Transformed Interactions to Make\n\nSpecify the number of transformed interactions to make from generated\ntrial interactions. (The best transformed interactions are selected from\nthe group of generated trial interactions.) This value defaults to 5.", + "prompt_type": "plain" + }, + { + "output": "enable_rapids_transformers------------------------------ .. container:: dropdown **Whether to enable RAPIDS cuML GPU transformers (no mojo)** Specify whether to enable GPU-based `RAPIDS cuML `__ transformers. 
Note that **no MOJO** support for deployment is available for this selection at this time, but python scoring is supported and this is in beta testing status. The equivalent config.toml parameter isenable_rapids_transformers``\nand the default value is False.", + "prompt_type": "plain" + }, + { + "output": "varimp_threshold_at_interpretability_10``\n\nLowest allowed variable importance at interpretability 10\n\nSpecify the variable importance below which features are dropped (with\nthe possibility of a replacement being found that's better). This\nsetting also sets the overall scale for lower interpretability settings.\nSet this to a lower value if you're content with having many weak\nfeatures despite choosing high interpretability, or if you see a drop in\nperformance due to the need for weak features.", + "prompt_type": "plain" + }, + { + "output": "stabilize_fs``\nWhether to take minimum (True) or mean (False) of delta improvement in\nscore when aggregating feature selection scores across multiple\nfolds/depths\nWhether to take minimum (True) or mean (False) of delta improvement in\nscore when aggregating feature selection scores across multiple\nfolds/depths. Delta improvement of score corresponds to original metric\nminus metric of shuffled feature frame if maximizing metric, and\ncorresponds to negative of such a score difference if minimizing. Feature selection by permutation importance considers the change in\nscore after shuffling a feature, and using minimum operation ignores\noptimistic scores in favor of pessimistic scores when aggregating over\nfolds. Note, if using tree methods, multiple depths may be fitted, in\nwhich case regardless of this toml setting, only features that are kept\nfor all depths are kept by feature selection. 
If interpretability >=\nconfig toml value of fs_data_vary_for_interpretability, then half data\n(or setting of fs_data_frac) is used as another fit, in which case\nregardless of this toml setting, only features that are kept for all\ndata sizes are kept by feature selection.", + "prompt_type": "plain" + }, + { + "output": "The Interpreted Models Page\n\nClick the MLI link in the upper-right corner of the UI to view a list of\ninterpreted models.\n\nYou can sort this page by Name, Target, Model, Dataset, N-Folds, Feature\nSet, Cluster Col, LIME Method, Status, or ETA/Runtime. You can also use\nthe search bar to locate a specific interpreted model. To specify which\ncolumns are visible on this page, click the top right-most column, then\nselect Visible Columns.\n\nClick the right-most column of an interpreted model to view an\nadditional menu. This menu allows you to open, rename, or delete the\ninterpretation.\n\nNote: Driverless AI version 1.9 features a redesigned MLI page for\ninterpreted models. To view the legacy version of an interpreted model's\nMLI page, select Open Legacy from the menu.\n\nClick on an interpreted model to view the MLI page for that\ninterpretation. The MLI page that displays will vary depending on\nwhether the experiment was a regular experiment or a time series\nexperiment.", + "prompt_type": "plain" + }, + { + "output": "Docker Image Installation\n\nThis section provides instructions for installing the Driverless AI\nDocker image.\n\ninstall/linux-docker-images install/mac-osx install/windows\n\nFor instructions on installing Driverless AI in native Linux\nenvironments, refer to native_installs.\n\nNote that from version 1.10, DAI Docker image runs with internal", + "prompt_type": "plain" + }, + { + "output": "tinithat is equivalent to using--initfrom Docker. If both are enabled in the launch command, tini prints a (harmless) warning message. 
For GPU users, as GPU needs--pid=hostfor nvml, which makes tini not use pid=1, so it will show the warning message (still harmless). We recommend--shm-size=256m`\nin Docker launch command. But if user plans to build :ref:`image auto\nmodel extensively, then", + "prompt_type": "plain" + }, + { + "output": "--shm-size=2g`` is recommended for Driverless AI Docker command.", + "prompt_type": "plain" + }, + { + "output": "Scoring Pipelines Overview\nDriverless AI provides Scoring Pipelines that can be deployed to\nproduction for experiments and/or\ninterpreted models. - A standalone Python Scoring Pipeline is available for experiments\n and interpreted models. - A low-latency, standalone MOJO Scoring Pipeline is available for\n experiments, with both Java and C++ backends. The Python Scoring Pipeline is implemented as a Python whl file. While\nthis allows for a single process scoring engine, the scoring service is\ngenerally implemented as a client/server architecture and supports\ninterfaces for TCP and HTTP. The MOJO (Model Objects, Optimized) Scoring Pipeline provides a\nstandalone scoring pipeline that converts experiments to MOJOs, which\ncan be scored in real time. The MOJO Scoring Pipeline is available as\neither a Java runtime or a\nC++ runtime . For the C++ runtime, both Python and\nR wrappers are provided.", + "prompt_type": "plain" + }, + { + "output": "Downloading Datasets\n\nIn Driverless AI, you can download datasets from the Datasets Overview\npage.\n\nTo download a dataset, click on the dataset or select the [Click for\nActions] button beside the dataset that you want to download, and then\nselect Download from the submenu that appears.\n\nNote: The option to download datasets will not be available if the", + "prompt_type": "plain" + }, + { + "output": "enable_dataset_downloadingoption is set tofalse` when starting\nDriverless AI. 
This option can be specified in the :ref:`config.toml\n file.", + "prompt_type": "plain" + }, + { + "output": "MLI Overview\nDriverless AI provides robust interpretability of machine learning\nmodels to explain modeling results in a human-readable format. In the\nMachine Learning Interpretability (MLI) view, Driverless AI employs a\nhost of different techniques and methodologies for interpreting and\nexplaining the results of its models. A number of charts are generated\nautomatically (depending on experiment type), including K-LIME, Shapley,\nVariable Importance, Decision Tree Surrogate, Partial Dependence,\nIndividual Conditional Expectation, Sensitivity Analysis, NLP Tokens,\nNLP LOCO, and more. Additionally, you can download a CSV of LIME,\nShapley, and Original (Kernel SHAP) Shapley reason codes as well as text\nand Python files of Decision Tree Surrogate model rules from this view. The techniques and methodologies used by Driverless AI for model\ninterpretation can be extended with recipes (Python code snippets). For\nmore information on custom recipes for MLI, see\nhttps://github.com/h2oai/driverlessai-recipes/tree/rel-1.9.1/explainers.", + "prompt_type": "plain" + }, + { + "output": "Refer to the\nfollowing sections for more information:\n- interpreted-model-page\n- interpret-regular\n- interpret-ts\n- mli-byor\nNote\nMigration Information\n- Interpretations made in version 1.9.0 are supported in 1.9.x and\n later. - Interpretations made in version 1.8.x aren't supported in 1.9.x and\n later. However, interpretations made in 1.8.x can still be viewed\n and rerun. Note\n- MLI is not supported for unsupervised learning models. - MLI is not\nsupported for Image or multiclass Time Series experiments. - MLI does\nnot require an Internet connection to run on current models. - To\nspecify a port of a specific H2O instance for use by MLI, use the\nh2o_port config.toml setting. You can also specify\nan IP address for use by MLI with the h2o_ip setting. 
Additional Resources\n- Click here to download our MLI cheat sheet. - \"An Introduction to Machine Learning Interpretability\" book. - Click here to access the H2O.ai MLI Resources repository.", + "prompt_type": "plain" + }, + { + "output": "Quick-Start Tables by Environment\nUse the following tables for Cloud, Server, and Desktop to find the\nright setup instructions for your environment. Cloud\nRefer to the following for more information about instance types:\n- AWS Instance Types\n- Azure Instance Types\n- Google Compute Instance Types\n+-----------------+---------+------+----------+-----------------------+\n| Provider | I | Num | Suitable | Refer to Section |\n| | nstance | GPUs | for | |\n| | Type | | | |\n+=================+=========+======+==========+=======================+\n| NVIDIA GPU | | | Serious | i |\n| Cloud | | | use | nstall-on-nvidia-dgx |\n+-----------------+---------+------+----------+-----------------------+\n| AWS | p2 | 1 | Experim | install-on-aws |\n| | | | entation | |\n| - | .xlarge | --- | | |\n| - | | ---- | -- | |\n| - | -- | ---+ | -------- | |\n| - | ------- | | -------+ | |\n| - | ------+ | | | |\n| - | | 8 | | |\n| - | p2.", + "prompt_type": "plain" + }, + { + "output": "| | -------+ | |\n| | | | | |\n| | 2xlarge | 4 | | |\n| | | | Experim | |\n| | -- | --- | | |\n| | ------- | ---- | entation | |\n| | ------+ | ---+ | | |\n| | | | -- | |\n| | p3. 
| | -------- | |\n| | | 8 | -------+ | |\n| | 8xlarge | | | |\n| | | --- | | |\n| | -- | ---- | Serious | |\n| | ------- | ---+ | | |\n| | ------+ | | | |\n| | | | use | |\n| | | 1 | | |\n| | p3.1 | | -- | |\n| | | --- | -------- | |\n| | 6xlarge | ---- | -------+ | |\n| | | ---+ | | |\n| | -- | | | |\n| | ------- | | Serious | |\n| | ------+ | 2 | | |\n| | | | | |\n| | g3.", + "prompt_type": "plain" + }, + { + "output": "| | | |\n| | | | entation | |\n| | 8xlarge | | | |\n| | | | -- | |\n| | -- | | -------- | |\n| | ------- | | -------+ | |\n| | ------+ | | | |\n| | | | | |\n| | | | Experim | |\n| | g3.1 | | | |\n| | | | entation | |\n| | 6xlarge | | | |\n| | | | -- | |\n| | | | -------- | |\n| | | | -------+ | |\n| | | | | |\n| | | | | |\n| | | | Serious | |\n| | | | | |\n| | | | | |\n| | | | use | |\n+-----------------+---------+------+----------+-----------------------+\n| Azure | Stand | 1 | Experim | :r |\n| | ard_NV6 | | entation | ef:install-on-azure |\n| - | | --- | | |\n| - | -- | ---- | -- | |\n| - | ------- | ---+ | -------- | |\n| - | ------+ | | -------+ | |\n| - | | | | |\n| | | 2 | | |\n| | Standa | | Experim | |\n| | | --- | | |\n| | rd_NV12 | ---- | entation | |\n| | | ---+ | | |\n| | -- | | -- | |\n| | ------- | | -------- | |\n| | ------+ | 4 | -------+ | |\n| | | | | |\n| | | --- | | |\n| | Standa | ---- | Serious | |\n| | | ---+ | | |\n| | rd_NV24 | | | |\n| | | | use | |\n| | -- | 1 | | |\n| | ------- | | -- | |\n| | ------+ | --- | -------- | |\n| | | ---- | -------+ | |\n| | Stand | ---+ | | |\n| | | | | |\n| | ard_NC6 | | Experim | |\n| | | 2 | | |\n| | -- | | entation | |\n| | ------- | --- | | |\n| | ------+ | ---- | -- | |\n| | | ---+ | -------- | |\n| | | | -------+ | |\n| | Standa | | | |\n| | | 4 | | |\n| | rd_NC12 | | Experim | |\n| | | | | |\n| | -- | | entation | |\n| | ------- | | | |\n| | ------+ | | -- | |\n| | | | -------- | |\n| | | | -------+ | |\n| | Standa | | | |\n| | | | | |\n| | rd_NC24 | | Serious | |\n| | | | | |\n| | 
| | | |\n| | | | use | |\n+-----------------+---------+------+----------+-----------------------+\n| Google Compute | | | | insta |\n| | | | | ll-on-google-compute |\n+-----------------+---------+------+----------+-----------------------+\nServer\n --------------------------------------------------------------------\n Operating System GP Min Mem Refer to Section\n Us?", + "prompt_type": "plain" + }, + { + "output": "Datasets in Driverless AI\n\nThe Datasets Overview page is the Driverless AI home page. It displays\nthe datasets that have been imported into Driverless AI. Data Connectors\ncan be used to connect to various data sources.\n\ndatasets-import datasets-options datasets-download datasets-modify\ndatasets-join-wizard datasets-split\n\n[]", + "prompt_type": "plain" + }, + { + "output": "Experiment Summary\nAn experiment summary is available for each completed experiment. Click\nthe Download Summary & Logs button to download the\nh2oai_experiment_summary_.zip file. []\nThe files within the experiment summary zip provide textual explanations\nof the graphical representations that are shown on the Driverless AI UI. Details of each artifact are described below. Experiment AutoDoc\nA report file (AutoDoc) is included in the experiment summary. This\nreport provides insight into the training data and any detected shifts\nin distribution, the validation schema selected, model parameter tuning,\nfeature evolution and the final set of features chosen during the\nexperiment. For more information, see autodoc. Experiment Artifacts Overview\nThe Experiment Summary contains artifacts that provide overviews of the\nexperiment. - preview.txt: Provides a preview of the experiment. (This is the same\n information that was included on the UI before starting the\n experiment.)", + "prompt_type": "plain" + }, + { + "output": "(Available in txt or json.) - config.json: Provides a list of the settings used in the experiment. 
- config_overrides_toml_string.txt: Provides any overrides for this\n experiment that were made to the config.toml file. - args_do_auto_dl.json: The internal arguments used in the Driverless\n AI experiment based on the dataset and accuracy, time and\n interpretability settings. - experiment_column_types.json: Provides the column types for each\n column included in the experiment. - experiment_original_column.json: A list of all columns available in\n the dataset that was used in the experiment. - experiment_pipeline_original_required_columns.json: For columns used\n in the experiment, this includes the column name and type. - experiment_sampling_description.json: A description of the sampling\n performed on the dataset. - timing.json: The timing and number of models generated in each part\n of the Driverless AI pipeline. Tuning Artifacts\nDuring the Driverless AI experiment, model tuning is performed to\ndetermine the optimal algorithm and parameter settings for the provided\ndataset.", + "prompt_type": "plain" + }, + { + "output": "does taking\nthe log of the target column improve results). The results from these\ntuning steps are available in the Experiment Summary. - tuning_leaderboard: A table of the model tuning performed along with\n the score generated from the model and training time. (Available in\n txt or json.) - target_transform_tuning_leaderboard.txt: A table of the transforms\n applied to the target column along with the score generated from the\n model and training time. (This will be empty for binary and\n multiclass use cases.) Features Artifacts\nDriverless AI performs feature engineering on the dataset to determine\nthe optimal representation of the data. The top features used in the\nfinal model can be seen in the GUI. The complete list of features used\nin the final model is available in the Experiment Summary artifacts. The Experiment Summary also provides a list of the original features and\ntheir estimated feature importance. 
For example, given the features in\nthe final Driverless AI model, we can estimate the feature importance of\nthe original features.", + "prompt_type": "plain" + }, + { + "output": "- PAY_3: 0.92 * 1 (PAY_3 is the only variable used.) - ClusterDist9:BILL_AMT1:LIMIT_BAL:PAY_3: 0.90 * 1/3 (PAY_3 is one of\n three variables used.) Estimated Feature Importance = (1*0) + (0.92*1) + (0.9*(1/3)) = 1.22\nNote: The feature importance is converted to relative feature\nimportance. (The feature with the highest estimated feature importance\nwill have a relative feature importance of 1). - ensemble_features: A list of features used in the final model, a\n description of the feature, and the relative feature importance. Feature importances for multiple models are linearly blended with\n same weights as the final ensemble of models. (Available in txt,\n table, or json.) - ensemble_features_orig: A complete list of all original features\n used in the final model, a description of the feature, the relative\n feature importance, and the standard deviation of relative\n importance. (Available in txt or json.) - ensemble_features_orig_shift: A list of original user features used\n in the final model and the difference in relative feature importance\n between the final model and the corresponding feature importance of\n the final population.", + "prompt_type": "plain" + }, + { + "output": "- ensemble_features_prefit: A list of features used by the best\n individuals in the final population, each model blended with same\n weights as ensemble if ensemble used blending. (Available in txt,\n table, or json.) - ensemble_features_shift: A list of features used in the final model\n and the difference in relative feature importance between the final\n model and the corresponding feature importance of the final\n population. (Available in txt, table, or json.) 
- features: A list of features used by the best individual pipeline\n (identified by the genetic algorithm) and each feature's relative\n importance. (Available in txt, table, or json.) - features_orig: A list of original user features used by the best\n individual pipeline (identified by the genetic algorithm) and each\n feature's estimated relative importance. (Available in txt or json.) - leaked_features.json: A list of all leaked features provided along\n with the relative importance and the standard deviation of relative\n importance.", + "prompt_type": "plain" + }, + { + "output": "- leakage_features_orig.json: A list of leaked original features\n provided and an estimate of the relative feature importance of that\n leaked original feature in the final model. - shift_features.json: A list of all features provided along with the\n relative importance and the shift in standard deviation of relative\n importance of that feature. - shifit_features_orig.json: A list of original features provided and\n an estimate of the shift in relative feature importance of that\n original feature in the final model. Final Model Artifacts\nThe Experiment Summary includes artifacts that describe the final model. This is the model that is used to score new datasets and create the MOJO\nscoring pipeline. The final model may be an ensemble of models depending\non the Accuracy setting. - coefs: A list of coefficients and standard deviation of coefficients\n for features. (Available in txt or json.) - ensemble.txt: A summary of the final model which includes a\n description of the model(s), gains/lifts table, confusion matrix,\n and scores of the final model for our list of scorers.", + "prompt_type": "plain" + }, + { + "output": "(Available in table or json.) Note that this is not available for\n Time Series experiments. - ensemble_description.txt: A sentence describing the final model. 
(For example: \"Final TensorFlowModel pipeline with ensemble_level=0\n transforming 21 original features -> 54 features in each of 1 models\n each fit on full training data (i.e. no hold-out).\") - ensemble_coefs: The coefficient and standard deviation coefficient\n for each feature in the ensemble. (Available as txt or json.) - ensemble_coefs_shift: The coefficient and shift of coefficient for\n each feature in the ensemble. (Available as txt or json.) - ensemble_model_description.json/ensemble_model_extra_description: A\n json file describing the model(s) and for ensembles how the model\n predictions are weighted. - ensemble_model_params.json: A json file describing the parameters of\n the model(s). - ensemble_folds_data.json: A json file describing the folds used for\n the final model(s). This includes the size of each fold of data and\n the performance of the final model on each fold.", + "prompt_type": "plain" + }, + { + "output": "- ensemble_features_orig: A list of the original features provided and\n an estimate of the relative feature importance of that original\n feature in the ensemble of models. (Available in txt or json.) - ensemble_features: A complete list of all features used in the final\n ensemble of models, a description of the feature, and the relative\n feature importance. (Available in txt, table, or json.) - leakage_coefs.json: A list of coefficients and standard deviation of\n coefficients for leaked features. - pipeline: A visual representation of the experiment pipeline. - shift_coefs.json: A list of coefficients and the shift in standard\n deviation for those coefficients used in the experiment. The Experiment Summary also includes artifacts about the final model\nperformance. - ensemble_scores.json: The scores of the final model for our list of\n scorers. - ensemble_confusion_matrix_test: The confusion matrix for the test\n data if test data is provided. 
Note that this is not available for\n Time Series experiments.", + "prompt_type": "plain" + }, + { + "output": "Note that this is not available for\n Time Series experiments. - ensemble_confusion_matrix_stats_validation: The confusion matrix\n statistics on internal validation data. Note that this is not\n available for Time Series experiments. - ensemble_confusion_matrix_stats_test.json: Confusion matrix\n statistics on the test data. This is only available if test data is\n provided. Note that this is not available for Time Series\n experiments. - ensemble_gains_test: The lift and gains table for test data if test\n data is provided. (Visualization of lift and gains can be seen in\n the UI.) Note that this is not available for Time Series\n experiments. - ensemble_gains_with_validation: The lift and gains table for the\n internal validation data. (Visualization of lift and gains can be\n seen in the UI.) Note that this is not available for Time Series\n experiments. - ensemble_roc_test: The ROC and Precision Recall table for test data\n if test data is provided.", + "prompt_type": "plain" + }, + { + "output": "To stop the Driverless AI Docker image, type Ctrl + C in the Terminal\n(Mac OS X) or PowerShell (Windows 10) window that is running the\nDriverless AI Docker image.", + "prompt_type": "plain" + }, + { + "output": "Supported Algorithms\nConstant Model\nA Constant Model predicts the same constant value for any input data. The constant value is computed by optimizing the given scorer. For\nexample, for MSE/RMSE, the constant is the (weighted) mean of the target\ncolumn. For MAE, it is the (weighted) median. For other scorers like\nMAPE or custom scorers, the constant is found with an optimization\nprocess. For classification problems, the constant probabilities are the\nobserved priors. A constant model is meant as a baseline reference model. 
If it ends up\nbeing used in the final pipeline, a warning will be issued because that\nwould indicate a problem in the dataset or target column (e.g., when\ntrying to predict a random outcome). Decision Tree\nA Decision Tree is a single (binary) tree model that splits the training\ndata population into sub-groups (leaf nodes) with similar outcomes. No\nrow or column sampling is performed, and the tree depth and method of\ngrowth (depth-wise or loss-guided) is controlled by hyper-parameters.", + "prompt_type": "plain" + }, + { + "output": "This\nimplementation uses a hashing trick and Hogwild approach [3] for\nparallelization. FTRL supports binomial and multinomial classification\nfor categorical targets, as well as regression for continuous targets. GLM\nGeneralized Linear Models (GLM) estimate regression models for outcomes\nfollowing exponential distributions. GLMs are an extension of\ntraditional linear models. They have gained popularity in statistical\ndata analysis due to:\n- the flexibility of the model structure unifying the typical\n regression methods (such as linear regression and logistic\n regression for binary classification)\n- the recent availability of model-fitting software\n- the ability to scale well with large datasets\nDriverless AI uses the XGBoost GLM implementation (booster=gblinear) for\nmodeling. This GLM is subject to early stopping. Isolation Forest\nIsolation Forest is useful for identifying anomalies or outliers in\ndata. Isolation Forest isolates observations by randomly selecting a\nfeature and then randomly selecting a split value between the maximum\nand minimum values of that selected feature.", + "prompt_type": "plain" + }, + { + "output": "Random partitioning produces\nnoticeably shorter paths for anomalies. When a forest of random trees\ncollectively produces shorter path lengths for particular samples, they\nare highly likely to be anomalies. 
LightGBM\nLightGBM is a gradient boosting framework developed by Microsoft that\nuses tree based learning algorithms. It was specifically designed for\nlower memory usage and faster training speed and higher efficiency. Similar to XGBoost, it is one of the best gradient boosting\nimplementations available. It is also used for fitting Random Forest,\nDART (experimental), and Decision Tree models inside of Driverless AI. PyTorch Models\nPyTorch is an open source library used for deep learning tasks such as\nnatural language processing and computer vision. Driverless AI's NLP BERT models are implemented using PyTorch, for\ndetails see NLP in Driverless AI . PyTorch Grownet Model\nGradient Boosting Neural Networks or GrowNet applies gradient boosting\nto shallow neural networks.", + "prompt_type": "plain" + }, + { + "output": "Each model is fed the original features and the\npredictions of the previous model. The predictions of all the models are\nsummed to produce a final output. Every model can be as simple as having\nonly one hidden layer. As per the paper, GrowNet is easy to tune and\nrequires less computational cost and time to train, than deep neural\nnetworks and yet seems to outperform deep neural networks in regression,\nclassification, and ranking on multiple datasets. Driverless AI integrates the Pytorch implementation of Grownet. The\nmodel expert settings parameter enable_grownet controls\nthe run. Random Forest\nRandom Forest averages multiple deep decision trees on different parts\nof the same training data. Driverless AI supports both XGBoost RandomForest (XGBRF) and LightGBM\nRandomForest (boosting=rf) implementations for modeling. 
RuleFit\nThe RuleFit [2] algorithm creates an optimal set of decision rules by\nfirst fitting a tree model, and then fitting a Lasso (L1-regularized)\nGLM model to create a linear model consisting of the most important tree\nleaves (rules).", + "prompt_type": "plain" + }, + { + "output": "TensorFlow\nTensorFlow is an open source software library for performing high\nperformance numerical computation. Driverless AI includes\nTensorFlow NLP recipes based on CNN and BiGRU (RNN) deep learning\nmodels and Tensorflow Imagenet models for\nimage data. A TensorFlow model is a fully connected neural network with a few hidden\nlayers (that is, a multilayer perceptron). It has a few tuning\nparameters that can add wide and deep or attention. TensorFlow is considered a model like XGB, LGBM, or GLM. In many cases,\nit may not perform as well as the aforementioned models, but it can be\nuseful for ensembles and multiclass as well as for small data recipes\nsince there are many folds / repeats and models involved. Only C++ MOJOs are currently available for TensorFlow models. XGBoost\nXGBoost is a supervised learning algorithm that implements a process\ncalled boosting to yield accurate models. Boosting refers to the\nensemble learning technique of building many models sequentially, with\neach new model attempting to correct for the deficiencies in the\nprevious model.", + "prompt_type": "plain" + }, + { + "output": "XGBoost provides parallel tree boosting\n(also known as GBDT, GBM) that solves many data science problems in a\nfast and accurate way. For many problems, XGBoost is one of the best\ngradient boosting machine (GBM) frameworks today. Driverless AI supports XGBoost GBM and XGBoost DART models. Zero-Inflated Models\nZero-inflated models fit the data with excess zero counts in the target\nvariable for example in insurance claim use case. In Driverless AI, this\nmodel trains a classifier that attempts to classify zero and non-zero\nvalues. 
It then trains a regression model that attempts to predict the\nnon-zero values. The classifier predictions are multiplied by the\nregression predictions to determine the final output. Driverless AI supports both LightGBM and XGBoost versions of\nzero-inflated models. References\n[1] DataTable for Python, https://github.com/h2oai/datatable\n[2] J. Friedman, B. Popescu. \"Predictive Learning via Rule Ensembles\". 2005. http://statweb.stanford.edu/~jhf/ftp/RuleFit.pdf\n[3] Niu, Feng, et al.", + "prompt_type": "plain" + }, + { + "output": "Passing additional pip install options\n\nYou can use the pip_install_options TOML option \nto pass additional pip install options formatted as a list. The\nfollowing are two examples that demonstrate how this option can be used.\n\n- When installing Python packages, you can use this TOML option to\n specify your organization's internal Python package index as\n follows:\n\n- You can use this TOML option to install Python packages with a proxy\n server as follows:\n\nPassing multiple pip install options to DAI\n\nThe following example demonstrates how to correctly pass multiple pip\ninstall options to DAI.\n\n pip_install_options=\"['--extra-index-url', 'http://my-own-repo1:port','--extra-index-url', 'http://my-own-repo2:port']\"", + "prompt_type": "plain" + }, + { + "output": "About Licenses\n\nDriverless AI is licensed per a single named user. Therefore, in order,\nto have different users run experiments simultaneously, they would each\nneed a license. Driverless AI manages the GPU(s) that it is given and\nensures that different experiments from different users can run safely\nsimultaneously and don\u2019t interfere with each other. So when two licensed\nusers log in with different credentials, neither of them will see the\nother\u2019s experiment. 
Similarly, if a licensed user logs in using a\ndifferent set of credentials, that user will not see any previously run\nexperiments.", + "prompt_type": "plain" + }, + { + "output": "Genetic Algorithm in Driverless AI\nDriverless AI aims to determine the best pipeline for a dataset. This\ninvolves data transformation, feature engineering, model hyperparameter\ntuning, scoring and ensembling. The genetic algorithm process is a trial-and-error selection process,\nbut it is reproducible. In Driverless AI,\ngenetic algorithm is performed during the\nFeature Evolution stage of an experiment. Feature Evolution\nis a competition between slowly mutating parameters to find best\nindividuals . The Feature Evolution is not completely random and\nis informed from the variable importance interactions tables\nof the modeling algorithms. Driverless AI Brain caches\ninformation about the set of best genes, interactions and parameters in\nthe population and also information from previous experiments (if\nenabled), can be used during genetic algorithm mutations. Driverless AI also integrates Optuna, that employs Bayesian optimization\ntechnique for model hyperparameter search.", + "prompt_type": "plain" + }, + { + "output": "Custom code can also be written to toggle inbuilt mutation\nstrategy. For details see additional information section. During model building and feature tuning processes, overfitting is\nprevented by doing bootstrapping and cross validation, while\nunderfitting is prevented by balancing exploitation vs exploration in\ngenetic algorithm. - Understanding Genetic Algorithm and its Driverless AI\n equivalent. - The Full Picture : The end to end pipeline in Driverless\n AI. - Reading the logs : Workflow as seen in the Experiment\n logs. - Some additional details \nUnderstanding Genetic Algorithm\nGenetic Algorithm is a search heuristic inspired by the process of\nnatural selection where the fittest individuals are selected to produce\noffspring for the next generation. 
Some Driverless AI equivalent definitions to consider before the deep\ndive:\n - A gene stores information about type of and parameters for a\n feature transformation .", + "prompt_type": "plain" + }, + { + "output": "- A transformer is the actual code that applies the gene. - An individual consists of a genome that includes a set of genes,\n i.e. information about which transformations and with what\n parameters to perform. It also includes model hyperparameters and\n some additional information like the target transformations\n applied etc. - Individuals create a population that goes through a randomly\n chosen pair-wise tournament process to decide\n the winners. - Fitness score for an individual is model evaluation or scores\n based on the scoring metric. Below are the steps involved in a Genetic Algorithm and their Driverless\nAI equivalent:\nInitialization\nConsider all the probable solutions to the given problem. This creates\nthe population. The most popular technique for initialization is the use\nof random binary strings. Driverless AI : The individuals from the Tuning Phase are fed\nin as the random probable solutions for Feature evolution via genetic\nalgorithm.", + "prompt_type": "plain" + }, + { + "output": "The higher the fitness\nscore, the higher the chances of being chosen for reproduction. Driverless AI : Fitness score for an individual is model evaluation\nbased on the scoring metric. Selection\nIndividuals are selected for the reproduction of offspring. The selected\nindividuals are then arranged in pairs of two to enhance reproduction. These individuals pass on their genes to the next generation. The\ngenetic algorithm uses the fitness proportionate selection technique to\nensure that useful solutions are used for recombination. Driverless AI : A tournament is performed within the\npopulation to find the best subset (half) of the population. Reproduction : crossover mutation\nThis phase involves the creation of a child population. 
The algorithm\nemploys variation operators that are applied to the parent population. The two main operators in this phase include crossover and mutation. mutation : This operator adds new genetic information to the new child\n population.", + "prompt_type": "plain" + }, + { + "output": "Mutation solves the problem of local minima and enhances\n diversification. crossover : This operator swaps the genetic information of two parents\n to reproduce an offspring. It is performed on parent pairs that are\n selected randomly to generate a child population of the same size as the\n parent population. Driverless AI : Winning sub-population's genes, features and model\nhyperparameters are mutated into new offspring (asexual reproduction). Mutation involves adding, perturbing, or pruning\ngenes. The strategy for adding genes is based on balancing exploitation and\n exploration of importance of original variables. Genes are added that\n explore additional transformations for original variables with high\n importance. The best genes from prior winners become part of the pool of great\n genes that are used and can be shared amongst the offspring. Specific output features can be pruned. Features are pruned when\n variable importance is below a certain threshold (based upon\n interpretability settings).", + "prompt_type": "plain" + }, + { + "output": "For some like CUML RF, it is based upon Shapley\n Permutation Importance. Replacement\nGenerational replacement takes place in this phase, which is a\nreplacement of the old population with the new child population. The new\npopulation consists of higher fitness scores than the old population.\nDriverless AI : Mutate winning sub-population's Genes (add, prune and\nperturb), Features, Model hyper parameters to fill-up the population\nback to pre-tournament size. Termination\nAfter replacement has been done, a stopping criterion is used to provide\nthe basis for termination. 
The algorithm will terminate after the\nthreshold fitness solution has been attained. It will identify this\nsolution as the best solution in the population. Driverless AI: Score the individuals and either terminate the evolution\nif stopping criteria is reached or continue the selection process. The Full Picture\nHere we describe in detail the working of the different stages that\nDriverless performs in sequence during an experiment to output the best\npipeline for the dataset-\n1) Convert Accuracy, Time and Interpretability knob settings\n to number of iterations and models to be built.", + "prompt_type": "plain" + }, + { + "output": "This is achieved by building\n (LightGBM if available) models with simple allowed feature\n transformations and model parameters (chosen from the internal\n recipe pool) and choosing the target transformation with highest\n score. The target_transform_tuning_leaderboard_simple.json file in\n summary zip or Experiment GUI lists the built models with their\n scores and parameters. []\n3) Data Leakage and Shift Detection:\n A) Leakage Detection : To detect data leakage,\n Driverless AI runs a model (LightGBM if available) to get the\n variable importance table (that determines the predictive\n power of each feature on the target variable). Then, a simple\n model is built on each feature with significant variable\n importance. The models with high AUC (for classification) or\n R2 score (regression) are reported to the user as potential\n leak features. B) Shift Detection : To detect shift in\n distribution between the training, validation or testing\n datasets, Driverless AI trains a binomial model to predict\n which dataset a row belongs to.", + "prompt_type": "plain" + }, + { + "output": "Shifted\n features should either be dropped, or more meaningful\n aggregate features should be created by using them as labels/bins. These features are reported to the user as a notification and\n dropped if a threshold is set. 
4) Model and Feature Tuning Phase: Tuning is random selection of\n parameters to find best individuals . A) Driverless creates a diverse set of individuals. First, it\n goes through and creates a \"SEQUENCE\" of models (based on\n allowed algorithms), adding them with simple feature\n transformations and model parameters. These allowed algorithms\n and feature transformations are displayed in the preview of\n the experiment. The DEFAULT includes simple genes like\n original numeric, date, tfidf or bert embeddings for text\n data, Target encodings, Frequency encodings, Weight of\n evidence encodings, clustering, interactions, etc. These\n default features are simple and support MOJO creation.", + "prompt_type": "plain" + }, + { + "output": "Then, if more individuals are needed in the population,\n \"RANDOM\" models are added. These have same model types\n (algorithms) as in SEQUENCE but with mutated parameters calls\n to the model to get random hyper parameters and (default +\n extra) random features. A \"GLM ONE HOT ENCODED\" model is evaluated and if seem to be\n performing well on the dataset, is added as an individual. A reference individual \"CONSTANT MODEL\" is added to the mix,\n so that we know what best constant predictions (predict the\n same thing whatever the input data) would give for a score. This is how a diverse population of individuals is created. B) All individuals are scored :\n a) Batches (given hardware) of individuals are scored for\n every tuning iteration\n b) At higher accuracy, the original feature set is\n re-created, each batch passing feature importance to\n next batch so it can exploit the importance in order to\n create better features.", + "prompt_type": "plain" + }, + { + "output": "C) Then a tournament is performed amongst the\n individuals to get the best individuals to be passed on to the\n evolution phase. 
D) An \"EXTRA_FS\" model is added in case \"FS\" strategy (feature\n selection strategy) is chosen (for high interpretability\n settings) and it replaces one of the above non-reference\n individuals. This special individual has features that are\n pre-pruned based on the permutation importance of\n the dataset. The Tuning stage leaderboard of an experiment lists all the winning\n individuals (i.e. models that scored highest during the tournament). The summary zip artifact includes it as the\n tuning_leaderboard_simple.json or txt file. []\n5) Feature Evolution Phase: Evolution is competition between slowly\n mutating parameters to find best individuals. During\n evolution phase, we start off with the best individuals (highest\n score) from the tuning phase.", + "prompt_type": "plain" + }, + { + "output": "So first step\n is to either prune or add new individuals to create the desired\n population size. The evolution_begin_leaderboard_simple.json file\n lists these individuals (the unscored are the new added individuals\n to bring the population to the right size). A) Every iteration of the experiment, each individual creates a\n new model based on its genes. B) Population of individuals is trained on the training data,\n with early stopping if available. C) Population is scored for given metric, with bootstrapping if\n chosen (default). D) Tournament is performed amongst the\n individuals based on the selected strategy, to decide winning\n subset of population\n E) Mutate winning sub-population's Genes, Features, Model to\n fill-up the population back to pre-tournament size (asexual\n reproduction). In the genetic algorithm, Mutation involves\n adding, pruning, or perturbing genes.", + "prompt_type": "plain" + }, + { + "output": "The strategy for adding genes is based on\n balancing exploitation and exploration of importance of\n original variables. Genes are added that explore additional\n transformations for original variables with high importance. 
Genes are pruned based on the Information Gain Variable\n Importance for most models, for some like CUML RF, it is based\n upon Shapley Permutation Importance. Features are pruned when\n variable importance is below a certain threshold (based upon\n interpretability settings). See also\n Mutation strategies . F) Back to A...\n6) Ensembling and Final Scoring Pipeline creation: Ensemble the final\n models and build Final Pipeline for production with a MOJO and/or\n Python scoring pipelines . Notes:\n - Feature and Model Tuning leaderboard table lists a parameter\n called feature cost of a model. Feature cost is not equal to the\n number of features used in the model but is based on their\n complexity (or interpretability) i.e.", + "prompt_type": "plain" + }, + { + "output": "For example a low cost model\n may have greater number of more interpretable features than a high\n cost model (i.e. cost number != number of feature used). This\n parameter is used in the workflow during genetic algorithm to\n decide if need to reduce feature count given interpretability dial\n settings of the experiment. - Certain individuals in the Evolution Begin leaderboard table are\n unscored. This can happen if:\n - They violated some constraint on feature counts imposed for\n given choice of interpretability settings and so were\n changed, and the score no longer applies. - They were added at end to fill-up the needed total number of\n individuals in the population and hence have not been scored\n yet. - Also see additional details. 
Reading the Logs\nThe Experiment preview gives an estimate of the number of iterations\ndone and the total number of models(including cross validation models)\nthat are built during the various stages of the experiment.", + "prompt_type": "plain" + }, + { + "output": "INFO | Number of individuals: 8\n INFO | Estimated target transform tuning iterations: 2\n INFO | Estimated model and feature parameter tuning iterations: 4\n INFO | Estimated total (tuning + feature evolution) number of iterations: 16\n INFO | Estimated total (backend + tuning + feature evolution + final) number of models to train: 598\n INFO | Backend tuning: 0 model(s)\n INFO | Target transform tuning: 18 model(s)\n INFO | Model and feature tuning: 48 model(s)\n INFO | Feature pre-pruning: 0 model(s)\n INFO | Feature evolution: 528 model(s)\n INFO | Final pipeline: 3 model(s)\n INFO | ACCURACY [7/10]:\n INFO | - Training data size: *1,000 rows, 11 cols*\n INFO | - Feature evolution: *LightGBM*, *3-fold CV**, 2 reps*\n INFO | - Final pipeline: *LightGBM, averaged across 3-fold CV splits*\n INFO | \n INFO | TIME [2/10]:\n INFO | - Feature evolution: *8 individuals*, up to *10 iterations*\n INFO | - Early stopping: After *5* iterations of no improvement\n INFO | \n INFO | INTERPRETABILITY [8/10]:\n INFO | - Feature pre-pruning strategy: Permutation Importance FS\n INFO | - Monotonicity constraints: enabled\n INFO | - Feature engineering search space: [Interactions, Original]\n INFO | \n INFO | LightGBM models to train:\n INFO | - Target transform tuning: *18*\n INFO | - Model and feature tuning: *48*\n INFO | - Feature evolution: *528*\n INFO | - Final pipeline: *3*\nThis experiment creates only LightGBM models.", + "prompt_type": "plain" + }, + { + "output": "As this is a regression problem, target tuning is performed and 18\nmodels are created to decide the best\ntarget transformation for the dataset. 
This creates\n3 models with 3 fold cross validation each with 2 repeats, i.e. two\ndifferent views of the dataset (in train/valid split). This is done in\ntwo iterations. Next 4 iterations are used for model and feature parameter tuning. This involves creation of approximately 8*3*2\n(individuals*folds*repeats) ~ 48 models. The output models from tuning stage undergo Feature Evolution by genetic\nalgorithm. The genetic algorithm is performed on 8 individuals\n(population size). The next 10 iterations are used for feature evolution\nand around (10 * 8/2 [population subset] * (3*2) [folds*repeats]) ~ 240\nnew models are scored. The upper limit to it is 528 models. Early\nstopping is performed if the scores do not improve after 5 iterations. The final pipeline is created with a single individual with 3 fold\ncross validation. These estimates are based on Accuracy/Time/Interpretability dial\nsettings, types of models selected, and other expert settings for the\nexperiment.", + "prompt_type": "plain" + }, + { + "output": "WARNING| - Feature engineering search space: [CVCatNumEncode, CVTargetEncode, Frequent, Interactions, NumCatTE, OneHotEncoding, Original]\n DATA | LightGBMModel *default* feature->transformer map\n DATA | X_0 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer', 'InteractionsTransformer']\n DATA | X_1 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer', 'InteractionsTransformer']\n DATA | X_2 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\n DATA | X_3 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\n DATA | X_4 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\n DATA | X_5 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\n DATA | X_6 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\n DATA | X_7 :['OriginalTransformer', 
'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\n DATA | X_8 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\n DATA | X_9 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\nValidation splits creation.", + "prompt_type": "plain" + }, + { + "output": "In this example,\nFeature evolution stage will require 3 folds for cross validation and\nand two repeats i.e data views are done. The for final pipeline will\nalso perform 3 folds cv. After splitting the datasets in to folds for\ninternal validations, a Kolmogorov-Smirnov statistics is calculated to\nsee if the folds have similar distribution of data. INFO | Preparing validation splits...\n INFO | [Feature evolution (repeat 1)] Optimized fold splits: Target fold mean (target transformed) stddev: 0.01329 | means: [14.346849, 14.358292, 14.362315, 14.327351, 14.342845, 14.366349]\n INFO | Kolmogorov-Smirnov statistics for splits of fold 0: KstestResult(statistic=0.02176727625829422, pvalue=0.9998424722802827)\n INFO | Kolmogorov-Smirnov statistics for splits of fold 1: KstestResult(statistic=0.025154089621855738, pvalue=0.9981216923269776)\n INFO | Kolmogorov-Smirnov statistics for splits of fold 2: KstestResult(statistic=0.02074638356497427, pvalue=0.9999414082418556)\n INFO | [Feature evolution (repeat 2)] Optimized fold splits: Target fold mean (target transformed) stddev: 0.01793 | means: [14.3447695, 14.362441, 14.366518, 14.318932, 14.340719, 14.370607]\n INFO | Kolmogorov-Smirnov statistics for splits of fold 0: KstestResult(statistic=0.024698351045656434, pvalue=0.9985813106473687)\n INFO | Kolmogorov-Smirnov statistics for splits of fold 1: KstestResult(statistic=0.027531279405342373, pvalue=0.9937850958604381)\n INFO | Kolmogorov-Smirnov statistics for splits of fold 2: KstestResult(statistic=0.02358730544637591, pvalue=0.9993204937887651)\n INFO | [Final pipeline ] Optimized fold splits: Target fold mean (target transformed) stddev: 
0.01329 | means: [14.346849, 14.358292, 14.362315, 14.327351, 14.342845, 14.366349]\n INFO | Kolmogorov-Smirnov statistics for splits of fold 0: KstestResult(statistic=0.02176727625829422, pvalue=0.9998424722802827)\n INFO | Kolmogorov-Smirnov statistics for splits of fold 1: KstestResult(statistic=0.025154089621855738, pvalue=0.9981216923269776)\n INFO | Kolmogorov-Smirnov statistics for splits of fold 2: KstestResult(statistic=0.02074638356497427, pvalue=0.9999414082418556)\n INFO | Feature engineering training / validation splits:\n INFO | split #1: 666 / 334 - target min -1.264726 / 0.766517, target mean: 14.346850 / 14.358292, target max: 27.710434 / 26.761804, target std: 4.981032 / 5.059986\n INFO | split #2: 667 / 333 - target min -1.264726 / 2.914631, target mean: 14.362315 / 14.327350, target max: 26.761804 / 27.710434, target std: 4.999868 / 5.022746\n INFO | split #3: 667 / 333 - target min 0.766517 / -1.264726, target mean: 14.342844 / 14.366349, target max: 27.710434 / 25.879954, target std: 5.037666 / 4.946448\n INFO | split #4: 666 / 334 - target min -1.264726 / 1.490552, target mean: 14.344769 / 14.362441, target max: 27.710434 / 25.997716, target std: 5.026847 / 4.968671\n INFO | split #5: 667 / 333 - target min -1.264726 / 1.101135, target mean: 14.366518 / 14.318931, target max: 26.492384 / 27.710434, target std: 4.981698 / 5.058766\n INFO | split #6: 667 / 333 - target min 1.101135 / -1.264726, target mean: 14.340719 / 14.370606, target max: 27.710434 / 26.492384, target std: 5.010135 / 5.002203\n INFO | Doing backend tuning on data of shape (666, 11) / (334, 11)\n INFO | Maximum number of rows (train or valid) for feature evolution: 667\n INFO | Final ensemble training / validation splits:\n INFO | split #1: 666 / 334 - target min -1.264726 / 0.766517, target mean: 14.346850 / 14.358292, target max: 27.710434 / 26.761804, target std: 4.981032 / 5.059986\n INFO | split #2: 667 / 333 - target min -1.264726 / 2.914631, target mean: 14.362315 / 
14.327350, target max: 26.761804 / 27.710434, target std: 4.999868 / 5.022746\n INFO | split #3: 667 / 333 - target min 0.766517 / -1.264726, target mean: 14.342844 / 14.366349, target max: 27.710434 / 25.879954, target std: 5.037666 / 4.946448\n INFO | Maximum number of rows (train or valid) for final model/ensemble: 667\nThe transformations and genes applicable and the\ntournament style for the genetic algorithm for\nfeature evolution is registered.", + "prompt_type": "plain" + }, + { + "output": "INFO | Auto-tuning modeling backend: start. INFO | Backend candidate Job# 0 Name: LightGBMModel using GPU (if applicable) with Booster: lightgbm\n INFO | Backend candidate Job# 1 Name: LightGBMModel using CPU with Booster: lightgbm\n ...\n INFO | Auto-tuning modeling backend: end : Duration: 299.8936 s\nLeakage detection A model is run to determine the predictive power of\neach feature on the target. Then, a simple model is built on each\nfeature with significant variable importance. The models with high AUC\n(for classification) or R2 score (regression) are reported to the user\nas potential leak. INFO | Checking for leakage...\n ...\n INFO | Time for leakage check for training and None: 30.6861 [secs]\n INFO | No significant leakage detected in training data ( R2: 0.7957284 )\nTarget tuning is performed for regression problems to find the best\ndistribution (log, unit box, square root, etc.) of the target variable\nto optimize for scorer So 3 models with 6 fold cross validation in 2\niterations.", + "prompt_type": "plain" + }, + { + "output": "INFO | Tuned 18/18 target transform tuning models. Tuned [LIGHTGBM] Tuning []\n INFO | Target transform search: end : Duration: 389.6202 s\n INFO | Target transform: TargetTransformer_identity_noclip\nParameter and feature tuning stage starts from 3rd iteration and 4\niterations are spent in building ~48 models (8*3*2). 
8 Individuals are built and made sure that the features included in the\nmodels satisfy the interpretablity conditions (see nfeatures_max and\nngenes_max). Also an additional FS individual is added during the 6th\niteration. See tuning phase for reference. Hence this stage\nbuilds greater than 48 models. INFO | Model and feature tuning scores (RMSE, less is better):\n INFO | Individual 0 : 1.638517 +/- 0.04910973 [Tournament: 1.638517 Model: LIGHTGBM Feature Cost: 17]\n INFO | Individual 1 : 1.638517 +/- 0.04910973 [Tournament: 1.638517 Model: LIGHTGBM Feature Cost: 17]\n INFO | Individual 2 : 1.638517 +/- 0.04910973 [Tournament: 1.638517 Model: LIGHTGBM Feature Cost: 17]\n INFO | Individual 3 : 1.643672 +/- 0.06142867 [Tournament: 1.643672 Model: LIGHTGBM Feature Cost: 14]\n INFO | Individual 4 : 1.66976 +/- 0.04171555 [Tournament: 1.66976 Model: LIGHTGBM Feature Cost: 13]\n INFO | Individual 5 : 1.683212 +/- 0.06572724 [Tournament: 1.683212 Model: LIGHTGBM Feature Cost: 14]\n INFO | Individual 6 : 1.690918 +/- 0.05417363 [Tournament: 1.690918 Model: LIGHTGBM Feature Cost: 16]\n INFO | Individual 7 : 1.692052 +/- 0.04037833 [Tournament: 1.692052 Model: LIGHTGBM Feature Cost: 17]\n INFO | Individual 8 : 2.080228 +/- 0.03523514 [Tournament: 2.080228 Model: LIGHTGBM Feature Cost: 13]\n INFO | Applying nfeatures_max and ngenes_max limits to tuning population\n INFO | Parameter tuning: end : Duration: 634.5521 s\n INFO | Prepare Feature Evolution\n INFO | Feature evolution has 0 brain cached individuals out of 8 individuals\n INFO | Making 1 new individuals during preparation for evolution\n INFO | Pre-pruning 1 gene(s) from 12 active base genes\n INFO | Starting search for statistically relevant features (FS scheme)\n INFO | FS Permute population of size 1 has 2 unique transformations that include: ['InteractionsTransformer', 'OriginalTransformer']\n INFO | Transforming FS train\n INFO | Using 2 parallel workers (1 parent workers) for fit_transform.", + "prompt_type": 
"plain" + }, + { + "output": "At the end of the 16th iteration, the experiment has not converged so\nthe Feature evolution is stopped. It is made sure that the features\nincluded in the models satisfy the interpretablity conditions and are\nless than the maximum allowed limits (see nfeatures_max and ngenes_max). Best individual and population is stored in the Driverless AI brain for\nrestart or refitting of the experiment. The best individual(s) is\nproceeded the next stage. INFO | Scored 283/310 models on 31 features. Last Scored [LIGHTGBM]\n INFO | Scores (RMSE, less is better):\n INFO | Individual 0 : 1.540669 +/- 0.07447481 [Tournament: 1.540669 Model: LIGHTGBM Feature Cost: 10]\n INFO | Individual 1 : 1.541396 +/- 0.07796533 [Tournament: 1.541396 Model: LIGHTGBM Feature Cost: 9]\n INFO | Individual 2 : 1.542085 +/- 0.07796533 [Tournament: 1.542085 Model: LIGHTGBM Feature Cost: 9]\n INFO | Individual 3 : 1.543484 +/- 0.07796533 [Tournament: 1.543484 Model: LIGHTGBM Feature Cost: 9]\n INFO | Individual 4 : 1.547386 +/- 0.08567484 [Tournament: 1.547386 Model: LIGHTGBM Feature Cost: 10]\n INFO | Individual 5 : 1.557151 +/- 0.08078833 [Tournament: 1.557151 Model: LIGHTGBM Feature Cost: 8]\n INFO | Individual 6 : 3.961817 +/- 0.08480774 [Tournament: 3.961817 Model: LIGHTGBM Feature Cost: 4]\n INFO | Individual 7 : 4.052189 +/- 0.05662354 [Tournament: 4.052189 Model: LIGHTGBM Feature Cost: 1]\n INFO | Best individual with LIGHTGBM model has 7 transformers creating 10 total features and 10 features for model: 1.540669 RMSE\n DATA | Top 10 variable importances of best individual:\n DATA | LInteraction LGain\n DATA | 0 3_X_3 1.000000\n DATA | 1 10_InteractionMul:X_0:X_1 0.570066\n DATA | 2 4_X_4 0.264919\n DATA | 3 10_InteractionAdd:X_0:X_1 0.225805\n DATA | 4 2_X_2 0.183059\n DATA | 5 0_X_0 0.130161\n DATA | 6 1_X_1 0.124281\n DATA | 7 10_InteractionDiv:X_0:X_1 0.032255\n DATA | 8 10_InteractionSub:X_0:X_1 0.013721\n DATA | 9 7_X_7 0.007424\n INFO | Experiment has not 
yet converged after 16 iteration(s).", + "prompt_type": "plain" + }, + { + "output": "After sampling expected population size: 1. INFO | Final population size after sampling: 1 (0 reference) with models_final=3 and num_ensemble_folds=3\n INFO | Final Model sampled population with population of 8 individuals (best scores=['1.540669'])\nIn iteration 17, three fold cross validation is performed on the final\nensemble model, a few checks are done on the features used, predictions\nand python and mojo scoring pipelines are created. Logs and summary\nartifacts are collected. INFO | Completed 3/3 final ensemble models. INFO | Model performance:\n INFO | fold: 0, model name: LightGBM, model iterations: 500, model transformed features: 10, total model time: 2.4198, fit+predict model time: 0.376, total pipeline time: 0.48786, fit pipeline time: 0.29738\n INFO | fold: 1, model name: LightGBM, model iterations: 500, model transformed features: 10, total model time: 3.343, fit+predict model time: 0.34681, total pipeline time: 0.43664, fit pipeline time: 0.24267\n INFO | fold: 2, model name: LightGBM, model iterations: 473, model transformed features: 10, total model time: 2.1446, fit+predict model time: 0.38534, total pipeline time: 0.41979, fit pipeline time: 0.23152\n INFO | Checking for shift in tuning model -> final model variable importances\n DATA | New features created only in final pipeline: Count: 0 List: []\n DATA | Extra features created in final pipeline compared to genetic algorithm population: Count: 0 List: []\n DATA | Missing features from final StackedEnsemble pipeline compared to genetic algorithm population: Count: 0 List: []\n INFO | Completed training of the final scoring pipeline\n INFO | Predictions and Scoring final pipeline...\n INFO | Scored 286/310 models on 31 features.", + "prompt_type": "plain" + }, + { + "output": "Reducing number of features for all models is applicable only when\n (one of below satisfied):\n - num. 
of columns, is greater than max_orig_cols_selected or,\n - num. of non-numeric columns, is greater than\n max_orig_nonnumeric_cols_selected or,\n - num. of numeric columns, is greater than\n max_orig_numeric_cols_selected\n Given the above requirements for all models is not satisfied;\n reducing number of features only for the FS individual (EXTRA_FS)\n is applicable only when (one of below satisfied) :\n - num. of columns, is greater than fs_orig_cols_selected or,\n - num. of non-numeric columns, is greater than\n fs_orig_nonnumeric_cols_selected or,\n - num. of numeric columns, is greater than\n fs_orig_numeric_cols_selected\n See tuning phase and permutation importance . 2) Tuning Phase Model Origins:\n - SEQUENCE and DefaultIndiv: Feature transformations and model\n hyper-parameters are chosen at random from the basic\n transformation sets and parameter lists as suggested by\n internal proprietary data science recipes.", + "prompt_type": "plain" + }, + { + "output": "- model_origin as RANDOM allows both features and model\n hyper-parameters to call their mutate lists or functions. - model_origin as EXTRA_FS is for the extra individuals added\n through Feature Selection(FS) based on permutation importance. - model_origin as REF# denotes reference individuals\n provided as a baseline (e.g. ConstantModel). - model_origin as GLM_OHE denotes features generated by GLM +\n OHE. 3) Driverless AI Brain: During an experiment building, Brain caches the\n best iterations, parameters, models, genes and populations. These\n are used for informed lookups, crossovers during mutation,\n restarts and refits of experiment. For\n details see feature_brain_level . 4) Mutation strategy: Strategy to apply when doing mutations on\n transformers :\n - Sample mode is default, with tendency to sample transformer\n parameters. 
- Batched mode tends to do multiple types of the same\n transformation together.", + "prompt_type": "plain" + }, + { + "output": "5) Mutation via custom recipe: Users can control and specify their own\n mutation strategy and the list of parameters to mutate on, by\n writing their own custom python code and hooking it up with the\n inbuilt Driverless AI Genetic Algorithm. Here is an example of such\n a recipe. The get_one function passes on the list of values to\n genetic algorithm or Optuna for that parameter. Reach out to\n support@h2o.ai if you need more help with writing your own\n custom recipes. 6) Optuna: Driverless AI supports Optuna for model hyperparameter\n tuning during the Tuning phase of an experiment. Optuna\n employs a Bayesian optimization algorithm called Tree-structured\n Parzen Estimator for hyperparameter optimization. For details see\n enable_genetic_algorithm and tournament_style . When Optuna is selected then, model hyperparameters are tuned with\n Optuna and genetic algorithm is\n used for feature engineering.", + "prompt_type": "plain" + }, + { + "output": "Modifying Datasets\nViewing dataset details\nTo view a summary of a dataset or to preview the dataset, click on the\ndataset or select the [Click for Actions] button next to the dataset\nthat you want to view and select Details from the submenu that appears. This opens the Dataset Details page, which provides a summary of the\ndataset that lists each of the dataset's columns and displays\naccompanying rows for column name, feature engineering type\n(categorical, date, datetime, ID, numerical, text, or image), storage\ntype (integer, string, real, boolean, or time), count, number of missing\nvalues, mean, minimum, maximum, standard deviation, frequency, and\nnumber of unique values. Hover over the top of a column to view a summary of the first 20 rows of\nthat column. To view information for a specific column, type the column\nname in the field above the graph. 
To switch the view and preview the dataset, click the Dataset Rows\nbutton in the top right portion of the UI. Click the Dataset Overview\nbutton to return to the original view.", + "prompt_type": "plain" + }, + { + "output": "These are the same options that are available\nfrom the Datasets page. []\nChange column type\nDriverless AI also lets you change a column's type. If a column's data\ntype or distribution does not match the manner in which you want the\ncolumn to be handled during an experiment, changing the Logical Type can\nhelp to make the column fit better. For example, an integer zip code can\nbe changed into a categorical so that it is only used with\ncategorical-related feature engineering. For Date and Datetime columns,\nuse the Format option. To change the Logical Type or Format of a column,\nclick on the group of square icons located to the right of the words\nAuto-detect. (The squares light up when you hover over them with your\ncursor.) Then select the new column type for that column. Modify by custom data recipe\nThe option to create a new dataset by modifying an existing dataset with\ncustom recipes is also available from this page. Scoring pipelines can\nbe created on the new dataset by building an experiment.", + "prompt_type": "plain" + }, + { + "output": "For example, you\ncan change the target column from regression to classification, add a\nweight column to mark specific training rows as being more important, or\nremove outliers that you do not want to model on. Refer to the\ncustom_recipes_data_recipes section for more information. Click the Modify by Recipe drop-down menu in the top right portion of\nthe UI and select from the following options:\n- Data Recipe URL: Load a custom recipe from a URL to use to modify\n the dataset. The URL must point to either an HTML or raw version of\n the file, a GitHub repository or tree, or a local file. Sample\n custom data recipes are available in the\n driverlessai-recipes repository . 
- Upload Data Recipe: If you have a custom recipe available on your\n local system, click this button to upload that recipe. - Live Code: Manually enter custom recipe code that is used to modify\n the dataset. Click the Get Preview button to preview the code's\n effect on the dataset, then click Apply to create a new dataset.", + "prompt_type": "plain" + }, + { + "output": "- Apply Existing Data Recipe: Apply an existing data recipe to the\n dataset. For more information on adding recipes, see custom-recipes. Notes:\n- These options are enabled by default. You can disable them by\n removing recipe_file and recipe_url from the enabled_file_systems\n configuration option. - Modifying a dataset with a recipe does not overwrite the original\n dataset. The dataset that is selected for modification remains in\n the list of available datasets in its original form, and the\n modified dataset appears in this list as a new dataset. - Changes made to the original dataset through this feature are not\n applied to any new data that is scored. - Due to locale, parsing a datetime column with Live Code or a Data\n Recipe may result in an error or return different results when\n compared to running the same code outside of DAI. The following\n example illustrates the issue that might occur with certain datetime\n formats and describes how you can convert them so that they are\n accepted by DAI:\nRename datasets\nIn Driverless AI, you can rename datasets from the Datasets Overview\npage.", + "prompt_type": "plain" + }, + { + "output": "Exporting Artifacts\nIn some cases, you might find that you do not want your users to\ndownload artifacts directly to their machines. Driverless AI provides\nseveral configuration options/environment variables that enable\nexporting of artifacts instead of downloading. Artifacts can be exported\nto a file system directory, an Amazon S3 bucket, a Bitbucket repository,\nor Azure Blob storage. 
Note: The option to download artifacts is automatically disabled when\nexporting is enabled. Enabling Artifact Exports\nThe config.toml file exposes the following variables:\n- enable_artifacts_upload: Replaces all the downloads on the experiment\n page with exports, and lets users push to the artifact store with\n artifacts_store. This is disabled by default. - artifacts_store: Specify one of the following storage methods:\n - file_system: Store artifacts in the file system directory\n specified by the artifacts_file_system_directory setting. - S3: Store artifacts in the S3 bucket specified by the\n artifacts_s3_bucket setting.", + "prompt_type": "plain" + }, + { + "output": "- azure: Store artifacts in Azure Blob storage. Specify the following for the storage method you selected:\nFile System Directory\n- artifacts_file_system_directory: The file system location where\n artifacts will be copied. This is expected to be a directory on your\n server. AWS S3\n- artifacts_s3_bucket: The AWS S3 bucket where artifacts will be\n stored. Bitbucket\n- bitbucket_skip_cert_verification: Specify whether to skip\n certificate verification for Bitbucket when using a repository with\n HTTPS. This is disabled by default. - bitbucket_tmp_relative_dir: Specify a local temporary directory to\n clone artifacts to (relative to data_directory). Azure Blob Storage\n- artifacts_azure_blob_account_name: Specify your Azure Blob Storage\n account name. - artifacts_azure_blob_account_key: Specify your Azure Blob Storage\n account key. - artifacts_azure_connection_string: Specify your Azure Blob Storage\n connection string. - artifacts_azure_sas_token: Specify your Azure Blob Storage shared\n access signature (SAS) token.", + "prompt_type": "plain" + }, + { + "output": "enable_dataset_downloading`` configuration option, which is set to", + "prompt_type": "plain" + }, + { + "output": "``true`` by default. Set this to ``false`` if you do not want users to download\ndatasets to their local machine. 
There is currently no configuration\noption that enables exporting datasets to a file system. Docker Image Installs\nThe following example shows how to enable artifact exporting to a file\nsystem when starting the Driverless AI Docker image. docker run \\\n --pid=host \\\n --rm \\\n --shm-size=256m \\\n -e DRIVERLESS_AI_ENABLE_ARTIFACTS_UPLOAD=\"true\" \\\n -e DRIVERLESS_AI_ARTIFACTS_STORE=\"file_system\" \\\n -e DRIVERLESS_AI_ARTIFACTS_FILE_SYSTEM_DIRECTORY=\"tmp\" \\\n -u `id -u`:`id -g` \\\n -p 12345:12345 \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\nNative Installs\nThe following example shows how to enable artifact exporting to a file\nsystem on native installs. 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example:\n # DEB and RPM\n export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\"\n # TAR SH\n export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\"\n 1.", + "prompt_type": "plain" + }, + { + "output": "Save your changes when you are done. # Replace all the downloads on the experiment page to exports and allow users to push to the artifact store configured with artifacts_store\n enable_artifacts_upload = true\n # Artifacts store. # file_system: stores artifacts on a file system directory denoted by artifacts_file_system_directory. #\n artifacts_store = \"file_system\"\n # File system location where artifacts will be copied in case artifacts_store is set to file_system\n artifacts_file_system_directory = \"tmp\"\n 1. Start Driverless AI. Note that the command used to start\n Driverless AI varies depending on your install type. # Deb or RPM with systemd (preferred for Deb and RPM):\n # Start Driverless AI. sudo systemctl start dai\n # Deb or RPM without systemd:\n # Start Driverless AI. 
sudo -H -u dai /opt/h2oai/dai/run-dai.sh\n # Tar.sh\n # Start Driverless AI\n ./run-dai.sh\nExporting an Artifact\nWhen the export artifacts options are enabled/configured, the menu\noptions on the completed_experiment page will change.", + "prompt_type": "plain" + }, + { + "output": "AutoDoc Custom Template Placeholders\nThe following sections describe placeholders for AutoDoc's custom\ntemplate feature. Using placeholders\nYou can customize the content that appears in an AutoDoc report by using\nplaceholders. When you insert a placeholder into a template, the content\nunique to that specific placeholder appears in the generated report in\nthe location where you inserted it. A placeholder is defined as follows:\n {{p section.render('placeholder_name')}}\nThe following example shows how to define the Experiment Overview.DAI\nExperiment Pipeline Column Types placeholder:\n {{p section.render('Experiment Overview.DAI Experiment Pipeline Column Types')}}\nList of placeholders\nThe following is a list of available placeholders categories:\n- placeholders_experiment_overview\n- placeholders_data_overview\n- placeholders_methodology\n- placeholders_data_sampling\n- placeholders_validation\n- placeholders_feature_evolution\n- placeholders_feature_transformations\n- placeholders_final_model\n- placeholders_glm\n- placeholders_literature\n- placeholders_mli\n- placeholders_model_tuning\n- placeholders_nlp\n- placeholders_pdp\n- placeholders_appendix\nExperiment Overview\nPlaceholders related to the Experiment Overview:\n -----------------------------------------------------------------------\n Name Description\n ----------------------------------- -----------------------------------\n Experiment Overview.DAI Experiment A table with different column types\n Pipeline Column Types and type descriptions for DAI\n Experiment Overview.DAI Experiment A table of the DAI time series\n Pipeline Time Series settings and definitions for each\n setting\n Experiment Overview.DAI GPU A 
sentence indicating whether DAI\n Specifications used available GPUs\n Experiment Overview.DAI Intro Model An introductory paragraph on the\n Goal scorer the model is trying to\n optimize\n Experiment Overview.DAI Iterative A section describing the different\n Tuning iterative steps in the DAI\n experiment pipeline (that is,\n model, feature, target tuning, and\n feature evolution)\n Experiment Overview.DAI Validation A documentation-type section that\n Schema Options defines the different types of\n validation strategies available to\n the user\n Experiment Overview.Performance A summary performance table.", + "prompt_type": "plain" + }, + { + "output": "This\n placeholder is used in the standard\n AutoDoc. The content is similar to\n Data Overview.DAI Training Data\n Detailed but has less descriptive\n text and does not include\n information about missing values\n -----------------------------------------------------------------------\nMethodology\nPlaceholders related to Methodology:\n -----------------------------------------------------------------------\n Name Description\n ----------------------------------- -----------------------------------\n Methodology.Assumptions A high-level overview of DAI's\n assumptions and limitations. 
This\n section includes details about\n whether a shift was detected\n between datasets\n Methodology.DAI Assumptions A section describing whether a user\n Detailed provided a validation dataset and\n whether a shift in distribution\n between datasets was detected.", + "prompt_type": "plain" + }, + { + "output": "Note, permutation feature\n importance must be enabled in the\n AutoDoc expert settings for this\n section to render information\n Feature Transformations.template This template is used to call\n placeholders: Feature\n Transformation.Intro, Feature\n Transformations.Permutation Feature\n Importance, NLP.DAI NLP Detail\n -----------------------------------------------------------------------\nFinal Model\nPlaceholders related to the Final Model:\n -----------------------------------------------------------------------\n Name Description\n ----------------------------------- -----------------------------------\n Final Model.DAI All Feature This placeholder is designed to go\n Transformations in an Appendix section.", + "prompt_type": "plain" + }, + { + "output": "Final Model.DAI Final Model A table with the final model's\n Performance Table performance across available\n scorers\n Final Model.DAI Final Model This template is meant to be called\n Performance Text directly after the Experiment\n Overview.DAI Iterative Tuning\n placeholder. 
This placeholder\n includes a short paragraph about\n final model selection and a\n performance table\n Final Model.DAI Model and Component This section includes the model\n Table component table (i.e., this\n placeholder calls the Final\n Model.DAI Final Model Components\n Table), which shows information\n like the model type, model weight,\n number of folds, etc.", + "prompt_type": "plain" + }, + { + "output": "This placeholder is\n called by the Final Model.DAI Loss\n Function placeholder\n Final Model.DAI Model Package A table that provides the algorithm\n Description name, package name, version of the\n package and the packages primary\n documentation string. This\n placeholder is called by the Final\n Model.DAI Model Components\n placeholder\n Final Model.DAI Models Evaluated A table with the algorithms\n Table available in DAI and the reason an\n algorithm was or wasn't selected\n for the final model. This\n placeholder is called by the Final\n Model.DAI Model Components\n placeholder\n Final Model.Pipeline Overview This placeholder is called by the\n Final Model.Pipeline placeholder\n and shows a table of the final\n model components.", + "prompt_type": "plain" + }, + { + "output": "Note the local\n interpretation based plots and\n table require that the user\n specifies individual records of\n interest with the Python client's\n individual_rows parameter\n MLI.KLIME Plot A description of kLIME with the\n kLIME plot\n MLI.KLIME Reason Code Text A documentation-type section that\n describes kLIME reason codes\n MLI.Local Interpretability Row This placeholder is only available\n Information if the user-specified\n individual_rows are provided. 
This\n placeholder is called by the DAI\n MLI Section placeholder\n MLI.Surrogate DT The surrogate Decision Tree plot.", + "prompt_type": "plain" + }, + { + "output": "This\n template is specific to the\n standard AutoDoc\n -----------------------------------------------------------------------\nNatural Language Processing (NLP)\nPlaceholders related to Natural Language Processing (NLP):\n -----------------------------------------------------------------------\n Name Description\n ----------------------------------- -----------------------------------\n NLP.DAI NLP Detail Similar to DAI NLP Assumption, but\n includes information about NLP\n transformer sampling and\n limitations and does not\n distinguish between image and NLP\n transformers (i.e., you will see\n NLP/Image in the body text of this\n sub template).", + "prompt_type": "plain" + }, + { + "output": "This sub\n template includes additional\n explanations about sensitivity\n analysis in general and shows a\n records original feature values\n along with the ICE overlaid PDP. This template expects a user to\n pass in the individual_rows\n parameter to the Python client with\n records of interest\n Partial Dependence Plots.template A section describing how partial\n dependence plots work and showing\n the partial dependence plots. This\n section is used in the standard\n AutoDoc template\n -----------------------------------------------------------------------\nAppendix\nPlaceholders related to the Appendix:\n -----------------------------------------------------------------------\n Name Description\n ----------------------------------- -----------------------------------\n Appendix.DAI Performance Metrics A glossary of DAI performance\n metrics\n Appendix.DAI References A reference for the standard\n AutoDoc.", + "prompt_type": "plain" + }, + { + "output": "Appendix.PSI_Appendix The table used to calculate PSI\n Appendix.Response_Rates_Appendix The quantile-base plots calculation\n table. 
Appendix.template This template points to the\n Appendix.PSI,\n Appendix.Response_Rates_Appendix,\n and the Appendix.NLP Appendix. If\n the final model is or includes a\n GLM this section also include the\n full GLM coefficients tables and\n the documentation on how to\n understand the GLM coefficients\n table. If a user has set the\n AutoDoc to show all configurations,\n the full configuration table will\n be shown in the appendix.", + "prompt_type": "plain" + }, + { + "output": "Adding datasets\nYou can add datasets using one of the following methods:\nDrag and drop files from your local machine directly onto this page. Note that this method currently works for files that are less than 10\nGB. or\nClick the Add Dataset (or Drag & Drop) button to upload or add a\ndataset. Notes:\n- Upload File, File System, HDFS, S3, Data Recipe URL, and Upload Data\n Recipe are enabled by default. These can be disabled by removing\n them from the enabled_file_systems setting in the config.toml file. (Refer to Using the config.toml file section for more information.) - If File System is disabled, Driverless AI will open a local\n filebrowser by default. - If Driverless AI was started with data connectors enabled for Azure\n Blob Store, BlueData Datatap, Google Big Query, Google Cloud\n Storage, KDB+, Minio, Snowflake, or JDBC, then these options will\n appear in the Add Dataset (or Drag & Drop) dropdown menu. 
Refer to\n the Enabling Data Connectors section for more information.", + "prompt_type": "plain" + }, + { + "output": "Choosing an Install Method\n\nConsider the following when choosing between the AWS Marketplace and AWS\nCommunity AMIs:\n\nDriverless AI AWS Marketplace AMI\n\n- Native (Debian) install based\n- Certified by AWS\n- Will typically lag behind our standard releases, and may require\n updates to work with the latest versions of Driverless AI\n- Features several default configurations like default password and\n HTTPS configuration, which are required by AWS\n\nDriverless AI AWS Community AMI\n\n- Docker based\n- Not certified by AWS\n- Will typically have an up-to-date version of Driverless AI for both\n LTS and latest stable releases\n- Base Driverless AI installation on Docker does not feature preset\n configurations", + "prompt_type": "plain" + }, + { + "output": "included_transformers------------------------- .. container:: dropdown **Include Specific Transformers** Select the :ref:`transformer(s) ` that you want to use in the experiment. Use the **Check All**/**Uncheck All** button to quickly add or remove all transformers at once. **Note**: If you uncheck all transformers so that none is selected, Driverless AI will ignore this and will use the default list of transformers for that experiment. This list of transformers will vary for each experiment. The equivalent config.toml parameter is ``included_transformers``.", + "prompt_type": "plain" + }, + { + "output": "included_scorers``\n\nInclude Specific Scorers\n\nSpecify the scorer(s) that you want Driverless AI to include when\nrunning the experiment.", + "prompt_type": "plain" + }, + { + "output": "included_pretransformers---------------------------- .. container:: dropdown **Include Specific Preprocessing Transformers** Specify which :ref:`transformers ` to use for preprocessing before other transformers are activated. 
Preprocessing transformers can take any original features and output arbitrary features that are used by the normal layer of transformers. **Notes**: - Preprocessing transformers and all other layers of transformers are part of the Python and (if applicable) MOJO scoring packages. - Any :ref:`custom transformer recipe ` or native DAI transformer can be used as a preprocessing transformer. For example, a preprocessing transformer can perform interactions, string concatenations, or date extractions as a preprocessing step before the next layer of Date and DateTime transformations are performed. Caveats: 1) one cannot currently do a time-series experiment on a time_column that hasn't yet been made (setup of experiment only knows about original data, not transformed).", + "prompt_type": "plain" + }, + { + "output": "num_pipeline_layers----------------------- .. container:: dropdown **Number of Pipeline Layers** Specify the number of pipeline layers. This value defaults to 1. The equivalent config.toml parameter isnum_pipeline_layers``.\n\n Note: This does not include the preprocessing layer specified by the\n included_pretransformers expert setting.", + "prompt_type": "plain" + }, + { + "output": "included_datas------------------ .. container:: dropdown **Include Specific Data Recipes During Experiment** Specify whether to include specific data recipes during the experiment. Avoids need for separate data preparation step, builds data preparation within experiment and within python scoring package. But Mojo will require data preparation applied before making predictions. The equivalent config.toml parameter isincluded_datas``.", + "prompt_type": "plain" + }, + { + "output": "included_individuals------------------------ .. container:: dropdown **Include Specific Individuals** In Driverless AI, every completed experiment automatically generates Python code for the experiment that corresponds to the individual(s) used to build the final model. 
You can edit this auto-generated Python code offline and upload it as a recipe, or edit and save it using the built-in :ref:`custom recipe management editor `. This feature gives you code-first access to a significant portion of DAI's internal transformer and model generation process. This expert setting lets you do one of the following: - Leave this field empty to have all individuals be freshly generated and treated by DAI's AutoML as a container of model and transformer choices. - Select recipe display names of custom individuals through the UI. If the number of included custom individuals is less than DAI needs, then the remaining individuals are freshly generated.", + "prompt_type": "plain" + }, + { + "output": "threshold_scorer``\n\nScorer to Optimize Threshold to Be Used in Other Confusion-Matrix Based\nScorers (For Binary Classification)\n\nSpecify the scorer used to optimize the binary probability threshold\nthat is being used in related Confusion Matrix based scorers such as\nPrecision, Recall, FalsePositiveRate, FalseDiscoveryRate,\nFalseOmissionRate, TrueNegativeRate, FalseNegativeRate, and\nNegativePredictiveValue. Select from the following:\n\n- Auto (Default): Use this option to sync the threshold scorer with\n the scorer used for the experiment. If this is not possible, F1 is\n used.\n- F05: More weight on precision, less weight on recall.\n- F1: Equal weight on precision and recall.\n- F2: Less weight on precision, more weight on recall.\n- MCC: Use this option when all classes are equally important.", + "prompt_type": "plain" + }, + { + "output": "prob_add_genes``\n\nProbability to Add Transformers\n\nSpecify the unnormalized probability to add genes or instances of\ntransformers with specific attributes. If no genes can be added, other\nmutations are attempted. 
This value defaults to 0.5.", + "prompt_type": "plain" + }, + { + "output": "prob_addbest_genes``\n\nProbability to Add Best Shared Transformers\n\nSpecify the unnormalized probability to add genes or instances of\ntransformers with specific attributes that have shown to be beneficial\nto other individuals within the population. This value defaults to 0.5.", + "prompt_type": "plain" + }, + { + "output": "prob_prune_genes``\n\nProbability to Prune Transformers\n\nSpecify the unnormalized probability to prune genes or instances of\ntransformers with specific attributes. This value defaults to 0.5.", + "prompt_type": "plain" + }, + { + "output": "prob_perturb_xgb``\n\nProbability to Mutate Model Parameters\n\nSpecify the unnormalized probability to change model hyper parameters.\nThis value defaults to 0.25.", + "prompt_type": "plain" + }, + { + "output": "prob_prune_by_features``\n\nProbability to Prune Weak Features\n\nSpecify the unnormalized probability to prune features that have low\nvariable importance instead of pruning entire instances of\ngenes/transformers. This value defaults to 0.25.", + "prompt_type": "plain" + }, + { + "output": "skip_transformer_failures``\n\nWhether to Skip Failures of Transformers\n\nSpecify whether to avoid failed transformers. This is enabled by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "skip_model_failures``\n\nWhether to Skip Failures of Models\n\nSpecify whether to avoid failed models. Failures are logged according to\nthe specified level for logging skipped failures. 
This is enabled by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "detailed_skip_failure_messages_level``\n\nLevel to Log for Skipped Failures\n\nSpecify one of the following levels for the verbosity of log failure\nmessages for skipped transformers or models:\n\n- 0 = Log simple message\n- 1 = Log code line plus message (Default)\n- 2 = Log detailed stack traces", + "prompt_type": "plain" + }, + { + "output": "notify_failures------------------- .. container:: dropdown **Whether to Notify About Failures of Transformers or Models or Other Recipe Failures** Specify whether to display notifications in the GUI about recipe failures. This is enabled by default. The equivalent config.toml parameter isnotify_failures``.", + "prompt_type": "plain" + }, + { + "output": "acceptance_test_timeout``\n\nTimeout in Minutes for Testing Acceptance of Each Recipe\n\nSpecify the number of minutes to wait until a recipe's acceptance\ntesting is aborted. A recipe is rejected if acceptance testing is\nenabled and it times out. This value defaults to 20.0.", + "prompt_type": "plain" + }, + { + "output": "Experiment Settings\nThis section describes the settings that are available when running an\nexperiment. Display Name\nOptional: Specify a display name for the new experiment. There are no\ncharacter or length restrictions for naming. If this field is left\nblank, Driverless AI will automatically generate a name for the\nexperiment. Dropped Columns\nDropped columns are columns that you do not want to be used as\npredictors in the experiment. Note that Driverless AI will automatically\ndrop ID columns and columns that contain a significant number of unique\nvalues (above max_relative_cardinality in the config.toml file or Max. allowed fraction of uniques for integer and categorical cols in Expert\nsettings). Validation Dataset\nThe validation dataset is used for tuning the modeling pipeline. 
If\nprovided, the entire training data will be used for training, and\nvalidation of the modeling pipeline is performed with only this\nvalidation dataset. When you do not include a validation dataset,\nDriverless AI will do K-fold cross validation for I.I.D.", + "prompt_type": "plain" + }, + { + "output": "For this reason it is not generally recommended to include a validation\ndataset as you are then validating on only a single dataset. Note that\ntime series experiments cannot be used with a validation dataset:\nincluding a validation dataset will disable the ability to select a time\ncolumn and vice versa. This dataset must have the same number of columns (and column types) as\nthe training dataset. Also note that if provided, the validation set is\nnot sampled down, so it can lead to large memory usage, even if\naccuracy=1 (which reduces the train size). Test Dataset\nThe test dataset is used for testing the modeling pipeline and creating\ntest predictions. The test set is never used during training of the\nmodeling pipeline. (Results are the same whether a test set is provided\nor not.) If a test dataset is provided, then test set predictions will\nbe available at the end of the experiment. Weight Column\nOptional: Column that indicates the observation weight (a.k.a. sample or\nrow weight), if applicable.", + "prompt_type": "plain" + }, + { + "output": "Rows with higher weights have higher importance. The weight affects\nmodel training through a weighted loss function and affects model\nscoring through weighted metrics. The weight column is not used when\nmaking test set predictions, but a weight column (if specified) is used\nwhen computing the test score. Note: The weight column is not used as a feature in modeling. Fold Column\nOptional: Rows with the same value in the fold column represent groups\nthat should be kept together in the training, validation, or\ncross-validation datasets. 
This can prevent data leakage and improve\ngeneralization for data that is naturally grouped and not i.i.d. (identically and independently distributed). This column must be an\ninteger or categorical variable, and it cannot be specified if a\nvalidation set is used or if a Time Column is specified. By default, Driverless AI assumes that the dataset is i.i.d. and creates\nvalidation datasets randomly for regression or with stratification of\nthe target variable for classification.", + "prompt_type": "plain" + }, + { + "output": "This can prevent data leakage and improve generalization. For example,\nwhen viewing data for a pneumonia dataset, person_id would be a good\nFold Column. This is because the data may include multiple diagnostic\nsnapshots per person, and we want to ensure that the same person\u2019s\ncharacteristics show up only in either the training or validation\nframes, but not in both to avoid data leakage. This column must be an integer or categorical variable and cannot be\nspecified if a validation set is used or if a Time Column is specified. Note: The fold column is not used as a feature in modeling. Time Column\nOptional: Specify a column that provides a time order (time stamps for\nobservations), if applicable. This can improve model performance and\nmodel validation accuracy for problems where the target values are\nauto-correlated with respect to the ordering (per time-series group). The values in this column must be a datetime format understood by\npandas.to_datetime(), like \"2017-11-29 00:30:35\" or \"2017/11/29\", or\ninteger values.", + "prompt_type": "plain" + }, + { + "output": "If a time column is found, feature engineering and model\nvalidation will respect the causality of time. If [OFF] is selected, no\ntime order is used for modeling and data may be shuffled randomly (any\npotential temporal causality will be ignored). When your data has a date column, then in most cases, specifying [AUTO]\nfor the Time Column will be sufficient. 
However, if you select a\nspecific date column, then Driverless AI will provide you with an\nadditional side menu. From this side menu, you can specify Time Group\ncolumns or specify [Auto] to let Driverless AI determine the best time\ngroup columns. You can also specify the columns that will be unavailable\nat prediction time (see ucapt for more information), the Forecast\nHorizon (in a unit of time identified by Driverless AI), and the Gap\nbetween the train and test periods. Refer to time-series-in-dai for more information about time series\nexperiments in Driverless AI and to see a time series example. []\nNotes:\n- Engineered features will be used for MLI when a time series\n experiment is built.", + "prompt_type": "plain" + }, + { + "output": "- A Time Column cannot be specified if a Fold Column is specified. This is because both fold and time columns are only used to split\n training datasets into training/validation, so once you split by\n time, you cannot also split with the fold column. If a Time Column\n is specified, then the time group columns play the role of the fold\n column for time series. - A Time Column cannot be specified if a validation dataset is used. - A column that is specified as being unavailable at prediction time\n will only have lag-related features created for (or with) it. - Unavailable Columns at Time of Prediction will only have lag-related\n features created for (or with) it, so this option is only used when\n time-series-lag-based-recipe is enabled. Accuracy, Time, and Interpretability Knobs\nThe experiment preview describes what the Accuracy, Time, and\nInterpretability settings mean for your specific experiment. 
This\npreview automatically updates when any of the experiment's settings\nchange (including the knobs).", + "prompt_type": "plain" + }, + { + "output": "Usually\n achieved through the use of larger data (less sampling), more\n modeling effort (more tuning, higher accuracy settings), more\n statistical calculations (cross-validation, bootstrapping). Doesn't always mean that the final model is better, but generally\n means that the final estimate is more accurate. If in doubt, trust\n the results of the experiment with higher accuracy settings. - The Time knob stands for relative time tolerance: Higher values\n generally lead to longer run times. Indicates patience to wait for\n convergence of the experiment score. Larger values mean higher\n chance of getting a better model. If it takes too long, just click\n on 'Finish' button and it will finish the experiment as if\n convergence was achieved. - The Interpretability knob stands for relative interpretability:\n Higher values favor more interpretable models (e.g. linear models,\n decision trees, single models) with less complex feature\n engineering (fewer features, simple features).", + "prompt_type": "plain" + }, + { + "output": "neural networks, GBMs, ensembles) and\n more complex feature pipelines (more features, higher-order\n interaction features). Note\n- You can manually select individual features to force into an\nexperiment\u2014regardless of Accuracy, Time, and Interpretability\nlevels\u2014with the Features to Force In expert setting. - To adjust the lowest allowed variable importance that features can\nhave before being dropped, use the\nLowest Allowed Variable Importance at Interpretability 10 \nexpert setting. [Accuracy, Time, and Interpretability Knobs]\n[Experiment Preview]\nAccuracy\nAs accuracy increases, Driverless AI gradually adjusts the method for\nperforming the evolution and ensemble. At low accuracy, Driverless AI\nvaries features and models, but they all compete evenly against each\nother. 
At higher accuracy, each independent main model will evolve\nindependently and be part of the final ensemble as an ensemble over\ndifferent main models.", + "prompt_type": "plain" + }, + { + "output": "Finally, at highest accuracies, Driverless AI\nperforms both model and feature tracking and ensembles all those\nvariations. Changing this value affects the feature evolution and final pipeline. Note: A check for a shift in the distribution between train and test is\ndone for accuracy >= 5. Training data size: Displays the number of rows and columns in the\ntraining data. Feature evolution: This represents the algorithms used to create the\nexperiment. If a test set is provided without a validation set, then\nDriverless AI will perform a 1/3 validation split during the experiment. If a validation set is provided, then the experiment will perform\nexternal validation. Final pipeline: This represents the number of models and the validation\nmethod used in the final pipeline. For ensemble modeling, information\nabout how models are combined is also shown here. Time\nThis specifies the relative time for completing the experiment (that is,\nhigher settings take longer). Feature Brain Level: Displays the feature brain level for the\nexperiment.", + "prompt_type": "plain" + }, + { + "output": "Feature evolution: Displays the number of individuals and maximum number\nof iterations that will be run in this experiment. Early stopping: Early stopping will take place if the experiment doesn't\nimprove the score for the specified amount of iterations. Interpretability\nSpecify the relative interpretability for this experiment. Higher values\nfavor more interpretable models. Changing the interpretability level\naffects the feature pre-pruning strategy, monotonicity constraints, and\nthe feature engineering search space. Feature pre-pruning strategy: This represents the feature selection\nstrategy (to prune-away features that do not clearly give improvement to\nmodel score). 
Strategy = \u201cPermutation Importance FS\u201d if interpretability\n>= 6; otherwise strategy is None. Monotonicity constraints: If Monotonicity Constraints are enabled, the\nmodel will satisfy knowledge about monotonicity in the data and monotone\nrelationships between the predictors and the target variable. For\nexample, in house price prediction, the house price should increase with\nlot size and number of rooms, and should decrease with crime rate in the\narea.", + "prompt_type": "plain" + }, + { + "output": "Depending on the correlation, Driverless AI will assign positive,\nnegative, or no monotonicity constraints. Monotonicity is enforced if\nthe absolute correlation is greater than 0.1. All other predictors will\nnot have monotonicity enforced. For more information, see mc. Note: Monotonicity constraints are used in XGBoost GBM, XGBoost Dart,\n LightGBM, and Decision Tree models. Feature engineering search space: This represents the transformers that\nwill be used during the experiment. [...] Models to Train\nFor the listed models:\n Model and feature tuning: Represents the number of validation splits\n multiplied by the tuning population size. Feature evolution: Represents the number of models trained in order to\n evaluate engineered features. Final pipeline: Represents the number of final models. Per-model hyperparameter optimization trials:\n - evolution - Represents the number of trials performed for\n hyperparameter optimization for tuning models. - final - Represents the number of trials performed for\n hyperparameter optimization for final models.", + "prompt_type": "plain" + }, + { + "output": "Though not recommended, you can override this setting\nby clicking this button. Reproducible\nThe Reproducible toggle lets you build an experiment with a random seed\nand get reproducible results. If this is disabled (default), then\nresults vary between runs, which can give a good sense of variance among\nexperiment results. 
When enabling this option, keep the following notes in mind:\n- Experiments are only reproducible when run on the same hardware\n (that is, using the same number and type of GPUs/CPUs and the same\n architecture). For example, you will not get the same results if you\n try an experiment on a GPU machine, and then attempt to reproduce\n the results on a CPU-only machine or on a machine with a different\n number and type of GPUs. - This option should be used with the reproducibility_level expert\n setting option, which ensures different degrees of reproducibility\n based on the OS and environment architecture. Keep in mind that when\n Reproducibility is enabled, then reproducibility_level=1 by default.", + "prompt_type": "plain" + }, + { + "output": "Dask Redis Multinode Example\nDask Multinode Example running docker\nOn main server with public IP address 172.16.2.210:\n mkdir -p /home/$USER/docker/data ; chmod u+rwx /home/$USER/docker/data\n mkdir -p /home/$USER/docker/log ; chmod u+rwx /home/$USER/docker/log\n mkdir -p /home/$USER/docker/tmp ; chmod u+rwx /home/$USER/docker/tmp\n mkdir -p /home/$USER/docker/license ; chmod u+rwx /home/$USER/docker/license\n mkdir -p /home/$USER/docker/jupyter/notebooks\n cp /home/$USER/.driverlessai/license.sig /home/$USER/docker/license/\n export server=172.16.2.210\n docker run \\\n --net host \\\n --runtime nvidia \\\n --rm \\\n --init \\\n --pid=host \\\n --gpus all \\\n --ulimit core=-1 \\\n --shm-size=2g \\\n -u `id -u`:`id -g` \\\n -v /etc/passwd:/etc/passwd:ro \\\n -v /etc/group:/etc/group:ro \\\n -v /home/$USER/docker/license:/license \\\n -v /home/$USER/docker/data:/data \\\n -v /home/$USER/docker/log:/log \\\n -v /home/$USER/docker/tmp:/tmp \\\n -v /home/$USER/docker/jupyter:/jupyter \\\n -e dai_dask_server_ip=$server \\\n -e dai_redis_ip=$server \\\n -e dai_redis_port=6379 \\\n -e dai_main_server_minio_address=$server:9001 \\\n -e dai_local_minio_port=9001 \\\n -e dai_ip=$server \\\n -e 
dai_main_server_redis_password=\"\" \\\n -e dai_worker_mode='multinode' \\\n -e dai_enable_dask_cluster=1 \\\n -e dai_enable_jupyter_server=1 \\\n -e dai_enable_jupyter_server_browser=1 \\\n -e NCCL_SOCKET_IFNAME=\"enp5s0\" \\\n -e NCCL_DEBUG=WARN \\\n -e NCCL_P2P_DISABLE=1 \\\n docker_image\nThe preceding example launches the following:\n- DAI main server on 12345\n- MinIO data server on 9001\n- Redis server on 6379\n- H2O-3 MLI server on 12348\n- H2O-3 recipe server on 50361\n- Jupyter on 8889\n- Dask CPU scheduler on 8786\n- Dask CPU scheduler's dashboard on 8787\n- Dask GPU scheduler on 8790\n- Dask GPU scheduler's dashboard on 8791\n- LightGBM Dask listening port on 12400\nNotes:\n- (1) $USER in bash gives the username.", + "prompt_type": "plain" + }, + { + "output": "- (3) Replace various ports with alternative values if required. - (4) Replace docker_image with the image (include repository if\n remote image). - (5) For GPU usage, --runtime nvidia is required. Systems without\n GPUs should remove this line. - (6) Dask on cluster can be disabled by passing\n dai_enable_dask_cluster=0. If Dask on cluster is disabled, then\n dai_dask_server_ip does not need to be set. - (7) Dask dashboard ports (for example, 8787 and 8791) and H2O-3\n ports 12348, 50361, and 50362 are not required to be exposed. These are for user-level access to H2O-3 or Dask behavior. - (8) Jupyter can be disabled by passing dai_enable_jupyter_server=0\n and dai_enable_jupyter_server_browser=0. - (9) Dask requires the host network be used so scheduler can tell\n workers where to find other workers, so a subnet on new IP\n cannot be used, e.g. with\n docker network create --subnet=192.169.0.0/16 dainet.
- (10) To isolate user access to single user, instead of doing\n -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro one\n can map to user files with the same required information.", + "prompt_type": "plain" + }, + { + "output": "- (11) Directories created should have not existed or should be from a\n prior run by same user. Pre-existing directories should be\n moved or names changed to avoid conflicts. - (12) Services like the Procsy server, H2O-3 MLI and Recipe servers,\n and Vis-data server are only used internally for each node. - (13) The options -p 12400:12400 is only required to LightGBM Dask. - (14) NCCL_SOCKET_IFNAME should specify the actual hardware device to\n use, as required due to issues with NCCL obtaining the correct\n device automatically from IP. On any number of workers for server with public IP address 172.16.2.210:\n mkdir -p /home/$USER/docker/log ; chmod u+rwx /home/$USER/docker/log\n mkdir -p /home/$USER/docker/tmp ; chmod u+rwx /home/$USER/docker/tmp\n export server=172.16.2.210\n docker run \\\n --runtime nvidia \\\n --gpus all \\\n --rm \\\n --init \\\n --pid=host \\\n --net host \\\n --ulimit core=-1 \\\n --shm-size=2g \\\n -u `id -u`:`id -g` \\\n -v /etc/passwd:/etc/passwd:ro \\\n -v /etc/group:/etc/group:ro \\\n -v /home/$USER/docker/log:/log \\\n -v /home/$USER/docker/tmp:/tmp \\\n -e dai_dask_server_ip=$server \\\n -e dai_redis_ip=$server \\\n -e dai_redis_port=6379 \\\n -e dai_main_server_minio_address=$server:9001 \\\n -e dai_local_minio_port=9001 \\\n -e dai_ip=$server \\\n -e dai_main_server_redis_password=\"\" \\\n -e dai_worker_mode='multinode' \\\n -e dai_enable_dask_cluster=1 \\\n -e NCCL_SOCKET_IFNAME=\"enp4s0\" \\\n -e NCCL_DEBUG=WARN \\\n -e NCCL_P2P_DISABLE=1 \\\n docker_image --worker\nNotes:\n- (1) If same disk is used for main server and worker, change \"docker\"\n to \"docker_w1\" for worker 1, etc.", + "prompt_type": "plain" + }, + { + "output": "Dask Multinode Example running tar\nOn main server with public 
IP address 172.16.2.210:\n export DRIVERLESS_AI_LICENSE_FILE=/home/$USER/.driverlessai/license.sig\n export server=172.16.2.210\n NCCL_SOCKET_IFNAME=\"enp5s0\" \\\n NCCL_DEBUG=WARN \\\n NCCL_P2P_DISABLE=1 \\\n dai_dask_server_ip=$server dai_redis_ip=$server dai_redis_port=6379 \\\n dai_main_server_minio_address=$server:9001 dai_ip=$server dai_main_server_redis_password=\"\" \\\n dai_worker_mode='multinode' dai_enable_dask_cluster=1 \\\n dai_enable_jupyter_server=1 dai_enable_jupyter_server_browser=1 \\\n /opt/h2oai/dai/dai-env.sh python -m h2oai &> multinode_main.txt\nOn each worker node, run the exact same command but with --worker added\nat the end, i.e.:\n export DRIVERLESS_AI_LICENSE_FILE=/home/$USER/.driverlessai/license.sig\n export server=172.16.2.210\n NCCL_SOCKET_IFNAME=\"enp4s0\" \\\n NCCL_DEBUG=WARN \\\n NCCL_P2P_DISABLE=1 \\\n dai_dask_server_ip=$server dai_redis_ip=$server dai_redis_port=6379 \\\n dai_main_server_minio_address=$server:9001 dai_ip=$server dai_main_server_redis_password=\"\" \\\n dai_worker_mode='multinode' dai_enable_dask_cluster=1 \\\n /opt/h2oai/dai/dai-env.sh python -m h2oai --worker &> multinode_worker.txt\nNotes:\n- (1) In this example, address 172.16.2.210 needs to be the public IP\n associated with the network device to use for communication.", + "prompt_type": "plain" + }, + { + "output": "MLI for Regular (Non-Time-Series) Experiments\n\nThis section describes MLI functionality and features for regular\nexperiments.
Refer to interpret-ts for MLI information with time-series\nexperiments.\n\ninterpret-a-model interpret-expert-settings\ninterpret-explainer-expert-settings interpret-understanding\nviewing-explanations interpret-general-considerations", + "prompt_type": "plain" + }, + { + "output": "Updating Licenses\nIf your current Driverless AI license has expired, you will be required\nto update it in order to continue running Driverless AI, in order to run\nthe scoring pipeline, in order to access deployed pipelines to AWS\nLambdas, etc. Updating the License for Driverless AI\nSimilar to adding a license for the first time, you can update your\nlicense for running Driverless AI either by replacing your current\nlicense.sig file or via the Web UI. Updating the license.sig File\nUpdate the license key in your\n/opt/h2oai/dai/home/.driverlessai/license.sig file by replacing the\nexisting license with your new one. Updating the License in the Web UI\nIf your license is expired, the Web UI will prompt you to enter a new\none. The steps are the same as adding a license for the first time via\nthe Driverless AI Web UI. Updating the License for Scoring Pipelines\nFor the Python Scoring Pipeline, include the updated license file when\nsetting the environment variable in Python. Refer to the above\npython_scoring_license section for adding licenses.", + "prompt_type": "plain" + }, + { + "output": "This is the same as adding a license for the\nfirst time. Refer to the above mojo_scoring_license section for adding\nlicenses. Updating Driverless AI Licenses on AWS Lambda\nUsers can manually update each of their Driverless AI licenses deployed\nin production on AWS Lambda. For users with many MOJOs in production,\nthough, H2O provides a script that will update Driverless AI licenses\nfor all of your MOJOs currently deployed on AWS Lambda. Manual Update\nThe Driverless AI deployment pipeline to AWS Lambdas explicitly sets the\nlicense key as an environment variable. 
Replace the expired license key\nwith your updated one. []\nAutomatic Update\nH2O provides a script that can be used to update Driverless AI licenses\nfor all of your MOJOs deployed on a specific AWS Lambda region. This\nscript can be run for any machine. Requirements\n- New Driverless AI license\n- The following Python packages are required for this script:\n - boto3\n - argparse\n - os\nUpdate Steps\nPerform the following steps to update your Driverless AI license for\nMOJOs on AWS Lambda.", + "prompt_type": "plain" + }, + { + "output": "Variable importance in Driverless AI\nGlobal Feature Importance\n- Model Specific Feature Importance: After completion of an experiment\n Driverless AI, reports the variable importance that is model or\n algorithm specific. For example for Tree based models, this\n importance is gain based. i.e It computes the average reduction in\n impurity across all trees in the forest due to each feature. Features that tend to split nodes closer to the root of a tree have\n a larger importance value. For say an n fold model the variable\n importance is averaged across the folds, normalized and reported. For an ensemble model, the importance is multiplied by the\n respective model weights and normalized. - Permutation Feature Importance: Permutation-based feature importance\n is a model-agnostic approach. After evaluating the performance or\n scoring a model, if you permute (shuffle) the values of a feature of\n interest and re-evaluate model performance, the observed mean\n difference in performance indicates feature\u2019s absolute permutation\n importance.", + "prompt_type": "plain" + }, + { + "output": "If a\n feature is highly predictive, however, shuffling its values should\n decrease the model\u2019s performance. ref. Driverless AI applies permutation based feature importance for\n upfront feature selection before genetic algorithm when the\n feature space is large. 
Local Feature Importance\n- LIME: Local interpretable model-agnostic explanations (LIME) is a\n model agnostic technique aiming to explain which features are most\n important in specific areas of the feature space. The main idea of\n LIME is to compute a local surrogate model in the area of interest. This surrogate model is an easily interpretable model such as a\n linear model or a decision tree trained to mimic the behavior of the\n more complex model of interest. For a specific prediction you want\n to explain, LIME slightly changes the values to create new data\n points that are similar. By feeding these perturbed data points to\n the complex model a relation between the perturbed features and\n the model prediction emerges which is then captured by the surrogate\n model.", + "prompt_type": "plain" + }, + { + "output": "- Shapley: Shapley values can be used for local feature importance. They can be used to explain which feature(s) contribute most to a\n specific prediction, say fraud or not fraud. Shapley values are not\n designed to answer the \"what if\" questions that LIME\u2019s local\n surrogate models are designed for. Shapley has its origin in game theory where the problem at hand is\n to determine a fair payoff for all players in the team based on\n their individual capabilities or performance. Shapley value is\n defined as an average expected marginal contribution of one player\n after all possible combinations have been considered. A marginal\n contribution is defined as a value of the group with the player as a\n member minus the value of the group without the player minus the\n value created by the player working alone.
As considering all possible subsets (or combinations) of features is\n computationally prohibitive in most realistic models with many\n features, Shapley value approximations are computed based on\n sampling.", + "prompt_type": "plain" + }, + { + "output": "Adding Licenses for the First Time\nSpecifying a License File for the Driverless AI Application\nA license file to run Driverless AI can be added in one of three ways\nwhen starting Driverless AI. - Specifying the license.sig file during launch in native installs\n- Using the DRIVERLESS_AI_LICENSE_FILE and DRIVERLESS_AI_LICENSE_KEY\n environment variables when starting the Driverless AI Docker image\n- Uploading your license in the Web UI\nSpecifying the license.sig File During Launch\nBy default, Driverless AI looks for a license key in\n/opt/h2oai/dai/home/.driverlessai/license.sig. If you are installing\nDriverless AI programmatically, you can copy a license key file to that\nlocation. If no license key is found, the application will prompt you to\nadd one via the Web UI. Specifying Environment Variables\nYou can use the DRIVERLESS_AI_LICENSE_FILE or DRIVERLESS_AI_LICENSE_KEY\nenvironment variable when starting the Driverless AI Docker image. 
For\nexample:\n nvidia-docker run \\\n --pid=host \\\n --rm \\\n --shm-size=256m \\\n -u `id -u`:`id -g` \\\n -p 12345:12345 \\\n -e DRIVERLESS_AI_LICENSE_FILE=\"/license/license.sig\" \\\n -v `pwd`/config:/config \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\nor\n nvidia-docker run \\\n --pid=host \\\n --rm \\\n --shm-size=256m \\\n -u `id -u`:`id -g` \\\n -p 12345:12345 \\\n -e DRIVERLESS_AI_LICENSE_KEY=\"Y0uRl1cens3KeyH3re\" \\\n -v `pwd`/config:/config \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\nUploading Your License in the Web UI\nIf Driverless AI does not locate a license.sig file during launch, then\nthe UI will prompt you to enter your license key after you log in the\nfirst time.", + "prompt_type": "plain" + }, + { + "output": "Click Save when you are done. Upon\nsuccessful completion, you will be able to begin using Driverless AI. []\nSpecifying a License for Scoring Pipelines\nWhen deploying models to production, Driverless AI requires a license to\nbe specified in order to run both the Python and MOJO Scoring Pipelines. Python Scoring Pipeline\nThe license can be specified via an environment variable in Python:\n # Set DRIVERLESS_AI_LICENSE_FILE, the path to the Driverless AI license file\n %env DRIVERLESS_AI_LICENSE_FILE=\"/home/ubuntu/license/license.sig\"\n # Set DRIVERLESS_AI_LICENSE_KEY, the Driverless AI license key (Base64 encoded string)\n %env DRIVERLESS_AI_LICENSE_KEY=\"oLqLZXMI0y...\"\nYou can also export the license file when running the scoring pipeline:\n export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\"\n bash run_example.sh\nMOJO Scoring Pipeline\nDriverless AI requires a license to be specified in order to run the\nMOJO Scoring Pipeline. 
The license can be specified in one of the\nfollowing ways:\n- Via an environment variable:\n - DRIVERLESS_AI_LICENSE_FILE: Path to the Driverless AI license\n file, or\n - DRIVERLESS_AI_LICENSE_KEY: The Driverless AI license key\n (Base64 encoded string)\n- Via a system property of JVM (-D option):\n - ai.h2o.mojos.runtime.license.file: Path to the Driverless AI\n license file, or\n - ai.h2o.mojos.runtime.license.key: The Driverless AI license\n key (Base64 encoded string)\n- Via an application classpath:\n - The license is loaded from a resource called /license.sig.", + "prompt_type": "plain" + }, + { + "output": "Enabling Notifications\nDriverless AI can be configured to trigger a user-defined script at the\nbeginning and end of an experiment. This functionality can be used to\nsend notifications to services like Slack or to trigger a machine\nshutdown. The config.toml file exposes the following variables:\n- listeners_experiment_start: Registers an absolute location of a\n script that gets executed at the start of an experiment. - listeners_experiment_done: Registers an absolute location of a\n script that gets executed when an experiment is finished\n successfully. Driverless AI accepts any executable as a script. (For example, a script\ncan be implemented in Bash or Python.) There are only two requirements:\n- The specified script can be executed. (i.e., The file has executable\n flag.) - The script should be able to accept command line parameters. Script Interfaces\nWhen Driverless AI executes a script, it passes the following parameters\nas a script command line:\n- Application ID: A unique identifier of a running Driverless AI\n instance.", + "prompt_type": "plain" + }, + { + "output": "setuidbit set up together with executable bit. For more info, visit: https://unix.stackexchange.com/questions/85663/poweroff-or-reboot-as-normal-user.) Theon_startScript ~~~~~~~~~~~~~~~~~~~~~~~ This script increases the counter of running experiments. 
:: #!/usr/bin/env bash app_id=\"${1}\" experiment_id=\"${3}\" tmp_dir=\"${TMPDIR:-/tmp}/${app_id}\" exp_file=\"${tmp_dir}/${experiment_id}\" mkdir -p \"${tmp_dir}\" touch \"${exp_file}\" The on_done Script ~~~~~~~~~~~~~~~~~~~~~~ This script decreases the counter and executes machine shutdown when the counter reaches 0-value. :: #!/usr/bin/env bash app_id=\"${1}\" experiment_id=\"${3}\" tmp_dir=\"${TMPDIR:-/tmp}/${app_id}\" exp_file=\"${tmp_dir}/${experiment_id}\" if [ -f \"${exp_file}\" ]; then rm -f \"${exp_file}\" fi running_experiments=$(ls -1 \"${tmp_dir}\" | wc -l) if [ \"${running_experiments}\" -gt 0 ]; then echo \"There is still ${running_experiments} running experiments!\"", + "prompt_type": "plain" + }, + { + "output": "Machine is going to shutdown!\" # Use instance meta-data API to get instance ID and then use AWS CLI to shutdown the machine # This expects, that AWS CLI is properly configured and has capability to shutdown instances enabled. aws ec2 stop-instances --instance-ids $(curl http://169.254.169.254/latest/meta-data/instance-id) fi .. container:: tabs .. group-tab:: Docker Image Installs 1. Copy the config.toml file from inside the Docker image to your local filesystem. (Change nvidia-docker run to docker run for non-GPU environments.) .. .. code:: bash # In your Driverless AI folder (for example, dai_1.5.1), # make config and scripts directories mkdir config mkdir scripts # Copy the config.toml file to the new config directory. nvidia-docker run \\ --pid=host \\ --rm \\ -u `id -u`:`id -g` \\ -v `pwd`/config:/config \\ --entrypoint bash \\ h2oai/dai-ubi8-x86_64:|tag| -c \"cp /etc/dai/config.toml /config\" 2.", + "prompt_type": "plain" + }, + { + "output": "Note that in this example, the scripts are saved to a **dai_VERSION/scripts** folder. ..
:: # Notification scripts # - the variable points to a location of script which is executed at given event in experiment lifecycle # - the script should have executable flag enabled # - use of absolute path is suggested # The on experiment start notification script location listeners_experiment_start = \"dai_VERSION/scripts/on_start.sh\" # The on experiment finished notification script location listeners_experiment_done = \"dai_VERSION/scripts/on_done.sh\" 3. Start Driverless AI with the DRIVERLESS_AI_CONFIG_FILE environment variable. Make sure this points to the location of the edited config.toml file so that the software finds the configuration file. (Changenvidia-docker runtodocker run`` for non-GPU\n environments.) nvidia-docker run \\\n --pid=host \\\n --rm \\\n -u `id -u`:`id -g` \\\n -e DRIVERLESS_AI_CONFIG_FILE=\"/config/config.toml\" \\\n -v `pwd`/config:/config \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n -v `pwd`/scripts:/scripts \\\n h2oai/dai-ubi8-x86_64:|tag|\n Native Installs\n 4.", + "prompt_type": "plain" + }, + { + "output": "For example:\n # DEB and RPM\n export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\"\n # TAR SH\n export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\"\n 2. Edit the Notification scripts section in the config.toml file to\n point to the new scripts. Save your changes when you are done. # Notification scripts\n # - the variable points to a location of script which is executed at given event in experiment lifecycle\n # - the script should have executable flag enabled\n # - use of absolute path is suggested\n # The on experiment start notification script location\n listeners_experiment_start = \"/opt/h2oai/dai/scripts/on_start.sh\"\n # The on experiment finished notification script location\n listeners_experiment_done = \"/opt/h2oai/dai/scripts/on_done.sh\"\n 3. Start Driverless AI. 
Note that the command used to start\n Driverless AI varies depending on your install type. # Deb or RPM with systemd (preferred for Deb and RPM):\n # Start Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Supported file types\nDriverless AI supports the following dataset file formats:\n- arff\n- avro\n- bin\n- bz2\n- csv (See note below)\n- dat\n- feather\n- gz\n- jay (See note below)\n- orc (See notes below)\n- parquet (See notes below)\n- pickle / pkl (See note below)\n- tgz\n- tsv\n- txt\n- xls\n- xlsx\n- xz\n- zip\nNote\n- Compressed Parquet files are typically the most efficient file type to\nuse with Driverless AI. - CSV in UTF-16 encoding is only supported when\nimplemented with a byte order mark (BOM). If a BOM is not present, the\ndataset is read as UTF-8. - For ORC and Parquet file formats, if you\nselect to import multiple files, those files will be imported as\nmultiple datasets. If you select a folder of ORC or Parquet files, the\nfolder will be imported as a single dataset. Tools like Spark/Hive\nexport data as multiple ORC or Parquet files that are stored in a\ndirectory with a user-defined name. For example, if you export with\nSpark dataFrame.write.parquet(\"/data/big_parquet_dataset\"), Spark\ncreates a folder /data/big_parquet_dataset, which will contain multiple\nParquet files (depending on the number of partitions in the input\ndataset) and metadata.", + "prompt_type": "plain" + }, + { + "output": "-\nFor ORC and Parquet file formats, you may receive a \"Failed to ingest\nbinary file with ORC / Parquet: lists with structs are not supported\"\nerror when ingesting an ORC or Parquet file that has a struct as an\nelement of an array. This is because PyArrow cannot handle a struct\nthat's an element of an array. - A workaround to flatten Parquet files\nis provided in Sparkling Water. Refer to our Sparkling Water solution\nfor more information. 
- To use Parquet files that have columns with list\ntype, the data_import_explode_list_type_columns_in_parquet\nconfig.toml option must be set to true. (Note that\nthis setting is disabled by default.) When this option is enabled,\ncolumns with list type are \"exploded\" into separate new columns. That\nis, each list in a cell is split into separate items which are then used\nto create new columns. Refer to the following image for a visual\nrepresentation of this process:\n[]\n- You can create new datasets from Python script files (custom\n recipes) by selecting Data Recipe URL or Upload Data Recipe from the\n Add Dataset (or Drag & Drop) dropdown menu.", + "prompt_type": "plain" + }, + { + "output": "Snowflake Integration\nOverview\nThis document describes how to use the external function feature of\nSnowflake to invoke Driverless AI models as HTTP REST API endpoints. Using the external function requires some setup and configuration in\nSnowflake and Amazon. For more information, refer to the Snowflake\ndocumentation on external functions. Note\nDownloads:\n- Download the Driverless AI Snowflake Java UDF. - Download the Driverless AI Snowflake external function\n (dai-snowflake-integration.tgz). The setup process for the Java UDF is typically easier than for the\nexternal function. []\nRequirements\n1. Snowflake login credentials\n2. Amazon EC2 login credentials\n3. Driverless AI MOJO (pipelineSF.mojo)\n - Included in the demo files\n4. DAIMojoRestServer\n - Included in the demo files\n5. Driverless AI license\n - Provided through the partnership portal\n - Copy the license to the Snowflake_H2Oai directory. Name the file\n license.sig. 6. 
Java JDK 1.8\n - An open source JDK is included in the demo zip file and the demo\n scripts use that as the default.", + "prompt_type": "plain" + }, + { + "output": "The output of the\n command should indicate JDK 1.8, for example:\n - If the output does not show JDK 1.8, download a 1.8 JDK\n for your environment from one of the following sites:\n - https://www.azul.com/downloads/zulu-community/\n - https://openjdk.java.net/install/\nSecurity\nWhen using the external function, a call is made from Snowflake to the\nAWS API Gateway. This requires the configuration of trust relationships\nin AWS so that the call can be made. The H2O REST Server only accepts calls from the AWS Gateway endpoint. When the parameter\n-DSecureModelAllowAgent=\u201dAmazonAPIGateway.|snowflake.\u201d is added to the\ncommand line, it\u2019s even possible to further limit this to a specific AWS\nfunction. Enabling -DModelSecureEndPoints=/** protects the Rest Server by\nrequiring full authentication, effectively blocking requests. Installation\nDownloads\nDownload the Driverless AI Snowflake Java UDF. Download the Driverless AI Snowflake external function\n(dai-snowflake-integration.tgz).", + "prompt_type": "plain" + }, + { + "output": "The following installation includes steps in Snowflake, AWS, and an EC2\ninstance where the H2O REST server is installed. The following steps outline the REST server installation:\n1. Create an EC2 Instance, a demo system should have the following\n minimum specification:\n - Operating System: Linux\n - CPU: 2\n - Memory: 16GB\n - Disk: 500MB\n2. Copy the distribution to the EC2 instance and extract the file. 3. Create the database. 4. Populate the table with the sample data. 5. Verify that the data is available. Starting the REST Server\nUse the following steps to start the H2O REST server on the EC2\ninstance. 1. Ensure the current working directory is Snowflake-H2Oai/Function. 2. Press ENTER to background the program. The log is written to\n nohup.log. 3. 
The REST server initiates after several seconds have passed. Check\n for a ready message similar to the following:\nVerify REST Server Installation\nTo verify that the REST server and its model components were installed\nsuccessfully and that the server initialized correctly:\n1.", + "prompt_type": "plain" + }, + { + "output": "Run the following script from a separate terminal window:\nStopping the REST Server\nTo stop the H2O REST server on the EC2 instance, run the following\ncommands:\n cd Snowflake-H2Oai/Function\n ./stopServer.sh\nExternal Function Example\nThe following is an example of an external function:\n create or replace api integration demonstration_external_api_integration_01\n api_provider=aws_api_gateway \n api_aws_role_arn='arn:aws:iam::nnnnnnnn:role/snowflake' \n api_allowed_prefixes=('https://aaaaaaaa.execute-api.us-east-1.amazonaws.com/MojoTest') \n enabled=true;\n create or replace external function H2OPredict(v varchar, v0 number, v1 varchar, v2 number, v3 number, v4 number, v5 number, v6 varchar, v7 varchar, v8 number, v9 number, v10 number, v11 number)\n returns variant\n api_integration = demonstration_external_api_integration_01\n as 'https://aaaaaaaa.execute-api.us-east-1.amazonaws.com/MojoTest';\nFunction Data Types\nThe preceding function passes 13 parameters (v to v11).", + "prompt_type": "plain" + }, + { + "output": "- If the data in the table is a float and the function uses the\nSQL Examples\nOnce the Snowflake and AWS Gateway have been configured, the following\nexample SQL statements return predictions:\n select H2OPredict('Modelname=pipelineSF.mojo', LOAN_AMNT, TERM, INT_RATE, INSTALLMENT, EMP_LENGTH, ANNUAL_INC, VERIFICATION_STATUS, ADDR_STATE, DTI, DELINQ_2YRS, REVOL_BAL, REVOL_UTIL ) from LENDINGCLUB where ADDR_STATE='NJ' order by ID;\nPassing Runtime Parameters\nThe following is a list of parameters used to pass specific values to\nthe REST server:\n- Modelname: The name of the Driverless AI MOJO file that exists in\n 
the REST server ModelDirectory. This is pipeline.mojo by default. - Prediction: The numeric prediction to use. This is 0 by default. Sample parameter usage:\n select *, H2OPredict('Modelname=pipelineSF.mojo Prediction=0',LOAN_AMNT, TERM, INT_RATE, INSTALLMENT, EMP_LENGTH, ANNUAL_INC, VERIFICATION_STATUS, \n ADDR_STATE, DTI, DELINQ_2YRS, REVOL_BAL, REVOL_UTIL ) from LENDINGCLUB;\n Request: 10625, 36 months,6.62,326.23,4,33000,VERIFIED - income,WA,27.38,0,6290,46.3 \n Response: [\"bad_loan.0 : 0.917305\",\"bad_loan.1 : 0.08269503\"]\n 0.917305\nAdvanced Setup\nThe Snowflake External Function allows custom HTTP headers to be\ndefined.", + "prompt_type": "plain" + }, + { + "output": "create or replace external function H2OPredictHDR(v0 number, v1 varchar, v2 number, v3 number, v4 number, v5 number, v6 varchar, v7 varchar, v8 number, v9 number, v10 number, v11 number)\n returns variant\n HEADERS=('modelname' = 'pipelineSF.mojo')\n api_integration = demonstration_external_api_integration_01\n as 'https://aaaaaaaa.execute-api.us-east-1.amazonaws.com/production'; \nThis allows function calls to not require any parameters. A function by\nitself is enough for each model:\n select id, H2OPredictHDR(LOAN_AMNT, TERM, INT_RATE, INSTALLMENT, EMP_LENGTH, ANNUAL_INC, VERIFICATION_STATUS, \n ADDR_STATE, DTI, DELINQ_2YRS, REVOL_BAL, REVOL_UTIL ) from LENDINGCLUB;\nThe prediction can also be passed if required. Otherwise, a probability\nof 0 is returned. Building Models\nThe Snowflake external function feature lets you build Driverless AI\nmodels from a Snowflake worksheet. When requesting Driverless AI to\nbuild a model from a worksheet, the build status is updated in a table\ncalled MODELBUILD so that the build can be monitored.", + "prompt_type": "plain" + }, + { + "output": "Note: When the build finishes, the build experiment UUID is reported for\nauditability purposes. 
Define build function example:\n create or replace external function H2OBuild(v varchar)\n returns variant\n api_integration = demonstration_external_api_integration_01\n as 'https://bbbbb.execute-api.us-east-1.amazonaws.com/production';\nDefine Snowflake Table\nA Snowflake table is used to track the status of the model build that\nRequesting a Build Example\nUse the function H2OBuild to change the requesting parameters:\n select H2OBuild('Build --Table=LENDINGCLUB2 --Target=BAD_LOAN --Modelname=custchurn.mojo') ;\nFor more information on the parameters to the build request, see the\nfollowing table:\n ----------------------------------------------------------------------\n Parameter Optional Description\n ------------- ------------------------------------- ------------------\n Table no Defines which\n Snowflake table to\n use for the model\n build\n Target no The column\n (feature) name to\n use as the models\n target from\n training\n Modelname no The name the model\n will have when\n deployed\n Accuracy yes Model accuracy\n setting\n Time yes Model experiment\n time\n Inter yes Model\n pretability interpretability\n setting\n User yes Username required\n to access\n Snowflake table\n Password yes Password required\n to access\n Snowflake table\n Warehouse yes Snowflake\n warehouse\n Database yes Snowflake database\n Schema yes Snowflake schema\n ----------------------------------------------------------------------\n : Build Parameters\nDeployment\nOnce the model has finished building, it is copied to the REST server\nand becomes available for the H2OPredict scoring function.", + "prompt_type": "plain" + }, + { + "output": "By default,\nthis is /home/ec2-user/Snowflake-H2Oai/Function. Note: The script code must be updated based on the environment you are\nusing. Driverless AI Snowflake Configuration\nThe Driverless AI configuration uses the standard default settings\nexcept for settings related to user security. 
Use the authentication\nmethod that is best suited to the environment that you are using. For\nmore information, see config_file and dai_auth. authentication_method = \"local\"\n local_htpasswd_file = \"/home/ec2-user/dai-1.8.5.1-linux-x86_64/.htpasswd\" \n This resource must be secured from unauthorized access and use. To create a username and password using local authentication:\n sudo htpasswd -B -c .htpasswd snowflake \n Password yourpassword\nRequirements\nThe build functionality invokes a Python program that uses the\nDriverless AI Python Client to create an experiment. The following\npackages must be available:\n- sudo yum install httpd\n- sudo yum install python3\n- sudo pip3 install driverlessai\n- sudo pip3 install --upgrade snowflake-connector-python\nSample Workbook\nThe following example shows how to use the functions once the initial\nsetup has been completed.", + "prompt_type": "plain" + }, + { + "output": "Authentication Methods\nDriverless AI supports Client Certificate, LDAP, Local, mTLS, OpenID,\nPAM, none, and unvalidated (default) authentication. These can be\nconfigured by specifying the environment variables when starting the\nDriverless AI Docker image or by specifying the appropriate\nconfiguration options in the config.toml file. Notes:\n- You can enable multiple authentication methods with the\n additional_authentication_methods config.toml setting. These are\n enabled alongside the default method specified with the\n authentication_method config.toml setting. Login forms for each\n additional method are available on the\n /login/ path. - If multiple authentication methods are enabled, each method must be\n set up so that it results in the same username to provide access to\n the same resources. - Driverless AI is also integrated with IBM Spectrum Conductor and\n supports authentication from Conductor. 
Contact sales@h2o.ai for\n more information about using IBM Spectrum Conductor authentication.", + "prompt_type": "plain" + }, + { + "output": "Dataset Options\nThe following is a list of options that are available for every dataset\non the Datasets page. To view these options, click Click for Actions\nnext to any dataset listed on the Datasets page. - Details: View detailed information about the dataset. For more\n information, see view_dataset. - Visualize: View a variety of visualizations generated by Driverless\n AI using the dataset. For more information, see visualize_dataset. - Split: Split the dataset into two subsets. For more information, see\n split_dataset. - Predict: Opens the Experiment Setup page and automatically specifies\n the selected dataset as the training dataset. - Predict Wizard: Opens the Driverless AI experiment setup wizard. For\n more information, see dai_wizard. - Join Wizard: Opens the Driverless AI dataset join wizard. - Rename: Rename the dataset. - Download: Download the dataset to your local file system. - Display Logs: View logs relating to the dataset. - Delete: Delete the dataset from the list of datasets on the Datasets\n page.", + "prompt_type": "plain" + }, + { + "output": "Install on Ubuntu\nThis section describes how to install the Driverless AI Docker image on\nUbuntu. The installation steps vary depending on whether your system has\nGPUs or if it is CPU only. Environment\n -------------------------------------------\n Operating System GPUs? Min Mem\n ------------------------- ------- ---------\n Ubuntu with GPUs Yes 64 GB\n Ubuntu with CPUs No 64 GB\n -------------------------------------------\nInstall on Ubuntu with GPUs\nNote: Driverless AI is supported on Ubuntu 16.04 or later. Open a Terminal and ssh to the machine that will run Driverless AI. Once\nyou are logged in, perform the following steps. 1. Retrieve the Driverless AI Docker image from\n https://www.h2o.ai/download/. 
(Note that the contents of this Docker\n image include a CentOS kernel and CentOS packages.) 2. Install and run Docker on Ubuntu (if not already installed):\n3. Install nvidia-docker2 (if not already installed). More information\n is available at\n https://github.com/NVIDIA/nvidia-docker/blob/master/README.md.", + "prompt_type": "plain" + }, + { + "output": "Verify that the NVIDIA driver is up and running. If the driver is\n not up and running, log on to\n http://www.nvidia.com/Download/index.aspx?lang=en-us to get the\n latest NVIDIA Tesla V/P/K series driver:\n5. Set up a directory for the version of Driverless AI on the host\n machine:\n6. Change directories to the new folder, then load the Driverless AI\n Docker image inside the new directory:\n7. Enable persistence of the GPU. Note that this needs to be run once\n every reboot. Refer to the following for more information:\n http://docs.nvidia.com/deploy/driver-persistence/index.html. 8. Set up the data, log, and license directories on the host machine:\n9. At this point, you can copy data into the data directory on the host\n machine. The data will be visible inside the Docker container. 10. Run docker images to find the image tag. 11. Start the Driverless AI Docker image and replace TAG below with the\n image tag. Depending on your install version, use the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command.", + "prompt_type": "plain" + }, + { + "output": "nvidia-docker. GPU support will not be available. **Watch the installation video** `here `__. Note that some of the images in this video may change between releases, but the installation steps remain the same. Open a Terminal and ssh to the machine that will run Driverless AI. Once you are logged in, perform the following steps. 1. Retrieve the Driverless AI Docker image from https://www.h2o.ai/download/. 2. Install and run Docker on Ubuntu (if not already installed): .. .. 
code:: bash # Install and run Docker on Ubuntu curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - sudo apt-key fingerprint 0EBFCD88 sudo add-apt-repository \\ \"deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable\" sudo apt-get update sudo apt-get install docker-ce sudo systemctl start docker 3. Set up a directory for the version of Driverless AI on the host machine: .. .. code:: bash # Set up directory with the version name mkdir |VERSION-dir| 4.", + "prompt_type": "plain" + }, + { + "output": "Set up the data, log, license, and tmp directories on the host machine (within the new directory): .. .. code:: bash # Set up the data, log, license, and tmp directories mkdir data mkdir log mkdir license mkdir tmp 6. At this point, you can copy data into the data directory on the host machine. The data will be visible inside the Docker container. 7. Run docker images to find the new image tag. 8. Start the Driverless AI Docker image. Note that GPU support will not be available. Note that from version 1.10 DAI docker image runs with internal tini that is equivalent to using --init from docker, if both are enabled in the launch command, tini will print a (harmless) warning message. .. We recommend --shm-size=256m in docker launch command. But if user plans to build :ref:`image auto model ` extensively, then --shm-size=2g is recommended for Driverless AI docker command. .. 
code:: bash # Start the Driverless AI Docker image docker run \\ --pid=host \\ --rm \\ --shm-size=256m \\ -u `id -u`:`id -g` \\ -p 12345:12345 \\ -v `pwd`/data:/data \\ -v `pwd`/log:/log \\ -v `pwd`/license:/license \\ -v `pwd`/tmp:/tmp \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ h2oai/dai-ubi8-x86_64:|tag| Driverless AI will begin running: :: -------------------------------- Welcome to H2O.ai's Driverless AI --------------------------------- - Put data in the volume mounted at /data - Logs are written to the volume mounted at /log/20180606-044258 - Connect to Driverless AI on port 12345 inside the container - Connect to Jupyter notebook on port 8888 inside the container 9.", + "prompt_type": "plain" + }, + { + "output": "Upgrading the Docker Image -------------------------- This section provides instructions for upgrading Driverless AI versions that were installed in a Docker container. These steps ensure that existing experiments are saved. **WARNING**: Experiments, MLIs, and MOJOs reside in the Driverless AI tmp directory and are not automatically upgraded when Driverless AI is upgraded. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp directory before upgrading. If you did not build MLI on a model before upgrading Driverless AI, then you will not be able to view MLI on that model after upgrading. Before upgrading, be sure to run MLI jobs on models that you want to continue to interpret in future releases. If that MLI job appears in the list of Interpreted Models in your current version, then it will be retained after upgrading. If you did not build a MOJO pipeline on a model before upgrading Driverless AI, then you will not be able to build a MOJO pipeline on that model after upgrading.", + "prompt_type": "plain" + }, + { + "output": "**Note**: Stop Driverless AI if it is still running. 
Requirements ~~~~~~~~~~~~ We recommend to have NVIDIA driver >= installed (GPU only) in your host environment for a seamless experience on all architectures, including Ampere. Driverless AI ships with CUDA 11.2.2 for GPUs, but the driver must exist in the host environment. Go to `NVIDIA download driver `__ to get the latest NVIDIA Tesla A/T/V/P/K series drivers. For reference on CUDA Toolkit and Minimum Required Driver Versions and CUDA Toolkit and Corresponding Driver Versions, see `here `__ . .. note:: .. If you are using K80 GPUs, the minimum required NVIDIA driver version is 450.80.02. Upgrade Steps ~~~~~~~~~~~~~ 1. SSH into the IP address of the machine that is running Driverless AI. 2. Set up a directory for the version of Driverless AI on the host machine: .. .. code:: bash # Set up directory with the version name mkdir |VERSION-dir| # cd into the new directory cd |VERSION-dir| 3.", + "prompt_type": "plain" + }, + { + "output": "Azure Blob Store Setup\n\nDriverless AI lets you explore Azure Blob Store data sources from within\nthe Driverless AI application.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (< Docker 19.03) command when starting the Driverless AI Docker image. Use docker version to check which version of Docker you are using. Supported Data Sources Using the Azure Blob Store Connector ----------------------------------------------------------- The following data sources can be used with the Azure Blob Store connector. - :ref:`Azure Blob Storage (general purpose v1)` - Blob Storage - :ref:`Azure Files (File Storage)` - :ref:`Azure Data Lake Storage Gen 2 (Storage V2)` The following data sources can be used with the Azure Blob Store connector when also using the HDFS connector. 
- :ref:`Azure Data Lake Gen 1 (HDFS connector required)` - :ref:`Azure Data Lake Gen 2 (HDFS connector optional)` Description of Configuration Attributes --------------------------------------- The following configuration attributes are specific to enabling Azure Blob Storage. -azure_blob_account_name: The Microsoft Azure Storage account name.", + "prompt_type": "plain" + }, + { + "output": "-azure_blob_account_key: Specify the account key that maps to your account name. -azure_connection_string: Optionally specify a new connection string. With this option, you can include an override for a host, port, and/or account name. For example, .. code:: bash azure_connection_string = \"DefaultEndpointsProtocol=http;AccountName=;AccountKey=;BlobEndpoint=http://:/;\" -azure_blob_init_path: Specifies the starting Azure Blob store path displayed in the UI of the Azure Blob store browser. -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. The following additional configuration attributes can be used for enabling an HDFS Connector to connect to Azure Data Lake Gen 1 (and optionally with Azure Data Lake Gen 2). -hdfs_config_path: The location the HDFS config folder path. This folder can contain multiple config files. -hdfs_app_classpath: The HDFS classpath.", + "prompt_type": "plain" + }, + { + "output": ".. _example1: Example 1: Enabling the Azure Blob Store Data Connector ------------------------------------------------------- .. container:: tabs .. group-tab:: Docker Image Installs This example enables the Azure Blob Store data connector by specifying environment variables when starting the Driverless AI Docker image. This lets users reference data stored on your Azure storage account using the account name, for example:https://mystorage.blob.core.windows.net. .. 
code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,azrbs\" \\ -e DRIVERLESS_AI_AZURE_BLOB_ACCOUNT_NAME=\"mystorage\" \\ -e DRIVERLESS_AI_AZURE_BLOB_ACCOUNT_KEY=\"\" \\ -p 12345:12345 \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure Azure Blob Store options in the config.toml file, and then specify that file when starting Driverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "Configure the Driverless AI config.toml file. Set the following configuration options: .. -enabled_file_systems\n= \"file, upload, azrbs\"-azure_blob_account_name =\n\"mystorage\"-azure_blob_account_key =\n\"\"2. Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example shows how to enable the Azure Blob Store data connector in the config.toml file when starting Driverless AI in native installs.", + "prompt_type": "plain" + }, + { + "output": "1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. .. 
:: # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, azrbs\" # Azure Blob Store Connector credentials azure_blob_account_name = \"mystorage\" azure_blob_account_key = \"\" 3. Save the changes when you are done, then stop/restart Driverless AI. .. _example2: Example 2: Mount Azure File Shares to the Local File System ----------------------------------------------------------- Supported Data Sources Using the Local File System ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Azure Files (File Storage) Mounting Azure File Shares ~~~~~~~~~~~~~~~~~~~~~~~~~~ Azure file shares can be mounted into the Local File system of Driverless AI. To mount the Azure file share, follow the steps listed on https://docs.microsoft.com/en-us/azure/storage/files/storage-how-to-use-files-linux. .. 
_example3: Example 3: Enable HDFS Connector to Connect to Azure Data Lake Gen 1 -------------------------------------------------------------------- This example enables the HDFS Connector to connect to Azure Data Lake Gen1.", + "prompt_type": "plain" + }, + { + "output": ".. container:: tabs .. group-tab:: Docker Image with the config.toml 1. Create an Azure AD web application for service-to-service authentication: https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-service-to-service-authenticate-using-active-directory 2. Add the information from your web application to the Hadoop core-site.xml configuration file: .. .. code:: bash fs.adl.oauth2.access.token.provider.type ClientCredential fs.adl.oauth2.refresh.url Token endpoint created in step 1. fs.adl.oauth2.client.id Client ID created in step 1 fs.adl.oauth2.credential Client Secret created in step 1 fs.defaultFS ADL URIt 3.", + "prompt_type": "plain" + }, + { + "output": "This file can be found on any Hadoop version in: $HADOOP_HOME/share/hadoop/tools/lib/*. .. .. code:: bash echo \"$HADOOP_CLASSPATH:$HADOOP_HOME/share/hadoop/tools/lib/*\" 4. Configure the Driverless AI config.toml file. Set the following configuration options: .. .. code:: bash enabled_file_systems = \"upload, file, hdfs, azrbs, recipe_file, recipe_url\" hdfs_config_path = \"/path/to/hadoop/conf\" hdfs_app_classpath = \"/hadoop/classpath/\" hdfs_app_supported_schemes = \"['adl://']\" 5. Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. 
container:: group-tab Native Installs 1.", + "prompt_type": "plain" + }, + { + "output": "https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-service-to-service-authenticate-using-active-directory 2. Add the information from your web application to the hadoopcore-site.xmlconfiguration file: .. .. code:: bash fs.adl.oauth2.access.token.provider.type ClientCredential fs.adl.oauth2.refresh.url Token endpoint created in step 1. fs.adl.oauth2.client.id Client ID created in step 1 fs.adl.oauth2.credential Client Secret created in step 1 fs.defaultFS ADL URIt 3.", + "prompt_type": "plain" + }, + { + "output": "abfs[s]://file_system@account_name.dfs.core.windows.net///. .. container:: tabs .. group-tab:: Docker Image with the config.toml 1. Create an Azure Service Principal: https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal 2. Grant permissions to the Service Principal created on step 1 to access blobs: https://docs.microsoft.com/en-us/azure/storage/common/storage-auth-aad 3. Add the information from your web application to the Hadoopcore-site.xmlconfiguration file: .. .. code:: bash fs.azure.account.auth.type OAuth fs.azure.account.oauth.provider.type org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider fs.azure.account.oauth2.client.endpoint Token endpoint created in step 1. fs.azure.account.oauth2.client.id Client ID created in step 1 fs.azure.account.oauth2.client.secret Client Secret created in step 1 4.", + "prompt_type": "plain" + }, + { + "output": "These files can found on any Hadoop version 3.2 or higher at:$HADOOP_HOME/share/hadoop/tools/lib/*.. .. code:: bash echo \"$HADOOP_CLASSPATH:$HADOOP_HOME/share/hadoop/tools/lib/*\" **Note**: ABFS is only supported for Hadoop version 3.2 or higher. 5. Configure the Driverless AI config.toml file. Set the following configuration options: .. .. 
code:: bash enabled_file_systems = \"upload, file, hdfs, azrbs, recipe_file, recipe_url\" hdfs_config_path = \"/path/to/hadoop/conf\" hdfs_app_classpath = \"/hadoop/classpath/\" hdfs_app_supported_schemes = \"['abfs://']\" 6. Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs 1.", + "prompt_type": "plain" + }, + { + "output": "https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal 2. Grant permissions to the Service Principal created on step 1 to access blobs: https://docs.microsoft.com/en-us/azure/storage/common/storage-auth-aad 3. Add the information from your web application to the hadoopcore-site.xmlconfiguration file: .. .. code:: bash fs.azure.account.auth.type OAuth fs.azure.account.oauth.provider.type org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider fs.azure.account.oauth2.client.endpoint Token endpoint created in step 1. fs.azure.account.oauth2.client.id Client ID created in step 1 fs.azure.account.oauth2.client.secret Client Secret created in step 1 4.", + "prompt_type": "plain" + }, + { + "output": "These files can found on any hadoop version 3.2 or higher at:$HADOOP_HOME/share/hadoop/tools/lib/*.. .. code:: bash echo \"$HADOOP_CLASSPATH:$HADOOP_HOME/share/hadoop/tools/lib/*\" **Note**: ABFS is only supported for hadoop version 3.2 or higher 5. Configure the Driverless AI config.toml file. Set the following configuration options: .. .. 
code:: bash enabled_file_systems = \"upload, file, hdfs, azrbs, recipe_file, recipe_url\" hdfs_config_path = \"/path/to/hadoop/conf\" hdfs_app_classpath = \"/hadoop/classpath/\" hdfs_app_supported_schemes = \"['abfs://']\" 6. Save the changes when you are done, then stop/restart Driverless AI. Export MOJO artifact to Azure Blob Storage ------------------------------------------ In order to export the MOJO artifact to Azure Blob Storage, you must enable support for the shared access signatures (SAS) token.", + "prompt_type": "plain" + }, + { + "output": "Install on Google Compute\n\nDriverless AI can be installed on Google Compute using one of two\nmethods:\n\n- Install the Google Cloud Platform offering. This installs Driverless\n AI via the available GCP Marketplace offering.\n- Install and Run in a Docker Container on Google Compute Engine. This\n installs and runs Driverless AI from scratch in a Docker container\n on Google Compute Engine.\n\nSelect your desired installation procedure below:\n\ngoogle-cloud-platform google-docker-container", + "prompt_type": "plain" + }, + { + "output": "Automatic Visualization\n\ndatasets-viewing custom_viz", + "prompt_type": "plain" + }, + { + "output": "Supported Environments\nThe following tables list the environments that support Driverless AI. 
Linux\n ---------------------------------------------------------------------\n P ackage OS GPU C PU\n Type \n -------- ----------------------------------- ------------------- ----\n RPM RHEL 7 & 8/CentOS 7 & 8 CUDA 11.2 and x8 6\n above/CPU only 64\n DEB Ubuntu 16.04/Ubuntu 18.04/Ubuntu CUDA 11.2 and x8 6\n 20.04/Ubuntu 22.04 above/CPU only 64\n TAR SH Most Linux CUDA 11.2 and x8 6\n above/CPU only 64\n Docker Docker CE CUDA 11.2 and x8 6\n above/CPU only 64\n ---------------------------------------------------------------------\nNote\nUsing TensorFlow requires your CPUs to support Advanced Vector\nExtensions (AVX).", + "prompt_type": "plain" + }, + { + "output": "For install instructions, refer to linux. Windows 10 Pro, Enterprise, or Education\nCaution: Windows computers (laptops in particular) should only be used\nwith small datasets for the purpose of exploring the software. For\nserious use, server hardware is required. Consider spinning up a more\npowerful instance in the cloud instead of using a laptop. Avoid laptops\nwith less than 16 GB of RAM. GPUs are not supported on Windows. --------------------------------------------------------------------\n Package OS GPU CPU Min\n Type Support? Memory\n ---------- ------------------------------- ---------- ----- --------\n DEB Ubuntu 18.04 for WSL (not fully No x86 16 GB\n tested) _64 \n Docker Docker Desktop for Win 2.2.0.3 No x86 16 GB\n (42716) _64 \n --------------------------------------------------------------------\nFor install instructions, refer to install-on-windows.", + "prompt_type": "plain" + }, + { + "output": "Windows 10\nThis section describes how to install, start, stop, and upgrade\nDriverless AI on a Windows 10 machine. The installation steps assume\nthat you have a license key for Driverless AI. For information on how to\nobtain a license key for Driverless AI, visit\nhttps://h2o.ai/o/try-driverless-ai/. 
Once obtained, you will be prompted\nto paste the license key into the Driverless AI UI when you first log\nin, or you can save it as a .sig file and place it in the license folder\nthat you will create during the installation process. Overview of Installation on Windows\nTo install Driverless AI on Windows, use a Driverless AI Docker image. Notes:\n- GPU support is not available on Windows. - Scoring is not available on Windows. Caution: Installing Driverless AI on Windows 10 is not recommended for\nserious use. Environment\n -------------------------------------------------------------------\n Operating System GPU Support? Min Mem Suitable for\n ----------------------- --------------- --------- -----------------\n Windows 10 Pro No 16 GB Experimentation\n Windows 10 Enterprise No 16 GB Experimentation\n Windows 10 Education No 16 GB Experimentation\n -------------------------------------------------------------------\nNote: Driverless AI cannot be installed on versions of Windows 10 that\ndo not support Hyper-V.", + "prompt_type": "plain" + }, + { + "output": "Docker Image Installation\nNotes:\n- Be aware that there are known issues with Docker for Windows. More\n information is available here:\n https://github.com/docker/for-win/issues/188. - Consult with your Windows System Admin if\n - Your corporate environment does not allow third-party software\n installs\n - You are running Windows Defender\n - Your machine is not running with\n Enable-WindowsOptionalFeature -Online -FeatureName Microsoft-Windows-Subsystem-Linux. Watch the installation video here. Note that some of the images in this\nvideo may change between releases, but the installation steps remain the\nsame. Requirements\n- Windows 10 Pro / Enterprise / Education\n- Docker Desktop for Windows 2.2.0.3 (42716)\nNote: As of this writing, Driverless AI has only been tested on Docker\nDesktop for Windows version 2.2.0.3 (42716). Installation Procedure\n1. 
Retrieve the Driverless AI Docker image from\n https://www.h2o.ai/download/. 2. Download, install, and run Docker for Windows from\n https://docs.docker.com/docker-for-windows/install/.", + "prompt_type": "plain" + }, + { + "output": "Note that you may have to reboot after\n installation. 3. Before running Driverless AI, you must:\n4. Open a PowerShell terminal and set up a directory for the version of\n Driverless AI on the host machine:\n5. With Docker running, navigate to the location of your downloaded\n Driverless AI image. Move the downloaded Driverless AI image to your\n new directory. 6. Change directories to the new directory, then load the image using\n the following command:\n7. Set up the data, log, license, and tmp directories (within the new\n directory). 8. Copy data into the /data directory. The data will be visible inside\n the Docker container at /data. 9. Run docker images to find the image tag. 10. Start the Driverless AI Docker image. Be sure to replace path_to_\n below with the entire path to the location of the folders that you\n created (for example,\n \"c:/Users/user-name/driverlessai_folder/data\"). Note that this is\n regular Docker, not NVIDIA Docker. GPU support will not be\n available.", + "prompt_type": "plain" + }, + { + "output": "11. Connect to Driverless AI with your browser at\n http://localhost:12345. Stopping the Docker Image\nTo stop the Driverless AI Docker image, type Ctrl + C in the Terminal\n(Mac OS X) or PowerShell (Windows 10) window that is running the\nDriverless AI Docker image. Upgrading the Docker Image\nThis section provides instructions for upgrading Driverless AI versions\nthat were installed in a Docker container. These steps ensure that\nexisting experiments are saved. WARNING: Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\ndirectory and are not automatically upgraded when Driverless AI is\nupgraded. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. 
- Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. If you did not build MLI on a model before upgrading Driverless AI,\n then you will not be able to view MLI on that model after upgrading. Before upgrading, be sure to run MLI jobs on models that you want to\n continue to interpret in future releases.", + "prompt_type": "plain" + }, + { + "output": "If you did not build a MOJO pipeline on a model before upgrading\n Driverless AI, then you will not be able to build a MOJO pipeline on\n that model after upgrading. Before upgrading, be sure to build MOJO\n pipelines on all desired models and then back up your Driverless AI\n tmp directory. Note: Stop Driverless AI if it is still running. Requirements\nWe recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere. Driverless AI ships with CUDA 11.2.2 for GPUs, but the driver\nmust exist in the host environment. Go to NVIDIA download driver to get the latest NVIDIA Tesla A/T/V/P/K\nseries drivers. For reference on CUDA Toolkit and Minimum Required\nDriver Versions and CUDA Toolkit and Corresponding Driver Versions, see\nhere . Note\nUpgrade Steps\n1. SSH into the IP address of the machine that is running Driverless\n AI. 2. Set up a directory for the version of Driverless AI on the host\n machine:\n3.", + "prompt_type": "plain" + }, + { + "output": "reproducibility_level=1`` by default. []\nThe following section describes the different levels of reproducibility\nin more detail. Reproducibility levels\nYou can manually specify one of the four available levels of\nreproducibility with the reproducibility_level config option. The\nfollowing list describes how these levels of reproducibility are\ndistinct from one another. - 1 (default): Same experiment results for same operating system, same\n CPU(s), and same GPU(s). 
- 2: Same experiment results for same operating system, same CPU\n architecture, and same GPU architecture. - 3: Same experiment results for same operating system and same CPU\n architecture. Note that this reproducibility level excludes GPUs. - 4: Same experiment results for same operating system. This level is\n considered to be the best effort approximation. Notes:\n- Experiments are only reproducible when run on the same hardware\n (that is, when using the same number and type of GPUs/CPUs and the\n same architecture).", + "prompt_type": "plain" + }, + { + "output": "Custom Recipe Management\nThe following sections describe custom recipe management in Driverless\nAI. Understanding Custom Recipes\nCustom recipes are Python code snippets that can be uploaded into\nDriverless AI at runtime like plugins. Restarting Driverless AI is not\nrequired. Custom recipes can be provided for transformers, models, and\nscorers. During training of a supervised machine learning modeling\npipeline, Driverless AI can use these code snippets as building blocks\nin combination with or in place of built-in code pieces. When selecting\nrecipes for an experiment in the expert-settings panel, only custom\nrecipes that are currently active are visible. New datasets can be created by\nmodifying an existing dataset with a data recipe . You\ncan also apply data recipes as standalone recipes. Additionally, the set\nof MLI techniques and methodologies used in Driverless AI can be\nextended with recipes. For more information on MLI explainer recipes,\nsee mli-byor. Note\n- The Python Scoring Pipeline for deployment features full support for\ncustom recipes.", + "prompt_type": "plain" + }, + { + "output": "For\ncomplete examples that demonstrate how to download these dependencies\nand run the Python Scoring Pipeline, see Python_Pipeline. - In most cases, and especially for complex recipes, MOJO for model\n deployment is not available out of the box. However, it is possible\n to get the MOJO. 
Contact support@h2o.ai for more information about\n creating MOJOs for custom recipes. - To enable Shapley calculations in MLI, custom model recipes must use\n the has_pred_contribs method. Refer to the model recipe template for\n more info. - When enabling recipes, you can use the pip_install_options\n TOML option to specify your organization's\n internal Python package index as follows:\nAdding Custom Recipes\nTo add a custom recipe, go to the recipe management page by clicking\nRecipes in the top navigation, then click the Add Custom Recipes button. Select one of the following options from the drop-down menu that\nappears:\n[]\n- From computer: Add a custom recipe as a Python or ZIP file from your\n local file system.", + "prompt_type": "plain" + }, + { + "output": "For example, you can enter\n https://github.com/h2oai/driverlessai-recipes/ to add all the\n custom recipes contained in the official Recipes for\n Driverless AI repository. - A GitHub tree. For example, you can enter\n https://github.com/h2oai/driverlessai-recipes/tree/master/models\n to add only the custom model recipes contained in the official\n Recipes for Driverless AI repository, or enter\n https://github.com/h2oai/driverlessai-recipes/tree/master/models/algorithms\n to add only the custom algorithm recipes contained in the\n repository. - A file system path. This option is equivalent to the File\n System option when adding datasets. - From Bitbucket: Add a custom recipe from a Bitbucket repository. To\n use this option, your Bitbucket username and password must be\n provided along with the custom recipe Bitbucket URL. - With Editor: Add a custom recipe with a built-in code editor.", + "prompt_type": "plain" + }, + { + "output": "Note\nIf you set the _global_modules_needed_by_name parameter in a custom\nrecipe, then ensure that it is set on a single line before uploading it. 
Using line breaks when setting the _global_modules_needed_by_name\nparameter results in a syntax error when attempting to upload the custom\nrecipe. Managing Recipes\nTwo distinct views are available on this page:\n- List view: This view displays all available custom recipes. Only\n active recipes are listed by default, but deactivated recipes can\n also be viewed. For more information, see list-view. - Detail view: This view lets you edit custom recipe code in\n Driverless AI and save the edited code. The detail view is available\n for both active and deactivated recipes. For more information, see\n detail-view. List View\nThe following is a list of actions that you can take from the recipe\nlist view:\nGeneral actions:\n- View deactivated recipes by selecting Include inactive recipes. - Deactivate a recipe by selecting it and clicking Deactivate x\n Item(s).", + "prompt_type": "plain" + }, + { + "output": "Note that recipes can only be deactivated, not deleted. - Search and sort recipes. Note that if enough recipes are uploaded,\n they are listed on multiple pages. - Select which columns are visible on the list view. Recipe-specific actions:\n- Open: View a specific recipe in detail. - Edit note: Create or edit a note for a recipe to keep track of its\n functionality. - Deactivate: Deactivate the selected recipe. - Apply on Dataset (For data recipes only): Apply an existing data\n recipe to the dataset. For more information on modifying datasets\n with data recipes, see modify_by_recipe. - Apply Without Dataset (For data recipes only): Apply the selected\n data recipe as a standalone recipe. Detail View\nThe following is a list of actions that you can take from the recipe\ndetail view:\n- Edit custom recipe code:\n - You can toggle an in-code search feature by pressing Control+F\n (or Command+F on Mac). 
- To save the edited recipe, click the Save as New Recipe and\n Activate button.", + "prompt_type": "plain" + }, + { + "output": "If you don't change both the ClassName and _display_name\n defined in the recipe, the old version of the recipe is\n automatically deactivated when a new version is saved and\n activated. New versions of existing recipes keep references to\n the original recipes, letting you keep track of changes\n throughout multiple versions. - You can download recipe code and deactivate recipes from this\n view. - View the recipe's name, type, ID, filename, creation date, and\n whether the recipe is currently active. - (For data recipes only) Apply the data recipe on a dataset or as a\n standalone recipe. - If a recipe was downloaded from an external URL, the link is\n displayed under Original URL. - (For Individual recipes only) View a link to the experiment from\n which the Individual recipe was derived. - More Actions drop-down:\n - (For Individual recipes only) To create a new experiment using\n the Individual recipe, click Use in New Experiment.", + "prompt_type": "plain" + }, + { + "output": "- Download the recipe by clicking Download. - Deactivate the recipe by clicking Deactivate. Note that\n recipes can only be deactivated, not deleted. []\nNote\nIf _display_name is not defined in a recipe, then that recipe's display\nname is derived from the ClassName defined in the recipe. Examples\ncustom-recipes-data-recipes custom-recipes-h2o-3-algos\ncustom-recipes-scorer custom-recipes-transformers\nAdditional Resources\n- Custom Recipes FAQ :\n For answers to common questions about custom recipes. - How to Write a Recipe :\n A guide for writing your own recipes. - Data Template :\n A template for creating your own Data recipe. - Model Template :\n A template for creating your own Model recipe. 
- Scorer Template :\n A template for creating your own Scorer recipe.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Clients\n\npython_client r_client", + "prompt_type": "plain" + }, + { + "output": "Monitoring and Logging\n\npending-jobs logging", + "prompt_type": "plain" + }, + { + "output": "GPUs in Driverless AI\nDriverless AI can run on machines with only CPUs or machines with CPUs\nand GPUs. For the best (and intended-as-designed) experience, install\nDriverless AI on modern data center hardware with GPUs and CUDA support. Feature engineering and model building are primarily performed on CPU\nand GPU respectively. For this reason, Driverless AI benefits from\nmulti-core CPUs with sufficient system memory and GPUs with sufficient\nRAM. For best results, we recommend GPUs that use the Pascal or Volta\narchitectures. Ampere-based NVIDIA GPUs are also supported on x86\nmachines (requires NVIDIA CUDA Driver 11.2 or later). Driverless AI ships with NVIDIA CUDA 11.2.2 and cuDNN. Image and NLP use cases in\nDriverless AI benefit significantly from GPU usage. Model building algorithms, namely, XGBoost (GBM/DART/RF/GLM), LightGBM\n(GBM/DART/RF), PyTorch (BERT models) and TensorFlow (CNN/BiGRU/ImageNet)\nmodels utilize GPU. Model scoring on GPUs can be enabled by selecting\na non-zero number of GPUs for prediction/scoring via\nnum_gpus_for_prediction system expert setting\nof the experiment.", + "prompt_type": "plain" + }, + { + "output": "MOJO\nscoring for productionizing models on GPUs can be enabled for some use\ncases. See tensorflow_nlp_have_gpus_in_production in\nconfig.toml. Driverless AI Tensorflow, BERT and\nImage models support C++ MOJO scoring for\nproduction. Feature engineering transformers such as\nClusterDist cuML Transformer, TruncSVDNum cuML Transformer, DBSCAN cuML\nTransformer run on GPUs. With Driverless AI Dask multinode setup, GPUs\ncan be used for extensive model hyperparameter search. 
For details see -\nDriverless AI & NVIDIA cuDNN\nNVIDIA cuDNN is a library for deep neural nets built using CUDA and\noptimized for GPUs. For NLP data modeling and feature\nengineering, Driverless AI uses cuDNN PyTorch (BERT models) and\nTensorFlow NLP recipe based on CNN and BiGRU (RNN) deep learning models. For modeling Image data, TensorFlow (ImageNet\nmodels) are used. Driverless AI & NVIDIA RAPIDS\nNVIDIA RAPIDS provides PyData APIs that are GPU-accelerated. Driverless\nAI integrates RAPIDS cuML (scikit-learn)\ntransformers namely ClusterDist cuML Transformer,\nTruncSVDNum cuML Transformer, DBSCAN cuML Transformer for feature\nengineering and RAPIDS cuDF extension to\nXGBoost GBM / DART for building machine learning\nmodels on GPUs.", + "prompt_type": "plain" + }, + { + "output": "Automatic Feature Engineering\nDriverless AI performs automatic feature engineering as part of an\nexperiment's model building process. New features are created by\nperforming transformations and/or\ninteractions on the dataset columns. The\ndefault transformers picked up by Driverless AI depend on interpretability\nsettings of an experiment. For more interpretable models, simpler\ntransformations are applied. This can be seen in the preview of the\nexperiment. Feature engineering expert settings like include/exclude\ntransformers can be used to control the applied transformations. Transformers like binning, target encoding, weight of evidence,\nclustering, dimensionality reduction, autoencoders, TensorFlow, NLP BERT\nmodels, lags, aggregates, can be used to create Feature interactions. Feature creation and selection is evolutionary (based on variable\nimportance of previous iteration) in nature and uses\na genetic algorithm to find the best set of feature transformations\nand model parameters for an experiment/dataset.", + "prompt_type": "plain" + }, + { + "output": "Launching Driverless AI\nDriverless AI is tested on Chrome and Firefox but is supported on all\nmajor browsers. 
For the best user experience, we recommend using Chrome. 1. After Driverless AI is installed and started, open a browser and\n navigate to :12345. 2. The first time you log in to Driverless AI, you will be prompted to\n read and accept the Evaluation Agreement. You must accept the terms\n before continuing. Review the agreement, then click I agree to these\n terms to continue. 3. Log in by entering unique credentials. For example:\n Username: h2oai Password: h2oai\n4. As with accepting the Evaluation Agreement, the first time you log\n in, you will be prompted to enter your License Key. Click the Enter\n License button, then paste the License Key into the License Key\n entry field. Click Save to continue. This license key will be saved\n in the host machine's /license folder. Upon successful completion, you will be ready to add datasets and run\nexperiments.", + "prompt_type": "plain" + }, + { + "output": "- Standard output (stdout) log files: These log files are the standard\n output for different servers (given as prefix). - Standard error (stderr) log files: These log files are standard\n error for different servers (given as prefix). - TMPDIR directories: These are temporary directories used by various\n packages or servers. - uploads directory: This directory is where files are uploaded by the\n web server. - funnels directory: This directory is where certain forked processes\n store stderr or stdout files. - sys directory: This directory is used by the system to perform\n various generic tasks. - startup_job_user directory: This directory is used by the system to\n perform various startup tasks. Note\nServer logs and pid files are located in separate directories\n(server_logs and pids, respectively). Resources\n[]\nThe Resources drop-down menu lets you view system information, download\nDAI clients, and view DAI-related tutorials and guides. 
- System Info: View information relating to hardware utilization and\n worker activity.", + "prompt_type": "plain" + }, + { + "output": "- Python Client: Download the Driverless AI Python client. For more\n information, see python_client. - R Client: Download the Driverless AI R client. For more information,\n see r_client. - MOJO Java Runtime: Download the MOJO Java Runtime. For more\n information, see Mojo_Pipeline. - MOJO Py Runtime: Download the MOJO Python Runtime. For more\n information, see cpp_scoring_pipeline. - MOJO R Runtime: Download the MOJO R Runtime. For more information,\n see cpp_scoring_pipeline. - Documentation: View the DAI documentation. - About: View version, current user, and license information for your\n Driverless AI install. - API Token: Click to retrieve an access token for authentication\n purposes. []\nUser Options\nTo view news and announcements relating to Driverless AI, click User in\nthe top navigation bar, then click Messages. To log out of Driverless\nAI, click User, then click Logout. You can also configure various\nuser-specific settings by clicking User Settings.", + "prompt_type": "plain" + }, + { + "output": "Data Connectors\nDriverless AI provides a number of data connectors for accessing\nexternal data sources. The following data connection types are enabled\nby default:\n- upload: The standard upload feature of Driverless AI. - file: Local file system or server file system. - hdfs: Hadoop file system. Remember to configure the HDFS config\n folder path and keytab. - s3: Amazon S3. Optionally configure secret and access key. - recipe_file: Custom recipe file upload. - recipe_url: Custom recipe upload via URL. 
Additionally, the following connections types can be enabled by\nmodifying the enabled_file_systems configuration option (Native\ninstalls) or environment variable (Docker image installs):\n- dtap: Blue Data Tap file system, remember to configure the DTap\n section\n- gcs: Google Cloud Storage, remember to configure\n gcs_path_to_service_account_json\n- gbq: Google Big Query, remember to configure\n gcs_path_to_service_account_json\n- hive: Hive Connector, remember to configure Hive\n- minio: Minio Cloud Storage, remember to configure\n secret and access key\n- snow: Snowflake Data Warehouse, remember to configure Snowflake\n credentials\n- kdb: KDB+ Time Series Database, remember to configure KDB\n credentials\n- azrbs: Azure Blob Storage, remember to configure Azure credentials\n- jdbc: JDBC Connector, remember to configure JDBC\n- h2o_drive: H2O Drive, remember to configure h2o_drive_endpoint_url\n- feature_store: Feature Store, remember to configure\n feature_store_endpoint_url below\nThese data sources are exposed in the form of the file systems, and each\nfile system is prefixed by a unique prefix.", + "prompt_type": "plain" + }, + { + "output": "Cloud Installation\n\nDriverless AI is available on the following cloud platforms:\n\n- H2O AI Cloud (HAIC)\n- AWS - Amazon Machine Image (AMI) \n- Azure \n- Google Cloud \n\nThe installation steps for AWS, Azure, and Google Cloud assume that you\nhave a license key for Driverless AI. For information on how to obtain a\nlicense key for Driverless AI, visit\nhttps://h2o.ai/o/try-driverless-ai/. 
Once obtained, you will be prompted\nto paste the license key into the Driverless AI UI when you first log\nin, or you can save it as a .sig file and place it in the license folder\nthat you will create during the installation process.\n\ninstall/aws install/azure install/google-compute", + "prompt_type": "plain" + }, + { + "output": "Sampling in Driverless AI\n\nData Sampling\n\nDriverless AI does not perform any type of data sampling unless the\ndataset is big or highly imbalanced (for improved accuracy). What is\nconsidered big is dependent on your accuracy setting and the", + "prompt_type": "plain" + }, + { + "output": "statistical_threshold_data_size_large parameter in the config.toml file or in the Expert Settings. You can see if the data will be sampled by viewing the Experiment Preview when you set up the experiment. In the experiment preview below, I can see that my data was sampled down to 5 million rows for the final model, and to 100k rows for the feature evolution part of the experiment. .. figure:: images/experiment-settings-summary.png :alt: If Driverless AI decides to sample the data based on these settings and the data size, then Driverless AI performs the following types of sampling at the start of (and/or throughout) the experiment: - Random sampling for regression problems - Stratified sampling for classification problems - Imbalanced sampling for binary problems where the target distribution is considered imbalanced and imbalanced sampling methods are enabled (imbalance_sampling_method not set to \"off\")\nImbalanced Model Sampling Methods\nImbalanced sampling techniques can help in binary classification use\ncases with highly imbalanced outcomes (churn, fraud, rare event\nmodeling, etc.)", + "prompt_type": "plain" + }, + { + "output": "ImbalancedLightGBMModel and ImbalancedXGBoostGBMModel. 
Both perform repeated stratified sampling (bagging) inside their fit() method in an attempt to speed up modeling and to improve the resolution of the decision boundary between the two classes. Because these models are presented with a training dataset with a different prior than the original data, they require a probability correction that is performed as part of postprocessing in the predict() method. When imbalanced sampling is enabled, no sampling is performed at the start of the experiment for either the feature evolution phase or the final model pipeline. Instead, sampling (with replacement) is performed during model fitting, and the model is presented with a more balanced target class distribution than the original data. Because the sample is usually much smaller than the original data, this process can be repeated many times and each internal model's prediction can be averaged to improve accuracy (bagging). By default, the number of bags is automatically determined, but this value can be specified in expert settings (imbalance_sampling_number_of_bags=-1\nmeans automatic).", + "prompt_type": "plain" + }, + { + "output": "\"over_under_sampling\", each bag can have a slightly different balance between minority and majority classes. There are multiple settings for imbalanced sampling: - Disabled (imbalance_sampling_method=\"off\", the default) - Automatic (imbalance_sampling_method=\"auto\"). A combination of the two methods below. - Under- and over-sample both minority and majority classes to reach rough class balance in each sampled bag (imbalance_sampling_method=\"over_under_sampling\"). If original data has 500:10000 imbalance, this method could sample 1000:1500 samples for the first bag, 500:400 samples for the second bag, and so on. - Under-sample the majority class to reach exact class balance in each sampled bag (imbalance_sampling_method=\"under_sampling\"). Would create 500:500 samples per bag for the same example imbalance ratio. 
Each bag would then sample the 500 rows from each class with replacement, so each bag is still different. The amount of imbalance controls how aggressively imbalanced models are used for the experiment (if imbalance_sampling_method is not \"off\"): - By default, imbalanced is defined as when the majority class is 5 times more common than the minority class (imbalance_ratio_sampling_threshold=5, configurable).", + "prompt_type": "plain" + }, + { + "output": "- By default, heavily imbalanced is defined as when the majority class is 25 times more common than the minority class (heavy_imbalance_ratio_sampling_threshold=25, configurable). In highly imbalanced cases, imbalanced models are used exclusively. Notes: - The binary imbalanced sampling techniques and settings described in this section apply only to the **Imbalanced Model** types listed above. - The data has to be large enough to enable imbalanced sampling: by default, imbalance_sampling_threshold_min_rows_original is set to 100,000 rows. - If imbalance_sampling_number_of_bags=-1 (automatic) and imbalance_sampling_method=\"auto\", the number of bags will be automatically determined by the experiment's accuracy settings and by the total size of all bags together, controlled by imbalance_sampling_max_multiple_data_size, which defaults to 1. So all bags together will be no larger than 1x the original data by default. For an imbalance of 1:19, each balanced 1:1 sample would be as large as 10% of the data, so it would take up to 10 such 1:1 bags (or approximately 10 if the balance is different or slightly random) to reach that limit.", + "prompt_type": "plain" + }, + { + "output": "That's why the other limit of 3 (by default) for feature evolution exists. Feel free to adjust to your preferences. - If imbalance_sampling_number_of_bags=-1 (automatic) and imbalance_sampling_method=\"over_under_sampling\" or \"under_sampling\", the number of bags will be equal to the experiment's accuracy settings (accuracy 7 will use 7 bags). 
- The upper limit for the number of bags can be specified separately for feature evolution (imbalance_sampling_max_number_of_bags_feature_evolution) and globally (i.e., final model) set by (imbalance_sampling_max_number_of_bags) and both will be strictly enforced. - Instead of balancing the target class distribution via the default value of imbalance_sampling_target_minority_fraction=-1 (same as setting it to 0.5), one can control the target fraction of the minority class. So if the data starts with a 1:1000 imbalance and you wish to model with a 1:9 imbalance, specify imbalance_sampling_target_minority_fraction=0.1.", + "prompt_type": "plain" + }, + { + "output": "Ensemble Learning in Driverless AI\nThis section describes Driverless AI's ensemble learning capabilities. Ensemble Method\nAn ensemble is a hierarchical composition of multiple models, where\nevery level in the hierarchy uses the output of the previous level as\ninput. The simplest ensemble is a 2-layer architecture with a single\nlinear model (the meta model or meta learner) combining the predictions\nfrom several first layer models (base models). This is the default\nensemble model in Driverless AI due to its robustness and linear\nproperties that allow Shapley contributions to be fully interpretable\neven for ensembles. By default, the meta learner is a linear blender that assigns\nnon-negative weights (that sum to 1) to all the base models. The weights\nare assigned at the model level and obtained using cross-validation (to\navoid overfitting of the meta learner). When making predictions on a test\nset, the predictions from all cross-validation models are averaged. 
For\nexample, if 2 models are ensembled together (e.g., a LightGBM model and\nan XGBoost model, each doing 4-fold cross validation), then the linear\nblender will find a weight for all 4 LightGBM models (e.g., 0.37) and a\nweight for all 4 XGBoost models (e.g., 0.63).", + "prompt_type": "plain" + }, + { + "output": "When Driverless AI ensembles a single model\n(level 1), then it is simply taking the average of the CV model\npredictions (the model itself is assigned a weight of 1). Ensemble Levels\nDriverless AI has multiple ensemble levels that are tied to the accuracy\nknob. As accuracy increases, the ensemble level increases. Ensemble level can also be controlled using\nEnsemble Level for Final Modeling Pipeline from\nthe Model settings of the expert settings panel. The following is a\ndescription of each ensemble level:\n- level 0: No ensemble, only a final single model. Cross validation is\n only used to determine the model validation performance. The final\n model is trained on the whole dataset. - level 1: Cross validation is performed for 1 model and the CV model\n predictions are ensembled. - level 2: Cross validation is performed for 2 models and the CV model\n predictions are ensembled. For example, Driverless AI may choose to\n ensemble an XGBoost model and a LightGBM model.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI MLI Standalone Python Scoring Package\nThis package contains an exported model and Python 3.8 source code\nexamples for productionizing models built using H2O Driverless AI\nMachine Learning Interpretability (MLI) tool. This is only available for\ninterpreted models and can be downloaded by clicking the Scoring\nPipeline button on the Interpreted Models page. The files in this package let you obtain reason codes for a given row of\ndata in a couple of different ways:\n- From Python 3.8, you can import a scoring module and use it to\n transform and score on new data. 
- From other languages and platforms, you can use the TCP/HTTP scoring\n service bundled with this package to call into the scoring pipeline\n module through remote procedure calls (RPC). MLI Python Scoring Package Files\nThe scoring-pipeline-mli folder includes the following notable files:\n- example.py: An example Python script demonstrating how to import and\n interpret new records. - run_example.sh: Runs example.py (This also sets up a virtualenv with\n prerequisite libraries.)", + "prompt_type": "plain" + }, + { + "output": "This compares\n K-LIME and Driverless AI Shapley reason codes. - tcp_server.py: A standalone TCP server for hosting MLI services. - http_server.py: A standalone HTTP server for hosting MLI services. - run_tcp_server.sh: Runs the TCP scoring service (specifically,\n tcp_server.py). - run_http_server.sh: Runs HTTP scoring service (runs http_server.py). - example_client.py: An example Python script demonstrating how to\n communicate with the MLI server. - example_shapley.py: An example Python script demonstrating how to\n compare K-LIME and Driverless AI Shapley reason codes. - run_tcp_client.sh: Demonstrates how to communicate with the MLI\n service via TCP (runs example_client.py). - run_http_client.sh: Demonstrates how to communicate with the MLI\n service via HTTP (using curl). Quick Start\nThere are two methods for starting the MLI Standalone Scoring Pipeline. Quick Start - Recommended Method\nThis is the recommended method for running the MLI Scoring Pipeline.", + "prompt_type": "plain" + }, + { + "output": "- You want to use a quick start approach. Prerequisites\n- A valid Driverless AI license key. - A completed Driverless AI experiment. - Downloaded MLI Scoring Pipeline. Running the MLI Scoring Pipeline - Recommended\n1. Download the TAR SH version of Driverless AI from\n https://www.h2o.ai/download/. 2. Use bash to execute the download. This creates a new dai-nnn folder. 3. Change directories into the new Driverless AI folder. 4. 
Run the following to install the Python Scoring Pipeline for your\n completed Driverless AI experiment:\n5. Run the following command to run the included scoring pipeline\n example:\nQuick Start - Alternative Method\nThis section describes an alternative method for running the MLI\nStandalone Scoring Pipeline. This version requires Internet access. Note\nIf you use a scorer from a version prior to 1.10.4.1, you need to add\nexport SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True prior to\ncreating the new scorer python environment, either in run_example.sh or\nin the same terminal where the shell scripts are executed.", + "prompt_type": "plain" + }, + { + "output": "Prerequisites\n- Valid Driverless AI license. - The scoring module and scoring service are supported only on Linux\n with Python 3.8 and OpenBLAS. - The scoring module and scoring service download additional packages\n at install time and require internet access. Depending on your\n network environment, you might need to set up internet access via a\n proxy. - Apache Thrift (to run the scoring service in TCP mode)\nExamples of how to install these prerequisites are below. Installing Python 3.8 on Ubuntu 16.10 or Later:\n sudo apt install python3.8 python3.8-dev python3-pip python3-dev \\\n python-virtualenv python3-virtualenv\nInstalling Python 3.8 on Ubuntu 16.04:\n sudo add-apt-repository ppa:deadsnakes/ppa\n sudo apt-get update\n sudo apt-get install python3.8 python3.8-dev python3-pip python3-dev \\\n python-virtualenv python3-virtualenv\nInstalling Conda 3.6:\n You can install Conda using either Anaconda or Miniconda. 
Refer to the\n links below for more information:\n - Anaconda - https://docs.anaconda.com/anaconda/install.html\n - Miniconda - https://docs.conda.io/en/latest/miniconda.html\nInstalling the Thrift Compiler\nRefer to Thrift documentation at\nhttps://thrift.apache.org/docs/BuildingFromSource for more information.", + "prompt_type": "plain" + }, + { + "output": "sudo ldconfig /usr/local/lib\nRunning the MLI Scoring Pipeline - Alternative Method\n1. On the MLI page, click the Scoring Pipeline button. 2. Unzip the scoring pipeline, and run the following examples in the\n scoring-pipeline-mli folder. MLI Python Scoring Module\nThe MLI scoring module is a Python module bundled into a standalone\nwheel file (name scoring*.whl). All the prerequisites for the scoring\nmodule to work correctly are listed in the 'requirements.txt' file. To\nuse the scoring module, all you have to do is create a Python\nvirtualenv, install the prerequisites, and then import and use the\nscoring module as follows:\n ----- See 'example.py' for complete example. -----\n from scoring_487931_20170921174120_b4066 import Scorer\n scorer = KLimeScorer() # Create instance. score = scorer.score_reason_codes([ # Call score_reason_codes()\n 7.416, # sepal_len\n 3.562, # sepal_wid\n 1.049, # petal_len\n 2.388, # petal_wid\n ])\nThe scorer instance provides the following methods:\n- score_reason_codes(list): Get K-LIME reason codes for one row (list\n of values).", + "prompt_type": "plain" + }, + { + "output": "-----\n virtualenv -p python3.8 env\n source env/bin/activate\n pip install --use-deprecated=legacy-resolver -r requirements.txt\n python example.py\nK-LIME vs Shapley Reason Codes\nThere are times when the K-LIME model score is not close to the\nDriverless AI model score. In this case it may be better to use reason\ncodes using the Shapley method on the Driverless AI model. Note that the\nreason codes from Shapley will be in the transformed feature space. 
To see an example of using both K-LIME and Driverless AI Shapley reason\ncodes in the same Python session, run:\n bash run_example_shapley.sh\nFor this batch script to succeed, MLI must be run on a Driverless AI\nmodel. If you have run MLI in standalone (external model) mode, there\nwill not be a Driverless AI scoring pipeline. If MLI was run with transformed features, the Shapley example scripts\nwill not be exported. You can generate exact reason codes directly from\nthe Driverless AI model scoring pipeline. MLI Scoring Service Overview\nThe MLI scoring service hosts the scoring module as a HTTP or TCP\nservice.", + "prompt_type": "plain" + }, + { + "output": "score_batch``. Both functions let you specify", + "prompt_type": "plain" + }, + { + "output": "pred_contribs=[True|False]`` to get MLI predictions (KLime/Shapley) on a\nnew dataset. See the example_shapley.py file for more information. MLI Scoring Service - TCP Mode (Thrift)\nThe TCP mode lets you use the scoring service from any language\nsupported by Thrift, including C, C++, C#, Cocoa, D, Dart, Delphi, Go,\nHaxe, Java, Node.js, Lua, perl, PHP, Python, Ruby and Smalltalk. To start the scoring service in TCP mode, you will need to generate the\nThrift bindings once, then run the server:\n ----- See 'run_tcp_server.sh' for complete example. -----\n thrift --gen py scoring.thrift\n python tcp_server.py --port=9090\nNote that the Thrift compiler is only required at build-time. It is not\na run time dependency, i.e. once the scoring services are built and\ntested, you do not need to repeat this installation process on the\nmachines where the scoring services are intended to be deployed. 
To call the scoring service, generate the Thrift bindings for your\nlanguage of choice, then make RPC calls via TCP sockets using Thrift's\nbuffered transport in conjunction with its binary protocol.", + "prompt_type": "plain" + }, + { + "output": "-----\n thrift --gen py scoring.thrift\n\n ----- See 'example_client.py' for complete example. -----\n socket = TSocket.TSocket('localhost', 9090)\n transport = TTransport.TBufferedTransport(socket)\n protocol = TBinaryProtocol.TBinaryProtocol(transport)\n client = ScoringService.Client(protocol)\n transport.open()\n row = Row()\n row.sepalLen = 7.416 # sepal_len\n row.sepalWid = 3.562 # sepal_wid\n row.petalLen = 1.049 # petal_len\n row.petalWid = 2.388 # petal_wid\n scores = client.score_reason_codes(row)\n transport.close()\nYou can reproduce the exact same result from other languages, e.g. Java:\n thrift --gen java scoring.thrift\n // Dependencies: \n // commons-codec-1.9.jar\n // commons-logging-1.2.jar\n // httpclient-4.4.1.jar\n // httpcore-4.4.1.jar\n // libthrift-0.10.0.jar\n // slf4j-api-1.7.12.jar\n import ai.h2o.scoring.Row;\n import ai.h2o.scoring.ScoringService;\n import org.apache.thrift.TException;\n import org.apache.thrift.protocol.TBinaryProtocol;\n import org.apache.thrift.transport.TSocket;\n import org.apache.thrift.transport.TTransport;\n import java.util.List;\n public class Main {\n public static void main(String[] args) {\n try {\n TTransport transport = new TSocket(\"localhost\", 9090);\n transport.open();\n ScoringService.Client client = new ScoringService.Client(\n new TBinaryProtocol(transport));\n Row row = new Row(7.642, 3.436, 6.721, 1.020);\n List scores = client.score_reason_codes(row);\n System.out.println(scores);\n transport.close();\n } catch (TException ex) {\n ex.printStackTrace();\n }\n }\n }\nScoring Service - HTTP Mode (JSON-RPC 2.0)\nThe HTTP mode lets you use the scoring service using plaintext JSON-RPC\ncalls.", + "prompt_type": "plain" + }, + { + "output": "MinIO 
Setup\n\nThis section provides instructions for configuring Driverless AI to work\nwith MinIO. Note that unlike S3, authentication must also be configured\nwhen the MinIO data connector is specified.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run\n--runtime=nvidia(>= Docker 19.03) ornvidia-docker(< Docker 19.03) command when starting the Driverless AI Docker image. Usedocker\nversionto check which version of Docker you are using. Description of Configuration Attributes --------------------------------------- -minio_endpoint_url: The endpoint URL that will be used to access MinIO. -minio_access_key_id: The MinIO access key. -minio_secret_access_key: The MinIO secret access key. -minio_skip_cert_verification: If this is set to true, then MinIO connector will skip certificate verification. This is set to false by default. -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. Enable MinIO with Authentication -------------------------------- .. container:: tabs .. group-tab:: Docker Image Installs This example enables the MinIO data connector with authentication by passing an endpoint URL, access key ID, and an access key.", + "prompt_type": "plain" + }, + { + "output": "This lets you reference data stored in MinIO directly using the endpoint URL, for example: http://\\ //datasets/iris.csv. .. code:: bash nvidia-docker run \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,minio\" \\ -e DRIVERLESS_AI_MINIO_ENDPOINT_URL=\"\" -e DRIVERLESS_AI_MINIO_ACCESS_KEY_ID=\"\" \\ -e DRIVERLESS_AI_MINIO_SECRET_ACCESS_KEY=\"\" \\ -e DRIVERLESS_AI_MINIO_SKIP_CERT_VERIFICATION=\"false\" \\ -p 12345:12345 \\ --init -it --rm \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. 
container:: group-tab Docker Image with the config.toml This example shows how to configure MinIO options in the config.toml file, and then specify that file when starting Driverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "Python Client Admin API\nThe following sections describe Driverless AI's Admin API. Note\nThe Admin API is currently only available through the DAI Python client. Understanding the Admin API\nThe Driverless AI Admin API lets you manage entities created by other\nusers by providing options for listing, deleting, or transferring them. The primary component of the Admin API is the new user role called\nAdmin. Driverless AI currently supports only local Admin user\nauthorization, which is defined through the local_administrator_list\nconfig parameter. For example, to promote UserA and UserB to\nadministrator, add the following config override to the config.toml\nfile:\n local_administrator_list = ['UserA', 'UserB']\nAdmin API methods\nThe following is a list of DAI Admin API methods. Note\nThe following examples assume that you have initialized the h2oai Python\nclient and are logged in with a user that has the Admin role. 
Listing entities\nTo list the datasets of a particular user, use the following client\nmethod:\n # cli = h2oai_client.Client(...)\n cli.admin.list_entities(\n username=\"other-user-name\",\n kind=\"dataset\",\n )\nThe following is a list of entities that can be listed with the\npreceding method:\n- model: Experiments\n- dataset: Datasets\n- project: Projects\n- deployment: Deployments\n- interpretation: MLI interpretations\n- model_diagnostic: Model diagnostics\nDeleting entities\nIf you know the kind and key associated with an entity, you can delete\nthat entity with the following client method:\n # cli = h2oai_client.Client(...)\n cli.admin.delete_entity(\n username=\"other-user-name\",\n kind=\"model\",\n key=\"model-key\",\n )\nNote\nAn entity's kind and key can be obtained through the listing API.", + "prompt_type": "plain" + }, + { + "output": "Linux Docker Images\nTo simplify local installation, Driverless AI is provided as a Docker\nimage for the following system combinations:\n ---------------------------------------------------------------------\n Host OS Docker Version Host Architecture Min Mem\n --------------------------- -------------- ----------------- --------\n Ubuntu 16.04 or later Docker CE x86_64 64 GB\n RHEL or CentOS 7.4 or later Docker CE x86_64 64 GB\n NVIDIA DGX Registry x86_64 \n ---------------------------------------------------------------------\nNote: CUDA 11.2.2 or later with NVIDIA drivers >= is recommended (GPU\nonly). Note that if you are using K80 GPUs, the minimum required NVIDIA\ndriver version is 450.80.02. For the best performance, including GPU support, use nvidia-docker. For\na lower-performance experience without GPUs, use regular docker (with\nthe same docker image). 
These installation steps assume that you have a license key for\nDriverless AI.", + "prompt_type": "plain" + }, + { + "output": "--shm-size=2g`` is recommended for Driverless AI docker command.\n\nubuntu rhel nvidia-dgx", + "prompt_type": "plain" + }, + { + "output": "Install the Driverless AI AWS Marketplace AMI\nA Driverless AI AMI is available in the AWS Marketplace beginning with\nDriverless AI version 1.5.2. This section describes how to install and\nrun Driverless AI through the AWS Marketplace. Environment\n+---------------------------+--------------+---------+----------------+\n| Provider | Instance | Num | Suitable for |\n| | Type | GPUs | |\n+===========================+==============+=========+================+\n| AWS | p2.xlarge | 1 | E |\n| | | | |\n| - | ---- | ---- | xperimentation |\n| - | -----------+ | ------+ | |\n| - | | | ---- |\n| - | p2.8xlarge | 8 | -------------+ |\n| - | | | |\n| - | ---- | ---- | Serious |\n| - | -----------+ | ------+ | use |\n| - | | | |\n| | | 16 | ---- |\n| | p2.16xlarge | | -------------+ |\n| | | ---- | |\n| | ---- | ------+ | Serious |\n| | -----------+ | | use |\n| | | 1 | |\n| | p3.2xlarge | | ---- |\n| | | ---- | -------------+ |\n| | ---- | ------+ | |\n| | -----------+ | | E |\n| | | 4 | |\n| | p3.8xlarge | | xperimentation |\n| | | ---- | |\n| | ---- | ------+ | ---- |\n| | -----------+ | | -------------+ |\n| | | 8 | |\n| | | | Serious |\n| | p3.16xlarge | ---- | use |\n| | | ------+ | |\n| | ---- | | ---- |\n| | -----------+ | 1 | -------------+ |\n| | | | |\n| | g3.4xlarge | ---- | Serious |\n| | | ------+ | use |\n| | ---- | | |\n| | -----------+ | 2 | ---- |\n| | | | -------------+ |\n| | g3.8xlarge | ---- | |\n| | | ------+ | E |\n| | ---- | | |\n| | -----------+ | 4 | xperimentation |\n| | | | |\n| | | | ---- |\n| | g3.16xlarge | | -------------+ |\n| | | | |\n| | | | E |\n| | | | |\n| | | | xperimentation |\n| | | | |\n| | | | ---- |\n| | | | -------------+ |\n| | | | |\n| | | | Serious |\n| | | | use 
|\n+---------------------------+--------------+---------+----------------+\nInstallation Procedure\n1.", + "prompt_type": "plain" + }, + { + "output": "2. Search for Driverless AI. 3. Select the version of Driverless AI that you want to install. 4. Scroll down to review/edit your region and the selected\n infrastructure and pricing. 5. Return to the top and select Continue to Subscribe. 6. Review the subscription, then click Continue to Configure. 7. If desired, change the Fullfillment Option, Software Version, and\n Region. Note that this page also includes the AMI ID for the\n selected software version. Click Continue to Launch when you are\n done. 8. Review the configuration and choose a method for launching\n Driverless AI. Click the Usage Instructions button in AWS to review\n your Driverless AI username and password. Scroll down to the bottom\n of the page and click Launch when you are done. You will receive a \"Success\" message when the image launches\nsuccessfully. []\nStarting Driverless AI\nThis section describes how to start Driverless AI after the Marketplace\nAMI has been successfully launched. 1. Navigate to the EC2 Console.", + "prompt_type": "plain" + }, + { + "output": "Select your instance. 3. Open another browser and launch Driverless AI by navigating to\n https://\\ :12345. 4. Sign in to Driverless AI with the username h2oai and use the AWS\n InstanceID as the password. You will be prompted to enter your\n Driverless AI license key when you log in for the first time. Stopping the EC2 Instance\nThe EC2 instance will continue to run even when you close the\naws.amazon.com portal. To stop the instance:\n1. On the EC2 Dashboard, click the Running Instances link under the\n Resources section. 2. Select the instance that you want to stop. 3. In the Actions drop down menu, select Instance State > Stop. 4. A confirmation page will display. Click Yes, Stop to stop the\n instance. 
Upgrading the Driverless AI Marketplace Image\nNote that the first offering of the Driverless AI Marketplace image was\n1.5.2. As such, it is only possible to upgrade to versions greater than\nthat. Perform the following steps if you are upgrading to a Driverless AI\nMarketplace image version greater than 1.5.2.", + "prompt_type": "plain" + }, + { + "output": "dai_NEWVERSION.debbelow with the new Driverless AI version (for example,dai_1.5.4_amd64.deb``).\nNote that this upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade.\n\n # Stop Driverless AI.\n sudo systemctl stop dai\n\n # Make a backup of /opt/h2oai/dai/tmp directory at this time.\n\n # Upgrade Driverless AI.\n sudo dpkg -i dai_NEWVERSION.deb\n sudo systemctl daemon-reload\n sudo systemctl start dai", + "prompt_type": "plain" + }, + { + "output": "exclusive_mode------------------ .. container:: dropdown **Exclusive level of access to node resources** There are three levels of access: - safe: this level assumes that there might be another experiment also running on same node. - moderate: this level assumes that there are no other experiments or tasks running on the same node, but still only uses physical core counts. - max: this level assumes that there is absolutely nothing else running on the node except the experiment The default level is \"safe\" and the equivalent config.toml parameter isexclusive_mode`. If :ref:`multinode is\n enabled, this option has no effect, unless\n worker_remote_processors=1 when it will still be applied. Each\n exclusive mode can be chosen, and then fine-tuned using each expert\n settings. 
Changing the exclusive mode will reset all exclusive mode\n related options back to default and then re-apply the specific rules\n for the new mode, which will undo any fine-tuning of expert options\n that are part of exclusive mode rules.", + "prompt_type": "plain" + }, + { + "output": "max_cores``\n\nNumber of Cores to Use\n\nSpecify the number of cores to use per experiment. Note that if you\nspecify 0, all available cores will be used. Lower values can reduce\nmemory usage but might slow down the experiment. This value defaults to\n0(all). One can also set it using the environment variable\nOMP_NUM_THREADS or OPENBLAS_NUM_THREADS (e.g., in bash: 'export\nOMP_NUM_THREADS=32' or 'export OPENBLAS_NUM_THREADS=32')", + "prompt_type": "plain" + }, + { + "output": "max_fit_cores``\n\nMaximum Number of Cores to Use for Model Fit\n\nSpecify the maximum number of cores to use for a model's fit call. Note\nthat if you specify 0, all available cores will be used. This value\ndefaults to 10.", + "prompt_type": "plain" + }, + { + "output": "use_dask_cluster-------------------- .. container:: dropdown **If full dask cluster is enabled, use full cluster** Specify whether to use full multinode distributed cluster (True) or single-node dask (False). In some cases, using entire cluster can be inefficient. E.g. several DGX nodes can be more efficient, if used one DGX at a time for medium-sized data. The equivalent config.toml parameter isuse_dask_cluster``.", + "prompt_type": "plain" + }, + { + "output": "max_predict_cores``\n\nMaximum Number of Cores to Use for Model Predict\n\nSpecify the maximum number of cores to use for a model's predict call.\nNote that if you specify 0, all available cores will be used. 
This value\ndefaults to 0(all).", + "prompt_type": "plain" + }, + { + "output": "max_predict_cores_in_dai``\n\nMaximum Number of Cores to Use for Model Transform and Predict When\nDoing MLI, AutoDoc\n\nSpecify the maximum number of cores to use for a model's transform and\npredict call when doing operations in the Driverless AI MLI GUI and the\nDriverless AI R and Python clients. Note that if you specify 0, all\navailable cores will be used. This value defaults to 4.", + "prompt_type": "plain" + }, + { + "output": "batch_cpu_tuning_max_workers``\n\nTuning Workers per Batch for CPU\n\nSpecify the number of workers used in CPU mode for tuning. A value of 0\nuses the socket count, while a value of -1 uses all physical cores\ngreater than or equal to 1. This value defaults to 0(socket count).", + "prompt_type": "plain" + }, + { + "output": "cpu_max_workers``\n\nNumber of Workers for CPU Training\n\nSpecify the number of workers used in CPU mode for training:\n\n- 0: Use socket count (Default)\n- -1: Use all physical cores >= 1 that count", + "prompt_type": "plain" + }, + { + "output": "num_gpus_per_experiment``\n\n#GPUs/Experiment\n\nSpecify the number of GPUs to use per experiment. A value of -1\n(default) specifies to use all available GPUs. Must be at least as large\nas the number of GPUs to use per model (or -1). In multinode context\nwhen using dask, this refers to the per-node value.", + "prompt_type": "plain" + }, + { + "output": "min_num_cores_per_gpu``\n\nNum Cores/GPU\n\nSpecify the number of CPU cores per GPU. In order to have a sufficient\nnumber of cores per GPU, this setting limits the number of GPUs used.\nThis value defaults to 2.", + "prompt_type": "plain" + }, + { + "output": "num_gpus_per_model---------------------- .. container:: dropdown **#GPUs/Model** Specify the number of GPUs to use per model. The equivalent config.toml parameter isnum_gpus_per_model`` and the default value\n\n is 1. 
Currently num_gpus_per_model other than 1 disables GPU\n locking, so is only recommended for single experiments and single\n users. Setting this parameter to -1 means use all GPUs per model. In\n all cases, XGBoost tree and linear models use the number of GPUs\n specified per model, while LightGBM and Tensorflow revert to using 1\n GPU/model and run multiple models on multiple GPUs. FTRL does not\n use GPUs. Rulefit uses GPUs for parts involving obtaining the tree\n using LightGBM. In multinode context when using dask, this parameter\n refers to the per-node value.", + "prompt_type": "plain" + }, + { + "output": "num_gpus_for_prediction--------------------------- .. container:: dropdown **Num. of GPUs for Isolated Prediction/Transform** Specify the number of GPUs to use forpredictfor models andtransformfor transformers when running outside offit/fit_transform. Ifpredictortransformare called in the same process asfit/fit_transform, the number of GPUs will match. New processes will use this count for applicable models and transformers. Note that enablingtensorflow_nlp_have_gpus_in_productionwill override this setting for relevant TensorFlow NLP transformers. The equivalent config.toml parameter isnum_gpus_for_prediction`` and the default value is\n\n \"0\".\n\n Note: When GPUs are used, TensorFlow, PyTorch models and\n transformers, and RAPIDS always predict on GPU. And RAPIDS requires\n Driverless AI python scoring package also to be used on GPUs. In\n multinode context when using dask, this refers to the per-node\n value.", + "prompt_type": "plain" + }, + { + "output": "gpu_id_start---------------- .. container:: dropdown **GPU Starting ID** Specify Which gpu_id to start with. If using CUDA_VISIBLE_DEVICES=... to control GPUs (preferred method), gpu_id=0 is the first in that restricted list of devices. For example, ifCUDA_VISIBLE_DEVICES='4,5'thengpu_id_start=0`` will refer to\n device #4. 
From expert mode, to run 2 experiments, each on a distinct GPU out\n of 2 GPUs, then:\n - Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=1,\n gpu_id_start=0\n - Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=1,\n gpu_id_start=1\n From expert mode, to run 2 experiments, each on a distinct GPU out\n of 8 GPUs, then:\n - Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=4,\n gpu_id_start=0\n - Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=4,\n gpu_id_start=4\n To run on all 4 GPUs/model, then\n - Experiment#1: num_gpus_per_model=4, num_gpus_per_experiment=4,\n gpu_id_start=0\n - Experiment#2: num_gpus_per_model=4, num_gpus_per_experiment=4,\n gpu_id_start=4\n If num_gpus_per_model!=1, global GPU locking is disabled.", + "prompt_type": "plain" + }, + { + "output": "assumed_simultaneous_dt_forks_munging``\n\nAssumed/Expected number of munging forks\n\nExpected maximum number of forks, used to ensure datatable doesn't\noverload system. For actual use beyond this value, system will start to\nhave slow-down issues. 
The default value is 3.", + "prompt_type": "plain" + }, + { + "output": "max_max_dt_threads_munging``\n\nMaximum of threads for datatable for munging\n\nMaximum number of threads for datatable for munging.", + "prompt_type": "plain" + }, + { + "output": "max_dt_threads_munging``\n\nMax Number of Threads to Use for datatable and OpenBLAS for Munging and\nModel Training\n\nSpecify the maximum number of threads to use for datatable and OpenBLAS\nduring data munging (applied on a per process basis):\n\n- 0 = Use all threads\n- -1 = Automatically select number of threads (Default)", + "prompt_type": "plain" + }, + { + "output": "max_dt_threads_readwrite``\n\nMax Number of Threads to Use for datatable Read and Write of Files\n\nSpecify the maximum number of threads to use for datatable during data\nreading and writing (applied on a per process basis):\n\n- 0 = Use all threads\n- -1 = Automatically select number of threads (Default)", + "prompt_type": "plain" + }, + { + "output": "max_dt_threads_stats_openblas``\n\nMax Number of Threads to Use for datatable Stats and OpenBLAS\n\nSpecify the maximum number of threads to use for datatable stats and\nOpenBLAS (applied on a per process basis):\n\n- 0 = Use all threads\n- -1 = Automatically select number of threads (Default)", + "prompt_type": "plain" + }, + { + "output": "allow_reduce_features_when_failure``\nWhether to reduce features when model fails (GPU OOM Protection)\nBig models (on big data or with lot of features) can run out of memory\non GPUs. This option is primarily useful for avoiding model building\nfailure due to GPU Out Of Memory (OOM). Currently is applicable to all\nnon-dask XGBoost models (i.e. GLMModel, XGBoostGBMModel,\nXGBoostDartModel, XGBoostRFModel), during normal fit or when using\nOptuna. This is achieved by reducing features until model does not fail. 
For\nexample, If XGBoost runs out of GPU memory, this is detected, and\n(regardless of setting of skip_model_failures), we perform feature\nselection using XGBoost on subsets of features. The dataset is\nprogressively reduced by factor of 2 with more models to cover all\nfeatures. This splitting continues until no failure occurs. Then all\nsub-models are used to estimate variable importance by absolute\ninformation gain, in order to decide which features to include. Finally,\na single model with the most important features is built using the\nfeature count that did not lead to OOM.", + "prompt_type": "plain" + }, + { + "output": "reduce_repeats_when_failure``\n\nNumber of repeats for models used for feature selection during failure\nrecovery\n\nWith\nallow_reduce_features_when_failure ,\nthis controls how many repeats of sub-models are used for feature\nselection. A single repeat only has each sub-model consider a single\nsub-set of features, while repeats shuffle which features are considered\nallowing more chance to find important interactions. More repeats can\nlead to higher accuracy. The cost of this option is proportional to the\nrepeat count. The default value is 1.", + "prompt_type": "plain" + }, + { + "output": "fraction_anchor_reduce_features_when_failure``\n\nFraction of features treated as anchor for feature selection during\nfailure recovery\n\nWith\nallow_reduce_features_when_failure ,\nthis controls the fraction of features treated as an anchor that are\nfixed for all sub-models. Each repeat gets new anchors. For tuning and\nevolution, the probability depends upon any prior importance (if\npresent) from other individuals, while final model uses uniform\nprobability for anchor features. The default fraction is 0.1.", + "prompt_type": "plain" + }, + { + "output": "xgboost_reduce_on_errors_list``\n\nErrors From XGBoost That Trigger Reduction of Features\n\nError strings from XGBoost that are used to trigger re-fit on reduced\nsub-models. 
See allow_reduce_features_when_failure.", + "prompt_type": "plain" + }, + { + "output": "lightgbm_reduce_on_errors_list``\n\nErrors From LightGBM That Trigger Reduction of Features\n\nError strings from LightGBM that are used to trigger re-fit on reduced\nsub-models. See allow_reduce_features_when_failure.", + "prompt_type": "plain" + }, + { + "output": "num_gpus_per_hyperopt_dask``\n\nGPUs / HyperOptDask\n\nSpecify the number of GPUs to use per model hyperopt training task. To\nuse all GPUs, set this to -1. For example, when this is set to -1 and\nthere are 4 GPUs available, all of them can be used for the training of\na single model across a Dask cluster. Ignored if GPUs are disabled or if\nthere are no GPUs on system. In multinode context, this refers to the\nper-node value.", + "prompt_type": "plain" + }, + { + "output": "detailed_traces``\n\nEnable Detailed Traces\n\nSpecify whether to enable detailed tracing in Driverless AI trace when\nrunning an experiment. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "debug_log``\n\nEnable Debug Log Level\n\nIf enabled, the log files will also include debug logs. This is disabled\nby default.", + "prompt_type": "plain" + }, + { + "output": "log_system_info_per_experiment``\n\nEnable Logging of System Information for Each Experiment\n\nSpecify whether to include system information such as CPU, GPU, and disk\nspace at the start of each experiment log. Note that this information is\nalready included in system logs. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "AutoDoc Settings\n\nThis section includes settings that can be used to configure AutoDoc.", + "prompt_type": "plain" + }, + { + "output": "make_autoreport``\n\nMake AutoDoc\n\nSpecify whether to create an AutoDoc for the experiment after it has\nfinished running. 
This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_report_name``\n\nAutoDoc Name\n\nSpecify a name for the AutoDoc report. This is set to \"report\" by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "autodoc_template``\n\nAutoDoc Template Location\n\nSpecify a path for the AutoDoc template:\n\n- To generate a custom AutoDoc template, specify the full path to your\n custom template.\n- To generate the standard AutoDoc, specify the default value for this\n setting.", + "prompt_type": "plain" + }, + { + "output": "autodoc_output_type``\n\nAutoDoc File Output Type\n\nSpecify the AutoDoc output type. Choose from the following file types:\n\n- docx (Default)\n- md", + "prompt_type": "plain" + }, + { + "output": "autodoc_subtemplate_type``\n\nAutoDoc SubTemplate Type\n\nSpecify the type of sub-templates to use. Choose from the following:\n\n- auto (Default)\n- md\n- docx", + "prompt_type": "plain" + }, + { + "output": "autodoc_max_cm_size``\n\nConfusion Matrix Max Number of Classes\n\nSpecify the maximum number of classes in the confusion matrix. This\nvalue defaults to 10.", + "prompt_type": "plain" + }, + { + "output": "autodoc_num_features``\n\nNumber of Top Features to Document\n\nSpecify the number of top features to display in the document. To\ndisable this setting, specify -1. This is set to 50 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_min_relative_importance``\n\nMinimum Relative Feature Importance Threshold\n\nSpecify the minimum relative feature importance in order for a feature\nto be displayed. This value must be a float >= 0 and <= 1. This is set\nto 0.003 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_include_permutation_feature_importance``\n\nPermutation Feature Importance\n\nSpecify whether to compute permutation-based feature importance. 
This is\ndisabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_feature_importance_num_perm``\n\nNumber of Permutations for Feature Importance\n\nSpecify the number of permutations to make per feature when computing\nfeature importance. This is set to 1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_feature_importance_scorer``\n\nFeature Importance Scorer\n\nSpecify the name of the scorer to be used when calculating feature\nimportance. Leave this setting unspecified to use the default scorer for\nthe experiment.", + "prompt_type": "plain" + }, + { + "output": "autodoc_pd_max_rows``\n\nPDP Max Number of Rows\n\nSpecify the number of rows for Partial Dependence Plots.", + "prompt_type": "plain" + }, + { + "output": "autodoc_pd_max_runtime``\n\nPDP Max Runtime in Seconds\n\nSpecify the maximum number of seconds Partial Dependency computation can\ntake when generating a report. Set this value to -1 to disable the time\nlimit. This is set to 20 seconds by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_out_of_range``\n\nPDP Out of Range\n\nSpecify the number of standard deviations outside of the range of a\ncolumn to include in partial dependence plots. This shows how the model\nreacts to data it has not seen before. This is set to 3 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_num_rows``\n\nICE Number of Rows\n\nSpecify the number of rows to include in PDP and ICE plots if individual\nrows are not specified. This is set to 0 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_population_stability_index``\n\nPopulation Stability Index\n\nSpecify whether to include a population stability index if the\nexperiment is a binary classification or regression problem. 
This is\ndisabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_population_stability_index_n_quantiles``\n\nPopulation Stability Index Number of Quantiles\n\nSpecify the number of quantiles to use for the population stability\nindex. This is set to 10 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_prediction_stats``\n\nPrediction Statistics\n\nSpecify whether to include prediction statistics information if the\nexperiment is a binary classification or regression problem. This value\nis disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_prediction_stats_n_quantiles``\n\nPrediction Statistics Number of Quantiles\n\nSpecify the number of quantiles to use for prediction statistics. This\nis set to 20 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_response_rate``\n\nResponse Rates Plot\n\nSpecify whether to include response rates information if the experiment\nis a binary classification problem. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_response_rate_n_quantiles``\n\nResponse Rates Plot Number of Quantiles\n\nSpecify the number of quantiles to use for response rates information.\nThis is set to 10 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_gini_plot``\n\nShow GINI Plot\n\nSpecify whether to show the GINI plot. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_enable_shapley_values``\n\nEnable Shapley Values\n\nSpecify whether to show Shapley values results in the AutoDoc. This is\nenabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_data_summary_col_num``\n\nNumber of Features in Data Summary Table\n\nSpecify the number of features to be shown in the data summary table.\nThis value must be an integer. To show all columns, specify any value\nlower than 1. 
This is set to -1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_list_all_config_settings``\n\nList All Config Settings\n\nSpecify whether to show all config settings. If this is disabled, only\nsettings that have been changed are listed. All settings are listed when\nenabled. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_keras_summary_line_length``\n\nKeras Model Architecture Summary Line Length\n\nSpecify the line length of the Keras model architecture summary. This\nvalue must be either an integer greater than 0 or -1. To use the default\nline length, set this value to -1 (default).", + "prompt_type": "plain" + }, + { + "output": "autodoc_transformer_architecture_max_lines``\n\nNLP/Image Transformer Architecture Max Lines\n\nSpecify the maximum number of lines shown for advanced transformer\narchitecture in the Feature section. Note that the full architecture can\nbe found in the appendix.", + "prompt_type": "plain" + }, + { + "output": "autodoc_full_architecture_in_appendix``\n\nAppendix NLP/Image Transformer Architecture\n\nSpecify whether to show the full NLP/Image transformer architecture in\nthe appendix. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_appendix_results_table``\n\nFull GLM Coefficients Table in the Appendix\n\nSpecify whether to show the full GLM coefficient table(s) in the\nappendix. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_models``\n\nGLM Coefficient Tables Number of Models\n\nSpecify the number of models for which a GLM coefficients table is shown\nin the AutoDoc. This value must be -1 or an integer >= 1. Set this value\nto -1 to show tables for all models. 
This is set to 1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_folds``\n\nGLM Coefficient Tables Number of Folds Per Model\n\nSpecify the number of folds per model for which a GLM coefficients table\nis shown in the AutoDoc. This value must be -1 (default) or an\ninteger >= 1 (-1 shows all folds per model).", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_coef``\n\nGLM Coefficient Tables Number of Coefficients\n\nSpecify the number of coefficients to show within a GLM coefficients\ntable in the AutoDoc. This is set to 50 by default. Set this value to -1\nto show all coefficients.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_classes``\n\nGLM Coefficient Tables Number of Classes\n\nSpecify the number of classes to show within a GLM coefficients table in\nthe AutoDoc. Set this value to -1 to show all classes. This is set to 9\nby default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_num_histogram_plots``\n\nNumber of Histograms to Show\n\nSpecify the number of top features for which to show histograms. This is\nset to 10 by default.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI MOJO Scoring Pipeline - C++ Runtime with Python (Supports Shapley) and R Wrappers\nThe C++ Scoring Pipeline is provided as R and Python packages for the\nprotobuf-based MOJO2 protocol. Use your preferred method once the MOJO\nScoring Pipeline has been built. Notes:\n - These scoring pipelines are currently not available for RuleFit\n models. - Unlike the Java Runtime, TensorFlow/Bert are supported by C++\n Runtime MOJO. - You can have Driverless AI attempt to reduce the size of the MOJO\n scoring pipeline when the experiment is being built by enabling\n the Reduce MOJO Size expert setting also\n see . - Shapley contributions come with the downloaded experiment MOJO\n scoring pipeline. See cpp_scoring_shapley for scoring example. 
- Shapley contributions for transformed\n features and original features are currently available for XGBoost\n (GBM, GLM, RF, DART), LightGBM, Zero-Inflated, Imbalanced and\n DecisionTree models (and their ensemble).", + "prompt_type": "plain" + }, + { + "output": "libopenblas-dev, run the following command: :: sudo apt install libopenblas-dev .. _cpp-mojo-downloads: Downloads --------- This section contains download links for the C++ MOJO runtime and its Python and R wrappers. **Python:** - :mojo-runtime38:C++ MOJO runtime (Python 3.8) - :mojo-runtime37:C++ MOJO runtime (Python 3.7) - :mojo-runtime36:C++ MOJO runtime (Python 3.6) **R**: - :daimojo-r:`C++ MOJO runtime ` .. note:: The Python and R packages can also be downloaded from within the Driverless AI application by clicking **Resources**, and then clicking **MOJO Py Runtime** or **MOJO R Runtime** from the drop-down menu. Examples -------- The following examples show how to use the R and Python APIs of the C++ MOJO runtime.", + "prompt_type": "plain" + }, + { + "output": "--with-prediction-interval.. code:: bash java -Xmx5g -Dai.h2o.mojos.runtime.license.file=license.file -jar mojo2-runtime.jar --with-prediction-interval pipeline.mojo example.csv .. _cpp_scoring_shapley: C++ MOJO runtime Shapley values support --------------------------------------- The C++ MOJO runtime and its Python wrapper support Shapley contributions for transformed features and original features. The following example demonstrates how to retrieve Shapley contributions for transformed and original features when making predictions: .. 
code:: python import datatable as dt import daimojo X = dt.Frame(\"example.jay\") m = daimojo.model(\"pipeline.mojo\") m.predict(X) # Prediction call that returns regular predictions m.predict(X, pred_contribs=True) # Prediction call that returns Shapley contributions for transformed features m.predict(X, pred_contribs=True, pred_contribs_original=True) # Prediction call that returns Shapley contributions for original features .. note:: - Setting ``pred_contribs_original=True`` requires that ``pred_contribs`` is also set to ``True``.", + "prompt_type": "plain" + }, + { + "output": "Python Client\n\nThis section describes how to install the Driverless AI Python client.\nSeveral end-to-end examples that demonstrate how to use the client are\nalso provided. Additional examples are available in the Driverless AI\nCode Samples and Tutorials GitHub repository.\n\nFor more information on the Python client, see the Driverless AI Python\nclient documentation.\n\nNote\n\nThe Python client does not currently support the following Driverless AI\nfeatures:\n\n- Diagnostics\n- Deployments\n- MLI Bring Your Own Recipe (BYOR)\n- mTLS authentication\n\npython_install_client python_client_admin\nexamples/credit_card/credit_card_default.ipynb\nexamples/walmart_timeseries_experiment/training_timeseries_model.ipynb\nexamples/stock_timeseries_experiment/demo_stock_timeseries.ipynb\nexamples/nlp_airline_sentiment/demo_nlp_airline_sentiment.ipynb", + "prompt_type": "plain" + }, + { + "output": "enable_constant_model``\n\nConstant Models\n\nSpecify whether to enable constant models. This is set\nto Auto (enabled) by default.", + "prompt_type": "plain" + }, + { + "output": "enable_decision_tree------------------------ .. container:: dropdown **Decision Tree Models** Specify whether to build Decision Tree models as part of the experiment. This is set to **Auto** by default. 
In this case, Driverless AI will build Decision Tree models if interpretability is greater than or equal to the value of ``decision_tree_interpretability_switch`` (which defaults to 7) and accuracy is less than or equal to ``decision_tree_accuracy_switch``\n\n (which defaults to 7).", + "prompt_type": "plain" + }, + { + "output": "enable_glm``\n\nGLM Models\n\nSpecify whether to build GLM models (generalized linear models) as part\nof the experiment (usually only for the final model unless it's used\nexclusively). GLMs are very interpretable models with one coefficient\nper feature, an intercept term and a link function. This is set to Auto\nby default (enabled if accuracy <= 5 and interpretability >= 6).", + "prompt_type": "plain" + }, + { + "output": "enable_xgboost_gbm``\n\nXGBoost GBM Models\n\nSpecify whether to build XGBoost models as part of the experiment (for\nboth the feature engineering part and the final model). XGBoost is a\ntype of gradient boosting method that has been widely successful in\nrecent years due to its good regularization techniques and high\naccuracy. This is set to Auto by default. In this case, Driverless AI\nwill use XGBoost unless the number of rows * columns is greater than a\nthreshold. This threshold is a config setting that is 100M by default\nfor CPU and 30M by default for GPU.", + "prompt_type": "plain" + }, + { + "output": "enable_lightgbm``\n\nLightGBM Models\n\nSpecify whether to build LightGBM models as part of the experiment.\nLightGBM Models are the default models. This is set to Auto (enabled) by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "enable_xgboost_dart``\n\nXGBoost Dart Models\n\nSpecify whether to use XGBoost's Dart method when building models for\nexperiment (for both the feature engineering part and the final model).\nThis is set to Auto (disabled) by default.", + "prompt_type": "plain" + }, + { + "output": "enable_xgboost_rapids------------------------- .. 
container:: dropdown **Enable RAPIDS-cuDF extensions to XGBoost GBM/Dart** Specify whether to enable RAPIDS extensions to XGBoost GBM/Dart. **If selected, python scoring package can only be used on GPU system**. The equivalent config.toml parameter is ``enable_xgboost_rapids`` and\n\n the default value is False. Disabled for dask multinode models due\n to bug in dask_cudf and xgboost.", + "prompt_type": "plain" + }, + { + "output": "enable_xgboost_rf``\n\nEnable XGBoost RF model\n\nSpecify whether to enable XGBoost RF mode without early stopping. This\nsetting is disabled unless switched on.", + "prompt_type": "plain" + }, + { + "output": "enable_xgboost_gbm_dask--------------------------- .. container:: dropdown **Enable Dask_cuDF (multi-GPU) XGBoost GBM** Specify whether to enable Dask_cudf (multi-GPU) version of XGBoost GBM. Disabled unless switched on. Only applicable for single final model without early stopping. **No Shapley possible**. The equivalent config.toml parameter is ``enable_xgboost_gbm_dask`` and the default\n\n value is \"auto\".", + "prompt_type": "plain" + }, + { + "output": "enable_xgboost_dart_dask---------------------------- .. container:: dropdown **Enable Dask_cuDF (multi-GPU) XGBoost Dart** Specify whether to enable Dask_cudf (multi-GPU) version of XGBoost GBM/Dart. This option is disabled unless switched on. Only applicable for single final model without early stopping. **No Shapley is possible**. The equivalent config.toml parameter is ``enable_xgboost_dart_dask`` and the default value is \"auto\". It is recommended to run Dask_cudf on multiple GPUs; if, say for debugging purposes, the user would like to enable it on 1 GPU, then set ``use_dask_for_1_gpu``\nto True via config.toml setting.", + "prompt_type": "plain" + }, + { + "output": "enable_lightgbm_dask------------------------ .. container:: dropdown **Enable Dask (multi-node) LightGBM** Specify whether to enable multi-node LightGBM. It is disabled by default unless switched on. 
The equivalent config.toml parameter is ``enable_lightgbm_dask``\nand the default value is \"auto\".\n\nTo enable multinode Dask, see\nDask Multinode Training.", + "prompt_type": "plain" + }, + { + "output": "enable_hyperopt_dask------------------------ .. container:: dropdown **Enable Dask (multi-node/multi-GPU) hyperparameter search** Specify whether to enable Dask (multi-node/multi-GPU) version of hyperparameter search. \"auto\" and \"on\" are currently the same. Dask mode for hyperparameter search is enabled if: 1) Have a :ref:`Dask multinode cluster ` or multi-GPU node and model uses 1 GPU for each model (see :ref:`num-gpus-per-model`). 2) Not already using a Dask model. The equivalent config.toml parameter is ``enable_hyperopt_dask`` and\n\n the default value is \"auto\".", + "prompt_type": "plain" + }, + { + "output": "num_inner_hyperopt_trials_prefinal-------------------------------------- .. container:: dropdown **Number of trials for hyperparameter optimization during model tuning only** Specify the number of trials for **Optuna** hyperparameter optimization for tuning and evolution of models. If using **RAPIDS** or **DASK**, this parameter specifies the number of trials for hyperparameter optimization within XGBoost GBM/Dart and LightGBM and hyperparameter optimization keeps data on GPU entire time. 0 means no trials. For small data, 100 is fine, while for larger data smaller values are reasonable if need results quickly. If using RAPIDS or DASK, hyperparameter optimization stays on GPU the entire time. The equivalent config.toml parameter is ``num_inner_hyperopt_trials_prefinal`` and the default value is\n 0. Note that, this is useful when there is high overhead of DAI outside\n inner model fit/predict (i.e the various file, process, and other\n DAI management processes), so this tunes without that overhead.", + "prompt_type": "plain" + }, + { + "output": "num_inner_hyperopt_trials_final----------------------------------- .. 
container:: dropdown **Number of trials for hyperparameter optimization for final model only** Number of trials for **Optuna** hyperparameter optimization for final models. If using **RAPIDS** or **DASK**, this is number of trials for rapids-cudf hyperparameter optimization within XGBoost GBM/Dart and LightGBM, and hyperparameter optimization keeps data on GPU entire time. 0 means no trials. For small data, 100 is an ok choice, while for larger data smaller values are reasonable if need results quickly. This setting applies to final model only, even if num_inner_hyperopt_trials=0. The equivalent config.toml parameter is ``num_inner_hyperopt_trials_final``\nand the default value is 0.", + "prompt_type": "plain" + }, + { + "output": "num_hyperopt_individuals_final---------------------------------- .. container:: dropdown **Number of individuals in final ensemble to use Optuna on** Number of individuals in final model (all folds/repeats for given base model) to optimize with **Optuna** hyperparameter tuning. The default value is -1, which means all. 0 is same as choosing no Optuna trials. Might be only beneficial to optimize hyperparameters of best individual (i.e. value of 1) in ensemble. The equivalent config.toml parameter is ``num_hyperopt_individuals_final``", + "prompt_type": "plain" + }, + { + "output": "optuna_pruner----------------- .. container:: dropdown **Optuna Pruners** `Optuna Pruner `__ algorithm to use for early stopping of unpromising trials (applicable to XGBoost and LightGBM that support Optuna callbacks). The default is **MedianPruner**. To disable choose None. The equivalent config.toml parameter is ``optuna_pruner``", + "prompt_type": "plain" + }, + { + "output": "optuna_sampler------------------ .. container:: dropdown **Optuna Samplers** `Optuna Sampler `__ algorithm to use for narrowing down and optimizing the search space (applicable to XGBoost and LightGBM that support Optuna callbacks). The default is **TPESampler**. 
To disable choose None. The equivalent config.toml parameter is ``optuna_sampler``", + "prompt_type": "plain" + }, + { + "output": "enable_xgboost_hyperopt_callback------------------------------------ .. container:: dropdown **Enable Optuna XGBoost Pruning callback** Specify whether to enable Optuna's XGBoost Pruning callback to abort unpromising runs. This is True by default. This is not enabled when tuning learning rate. The equivalent config.toml parameter is ``enable_xgboost_hyperopt_callback``", + "prompt_type": "plain" + }, + { + "output": "enable_lightgbm_hyperopt_callback------------------------------------- .. container:: dropdown **Enable Optuna LightGBM Pruning callback** Specify whether to enable Optuna's LightGBM Pruning callback to abort unpromising runs. This is True by default. This is not enabled when tuning learning rate. The equivalent config.toml parameter is ``enable_lightgbm_hyperopt_callback``", + "prompt_type": "plain" + }, + { + "output": "enable_tensorflow``\n\nTensorFlow Models\n\nSpecify whether to build TensorFlow models as part of the experiment\n(usually only for text features engineering and for the final model\nunless it's used exclusively). Enable this option for NLP experiments.\nThis is set to Auto by default (not used unless the number of classes is\ngreater than 10).\n\nTensorFlow models are not yet supported by Java MOJOs (only Python\nscoring pipelines and C++ MOJOs are supported).", + "prompt_type": "plain" + }, + { + "output": "enable_grownet``\n\nPyTorch GrowNet Models\n\nSpecify whether to enable PyTorch-based GrowNet models. By\ndefault, this parameter is set to auto i.e Driverless decides internally\nwhether to use the algorithm for the experiment. Set it to on to force\nthe experiment to build a GrowNet model.", + "prompt_type": "plain" + }, + { + "output": "enable_ftrl``\n\nFTRL Models\n\nSpecify whether to build Follow the Regularized Leader (FTRL) models as\npart of the experiment. 
Note that MOJOs are not yet supported (only\nPython scoring pipelines). FTRL supports binomial and multinomial\nclassification for categorical targets, as well as regression for\ncontinuous targets. This is set to Auto (disabled) by default.", + "prompt_type": "plain" + }, + { + "output": "enable_rulefit``\n\nRuleFit Models\n\nSpecify whether to build RuleFit models as part of the experiment. Note\nthat MOJOs are not yet supported (only Python scoring pipelines). Note\nthat multiclass classification is not yet supported for RuleFit models.\nRules are stored to text files in the experiment directory for now. This\nis set to Auto (disabled) by default.", + "prompt_type": "plain" + }, + { + "output": "enable_zero_inflated_models``\n\nZero-Inflated Models\n\nSpecify whether to enable the automatic addition of\nzero-inflated models for regression problems with\nzero-inflated target values that meet certain conditions:\n\n y >= 0, y.std() > y.mean()\")\n\nThis is set to Auto by default.", + "prompt_type": "plain" + }, + { + "output": "enable_lightgbm_boosting_types``\n\nLightGBM Boosting Types\n\nSpecify which boosting types to enable for LightGBM. Select one or more\nof the following:\n\n- gbdt: Boosted trees\n- rf_early_stopping: Random Forest with early stopping\n- rf: Random Forest\n- dart: Dropout boosted trees with no early stopping\n\ngbdt and rf are both enabled by default.", + "prompt_type": "plain" + }, + { + "output": "enable_lightgbm_cat_support``\n\nLightGBM Categorical Support\n\nSpecify whether to enable LightGBM categorical feature support. This is\ndisabled by default.\n\nNotes:\n\n- Only supported for CPU.\n- A MOJO is not built when this is enabled.", + "prompt_type": "plain" + }, + { + "output": "enable_lightgbm_cuda_support``\n\nLightGBM CUDA Support\n\nSpecify whether to enable LightGBM CUDA implementation instead of\nOpenCL. 
LightGBM CUDA is supported on Linux x86-64 environments.", + "prompt_type": "plain" + }, + { + "output": "show_constant_model``\n\nWhether to Show Constant Models in Iteration Panel\n\nSpecify whether to show constant models in the iteration panel. This is\ndisabled by default.", + "prompt_type": "plain" + }, + { + "output": "params_tensorflow``\n\nParameters for TensorFlow\n\nSpecify specific parameters for TensorFlow to override Driverless AI\nparameters. The following is an example of how the parameters can be\nconfigured:\n\n params_tensorflow = '{'lr': 0.01, 'add_wide': False, 'add_attention': True, 'epochs': 30,\n 'layers': [100, 100], 'activation': 'selu', 'batch_size': 64, 'chunk_size': 1000, 'dropout': 0.3,\n 'strategy': 'one_shot', 'l1': 0.0, 'l2': 0.0, 'ort_loss': 0.5, 'ort_loss_tau': 0.01, 'normalize_type': 'streaming'}'\n\nThe following is an example of how layers can be configured:\n\n [500, 500, 500], [100, 100, 100], [100, 100], [50, 50]\n\nMore information about TensorFlow parameters can be found in the Keras\ndocumentation. Different strategies for using TensorFlow parameters can\nbe viewed here.", + "prompt_type": "plain" + }, + { + "output": "max_nestimators``\n\nMax Number of Trees/Iterations\n\nSpecify the upper limit on the number of trees (GBM) or iterations\n(GLM). This defaults to 3000. Depending on accuracy settings, a fraction\nof this limit will be used.", + "prompt_type": "plain" + }, + { + "output": "n_estimators_list_no_early_stopping--------------------------------------- .. container:: dropdown **n_estimators List to Sample From for Model Mutations for Models That Do Not Use Early Stopping** For LightGBM, the dart and normal random forest modes do not use early stopping. 
This setting lets you specify the ``n_estimators``\n\n (number of trees in the forest) list to sample from for model\n mutations for these types of models.", + "prompt_type": "plain" + }, + { + "output": "min_learning_rate_final``\n\nMinimum Learning Rate for Final Ensemble GBM Models\n\nThis value defaults to 0.01. This is the lower limit on learning rate\nfor final ensemble GBM models. In some cases, the maximum number of\ntrees/iterations is insufficient for the final learning rate, which can\nlead to no early stopping getting triggered and poor final model\nperformance. Then, one can try increasing the learning rate by raising\nthis minimum, or one can try increasing the maximum number of\ntrees/iterations.", + "prompt_type": "plain" + }, + { + "output": "max_learning_rate_final``\n\nMaximum Learning Rate for Final Ensemble GBM Models\n\nSpecify the maximum (upper limit) learning rate for final ensemble GBM\nmodels. This value defaults to 0.05.", + "prompt_type": "plain" + }, + { + "output": "max_nestimators_feature_evolution_factor``\n\nReduction Factor for Max Number of Trees/Iterations During Feature\nEvolution\n\nSpecify the factor by which the value specified by the\nmax-trees-iterations setting is reduced for tuning and feature\nevolution. This option defaults to 0.2. So by default, Driverless AI\nwill produce no more than 0.2 * 3000 trees/iterations during feature\nevolution.", + "prompt_type": "plain" + }, + { + "output": "max_abs_score_delta_train_valid``\n\nMax. absolute delta between training and validation scores for tree\nmodels\n\nModify early stopping behavior for tree-based models (LightGBM,\nXGBoostGBM, CatBoost) such that training score (on training data, not\nholdout) and validation score differ no more than this absolute value\n(i.e., stop adding trees once abs(train_score - valid_score) >\nmax_abs_score_delta_train_valid). 
Keep in mind that the meaning of this\nvalue depends on the chosen scorer and the dataset (i.e., 0.01 for\nLogLoss is different than 0.01 for MSE). This option is Experimental,\nand only for expert use to keep model complexity low. To disable, set to\n0.0. By default this option is disabled.", + "prompt_type": "plain" + }, + { + "output": "max_rel_score_delta_train_valid``\n\nMax. relative delta between training and validation scores for tree\nmodels\n\nModify early stopping behavior for tree-based models (LightGBM,\nXGBoostGBM, CatBoost) such that training score (on training data, not\nholdout) and validation score differ no more than this relative value\n(i.e., stop adding trees once abs(train_score - valid_score) >\nmax_rel_score_delta_train_valid * abs(train_score)). Keep in mind that\nthe meaning of this value depends on the chosen scorer and the dataset\n(i.e., 0.01 for LogLoss is different than 0.01 for MSE etc). This option\nis Experimental, and only for expert use to keep model complexity low.\nTo disable, set to 0.0. By default this option is disabled.", + "prompt_type": "plain" + }, + { + "output": "min_learning_rate``\n\nMinimum Learning Rate for Feature Engineering GBM Models\n\nSpecify the minimum learning rate for feature engineering GBM models.\nThis value defaults to 0.05.", + "prompt_type": "plain" + }, + { + "output": "max_learning_rate``\n\nMax Learning Rate for Tree Models\n\nSpecify the maximum learning rate for tree models during feature\nengineering. Higher values can speed up feature engineering but can hurt\naccuracy. This value defaults to 0.5.", + "prompt_type": "plain" + }, + { + "output": "max_epochs``\n\nMax Number of Epochs for TensorFlow/FTRL\n\nWhen building TensorFlow or FTRL models, specify the maximum number of\nepochs to train models with (it might stop earlier). This value defaults\nto 10. 
This option is ignored if TensorFlow models and/or FTRL models are\ndisabled.", + "prompt_type": "plain" + }, + { + "output": "max_max_depth----------------- .. container:: dropdown **Max Tree Depth** Specify the maximum tree depth. The corresponding maximum value for ``max_leaves`` is double the specified value. This value defaults to\n\n 12.", + "prompt_type": "plain" + }, + { + "output": "max_max_bin--------------- .. container:: dropdown **Max max_bin for Tree Features** Specify the maximum ``max_bin`` for tree features. This value\n\n defaults to 256.", + "prompt_type": "plain" + }, + { + "output": "rulefit_max_num_rules``\n\nMax Number of Rules for RuleFit\n\nSpecify the maximum number of rules to be used for RuleFit models. This\ndefaults to -1, which specifies to use all rules.", + "prompt_type": "plain" + }, + { + "output": "ensemble_meta_learner``\n\nEnsemble Level for Final Modeling Pipeline\n\nModel to combine base model predictions, for experiments that create a\nfinal pipeline consisting of multiple base models:\n\n- blender: Creates a linear blend with non-negative weights that add\n to 1 (blending) - recommended\n- extra_trees: Creates a tree model to non-linearly combine the base\n models (stacking) - experimental, and recommended to also set enable\n cross_validate_meta_learner.", + "prompt_type": "plain" + }, + { + "output": "fixed_ensemble_level------------------------ .. container:: dropdown **Ensemble Level for Final Modeling Pipeline** Specify one of the following ensemble levels: - -1 = auto, based upon ensemble_accuracy_switch, accuracy, size of data, etc. (Default) - 0 = No ensemble, only final single model on validated iteration/tree count. Note that holdout predicted probabilities will not be available. (For more information, refer to this :ref:`FAQ `.) 
- 1 = 1 model, multiple ensemble folds (cross-validation) - 2 = 2 models, multiple ensemble folds (cross-validation) - 3 = 3 models, multiple ensemble folds (cross-validation) - 4 = 4 models, multiple ensemble folds (cross-validation) The equivalent config.toml parameter is ``fixed_ensemble_level``.", + "prompt_type": "plain" + }, + { + "output": "cross_validate_meta_learner``\n\nEnsemble Level for Final Modeling Pipeline\n\nIf enabled, use cross-validation to create an ensemble for the meta\nlearner itself. Especially recommended for\nensemble_meta_learner='extra_trees', to make unbiased training holdout\npredictions. No MOJO will be created if this setting is enabled. Not\nneeded for ensemble_meta_learner='blender'.", + "prompt_type": "plain" + }, + { + "output": "cross_validate_single_final_model``\n\nCross-Validate Single Final Model\n\nDriverless AI normally produces a single final model for low accuracy\nsettings (typically, less than 5). When the Cross-validate single final\nmodel option is enabled (default for regular experiments), Driverless AI\nwill perform cross-validation to determine optimal parameters and early\nstopping before training the final single modeling pipeline on the\nentire training data. The final pipeline will build N\u2005+\u20051 models, with\nN-fold cross validation for the single final model. This also creates\nholdout predictions for all non-time-series experiments with a single\nfinal model.\n\nNote that the setting for this option is ignored for time-series\nexperiments or when a validation dataset is provided.", + "prompt_type": "plain" + }, + { + "output": "parameter_tuning_num_models``\n\nNumber of Models During Tuning Phase\n\nSpecify the number of models to tune during pre-evolution phase. Specify\na lower value to avoid excessive tuning, or specify a higher value to perform\nenhanced tuning. 
This option defaults to -1 (auto).", + "prompt_type": "plain" + }, + { + "output": "imbalance_sampling_method``\nSampling Method for Imbalanced Binary Classification Problems\nSpecify the sampling method for imbalanced binary classification\nproblems. This is set to off by default. Choose from the following\noptions:\n- auto: sample both classes as needed, depending on data\n- over_under_sampling: over-sample the minority class and under-sample\n the majority class, depending on data\n- under_sampling: under-sample the majority class to reach class\n balance\n- off: do not perform any sampling\nThis option is closely tied with the Imbalanced Light GBM and Imbalanced\nXGBoost GBM models, which can be enabled/disabled on the Recipes tab\nunder included_models. Specifically:\n- If this option is ENABLED (set to a value other than off) and the\n ImbalancedLightGBM and/or ImbalancedXGBoostGBM models are ENABLED,\n then Driverless AI will check your target imbalance fraction. If the\n target fraction proves to be above the allowed imbalance threshold,\n then sampling will be triggered.", + "prompt_type": "plain" + }, + { + "output": "imbalance_sampling_threshold_min_rows_original``\n\nThreshold for Minimum Number of Rows in Original Training Data to Allow\nImbalanced Sampling\n\nSpecify a threshold for the minimum number of rows in the original\ntraining data that allow imbalanced sampling. This value defaults to\n100,000.", + "prompt_type": "plain" + }, + { + "output": "imbalance_ratio_sampling_threshold``\n\nRatio of Majority to Minority Class for Imbalanced Binary Classification\nto Trigger Special Sampling Techniques (if Enabled)\n\nFor imbalanced binary classification problems, specify the ratio of\nmajority to minority class. Special imbalanced models with sampling\ntechniques are enabled when the ratio is equal to or greater than the\nspecified ratio. 
This value defaults to 5.", + "prompt_type": "plain" + }, + { + "output": "heavy_imbalance_ratio_sampling_threshold``\n\nRatio of Majority to Minority Class for Heavily Imbalanced Binary\nClassification to Only Enable Special Sampling Techniques (if Enabled)\n\nFor heavily imbalanced binary classification, specify the ratio of the\nmajority to minority class equal and above which to enable only special\nimbalanced models on the full original data without upfront sampling.\nThis value defaults to 25.", + "prompt_type": "plain" + }, + { + "output": "imbalance_sampling_number_of_bags``\n\nNumber of Bags for Sampling Methods for Imbalanced Binary Classification\n(if Enabled)\n\nSpecify the number of bags for sampling methods for imbalanced binary\nclassification. This value defaults to -1.", + "prompt_type": "plain" + }, + { + "output": "imbalance_sampling_max_number_of_bags``\n\nHard Limit on Number of Bags for Sampling Methods for Imbalanced Binary\nClassification\n\nSpecify the limit on the number of bags for sampling methods for\nimbalanced binary classification. This value defaults to 10.", + "prompt_type": "plain" + }, + { + "output": "imbalance_sampling_max_number_of_bags_feature_evolution``\n\nHard Limit on Number of Bags for Sampling Methods for Imbalanced Binary\nClassification During Feature Evolution Phase\n\nSpecify the limit on the number of bags for sampling methods for\nimbalanced binary classification. This value defaults to 3. Note that\nthis setting only applies to shift, leakage, tuning, and feature\nevolution models. To limit final models, use the Hard Limit on Number of\nBags for Sampling Methods for Imbalanced Binary Classification setting.", + "prompt_type": "plain" + }, + { + "output": "imbalance_sampling_max_multiple_data_size``\n\nMax Size of Data Sampled During Imbalanced Sampling\n\nSpecify the maximum size of the data sampled during imbalanced sampling\nin terms of the dataset's size. 
This setting controls the approximate\nnumber of bags and is only active when the \"Hard limit on number of bags\nfor sampling methods for imbalanced binary classification during feature\nevolution phase\" option is set to -1. This value defaults to 1.", + "prompt_type": "plain" + }, + { + "output": "imbalance_sampling_target_minority_fraction``\n\nTarget Fraction of Minority Class After Applying Under/Over-Sampling\nTechniques\n\nSpecify the target fraction of a minority class after applying\nunder/over-sampling techniques. A value of 0.5 means that\nmodels/algorithms will be given a balanced target class distribution.\nWhen starting from an extremely imbalanced original target, it can be\nadvantageous to specify a smaller value such as 0.1 or 0.01. This value\ndefaults to -1.", + "prompt_type": "plain" + }, + { + "output": "ftrl_max_interaction_terms_per_degree``\n\nMax Number of Automatic FTRL Interactions Terms for 2nd, 3rd, 4th order\ninteractions terms (Each)\n\nSamples the number of automatic FTRL interactions terms to no more than\nthis value (for each of 2nd, 3rd, 4th order terms). This value defaults\nto 10000", + "prompt_type": "plain" + }, + { + "output": "enable_bootstrap``\n\nWhether to Enable Bootstrap Sampling for Validation and Test Scores\n\nSpecify whether to enable bootstrap sampling. When enabled, this setting\nprovides error bars to validation and test scores based on the standard\nerror of the bootstrap mean. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_num_classes_switch``\n\nFor Classification Problems with This Many Classes, Default to\nTensorFlow\n\nSpecify the number of classes above which to use TensorFlow when it is\nenabled. Other models that are set to Auto will not be used above this\nnumber. (Models set to On, however, are still used.) 
This value defaults\nto 10.", + "prompt_type": "plain" + }, + { + "output": "prediction_intervals``\n\nCompute Prediction Intervals\n\nSpecify whether to compute empirical prediction intervals based on\nholdout predictions. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "prediction_intervals_alpha``\n\nConfidence Level for Prediction Intervals\n\nSpecify a confidence level for prediction intervals. This value defaults\nto 0.9.", + "prompt_type": "plain" + }, + { + "output": "dump_modelparams_every_scored_indiv``\n\nEnable detailed scored model info\n\nWhether to dump every scored individual's model parameters to\ncsv/tabulated/json file produces files. For example:\nindividual_scored.params.[txt, csv, json]", + "prompt_type": "plain" + }, + { + "output": "Linux DEBs\nFor Linux machines that will not use the Docker image or RPM, a deb\ninstallation is available for x86_64 Ubuntu 16.04/18.04/20.04/22.04. The following installation steps assume that you have a valid license\nkey for Driverless AI. For information on how to obtain a license key\nfor Driverless AI, visit https://www.h2o.ai/products/h2o-driverless-ai/. Once obtained, you will be prompted to paste the license key into the\nDriverless AI UI when you first log in, or you can save it as a .sig\nfile and place it in the license folder that you will create during the\ninstallation process. Note\n- To ensure that AutoDoc pipeline visualizations are generated\ncorrectly on native installations, installing fontconfig is recommended. - When using systemd, remove the dai-minio, dai-h2o, dai-redis,\n dai-procsy, and dai-vis-server services. 
When upgrading, you can use\n the following commands to deactivate these services:\n systemctl stop dai-minio\n systemctl disable dai-minio\n systemctl stop dai-h2o\n systemctl disable dai-h2o\n systemctl stop dai-redis\n systemctl disable dai-redis\n systemctl stop dai-procsy\n systemctl disable dai-procsy\n systemctl stop dai-vis-server\n systemctl disable dai-vis-server\nEnvironment\n -----------------------------------\n Operating System Min Mem\n ------------------------- ---------\n Ubuntu with GPUs 64 GB\n Ubuntu with CPUs 64 GB\n -----------------------------------\nRequirements\n- Ubuntu 16.04/Ubuntu 18.04/Ubuntu 20.04/Ubuntu 22.04\n- NVIDIA drivers >= is recommended (GPU only).", + "prompt_type": "plain" + }, + { + "output": "About the Install\n- The 'dai' service user is created locally (in /etc/passwd) if it is\n not found by 'getent passwd'. You can override the user by providing\n the DAI_USER environment variable during rpm or dpkg installation. - The 'dai' service group is created locally (in /etc/group) if it is\n not found by 'getent group'. You can override the group by providing\n the DAI_GROUP environment variable during rpm or dpkg installation. - Configuration files are placed in /etc/dai and owned by the 'root'\n user:\n - /etc/dai/config.toml: Driverless AI config file (See config_file\n section for details). - /etc/dai/User.conf: systemd config file specifying the service\n user. - /etc/dai/Group.conf: systemd config file specifying the service\n group. - /etc/dai/EnvironmentFile.conf: systemd config file specifying\n (optional) environment variable overrides. 
- Software files are placed in /opt/h2oai/dai and owned by the 'root'\n user\n- The following directories are owned by the service user so that they\n can be updated by the running software:\n - /opt/h2oai/dai/home: The application's home directory (license\n key files are stored here).", + "prompt_type": "plain" + }, + { + "output": "- /opt/h2oai/dai/log: Log files go here if you are not using\n systemd (if you are using systemd, then use the standard\n journalctl tool). - By default, for Docker or DEB/RPM installs, Driverless AI looks for\n a license key in /opt/h2oai/dai/home/.driverlessai/license.sig. If\n you are installing Driverless AI programmatically, you can copy a\n license key file to that location. For TAR SH installs, the\n equivalent location is /home/.driverlessai, and after\n the license is imported, it is copied under ~/.driverlessai. If no\n license key is found, the application guides you through the process\n of adding one through the UI. - systemd unit files are placed in /usr/lib/systemd/system. - Symbolic links to the configuration files in /etc/dai files are\n placed in /etc/systemd/system. If your environment is running an operational systemd, that is the\npreferred way to manage Driverless AI. The package installs the\nfollowing systemd services and a wrapper service:\n- dai: Wrapper service that starts/stops the other three services.", + "prompt_type": "plain" + }, + { + "output": "- dai-h2o: H2O-3 helper process used by Driverless AI. - dai-procsy: Procsy helper process used by Driverless AI. - dai-vis-server: Visualization server helper process used by\n Driverless AI. If you don't have systemd, refer to linux-tarsh for install\ninstructions. Starting NVIDIA Persistence Mode (GPU only)\nIf you have NVIDIA GPUs, you must run the following NVIDIA command. This\ncommand needs to be run every reboot. For more information:\nhttp://docs.nvidia.com/deploy/driver-persistence/index.html. 
sudo nvidia-smi -pm 1\nInstalling OpenCL\nOpenCL is required for full LightGBM support on GPU-powered systems. To\ninstall OpenCL, run the following as root:\n mkdir -p /etc/OpenCL/vendors && echo \"libnvidia-opencl.so.1\" > /etc/OpenCL/vendors/nvidia.icd && chmod a+r /etc/OpenCL/vendors/nvidia.icd && chmod a+x /etc/OpenCL/vendors/ && chmod a+x /etc/OpenCL\nNote\nIf OpenCL is not installed, then CUDA LightGBM is automatically used. CUDA LightGBM is only supported on Pascal-powered (and later) systems,\nand can be enabled manually with the enable_lightgbm_cuda_support\nconfig.toml setting.", + "prompt_type": "plain" + }, + { + "output": "# Install Driverless AI. sudo dpkg -i |VERSION-deb-lin|\nBy default, the Driverless AI processes are owned by the 'dai' user and\n'dai' group. You can optionally specify a different service user and\ngroup as shown below. Replace and as appropriate. # Temporarily specify service user and group when installing Driverless AI. # dpkg saves these for systemd in the /etc/dai/User.conf and /etc/dai/Group.conf files. sudo DAI_USER=myuser DAI_GROUP=mygroup dpkg -i |VERSION-deb-lin|\nYou may now optionally make changes to /etc/dai/config.toml. Starting Driverless AI\nTo start Driverless AI, use the following command:\n # Start Driverless AI. sudo systemctl start dai\nNote: If you don't have systemd, refer to linux-tarsh for install\ninstructions. Viewing Driverless AI Log Files\nIf you have systemd (preferred):\n sudo systemctl status dai-dai\n sudo journalctl -u dai-dai\nIf you do not have systemd:\n sudo less /opt/h2oai/dai/log/dai.log\n sudo less /opt/h2oai/dai/log/h2o.log\n sudo less /opt/h2oai/dai/log/procsy.log\n sudo less /opt/h2oai/dai/log/vis-server.log\nStopping Driverless AI\nIf you have systemd (preferred):\n # Stop Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Verify. sudo ps -u dai\nIf you do not have systemd:\n # Stop Driverless AI. sudo pkill -U dai\n # The processes should now be stopped. Verify. 
sudo ps -u dai\nUpgrading Driverless AI\nWARNINGS:\n- This release deprecates experiments and MLI models from 1.7.0 and\n earlier. - Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded. We recommend you take the following steps before\n upgrading. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. Requirements\nWe recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere.", + "prompt_type": "plain" + }, + { + "output": "Go to NVIDIA download driver to get the latest NVIDIA Tesla A/T/V/P/K\nseries drivers. For reference on CUDA Toolkit and Minimum Required\nDriver Versions and CUDA Toolkit and Corresponding Driver Versions, see\nhere . Note\nIf you are using K80 GPUs, the minimum required NVIDIA driver version is\n450.80.02. Upgrade Steps\nIf you have systemd (preferred):\n # Stop Driverless AI. sudo systemctl stop dai\n # Make a backup of /opt/h2oai/dai/tmp directory at this time. # Upgrade Driverless AI. sudo dpkg -i |VERSION-deb-lin|\n sudo systemctl daemon-reload\n sudo systemctl start dai\nIf you do not have systemd:\n # Stop Driverless AI. sudo pkill -U dai\n # The processes should now be stopped. Verify. sudo ps -u dai\n # Make a backup of /opt/h2oai/dai/tmp directory at this time. If you do not, all previous data will be lost. # Upgrade and restart. 
sudo dpkg -i |VERSION-deb-lin|\n sudo -H -u dai /opt/h2oai/dai/run-dai.sh\nUninstalling Driverless AI\nIf you have systemd (preferred):\n # Stop Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Verify. sudo ps -u dai\n # Uninstall Driverless AI. sudo dpkg -r dai\n # Purge Driverless AI. sudo dpkg -P dai\nIf you do not have systemd:\n # Stop Driverless AI. sudo pkill -U dai\n # The processes should now be stopped. Verify. sudo ps -u dai\n # Uninstall Driverless AI. sudo dpkg -r dai\n # Purge Driverless AI. sudo dpkg -P dai\nCAUTION! At this point you can optionally completely remove all\nremaining files, including the database (this cannot be undone):\n sudo rm -rf /opt/h2oai/dai\n sudo rm -rf /etc/dai\nNote: The UID and GID are not removed during the uninstall process. These can be removed with userdel and groupdel. However, we DO NOT\nrecommend removing the UID and GID if you plan to re-install Driverless\nAI. If you remove the UID and GID and then reinstall Driverless AI, the\nUID and GID will likely be re-assigned to a different (unrelated)\nuser/group in the future; this may cause confusion if there are any\nremaining files on the filesystem referring to the deleted user or\ngroup.", + "prompt_type": "plain" + }, + { + "output": "pip\ninstallcommand. Once installed, you can launch a Jupyter notebook and begin using the Driverless AI Python client. 
Installing from Python Package Index (PyPI) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The latest release of the client is available on PyPI and can be installed to your desired Python environment withpip``.\nThe following command installs the latest version of the Python Client:\n\n pip install driverlessai\n\nTo upgrade when new versions of the client are released, run the\nfollowing command:\n\n pip install --upgrade driverlessai\n\nInstalling from Anaconda Cloud\n\nTo install the Python Client as a conda package, use the following\ncommand:\n\n conda install -c h2oai driverlessai", + "prompt_type": "plain" + }, + { + "output": "Wide Datasets in Driverless AI\nA wide dataset with many features comes with its own challenges for\nfeature engineering and model building. In Driverless AI, datasets where number of columns > number of rows are\nconsidered as wide. When running experiments on such datasets,\nDriverless AI automatically enables wide rules that\nextend the limits on the maximum number of allowed features (that can be\nselected for feature evolution and selection) to a large number,\ndisables certain checks like data leakage and shift detection,\nmonotonicity constraints, AutoDoc and pipeline visualization creation. It also enables XGBoost random forest model for modeling, which helps to\navoid overfitting on wide datasets with few rows. See\nenable_wide_rules . A big-wide dataset can result in large models that can run out of memory\non GPUs. To avoid such model failures for XGBoost models (GBM, GLM, RF,\nDART), Driverless AI provides protection against GPU OOM by performing\nautomatic feature selection by building sub-models (with repeats) to\nselect features.", + "prompt_type": "plain" + }, + { + "output": "See\nallow_reduce_features_when_failure \nfor details. Here is an example of config.toml settings for a quick model run on a\nwide dataset. This disables genetic algorithm/tuning/evolution to get a quick final\nmodel. 
It also uses (XGBoost) random forest that is best to avoid\noverfit on wide data with few rows. The following config settings can be\ncopy/pasted in the expert settings GUI TOML to run this model. num_as_cat=false\n target_transformer=\"identity_noclip\"\n included_models=[\"XGBoostRFModel\"]\n included_transformers=[\"OriginalTransformer\"]\n fixed_ensemble_level=1\n make_mojo_scoring_pipeline=\"off\"\n make_pipeline_visualization=\"off\"\n n_estimators_list_no_early_stopping=[200]\n fixed_num_folds=2\n enable_genetic_algorithm=\"off\"\n max_max_bin=128\n reduce_repeats_when_failure=1\nThe reduce_repeats_when_failure controls the repeats, 1 is default. A\nvalue of 3 or more can take longer but can give more accuracy by finding\nthe best features to build a final model on.", + "prompt_type": "plain" + }, + { + "output": "Install on Azure\nThis section describes how to install the Driverless AI image from\nAzure. Note: Prior versions of the Driverless AI installation and upgrade on\nAzure were done via Docker. This is no longer the case as of version\n1.5.2. Watch the installation video here. Note that some of the images in this\nvideo may change between releases, but the installation steps remain the\nsame. 
Environment\n+---------------------------+--------------+---------+----------------+\n| Provider | Instance | Num | Suitable for |\n| | Type | GPUs | |\n+===========================+==============+=========+================+\n| Azure | Standard_NV6 | 1 | E |\n| | | | |\n| - | ---- | ---- | xperimentation |\n| - | -----------+ | ------+ | |\n| - | | | ---- |\n| - | S | 2 | -------------+ |\n| - | | | |\n| | tandard_NV12 | ---- | E |\n| | | ------+ | |\n| | ---- | | xperimentation |\n| | -----------+ | 4 | |\n| | | | ---- |\n| | S | ---- | -------------+ |\n| | | ------+ | |\n| | tandard_NV24 | | Serious |\n| | | 1 | use |\n| | ---- | | |\n| | -----------+ | ---- | ---- |\n| | | ------+ | -------------+ |\n| | Standard_NC6 | | |\n| | | 2 | E |\n| | ---- | | |\n| | -----------+ | ---- | xperimentation |\n| | | ------+ | |\n| | S | | ---- |\n| | | 4 | -------------+ |\n| | tandard_NC12 | | |\n| | | | E |\n| | ---- | | |\n| | -----------+ | | xperimentation |\n| | | | |\n| | S | | ---- |\n| | | | -------------+ |\n| | tandard_NC24 | | |\n| | | | Serious |\n| | | | use |\n+---------------------------+--------------+---------+----------------+\nAbout the Install\n- The 'dai' service user is created locally (in /etc/passwd) if it is\n not found by 'getent passwd'.", + "prompt_type": "plain" + }, + { + "output": "- The 'dai' service group is created locally (in /etc/group) if it is\n not found by 'getent group'. You can override the group by providing\n the DAI_GROUP environment variable during rpm or dpkg installation. - Configuration files are placed in /etc/dai and owned by the 'root'\n user:\n - /etc/dai/config.toml: Driverless AI config file (See config_file\n section for details). - /etc/dai/User.conf: systemd config file specifying the service\n user. - /etc/dai/Group.conf: systemd config file specifying the service\n group. - /etc/dai/EnvironmentFile.conf: systemd config file specifying\n (optional) environment variable overrides. 
- Software files are placed in /opt/h2oai/dai and owned by the 'root'\n user\n- The following directories are owned by the service user so that they\n can be updated by the running software:\n - /opt/h2oai/dai/home: The application's home directory (license\n key files are stored here). - /opt/h2oai/dai/tmp: Experiments and imported data are stored\n here.", + "prompt_type": "plain" + }, + { + "output": "- By default, for Docker or DEB/RPM installs, Driverless AI looks for\n a license key in /opt/h2oai/dai/home/.driverlessai/license.sig. If\n you are installing Driverless AI programmatically, you can copy a\n license key file to that location. For TAR SH installs, the\n equivalent location is /home/.driverlessai, and after\n the license is imported, it is copied under ~/.driverlessai. If no\n license key is found, the application guides you through the process\n of adding one through the UI. - systemd unit files are placed in /usr/lib/systemd/system. - Symbolic links to the configuration files in /etc/dai files are\n placed in /etc/systemd/system. If your environment is running an operational systemd, that is the\npreferred way to manage Driverless AI. The package installs the\nfollowing systemd services and a wrapper service:\n- dai: Wrapper service that starts/stops the other three services. - dai-dai: Main Driverless AI process. - dai-h2o: H2O-3 helper process used by Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "- dai-vis-server: Visualization server helper process used by\n Driverless AI. If you don't have systemd, refer to linux-tarsh for install\ninstructions. Installing the Azure Instance\n1. Log in to your Azure portal at https://portal.azure.com, and click\n the Create a Resource button. 2. Search for and select H2O DriverlessAI in the Marketplace. 3. Click Create. This launches the H2O DriverlessAI Virtual Machine\n creation process. 4. On the Basics tab:\n5. On the Size tab, select your virtual machine size. 
Specify the HDD\n disk type and select a configuration. We recommend using an N-Series\n type, which comes with a GPU. Also note that Driverless AI requires\n 10 GB of free space in order to run and will stop working if less\n than 10 GB is available. We recommend a minimum of 30 GB of disk\n space. Click OK when you are done. 6. On the Settings tab, select or create the Virtual Network and Subnet\n where the VM is going to be located and then click OK.\n7.", + "prompt_type": "plain" + }, + { + "output": "When the validation passes\n successfully, click Create to create the VM. 8. After the VM is created, it will be available under the list of\n Virtual Machines. Select this Driverless AI VM to view the IP\n address of your newly created machine. 9. Connect to Driverless AI with your browser using the IP address\n retrieved in the previous step. Stopping the Azure Instance\nThe Azure instance will continue to run even when you close the Azure\nportal. To stop the instance:\n1. Click the Virtual Machines left menu item. 2. Select the checkbox beside your DriverlessAI virtual machine. 3. On the right side of the row, click the ... button, then select\n Stop. (Note that you can then restart this by selecting Start.) [image]\nUpgrading the Driverless AI Image\nWARNINGS:\n- This release deprecates experiments and MLI models from 1.7.0 and\n earlier. - Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded.", + "prompt_type": "plain" + }, + { + "output": "
Upgrading from Version 1.2.2 or Earlier\nIt is not possible to upgrade from version 1.2.2 or earlier to the\nlatest version. You have to manually remove the 1.2.2 container and then\nreinstall the latest Driverless AI version. Be sure to backup your data\nbefore doing this. Upgrading from Version 1.3.0 to 1.5.1\n1. SSH into the IP address of the image instance and copy the existing\n experiments to a backup location:\n2. wget the newer image. Replace VERSION and BUILD below with the\n Driverless AI version. 3. Use the docker load command to load the image:\n4. Run docker images to find the new image tag.", + "prompt_type": "plain" + }, + { + "output": "Start the Driverless AI Docker image and replace TAG below with the\n image tag. Depending on your install version, use the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command. Upgrading from version 1.5.2 or Later\nUpgrading to versions 1.5.2 and later is no longer done via Docker. Instead, perform the following steps if you are upgrading to version\n1.5.2 or later. Replace dai_NEWVERSION.deb below with the new Driverless\nAI version (for example, dai_1.8.4.1_amd64.deb). Note that this upgrade\nprocess inherits the service user and group from /etc/dai/User.conf and\n/etc/dai/Group.conf. You do not need to manually specify the DAI_USER or\nDAI_GROUP environment variables during an upgrade. We recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere. Driverless AI ships with CUDA 11.2.2 for GPUs, but the driver\nmust exist in the host environment. Go to NVIDIA download driver to get the latest NVIDIA Tesla A/T/V/P/K\nseries drivers.", + "prompt_type": "plain" + }, + { + "output": "Sharing Experiments\nThis page describes how to share Driverless AI (DAI) experiments by\nexporting and importing experiments or by using Remote Storage. 
- export_import\n- remote_storage\n -----------------------------------------------------------------------\n Sharing Method Requirements\n ----------------------------------- -----------------------------------\n Exporting and Importing Experiments Requires only DAI\n Experiments \n Remote Storage Requires H2O AI Cloud (HAIC) __\n -----------------------------------------------------------------------\nExporting and Importing Experiments\nAs of version 1.10, DAI supports exporting and importing DAI\nexperiments. You can download experiments as a .dai file that can be\nimported by other DAI users. Exporting an Experiment\nAn experiment can be exported either from the main Experiment listing\npage by clicking the three dot icons to the right of the experiment name\nand selecting Export or from the\ncompleted experiment page by clicking Model\nActions > Export.", + "prompt_type": "plain" + }, + { + "output": "Importing an Experiment\nTo import an experiment, click the Import Experiment button on the\nExperiment listing page, and then select the DAI experiment file you\nwant to import from your local file system. You can also drag the DAI\nexperiment file from your local file system to the Experiment listing\npage. If the selected experiment used custom recipes, the custom recipes\nassociated with the experiment are also imported. Datasets associated with imported experiments are not imported as part\nof the experiment import process. Instead, only a minimal set of\nmetadata is imported. To take advantage of certain features such as\ninterpreting experiments and previewing datasets, you must manually\nimport the datasets associated with the imported experiment. Warning\nTo ensure that the import process is not interrupted, do not refresh the\npage while the experiment is being imported. 
Note\nWhen projects are shared with users, the users with whom the project is\nshared must import the experiments and datasets associated with the\nshared project.", + "prompt_type": "plain" + }, + { + "output": "For more information on HAIC,\nsee the HAIC documentation. Note\nUnsupervised experiments are not currently supported by both Remote\nStorage and H2O MLOps. Remote storage is only available to H2O AI Cloud (HAIC) users. In most\ncases, experiments that are placed in a Project are automatically added\nto Remote Storage. However, if the Project is created by clicking New\nExperiment > Create Leaderboard, the experiments in that Project are not\nautomatically added to Remote Storage. To add an experiment in a\nLeaderboard Project to Remote Storage, navigate to the Project and open\nthe drop-down options menu for the experiment, and then click Link\nRemotely. If a project is shared with you by another DAI user, the experiments and\ndatasets associated with that project are initially greyed out,\nindicating that they live only in the Remote Storage. Before they can be\nviewed and used, you must import them. This can be done by either\nclicking on the IMPORT button at a given row or by clicking the row menu\nand choosing the IMPORT option.", + "prompt_type": "plain" + }, + { + "output": "Both the\nexperiment and its datasets must be imported to use all of the\nexperiment's functionalities. Experiments in Remote Storage are made available in H2O MLOps and can be\nshared with other users. If a DAI instance is terminated and deleted,\nthe Projects associated with that instance of DAI remain saved in Remote\nStorage. Projects saved in Remote Storage are made available in newly\ncreated instances of DAI. This means that in cases where you need to\nkeep an old experiment, model interpretation, or AutoDoc for reference\npurposes, keeping the specific DAI instance containing them isn't\nnecessary. 
Instead, you can create a project, link the relevant\nexperiment and data, and delete the DAI instance. The model can then be\ndeployed to H2O MLOps, from which you can download the AutoDoc\nassociated with the model. In addition, you can create a new DAI\ninstance, import the project, and run and view the model interpretation. Following this practice can help lower costs by eliminating the need to\nkeep specific instances of DAI.", + "prompt_type": "plain" + }, + { + "output": "Sharing With Other Users\nTo share your project with other users, go to the Projects page and open\nthe drop-down menu for the project you want to share, then click Share. In the Sharing window, you can select a specific user and their role\nbefore adding them to the list of users your project is shared with. Select one of the following roles:\n- Default: This role is equivalent to granting write access to a user. Users with this role can make any modification to the shared\n project, including renaming the project, adding datasets, adding\n experiments, adding a note, and rerunning experiments. Users that\n are granted this role can perform any action that they are able to\n perform on projects they create and own. Warning\n Users with the Default role can delete projects that have been\n shared with them. If a user with the Default role deletes a project,\n it is also deleted for both the original owner and other shared\n users. - Reader: This role is equivalent to granting read-only access to a\n user.", + "prompt_type": "plain" + }, + { + "output": "Linux TAR SH\nThe Driverless AI software is available for use in pure user-mode\nenvironments as a self-extracting TAR SH archive. This form of\ninstallation does not require a privileged user to install or to run. This artifact has the same compatibility matrix as the RPM and DEB\npackages (combined), it just comes packaged slightly differently. See\nthose sections for a full list of supported environments. 
The installation steps assume that you have a valid license key for\nDriverless AI. For information on how to obtain a license key for\nDriverless AI, visit https://www.h2o.ai/products/h2o-driverless-ai/. Once obtained, you will be prompted to paste the license key into the\nDriverless AI UI when you first log in. Note\nTo ensure that AutoDoc pipeline visualizations are generated\ncorrectly on native installations, installing fontconfig is recommended. Requirements\n- RedHat 7/RedHat 8 or Ubuntu 16.04/Ubuntu 18.04/Ubuntu 20.04/Ubuntu\n 22.04\n- NVIDIA drivers >= recommended (GPU only).", + "prompt_type": "plain" + }, + { + "output": "Installing OpenCL\nOpenCL is required for full LightGBM support on GPU-powered systems. To\ninstall OpenCL, run the following as root:\n mkdir -p /etc/OpenCL/vendors && echo \"libnvidia-opencl.so.1\" > /etc/OpenCL/vendors/nvidia.icd && chmod a+r /etc/OpenCL/vendors/nvidia.icd && chmod a+x /etc/OpenCL/vendors/ && chmod a+x /etc/OpenCL\nNote\nIf OpenCL is not installed, then CUDA LightGBM is automatically used. CUDA LightGBM is only supported on Pascal-powered (and later) systems,\nand can be enabled manually with the enable_lightgbm_cuda_support\nconfig.toml setting. Installing Driverless AI\nRun the following commands to install the Driverless AI TAR SH. # Install Driverless AI. chmod 755 |VERSION-tar-lin|\n ./|VERSION-tar-lin|\nYou may now cd to the unpacked directory and optionally make changes to\nconfig.toml. Starting Driverless AI\n # Start Driverless AI. ./run-dai.sh\nStarting NVIDIA Persistence Mode\nIf you have NVIDIA GPUs, you must run the following NVIDIA command. This\ncommand needs to be run every reboot.", + "prompt_type": "plain" + }, + { + "output": "sudo nvidia-smi -pm 1\nInstall OpenCL\nOpenCL is required in order to run LightGBM on GPUs. Run the following\nfor Centos7/RH7 based systems using yum and x86. 
yum -y clean all\n yum -y makecache\n yum -y update\n wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/c/clinfo-2.1.17.02.09-1.el7.x86_64.rpm\n wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/o/ocl-icd-2.2.12-1.el7.x86_64.rpm\n rpm -if clinfo-2.1.17.02.09-1.el7.x86_64.rpm\n rpm -if ocl-icd-2.2.12-1.el7.x86_64.rpm\n clinfo\n mkdir -p /etc/OpenCL/vendors && \\\n echo \"libnvidia-opencl.so.1\" > /etc/OpenCL/vendors/nvidia.icd\nLooking at Driverless AI log files\n less log/dai.log\n less log/h2o.log\n less log/procsy.log\n less log/vis-server.log\nStopping Driverless AI\n # Stop Driverless AI. ./kill-dai.sh\nUninstalling Driverless AI\nTo uninstall Driverless AI, just remove the directory created by the\nunpacking process. By default, all files for Driverless AI are contained\nwithin this directory.", + "prompt_type": "plain" + }, + { + "output": "- Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded. We recommend you take the following steps before\n upgrading. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. Requirements\nWe recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere. Driverless AI ships with CUDA 11.2.2 for GPUs, but the driver\nmust exist in the host environment. Go to NVIDIA download driver to get the latest NVIDIA Tesla A/T/V/P/K\nseries drivers. 
For reference on CUDA Toolkit and Minimum Required\nDriver Versions and CUDA Toolkit and Corresponding Driver Versions, see\nhere .", + "prompt_type": "plain" + }, + { + "output": "Importing Datasets\nSupported file types\nDriverless AI supports the following dataset file formats:\n- arff\n- avro\n- bin\n- bz2\n- csv (See note below)\n- dat\n- feather\n- gz\n- jay (See note below)\n- orc (See notes below)\n- parquet (See notes below)\n- pickle / pkl (See note below)\n- tgz\n- tsv\n- txt\n- xls\n- xlsx\n- xz\n- zip\nNote\nAdding datasets\nYou can add datasets using one of the following methods:\nDrag and drop files from your local machine directly onto this page. Note that this method currently works for files that are less than 10\nGB. or\nClick the Add Dataset (or Drag & Drop) button to upload or add a\ndataset. Notes:\n- Upload File, File System, HDFS, S3, Data Recipe URL, and Upload Data\n Recipe are enabled by default. These can be disabled by removing\n them from the enabled_file_systems setting in the config.toml file. (Refer to Using the config.toml file section for more information.) - If File System is disabled, Driverless AI will open a local\n filebrowser by default.", + "prompt_type": "plain" + }, + { + "output": "Refer to\n the Enabling Data Connectors section for more information. - When specifying to add a dataset using Data Recipe URL, the URL must\n point to either an HTML or raw version of the file, a GitHub\n repository or tree, or a local file. When adding or uploading\n datasets via recipes, the dataset will be saved as a .jay file. - Datasets must be in delimited text format. - Driverless AI can detect the following separators: ,|;\\t\n- When importing a folder, the entire folder and all of its contents\n are read into Driverless AI as a single file. - When importing a folder, all of the files in the folder must have\n the same columns. 
- If you try to import a folder via a data connector on Windows, the\n import will fail if the folder contains files that do not have file\n extensions (the resulting error is usually related to the above\n note). Upon completion, the datasets will appear in the Datasets Overview page. Click on a dataset to open a submenu.", + "prompt_type": "plain" + }, + { + "output": "Tips 'n Tricks\nThis section includes Arno\u2019s tips for running Driverless AI. Pipeline Tips\nGiven training data and a target column to predict, H2O Driverless AI\nproduces an end-to-end pipeline tuned for high predictive performance\n(and/or high interpretability) for general classification and regression\ntasks. The pipeline has only one purpose: to take a test set, row by\nrow, and turn its feature values into predictions. A typical pipeline creates dozens or even hundreds of derived features\nfrom the user-given dataset. Those transformations are often based on\nprecomputed lookup tables and parameterized mathematical operations that\nwere selected and optimized during training. It then feeds all these\nderived features to one or several machine learning algorithms such as\nlinear models, deep learning models, or gradient boosting models (and\nseveral more derived models). If there are multiple models, then their\noutput is post-processed to form the final prediction (either\nprobabilities or target values).", + "prompt_type": "plain" + }, + { + "output": "It is important to note that the training dataset is processed as a\nwhole for better results (e.g., aggregate statistics). For scoring,\nhowever, every row of the test dataset must be processed independently\nto mimic the actual production scenario. To facilitate deployment to various production environments, there are\nmultiple ways to obtain predictions from a completed Driverless AI\nexperiment, either from the GUI, from the R or Python client API, or\nfrom a standalone pipeline. 
GUI\n- Score on Another Dataset - Convenient, parallelized, ideal for\n imported data\n- Download Predictions - Available if a test set was provided during\n training\n- Deploy - Creates an Amazon Lambda endpoint (more endpoints coming\n soon)\n- Diagnostics - Useful if the test set includes a target column\nClient APIs\n- Python client - Use the make_prediction_sync() method. An optional\n argument can be used to get per-row and per-feature 'Shapley'\n prediction contributions. (Pass pred_contribs=True.)", + "prompt_type": "plain" + }, + { + "output": "An optional argument can be\n used to get per-row and per-feature 'Shapley' prediction\n contributions. (Pass pred_contribs=True.) Standalone Pipelines\n- Python - Supports all models and transformers, and supports\n 'Shapley' prediction contributions and MLI reason codes\n- Java - Most portable, low latency, supports all models and\n transformers that are enabled by default (except TensorFlow NLP\n transformers), can be used in Spark/H2O-3/SparklingWater for scale\n- C++ - Highly portable, low latency, standalone runtime with a\n convenient Python and R wrapper\nTime Series Tips\nH2O Driverless AI handles time-series forecasting problems out of the\nbox. All you need to do when starting a time-series experiment is to provide\na regular columnar dataset containing your features. Then pick a target\ncolumn and also pick a \"time column\" - a designated column containing\ntime stamps for every record (row) such as \"April 10 2019 09:13:41\" or\n\"2019/04/10\". If you have a test set for which you want predictions for\nevery record, make sure to provide future time stamps and features as\nwell.", + "prompt_type": "plain" + }, + { + "output": "You can launch the experiment and let\nDriverless AI do the rest. It will even auto-detect multiple time series\nin the same dataset for different groups such as weekly sales for stores\nand departments (by finding the columns that identify stores and\ndepartments to group by). 
Driverless AI will also auto-detect the time\nperiod including potential gaps during weekends, as well as the forecast\nhorizon, a possible time gap between training and testing time periods\n(to optimize for deployment delay) and even keeps track of holiday\ncalendars. Of course, it automatically creates multiple causal\ntime-based validation splits (sliding time windows) for proper\nvalidation, and incorporates many other related grand-master recipes\nsuch as automatic target and non-target lag feature generation as well\nas interactions between lags, first and second derivatives and\nexponential smoothing. - If you find that the automatic lag-based time-series recipe isn't\n performing well for your dataset, we recommend that you try to\n disable the creation of lag-based features by disabling \"Time-series\n lag-based recipe\" in the expert settings.", + "prompt_type": "plain" + }, + { + "output": "Especially for small datasets and short forecast periods, this can\n lead to better results. - If the target column is present in the test set and has partially\n filled information (non-missing values), then Driverless AI will\n automatically augment the model with those future target values to\n make better predictions. This can be used to extend the usable\n lifetime of the model into the future without the need for\n retraining by providing past known outcomes. Contact us if you're\n interested in learning more about test-time augmentation. - For now, training and test datasets should have the same input\n features available, so think about which of the predictors (input\n features) will be available during production time and drop the rest\n (or create your own lag features that can be available to both train\n and test sets). 
- For datasets that are non-stationary in time, create a test set from\n the last temporal portion of data, and create time-based features.", + "prompt_type": "plain" + }, + { + "output": "- We are working on further improving many aspects of our time-series\n recipe. For example, we will add support to automatically generate\n lags for features that are only available in the training set, but\n not in the test set, such as environmental or economic factors. We'll also improve the performance of back-testing using rolling\n windows. Scorer Tips\nA core capability of H2O Driverless AI is the creation of automatic\nmachine learning modeling pipelines for supervised problems. In addition\nto the data and the target column to be predicted, the user can pick a\nscorer. A scorer is a function that takes actual and predicted values\nfor a dataset and returns a number. Looking at this single number is the\nmost common way to estimate the generalization performance of a\npredictive model on unseen data by comparing the model's predictions on\nthe dataset with its actual values. There are more detailed ways to\nestimate the performance of a machine learning model such as residual\nplots (available on the Diagnostics page in Driverless AI), but we will\nfocus on scorers here.", + "prompt_type": "plain" + }, + { + "output": "The default scorer for\nregression problems is RMSE (root mean squared error), where 0 is the\nbest possible value. For example, for a dataset containing 4 rows, if\nactual target values are [1, 1, 10, 0], but predictions are [2, 3, 4,\n-1], then the RMSE is sqrt((1+4+36+1)/4) and the largest misprediction\ndominates the overall score (quadratically). Driverless AI will focus on\nimproving the predictions for the third data point, which can be very\ndifficult when hard-to-predict outliers are present in the data. If\noutliers are not that important to get right, a metric like the MAE\n(mean absolute error) can lead to better results. 
For this case, the MAE\nis (1+2+6+1)/4 and the optimization process will consider all errors\nequally (linearly). Another scorer that is robust to outliers is RMSLE\n(root mean square logarithmic error), which is like RMSE but after\ntaking the logarithm of actual and predicted values - however, it is\nrestricted to positive values. For price predictions, scorers such as\nMAPE (mean absolute percentage error) or MER (median absolute percentage\nerror) are useful, but have problems with zero or small positive values.", + "prompt_type": "plain" + }, + { + "output": "For classification problems, the default scorer is either the AUC (area\nunder the receiver operating characteristic curve) or LOGLOSS\n(logarithmic loss) for imbalanced problems. LOGLOSS focuses on getting\nthe probabilities right (strongly penalizes wrong probabilities), while\nAUC is designed for ranking problems. Gini is similar to the AUC, but\nmeasures the quality of ranking (inequality) for regression problems. For general imbalanced classification problems, AUCPR and MCC are good\nchoices, while F05, F1 and F2 are designed to balance recall against\nprecision. We highly suggest experimenting with different scorers and studying\ntheir impact on the resulting models. Using the Diagnostics page in\nDriverless AI, all applicable scores can be computed for any given\nmodel, no matter which scorer was used during training. Knob Settings Tips\nH2O Driverless AI lets you customize every experiment in great detail\nvia the expert settings. The most important controls however are the\nthree knobs for accuracy, time and interpretability.", + "prompt_type": "plain" + }, + { + "output": "Higher time\nsettings mean the experiment is given more time to converge to an\noptimal solution. Higher interpretability settings reduce the model's\ncomplexity through less feature engineering and using simpler models. 
In\ngeneral, a setting of 1/1/10 will lead to the simplest and usually least\naccurate modeling pipeline, while a setting of 10/10/1 will lead to the\nmost complex and most time consuming experiment possible. Generally, it\nis sufficient to use settings of 7/5/5 or similar, and we recommend to\nstart with the default settings. We highly recommend studying the\nexperiment preview on the left-hand side of the GUI before each\nexperiment - it can help you fine-tune the settings and save time\noverall. Note that you can always finish an experiment early, either by clicking\n'Finish' to get the deployable final pipeline out, or by clicking\n'Abort' to instantly terminate the experiment. In either case, the\nexperiment can be continued seamlessly at a later time with 'Restart\nfrom last Checkpoint' or 'Retrain Final Pipeline', and you can always\nturn the knobs (or modify the expert settings) to adapt to your\nrequirements.", + "prompt_type": "plain" + }, + { + "output": "The predictive performance of the pipeline is a function of both the\ntraining data and the parameters of the pipeline (details of feature\nengineering and modeling). During an experiment, Driverless AI\nautomatically tunes these parameters by scoring candidate pipelines on\nheld out (\"validation\") data. This important validation data is either\nprovided by the user (for experts) or automatically created (random,\ntime-based or fold-based) by Driverless AI. Once a final pipeline has\nbeen created, it should be scored on yet another held out dataset (\"test\ndata\") to estimate its generalization performance. Understanding the\norigin of the training, validation and test datasets (\"the validation\nscheme\") is critical for success with machine learning, and we welcome\nyour feedback and suggestions to help us create the right validation\nschemes for your use cases. Expert Settings Tips\nH2O Driverless AI offers a range of 'Expert Settings' that let you\ncustomize each experiment. 
For example, you can limit the amount of\nfeature engineering by reducing the value for 'Feature engineering\neffort' or 'Max.", + "prompt_type": "plain" + }, + { + "output": "You can also select the model types to be used for training\non the engineered features (such as XGBoost, LightGBM, GLM, TensorFlow,\nFTRL, or RuleFit). For time-series problems where the selected\ntime_column leads to an error message (this can currently happen if the\ntime structure is not regular enough - we are working on an improved\nversion), you can disable the 'Time-series lag-based recipe' and\nDriverless AI will create train/validation splits based on the time\norder instead, which can increase the model's performance if the time\ncolumn is important. Checkpointing Tips\nDriverless AI provides the option to checkpoint experiments to speed up\nfeature engineering and model tuning when running multiple experiments\non the same dataset. By default, H2O Driverless AI automatically scans\nall prior experiments (including aborted ones) for an optimal checkpoint\nto restart from. You can select a specific prior experiment to restart a\nnew experiment from with \u201cRestart from Last Checkpoint\u201d in the\nexperiment listing page (click on the 3 yellow bars on the right).", + "prompt_type": "plain" + }, + { + "output": "Time Series Best Practices\nThis document describes best practices for running time series\nexperiments in Driverless AI. Preparing Your Data\nThe goal for a time series use case is to use historical data to\nforecast. The manner in which the data for forecasting is formatted\ndepends on what we want to do with this forecast. To format your data\nfor forecasting, aggregate the data for each group you are interested in\nfor a specific period of time. The following are three use cases in which the volume of stocks sold in\nthe S&P 500 is predicted. Each use case provides a unique scenario that\ndetermines how the data is formatted. 
Our raw data looks like this:\n[]\n- Use Case 1: Forecast the total volume for a stock tomorrow. - Use Case 2: Forecast the total volume for a stock next month. - Use Case 3: Forecast the total volume of all S&P 500 stocks next\n year. Experiment Setup\nOnce your data is formatted to match your use case, you can begin\nsetting up your experiment. Enabling the Time Series Recipe\nTo begin setting up your experiment, provide the following:\n- Training data\n- Target column\n- Time column (providing the time column enables the Time Series\n recipe)\n[]\nTime Series Settings\nOnce you have provided the time column, you are asked to fill in time\nseries-specific configurations.", + "prompt_type": "plain" + }, + { + "output": "In this example, there is one time series\n per stock (column: Name ), so Name is selected as the time group\n column. - Unavailable Columns at Prediction Time: The columns that are not\n known at time of prediction. In the S&P 500 data example, the\n independent variables are open, high, low, and close. Any variables\n that are not known in advance must be marked as columns that are\n unavailable at prediction time. Driverless AI only uses historical\n values for the independent variables that are marked. - Forecast Horizon: How far in advance you want to forecast. - Gap: Specify whether there is any gap between the training data and\n when you want to start forecasting. For example, if on Monday you\n want to predict the volume of a stock for Wednesday and Thursday,\n then you must provide the following configurations:\nValidation and Testing\nFor a time series use case, always validate and test the models on more\nrecent data. 
In Driverless AI, validation data is automatically created\nby default, and this data is used to evaluate the performance of each\nmodel.", + "prompt_type": "plain" + }, + { + "output": "It is\nnot used by Driverless AI until after the final model has already been\nchosen to prevent any accidental overfitting on the test data. Validation Data\nValidation data is automatically generated by Driverless AI using a\nrolling window approach. The number of time units contained in the\nvalidation data matches the forecast horizon and gap configurations. If\nyou want to forecast the next day, the validation data must consist of\none day's worth of data. If you want to forecast the next five days, the\nvalidation data must consist of five days' worth of data. In the first\nuse case, Driverless AI internally creates splits where the validation\ndata always consists of one day of data. []\nThe total number of data points used to validate models is:\nNumber of validation splits\u2005*\u2005Number of Time Group Columns\u2005*\u2005Forecast Horizon\nIn a use case where the number of Time Group Columns is small and you\nonly want to forecast stock volume for a specific stock, the validation\ndata can become very small.", + "prompt_type": "plain" + }, + { + "output": "There are generally two ways to do this: increase the number of\nvalidation splits done by Driverless AI, or increase the number of Time\nGroup Columns in the dataset. You can increase the number of validation\nsplits performed by Driverless AI by going to the Expert Settings under\nthe Time Series tab:\n[]\nBy default, Driverless AI automatically determines the number of\nvalidation splits based on the Accuracy setting (higher accuracy leads\nto more validation splits). You can override this to a larger number if\nyou know that the number of rows for each validation split will be small\n(that is, a small number of Time Group Columns and/or a small Forecast\nHorizon). 
If you override this, you can see the change reflected in the experiment\npreview. In the following experiment, the number of validation splits\nhas been increased to 20 in the expert settings panel. This change is\nreflected in the experiment preview. []\nAnother way to prevent small validation data is to consider including\nmore Time Group Columns.", + "prompt_type": "plain" + }, + { + "output": "Test Data\nThe test data is an optional dataset provided by the user. Driverless AI\nautomatically calculates the performance of the final model on this\ndataset but does not use it for model selection. The test dataset can be\nlarger than the Forecast Horizon. The first use case involves\nforecasting the next day's stock volume. You can, however, provide\nDriverless AI with one month of test data. In this scenario, Driverless\nAI evaluates how the model does at forecasting the next day's stock\nvolume over the one month period. Scorers\nThe scorer determines how Driverless AI evaluates the success of each\nmodel. []\nThe following is a list of popular scorers with information about which\nuse cases they excel in. []\nInterpreting Models with MLI\nBy clicking on Interpret this Model once an experiment has completed,\nyou can gather more information about how your final model performed on\nthe validation and test data. The first graph in the Model Interpretability module shows the error for\neach date in the validation and test data:\n[]\nYou can also see groups with very high error and very low error:\n[]\nYou can search for a specific group to see the actual time series vs\npredicted:\n[]\nBy clicking on a specific forecasted point, you can see the Shapley\ncontributions for that point.", + "prompt_type": "plain" + }, + { + "output": "[]\nThe Shapley contributions also show the strength and direction of each\npredictor for the selected date. 
Scoring\nBecause Driverless AI is building a traditional machine learning model\n(such as GLM, GBM, Random Forest), it requires a record to score on to\ngenerate a prediction. If you want to use the model to forecast, you\nhave three different scoring options:\n- Using Driverless AI\n- The Python Scoring pipeline\n - Independent of Driverless AI\n - Python whl with scoring function inside\n- The MOJO Scoring pipeline\n - Independent of Driverless AI\n - Java runtime or C++ runtime\nIf you want to use the model to score past the Forecast Horizon, then\nyou can only use Driverless AI or the Python Scoring pipeline for\nscoring. This means that if you provide Driverless AI with training data\nup to 2018-02-07 and ask it to build a model to predict tomorrow's\nvolume, the MOJO can only be used to score for 2018-02-08. The MOJO is stateless. It takes a single record and provides a\nprediction.", + "prompt_type": "plain" + }, + { + "output": "If a\nDriverless AI model shows that the previous day's stock volume is very\nimportant, then once the MOJO is used to start scoring past 2018-02-08,\nit no longer has information about the previous day's stock volume. Predicting Within Forecast Horizon\nIf you want to predict within the Forecast Horizon, you can provide\nDriverless AI, the Python Scoring pipeline, or the MOJO scoring pipeline\nwith the record that you want to predict for. Consider the following\nexample:\nThe training data ends on Friday 2018-01-05 and you want to forecast the\nnext business day's stock volume. Therefore, Monday 2018-01-08 is within\nthe Forecast Horizon. To predict the Stock volume for Stock: AAL on\n2018-01-08, provide any scoring method with the following data. []\nThe output is the volume prediction. Note: Because open, high, low, and close are not known at the time of\nprediction, these are filled in with NAs. 
Predicting Outside Forecast Horizon\nIf you now want to use the model to predict past 2018-01-08, then you\ncan only use Driverless AI or the Python scoring pipeline to score\nbecause the MOJO is stateless and cannot be used outside of the Forecast\nHorizon.", + "prompt_type": "plain" + }, + { + "output": "In the case where\nyou want to forecast for 2018-01-09, you must tell the model what\nhappened on 2018-01-08 (this date was not in the training data, so\nDriverless AI does not know what ended up happening on that date). In order to score for 2018-01-09, provide Driverless AI with the\nfollowing data. []\nThe model now returns two predictions: one for 2018-01-08 and one for\n2018-01-09 (the prediction of interest). Other Approaches\nUsing the IID Recipe\nSometimes it can be helpful to try building an experiment without the\nTime Series recipe even if you have a forecasting use case. The Time\nSeries recipe relies heavily on lagging the data, which means that it is\nmost helpful for cases where the past behavior is predictive. If you\nhave a use case where there is no strong temporal trend, then it may be\nhelpful to use Driverless AI without the Time Series recipe turned on. You can do this by simply not providing a Time Column when setting up\nthe experiment. Notes:\n- If you decide to try the model without Time Series turned on, make\n sure to provide a test dataset that is out of time.", + "prompt_type": "plain" + }, + { + "output": "Model Performance on Another Dataset\nThe Diagnose Model on New Dataset option lets you view model performance\nfor multiple scorers based on existing model and dataset. On the completed experiment page, click the Diagnose Model on New\nDataset button. Note: You can also diagnose a model by selecting Diagnostics from the\ntop menu, then selecting an experiment and test dataset. []\nSelect a dataset to use when diagnosing this experiment. Note that the\ndataset must include the target column that is in the original dataset. 
At this point, Driverless AI will begin calculating all available scores\nfor the experiment. When the diagnosis is complete, it will be available on the Model\nDiagnostics page. Click on the new diagnosis. From this page, you can\ndownload predictions. You can also view scores and metric plots. The\nplots are interactive. Click a graph to enlarge. In the enlarged view,\nyou can hover over the graph to view details for a specific point. You\ncan also download the graph in the enlarged view.", + "prompt_type": "plain" + }, + { + "output": "New Experiments\nThis page describes how to start a new experiment in Driverless AI. Note\nAn experiment setup wizard that guides you through the process of\nsetting up an experiment is also available. For more information, see\ndai_wizard. 1. Run an experiment by selecting [Click for Actions] button beside the\n training dataset that you want to use. Click Predict to begin an\n experiment. Alternatively, you can click the New Experiment ->\n Standard Setup button on the Experiments page, which prompts you to\n select a training dataset. (To go to the _dai_wizard, click New\n Experiment -> Wizard Setup.) Clicking Standard Setup takes you\n directly to the dataset list page:\nYou can also get to the dataset list page from the Experiment Setup page\nby clicking Training Dataset, Test Dataset, or Validation Dataset. The\ndataset list page lets you view a list of datasets that are available\nfor selection. You can also click the link icon next to a particular\ndataset to open the Dataset Details page for that dataset in a new\nbrowser tab.", + "prompt_type": "plain" + }, + { + "output": "2. The Experiment Settings form displays and auto-fills with the\n selected dataset. Optionally enter a custom name for this\n experiment. If you do not add a name, Driverless AI will create one\n for you. 3. Optionally specify a validation dataset and/or a test dataset. 4. Specify the target (response) column. 
Note that not all explanatory\n functionality will be available for multiclass classification\n scenarios (scenarios with more than two outcomes). When the target\n column is selected, Driverless AI automatically provides the target\n column type and the number of rows. If this is a classification\n problem, then the UI shows unique and frequency statistics (Target\n Freq/Most Freq) for numerical columns. If this is a regression\n problem, then the UI shows the dataset mean and standard deviation\n values. 5. The next step is to set the parameters and settings for the\n experiment. (Refer to the Experiment Settings section for more\n information about these settings.)", + "prompt_type": "plain" + }, + { + "output": "Available parameters and\n settings include the following:\n6. After your settings are made, review the Experiment Preview to learn\n what each of the settings means. Note: When changing the algorithms\n used via expert-settings, you may notice that those changes are not\n applied. Driverless AI determines whether to include models and/or\n recipes based on a hierarchy of those expert settings. Refer to the\n Why do my selected algorithms not show up in the Experiment Preview?\n FAQ for more information. 7. Click Launch Experiment to start the experiment. Understanding the Experiment Page\nIn addition to the status, as an experiment is running, the UI also\ndisplays the following:\n- Details about the dataset. - The iteration data (internal validation) for each cross validation\n fold along with the specified scorer value. Click on a specific\n iteration or drag to view a range of iterations. Double click in the\n graph to reset the view.", + "prompt_type": "plain" + }, + { + "output": "During the iteration, Driverless AI\n will train n models. (This is called individuals in the experiment\n preview.) So for any column, you may see the score value for those n\n models for each iteration on the graph. - The variable importance values. 
To view variable importance for a\n specific iteration, just select that iteration in the Iteration Data\n graph. The Variable Importance list will automatically update to\n show variable importance information for that iteration. Hover over\n an entry to view more info. - CPU/Memory information along with Insights (for\n time-series experiments), Scores, Notifications, Logs, and\n Trace info. (Note that Trace is used for development/debugging and\n to show what the system is doing at that moment.) - For classification problems, the lower right section includes a\n toggle between an ROC curve, Precision-Recall graph, Lift chart,\n Gains chart, and GPU Usage information (if GPUs are available).", + "prompt_type": "plain" + }, + { + "output": "Predicted chart, and GPU\n Usage information (if GPUs are available). (Refer to the Experiment\n Graphs section for more information.) Upon completion, an Experiment\n Summary section will populate in the lower right section. - The bottom portion of the experiment screen will show any warnings\n that Driverless AI encounters. You can hide this pane by clicking\n the x icon. []\nFinishing/Aborting Experiments\nYou can finish and/or abort experiments that are currently running. - Finish: Click the Finish button to stop a running experiment. Driverless AI will end the experiment and then complete the\n ensembling and the deployment package. - Abort: After clicking Finish, you have the option to click Abort,\n which terminates the experiment. (You will be prompted to confirm\n the abort.) Aborted experiments will display on the Experiments\n page as Failed. 
You can restart aborted experiments by clicking\n the right side of the experiment, then selecting Restart from Last\n Checkpoint.", + "prompt_type": "plain" + }, + { + "output": "Experiment Settings\n\nThis section includes settings that can be used to customize the\nexperiment like total runtime, reproducibility level, pipeline building,\nfeature brain control, adding config.toml settings and more.", + "prompt_type": "plain" + }, + { + "output": "max_runtime_minutes``\n\nMax Runtime in Minutes Before Triggering the Finish Button\n\nSpecify the maximum runtime in minutes for an experiment. This is\nequivalent to pushing the Finish button once half of the specified time\nvalue has elapsed. Note that the overall enforced runtime is only an\napproximation.\n\nThis value defaults to 1440, which is the equivalent of a 24 hour\napproximate overall runtime. The Finish button will be automatically\nselected once 12 hours have elapsed, and Driverless AI will subsequently\nattempt to complete the overall experiment in the remaining 12 hours.\nSet this value to 0 to disable this setting.\n\nNote that this setting applies per experiment, so if building\nleaderboard models (n), it will apply to each experiment separately (i.e.,\ntotal allowed runtime will be n*24hrs. This time estimate assumes\nrunning each experiment one at a time, sequentially).", + "prompt_type": "plain" + }, + { + "output": "max_runtime_minutes_until_abort``\n\nMax Runtime in Minutes Before Triggering the Abort Button\n\nSpecify the maximum runtime in minutes for an experiment before\ntriggering the abort button. This option preserves experiment artifacts\nthat have been generated for the summary and log zip files while\ncontinuing to generate additional artifacts. This value defaults to\n10080 mins (7 days).\n\nNote that this setting applies per experiment, so if building\nleaderboard models (say n), it will apply to each experiment\nseparately (i.e., total allowed runtime will be n*7days. 
This time estimate\nassumes running each experiment one at a time, sequentially). Also see\ntime_abort.", + "prompt_type": "plain" + }, + { + "output": "pipeline-building-recipe---------------------------- .. container:: dropdown **Pipeline Building Recipe** Specify the Pipeline Building recipe type (overrides GUI settings). Select from the following: - **Auto**: Specifies that all models and features are automatically determined by experiment settings, config.toml settings, and the feature engineering effort. (Default) - **Compliant**: Similar to **Auto** except for the following: - Interpretability is set to 10. - Only uses GLM or booster as 'gblinear'. - :ref:`Fixed ensemble level ` is set to 0. - :ref:`Feature brain level ` is set to 0. - Max feature interaction depth is set to 1 i.e no interactions. - Target transformers is set to 'identity' for regression. - Does not use :ref:`distribution shift ` detection. - :ref:`monotonicity_constraints_correlation_threshold ` is set to 0.", + "prompt_type": "plain" + }, + { + "output": "- Drops features that are not correlated with target by at least 0.01. See :ref:`monotonicity-constraints-drop-low-correlation-features ` and :ref:`monotonicity-constraints-correlation-threshold `. - Does not build an ensemble model i.e set fixed_ensemble_level=0. - No :ref:`feature brain ` is used to ensure every restart is identical. - :ref:`Interaction depth ` is set to 1 i.e no multi-feature interactions done to avoid complexity. - No target transformations applied for regression problems i.e sets :ref:`target_transformer ` to 'identity'. The equivalent config.toml parameter is recipe=['monotonic_gbm']. - :ref:`num_as_cat ` feature transformation is disabled. 
- List of included_transformers | 'OriginalTransformer', #numeric (no clustering, no interactions, no num->cat) | 'CatOriginalTransformer', 'RawTransformer','CVTargetEncodeTransformer', 'FrequentTransformer','WeightOfEvidenceTransformer','OneHotEncodingTransformer', #categorical (but no num-cat) | 'CatTransformer','StringConcatTransformer', # big data only | 'DateOriginalTransformer', 'DateTimeOriginalTransformer', 'DatesTransformer', 'DateTimeDiffTransformer', 'IsHolidayTransformer', 'LagsTransformer', 'EwmaLagsTransformer', 'LagsInteractionTransformer', 'LagsAggregatesTransformer',#dates/time | 'TextOriginalTransformer', 'TextTransformer', 'StrFeatureTransformer', 'TextCNNTransformer', 'TextBiGRUTransformer', 'TextCharCNNTransformer', 'BERTTransformer',#text | 'ImageOriginalTransformer', 'ImageVectorizerTransformer'] #image For reference also see :ref:`Monotonicity Constraints in Driverless AI `.", + "prompt_type": "plain" + }, + { + "output": "- The test set is concatenated with the train set, with the target marked as missing - Transformers that do not use the target are allowed tofit_transform`` across the entirety of the train,\n validation, and test sets. - Has several config.toml expert options\n open-up limits. - nlp_model: Only enable NLP BERT models based on PyTorch to process\n pure text. To avoid slowdown when using this recipe, enabling one or\n more GPUs is strongly recommended. For more information, see\n nlp-in-dai. - included_models = ['TextBERTModel', 'TextMultilingualBERTModel',\n 'TextXLNETModel', 'TextXLMModel','TextRoBERTaModel',\n 'TextDistilBERTModel', 'TextALBERTModel', 'TextCamemBERTModel',\n 'TextXLMRobertaModel'] - enable_pytorch_nlp_transformer = 'off' -\n enable_pytorch_nlp_model = 'on'\n - nlp_transformer: Only enable PyTorch based BERT transformers that\n process pure text. 
To avoid slowdown when using this recipe,\n enabling one or more GPUs is strongly recommended.", + "prompt_type": "plain" + }, + { + "output": "- included_transformers = ['BERTTransformer']\n - excluded_models = ['TextBERTModel', 'TextMultilingualBERTModel',\n 'TextXLNETModel', 'TextXLMModel','TextRoBERTaModel',\n 'TextDistilBERTModel', 'TextALBERTModel', 'TextCamemBERTModel',\n 'TextXLMRobertaModel'] - enable_pytorch_nlp_transformer = 'on' -\n enable_pytorch_nlp_model = 'off'\n - image_model: Only enable image models that process pure images\n (ImageAutoModel). To avoid slowdown when using this recipe, enabling\n one or more GPUs is strongly recommended. For more information, see\n image-model. Notes:\n - This option disables the Genetic Algorithm (GA). - Image insights are only available when this option is selected. - image_transformer: Only enable the ImageVectorizer transformer,\n which processes pure images. For more information, see\n image-embeddings. - unsupervised: Only enable unsupervised transformers, models and\n scorers. See for reference. - gpus_max: Maximize use of GPUs (e.g.", + "prompt_type": "plain" + }, + { + "output": "enable_genetic_algorithm---------------------------- .. container:: dropdown **Enable Genetic Algorithm for Selection and Tuning of Features and Models** Specify whether to enable :ref:`genetic algorithm ` for selection and hyper-parameter tuning of features and models: - **auto**: Default value is 'auto'. This is same as 'on' unless it is a pure NLP or Image experiment. - **on**: Driverless AI genetic algorithm is used for feature engineering and model tuning and selection. - **Optuna**: When 'Optuna' is selected, model hyperparameters are tuned with :ref:`Optuna ` and Driverless AI genetic algorithm is used for feature engineering. In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). 
If Pruner is enabled, as is default, Optuna mode disables mutations of evaluation metric (eval_metric) so pruning uses same metric across trials to compare.", + "prompt_type": "plain" + }, + { + "output": "tournament_style``\nTournament Model for Genetic Algorithm\nSelect a method to decide which models are best at each iteration. This\nis set to Auto by default. Choose from the following:\n- auto: Choose based upon accuracy and interpretability\n- uniform: all individuals in population compete to win as best (can\n lead to all, e.g. LightGBM models in final ensemble, which may not\n improve ensemble performance due to lack of diversity)\n- fullstack: Choose from optimal model and feature types\n- feature: individuals with similar feature types compete (good if\n target encoding, frequency encoding, and other feature sets lead to\n good results)\n- model: individuals with same model type compete (good if multiple\n models do well but some models that do not do as well still\n contribute to improving ensemble)\nFor each case, a round robin approach is used to choose best scores\namong type of models to choose from. If enable_genetic_algorithm=='Optuna', then every individual is\nself-mutated without any tournament during the genetic algorithm .", + "prompt_type": "plain" + }, + { + "output": "make_python_scoring_pipeline``\n\nMake Python Scoring Pipeline\n\nSpecify whether to automatically build a Python Scoring Pipeline for the\nexperiment. Select On or Auto (default) to make the Python Scoring\nPipeline immediately available for download when the experiment is\nfinished. Select Off to disable the automatic creation of the Python\nScoring Pipeline.", + "prompt_type": "plain" + }, + { + "output": "make_mojo_scoring_pipeline``\n\nMake MOJO Scoring Pipeline\n\nSpecify whether to automatically build a MOJO (Java) Scoring Pipeline\nfor the experiment. Select On to make the MOJO Scoring Pipeline\nimmediately available for download when the experiment is finished. 
With\nthis option, any capabilities that prevent the creation of the pipeline\nare dropped. Select Off to disable the automatic creation of the MOJO\nScoring Pipeline. Select Auto (default) to attempt to create the MOJO\nScoring Pipeline without dropping any capabilities.", + "prompt_type": "plain" + }, + { + "output": "mojo_for_predictions------------------------ .. container:: dropdown **Allow Use of MOJO for Making Predictions** Specify whether to use MOJO for making fast, low-latency predictions after the experiment has finished. When this is set to **Auto** (default), the MOJO is only used if the number of rows is equal to or below the value specified bymojo_for_predictions_max_rows``.", + "prompt_type": "plain" + }, + { + "output": "reduce_mojo_size-------------------- .. container:: dropdown **Attempt to Reduce the Size of the MOJO (Small MOJO)** Specify whether to attempt to create a small MOJO scoring pipeline when the experiment is being built. A smaller MOJO leads to less memory footprint during scoring. This setting attempts to reduce the mojo size by limiting experiment's maximum :ref:`interaction depth ` to **3**, setting :ref:`ensemble level ` to **0** i.e no ensemble model for final pipeline and limiting the :ref:`maximum number of features ` in the model to **200**. Note that these settings in some cases can affect the overall model's predictive accuracy as it is limiting the complexity of the feature engineering and model building space. This is disabled by default. The equivalent config.toml setting isreduce_mojo_size``", + "prompt_type": "plain" + }, + { + "output": "make_pipeline_visualization``\n\nMake Pipeline Visualization\n\nSpecify whether to create a visualization of the scoring pipeline at the\nend of an experiment. This is set to Auto by default. Note that the\nVisualize Scoring Pipeline feature is experimental and is not available\nfor deprecated models. 
Visualizations are available for all newly\ncreated experiments.", + "prompt_type": "plain" + }, + { + "output": "benchmark_mojo_latency``\n\nMeasure MOJO Scoring Latency\n\nSpecify whether to measure the MOJO scoring latency at the time of MOJO\ncreation. This is set to Auto by default. In this case, MOJO scoring\nlatency will be measured if the pipeline.mojo file size is less than 100\nMB.", + "prompt_type": "plain" + }, + { + "output": "mojo_building_timeout``\n\nTimeout in Seconds to Wait for MOJO Creation at End of Experiment\n\nSpecify the amount of time in seconds to wait for MOJO creation at the\nend of an experiment. If the MOJO creation process times out, a MOJO can\nstill be made from the GUI or the R and Python clients (the timeout\nconstraint is not applied to these). This value defaults to 1800 sec (30\nminutes).", + "prompt_type": "plain" + }, + { + "output": "mojo_building_parallelism``\n\nNumber of Parallel Workers to Use During MOJO Creation\n\nSpecify the number of parallel workers to use during MOJO creation.\nHigher values can speed up MOJO creation but use more memory. Set this\nvalue to -1 (default) to use all physical cores.", + "prompt_type": "plain" + }, + { + "output": "kaggle_username``\n\nKaggle Username\n\nOptionally specify your Kaggle username to enable automatic submission\nand scoring of test set predictions. If this option is specified, then\nyou must also specify a value for the Kaggle Key option. If you don't\nhave a Kaggle account, you can sign up at https://www.kaggle.com.", + "prompt_type": "plain" + }, + { + "output": "kaggle_key``\n\nKaggle Key\n\nSpecify your Kaggle API key to enable automatic submission and scoring\nof test set predictions. If this option is specified, then you must also\nspecify a value for the Kaggle Username option. 
For more information on\nobtaining Kaggle API credentials, see\nhttps://github.com/Kaggle/kaggle-api#api-credentials.", + "prompt_type": "plain" + }, + { + "output": "kaggle_timeout``\n\nKaggle Submission Timeout in Seconds\n\nSpecify the Kaggle submission timeout in seconds. This value defaults to\n120 sec.", + "prompt_type": "plain" + }, + { + "output": "min_num_rows``\n\nMin Number of Rows Needed to Run an Experiment\n\nSpecify the minimum number of rows that a dataset must contain in order\nto run an experiment. This value defaults to 100.", + "prompt_type": "plain" + }, + { + "output": "reproducibility_level``\n\nReproducibility Level\n\nSpecify one of the following levels of reproducibility. Note that this\nsetting is only used when the reproducible option is enabled in the\nexperiment:\n\n- 1 = Same experiment results for same O/S, same CPU(s), and same\n GPU(s) (Default)\n- 2 = Same experiment results for same O/S, same CPU architecture, and\n same GPU architecture\n- 3 = Same experiment results for same O/S, same CPU architecture\n (excludes GPUs)\n- 4 = Same experiment results for same O/S (best approximation)\n\nThis value defaults to 1.", + "prompt_type": "plain" + }, + { + "output": "seed``\n\nRandom Seed\n\nSpecify a random seed for the experiment. When a seed is defined and the\nreproducible button is enabled (not by default), the algorithm will\nbehave deterministically.", + "prompt_type": "plain" + }, + { + "output": "allow_different_classes_across_fold_splits``\n\nAllow Different Sets of Classes Across All Train/Validation Fold Splits\n\n(Note: Applicable for multiclass problems only.) Specify whether to\nenable full cross-validation (multiple folds) during feature evolution\nas opposed to a single holdout split. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "save_validation_splits``\n\nStore Internal Validation Split Row Indices\n\nSpecify whether to store internal validation split row indices. 
This\nincludes pickles of (train_idx, valid_idx) tuples (numpy row indices for\noriginal training data) for all internal validation folds in the\nexperiment summary ZIP file. Enable this setting for debugging purposes.\nThis setting is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "max_num_classes``\n\nMax Number of Classes for Classification Problems\n\nSpecify the maximum number of classes to allow for a classification\nproblem. A higher number of classes may make certain processes more\ntime-consuming. Memory requirements also increase with a higher number\nof classes. This value defaults to 200.", + "prompt_type": "plain" + }, + { + "output": "max_num_classes_compute_roc------------------------------- .. container:: dropdown **Max Number of Classes to Compute ROC and Confusion Matrix for Classification Problems** Specify the maximum number of classes to use when computing the ROC and CM. When this value is exceeded, the reduction type specified byroc_reduce_type`` is applied. This value defaults to 200 and cannot\n\n be lower than 2.", + "prompt_type": "plain" + }, + { + "output": "max_num_classes_client_and_gui---------------------------------- .. container:: dropdown **Max Number of Classes to Show in GUI for Confusion Matrix** Specify the maximum number of classes to show in the GUI for CM, showing firstmax_num_classes_client_and_gui`` labels. This value\n\n defaults to 10, but any value beyond 6 will result in visually\n truncated diagnostics. Note that if this value is changed in the\n config.toml and the server is restarted, then this setting will only\n modify client-GUI launched diagnostics. To control experiment plots,\n this value must be changed in the expert settings panel.", + "prompt_type": "plain" + }, + { + "output": "roc_reduce_type------------------- .. 
container:: dropdown **ROC/CM Reduction Technique for Large Class Counts** Specify the ROC confusion matrix reduction technique used for large class counts: - **Rows** (Default): Reduce by randomly sampling rows - **Classes**: Reduce by truncating classes to no more than the value specified bymax_num_classes_compute_roc``", + "prompt_type": "plain" + }, + { + "output": "max_rows_cm_ga``\n\nMaximum Number of Rows to Obtain Confusion Matrix Related Plots During\nFeature Evolution\n\nSpecify the maximum number of rows to obtain confusion matrix related\nplots during feature evolution. Note that this doesn't limit final model\ncalculation.", + "prompt_type": "plain" + }, + { + "output": "use_feature_brain_new_experiments``\n\nWhether to Use Feature Brain for New Experiments\n\nSpecify whether to use feature_brain results even if running new\nexperiments. Feature brain can be risky with some types of changes to\nexperiment setup. Even rescoring may be insufficient, so by default this\nis False. For example, one experiment may have training=external\nvalidation by accident, and get high score, and while\nfeature_brain_reset_score='on' means we will rescore, it will have\nalready seen during training the external validation and leak that data\nas part of what it learned from. If this is False, feature_brain_level\njust sets possible models to use and logs/notifies, but does not use\nthese feature brain cached models.", + "prompt_type": "plain" + }, + { + "output": "feature_brain_level``\nModel/Feature Brain Level\nSpecify whether to use H2O.ai brain, which enables local caching and\nsmart re-use (checkpointing) of prior experiments to generate useful\nfeatures and models for new experiments. It can also be used to control\ncheckpointing for experiments that have been paused or interrupted. 
When enabled, this will use the H2O.ai brain cache if the cache file:\n - has any matching column names and types for a similar experiment\n type\n - has classes that match exactly\n - has class labels that match exactly\n - has basic time series choices that match\n - the interpretability of the cache is equal or lower\n - the main model (booster) is allowed by the new experiment\n- -1: Don't use any brain cache (default)\n- 0: Don't use any brain cache but still write to cache. Use case:\n Want to save the model for later use, but we want the current model\n to be built without any brain models. - 1: Smart checkpoint from the latest best individual model.", + "prompt_type": "plain" + }, + { + "output": "The match may not be precise,\n so use with caution. - 2: Smart checkpoint if the experiment matches all column names,\n column types, classes, class labels, and time series options\n identically. Use case: Driverless AI scans through the H2O.ai brain\n cache for the best models to restart from. - 3: Smart checkpoint like level #1 but for the entire population. Tune only if the brain population is of insufficient size. Note that\n this will re-score the entire population in a single iteration, so\n it appears to take longer to complete first iteration. - 4: Smart checkpoint like level #2 but for the entire population. Tune only if the brain population is of insufficient size. Note that\n this will re-score the entire population in a single iteration, so\n it appears to take longer to complete first iteration. - 5: Smart checkpoint like level #4 but will scan over the entire\n brain cache of populations to get the best scored individuals. 
Note\n that this can be slower due to brain cache scanning if the cache is\n large.", + "prompt_type": "plain" + }, + { + "output": "feature_brain2``\nFeature Brain Save Every Which Iteration\nSave feature brain iterations every iter_num %\nfeature_brain_iterations_save_every_iteration == 0, to be able to\nrestart/refit with which_iteration_brain >= 0. This is disabled (0) by\ndefault. - -1: Don't use any brain cache. - 0: Don't use any brain cache but still write to cache. - 1: Smart checkpoint if an old experiment_id is passed in (for\n example, via running \"resume one like this\" in the GUI). - 2: Smart checkpoint if the experiment matches all column names,\n column types, classes, class labels, and time series options\n identically. (default)\n- 3: Smart checkpoint like level #1 but for the entire population. Tune only if the brain population is of insufficient size. - 4: Smart checkpoint like level #2 but for the entire population. Tune only if the brain population is of insufficient size. - 5: Smart checkpoint like level #4 but will scan over the entire\n brain cache of populations (starting from resumed experiment if\n chosen) in order to get the best scored individuals.", + "prompt_type": "plain" + }, + { + "output": "feature_brain3``\n\nFeature Brain Restart from Which Iteration\n\nWhen performing restart or re-fit of type feature_brain_level with a\nresumed ID, specify which iteration to start from instead of only last\nbest. Available options include:\n\n- -1: Use the last best\n- 1: Run one experiment with\n feature_brain_iterations_save_every_iteration=1 or some other number\n- 2: Identify which iteration brain dump you want to restart/refit\n from\n- 3: Restart/Refit from the original experiment, setting\n which_iteration_brain to that number here in expert settings.\n\nNote: If restarting from a tuning iteration, this will pull in the\nentire scored tuning population and use that for feature evolution. 
This\nvalue defaults to -1.", + "prompt_type": "plain" + }, + { + "output": "feature_brain4``\n\nFeature Brain Refit Uses Same Best Individual\n\nSpecify whether to use the same best individual when performing a refit.\nDisabling this setting allows the order of best individuals to be\nrearranged, leading to a better final result. Enabling this setting lets\nyou view the exact same model or feature with only one new feature\nadded. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "feature_brain5``\n\nFeature Brain Adds Features with New Columns Even During Retraining of\nFinal Model\n\nSpecify whether to add additional features from new columns to the\npipeline, even when performing a retrain of the final model. Use this\noption if you want to keep the same pipeline regardless of new columns\nfrom a new dataset. New data may lead to new dropped features due to\nshift or leak detection. Disable this to avoid adding any columns as new\nfeatures so that the pipeline is perfectly preserved when changing data.\nThis is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "force_model_restart_to_defaults``\n\nRestart-Refit Use Default Model Settings If Model Switches\n\nWhen restarting or refitting, specify whether to use the model class's\ndefault settings if the original model class is no longer available. If\nthis is disabled, the original hyperparameters will be used instead.\n(Note that this may result in errors.) This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "min_dai_iterations``\n\nMin DAI Iterations\n\nSpecify the minimum number of Driverless AI iterations for an\nexperiment. This can be used during restarting, when you want to\ncontinue for longer despite a score not improving. This value defaults\nto 0.", + "prompt_type": "plain" + }, + { + "output": "target_transformer---------------------- .. 
container:: dropdown **Select Target Transformation of the Target for Regression Problems** Specify whether to automatically select target transformation for regression problems. Available options include: - auto - identity - identity_noclip - center - standardize - unit_box - log - log_noclip - square - sqrt - double_sqrt - inverse - logit - sigmoid If set to **auto** (default), Driverless AI will automatically pick the best target transformer if the **Accuracy** is set to the value of the tune_target_transform_accuracy_switch configuration option (defaults to 5) or larger. Selecting **identity_noclip** automatically turns off any target transformations. All transformers except for **center**, **standardize**, **identity_noclip** and **log_noclip** perform clipping to constrain the predictions to the domain of the target in the training data, so avoid them if you want to enable extrapolations.", + "prompt_type": "plain" + }, + { + "output": "fixed_num_folds_evolution``\n\nNumber of Cross-Validation Folds for Feature Evolution\n\nSpecify the fixed number of cross-validation folds (if >= 2) for feature\nevolution. Note that the actual number of allowed folds can be less than\nthe specified value, and that the number of allowed folds is determined\nat the time an experiment is run. This value defaults to -1 (auto).", + "prompt_type": "plain" + }, + { + "output": "fixed_num_folds``\n\nNumber of Cross-Validation Folds for Final Model\n\nSpecify the fixed number of cross-validation folds (if >= 2) for the\nfinal model. Note that the actual number of allowed folds can be less\nthan the specified value, and that the number of allowed folds is\ndetermined at the time an experiment is run. This value defaults to -1\n(auto).", + "prompt_type": "plain" + }, + { + "output": "fixed_only_first_fold_model``\n\nForce Only First Fold for Models\n\nSpecify whether to force only the first fold for models. Select from\nAuto (Default), On, or Off. 
Set \"on\" to force only first fold for\nmodels. This is useful for quick runs regardless of data.", + "prompt_type": "plain" + }, + { + "output": "feature_evolution_data_size``\n\nMax Number of Rows Times Number of Columns for Feature Evolution Data\nSplits\n\nSpecify the maximum number of rows times the number of columns allowed for feature evolution data\nsplits (not for the final pipeline). This value defaults to 100,000,000.", + "prompt_type": "plain" + }, + { + "output": "final_pipeline_data_size``\n\nMax Number of Rows Times Number of Columns for Reducing Training Dataset\n\nSpecify the upper limit on the number of rows times the number of\ncolumns for training the final pipeline. This value defaults to\n500,000,000.", + "prompt_type": "plain" + }, + { + "output": "max_validation_to_training_size_ratio_for_final_ensemble``\n\nMaximum Size of Validation Data Relative to Training Data\n\nSpecify the maximum size of the validation data relative to the training\ndata. Smaller values can make the final pipeline model training process\nquicker. Note that final model predictions and scores will always be\nprovided on the full dataset provided. This value defaults to 2.0.", + "prompt_type": "plain" + }, + { + "output": "force_stratified_splits_for_imbalanced_threshold_binary``\n\nPerform Stratified Sampling for Binary Classification If the Target Is\nMore Imbalanced Than This\n\nFor binary classification experiments, specify a threshold ratio of\nminority to majority class for the target column beyond which stratified\nsampling is performed. If the threshold is not exceeded, random sampling\nis performed. This value defaults to 0.01. 
You can choose to always\nperform random sampling by setting this value to 0, or to always perform\nstratified sampling by setting this value to 1.", + "prompt_type": "plain" + }, + { + "output": "last_recipe``\n\nlast_recipe\n\nInternal helper to allow memory of if changed recipe", + "prompt_type": "plain" + }, + { + "output": "feature_brain_save_every_iteration``\n\nFeature Brain Save every which iteration\n\nSpecify whether to save feature brain iterations every iter_num %\nfeature_brain_iterations_save_every_iteration == 0, to be able to\nrestart/refit with which_iteration_brain >= 0. Set to 0 to disable this\nsetting.", + "prompt_type": "plain" + }, + { + "output": "which_iteration_brain``\n\nFeature Brain Restart from which iteration\n\nWhen performing restart or re-fit type feature_brain_level with\nresumed_experiment_id, choose which iteration to start from, instead of\nonly last best -1 means just use last best.\n\nUsage:\n\n - 1) Run one experiment with\n feature_brain_iterations_save_every_iteration=1 or some other\n number\n\n - 2) Identify which iteration brain dump one wants to restart/refit\n from\n\n - 3) Restart/Refit from original experiment, setting\n which_iteration_brain to that number in expert settings\n\nNote: If restart from a tuning iteration, this will pull in entire\nscored tuning population and use that for feature evolution.", + "prompt_type": "plain" + }, + { + "output": "refit_same_best_individual``\n\nFeature Brain refit uses same best individual\n\nWhen doing re-fit from feature brain, if change columns or features,\npopulation of individuals used to refit from may change order of which\nwas best, leading to better result chosen (False case). But sometimes\nyou want to see exact same model/features with only one feature added,\nand then would need to set this to True case. 
That is, if refit with\njust 1 extra column and have interpretability=1, then final model will\nbe same features, with one more engineered feature applied to that new\noriginal feature.", + "prompt_type": "plain" + }, + { + "output": "brain_add_features_for_new_columns``\n\nFeature Brain adds features with new columns even during retraining\nfinal model\n\nWhether to take any new columns and add additional features to pipeline,\neven if doing retrain final model. In some cases, one might have a new\ndataset but only want to keep same pipeline regardless of new columns,\nin which case one sets this to False. For example, new data might lead\nto new dropped features, due to shift or leak detection. To avoid change\nof feature set, one can disable all dropping of columns, but set this to\nFalse to avoid adding any columns as new features, so pipeline is\nperfectly preserved when changing data.", + "prompt_type": "plain" + }, + { + "output": "force_model_restart_to_defaults``\n\nRestart-refit use default model settings if model switches\n\nIf restart/refit and no longer have the original model class available,\nbe conservative and go back to defaults for that model class. If False,\nthen try to keep original hyperparameters, which can fail to work in\ngeneral.", + "prompt_type": "plain" + }, + { + "output": "dump_modelparams_every_scored_indiv``\n\nEnable detailed scored model info\n\nWhether to dump every scored individual's model parameters to\ncsv/tabulated/json file produces files. For example:\nindividual_scored.params.[txt, csv, json]", + "prompt_type": "plain" + }, + { + "output": "fast_approx_num_trees------------------------- .. container:: dropdown **Max number of trees to use for fast approximation** Whenfast_approx=True, specify the maximum number of trees to use. By default, this value is 250. .. 
note:: By default,fast_approx`` is enabled for MLI and AutoDoc and\n\n disabled for Experiment predictions.", + "prompt_type": "plain" + }, + { + "output": "fast_approx_do_one_fold--------------------------- .. container:: dropdown **Whether to use only one fold for fast approximation** Whenfast_approx=True, specify whether to speed up fast approximation further by using only one fold out of all cross-validation folds. By default, this setting is enabled. .. note:: By default,fast_approx`` is enabled for MLI and AutoDoc and\n\n disabled for Experiment predictions.", + "prompt_type": "plain" + }, + { + "output": "fast_approx_do_one_model---------------------------- .. container:: dropdown **Whether to use only one model for fast approximation** Whenfast_approx=True, specify whether to speed up fast approximation further by using only one model out of all ensemble models. By default, this setting is disabled. .. note:: By default,fast_approx`` is enabled for MLI and AutoDoc and\n\n disabled for Experiment predictions.", + "prompt_type": "plain" + }, + { + "output": "fast_approx_contribs_num_trees---------------------------------- .. container:: dropdown **Maximum number of trees to use for fast approximation when making Shapley predictions** Whenfast_approx_contribs=True, specify the maximum number of trees to use for 'Fast Approximation' in GUI when making Shapley predictions and for AutoDoc/MLI. By default, this value is 50. .. note:: By default,fast_approx_contribs`` is enabled for MLI and\n\n AutoDoc.", + "prompt_type": "plain" + }, + { + "output": "fast_approx_contribs_do_one_fold------------------------------------ .. container:: dropdown **Whether to use only one fold for fast approximation when making Shapley predictions** Whenfast_approx_contribs=True, specify whether to speed upfast_approx_contribsfurther by using only one fold out of all cross-validation folds for 'Fast Approximation' in GUI when making Shapley predictions and for AutoDoc/MLI. 
By default, this setting is enabled. .. note:: By default,fast_approx_contribs`` is enabled for MLI and\n\n AutoDoc.", + "prompt_type": "plain" + }, + { + "output": "fast_approx_contribs_do_one_model------------------------------------- .. container:: dropdown **Whether to use only one model for fast approximation when making Shapley predictions** Whenfast_approx_contribs=True, specify whether to speed upfast_approx_contribsfurther by using only one model out of all ensemble models for 'Fast Approximation' in GUI when making Shapley predictions and for AutoDoc/MLI. By default, this setting is enabled. .. note:: By default,fast_approx_contribs`` is enabled for MLI and\n\n AutoDoc.", + "prompt_type": "plain" + }, + { + "output": "autoviz_recommended_transformation``\n\nAutoviz Recommended Transformations\n\nKey-value pairs of column names and transformations that\nAutoviz recommended. Also see\nAutoviz Recommendation Transformer\n.", + "prompt_type": "plain" + }, + { + "output": "Appendix A: Third-Party Integrations\nH2O Driverless AI integrates with a (continuously growing) number of\nthird-party products. Please contact sales@h2o.ai to schedule a\ndiscussion with one of our Solution Engineers for more information. If you are interested in a product not yet listed here, please ask us\nabout it! 
Instance Life-Cycle Management\nThe following products are able to manage (start and stop) Driverless AI\ninstances themselves:\n ---------------------------------------------------------------------\n Name Notes\n ------------------------- -------------------------------------------\n BlueData DAI runs in a BlueData container\n Domino DAI runs in a Domino container\n IBM Spectrum Conductor DAI runs in user mode via TAR SH\n distribution\n IBM Cloud Private (ICP) Uses Kubernetes underneath; DAI runs in a\n docker container; requires HELM chart\n Kubernetes DAI runs in as a long running service via\n Docker container\n Kubeflow Abstraction of Kubernetes; allows\n additional monitoring and management of\n Kubernetes deployments.", + "prompt_type": "plain" + }, + { + "output": "Puddle (from H2O.ai) Multi-tenant orchestration platform for DAI\n instances (not a third party, but listed\n here for completeness)\n SageMaker Bring your own algorithm docker container\n ---------------------------------------------------------------------\nAPI Clients\nThe following products have Driverless AI client API integrations:\n ---------------------------------------------------------------------\n Name Notes\n ---------------- ----------------------------------------------------\n Alteryx Lets users interact with a remote DAI server from\n Alteryx Designer\n Cinchy Data collaboration for the Enterprise, use MOJOs to\n enrich data and use Cinchy data network to train\n models\n Jupyter/Python DAI Python API client library can be downloaded from\n the Web UI of a running instance\n KDB Use KDB as a data source in Driverless AI for\n training\n RStudio/R DAI R API client library can be downloaded from the\n Web UI of a running instance.", + "prompt_type": "plain" + }, + { + "output": "Appendix C: Installed Components and Dependencies\nH2O Driverless AI is an artificial intelligence (AI) platform that\nautomates some of the most difficult data science and machine learning\nworkflows 
such as feature engineering, model validation, model tuning,\nmodel selection and model deployment. It aims to achieve highest\npredictive accuracy, comparable to expert data scientists, but in much\nshorter time thanks to end-to-end automation. Driverless AI also offers\nautomatic visualizations and machine learning interpretability (MLI). Especially in regulated industries, model transparency and explanation\nare just as important as predictive performance. This section describes components that are included with the Driverless AI\nDocker image and information on additional Driverless AI dependencies. Installed Components\nh2oaicore--cp38-cp38-linux_x86_64.whl\nH2O-3: H2O is an open source, in-memory, distributed, fast, and scalable\nmachine learning and predictive analytics platform that allows you to\nbuild machine learning models on big data and provides easy\nproductionalization of those models in an enterprise environment.", + "prompt_type": "plain" + }, + { + "output": "It provides a high-performance version of base R's\u00a0data.frame\u00a0with\nsyntax and feature enhancements for ease of use, convenience, and\nprogramming speed. h2o4gpu-0.2.0+master.b1ef476-cp38-cp38-linux_x86_64.whl: H2O4GPU\u00a0is a\ncollection of GPU solvers provided by\u00a0H2Oai\u00a0with APIs in Python and R.\nThe Python API builds upon the easy-to-use\u00a0scikit-learn\u00a0API and its\nwell-tested CPU-based algorithms. It can be used as a drop-in\nreplacement for scikit-learn (i.e. import h2o4gpu as sklearn) with\nsupport for GPUs on selected (and ever-growing) algorithms. H2O4GPU\ninherits all the existing scikit-learn algorithms and falls back to CPU\nalgorithms when the GPU algorithm does not support an important existing\nscikit-learn class option. The R package is a wrapper around the H2O4GPU\nPython package, and the interface follows standard R conventions for\nmodeling. The DAAL library added for CPU is currently only supported on\nx86_64 architecture.
Python and Other Dependencies for Driverless AI\nPython 3.6: Python is a programming language that lets you work more\nquickly and integrate your systems more effectively.", + "prompt_type": "plain" + }, + { + "output": "pycrypto 2.6.1: The Python Cryptography Toolkit (pycrypto) is a\ncollection of both secure hash functions (such as SHA256 and RIPEMD160)\nand various encryption algorithms (AES, DES, RSA, ElGamal, etc.). The\npackage is structured to make adding new modules easy. This section is\nessentially complete, and the software interface will almost certainly\nnot change in an incompatible way in the future; all that remains to be\ndone is to fix any bugs that show up. If you encounter a bug, please\nreport it in the Launchpad bug tracker. filelock 2.0.13: This package contains a single module that implements a\nplatform-independent file lock in Python, which provides a simple method\nof inter-process communication. numpy 1.14.0 NumPy is the fundamental package for scientific computing\nwith Python. It contains among other components:\n - A powerful N-dimensional array object\n - Sophisticated (broadcasting) functions\n - Tools for integrating C/C++ and Fortran code\n - Useful linear algebra, Fourier transform, and random number\n capabilities\n Besides its obvious scientific uses, NumPy can also be used as an\n efficient multi-dimensional container of generic data.", + "prompt_type": "plain" + }, + { + "output": "This allows NumPy to seamlessly and\n speedily integrate with a wide variety of databases. NumPy is licensed\n under the\u00a0BSD license, enabling reuse with few restrictions. pandas 0.22.0: The Python Data Analysis Library, pandas\u00a0is an open\nsource, BSD-licensed library providing high-performance, easy-to-use\ndata structures and data analysis tools for the\u00a0Python\u00a0programming\nlanguage. requests 2.13.0: Requests\u00a0allows you to send\u00a0organic, grass-fed\u00a0HTTP/1.1\nrequests without the need for manual labor. 
There's no need to manually\nadd query strings to your URLs or to form-encode your POST data. Keep-alive and HTTP connection pooling are 100% automatic, thanks\nto\u00a0urllib3. scikit-learn 0.19.1: Simple and efficient tools for data mining and data\nanalysis, accessible to everybody, and reusable in various contexts. scikit-learn is built on NumPy, SciPy, and matplotlib open source,\ncommercially usable BSD license. scipy 1.0.0: SciPy (pronounced \u201cSigh Pie\u201d) is a Python-based ecosystem\nof open-source software for mathematics, science, and engineering.", + "prompt_type": "plain" + }, + { + "output": "Changing\nthe title is mostly useful in multi-process systems, for example when a\nmaster process is forked: changing the children\u2019s title allows to\nidentify the task each process is busy with. The technique is used\nby\u00a0PostgreSQL\u00a0and the\u00a0OpenSSH Server\u00a0for example. statsmodels 0.8.0: statsmodels\u00a0is a Python module that provides classes\nand functions for the estimation of many different statistical models,\nas well as for conducting statistical tests, and statistical data\nexploration. An extensive list of result statistics are available for\neach estimator. The results are tested against existing statistical\npackages to ensure that they are correct. The package is released under\nthe open source Modified BSD (3-clause) license. toml 0.9.3.1: This is a Python library for parsing and creating\u00a0TOML. The module passes\u00a0the TOML test suite\u00a0which is a fork of\u00a0BurntSushi\u2019s\nTOML test suite. 
TOML\u00a0is a\u00a0configuration file\u00a0format that is easy to\nread due to obvious semantics and aims to be \"minimal\".", + "prompt_type": "plain" + }, + { + "output": "clang: Python bindings for clang from clang release branches\nclang+llvm-4.0.0-x86_64-linux-gnu-ubuntu-16.04/ clang: The LLVM compiler\ninfrastructure supports a wide range of projects, from industrial\nstrength compilers to specialized JIT applications to small research\nprojects. apt-get: This\u00a0is a tool to automatically update your Debian machine and\nget and install debian packages/programs. This tool is a part of\nthe\u00a0DebianPackageManagement\u00a0system. curl: PycURL is a Python interface to\u00a0libcurl, the multiprotocol file\ntransfer library. Similar to the\u00a0urllib\u00a0Python module, PycURL can be\nused to fetch objects identified by a URL from a Python program. Beyond\nsimple fetches however PycURL exposes most of the functionality of\nlibcurl. apt-utils: A package management related utility program. This package\ncontains some less used command line utilities related to package\nmanagement with APT. python-software-properties: This manages the repositories that you\ninstall software from (universe).", + "prompt_type": "plain" + }, + { + "output": "iputils-ping: The iputils package is set of small useful utilities for\nLinux networking. wget: GNU Wget is a\u00a0free software\u00a0package for retrieving files using\nHTTP, HTTPS, FTP and FTPS - the most widely-used Internet protocols. It\nis a non-interactive command line tool, so it can easily be called from\nscripts,\u00a0cron\u00a0jobs, terminals without X-Windows support, etc. cpio: GNU cpio copies files into or out of a cpio or tar archive. The\narchive can be another file on the disk, a magnetic tape, or a pipe. GNU\ncpio supports the following archive formats: binary, old ASCII, new\nASCII, crc, HPUX binary, HPUX old ASCII, old tar, and POSIX.1 tar. 
The\ntar format is provided for compatibility with the\u00a0tar\u00a0program. By\ndefault, cpio creates binary format archives, for compatibility with\nolder cpio programs. When extracting from archives, cpio automatically\nrecognizes which kind of archive it is reading and can read archives\ncreated on machines with a different byte-order. net-tools: A collection of programs that form the base set of the NET-3\nnetworking distribution for the Linux operating system.", + "prompt_type": "plain" + }, + { + "output": "git: Git is a\u00a0free and open source\u00a0distributed version control system\ndesigned to handle everything from small to very large projects with\nspeed and efficiency. zip: zip\u00a0is a compression and file packaging utility for Unix, VMS,\nMSDOS, OS/2, Windows 9x/NT/XP, Minix, Atari, Macintosh, Amiga, and Acorn\nRISC OS. It is analogous to a combination of the Unix commands\u00a0tar(1)\nand\u00a0compress(1) and is compatible with PKZIP (Phil Katz's ZIP for MSDOS\nsystems). dirmngr: Dirmngr is a server for managing and downloading certificate\nrevocation lists (CRLs) for X.509 certificates and for downloading the\ncertificates themselves. Dirmngr also handles OCSP requests as an\nalternative to CRLs. Dirmngr is either invoked internally by gpgsm (from\nGnuPG 2) or when running as a system daemon through\nthe\u00a0dirmngr-client\u00a0tool. curl -sL\u00a0https://deb.nodesource.com/setup_15.x\u00a0| bash - &&: This\nrepository contains the source of\nthe\u00a0NodeSource\u00a0Node.js\u00a0and\u00a0io.js\u00a0Binary Distributions setup and support\nscripts. nodejs: Node.js is a JavaScript runtime built on\u00a0Chrome's V8 JavaScript\nengine.", + "prompt_type": "plain" + }, + { + "output": "The node.js package ecosystem,\u00a0npm, is the\nlargest ecosystem of open source libraries in the world. build-essential: An informational list of build-essential packages. ccache: ccache is a compiler cache. 
It\u00a0speeds up recompilation\u00a0by\ncaching previous compilations and detecting when the same compilation is\nbeing done again. Supported languages are C, C++, Objective-C and\nObjective-C++. ccache is free software, released under the\u00a0GNU General\nPublic License version 3\u00a0or later. libopenblas-dev: Optimized BLAS (linear algebra) library (development\nfiles)\nPBZip2: PBZIP2 is a parallel implementation of the\u00a0bzip2\u00a0block-sorting\nfile compressor that uses pthreads and achieves near-linear speedup on\nSMP machines. The output of this version is fully compatible with bzip2\nv1.0.2 or newer\u00a0(ie: anything compressed with pbzip2 can be decompressed\nwith bzip2). PBZIP2 should work on any system that has a pthreads\ncompatible C++ compiler (such as gcc). It has been tested on: Linux,\nWindows (cygwin & MinGW), Solaris, Tru64/OSF1, HP-UX, OS/2, OSX, and\nIrix.", + "prompt_type": "plain" + }, + { + "output": "Python\u00a02.7.9 and later (on the\npython2 series), and Python\u00a03.4 and later include\u00a0pip\u00a0(pip3\nfor\u00a0Python\u00a03) by default. pip\u00a0is a recursive acronym that can stand for\neither \"Pip\u00a0Installs Packages\" or \"Pip\u00a0Installs\u00a0Python\". setuptools: Allows you to easily download, build, install, upgrade, and\nuninstall Python packages. tensorflow-gpu: An open source machine learning framework for numerical\ncomputation using data flow graphs. psutil: psutil (process and system utilities) is a cross-platform\nlibrary for retrieving information on\u00a0running processes\u00a0and\u00a0system\nutilization\u00a0(CPU, memory, disks, network, sensors) in Python. It is\nuseful mainly for\u00a0system monitoring,\u00a0profiling and limiting process\nresources\u00a0and\u00a0management of running processes. It implements many\nfunctionalities offered by UNIX command line tools such as: ps, top,\nlsof, netstat, ifconfig, who, df, kill, free, nice, ionice, iostat,\niotop, uptime, pidof, tty, taskset, pmap. 
jupyter: The\u00a0Jupyter\u00a0Notebook is an open-source web application that\nallows you to create and share documents that contain live code,\nequations, visualizations and narrative text.", + "prompt_type": "plain" + }, + { + "output": "Interpretation Expert Settings\n\nThe following is a list of the Interpretation expert settings that are\navailable when setting up a new interpretation from the\nMLI page . The name of each setting is preceded by its\nconfig.toml label. For info on explainer-specific expert\nsettings, see explainer-expert-settings.\n\n- interpretation-expert-settings-mli\n- interpretation-expert-settings-nlp\n- interpretation-expert-settings-surrogate\n\nMLI Tab", + "prompt_type": "plain" + }, + { + "output": "mli_sample~~~~~~~~~~~~~~ .. container:: dropdown **Sample All Explainers** Specify whether to perform the interpretation on a sample of the training data. By default, MLI will sample the training dataset if it is greater than 100k rows. (The equivalent config.toml setting ismli_sample_size``.) This is enabled by default. Turn this toggle\n\n off to run MLI on the entire dataset.", + "prompt_type": "plain" + }, + { + "output": "mli_enable_mojo_scorer``\n\nAllow Use of MOJO Scoring Pipeline\n\nUse this option to disable MOJO scoring pipeline. Scoring pipeline is\nchosen automatically (from MOJO and Python pipelines) by default. In\ncase of certain models, MOJO vs. Python choice can impact pipeline\nperformance and robustness.", + "prompt_type": "plain" + }, + { + "output": "mli_fast_approx``\n\nSpeed up predictions with a fast approximation\n\nSpecify whether to speed up predictions with a fast approximation. 
When\nenabled, this setting can reduce the number of trees or cross-validation\nfolds and ultimately reduce the time needed to complete interpretations.\nThis setting is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "mli_custom``\n\nAdd to config.toml via TOML String\n\nUse this input field to add to the Driverless AI server config.toml\nconfiguration file with TOML string.\n\nMLI NLP Tab", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_top_n``\n\nNumber of Tokens Used for MLI NLP Explanations\n\nSpecify the number of tokens used for MLI NLP explanations. To use all\navailable tokens, set this value to -1. By default, this value is set to\n20.", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_sample_limit``\n\nSample Size for NLP Surrogate Models\n\nSpecify the maximum number of records used by MLI NLP explainers. The\ndefault value is 10000.", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_min_df``\n\nMinimum Number of Documents in Which Token Has to Appear\n\nSpecify the minimum number of documents in which token has to appear.\nUse integer values to denote absolute counts and floating-point values\nto denote percentages. 
By default, this value is set to 3.", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_max_df``\n\nMaximum Number of Documents in Which Token Has to Appear", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_min_ngram``\n\nMinimum Value in n-gram Range", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_max_ngram``\n\nMaximum Value in n-gram Range", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_min_token_mode``\n\nMode Used to Choose N Tokens for MLI NLP", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_tokenizer_max_features``\n\nNumber of Top Tokens to Use as Features (Token-based Feature Importance)", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_loco_max_features``\n\nNumber of Top Tokens to Use as Features (LOCO)", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_surrogate_tokens``\n\nNumber of Top Tokens to Use as Features (Surrogate Model)", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_use_stop_words``\n\nStop Words for MLI NLP", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_stop_words``\n\nList of Words to Filter Before Generating Text Tokens", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_append_to_english_stop_words``\n\nAppend List of Custom Stop Words to Default Stop Words\n\nMLI Surrogate Models Tab", + "prompt_type": "plain" + }, + { + "output": "mli_lime_method~~~~~~~~~~~~~~~~~~~ .. container:: dropdown **LIME Method** Select a LIME method of either K-LIME (default) or LIME-SUP. - **K-LIME** (default): creates one global surrogate GLM on the entire training data and also creates numerous local surrogate GLMs on samples formed from *k*-means clusters in the training data. The features used for *k*-means are selected from the Random Forest surrogate model's variable importance. 
The number of features used for *k*-means is the minimum of the top 25% of variables from the Random Forest surrogate model's variable importance and the max number of variables that can be used for *k*-means, which is set by the user in the config.toml setting for mli_max_number_cluster_vars. (Note, if the number of features in the dataset is less than or equal to 6, then all features are used for *k*-means clustering.) The previous setting can be turned off to use all features for k-means by setting use_all_columns_klime_kmeans in the config.toml file to true.", + "prompt_type": "plain" + }, + { + "output": "mli_use_raw_features``\n\nUse Original Features for Surrogate Models\n\nSpecify whether to use original features or transformed features in the\nsurrogate model for the new interpretation. This is enabled by default.\n\nNote: When this setting is disabled, the K-LIME clustering column and\nquantile binning options are unavailable.", + "prompt_type": "plain" + }, + { + "output": "mli_vars_to_pdp``\n\nNumber of Features for Partial Dependence Plot\n\nSpecify the maximum number of features to use when building the Partial\nDependence Plot. Use -1 to calculate Partial Dependence Plot for all\nfeatures. By default, this value is set to 10.", + "prompt_type": "plain" + }, + { + "output": "mli_nfolds``\n\nCross-validation Folds for Surrogate Models\n\nSpecify the number of surrogate cross-validation folds to use (from 0 to\n10). When running experiments, Driverless AI automatically splits the\ntraining data and uses the validation data to determine the performance\nof the model parameter tuning and feature engineering steps. For a new\ninterpretation, Driverless AI uses 3 cross-validation folds by default\nfor the interpretation.", + "prompt_type": "plain" + }, + { + "output": "mli_qbin_count``\n\nNumber of Columns to Bin for Surrogate Models\n\nSpecify the number of columns to bin for surrogate models.
This value\ndefaults to 0.", + "prompt_type": "plain" + }, + { + "output": "mli_sample_size``\n\nSample Size for Surrogate Models\n\nWhen the number of rows is above this limit, sample for surrogate\nmodels. The default value is 100000.", + "prompt_type": "plain" + }, + { + "output": "mli_num_quantiles``\n\nNumber of Bins for Quantile Binning\n\nSpecify the number of bins for quantile binning. By default, this value\nis set to -10.", + "prompt_type": "plain" + }, + { + "output": "mli_dia_sample_size``\n\nSample Size for Disparate Impact Analysis\n\nWhen the number of rows is above this limit, sample for Disparate Impact\nAnalysis (DIA). The default value is 100000.", + "prompt_type": "plain" + }, + { + "output": "mli_pd_sample_size``\n\nSample Size for Partial Dependence Plot\n\nWhen number of rows is above this limit, sample for the Driverless AI\npartial dependence plot. The default value is 25000.", + "prompt_type": "plain" + }, + { + "output": "mli_pd_numcat_num_chart``\n\nUnique Feature Values Count Driven Partial Dependence Plot Binning and\nChart Selection\n\nSpecify whether to use dynamic switching between PDP numeric and\ncategorical binning and UI chart selection in cases where features were\nused both as numeric and categorical by the experiment. This is enabled\nby default.", + "prompt_type": "plain" + }, + { + "output": "mli_pd_numcat_threshold~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. container:: dropdown **Threshold for PD/ICE Binning and Chart Selection** Ifmli_pd_numcat_num_chart`` is enabled, and if the number of\n\n unique feature values is greater than the threshold, then numeric\n binning and chart is used. Otherwise, categorical binning and chart\n is used. The default threshold value is 11.", + "prompt_type": "plain" + }, + { + "output": "mli_sa_sampling_limit``\n\nSample Size for Sensitivity Analysis (SA)\n\nWhen the number of rows is above this limit, sample for Sensitivity\nAnalysis (SA). 
The default value is 500000.", + "prompt_type": "plain" + }, + { + "output": "klime_cluster_col``\n\nk-LIME Clustering Columns\n\nFor k-LIME interpretations, optionally specify which columns to have\nk-LIME clustering applied to.\n\nNote: This setting is not found in the config.toml file.", + "prompt_type": "plain" + }, + { + "output": "qbin_cols``\n\nQuantile Binning Columns\n\nFor k-LIME interpretations, specify one or more columns to generate\ndecile bins (uniform distribution) to help with MLI accuracy. Columns\nselected are added to top n columns for quantile binning selection. If a\ncolumn is not numeric or not in the dataset (transformed features), then\nthe column will be skipped.\n\nNote: This setting is not found in the config.toml file.", + "prompt_type": "plain" + }, + { + "output": "Mac OS X\nThis section describes how to install, start, stop, and upgrade the\nDriverless AI Docker image on Mac OS X. Note that this uses regular\nDocker and not NVIDIA Docker. Note: Support for GPUs and MOJOs is not available on Mac OS X. The installation steps assume that you have a license key for Driverless\nAI. For information on how to obtain a license key for Driverless AI,\nvisit https://h2o.ai/o/try-driverless-ai/. Once obtained, you will be\nprompted to paste the license key into the Driverless AI UI when you\nfirst log in, or you can save it as a .sig file and place it in the\nlicense folder that you will create during the installation process. Caution:\n- This is an extremely memory-constrained environment for experimental\n purposes only. Stick to small datasets! For serious use, please use\n Linux. - Be aware that there are known performance issues with Docker for\n Mac. 
More information is available here:\n https://docs.docker.com/docker-for-mac/osxfs/#technology.", + "prompt_type": "plain" + }, + { + "output": "Min Mem Suitable for\n ----------------------- --------------- --------- -----------------\n Mac OS X No 16 GB Experimentation\n -------------------------------------------------------------------\nInstalling Driverless AI\n1. Retrieve the Driverless AI Docker image from\n https://www.h2o.ai/download/. 2. Download and run Docker for Mac from\n https://docs.docker.com/docker-for-mac/install. 3. Adjust the amount of memory given to Docker to be at least 10 GB. Driverless AI won't run at all with less than 10 GB of memory. You\n can optionally adjust the number of CPUs given to Docker. You will\n find the controls by clicking on (Docker\n Whale)->Preferences->Advanced as shown in the following screenshots. (Don't forget to Apply the changes after setting the desired memory\n value.) [image]\n[image]\n4. On the File Sharing tab, verify that your macOS directories (and\n their subdirectories) can be bind mounted into Docker containers.", + "prompt_type": "plain" + }, + { + "output": "[image]\n5. Set up a directory for the version of Driverless AI within the\n Terminal:\n6. With Docker running, open a Terminal and move the downloaded\n Driverless AI image to your new directory. 7. Change directories to the new directory, then load the image using\n the following command:\n8. Set up the data, log, license, and tmp directories (within the new\n Driverless AI directory):\n9. Optionally copy data into the data directory on the host. The data\n will be visible inside the Docker container at /data. You can also\n upload data after starting Driverless AI. 10. Run docker images to find the image tag. 11. Start the Driverless AI Docker image (still within the new\n Driverless AI directory). Replace TAG below with the image tag. Note\n that GPU support will not be available. 
Note that from version 1.10\n DAI docker image runs with internal tini that is equivalent to using\n --init from docker, if both are enabled in the launch command, tini\n prints a (harmless) warning message.", + "prompt_type": "plain" + }, + { + "output": "Connect to Driverless AI with your browser at\n http://localhost:12345. Stopping the Docker Image\nTo stop the Driverless AI Docker image, type Ctrl + C in the Terminal\n(Mac OS X) or PowerShell (Windows 10) window that is running the\nDriverless AI Docker image. Upgrading the Docker Image\nThis section provides instructions for upgrading Driverless AI versions\nthat were installed in a Docker container. These steps ensure that\nexisting experiments are saved. WARNING: Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\ndirectory and are not automatically upgraded when Driverless AI is\nupgraded. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. If you did not build MLI on a model before upgrading Driverless AI,\n then you will not be able to view MLI on that model after upgrading. Before upgrading, be sure to run MLI jobs on models that you want to\n continue to interpret in future releases.", + "prompt_type": "plain" + }, + { + "output": "Creating Custom Plots\nTo create a custom plot, click the Add Graph button in the upper-right\ncorner and select one of the available plot types. After selecting a\nplot, configure the available settings for that plot type and click\nSave. The custom plot appears on the Visualization page once it has been\ncreated. The following example creates a custom histogram plot for the\nCreditCard-Train dataset:\nThe following is a complete list of available graph types. Bar chart\nThis plot presents categorical data with rectangular bars that are\nproportional to the values they represent. 
The type of marker used to\nrepresent bars determines the bar chart type. The most common marker is\nthe bar marker, which ranges from a lower value (usually zero) to an\nupper value. Also available are the Cleveland dot plot (replaces the bar\nwith a dot located at the upper value) and the area chart (covers the\nbars with a solid area marker). Bars are always plotted against the\ncategories of a categorical variable.", + "prompt_type": "plain" + }, + { + "output": "When creating a bar chart, specify the following options:\n - x variable name: Specify the name of the x variable\n - y variable name: Specify the name of the y variable\n - Transpose: Specify whether to switch the X-axis and Y-axis\n - Sort: Specify whether to sort bars alphabetically by x values\n - Mark: Specify a marker type. Select point to create a Cleveland\n dot plot\nBoxplot\nThis plot presents the fractiles of a distribution. The center of the\nbox represents the median, the edges of a box represent the lower and\nupper quartiles, and the ends of the \"whiskers\" represent that range of\nvalues. When outliers occur, the adjacent whisker is shortened to the\nnext lower or upper value. For variables having only a few values, the\nboxes can be compressed. When creating a boxplot, specify the following options:\n - Variable name: Specify the variable that you want the box to\n represent\n - Transpose: Specify whether to switch the X-axis and Y-axis\nDotplot\nThis plot represents individual data values with dots.", + "prompt_type": "plain" + }, + { + "output": "When creating a dotplot, specify the following options:\n - Variable name: Specify the name of the variable on which dots\n are calculated\n - Mark: Specify a marker type\nGrouped Boxplot\nThis plot is a boxplot where categories are organized into groups and\nsubgroups. 
When creating a grouped boxplot, specify the following options:\n - Variable name: Specify the variable that you want the box to\n represent\n - Group variable name: Specify the name of the grouping variable\n - Transpose: Specify whether to switch the X-axis and Y-axis\nHeatmap\nSee data heatmap. When creating a heatmap, specify the following\noptions:\n - Variable names: Specify one or more variables to use. If none are\n specified, all the variables in the dataset are used\n - Permute: Specify whether to reorder variables using singular value\n decomposition (SVD)\n - Transpose: Specify whether to switch the X-axis and Y-axis\n - Matrix type: Specify a matrix type.", + "prompt_type": "plain" + }, + { + "output": "Each bar groups numbers into ranges by its width, and taller\nbars show that more data falls within a specific range. This plot is\noften used to display the shape and spread of a continuous variable. When creating a histogram, specify the following options:\n - Variable name: Specify the variable name\n - Transformation: Specify whether to use a transformation. Choose\n from log and square root\n - Number of bars: Specify the number of bars to use\n - Mark: Specify a marker type. Use area to create a density\n polygon\nLinear Regression\nThis plot predicts a set of values on a variable y from values on a\nvariable x by fitting a linear function (ax\u2005+\u2005b) so that for any value\non the x variable, this function yields the most probable value on the y\nvariable. The effectiveness of this prediction in a sample of values is\nrepresented by the discrepancies between the y values and their\ncorresponding predicted values. 
When creating a linear regression plot, specify the following options:\n - x variable name: Specify the name of the x variable\n - y variable name: Specify the name of the y variable\n - Mark: Specify a marker type.", + "prompt_type": "plain" + }, + { + "output": "The effectiveness of this prediction in a sample of values is\nrepresented by the discrepancies between the y values and their\ncorresponding predicted values. When creating a LOESS regression plot, specify the following options:\n - x variable name: Specify the name of the x variable\n - y variable name: Specify the name of the y variable\n - Mark: Specify a marker type. Choose from point and square\n - Bandwidth: Specify the interval that represents the proportion\n of cases during the smoothing window. This is set to 0.5 by\n default\nParallel Coordinates Plot\nThis plot is used for comparing multiple variables. Each variable has\nits own vertical axis in the plot, and each profile connects the values\non the axes for a single observation. If the data contains clusters,\nthese profiles are color-coded by their cluster number. When creating a parallel coordinates plot, specify the following\n options:\n - Variable names: Specify one or more variables to use.", + "prompt_type": "plain" + }, + { + "output": "Unique colors are assigned for each cluster ID\nProbability Plot\nThis plot evaluates the skewness of a distribution by plotting two\ncumulative distribution functions against each other. When creating a probability plot, specify the following options:\n - x variable name: Specify the name of the x variable\n - Distribution: Specify a distribution type. Choose from normal\n and uniform\n - Mark: Specify a marker type. Choose from point and square\n - Transpose: Specify whether to switch the X-axis and Y-axis\nQuantile Plot\nThis plot compares two probability distributions by plotting their\nquantiles against each other. 
When creating a quantile plot, specify the following options:\n - x variable name: Specify the name of the x variable\n - y variable name: Specify the name of the y variable\n - Distribution: Specify a distribution type. Choose from normal\n and uniform\n - Mark: Specify a marker type. Choose from point and square\n - Transpose: Specify whether to switch the X-axis and Y-axis\nScatterplot\nThis plot represents the values of two variables (y and x) in a frame\nthat contains one point for each row of the input sample data.", + "prompt_type": "plain" + }, + { + "output": "About Version Support\n\nEach X.Y.Z long-term support (LTS) release of Driverless AI is supported\nfor 18 months. For example, the end of support date for 1.10.4 is April\n13, 2024, which is 18 months after the release date of October 13, 2022.\nNote that the end of support date for each base version is also applied\nto each X.Y.Z.{1,2,3...} release.\n\nTo view end of support dates for recent DAI LTS releases, see the\nDriverless AI prior releases page.", + "prompt_type": "plain" + }, + { + "output": "Explainer (Recipes) Expert Settings\n\nThe following is a list of the explainer-specific expert settings that\nare available when setting up a new interpretation. 
These settings can\nbe accessed when running interpretation from the\nMLI page under recipes tab.\nFor info on general MLI expert settings, see\ninterpretation-expert-settings.\n\n- interpretation-expert-settings-absolute-permutation\n- interpretation-expert-settings-autodoc\n- interpretation-expert-settings-dia\n- interpretation-expert-settings-nlp-pdp\n- interpretation-expert-settings-nlp-vectorizer\n- interpretation-expert-settings-pdp\n- interpretation-expert-settings-sa\n- interpretation-expert-settings-shapley\n- interpretation-expert-settings-shapley-values\n- interpretation-expert-settings-surrogate-dt\n\nAbsolute Permutation Feature Importance Explainer Settings", + "prompt_type": "plain" + }, + { + "output": "mli_sample_size``\n\nSample size\n\nSpecify the sample size for the absolute permutation feature importance\nexplainer. This value defaults to 100000.", + "prompt_type": "plain" + }, + { + "output": "missing_values~~~~~~~~~~~~~~~~~~ .. container:: dropdown **List of values that should be interpreted as missing values** Specify the list of values that should be interpreted as missing values during data import. This applies to both numeric and string columns. Note that 'nan' is always interpreted as a missing value for numeric columns. Example:\"\"\"['',\n'?', 'None', 'nan', 'N/A', 'unknown', 'inf']\"\"``", + "prompt_type": "plain" + }, + { + "output": "autodoc_feature_importance_num_perm``\n\nNumber of Permutations for Feature Importance\n\nSpecify the number of permutations to make per feature when computing\nfeature importance. This is set to 1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_feature_importance_scorer``\n\nFeature Importance Scorer\n\nSpecify the name of the scorer to be used when calculating feature\nimportance. 
Leave this setting unspecified to use the default scorer for\nthe experiment.\n\nMLI AutoDoc Explainer Settings", + "prompt_type": "plain" + }, + { + "output": "autodoc_report_name``\n\nAutoDoc Name\n\nSpecify the name of the AutoDoc.", + "prompt_type": "plain" + }, + { + "output": "autodoc_template``\n\nAutoDoc Template Location\n\nSpecify the AutoDoc template path. Provide the full path to your custom\nAutoDoc template. To generate the standard AutoDoc, leave this field\nempty.", + "prompt_type": "plain" + }, + { + "output": "autodoc_output_type~~~~~~~~~~~~~~~~~~~~~~~ .. container:: dropdown **AutoDoc File Output Type** Specify the AutoDoc file output type. Choose fromdocx(the default value) andmd``.", + "prompt_type": "plain" + }, + { + "output": "autodoc_subtemplate_type``\n\nAutoDoc Sub-Template Type\n\nSpecify the type of sub-templates to use. Choose from the following:\n\n- auto (Default)\n- md\n- docx", + "prompt_type": "plain" + }, + { + "output": "autodoc_max_cm_size``\n\nConfusion Matrix Max Number of Classes\n\nSpecify the maximum number of classes in the confusion matrix. This\nvalue defaults to 10.", + "prompt_type": "plain" + }, + { + "output": "autodoc_num_features``\n\nNumber of Top Features to Document\n\nSpecify the number of top features to display in the document. To\ndisable this setting, specify -1. This is set to 50 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_min_relative_importance``\n\nMinimum Relative Feature Importance Threshold\n\nSpecify the minimum relative feature importance in order for a feature\nto be displayed. This value must be a float >= 0 and <= 1. This is set\nto 0.003 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_include_permutation_feature_importance``\n\nPermutation Feature Importance\n\nSpecify whether to compute permutation-based feature importance. 
This is\ndisabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_feature_importance_num_perm``\n\nNumber of Permutations for Feature Importance\n\nSpecify the number of permutations to make per feature when computing\nfeature importance. This is set to 1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_feature_importance_scorer``\n\nFeature Importance Scorer\n\nSpecify the name of the scorer to be used when calculating feature\nimportance. Leave this setting unspecified to use the default scorer for\nthe experiment.", + "prompt_type": "plain" + }, + { + "output": "autodoc_pd_max_rows~~~~~~~~~~~~~~~~~~~~~~~ .. container:: dropdown **PDP and Shapley Summary Plot Max Rows** Specify the number of rows shown for the partial dependence plots (PDP) and Shapley values summary plot in the AutoDoc. Random sampling is used for datasets with more than theautodoc_pd_max_rows``\n\n limit. This value defaults to 10000.", + "prompt_type": "plain" + }, + { + "output": "autodoc_pd_max_runtime``\n\nPDP Max Runtime in Seconds\n\nSpecify the maximum number of seconds Partial Dependency computation can\ntake when generating a report. Set to -1 for no time limit.", + "prompt_type": "plain" + }, + { + "output": "autodoc_out_of_range``\n\nPDP Out of Range\n\nSpecify the number of standard deviations outside of the range of a\ncolumn to include in partial dependence plots. This shows how the model\nreacts to data it has not seen before. This is set to 3 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_num_rows``\n\nICE Number of Rows\n\nSpecify the number of rows to include in PDP and ICE plots if individual\nrows are not specified. This is set to 0 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_population_stability_index``\n\nPopulation Stability Index\n\nSpecify whether to include a population stability index if the\nexperiment is a binary classification or regression problem. 
This is\ndisabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_population_stability_index_n_quantiles``\n\nPopulation Stability Index Number of Quantiles\n\nSpecify the number of quantiles to use for the population stability\nindex. This is set to 10 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_prediction_stats``\n\nPrediction Statistics\n\nSpecify whether to include prediction statistics information if the\nexperiment is a binary classification or regression problem. This value\nis disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_prediction_stats_n_quantiles``\n\nPrediction Statistics Number of Quantiles\n\nSpecify the number of quantiles to use for prediction statistics. This\nis set to 20 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_response_rate``\n\nResponse Rates Plot\n\nSpecify whether to include response rates information if the experiment\nis a binary classification problem. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_response_rate_n_quantiles``\n\nResponse Rates Plot Number of Quantiles\n\nSpecify the number of quantiles to use for response rates information.\nThis is set to 10 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_gini_plot``\n\nShow GINI Plot\n\nSpecify whether to show the GINI plot. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_enable_shapley_values``\n\nEnable Shapley Values\n\nSpecify whether to show Shapley values results in the AutoDoc. This is\nenabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_global_klime_num_features``\n\nGlobal k-LIME Number of Features\n\nSpecify the number of features to show in a k-LIME global GLM\ncoefficients table. 
This value must be an integer greater than 0 or -1.\nTo show all features, set this value to -1.", + "prompt_type": "plain" + }, + { + "output": "autodoc_global_klime_num_tables``\n\nGlobal k-LIME Number of Tables\n\nSpecify the number of k-LIME global GLM coefficients tables to show in\nthe AutoDoc. Set this value to 1 to show one table with coefficients\nsorted by absolute value. Set this value to 2 to show two tables - one\nwith the top positive coefficients and another with the top negative\ncoefficients. This value is set to 1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_data_summary_col_num``\n\nNumber of Features in Data Summary Table\n\nSpecify the number of features to be shown in the data summary table.\nThis value must be an integer. To show all columns, specify any value\nlower than 1. This is set to -1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_list_all_config_settings``\n\nList All Config Settings\n\nSpecify whether to show all config settings. If this is disabled, only\nsettings that have been changed are listed. All settings are listed when\nenabled. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_keras_summary_line_length``\n\nKeras Model Architecture Summary Line Length\n\nSpecify the line length of the Keras model architecture summary. This\nvalue must be either an integer greater than 0 or -1. To use the default\nline length, set this value to -1 (default).", + "prompt_type": "plain" + }, + { + "output": "autodoc_transformer_architecture_max_lines``\n\nNLP/Image Transformer Architecture Max Lines\n\nSpecify the maximum number of lines shown for advanced transformer\narchitecture in the Feature section. 
Note that the full architecture can\nbe found in the appendix.", + "prompt_type": "plain" + }, + { + "output": "autodoc_full_architecture_in_appendix``\n\nAppendix NLP/Image Transformer Architecture\n\nSpecify whether to show the full NLP/Image transformer architecture in\nthe appendix. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_appendix_results_table``\n\nFull GLM Coefficients Table in the Appendix\n\nSpecify whether to show the full GLM coefficient table(s) in the\nappendix. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_models``\n\nGLM Coefficient Tables Number of Models\n\nSpecify the number of models for which a GLM coefficients table is shown\nin the AutoDoc. This value must be -1 or an integer >= 1. Set this value\nto -1 to show tables for all models. This is set to 1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_folds``\n\nGLM Coefficient Tables Number of Folds Per Model\n\nSpecify the number of folds per model for which a GLM coefficients table\nis shown in the AutoDoc. This value must be -1 (default) or an\ninteger >= 1 (-1 shows all folds per model).", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_coef``\n\nGLM Coefficient Tables Number of Coefficients\n\nSpecify the number of coefficients to show within a GLM coefficients\ntable in the AutoDoc. This is set to 50 by default. Set this value to -1\nto show all coefficients.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_classes``\n\nGLM Coefficient Tables Number of Classes\n\nSpecify the number of classes to show within a GLM coefficients table in\nthe AutoDoc. Set this value to -1 to show all classes. This is set to 9\nby default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_num_histogram_plots``\n\nNumber of Histograms to Show\n\nSpecify the number of top features for which to show histograms. 
This is\nset to 10 by default.\n\nDisparate Impact Analysis Explainer Settings\n\nFor information on Disparate Impact Analysis in Driverless AI, see\ndai-dia. The following is a list of parameters that can be toggled from\nthe recipes tab of the MLI page when running a new interpretation.", + "prompt_type": "plain" + }, + { + "output": "dia_cols``\n\nList of Features for Which to Compute DIA\n\nSpecify a list of specific features for which to compute DIA.", + "prompt_type": "plain" + }, + { + "output": "cut_off``\n\nCut Off\n\nSpecify a cut off when performing DIA.", + "prompt_type": "plain" + }, + { + "output": "maximize_metric``\n\nMaximize Metric\n\nSpecify a metric to use when computing DIA. Choose from the following:\n\n- F1\n- F05\n- F2\n- MCC", + "prompt_type": "plain" + }, + { + "output": "use_holdout_preds``\n\nUse Internal Holdout Predictions\n\nSpecify whether to use internal holdout predictions when computing DIA.\nThis is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "sample_size``\n\nSample Size for Disparate Impact Analysis\n\nSpecify the sample size for Disparate Impact Analysis. By default, this\nvalue is set to 100000.", + "prompt_type": "plain" + }, + { + "output": "max_card``\n\nMax Cardinality for Categorical Variables\n\nSpecify the max cardinality for categorical variables. By default, this\nvalue is set to 10.", + "prompt_type": "plain" + }, + { + "output": "min_card``\n\nMinimum Cardinality for Categorical Variables\n\nSpecify the minimum cardinality for categorical variables. By default,\nthis value is set to 2.", + "prompt_type": "plain" + }, + { + "output": "num_card``\n\nMax Cardinality for Numeric Variables to be Considered Categorical\n\nSpecify the max cardinality for numeric variables to be considered\ncategorical. 
By default, this value is set to 25.", + "prompt_type": "plain" + }, + { + "output": "fast_approx``\n\nSpeed Up Predictions With a Fast Approximation\n\nSpecify whether to increase the speed of predictions with a fast\napproximation. This is enabled by default.\n\nNLP Partial Dependence Plot Explainer Settings", + "prompt_type": "plain" + }, + { + "output": "max_tokens``\n\nNumber of text tokens\n\nSpecify the number of text tokens for the NLP Partial Dependence plot.\nThis value defaults to 20.", + "prompt_type": "plain" + }, + { + "output": "custom_tokens~~~~~~~~~~~~~~~~~ .. container:: dropdown **List of custom text tokens** Specify a list of custom text tokens for which to compute NLP partial dependence. For example,[\"text_feature('word_1')\"], wheretext_feature``\nis the name of the model text feature.\n\nNLP Vectorizer + Linear Model Text Feature Importance Explainer Settings", + "prompt_type": "plain" + }, + { + "output": "txt_cols``\n\nText feature for which to compute explanation\n\nSpecify the text feature for which to compute explanation.", + "prompt_type": "plain" + }, + { + "output": "cut_off``\n\nCut off for deciphering binary class outcome\n\nSpecify the cut off for deciphering binary class outcome based on DAI\nmodel predictions. Any DAI prediction greater than the cut off is the\ntarget label and any DAI prediction less than the cut off is the\nnon-target label.", + "prompt_type": "plain" + }, + { + "output": "maximize_metric``\n\nCut off based on a metric to maximize\n\nCalculate cut off based on a metric to maximize, which will decipher\nbinary class outcome based on DAI model predictions. Any DAI prediction\ngreater than the cut off is the target label and any DAI prediction less\nthan the cut off is the non-target label. 
It should be noted that\nspecifying a cut off AND a max metric will give precedence to the cut\noff.\n\nPartial Dependence Plot Explainer Settings\n\nFor information on Partial Dependence Plots in Driverless AI, see\npartial-dependence-plot. The following is a list of parameters that can\nbe toggled from the recipes tab of the MLI page when running a new\ninterpretation.", + "prompt_type": "plain" + }, + { + "output": "sample_size``\n\nSample Size for Partial Dependence Plot\n\nWhen the number of rows is above this limit, sample for the Driverless AI\npartial dependence plot.", + "prompt_type": "plain" + }, + { + "output": "max_features``\n\nPartial Dependence Plot Number of Features\n\nSpecify the number of features that can be viewed on the partial\ndependence plot. By default, this is set to 10. To view all features,\nset this value to -1.", + "prompt_type": "plain" + }, + { + "output": "features``\n\nPartial Dependence Plot Feature List\n\nSpecify a list of features for the partial dependence plot.", + "prompt_type": "plain" + }, + { + "output": "oor_grid_resolution``\n\nPDP Number of Out of Range Bins\n\nSpecify the number of out of range bins for the partial dependence plot.\nBy default, this is set to 0.", + "prompt_type": "plain" + }, + { + "output": "qtile_grid_resolution``\n\nPDP Quantile Binning\n\nSpecify the total quantile points used to create bins. By default, this\nis set to 0.", + "prompt_type": "plain" + }, + { + "output": "grid_resolution``\n\nPDP Observations Per Bin\n\nSpecify the number of equally spaced points used to create bins. By\ndefault, this is set to 20.", + "prompt_type": "plain" + }, + { + "output": "center``\n\nCenter PDP Using ICE Centered at 0\n\nSpecify whether to center the partial dependence plot using ICE centered at\n0. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "sort_bins``\n\nEnsure Bin Values Sorting\n\nSpecify whether to ensure bin values sorting. 
This is enabled by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "histograms``\n\nEnable Histograms\n\nSpecify whether to enable histograms for the partial dependence plot.\nThis is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "qtile-bins~~~~~~~~~~~~~~ .. container:: dropdown **Per-Feature Quantile Binning** Specify per-feature quantile binning. For example, if you select features F1 and F2, this parameter can be specified as'{\"F1\":\n2,\"F2\": 5}'``.\n\n Note: You can set all features to use the same quantile binning with\n the quantile-bins parameter and then adjust the quantile binning for a\n subset of PDP features with this parameter.", + "prompt_type": "plain" + }, + { + "output": "1_frame``\n\nEnable PDP Calculation Optimization\n\nSpecify whether to enable PDP calculation optimization, which minimizes\nthe number of predictions by combining per-bin frames together. By\ndefault, this is set to 'Auto'.", + "prompt_type": "plain" + }, + { + "output": "numcat_num_chart``\n\nUnique Feature Values Count-Driven PDP Binning and Chart Selection\n\nSpecify whether to use dynamic switching between PDP numeric and\ncategorical binning and UI chart selection in cases where features were\nused both as numeric and categorical by the experiment. This is enabled\nby default.", + "prompt_type": "plain" + }, + { + "output": "numcat_threshold~~~~~~~~~~~~~~~~~~~~ .. container:: dropdown **Threshold for PD/ICE Binning and Chart Selection** Ifmli_pd_numcat_num_chart`` is enabled, and if the number of\n\n unique feature values is greater than the threshold, then numeric\n binning and chart is used. Otherwise, categorical binning and chart\n is used. The default threshold value is 11.\n\nSensitivity Analysis Explainer Settings", + "prompt_type": "plain" + }, + { + "output": "sample_size``\n\nSample Size for Sensitivity Analysis (SA)\n\nWhen the number of rows is above this limit, sample for Sensitivity\nAnalysis (SA). 
The default value is 500000.\n\nShapley Summary Plot Explainer Settings\n\nFor information on Shapley Summary Plots in Driverless AI, see\ndai-shapley-summary. The following is a list of parameters that can be\ntoggled from the recipes tab of the MLI page when running a new\ninterpretation.", + "prompt_type": "plain" + }, + { + "output": "max_features``\n\nMaximum Number of Features to be Shown\n\nSpecify the maximum number of features that are shown in the plot. By\ndefault, this value is set to 50.", + "prompt_type": "plain" + }, + { + "output": "sample_size``\n\nSample Size\n\nSpecify the sample size for the plot. By default, this value is set to\n20000.", + "prompt_type": "plain" + }, + { + "output": "x_resolution``\n\nX-Axis Resolution\n\nSpecify the number of Shapley value bins. By default, this value is set\nto 500.", + "prompt_type": "plain" + }, + { + "output": "drilldown_charts``\n\nEnable Creation of Per-Feature Shapley / Feature Value Scatter Plots\n\nSpecify whether to enable the creation of per-feature Shapley or feature\nvalue scatter plots. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "fast_approx``\n\nSpeed Up Predictions With a Fast Approximation\n\nSpecify whether to increase the speed of predictions with a fast\napproximation. This is enabled by default.\n\nShapley Values for Original Features Settings", + "prompt_type": "plain" + }, + { + "output": "sample_size``\n\nSample Size for Naive Shapley\n\nWhen the number of rows is above this limit, sample for Naive Shapley.\nBy default, this value is set to 100000.", + "prompt_type": "plain" + }, + { + "output": "fast_approx``\n\nSpeed Up Predictions With a Fast Approximation\n\nSpecify whether to increase the speed of predictions with a fast\napproximation. This is enabled by default.\n\nSurrogate Decision Tree Explainer Settings\n\nFor information on Surrogate Decision Tree Plots in Driverless AI, see\ndecision-tree. 
The following is a list of parameters that can be toggled\nfrom the recipes tab of the MLI page when running a new interpretation.", + "prompt_type": "plain" + }, + { + "output": "dt_tree_depth``\n\nDecision Tree Depth\n\nSpecify the depth of the decision tree. By default, this value is set to\n3.", + "prompt_type": "plain" + }, + { + "output": "nfolds``\n\nNumber of CV Folds\n\nSpecify the number of CV folds to use. By default, this value is set to\n0.", + "prompt_type": "plain" + }, + { + "output": "qbin_cols``\n\nQuantile Binning Columns\n\nSpecify quantile binning columns.", + "prompt_type": "plain" + }, + { + "output": "qbin_count``\n\nQuantile Bins Count\n\nSpecify the number of quantile bins. By default, this value is set to 0.", + "prompt_type": "plain" + }, + { + "output": "Building Models in Driverless AI\n\nlaunching ga modeling_before_you_begin running-experiment time-series\nnlp image-processing unsupervised", + "prompt_type": "plain" + }, + { + "output": "References\nAdebayo, Julius A. \"Fairml: Toolbox for diagnosing bias in predictive\nmodeling.\" Master\u2019s Thesis, MIT, 2016. Breiman, Leo. \"Statistical Modeling: The Two Cultures (with comments and\na rejoinder by the author).\" Statistical Science 16, no. 3, 2001. Craven, Mark W. and Shavlik, Jude W. \"Extracting tree structured\nrepresentations of trained networks.\" Advances in Neural Information\nProcessing Systems, 1996. Goldstein, Alex, Kapelner, Adam, Bleich, Justin, and Pitkin, Emil. \"Peeking inside the black box: Visualizing statistical learning with\nplots of individual conditional expectation.\" Journal of Computational\nand Graphical Statistics, no. 24, 2015. Groeneveld, R.A. and Meeden, G. (1984), \u201cMeasuring Skewness and\nKurtosis.\u201d The Statistician, 33, 391-399. Hall, Patrick, Wen Phan, and SriSatish Ambati. \u201cIdeas for Interpreting\nMachine Learning.\u201d O\u2019Reilly Ideas. O\u2019Reilly Media, 2017. Hartigan, J. A. and Mohanty, S. 
(1992), \u201cThe RUNT test for\nmultimodality,\u201d Journal of Classification, 9, 63\u201370.", + "prompt_type": "plain" + }, + { + "output": "Local Authentication Example\nThis section describes how to enable local authentication in Driverless\nAI. Docker Image Installs\nTo enable authentication in Docker images, specify the authentication\nenvironment variable that you want to use. Each variable must be\nprepended with DRIVERLESS_AI. The example below starts Driverless AI\nwith environment variables that enable the following:\n- Local authentication when starting Driverless AI\n- S3 and HDFS access (without authentication)\n nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -p 12345:12345 \\\n -u `id -u`:`id -g` \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,s3,hdfs\" \\\n -e DRIVERLESS_AI_AUTHENTICATION_METHOD=\"local\" \\\n -e DRIVERLESS_AI_LOCAL_HTPASSWD_FILE=\"\" \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\nNative Installs\nNative installs include DEBs, RPMs, and TAR SH installs.", + "prompt_type": "plain" + }, + { + "output": "Completed Experiment Page\nThe following sections describe the completed experiment page. - completed-actions\n- completed-insights-scores\nCompleted Experiment Actions\nThe following is a description of the actions that can be performed\nafter the status of an experiment changes from Running to Complete. []\n- Interpret This Model: Create an interpretation for the model. For\n more information, see interpreting_a_model. - Diagnose Model on New Dataset: For more information, see\n diagnosing_a_model. - Model Actions drop-down:\n - Predict: See Score_On_Another_Dataset. - Transform Dataset: See transform_dataset. (Not available for\n Time Series experiments.) - Fit & Transform Dataset: See fit_and_transform_dataset. (Not\n available for Time Series experiments.) 
- Shapley Values drop-down: Download\n Shapley values for original or transformed\n features. Driverless AI calls XGBoost and LightGBM SHAP\n functions to get contributions for transformed features.", + "prompt_type": "plain" + }, + { + "output": "For more information, see\n Shapley values in DAI . Select Fast Approximation\n to make Shapley predictions using only a single fold and model\n from all of the available folds and models in the ensemble. For more information on the fast approximation options, refer\n to the fast_approx_num_trees and\n fast_approx_do_one_fold_one_model\n config.toml settings . - Original Features (Fast Approximation)\n - Original Features\n - Transformed Features (Fast Approximation)\n - Transformed Features\n - Export: Export the experiment. For more information, see\n export_import. - Visualize Scoring Pipeline (Experimental): View a visualization of\n the experiment scoring pipeline. For more information, refer to\n visualize_scoring_pipeline. - Download Scoring Pipeline drop-down:\n - Download Python Scoring Pipeline: Download a standalone Python\n scoring pipeline for H2O Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "- Download MOJO Scoring Pipeline: A standalone Model Object,\n Optimized scoring pipeline. For more information, refer to\n mojo_scoring_pipelines. (Note that this option is not\n available for TensorFlow or RuleFit models.) - (If h2o_mlops_ui_url is specified) Go to MLOps: When this button is\n clicked, a prompt is displayed on the screen. To open H2O MLOps in a\n new tab, click OK.\n- (If gui_enable_deploy_button=true) Deploy: Deploy the model. Note\n that by default, this button is disabled, and that the Completed\n Experiment -> Deploy functionality will be deprecated in version\n 1.10.5. For more information, refer to deployment. - Download Predictions: For regression experiments, output includes\n predictions with lower and upper bounds. 
For classification\n experiments, output includes probability for each class and labels\n created by using the threshold_scorer. For binary problems, F1 is\n the default threshold_scorer, so if a validation set is provided,\n then the threshold for max F1 on the validation set is used to\n create the labels.", + "prompt_type": "plain" + }, + { + "output": "For multiclass problems, argmax is used to create the\n labels. - Training (Holdout) Predictions: In CSV format, available if a\n validation set was not provided. - Validation Set Predictions: In CSV format, available if a\n validation set was provided. - Test Set Predictions: In CSV format, available if a test\n dataset is used. - Download Summary & Logs: Download a zip file containing the\n following files. For more information, refer to the\n experiment_summary section. - Experiment logs (regular and anonymized)\n - A summary of the experiment\n - The experiment features along with their relative importance\n - The individual_recipe for the experiment\n - Ensemble information\n - An experiment preview\n - Word version of an auto-generated report for the experiment\n - A target transformations tuning leaderboard\n - A tuning leaderboard\n- Download AutoDoc: Download an auto-generated report for the\n experiment as a Word (DOCX) document.", + "prompt_type": "plain" + }, + { + "output": "Note that this option is not\n available for deprecated models. For more information, see autodoc. - Tune Experiment drop-down: Tune the completed experiment by using\n the following options:\n - New / Continue: Select one of the following options:\n - With same settings: Create a new experiment that copies\n the setup of the original experiment. Selecting this\n option takes you to the Experiment Setup page, where you\n can change any parameter of the original experiment. 
- From last checkpoint: Create a new experiment that\n copies the setup of the original experiment and\n continues from the last iteration's checkpoint of models\n and features. Selecting this option takes you to the\n Experiment Setup page, where you can change any\n parameter of the original experiment. - Retrain / Refit: Retrain the experiment\u2019s final pipeline. For\n more information, see retrain.", + "prompt_type": "plain" + }, + { + "output": "Experiment performance\nThis page describes the factors that contribute to the performance of\nDriverless AI experiments. Each completed experiment iteration in Driverless AI experiments is a\nfitted model, but you can control the number of iterations with the time\ndial and the parameter_tuning_num_models TOML config mentioned in the\nfollowing section. Additionally, each model takes some number of model\niterations. XGBoost builds trees with a default up to about 3000 trees,\nbut this can be modified with the max_nestimators TOML config mentioned\nin the following section. List of TOML configs that can affect performance\nThe following list describes a variety of controls over the experiment\nand model runtimes:\n- Set max_runtime_minutes to a smaller number of minutes, e.g. 60 for\n 1 hour allowed. By default, DAI uses minimum of its estimate of an\n experiment runtime and max_runtime_minutes, or greater than 1 hour\n as chosen by min_auto_runtime_minutes. - Some algorithms perform much better on GPUs, like XGBoost, Bert, and\n Image models.", + "prompt_type": "plain" + }, + { + "output": "- Set the time dial to a lower value, which will do fewer models in\n tuning and evolution phases. - Set the interpretability dial to a larger value, which will more\n aggressively prune weak features, prune weak base models in\n ensemble, and avoid high-order feature interactions (interaction\n depth). You can also set fixed_feature_interaction_depth to control\n interaction depth directly. 
- Set parameter_tuning_num_models to a fixed non-zero but small value,\n to directly control number of tuning models instead of set\n automatically by dials. - Set the max_nestimators TOML config to a lower value (for example,\n 500, 1000, 1500, or 2000) instead of the default value of\n 3000. This controls the final model, and via\n max_nestimators_feature_evolution_factor (default 0.2), controls the\n max for tuning and evolution models. Sometimes the data and model\n are such that many trees continue to learn, but the gains are\n minimal for the metric chosen.", + "prompt_type": "plain" + }, + { + "output": "For RF and Dart, change n_estimators_list_no_early_stopping instead. - If the system is used by a single user, set exclusive_mode to\n moderate. - Set enable_early_stopping_threshold to 0.01-0.1, which for (only)\n LightGBM will avoid using too many trees when evaluation metric for\n tree building has relative change less than this value. - Set max_abs_score_delta_train_valid and\n max_rel_score_delta_train_valid to a non-zero value to limit the\n number of trees by difference between train and valid scores on\n metric chosen to optimize. - Set reduce_mojo_size=True. In cases where the MOJO is too large or\n slow, you can also set the nfeatures_max TOML config to a value that\n is lower than the number of features you have. This lets you avoid\n too many features. - Set the min_learning_rate_final to a higher value (for example,\n 0.03). You can set max_learning_rate_final equal to\n min_learning_rate_final to force a fixed learning rate in final\n model.", + "prompt_type": "plain" + }, + { + "output": "- Set nfeatures_max to limit the number of features. This is useful in\n conjunction with ngenes_max to control the maximum number of\n transformations (each could make 1 or more features). - Set ensemble_level and fixed_ensemble_level to smaller values, e.g. 0 or 1, to limit the number of base models in final model. 
- Set fixed_fold_reps to a smaller value, e.g. 1, to limit the number\n of repeats. - Set max_max_depth to a smaller value, e.g. 8, to avoid trying larger\n depths for tree models. - Set max_max_bin to a smaller value, e.g. 128, to avoid larger\n max_bin values for tree models. - If TensorFlow MLP model is used and reproducible is set, only 1 core\n is used, unless you set\n tensorflow_use_all_cores_even_if_reproducible_true to true. This\n loses reproducibility for the TensorFlow model, but the rest of DAI\n will be reproducible. Note that the runtime estimate doesn't take into account the number of\ntrees needed for your data. The more trees needed by your data, the\ngreater the amount of time needed to complete an experiment.", + "prompt_type": "plain" + }, + { + "output": "The F0.5 score is the weighted harmonic mean of the precision and recall\n(given a threshold value). Unlike the F1 score, which gives equal weight\nto precision and recall, the F0.5 score gives more weight to precision\nthan to recall. More weight should be given to precision for cases where\nFalse Positives are considered worse than False Negatives. For example,\nif your use case is to predict which products you will run out of, you\nmay consider False Positives worse than False Negatives. In this case,\nyou want your predictions to be very precise and only capture the\nproducts that will definitely run out. If you predict a product will\nneed to be restocked when it actually doesn't, you incur cost by having\npurchased more inventory than you actually need. 
F05 equation:\n$$F0.5 = 1.25 \\;\\Big(\\; \\frac{(precision) \\; (recall)}{((0.25) \\; (precision)) + recall}\\; \\Big)$$\nWhere:\n- precision is the positive observations (true positives) the model\n correctly identified from all the observations it labeled as\n positive (the true positives + the false positives).", + "prompt_type": "plain" + }, + { + "output": "Missing and Unseen Levels Handling\nThis section describes how missing and unseen levels are handled by each\nalgorithm during training and scoring. How Does the Algorithm Handle Missing Values During Training? LightGBM, XGBoost, RuleFit\nDriverless AI treats missing values natively. (I.e., a missing value is\ntreated as a special value.) Experiments rarely benefit from imputation\ntechniques, unless the user has a strong understanding of the data. GLM\nDriverless AI automatically performs mean value imputation (equivalent\nto setting the value to zero after standardization). TensorFlow\nDriverless AI provides an imputation setting for TensorFlow in the\nconfig.toml file: tf_nan_impute_value (post-normalization). If you set\nthis option to 0, then missing values will be imputed by the mean. Setting it to (for example) +5 will specify 5 standard deviations above\nthe mean of the distribution. The default value in Driverless AI is -5,\nwhich specifies that TensorFlow will treat missing values as outliers on\nthe negative end of the spectrum.", + "prompt_type": "plain" + }, + { + "output": "FTRL\nIn FTRL, missing values have their own representation for each datable\ncolumn type. These representations are used to hash the missing value,\nwith their column's name, to an integer. This means FTRL replaces\nmissing values with special constants that are the same for each column\ntype, and then treats these special constants like a normal data value. 
Unsupervised Algorithms\nFor unsupervised algorithms, standardization in the\npre-transformation layer (where it is decided which columns and column\nencodings are fed in for clustering) is performed by ignoring any\nmissing values. Scikit-learn\u2019s StandardScaler is used internally during\nthe standardization process. Missing values are then replaced with 0 for\nfurther calculations or clustering. How Does the Algorithm Handle Missing Values During Scoring (Production)? LightGBM, XGBoost, RuleFit\nIf missing data is present during training, these tree-based algorithms\nlearn the optimal direction for missing data for each split (left or\nright).", + "prompt_type": "plain" + }, + { + "output": "If no missing data is present during training (for a particular\nfeature), then the majority path is followed if the value is missing. GLM\nMissing values are replaced by the mean value (from training), same as\nin training. TensorFlow\nMissing values are replaced by the same value as specified during\ntraining (parameterized by tf_nan_impute_value). FTRL\nTo ensure consistency, FTRL treats missing values during scoring in\nexactly the same way as during training. Clustering in Transformers\nMissing values are replaced with the mean along each column. This is\nused only on numeric columns. Isolation Forest Anomaly Score Transformer\nIsolation Forest uses out-of-range imputation that fills missing values\nwith the values beyond the maximum. What Happens When You Try to Predict on a Categorical Level Not Seen During Training? 
XGBoost, LightGBM, RuleFit, TensorFlow, GLM\nDriverless AI's feature engineering pipeline will compute a numeric\nvalue for every categorical level present in the data, whether it's a\npreviously seen value or not.", + "prompt_type": "plain" + }, + { + "output": "enable_tensorflow_textcnn``\n\nEnable Word-Based CNN TensorFlow Models for NLP\n\nSpecify whether to use out-of-fold predictions from Word-based CNN\nTensorFlow models as transformers for NLP. This option is ignored if\nTensorFlow is disabled. We recommend that you disable this option on\nsystems that do not use GPUs.", + "prompt_type": "plain" + }, + { + "output": "enable_tensorflow_textbigru``\n\nEnable Word-Based BiGRU TensorFlow Models for NLP\n\nSpecify whether to use out-of-fold predictions from Word-based BiGRU\nTensorFlow models as transformers for NLP. This option is ignored if\nTensorFlow is disabled. We recommend that you disable this option on\nsystems that do not use GPUs.", + "prompt_type": "plain" + }, + { + "output": "enable_tensorflow_charcnn``\n\nEnable Character-Based CNN TensorFlow Models for NLP\n\nSpecify whether to use out-of-fold predictions from Character-level CNN\nTensorFlow models as transformers for NLP. This option is ignored if\nTensorFlow is disabled. We recommend that you disable this option on\nsystems that do not use GPUs.", + "prompt_type": "plain" + }, + { + "output": "enable_pytorch_nlp_model``\n\nEnable PyTorch Models for NLP\n\nSpecify whether to enable pretrained PyTorch models and fine-tune them\nfor NLP tasks. This is set to Auto by default. You need to set this to\nOn if you want to use the PyTorch models like BERT for modeling. Only\nthe first text column will be used for modeling with these models. 
We\nrecommend that you disable this option on systems that do not use GPUs.", + "prompt_type": "plain" + }, + { + "output": "enable_pytorch_nlp_transformer``\n\nEnable pre-trained PyTorch Transformers for NLP\n\nSpecify whether to enable pretrained PyTorch models for NLP tasks. This\nis set to Auto by default, and is enabled for text-dominated problems\nonly. You need to set this to On if you want to use the PyTorch models\nlike BERT for feature engineering (via fitting a linear model on top of\npretrained embeddings). We recommend that you disable this option on\nsystems that do not use GPUs.\n\nNotes:\n\n- This setting requires an Internet connection.", + "prompt_type": "plain" + }, + { + "output": "pytorch_nlp_pretrained_models``\n\nSelect Which Pretrained PyTorch NLP Models to Use\n\nSpecify one or more pretrained PyTorch NLP models to use. Select from\nthe following:\n\n- bert-base-uncased (Default)\n- distilbert-base-uncased (Default)\n- xlnet-base-cased\n- xlm-mlm-enfr-1024\n- roberta-base\n- albert-base-v2\n- camembert-base\n- xlm-roberta-base\n\nNotes:\n\n- This setting requires an Internet connection.\n- Models that are not selected by default may not have MOJO support.\n- Using BERT-like models may result in a longer experiment completion\n time.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_max_epochs_nlp``\n\nMax TensorFlow Epochs for NLP\n\nWhen building TensorFlow NLP features (for text data), specify the\nmaximum number of epochs to train feature engineering models with (it\nmight stop earlier). The higher the number of epochs, the higher the run\ntime. This value defaults to 2 and is ignored if TensorFlow models is\ndisabled.", + "prompt_type": "plain" + }, + { + "output": "enable_tensorflow_nlp_accuracy_switch``\n\nAccuracy Above Enable TensorFlow NLP by Default for All Models\n\nSpecify the accuracy threshold. 
Values equal and above will add all\nenabled TensorFlow NLP models at the start of the experiment for\ntext-dominated problems when the following NLP expert settings are set\nto Auto:\n\n- Enable word-based CNN TensorFlow models for NLP\n- Enable word-based BigRU TensorFlow models for NLP\n- Enable character-based CNN TensorFlow models for NLP\n\nIf the above transformations are set to ON, this parameter is ignored.\n\nAt lower accuracy, TensorFlow NLP transformations will only be created\nas a mutation. This value defaults to 5.", + "prompt_type": "plain" + }, + { + "output": "pytorch_nlp_fine_tuning_num_epochs``\n\nNumber of Epochs for Fine-Tuning of PyTorch NLP Models\n\nSpecify the number of epochs used when fine-tuning PyTorch NLP models.\nThis value defaults to 2.", + "prompt_type": "plain" + }, + { + "output": "pytorch_nlp_fine_tuning_batch_size``\n\nBatch Size for PyTorch NLP Models\n\nSpecify the batch size for PyTorch NLP models. This value defaults to\n10.\n\nNote: Large models and batch sizes require more memory.", + "prompt_type": "plain" + }, + { + "output": "pytorch_nlp_fine_tuning_padding_length``\n\nMaximum Sequence Length for PyTorch NLP Models\n\nSpecify the maximum sequence length (padding length) for PyTorch NLP\nmodels. This value defaults to 100.\n\nNote: Large models and padding lengths require more memory.", + "prompt_type": "plain" + }, + { + "output": "pytorch_nlp_pretrained_models_dir``\n\nPath to Pretrained PyTorch NLP Models\n\nSpecify a path to pretrained PyTorch NLP models. To get all available\nmodels, download\nhttp://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/bert_models.zip,\nthen extract the folder and store it in a directory on the instance\nwhere Driverless AI is installed:\n\n pytorch_nlp_pretrained_models_dir = /path/on/server/to/bert_models_folder", + "prompt_type": "plain" + }, + { + "output": "tensorflow_nlp_pretrained_embeddings_file_path-------------------------------------------------- .. 
container:: dropdown **Path to Pretrained Embeddings for TensorFlow NLP Models** Specify a path to pretrained embeddings that will be used for the TensorFlow NLP models. Note that this can be either a path in the local file system (/path/on/server/to/file.txt) or an S3 location (s3://``). Notes:\n - If an S3 location is specified, an S3 access key ID and S3 secret\n access key can also be specified with the\n tensorflow_nlp_pretrained_s3_access_key_id and\n tensorflow_nlp_pretrained_s3_secret_access_key expert settings\n respectively. - You can download the Glove embeddings from here and specify the\n local path in this box. - You can download the fasttext embeddings from here and specify the\n local path in this box. - You can also train your own custom embeddings. Please refer to\n this code sample for creating custom embeddings that can be passed\n on to this option.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_nlp_pretrained_s3_access_key_id---------------------------------------------- .. container:: dropdown **S3 access key ID to use when**tensorflow_nlp_pretrained_embeddings_file_path**is set to an S3 location** Specify an S3 access key ID to use whentensorflow_nlp_pretrained_embeddings_file_path` is set to an S3 location. For more information, see :ref:`the entry on the tensorflow_nlp_pretrained_embeddings_file_path \n\n expert setting.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_nlp_pretrained_s3_secret_access_key-------------------------------------------------- .. container:: dropdown **S3 secret access key to use when**tensorflow_nlp_pretrained_embeddings_file_path**is set to an S3 location** Specify an S3 secret access key to use whentensorflow_nlp_pretrained_embeddings_file_path` is set to an S3 location. 
For more information, see :ref:`the entry on the tensorflow_nlp_pretrained_embeddings_file_path \n\n expert setting.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_nlp_pretrained_embeddings_trainable``\n\nFor TensorFlow NLP, Allow Training of Unfrozen Pretrained Embeddings\n\nSpecify whether to allow training of all weights of the neural network\ngraph, including the pretrained embedding layer weights. If this is\ndisabled, the embedding layer will be frozen. All other weights,\nhowever, will still be fine-tuned. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "text_fraction_for_text_dominated_problem``\n\nFraction of Text Columns Out of All Features to be Considered a\nText-Dominated Problem\n\nSpecify the fraction of text columns out of all features to be\nconsidered as a text-dominated problem. This value defaults to 0.3.\n\nSpecify when a string column will be treated as text (for an NLP\nproblem) or just as a standard categorical variable. Higher values will\nfavor string columns as categoricals, while lower values will favor\nstring columns as text. This value defaults to 0.3.", + "prompt_type": "plain" + }, + { + "output": "text_transformer_fraction_for_text_dominated_problem``\n\nFraction of Text per All Transformers to Trigger That Text Dominated\n\nSpecify the fraction of text columns out of all features to be\nconsidered a text-dominated problem. This value defaults to 0.3.", + "prompt_type": "plain" + }, + { + "output": "string_col_as_text_threshold``\n\nThreshold for String Columns to be Treated as Text\n\nSpecify the threshold value (from 0 to 1) for string columns to be\ntreated as text (0.0 - text; 1.0 - string). This value defaults to 0.3.", + "prompt_type": "plain" + }, + { + "output": "text_transformers_max_vocabulary_size``\n\nMax Size of the Vocabulary for Text Transformers\n\nMax number of tokens created during fitting of Tfidf/Count based text\ntransformers. 
If multiple values are provided, will use the first one\nfor initial models, and use remaining values during parameter tuning and\nfeature evolution. The default value is [1000, 5000]. Values smaller\nthan 10000 are recommended for speed.", + "prompt_type": "plain" + }, + { + "output": "Which Pipeline Should I Use? Driverless AI Python Scoring Pipeline\nDriverless AI Python Scoring Pipeline is implemented as a Python whl\nfile. While this allows for a single process scoring engine, the scoring\nservice is generally implemented as a client/server architecture and\nsupports interfaces for TCP and HTTP. When running the Python Scoring\nPipeline:\n - HTTP is supported by virtually any language. HTTP supports RESTful\n calls via curl, wget, or supported packages in various scripting\n languages. - TCP is a bit more complex, though faster. TCP also requires\n Thrift, which currently does not handle NAs. k-LIME reason codes and Shapley reason codes whl file can be obtained\nfor all models from MLI Standalone Python Scoring Pipeline from the MLI\nexperiment page. Driverless AI MOJO Scoring Pipeline\nDriverless AI MOJO Scoring Pipeline is flexible and is faster than the\nPython Scoring Pipeline. It requires some coding. The MOJO Scoring\nPipeline is available as either a Java runtime or a\nC++ runtime (with R and Python wrappers).", + "prompt_type": "plain" + }, + { + "output": "JDBC Setup\n\nDriverless AI lets you explore Java Database Connectivity (JDBC) data\nsources from within the Driverless AI application. This section provides\ninstructions for configuring Driverless AI to work with JDBC.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run --runtime=nvidia(>= Docker 19.03) ornvidia-docker(< Docker 19.03) command when starting the Driverless AI Docker image. Usedocker versionto check which version of Docker you are using. 
Tested Databases ---------------- The following databases have been tested for minimal functionality. Note that JDBC drivers that are not included in this list should work with Driverless AI. We recommend that you test out your JDBC driver even if you do not see it on the list of tested databases. See the :ref:`untested-jdbc-driver` section at the end of this chapter for information on how to try out an untested JDBC driver. - Oracle DB - PostgreSQL - Amazon Redshift - Teradata Description of Configuration Attributes --------------------------------------- - jdbc_app_configs: Configuration for the JDBC connector. This is a JSON/Dictionary String with multiple keys. **Note**: This requires a JSON key (typically the name of the database being configured) to be associated with a nested JSON that contains the url, jarpath, and classpath fields.", + "prompt_type": "plain" + }, + { + "output": "Double quotation marks (\"...\") must be used to denote keys and values *within* the JSON dictionary, and *outer* quotations must be formatted as either \"\"\", ''', or '. Depending on how the configuration value is applied, different forms of outer quotations may be required. The following examples show two unique methods for applying outer quotations. 
- Configuration value applied with the config.toml file: :: jdbc_app_configs = \"\"\"{\"my_json_string\": \"value\", \"json_key_2\": \"value2\"}\"\"\" - Configuration value applied with an **environment variable**: :: DRIVERLESS_AI_JDBC_APP_CONFIGS='{\"my_json_string\": \"value\", \"json_key_2\": \"value2\"}' For example: :: DRIVERLESS_AI_JDBC_APP_CONFIGS='{ \"postgres\": {\"url\": \"jdbc:postgresql://192.xxx.x.xxx:aaaa:/name_of_database;user=name_of_user;password=your_password\",\"jarpath\": \"/config/postgresql-xx.x.x.jar\",\"classpath\": \"org.postgresql.Driver\"}, \"postgres-local\": {\"url\": \"jdbc:postgresql://123.xxx.xxx.xxx:aaaa/name_of_database\",\"jarpath\": \"/config/postgresql-xx.x.x.jar\",\"classpath\": \"org.postgresql.Driver\"}, \"ms-sql\": {\"url\": \"jdbc:sqlserver://192.xxx.x.xxx:aaaa;databaseName=name_of_database;user=name_of_user;password=your_password\",\"Username\":\"your_username\",\"passsword\":\"your_password\",\"jarpath\": \"/config/sqljdbc42.jar\",\"classpath\": \"com.microsoft.sqlserver.jdbc.SQLServerDriver\"}, \"oracle\": {\"url\": \"jdbc:oracle:thin:@192.xxx.x.xxx:aaaa/orclpdb1\",\"jarpath\": \"ojdbc7.jar\",\"classpath\": \"oracle.jdbc.OracleDriver\"}, \"db2\": {\"url\": \"jdbc:db2://127.x.x.x:aaaaa/name_of_database\",\"jarpath\": \"db2jcc4.jar\",\"classpath\": \"com.ibm.db2.jcc.DB2Driver\"}, \"mysql\": {\"url\": \"jdbc:mysql://192.xxx.x.xxx:aaaa;\",\"jarpath\": \"mysql-connector.jar\",\"classpath\": \"com.mysql.jdbc.Driver\"}, \"Snowflake\": {\"url\": \"jdbc:snowflake://.snowflakecomputing.com/?\",\"jarpath\": \"/config/snowflake-jdbc-x.x.x.jar\",\"classpath\": \"net.snowflake.client.jdbc.SnowflakeDriver\"}, \"Derby\": {\"url\": \"jdbc:derby://127.x.x.x:aaaa/name_of_database\",\"jarpath\": \"/config/derbyclient.jar\",\"classpath\": \"org.apache.derby.jdbc.ClientDriver\"} }'\\ -jdbc_app_jvm_args: Extra jvm args for JDBC connector.", + "prompt_type": "plain" + }, + { + "output": "-jdbc_app_classpath: Optionally specify an alternative 
classpath for the JDBC connector. -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. Retrieve the JDBC Driver ------------------------ 1. Download JDBC Driver JAR files: .. - `Oracle DB `__ - `PostgreSQL `__ - `Amazon Redshift `__ - `Teradata `__ **Note**: Remember to take note of the driver classpath, as it is needed for the configuration steps (for example, org.postgresql.Driver). 2. Copy the driver JAR to a location that can be mounted into the Docker container. .. **Note**: The folder storing the JDBC jar file must be visible/readable by the dai process user.", + "prompt_type": "plain" + }, + { + "output": "Note that the JDBC connection strings will vary depending on the database that is used. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,hdfs,jdbc\" \\ -e DRIVERLESS_AI_JDBC_APP_CONFIGS='{\"postgres\": {\"url\": \"jdbc:postgres://localhost:5432/my_database\", \"jarpath\": \"/path/to/postgresql/jdbc/driver.jar\", \"classpath\": \"org.postgresql.Driver\"}}' \\ -e DRIVERLESS_AI_JDBC_APP_JVM_ARGS=\"-Xmx2g\" \\ -p 12345:12345 \\ -v /path/to/local/postgresql/jdbc/driver.jar:/path/to/postgresql/jdbc/driver.jar \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure JDBC options in the config.toml file, and then specify that file when starting Driverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "Configure the Driverless AI config.toml file. Set the following configuration options: .. .. 
code:: bash enabled_file_systems = \"file, upload, jdbc\" jdbc_app_configs = \"\"\"{\"postgres\": {\"url\": \"jdbc:postgres://localhost:5432/my_database\", \"jarpath\": \"/path/to/postgresql/jdbc/driver.jar\", \"classpath\": \"org.postgresql.Driver\"}}\"\"\" 2. Mount the config.toml file and requisite JAR files into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/jdbc/driver.jar:/path/in/docker/jdbc/driver.jar \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example enables the JDBC connector for PostgresQL.", + "prompt_type": "plain" + }, + { + "output": "- The configuration requires a JSON key (typically the name of the database being configured) to be associated with a nested JSON that contains theurl,jarpath, andclasspathfields. In addition, this should take the format: :: \"\"\"{\"my_jdbc_database\": {\"url\": \"jdbc:my_jdbc_database://hostname:port/database\", \"jarpath\": \"/path/to/my/jdbc/database.jar\", \"classpath\": \"com.my.jdbc.Driver\"}}\"\"\" 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Edit the following values in the config.toml file. .. 
:: # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"upload, file, hdfs, jdbc\" # Configuration for JDBC Connector. # JSON/Dictionary String with multiple keys. # Format as a single line without using carriage returns (the following example is formatted for readability). # Use triple quotations to ensure that the text is read as a single string. 
# Example: # \"\"\"{ # \"postgres\": { # \"url\": \"jdbc:postgresql://ip address:port/postgres\", # \"jarpath\": \"/path/to/postgres_driver.jar\", # \"classpath\": \"org.postgresql.Driver\" # }, # \"mysql\": { # \"url\":\"mysql connection string\", # \"jarpath\": \"/path/to/mysql_driver.jar\", # \"classpath\": \"my.sql.classpath.Driver\" # } # }\"\"\" jdbc_app_configs = \"\"\"{\"postgres\": {\"url\": \"jdbc:postgres://localhost:5432/my_database\", \"jarpath\": \"/path/to/postgresql/jdbc/driver.jar\", \"classpath\": \"org.postgresql.Driver\"}}\"\"\" # extra jvm args for jdbc connector jdbc_app_jvm_args = \"\" # alternative classpath for jdbc connector jdbc_app_classpath = \"\" 3.", + "prompt_type": "plain" + }, + { + "output": "Adding Datasets Using JDBC -------------------------- After the JDBC connector is enabled, you can add datasets by selecting **JDBC** from the **Add Dataset (or Drag and Drop)** drop-down menu. .. figure:: ../images/jdbc.png :alt: 1. Click on the **Add Dataset** button on the Datasets page. 2. Select **JDBC** from the list that appears. 3. Click on the **Select JDBC Connection** button to select a JDBC configuration. 4. The form will populate with the JDBC Database, URL, Driver, and Jar information. Complete the following remaining fields: .. - **JDBC Username**: Enter your JDBC username. - **JDBC Password**: Enter your JDBC password. (See the *Notes* section) - **Destination Name**: Enter a name for the new dataset. - (Optional) **ID Column Name**: Enter a name for the ID column. Specify this field when making large data queries. **Notes**: - Do not include the password as part of the JDBC URL. Instead, enter the password in the **JDBC Password** field.", + "prompt_type": "plain" + }, + { + "output": "- Due to resource sharing within Driverless AI, the JDBC Connector is only allocated a relatively small amount of memory. - When making large queries, the ID column is used to partition the data into manageable portions. 
This ensures that the maximum memory allocation is not exceeded. - If a query that is larger than the maximum memory allocation is made without specifying an ID column, the query will not complete successfully. 5. Write a SQL Query in the format of the database that you want to query. (See the `Query Examples <#queryexamples>`__ section below.) The format will vary depending on the database that is used. 6. Click the **Click to Make Query** button to execute the query. The time it takes to complete depends on the size of the data being queried and the network speeds to the database. On a successful query, you will be returned to the datasets page, and the queried data will be available as a new dataset.", + "prompt_type": "plain" + }, + { + "output": "Configuration: .. :: jdbc_app_configs = \"\"\"{\"oracledb\": {\"url\": \"jdbc:oracle:thin:@localhost:1521/oracledatabase\", \"jarpath\": \"/home/ubuntu/jdbc-jars/ojdbc8.jar\", \"classpath\": \"oracle.jdbc.OracleDriver\"}}\"\"\" 2. Sample Query: .. - Select **oracledb** from the **Select JDBC Connection** dropdown menu. - **JDBC Username**:oracleuser- **JDBC Password**:oracleuserpassword- **ID Column Name**: - **Query**: .. :: SELECT MIN(ID) AS NEW_ID, EDUCATION, COUNT(EDUCATION) FROM my_oracle_schema.creditcardtrain GROUP BY EDUCATION **Note**: Because this query does not specify an **ID Column Name**, it will only work for small data. However, the **NEW_ID** column can be used as the ID Column if the query is for larger data. 3. Click the **Click to Make Query** button to execute the query. .. container:: group-tab PostgreSQL 1. Configuration: .. 
:: jdbc_app_configs = \"\"\"{\"postgres\": {\"url\": \"jdbc:postgresql://localhost:5432/postgresdatabase\", \"jarpath\": \"/home/ubuntu/postgres-artifacts/postgres/Driver.jar\", \"classpath\": \"org.postgresql.Driver\"}}\"\"\" 2.", + "prompt_type": "plain" + }, + { + "output": "- **JDBC Username**:postgres_user- **JDBC Password**:pguserpassword- **ID Column Name**:id``\n - Query:\n 3. Click the Click to Make Query button to execute the query. Adding an Untested JDBC Driver\nWe encourage you to try out JDBC drivers that are not tested in house. Docker Image Installs\n1. Download the JDBC jar for your database. 2. Move your JDBC jar file to a location that DAI can access. 3. Start the Driverless AI Docker image using the JDBC-specific\n environment variables. nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n --add-host name.node:172.16.2.186 \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"upload,file,hdfs,s3,recipe_file,jdbc\" \\\n -e DRIVERLESS_AI_JDBC_APP_CONFIGS=\"\"\"{\"my_jdbc_database\": {\"url\": \"jdbc:my_jdbc_database://hostname:port/database\",\n \"jarpath\": \"/path/to/my/jdbc/database.jar\", \n \"classpath\": \"com.my.jdbc.Driver\"}}\"\"\"\\ \n -e DRIVERLESS_AI_JDBC_APP_JVM_ARGS=\"-Xmx2g\" \\\n -p 12345:12345 \\\n -v /path/to/local/postgresql/jdbc/driver.jar:/path/to/postgresql/jdbc/driver.jar \\\n -v /etc/passwd:/etc/passwd:ro \\\n -v /etc/group:/etc/group:ro \\\n -v /tmp/dtmp/:/tmp \\\n -v /tmp/dlog/:/log \\\n -v /tmp/dlicense/:/license \\\n -v /tmp/ddata/:/data \\\n -u $(id -u):$(id -g) \\\n h2oai/dai-ubi8-x86_64:|tag|\nDocker Image with the config.toml\n1.", + "prompt_type": "plain" + }, + { + "output": "Install and Run in a Docker Container on Google Compute Engine\nThis section describes how to install and start Driverless AI from\nscratch using a Docker container in a Google Compute environment. This installation assumes that you already have a Google Cloud Platform\naccount. 
If you don't have an account, go to\nhttps://console.cloud.google.com/getting-started to create one. In\naddition, refer to Google's Machine Types documentation for information\non Google Compute machine types. Watch the installation video here. Note that some of the images in this\nvideo may change between releases, but the installation steps remain the\nsame. Before You Begin\nIf you are trying GCP for the first time and have just created an\naccount, check your Google Compute Engine (GCE) resource quota limits. By default, GCP allocates a maximum of 8 CPUs and no GPUs. You can\nchange these settings to match your quota limit, or you can request more\nresources from GCP. Refer to https://cloud.google.com/compute/quotas for\nmore information, including information on how to check your quota and\nrequest additional quota.", + "prompt_type": "plain" + }, + { + "output": "In your browser, log in to the Google Compute Engine Console at\n https://console.cloud.google.com/. 2. In the left navigation panel, select Compute Engine > VM Instances. 3. Click Create Instance. 4. Specify the following at a minimum:\n5. Create a Firewall rule for Driverless AI. On the Google Cloud\n Platform left navigation panel, select VPC network > Firewall rules. Specify the following settings:\n6. On the VM Instances page, SSH to the new VM Instance by selecting\n Open in Browser Window from the SSH dropdown. 7. H2O provides a script for you to run in your VM instance. Open an\n editor in the VM instance (for example, vi). Copy one of the scripts\n below (depending on whether you are running GPUs or CPUs). Save the\n script as install.sh. 8. Type the following commands to run the install script. 9. In your user folder, create the following directories as your user. 10. Add your Google Compute user name to the Docker container. 11. 
Reboot the system to enable NVIDIA drivers.", + "prompt_type": "plain" + }, + { + "output": "Retrieve the Driverless AI Docker image from\n https://www.h2o.ai/download/. 13. Load the Driverless AI Docker image. The following example shows how\n to load Driverless AI. Replace VERSION with your image. 14. If you are running CPUs, you can skip this step. Otherwise, you must\n enable persistence of the GPU. Note that this needs to be run once\n every reboot. Refer to the following for more information:\n http://docs.nvidia.com/deploy/driver-persistence/index.html. 15. Start the Driverless AI Docker image and replace TAG below with the\n image tag. Depending on your install version, use the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command. Refer to Data Connectors for information on\n how to add the GCS and GBQ data connectors to your Driverless AI\n instance. 16. Connect to Driverless AI with your browser:\nStopping the GCE Instance\nThe Google Compute Engine instance will continue to run even when you\nclose the portal.", + "prompt_type": "plain" + }, + { + "output": "On the VM Instances page, click on the VM instance that you want to\n stop. 2. Click Stop at the top of the page. 3. A confirmation page will display. Click Stop to stop the instance. Stopping in Terminal\nSSH into the machine that is running Driverless AI, and then run the\nfollowing:\n h2oai stop\nUpgrading Driverless AI\nThis section provides instructions for upgrading Driverless AI versions\nthat were installed in a Docker container. These steps ensure that\nexisting experiments are saved. WARNING: Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\ndirectory and are not automatically upgraded when Driverless AI is\nupgraded. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. 
If you did not build MLI on a model before upgrading Driverless AI,\n then you will not be able to view MLI on that model after upgrading. Before upgrading, be sure to run MLI jobs on models that you want to\n continue to interpret in future releases.", + "prompt_type": "plain" + }, + { + "output": "If you did not build a MOJO pipeline on a model before upgrading\n Driverless AI, then you will not be able to build a MOJO pipeline on\n that model after upgrading. Before upgrading, be sure to build MOJO\n pipelines on all desired models and then back up your Driverless AI\n tmp directory. Note: Stop Driverless AI if it is still running. Requirements\nWe recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere. Driverless AI ships with CUDA 11.2.2 for GPUs, but the driver\nmust exist in the host environment. Go to NVIDIA download driver to get the latest NVIDIA Tesla A/T/V/P/K\nseries drivers. For reference on CUDA Toolkit and Minimum Required\nDriver Versions and CUDA Toolkit and Corresponding Driver Versions, see\nhere . Note\nUpgrade Steps\n1. SSH into the IP address of the machine that is running Driverless\n AI. 2. Set up a directory for the version of Driverless AI on the host\n machine:\n3.", + "prompt_type": "plain" + }, + { + "output": "Scorers\nClassification or Regression\nGINI (Gini Coefficient)\nThe Gini index is a well-established method to quantify the inequality\namong values of a frequency distribution, and can be used to measure the\nquality of a binary classifier. A Gini index of zero expresses perfect\nequality (or a totally useless classifier), while a Gini index of one\nexpresses maximal inequality (or a perfect classifier). The Gini index is based on the Lorenz curve. The Lorenz curve plots the\ntrue positive rate (y-axis) as a function of percentiles of the\npopulation (x-axis). 
The Lorenz curve represents a collective of models represented by the\nclassifier. The location on the curve is given by the probability\nthreshold of a particular model. (i.e., Lower probability thresholds for\nclassification typically lead to more true positives, but also to more\nfalse positives.) The Gini index itself is independent of the model and only depends on\nthe Lorenz curve determined by the distribution of the scores (or\nprobabilities) obtained from the classifier.", + "prompt_type": "plain" + }, + { + "output": "The R2 value varies between 0 and 1 where 0\nrepresents no correlation between the predicted and actual value and 1\nrepresents complete correlation. Calculating the R2 value for linear models is mathematically equivalent\nto 1\u2005\u2212\u2005SSE/SST (or 1\u2005\u2212\u2005residual sum of squares/total sum of squares). For all other models, this equivalence does not hold, so the 1\u2005\u2212\u2005SSE/SST\nformula cannot be used. In some cases, this formula can produce negative\nR2 values, which is mathematically impossible for the square of a real number. Because\nDriverless AI does not necessarily use linear models, the R2 value is\ncalculated using the squared Pearson correlation coefficient. R2 equation:\n$$R2 = \\Big(\\frac{\\sum_{i=1}^{n}(x_i-\\bar{x})(y_i-\\bar{y})}{\\sqrt{\\sum_{i=1}^{n}(x_i-\\bar{x})^2\\sum_{i=1}^{n}(y_i-\\bar{y})^2}}\\Big)^2$$\nWhere:\n- x is the predicted target value\n- y is the actual target value\nMSE (Mean Squared Error)\nThe MSE metric measures the average of the squares of the errors or\ndeviations. MSE takes the distances from the points to the regression\nline (these distances are the \u201cerrors\u201d) and squares them to remove any\nnegative signs.", + "prompt_type": "plain" + }, + { + "output": "MSE also gives more weight to larger differences. The bigger the error,\nthe more it is penalized. 
For example, if your correct answers are 2,3,4\nand the algorithm guesses 1,4,3, then the absolute error on each one is\nexactly 1, so squared error is also 1, and the MSE is 1. But if the\nalgorithm guesses 2,3,6, then the errors are 0,0,2, the squared errors\nare 0,0,4, and the MSE is a higher 1.333. The smaller the MSE, the\nbetter the model's performance. (Tip: MSE is sensitive to outliers. If\nyou want a more robust metric, try mean absolute error (MAE).) MSE equation:\n$$MSE = \\frac{1}{N} \\sum_{i=1}^{N}(y_i -\\hat{y}_i)^2$$\nRMSE (Root Mean Squared Error)\nThe RMSE metric evaluates how well a model can predict a continuous\nvalue. The RMSE units are the same as the predicted target, which is\nuseful for understanding if the size of the error is of concern or not. The smaller the RMSE, the better the model's performance. (Tip: RMSE is\nsensitive to outliers. If you want a more robust metric, try mean\nabsolute error (MAE).)", + "prompt_type": "plain" + }, + { + "output": "- y is the actual target value. - y\u0302 is the predicted target value. RMSLE (Root Mean Squared Logarithmic Error)\nThis metric measures the ratio between actual values and predicted\nvalues and takes the log of the predictions and actual values. Use this\ninstead of RMSE if an under-prediction is worse than an over-prediction. You can also use this when you don't want to penalize large differences\nwhen both of the values are large numbers. RMSLE equation:\n$$RMSLE = \\sqrt{\\frac{1}{N} \\sum_{i=1}^{N} \\big(ln \\big(\\frac{y_i +1} {\\hat{y}_i +1}\\big)\\big)^2 }$$\nWhere:\n- N is the total number of rows (observations) of your corresponding\n dataframe. - y is the actual target value. - y\u0302 is the predicted target value. RMSPE (Root Mean Square Percentage Error)\nThis metric is the RMSE expressed as a percentage. The smaller the\nRMSPE, the better the model performance. 
RMSPE equation:\n$$RMSPE = \\sqrt{\\frac{1}{N} \\sum_{i=1}^{N} \\frac{(y_i -\\hat{y}_i)^2 }{(y_i)^2}}$$\nMAE (Mean Absolute Error)\nThe mean absolute error is an average of the absolute errors.", + "prompt_type": "plain" + }, + { + "output": "The\nsmaller the MAE the better the model's performance. (Tip: MAE is robust\nto outliers. If you want a metric that is sensitive to outliers, try\nroot mean squared error (RMSE).) MAE equation:\n$$MAE = \\frac{1}{N} \\sum_{i=1}^{N} | x_i - x |$$\nWhere:\n- N is the total number of errors\n- |x_(i)\u2005\u2212\u2005x| equals the absolute errors. MAPE (Mean Absolute Percentage Error)\nMAPE measures the size of the error in percentage terms. It is\ncalculated as the average of the unsigned percentage error. MAPE equation:\n$$MAPE = \\big(\\frac{1}{N} \\sum \\frac {|Actual - Forecast |}{|Actual|} \\big) * 100$$\nBecause the MAPE measure is in percentage terms, it gives an indication\nof how large the error is across different scales. Consider the\nfollowing example:\n --------------------------------------------------------------------\n Actual Predicted Absolute Error Absolute Percentage Error\n ---------- ------------ ---------------- ---------------------------\n 5 1 4 80%\n 15,000 15,004 4 0.03%\n --------------------------------------------------------------------\nBoth records have an absolute error of 4, but this error could be\nconsidered \"small\" or \"big\" when you compare it to the actual value.", + "prompt_type": "plain" + }, + { + "output": "This is important when the actual values\ncan be 0 or near 0. Actual values near 0 cause the MAPE value to become\ninfinitely high. Because SMAPE includes both the actual and the\npredicted values, the SMAPE value can never be greater than 200%. Consider the following example:\n -----------------------\n Actual Predicted\n ---------- ------------\n 0.01 0.05\n 0.03 0.04\n -----------------------\nThe MAPE for this data is 216.67% but the SMAPE is only 80.95%. 
The two records have absolute errors of only 0.04 and 0.01, but because the\nactual values are so close to 0, the first record alone has an absolute percentage error of 400%, which inflates the MAPE. MER (Median Error Rate or Median Absolute Percentage Error)\nMER measures the median size of the error in percentage terms. It is\ncalculated as the median of the unsigned percentage error. MER equation:\n$$MER = \\big(median \\frac {|Actual - Forecast |}{|Actual|} \\big) * 100$$\nBecause the MER is the median, half the scored population has a lower\nabsolute percentage error than the MER, and half the population has a\nlarger absolute percentage error than the MER.", + "prompt_type": "plain" + }, + { + "output": "The MCC metric combines the true positives,\nfalse positives, true negatives, and false negatives using the equation\ndescribed below. A Driverless AI model will return probabilities, not predicted classes. To convert probabilities to predicted classes, a threshold needs to be\ndefined. Driverless AI iterates over possible thresholds to calculate a\nconfusion matrix for each threshold. It does this to find the maximum\nMCC value. Driverless AI's goal is to continue increasing this maximum\nMCC. Unlike metrics like Accuracy, MCC is a good scorer to use when the\ntarget variable is imbalanced. In the case of imbalanced data, high\nAccuracy can be found by predicting the majority class. Metrics like\nAccuracy and F1 can be misleading, especially in the case of imbalanced\ndata, because they do not consider the relative size of the four\nconfusion matrix categories. MCC, on the other hand, takes the\nproportion of each class into account. The MCC value ranges from -1 to 1\nwhere -1 indicates a classifier that predicts the opposite class from\nthe actual value, 0 means the classifier does no better than random\nguessing, and 1 indicates a perfect classifier.", + "prompt_type": "plain" + }, + { + "output": "To\nconvert probabilities to predicted classes, a threshold needs to be\ndefined. 
Driverless AI iterates over possible thresholds to calculate a\nconfusion matrix for each threshold. It does this to find the maximum F\nmetric value. Driverless AI's goal is to continue increasing this\nmaximum F metric. The F1 score provides a measure for how well a binary classifier can\nclassify positive cases (given a threshold value). The F1 score is\ncalculated from the harmonic mean of the precision and recall. An F1\nscore of 1 means both precision and recall are perfect and the model\ncorrectly identified all the positive cases and didn't mark a negative\ncase as a positive case. If either precision or recall are very low it\nwill be reflected with a F1 score closer to 0. F1 equation:\n$$F1 = 2 \\;\\Big(\\; \\frac{(precision) \\; (recall)}{precision + recall}\\; \\Big)$$\nWhere:\n- precision is the positive observations (true positives) the model\n correctly identified from all the observations it labeled as\n positive (the true positives + the false positives).", + "prompt_type": "plain" + }, + { + "output": "The F2 score is the weighted harmonic mean of the precision and recall\n(given a threshold value). Unlike the F1 score, which gives equal weight\nto precision and recall, the F2 score gives more weight to recall than\nto precision. More weight should be given to recall for cases where\nFalse Negatives are considered worse than False Positives. For example,\nif your use case is to predict which customers will churn, you may\nconsider False Negatives worse than False Positives. In this case, you\nwant your predictions to capture all of the customers that will churn. Some of these customers may not be at risk for churning, but the extra\nattention they receive is not harmful. More importantly, no customers\nactually at risk of churning have been missed. 
F2 equation:\n$$F2 = 5 \\;\\Big(\\; \\frac{(precision) \\; (recall)}{((4)\\;(precision)) + recall}\\; \\Big)$$\nWhere:\n- precision is the positive observations (true positives) the model\n correctly identified from all the observations it labeled as\n positive (the true positives + the false positives).", + "prompt_type": "plain" + }, + { + "output": "Accuracy\nIn binary classification, Accuracy is the number of correct predictions\nmade as a ratio of all predictions made. In multiclass classification,\nthe set of labels predicted for a sample must exactly match the\ncorresponding set of labels in y_true. A Driverless AI model will return probabilities, not predicted classes. To convert probabilities to predicted classes, a threshold needs to be\ndefined. Driverless AI iterates over possible thresholds to calculate a\nconfusion matrix for each threshold. It does this to find the maximum\nAccuracy value. Driverless AI's goal is to continue increasing this\nmaximum Accuracy. Accuracy equation:\n$$Accuracy = \\Big(\\; \\frac{\\text{number correctly predicted}}{\\text{number of observations}}\\; \\Big)$$\nLogloss\nThe logarithmic loss metric can be used to evaluate the performance of a\nbinomial or multinomial classifier. Unlike AUC which looks at how well a\nmodel can classify a binary target, logloss evaluates how close a\nmodel's predicted values (uncalibrated probability estimates) are to the\nactual target value.", + "prompt_type": "plain" + }, + { + "output": "Logloss can be any value greater than or equal to 0,\nwith 0 meaning that the model correctly assigns a probability of 0% or\n100%. Binary classification equation:\n$$Logloss = - \\;\\frac{1}{N} \\sum_{i=1}^{N}w_i(\\;y_i \\ln(p_i)+(1-y_i)\\ln(1-p_i)\\;)$$\nMulticlass classification equation:\n$$Logloss = - \\;\\frac{1}{N} \\sum_{i=1}^{N}\\sum_{j=1}^{C}w_i(\\;y_i,_j \\; \\ln(p_i,_j)\\;)$$\nWhere:\n- N is the total number of rows (observations) of your corresponding\n dataframe. 
- w is the per-row user-defined weight (default is 1). - C is the total number of classes (C=2 for binary classification). - p is the predicted value (uncalibrated probability) assigned to a\n given row (observation). - y is the actual target value. AUC (Area Under the Receiver Operating Characteristic Curve)\nThis model metric is used to evaluate how well a binary classification\nmodel is able to distinguish between true positives and false positives. For multi-class problems, this score is computed by micro-averaging the\nROC curves for each class.", + "prompt_type": "plain" + }, + { + "output": "An AUC of 1 indicates a perfect classifier, while an AUC of .5 indicates\na poor classifier whose performance is no better than random guessing. AUCPR (Area Under the Precision-Recall Curve)\nThis model metric is used to evaluate how well a binary classification\nmodel is able to distinguish between precision recall pairs or points. These values are obtained using different thresholds on a probabilistic\nor other continuous-output classifier. AUCPR is an average of the\nprecision-recall weighted by the probability of a given threshold. The main difference between AUC and AUCPR is that AUC calculates the\narea under the ROC curve and AUCPR calculates the area under the\nPrecision Recall curve. The Precision Recall curve does not care about\nTrue Negatives. For imbalanced data, a large quantity of True Negatives\nusually overshadows the effects of changes in other metrics like False\nPositives. The AUCPR will be much more sensitive to True Positives,\nFalse Positives, and False Negatives than AUC.", + "prompt_type": "plain" + }, + { + "output": "MACROAUC (Macro Average of Areas Under the Receiver Operating Characteristic Curves)\nFor multiclass classification problems, this score is computed by\nmacro-averaging the ROC curves for each class (one per class). The area\nunder the curve is a constant. 
A MACROAUC of 1 indicates a perfect\nclassifier, while a MACROAUC of .5 indicates a poor classifier whose\nperformance is no better than random guessing. This option is not\navailable for binary classification problems. Scorer Best Practices - Regression\nWhen deciding which scorer to use in a regression problem, consider the\nfollowing:\n- Do you want your scorer to be sensitive to outliers? - What unit should the scorer be in? Sensitive to Outliers\nCertain scorers are more sensitive to outliers. When a scorer is\nsensitive to outliers, it means that it is important that the model\npredictions are never exceedingly inaccurate. For example, say you have\nan experiment predicting the number of days until an event. The graph\nbelow shows the absolute error in your predictions.", + "prompt_type": "plain" + }, + { + "output": "RMSE drops down significantly. **Performance Units** Different scorers show the performance of the Driverless AI experiment in different units. This section continues with the previous example where the target is to predict the number of days until an event. 
Some possible performance units are: - Same as target: The unit of the scorer is in days - ex: MAE = 5 means the model predictions are off by 5 days on average - Percent of target: The unit of the scorer is the percent of days - ex: MAPE = 10% means the model predictions are off by 10 percent on average - Square of target: The unit of the scorer is in days squared - ex: MSE = 25 means the model predictions are off by 5 days on average (square root of 25 = 5) **Comparison** +-------------+----------+--------------------------+-------------+ | Metric | Units | Sensitive to Outliers | Tip | +=============+==========+==========================+=============+ | R2 | Scaled | No | Use when | | | between | | you want | | | 0 and 1 | | performance | | | | | scaled | | | | | between 0 | | | | | and 1 | +-------------+----------+--------------------------+-------------+ | MSE | Square | Yes | | | | of | | | | | target | | | +-------------+----------+--------------------------+-------------+ | RMSE | Same as | Yes | | | | target | | | +-------------+----------+--------------------------+-------------+ | RMSLE | Log of | Yes | | | | target | | | +-------------+----------+--------------------------+-------------+ | RMSPE | Percent | Yes | Use when | | | of | | target | | | target | | values are | | | | | across | | | | | different | | | | | scales | +-------------+----------+--------------------------+-------------+ | MAE | Same as | No | | | | target | | | +-------------+----------+--------------------------+-------------+ | MAPE | Percent | No | Use when | | | of | | target | | | target | | values are | | | | | across | | | | | different | | | | | scales | +-------------+----------+--------------------------+-------------+ | SMAPE | Percent | No | Use when | | | of | | target | | | target | | values are | | | divided | | close to 0 | | | by 2 | | | +-------------+----------+--------------------------+-------------+ Scorer Best Practices - Classification 
-------------------------------------- When deciding which scorer to use in a classification problem, consider the following: - Do you want the scorer to evaluate the predicted probabilities or the classes that those probabilities can be converted to?", + "prompt_type": "plain" + }, + { + "output": "**Scorer Evaluates Probabilities or Classes** The final output of a Driverless AI model is a predicted probability that a record is in a particular class. The scorer you choose either evaluates how accurate the probability is or how accurate the assigned class is from that probability. Choosing this depends on the use of the Driverless AI model. Do you want to use the probabilities, or do you want to convert those probabilities into classes? For example, if you are predicting whether a customer will churn, you may take the predicted probabilities and turn them into distinct classes\u2014customers who will churn vs customers who will not churn. If you are predicting the expected loss of revenue, use the predicted probabilities instead (predicted probability of churn \\* value of customer). If your use case requires a class assigned to each record, select a scorer that evaluates the model's performance based on how well it classifies the records. If your use case uses the probabilities, select a scorer that evaluates the model's performance based on the predicted probability.", + "prompt_type": "plain" + }, + { + "output": "Modifying Datasets With Recipes\nDriverless AI lets you create a new dataset by\nmodifying an existing dataset with a data recipe . This example shows you how to create a new dataset with the Live Code\noption. 1. Navigate to the Datasets page, then click on the dataset you want to\n modify. 2. Click Details from the submenu that appears to open the Dataset\n Details page. 3. Click the Modify by Recipe button in the top right portion of the\n UI, then click Live Code from the submenu that appears. 4. 
Enter the code for the data recipe you want to use to modify the\n dataset. Click the Get Preview button to see a preview of how the\n data recipe will modify the dataset. In this example, the data\n recipe modifies the number of rows and columns in the dataset. 5. To download the entered code script as a .py file, click the\n Download button. 6. Click the Apply button to confirm the changes and create a new\n dataset. (The original dataset is still available on the Datasets\n page.)", + "prompt_type": "plain" + }, + { + "output": "Using Multiple Authenticators\n\nDriverless AI lets you enable multiple authentication methods at the\nsame time. The following are some examples of when this can be useful:\n\n- When you want to use single sign-on (SSO) options for the front-end\n and also give users direct access with credentials for headless\n setups like the Driverless AI Python client.\n- When you want to allow access to users that are not managed by the\n provider of the primary authentication option.\n\nTo enable additional authentication methods, use the", + "prompt_type": "plain" + }, + { + "output": "additional_authentication_methods config.toml setting. **Note**: In order to let users access their data when using multiple authenticators, usernames for all of the enabled authentication methods need to match one another. Multiple Authentication Methods Example --------------------------------------- In this example, a user wants to use OpenID Connect authentication on the front-end and also let users use LDAP credentials to gain access with the Driverless AI Python client. To enable both authentication methods, use the config.toml file to set the following parameters: :: authentication_method = \"openid\" additional_authentication_methods = \"['ldap']\" # Configure OpenID Connect auth_openid_provider_base_uri = ... # Configure LDAP ldap_server = ... The primary authentication method's login page is available on the standard /login path. 
All of the enabled authentication methods can be used on the path /login/<authentication_method_name>.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Transformations\nTransformations in Driverless AI are applied to columns in the data. The\ntransformers create the engineered features in\nexperiments. Driverless AI provides a number of transformers. The downloaded\nexperiment logs include the transformations that were applied to your\nexperiment. Notes:\n- You can include or exclude specific transformers in your Driverless\n AI environment using the included_transformers or\n excluded_transformers config options. - You can control which transformers to use in individual experiments\n with the included_transformers Expert Setting in Recipe panel. - You can set transformers to be used as pre-processing transformers\n with the included_pretransformers Expert Setting in Recipe panel. Additional layers can be added with the num_pipeline_layers Expert\n Setting in Recipe panel. - An alternative to transformers that gives more flexibility (but has\n no fitted state) are data recipes, controlled by the included_datas\n Expert Setting in Recipe panel.", + "prompt_type": "plain" + }, + { + "output": "- Numeric Transformers \n- Categorical Transformers \n- Time and Date Transformers \n- Time Series Transformers \n- NLP (text) Transformers \n- Image Transformers \n- Autoviz Recommendation Transformer \nTransformed Feature Naming Convention\nTransformed feature names are encoded as follows:\n <transformation_index>_<transformer_name>:<orig>:<...>:<orig>.<extra>\nFor example in 32_NumToCatTE:BILL_AMT1:EDUCATION:MARRIAGE:SEX.0 :\n - 32_ is the transformation index for specific transformation\n parameters. - NumToCatTE is the transformer name. - BILL_AMT1:EDUCATION:MARRIAGE:SEX represents original features\n used. 
- 0 is the extra and represents the likelihood encoding for\n target[0] after grouping by features (shown here as BILL_AMT1,\n EDUCATION, MARRIAGE and SEX) and making out-of-fold estimates.", + "prompt_type": "plain" + }, + { + "output": "For binary experiments,\n this value is always 0. Numeric Transformers (Integer, Real, Binary)\n- ClusterDist Transformer\n The Cluster Distance Transformer clusters selected numeric columns\n and uses the distance to a specific cluster as a new feature. - ClusterDist cuML Transformer\n The Cluster Distance cuML Transformer runs on GPUs to train cuML\n accelerated k-means clustering to create clusters on selected\n numeric columns and uses the distance to a specific cluster as a\n new feature. - ClusterTE Transformer\n The Cluster Target Encoding Transformer clusters selected numeric\n columns and calculates the mean of the response column for each\n cluster. The mean of the response is used as a new feature. Cross\n Validation is used to calculate mean response to prevent\n overfitting. - DBSCAN cuML Transformer\n DBSCAN cuML Transformer runs on GPUs to train cuML accelerated\n DBSCAN model on selected numeric columns and uses the output\n cluster label as a new feature.", + "prompt_type": "plain" + }, + { + "output": "This transformation uses a smart search to identify which feature\n pairs to transform. Only interactions that improve the baseline\n model score are kept. - InteractionsSimple Transformer\n The InteractionsSimple Transformer adds, divides, multiplies, and\n subtracts two numeric columns in the data to create a new feature. This transformation randomly selects pairs of features to\n transform. - NumCatTE Transformer\n The Numeric Categorical Target Encoding Transformer calculates the\n mean of the response column for several selected columns. If one\n of the selected columns is numeric, it is first converted to\n categorical by binning. The mean of the response column is used as\n a new feature. 
Cross Validation is used to calculate mean response\n to prevent overfitting. - NumToCatTE Transformer\n The Numeric to Categorical Target Encoding Transformer converts\n numeric columns to categoricals by binning and then calculates the\n mean of the response column for each group.", + "prompt_type": "plain" + }, + { + "output": "Cross Validation is\n used to calculate mean response to prevent overfitting. - NumToCatWoEMonotonic Transformer\n The Numeric to Categorical Weight of Evidence Monotonic\n Transformer converts a numeric column to categorical by binning\n and then calculates Weight of Evidence for each bin. The monotonic\n constraint ensures the bins of values are monotonically related to\n the Weight of Evidence value. The Weight of Evidence is used as a\n new feature. Weight of Evidence measures the \u201cstrength\u201d of a\n grouping for separating good and bad risk and is calculated by\n taking the log of the ratio of distributions for a binary response\n column. - NumToCatWoE Transformer\n The Numeric to Categorical Weight of Evidence Transformer converts\n a numeric column to categorical by binning and then calculates\n Weight of Evidence for each bin. The Weight of Evidence is used as\n a new feature. Weight of Evidence measures the \u201cstrength\u201d of a\n grouping for separating good and bad risk and is calculated by\n taking the log of the ratio of distributions for a binary response\n column.", + "prompt_type": "plain" + }, + { + "output": "- TruncSVDNum Transformer\n Truncated SVD Transformer trains a Truncated SVD model on selected\n numeric columns and uses the components of the truncated SVD\n matrix as new features. - TruncSVDNum cuML Transformer\n The Truncated SVD cuML Transformer runs on GPUs to train a cuML\n accelerated Truncated SVD model on selected numeric columns and\n uses the components of the truncated SVD matrix as new features. 
Time Series Experiments Transformers\n- DateOriginal Transformer\n The Date Original Transformer retrieves date values such as year,\n quarter, month, day, day of the year, week, and weekday values. - DateTimeOriginal Transformer\n The Date Time Original Transformer retrieves date and time values\n such as year, quarter, month, day, day of the year, week, weekday,\n hour, minute, and second values. - EwmaLags Transformer\n The Exponentially Weighted Moving Average (EWMA) Transformer\n calculates the exponentially weighted moving average of target or\n feature lags.", + "prompt_type": "plain" + }, + { + "output": "The aggregation\n is used as a new feature. - LagsInteraction Transformer\n The Lags Interaction Transformer creates target/feature lags and\n calculates interactions between the lags (lag2 - lag1, for\n instance). The interaction is used as a new feature. - Lags Transformer\n The Lags Transformer creates target/feature lags, possibly over\n groups. Each lag is used as a new feature. Lag transformers may\n apply to categorical (strings) features or binary/multiclass\n string valued targets after they have been internally numerically\n encoded. - LinearLagsRegression Transformer\n The Linear Lags Regression transformer trains a linear model on\n the target or feature lags to predict the current target or\n feature value. The linear model prediction is used as a new\n feature. Categorical Transformers (String)\n- Cat Transformer\n The Cat Transformer sorts a categorical column in lexicographical\n order and uses the order index created as a new feature.", + "prompt_type": "plain" + }, + { + "output": "- CatOriginal Transformer\n The Categorical Original Transformer applies an identity\n transformation that leaves categorical features as they are. This\n transformer works with models that can handle non-numeric feature\n values. 
- CVCatNumEncode Transformer\n The Cross Validation Categorical to Numeric Encoding Transformer\n calculates an aggregation of a numeric column for each value in a\n categorical column (ex: calculate the mean Temperature for each\n City) and uses this aggregation as a new feature. - CVTargetEncode Transformer\n The Cross Validation Target Encoding Transformer calculates the\n mean of the response column for each value in a categorical column\n and uses this as a new feature. Cross Validation is used to\n calculate mean response to prevent overfitting. - Frequent Transformer\n The Frequent Transformer calculates the frequency for each value\n in categorical column(s) and uses this as a new feature.", + "prompt_type": "plain" + }, + { + "output": "- LexiLabelEncoder Transformer\n The Lexi Label Encoder sorts a categorical column in\n lexicographical order and uses the order index created as a new\n feature. - NumCatTE Transformer\n The Numeric Categorical Target Encoding Transformer calculates the\n mean of the response column for several selected columns. If one\n of the selected columns is numeric, it is first converted to\n categorical by binning. The mean of the response column is used as\n a new feature. Cross Validation is used to calculate mean response\n to prevent overfitting. - OneHotEncoding Transformer\n The One-hot Encoding transformer converts a categorical column to\n a series of Boolean features by performing one-hot encoding. The\n Boolean features are used as new features. If there are more than\n a specific number of unique values in the column, then they will\n be binned to the max number (10 by default) in lexicographical\n order. This value can be changed with the ohe_bin_list config.toml\n configuration option.", + "prompt_type": "plain" + }, + { + "output": "- WeightOfEvidence Transformer\n The Weight of Evidence Transformer calculates Weight of Evidence\n for each value in categorical column(s). 
The Weight of Evidence is\n used as a new feature. Weight of Evidence measures the \u201cstrength\u201d\n of a grouping for separating good and bad risk and is calculated\n by taking the log of the ratio of distributions for a binary\n response column. []\n This only works with a binary target variable. The likelihood\n needs to be created within a stratified k-fold if a fit_transform\n method is used. More information can be found here:\n http://ucanalytics.com/blogs/information-value-and-weight-of-evidencebanking-case/. Text Transformers (String)\n- BERT Transformer\n The Bidirectional Encoder Representations from Transformers (BERT)\n Transformer creates new features for each text column based on the\n pre-trained model embeddings and is ideally suited for datasets\n that contain additional important non-text features.", + "prompt_type": "plain" + }, + { + "output": "The GRU prediction is used as a new\n feature. Cross Validation is used when training the GRU model to\n prevent overfitting. - TextCharCNN Transformer\n The Text Character CNN Transformer trains a CNN TensorFlow model\n on character embeddings created from a text feature to predict the\n response column. The CNN prediction is used as a new feature. Cross Validation is used when training the CNN model to prevent\n overfitting. - TextCNN Transformer\n The Text CNN Transformer trains a CNN TensorFlow model on word\n embeddings created from a text feature to predict the response\n column. The CNN prediction is used as a new feature. Cross\n Validation is used when training the CNN model to prevent\n overfitting. - TextLinModel Transformer\n The Text Linear Model Transformer trains a linear model on a\n TF-IDF matrix created from a text feature to predict the response\n column. 
The linear model prediction is used as a new feature.", + "prompt_type": "plain" + }, + { + "output": "- Text Transformer\n The Text Transformer tokenizes a text column and creates a TFIDF\n matrix (term frequency-inverse document frequency) or count (count\n of the word) matrix. When the number of TF-IDF features exceeds\n the config TOML value in the list text_gene_dim_reduction_choices,\n dimensionality reduction is performed using truncated SVD. Selected components of the TF-IDF/Count matrix are used as new\n features. - TextOriginal Transformer\n The TextOriginal Transformer performs no feature engineering on\n the text column. Note that this transformer is only available for\n models that have text feature support. Models that have text\n feature support are ImageAutoModel, FTRL, BERT, and unsupervised\n models, in addition to custom model recipes where _can_handle_text\n is set to True. Time Transformers (Date, Time)\n- Dates Transformer\n The Dates Transformer retrieves any date values, including:\n - Year\n - Quarter\n - Month\n - Day\n - Day of year\n - Week\n - Week day\n - Hour\n - Minute\n - Second\n- IsHoliday Transformer\n The Is Holiday Transformer determines if a date column is a\n holiday.", + "prompt_type": "plain" + }, + { + "output": "Creates a separate feature for holidays in\n the United States, United Kingdom, Germany, Mexico, and the\n European Central Bank. Other countries available in the python\n Holiday package can be added via the configuration file. Image Transformers\n- ImageOriginal Transformer\n The Image Original Transformer passes image paths to the model\n without performing any feature engineering. - ImageVectorizer Transformer\n The Image Vectorizer Transformer uses pre-trained ImageNet models\n to convert a column with an image path or URI to an embeddings\n (vector) representation that is derived from the last global\n average pooling layer of the model. 
Note: Fine-tuning of the pre-trained image models can be enabled\n with the image-model-fine-tune expert setting. Autoviz Recommendation Transformer\nThe Autoviz recommendation transformer applies the recommended\ntransformations obtained by\nvisualizing the dataset in Driverless AI .", + "prompt_type": "plain" + }, + { + "output": "The\nautoviz_recommended_transformation \nin the expert experiment settings list/control the transformation\napplied. The syntax is a dict of transformations from Autoviz\n{column_name: transformation} like\n{\"DIS\":\"log\",\"INDUS\":\"log\",\"RAD\":\"inverse\",\"ZN\":\"square_root\"}. The\nAutoviz recommendation transformer itself can be enabled or disabled\nfrom the expert panel by included_transformers \nconfig setting. This transformer is supported in\npython scoring pipelines and\nmojo scoring pipelines with Java Runtime (no C++ support\nat the moment). Example Transformations\nIn this section, we will describe some of the available transformations\nusing the example of predicting house prices on the example dataset. 
-------------------------------------------------------------------\n Date Built Square Footage Num Beds Num Baths State Price\n ------------ --------------- ---------- ----------- ------- -------\n 01/01/1920 1700 3 2 NY $700K\n -------------------------------------------------------------------\nFrequent Transformer\n- the count of each categorical value in the dataset\n- the count can be either the raw count or the normalized count\n -------------------------------------------------------------------\n Date Square Num Beds Num Baths S tate Price Fr\n Built Footage eq_State\n --------- ------------ -------- --------- ------ ------- ----------\n 01/ 1700 3 2 NY 70 4,500\n 01/1920 0,000 \n -------------------------------------------------------------------\nThere are 4,500 properties in this dataset with state = NY.", + "prompt_type": "plain" + }, + { + "output": "Truncated SVD Numeric Transformer\n- truncated SVD trained on selected numeric columns of the data\n- the components of the truncated SVD will be new features\n ---------------------------------------------------------------------\n Date Square Num Num St P rice TruncSVD_Price\n Built Footage Beds Baths ate _NumBeds_NumBaths_1\n -------- ---------- ------ ------- ----- ------ ---------------------\n 01/0 1700 3 2 NY 700 0.632\n 1/1920 ,000 \n ---------------------------------------------------------------------\nThe first component of the truncated SVD of the columns Price, Number of\nBeds, Number of Baths. 
Dates Transformer\n- get year, get quarter, get month, get day, get day of year, get\n week, get week day, get hour, get minute, get second\n --------------------------------------------------------------------\n Date Square Num Beds Num St Price Date\n Built Footage Baths ate Built_Month\n --------- ------------ -------- -------- ----- ------- -------------\n 01/ 1700 3 2 NY 70 1\n 01/1920 0,000 \n --------------------------------------------------------------------\nThe home was built in the month January.", + "prompt_type": "plain" + }, + { + "output": "*In order to prevent overfitting, Driverless AI calculates this average\non out-of-fold data using cross validation. Numeric to Categorical Target Encoding Transformer\n- numeric column converted to categorical by binning\n- cross validation target encoding done on the binned numeric column\n -------------------------------------------------------------------\n Date Square Num Num St P rice CV_TE\n Built Footage Beds Baths ate _SquareFootage\n -------- ----------- ------- -------- ----- ------ ----------------\n 01/0 1700 3 2 NY 700 345,000\n 1/1920 ,000 \n -------------------------------------------------------------------\nThe column Square Footage has been bucketed into 10 equally populated\nbins. This property lies in the Square Footage bucket 1,572 to 1,749. The average price of properties with this range of square footage is\n$345,000*. *In order to prevent overfitting, Driverless AI calculates this average\non out-of-fold data using cross validation.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI release blogs\nLooking for the latest news on H2O Driverless AI releases? Find it here\nin a single convenient location. Driverless AI 1.10.4\nVersion 1.10.4 brings several new features that make it simpler for you\nto take advantage of the predictive modeling capabilities of DAI. For a\nfull list of changes and accompanying documentation, see version_1104. 
Read more: What's new in version 1.10.4\nDriverless AI GUI-based wizards\nSeveral new GUI-based wizards have been added to DAI as part of this\nrelease. - Experiment wizard: This wizard guides you step-by-step through the\n process of setting up and starting an experiment. For users who\n aren't already familiar with using DAI, the experiment wizard is a\n great way to start running experiments without having to worry about\n whether you've set up your experiment correctly. If you're an experienced user of DAI, you can still take advantage\n of this wizard to ensure that every aspect of your experiment has\n been configured correctly, especially in cases where you're\n attempting to set up more complex experiments.", + "prompt_type": "plain" + }, + { + "output": "To access the experiment wizard, go to the Experiments page and\n click New Experiment -> Wizard Setup. - Dataset join wizard: The process of joining two datasets together\n can sometimes be difficult, depending on the size and complexity of\n the datasets. This wizard guides you through this process so that\n you can be sure that the datasets are joined correctly. To access the Dataset Join Wizard, go to the Datasets page and\n click on the name of the dataset, then click Join Wizard from the\n list of options. - Leaderboard wizard: This wizard helps you set up and perform a\n business value analysis of all models in a project. To access the\n Leaderboard wizard, go to a project and click the Analyze Results\n button. []\nExpert Settings redesign\nThe Expert Settings window has been redesigned to make it simpler to\nnavigate and locate specific settings that are relevant to your\nexperiment. By clicking the Filter by Tags button, you can now also\nfilter the list of available settings by specific tags.", + "prompt_type": "plain" + }, + { + "output": "LDAP Authentication Example\nThis section describes how to enable Lightweight Directory Access\nProtocol in Driverless AI. 
The available parameters can be specified as\nenvironment variables when starting the Driverless AI Docker image, or\nthey can be set via the config.toml file for native installs. Upon\ncompletion, all the users in the configured LDAP should be able to log\nin to Driverless AI and run experiments, visualize datasets, interpret\nmodels, etc. Note: Driverless AI does not support LDAP client auth. If you have LDAP\nclient auth enabled, then the Driverless AI LDAP connector will not\nwork. Description of Configuration Attributes\nThe following options can be specified when enabling LDAP\nauthentication. - ldap_server: The LDAP server domain or IP. - ldap_port: The LDAP server port. - ldap_bind_dn: The complete distinguished name (DN) of the LDAP bind\n user. - ldap_bind_password: The password for the LDAP bind. - ldap_tls_file: The Transport Layer Security (TLS) certificate file\n location.", + "prompt_type": "plain" + }, + { + "output": "- ldap_search_base: The location in the Directory Information Tree\n (DIT) where the search will start. - ldap_search_filter: A string that describes what you are searching\n for. You can use Python substitution to have this constructed\n dynamically. (Only {{DAI_USERNAME}} is supported. For example,\n \"(&(objectClass=person)(cn:dn:={{DAI_USERNAME}}))\".) - ldap_search_attributes: LDAP attributes to return from search. - ldap_user_name_attribute=\"uid\": Specify the key to find user name. LDAP without SSL\nThe following examples describe how to enable LDAP without SSL when\nrunning Driverless AI in the Docker image or through native installs. If\nthe configuration and authentication are successful, the\nuser can access Driverless AI and run experiments, visualize datasets,\ninterpret models, etc. Docker Image Installs\nThe following example shows how to configure LDAP without SSL when\nstarting the Driverless AI Docker image. 
nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -p 12345:12345 \\\n -u `id -u`:`id -g` \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,s3,hdfs\" \\\n -e DRIVERLESS_AI_AUTHENTICATION_METHOD=\"ldap\" \\\n -e DRIVERLESS_AI_LDAP_USE_SSL=\"false\" \\\n -e DRIVERLESS_AI_LDAP_SERVER=\"ldap.forumsys.com\" \\\n -e DRIVERLESS_AI_LDAP_PORT=\"389\" \\\n -e DRIVERLESS_AI_LDAP_SEARCH_BASE=\"dc=example,dc=com\" \\\n -e DRIVERLESS_AI_LDAP_BIND_DN=\"cn=read-only-admin,dc=example,dc=com\" \\ \n -e DRIVERLESS_AI_LDAP_BIND_PASSWORD=password \\ \n -e DRIVERLESS_AI_LDAP_SEARCH_FILTER=\"(&(objectClass=person)(cn:dn:={{DAI_USERNAME}}))\" \\\n -e DRIVERLESS_AI_LDAP_USER_NAME_ATTRIBUTE=\"uid\" \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\nNative Installs\nThe following example shows how to configure LDAP without SSL when\nstarting Driverless AI from a native install.", + "prompt_type": "plain" + }, + { + "output": "1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example:\n2. Enable LDAP authentication without SSL. 3. Start (or restart) Driverless AI. Note that the command used to\n start Driverless AI varies depending on your install type. If authentication is successful, the user can access Driverless AI and\nrun experiments, visualize datasets, interpret models, etc. LDAP with SSL\nThese examples show how to enable LDAP authentication with SSL and\nadditional parameters that can be specified as environment variables\nwhen starting the Driverless AI Docker image, or they can be set via the\nconfig.toml file for native installs. Upon completion, all the users in\nthe configured LDAP should be able to log in to Driverless AI and run\nexperiments, visualize datasets, interpret models, etc. Docker Image Installs\nSpecify the following LDAP environment variables when starting the\nDriverless AI Docker image. 
This example enables LDAP authentication and\nshows how to specify additional options enabling SSL.", + "prompt_type": "plain" + }, + { + "output": "Leaderboards\nDriverless AI provides a feature to automatically create leaderboards. The Create Leaderboard feature runs multiple diverse experiments that\nprovide an overview of the dataset. This feature also provides you with\nrelevant information for deciding on complexity, accuracy, size, and\ntime tradeoffs when putting models into production. Refer to the\nexpert-settings topic for information on expert settings that can be\nused to control this feature. For more information on the default models\nbuilt for a leaderboard, see leaderboard_models. The built models are placed under the projects page and can be\nsimultaneously scored on the test dataset and compared. Creating a Leaderboard\nCreating a Leaderboard is similar to running a\nnew experiment . Refer to the experiment_settings,\nexpert-settings, and scorers topics for more information about options\nyou can set when running an experiment. 1. On the Datasets page, select the dataset that you want to use for\n the experiment, then click Predict\n or\n On the Experiments page, click New Experiment, then select the\n dataset that you want to use.", + "prompt_type": "plain" + }, + { + "output": "Specify whether to include dropped columns, a validation dataset,\n and a testing dataset. 3. Specify the Target column and optionally a fold column, weight\n column, and time column. 4. Optionally specify expert-settings. 5. Optionally adjust the Accuracy/Time/Interpretability knobs. 6. Optionally override the default scorer. 7. Optionally override the Classification/Regression setting. 8. Optionally specify to make the experiments reproducible and/or\n whether to enable GPUs. 9. Click the Create Leaderboard button. []\nDriverless AI creates a new, randomly named project and begins\nautomatically training models using the queuing mechanism. 
The new\nproject is given the description \"Automatic Leader Board\". After all\nmodels have been built, you can\nscore each experiment and\ncompare experiments , as described in the\nprojects topic. []\nLeaderboard Models\nWhen creating a leaderboard, the models that are built will vary based\non whether you are running a regular experiment or a time-series\nexperiment.", + "prompt_type": "plain" + }, + { + "output": "You can omit models from\nbeing built by disabling those models in the expert-settings. ---------------------------------------------------------------------------\n Model Ac Time Interpre Config Overrides\n curacy tability \n ------------------ -------- -------- ---------- ---------------------------\n Few Features 1 1 10 max_orig _cols_selected=5\n Decision Tree nfeatures_max=10\n Simple LightGBM 1 1 10 \n Constant Baseline 1 1 10 max_orig _cols_selected=1\n Single Decision Spe Spe S pecified fixed_ ensemble_level=0\n Tree cified cified in ex \n in expe in expe periment \n riment riment \n Single GLM Spe Spe S pecified fixed_ ensemble_level=0\n cified cified in ex \n in expe in expe periment \n riment riment \n Complex LightGBM 7 Spe S pecified \n Ensemble cified in ex \n in expe periment \n riment \n Few Features Spe Spe S pecified max_orig _cols_selected=5\n Single LightGBM cified cified in ex nfeatures_max=10\n in expe in expe periment fixed_ ensemble_level=0\n riment riment \n Default Single Spe Spe S pecified fixed_ ensemble_level=0\n LightGBM cified cified in ex \n in expe in expe periment \n riment riment \n Default Spe Spe S pecified \n XGBoost/LightGBM cified cified in ex \n Ensemble in expe in expe periment \n riment riment \n Single FTRL Spe Spe S pecified fixed_ ensemble_level=0\n cified cified in ex \n in expe in expe periment \n riment riment \n Single TensorFlow Spe Spe S pecified fixed_ ensemble_level=0\n cified cified in ex \n in expe in expe periment \n riment riment \n 
---------------------------------------------------------------------------\nTime Series Experiments\nDriverless AI will build one time-series experiment using the default\nDriverless AI settings.", + "prompt_type": "plain" + }, + { + "output": "Experiments\n\nexperiment-settings expert-settings scorers experiment-new\nexperiment-sharing experiment-completed experiment-insights\nexperiment-scores experiment-graphs experiment-summary\nexperiment-performance\n\ndiagnosing view-experiments leaderboard projects", + "prompt_type": "plain" + }, + { + "output": "Imputation in Driverless AI\n\nThe impute feature lets you fill in missing values with substituted\nvalues. Missing values can be imputed based on the column's mean,\nmedian, minimum, maximum, or mode value. You can also impute based on a\nspecific percentile or by a constant value.\n\nThe imputation is precomputed on all data or inside the pipeline (based\non what's in the train split).\n\nThe following guidelines should be followed when performing imputation:\n\n- For constant imputation on numeric columns, constant must be\n numeric.\n- For constant imputation on string columns, constant must be a\n string.\n- For percentile imputation, the percentage value must be between 0\n and 100.\n\nNotes:\n\n- This feature is experimental.\n- Time columns cannot be imputed.\n\nEnabling Imputation\n\nImputation is disabled by default. It can be enabled by setting", + "prompt_type": "plain" + }, + { + "output": "enable_imputation=true in the config.toml (for native installs) or via the DRIVERLESS_AI_ENABLE_IMPUTATION=true\nenvironment variable (Docker image installs). This enables imputation\nfunctionality in transformers.\n\nRunning an Experiment with Imputation\n\nOnce imputation is enabled, you will have the option when running an\nexperiment to add imputation columns.\n\n1. Click on Columns Imputation in the Experiment Setup page.\n\n2. Click on Add Imputation in the upper-right corner.\n3. 
Select the column that contains missing values you want to impute.\n4. Select the imputation type. Available options are:\n\n5. Optionally allow Driverless AI to compute the imputation value\n during validation instead of using the inputted imputed value.\n6. Click Save when you are done.\n\n7. At this point, you can add additional imputations, delete the\n imputation you just created, or close this form and return to the\n experiment. Note that each column can have only a single imputation.", + "prompt_type": "plain" + }, + { + "output": "FAQ\nH2O Driverless AI is an artificial intelligence (AI) platform for\nautomatic machine learning. Driverless AI automates some of the most\ndifficult data science and machine learning workflows such as feature\nengineering, model validation, model tuning, model selection and model\ndeployment. It aims to achieve highest predictive accuracy, comparable\nto expert data scientists, but in much shorter time thanks to end-to-end\nautomation. Driverless AI also offers automatic visualizations and\nmachine learning interpretability (MLI). Especially in regulated\nindustries, model transparency and explanation are just as important as\npredictive performance. Modeling pipelines (feature engineering and\nmodels) are exported (in full fidelity, without approximations) both as\nPython modules and as Java standalone scoring artifacts. This section provides answers to frequently asked questions. If you have\nadditional questions about using Driverless AI, post them on Stack\nOverflow using the driverless-ai tag at\nhttp://stackoverflow.com/questions/tagged/driverless-ai.", + "prompt_type": "plain" + }, + { + "output": "If you have not signed up for the H2O.ai\nCommunity Slack workspace, you can do so here:\nhttps://www.h2o.ai/community/. General\n- How is Driverless AI different than any other black box ML\n algorithm? - How often do new versions come out? Installation/Upgrade/Authentication\n- How can I change my username and password? 
- Can Driverless AI run on CPU-only machines? - How can I upgrade to a newer version of Driverless AI? - What kind of authentication is supported in Driverless AI? - How can I automatically turn on persistence each time the GPU system\n reboots? - How can I start Driverless AI on a different port than 12345? - Can I set up TLS/SSL on Driverless AI? - Can I set up TLS/SSL on Driverless AI in AWS? - Why do I receive a \"package dai-.x86_64 does not verify: no\n digest\" error during the installation? <#no-digest>__\n- I received a \"Must have exactly one OpenCL platform 'NVIDIA CUDA'\"\n error. How can I fix that? - Is it possible for multiple users to share a single Driverless AI\n instance?", + "prompt_type": "plain" + }, + { + "output": "- How can I retrieve a list of Driverless AI users? - Start of Driverless AI fails on the message \"Segmentation fault\n (core dumped)\" on Ubuntu 18/RHEL 7.6. How can I fix this? - Which Linux systems does Driverless AI support? Data\n- Is there a file size limit for datasets? - How can I import CSV files that use UTF-8 encoding into Excel? - Can a byte order mark be used when writing CSV files with datatable? - Which version of Longhorn is supported by Driverless AI? - Is it possible to download a transformed test dataset in Driverless\n AI? Connectors\n- Why can't I import a folder as a file when using a data connector on\n Windows? - I get a ClassNotFoundException error when I try to select a JDBC\n connection. How can I fix that? - I get a org.datanucleus.exceptions.NucleusUserException: Please\n check your CLASSPATH and plugin specification error when attempting\n to connect to hive. How can I fix that? - I get a \"Permission Denied\" error during Hive import.", + "prompt_type": "plain" + }, + { + "output": "Recipes\n- Where can I retrieve H2O's custom recipes? - How can I create my own custom recipe? - Are MOJOs supported for experiments that use custom recipes? - How can I use BYOR in my airgapped installation? 
- When enabling recipes in Driverless AI, can I install Python\n packages from my organization's internal Python package index? Experiments\n- How much memory does Driverless AI require in order to run\n experiments? - How many columns can Driverless AI handle? - How should I use Driverless AI if I have large data? - How does Driverless AI detect the ID column? - Can Driverless AI handle data with missing values/nulls? - How does Driverless AI deal with categorical variables? What if an\n integer column should really be treated as categorical? - How are outliers handled? - If I drop several columns from the Train dataset, will Driverless AI\n understand that it needs to drop the same columns from the Test\n dataset? - Does Driverless AI treat numeric variables as categorical variables?", + "prompt_type": "plain" + }, + { + "output": "- Why do my selected algorithms not show up in the Experiment Preview? - How can we turn on TensorFlow Neural Networks so they are evaluated? - Does Driverless AI standardize the data? - What objective function is used in XGBoost? - Does Driverless AI perform internal or external validation? - How does Driverless AI prevent overfitting? - How does Driverless AI avoid the multiple hypothesis (MH) problem? - How does Driverless AI suggest the experiment settings? - What happens when I set Interpretability and Accuracy to the same\n number? - Can I specify the number of GPUs to use when running Driverless AI? - How can I create the simplest model in Driverless AI? - Why is my experiment suddenly slow? - When I run multiple experiments with different seeds, why do I see\n different scores, runtimes, and sizes on disk in the Experiments\n listing page? - Why does the final model performance appear to be worse than\n previous iterations? 
- How can I find features that may be causing data leakages in my\n Driverless AI model?", + "prompt_type": "plain" + }, + { + "output": "- How can I see all the performance metrics possible for my\n experiment? - What if my training/validation and testing data sets come from\n different distributions? - Does Driverless AI handle weighted data? - How does Driverless AI handle fold assignments for weighted data? - Why do I see that adding new features to a dataset deteriorates the\n performance of the model? - How does Driverless AI handle imbalanced data for binary\n classification experiments? - How is feature importance calculated in Driverless AI? - I want to have only one LightGBM model in the final pipeline. How\n can I achieve this? - I want to have only one LightGBM model and no FE. How can I do this? - What is fast approximation in Driverless AI? - When should fast approximation be turned off? - Why does the confusion matrix sometimes show decimals instead of\n whole numbers? - Is data sampling for multiclass use cases supported? Feature Transformations\n- Where can I get details of the various transformations performed in\n an experiment?", + "prompt_type": "plain" + }, + { + "output": "- Why are predicted probabilities not available when I run an\n experiment without ensembling? Deployment\n- What drives the size of a MOJO? - Are MOJOs thread safe? - Running the scoring pipeline for my MOJO is taking several hours. How can I get this to run faster? - Why have I encountered a \"Best Score is not finite\" error? Time Series\n- What if my data has a time dependency? - What is a lag, and why does it help? - Why can't I specify a validation data set for time-series problems? Why do you look at the test set for time-series problems\n- Why does the gap between train and test matter? Is it because of\n creating the lag features on the test set? 
- In regards to applying the target lags to different subsets of the\n time group columns, are you saying Driverless AI performs\n auto-correlation at \"levels\" of the time series? For example,\n consider the Walmart dataset where I have Store and Dept (and my\n target is Weekly Sales). Are you saying that Driverless AI checks\n for auto-correlation in Weekly Sales based on just Store, just Dept,\n and both Store and Dept?", + "prompt_type": "plain" + }, + { + "output": "- What is the logic behind the selectable numbers for forecast horizon\n length? - Assume that in my Walmart dataset, all stores provided data at the\n week level, but one store provided data at the day level. What would\n Driverless AI do? - Assume that in my Walmart dataset, all stores and departments\n provided data at the weekly level, but one department in a specific\n store provided weekly sales on a bi-weekly basis (every two weeks). What would Driverless AI do? - Why does the number of weeks that you want to start predicting\n matter? - Are the scoring components of time series sensitive to the order in\n which new pieces of data arrive? I.e., is each row independent at\n scoring time, or is there a real-time windowing effect in the\n scoring pieces? - What happens if the user, at predict time, gives a row with a time\n value that is too small or too large? - What's the minimum data size for a time series recipe? - How long must the training data be compared to the test data?", + "prompt_type": "plain" + }, + { + "output": "- Can the time information be distributed across multiple columns in\n the input data (such as [year, day, month])? - What type of modeling approach does Driverless AI use for time\n series? - What's the idea behind exponential weighting of moving averages? Logging\n- How can I reduce the size of the Audit Logger? General\nHow is Driverless AI different than any other black box ML algorithm? How often do new versions come out? 
Installation/Upgrade/Authentication\nHow can I change my username and password? Can Driverless AI run on CPU-only machines? How can I upgrade to a newer version of Driverless AI? What kind of authentication is supported in Driverless AI? How can I automatically turn on persistence each time the GPU system\nreboots? How can I start Driverless AI on a different port than 12345? Can I set up TLS/SSL on Driverless AI? Can I set up TLS/SSL on Driverless AI in AWS? I received a \"package dai-.x86_64 does not verify: no digest\"\nerror during the installation.", + "prompt_type": "plain" + }, + { + "output": "I received a \"Must have exactly one OpenCL platform 'NVIDIA CUDA'\"\nerror. How can I fix that? Is it possible for multiple users to share a single Driverless AI\ninstance? Can multiple Driverless AI users share a GPU server? How can I retrieve a list of Driverless AI users? Start of Driverless AI fails on the message ``Segmentation fault (core\ndumped)`` on Ubuntu 18/RHEL 7.6. How can I fix this? Which Linux systems does Driverless AI support? Data\nIs there a file size limit for datasets? How can I import CSV files that use UTF-8 encoding into Excel? Can a byte order mark be used when writing CSV files with datatable? Which version of Longhorn is supported by Driverless AI? Is it possible to download a transformed test dataset in Driverless AI? Connectors\nWhy can't I import a folder as a file when using a data connector on\nWindows? I get a ClassNotFoundException error when I try to select a JDBC\nconnection. How can I fix that? I get a org.datanucleus.exceptions.NucleusUserException: Please check\nyour CLASSPATH and plugin specification error when attempting to connect\nto Hive.", + "prompt_type": "plain" + }, + { + "output": "I get a \"Permission Denied\" error during Hive import. How do I fix this? Recipes\nWhere can I retrieve H2O's custom recipes? How can I create my own custom recipe? Are MOJOs supported for experiments that use custom recipes? 
How can I use BYOR in my airgapped installation? When enabling recipes in Driverless AI, can I install Python packages\nfrom my organization's internal Python package index? Yes\u2014you can use the pip_install_options\n TOML option to specify your organization's\n internal Python package index as follows:\n pip_install_options=\"['--extra-index-url', 'http://my-own-repo:port']\"\n For more information on the --extra-index-url pip install\n option, refer to the official pip documentation. Experiments\nHow much memory does Driverless AI require in order to run experiments? How many columns can Driverless AI handle? How should I use Driverless AI if I have large data? How does Driverless AI detect the ID column? Can Driverless AI handle data with missing values/nulls?", + "prompt_type": "plain" + }, + { + "output": "What if an\ninteger column should really be treated as categorical? How are outliers handled? If I drop several columns from the Train dataset, will Driverless AI\nunderstand that it needs to drop the same columns from the Test dataset? Does Driverless AI treat numeric variables as categorical variables? Which algorithms are used in Driverless AI? Why do my selected algorithms not show up in the Experiment Preview? When changing the algorithms used via Expert Settings > Model and Expert\nSettings > Recipes, you may notice in the Experiment Preview that those\nchanges are not applied. Driverless AI determines whether to include\nmodels and/or recipes based on a hierarchy of those expert settings as\nwell as data types (numeric, categorical, text, image, etc.) and system\nproperties (GPUs, multiple GPUs, etc.). 
[]\n- Setting an Algorithm to \"OFF\" in Expert Settings: If an algorithm is\n turned OFF in Expert Settings (for example, GLM Models) when\n running, then that algorithm will not be included in the experiment.", + "prompt_type": "plain" + }, + { + "output": "- Algorithms Not Specified as \"OFF\" and Included from Recipes: If a\n Driverless AI algorithm is specified as either \"AUTO\" or \"ON\" and\n additional models are selected for the experiment in the Include\n specific models option, then those algorithms may or may not be\n included in the experiment. Driverless AI will determine the\n algorithms to use based on the data and experiment type. - To show warnings in the preview for which models were not used, set\n show_inapplicable_models_preview = true in config.toml\nWhy do my selected transformers not show up in the Experiment Preview? When changing the transformers used via Expert Settings > Transformers\nand Expert Settings > Recipes, you may notice in the Experiment Preview\nthat those changes are not applied. Driverless AI determines whether to\ninclude transformers based upon data types (numeric,\ncategorical, text, image, etc.) and system properties (GPUs, multiple\nGPUs, etc.). - Transformers Not Included from Recipes (BYOR): If a transformer from\n a custom recipe is not selected for the experiment in the Include\n specific transformers option, then that transformer will not be\n included in the experiment.", + "prompt_type": "plain" + }, + { + "output": "Does Driverless AI standardize the data? What objective function is used in XGBoost? Does Driverless AI perform internal or external validation? How does Driverless AI prevent overfitting? How does Driverless AI avoid the multiple hypothesis (MH) problem? How does Driverless AI suggest the experiment settings? What happens when I set Interpretability and Accuracy to the same\nnumber? Can I specify the number of GPUs to use when running Driverless AI? 
How can I create the simplest model in Driverless AI? For information on why your experiment isn't performing as expected, see\nexperiment_performance. When I run multiple experiments with different seeds, why do I see\ndifferent scores, runtimes, and sizes on disk in the Experiments listing\npage? Why does the final model performance appear to be worse than previous\niterations? How can I find features that may be causing data leakages in my\nDriverless AI model? How can I see the performance metrics on the test data? How can I see all the performance metrics possible for my experiment?", + "prompt_type": "plain" + }, + { + "output": "Does Driverless AI handle weighted data? How does Driverless AI handle fold assignments for weighted data? Why do I see that adding new features to a dataset deteriorates the\nperformance of the model? How does Driverless AI handle imbalanced data for binary classification\nexperiments? How is feature importance calculated in Driverless AI? I want to have only one LightGBM model in the final pipeline. How can I\ndo this? I want to have only one LightGBM model and no FE. How can I do this? What is fast approximation in Driverless AI? When should fast approximation be turned off? Why does the confusion matrix sometimes show decimals instead of whole\nnumbers? Is data sampling for multiclass use cases supported? Feature Transformations\nWhere can I get details of the various transformations performed in an\nexperiment? Predictions\nHow can I download the predictions onto the machine where Driverless AI\nis running? Why are predicted probabilities not available when I run an experiment\nwithout ensembling?", + "prompt_type": "plain" + }, + { + "output": "Are MOJOs thread safe? Running the scoring pipeline for my MOJO is taking several hours. How\ncan I get this to run faster? Why have I encountered a \"Best Score is not finite\" error? Time Series\nWhat if my data has a time dependency? What is a lag, and why does it help? 
Why can't I specify a validation data set for time-series problems? Why\ndo you look at the test set for time-series problems?\nWhy does the gap between train and test matter? Is it because of\ncreating the lag features on the test set? In regards to applying the target lags to different subsets of the time\ngroup columns, are you saying Driverless AI performs auto-correlation at\n\"levels\" of the time series? For example, consider the Walmart dataset\nwhere I have Store and Dept (and my target is Weekly Sales). Are you\nsaying that Driverless AI checks for auto-correlation in Weekly Sales\nbased on just Store, just Dept, and both Store and Dept? How does Driverless AI detect the time period? What is the logic behind the selectable numbers for forecast horizon\nlength?", + "prompt_type": "plain" + }, + { + "output": "What would\nDriverless AI do? Assume that in my Walmart dataset, all stores and departments provided\ndata at the weekly level, but one department in a specific store\nprovided weekly sales on a bi-weekly basis (every two weeks). What would\nDriverless AI do? Why does the number of weeks that you want to start predicting matter? Are the scoring components of time series sensitive to the order in\nwhich new pieces of data arrive? I.e., is each row independent at\nscoring time, or is there a real-time windowing effect in the scoring\npieces? What happens if the user, at predict time, gives a row with a time value\nthat is too small or too large? What's the minimum data size for a time series recipe? How long must the training data be compared to the test data? How does the time series recipe deal with missing values? Can the time information be distributed across multiple columns in the\ninput data (such as [year, day, month])? 
What type of modeling approach does Driverless AI use for time series?", + "prompt_type": "plain" + }, + { + "output": "Dask Multinode Training (Alpha)\nDriverless AI can be configured to run in a multinode worker mode where\neach worker has a Dask CPU worker and (if the worker has GPUs) a Dask\nGPU worker. The main node in this setup has a Dask scheduler. This\ndocument describes the Dask training process and how to configure it. Before setting up Dask multinode training, you must configure\nRedis Multinode training in Driverless AI . Note: For Dask multinode examples, see\nDask Multinode examples . Understanding Dask Multinode Training\nDask multinode training in Driverless AI can be used to run a single\nexperiment that trains across the multinode cluster. It is effective in\nsituations where you need to run and complete a single experiment with\nlarge amounts of data or a large hyper-parameter space search. The Dask\ndistributed machines can be CPU only or CPU + GPU, with Dask experiments\nusing resources accordingly. For more information on Dask multinode design concepts, see\nhttps://dask.org/.", + "prompt_type": "plain" + }, + { + "output": "If you are interested in using Dask multinode configurations,\n contact support@h2o.ai. - Dask multinode training requires the transfer of data between\n several different workers. For example, if an experiment uses the\n Dask cluster, it must distribute data among cluster workers to be\n trained by XGBoost or Optuna hyper-parameter search. - Dask tasks are scheduled on a first in, first out (FIFO) basis. - Users can enable Dask multinode training on a per-experiment basis\n from the expert settings. - If an experiment chooses to use the Dask cluster (default is true if\n applicable), then a single experiment runs on the entire multinode\n cluster. For this reason, using a large number of commodity-grade\n hardware is not useful in the context of Dask multinode. 
- By default, Dask models are not selected because they can be less\n efficient for small data than non-Dask models. Set\n show_warnings_preview = true in the config.toml to display warnings\n whenever a user does not select Dask models and the system is\n capable of using them.", + "prompt_type": "plain" + }, + { + "output": "lightgbm_listen_port. Edit the Driverless AI config.toml ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ After Driverless AI is installed, edit the following config option in the config.toml file. .. code:: bash # Dask settings -- set the IP address of the Dask server. Same as the IP of the main Driverless AI node, and usually same as the Redis/MinIO IP dask_server_ip = \"\" For the dask_server_ip parameter, Driverless AI automatically tries the Redis, MinIO, and local IP addresses to see if it can find the Dask scheduler. In such a case, the dask_server_ip\nparameter does not have to be set.\n\nOn EC2 systems, if the main server is", + "prompt_type": "plain" + }, + { + "output": "http://ec2-52-71-252-183.compute-1.amazonaws.com:12345/, it is\nrecommended to use the nslookup-resolved IP instead of the EC2 IP due to\nthe way Dask and XGBoost (with rabit) operate. For example,", + "prompt_type": "plain" + }, + { + "output": "nslookup ec2-52-71-252-183.compute-1.amazonaws.com gives", + "prompt_type": "plain" + }, + { + "output": "10.10.4.103. Redis, MinIO, and Dask subsequently use that as the IP in the config.toml file. If dask_server_ip is not specified, its value is automatically inferred from Redis or MinIO. Once the worker node starts, use the Driverless AI server IP and Dask dashboard port(s) to view the status of the Dask cluster. .. figure:: images/dask_dashboard.png :alt: Description of Configuration Attributes --------------------------------------- General Dask Settings ~~~~~~~~~~~~~~~~~~~~~ - enable_dask_cluster: Specifies whether to enable a Dask worker on each multinode worker. 
- dask_server_ip: IP address used by server for Dask and Dask CUDA communications. CPU Cluster Dask Settings ~~~~~~~~~~~~~~~~~~~~~~~~~ - dask_server_port: Port used by server for Dask communications. - dask_dashboard_port: Dask dashboard port for Dask diagnostics. - dask_cluster_kwargs: Set Dask CUDA/RAPIDS cluster settings for single node workers. - dask_scheduler_env: Set Dask scheduler env. - dask_scheduler_options: Set Dask scheduler command-line options.", + "prompt_type": "plain" + }, + { + "output": "- dask_worker_options: Set Dask worker command-line options. - dask_protocol: Protocol used for Dask communications. - dask_worker_nprocs: Number of processes per Dask worker. - dask_worker_nthreads: Number of threads per process for Dask. GPU CUDA Cluster Dask Settings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - dask_cuda_server_port: Port used by server for Dask CUDA communications. - dask_cuda_dashboard_port: Dask dashboard port for dask_cuda diagnostics. - dask_cuda_cluster_kwargs: Set Dask CUDA/RAPIDS cluster settings for single node workers. - dask_cuda_scheduler_env: Set Dask CUDA scheduler env. - dask_cuda_scheduler_options: Set Dask CUDA scheduler command-line options. - dask_cuda_worker_options: Set Dask CUDA worker options. - dask_cuda_worker_env: Set Dask CUDA worker environment variables. - dask_cuda_protocol: Protocol used for Dask CUDA communications. - dask_cuda_worker_nthreads: Number of threads per process for dask_cuda. Other Cluster Dask Settings ~~~~~~~~~~~~~~~~~~~~~~~~~~~ - lightgbm_listen_port: LightGBM local listening port when using Dask with LightGBM.", + "prompt_type": "plain" + }, + { + "output": "**Notes**: - The same steps can be used for a local Dask cluster on a single node with multiple GPUs. - If you have a Dask cluster but only want to use the worker node's GPUs, set :ref:`use_dask_cluster ` to False. 
- If you have a Dask cluster or a single Dask node available as a single user, you can set :ref:`exclusive_mode ` to \"max\" in expert settings to maximize usage of workers in the cluster. User Experiment Dask Settings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - use_dask_cluster: Whether to use Dask cluster (True) or only local cluster for multi-GPU case (False). - enable_xgboost_rapids: :ref:`Enable RAPIDS-cudf extensions to XGBoost GBM/Dart. ` (1) - enable_xgboost_gbm_dask: :ref:`Enable dask_cudf (multi-GPU) XGBoost GBM. ` (2) - enable_lightgbm_dask: :ref:`Enable Dask (multi-node) LightGBM. ` (*Experimental*) (2) - enable_xgboost_dart_dask: :ref:`Enable dask_cudf (multi-GPU) XGBoost Dart.", + "prompt_type": "plain" + }, + { + "output": "H2O AI Feature Store Setup\nYou can use the H2O AI Feature Store to store, update, and share the\nfeatures data scientists, developers, and engineers need to build AI\nmodels. This page describes how to configure Driverless AI to work with\nthe H2O AI Feature Store. Note: For more information on the H2O AI Feature Store, refer to the\nofficial documentation. Description of relevant configuration attributes\nThe following are descriptions of the relevant configuration attributes\nwhen enabling the H2O AI Feature Store data connector:\n- enabled_file_systems: A list of file systems you want to enable. To\n enable the Feature Store data connector, feature_store must be added\n to this list of data sources. - feature_store_endpoint_url: A URL that points to the Feature Store\n server. - feature_store_enable_tls: To enable TLS communication between DAI\n and the Feature Store server, set this to true. - feature_store_access_token_scopes: A space-separated list of access\n token scopes used by the Feature Store connector for authentication.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI k-LIME MOJO Reason Code Pipeline - Java Runtime\nFor completed MLI experiments, users can download the k-LIME MOJO. 
The\nk-LIME MOJO Reason Code Pipeline is a reason code engine that can be\ndeployed in any Java environment to generate reason codes in real time. To obtain Java runtime MOJO for K-LIME reason codes, download K-Lime\nMOJO reason code Pipeline and for Python scoring pipeline for K-LIME\nreason codes and Shapley, download the Scoring pipeline. Note\nThe k-LIME MOJO Reason Code pipeline does not support multinomial,\nnatural language processing (NLP), and time series models. []\nPrerequisites\nThe following are required in order to run the k-LIME MOJO reason code\npipeline. - Java 7 runtime (JDK 1.7) or newer. Note: Using Java 11+ is\n recommended due to a bug in Java. For more information, see\n https://bugs.openjdk.java.net/browse/JDK-8186464. - Valid Driverless AI license. You can download the license.sig file\n from the machine hosting Driverless AI (usually in the license\n folder).", + "prompt_type": "plain" + }, + { + "output": "- mojo2-runtime.jar file. This is available from the top navigation\n menu in the Driverless AI UI and in the downloaded mojo-pipeline.zip\n file for an experiment. License Specification\nDriverless AI requires a license to be specified in order to run any\nDAI/MLI MOJO. The license can be specified with one of the following:\n- An environment variable:\n - DRIVERLESS_AI_LICENSE_FILE: Path to the Driverless AI license\n file, or\n - DRIVERLESS_AI_LICENSE_KEY: The Driverless AI license key\n (Base64 encoded string)\n- A system property of JVM (-D option):\n - ai.h2o.mojos.runtime.license.file: Path to the Driverless AI\n license file, or\n - ai.h2o.mojos.runtime.license.key: The Driverless AI license\n key (Base64 encoded string)\n- An application classpath:\n - The license is loaded from a resource called /license.sig. 
- The default resource name can be changed with the JVM system\n property ai.h2o.mojos.runtime.license.filename.", + "prompt_type": "plain" + }, + { + "output": "On the completed MLI page, click on the Download k-LIME MOJO Reason\n Code Pipeline button. 2. To run the Java application for reason code generation directly, use\n the following command:\n java -Dai.h2o.mojos.runtime.license.file=license.sig -cp mojo2-runtime.jar ai.h2o.mojos.ExecuteMojo klime_mojo.zip example.csv\nk-LIME MOJO Command Line Options\nExecuting the Java Runtime\nThe following are two general examples of how the Java runtime can be\nexecuted from the command-line. - With additional libraries:\n- Without additional libraries:\nSo, for example, the sys.ai.h2o.mojos.parser.csv.separator option can be\npassed with the following:\n java -Dsys.ai.h2o.mojos.parser.csv.separator='|' -Dai.h2o.mojos.runtime.license.file=../license.sig -jar mojo2-runtime.jar pipeline.mojo input.csv output.csv\nSimilarly, the sys.ai.h2o.mojos.exposedInputs option can be passed with:\n java -Xmx5g -Dsys.ai.h2o.mojos.exposedInputs=ALL -Dai.h2o.mojos.runtime.license.file= -cp mojo2-runtime.jar ai.h2o.mojos.ExecuteMojo pipeline.mojo example.csv\nNote: Data can be streamed from stdin to stdout by replacing both the\ninput and output CSV arguments with `-`.", + "prompt_type": "plain" + }, + { + "output": "This value\n defaults to True. - sys.ai.h2o.mojos.parser.csv.stripCrFromLastColumn (boolean)\n -Workaround for issues relating to the OpenCSV parser. This value\n defaults to True. - sys.ai.h2o.mojos.parser.csv.quotedHeaders (boolean) - Specify\n whether to quote header names in the output CSV file. This value\n defaults to False. - sys.ai.h2o.mojos.parser.csv.separator (char) - Specify the separator\n used between CSV fields. The special value `TAB` can be used for\n tab-separated values. This value defaults to `,`. - sys.ai.h2o.mojos.parser.csv.escapeChar (char) - Specify the escape\n character for parsing CSV fields. 
If this value is not specified,\n then no escaping is attempted. This value defaults to an empty\n string. - sys.ai.h2o.mojos.parser.csv.batch (int) - Specify the number of\n input records brought into memory for batch processing (determines\n consumed memory). This value defaults to 1000. - sys.ai.h2o.mojos.pipelineFormats (string) - When multiple formats\n are recognized, this option specifies the order in which they are\n tried.", + "prompt_type": "plain" + }, + { + "output": "- sys.ai.h2o.mojos.parser.csv.date.formats (string) - Specify a format\n for dates. This value defaults to an empty string. - sys.ai.h2o.mojos.exposedInputs (string) - Specify a comma separated\n list of input cols that are needed on output. The special value\n `ALL` takes all inputs. This defaults to a null value. - sys.ai.h2o.mojos.useWeakHash (boolean) - Specify whether to use\n WeakHashMap. This is set to False by default. Enabling this setting\n may improve MOJO loading times. JVM Options for Access Control\n- ai.h2o.mojos.runtime.license.key - Specify a license key. - ai.h2o.mojos.runtime.license.file - Specify the location of a\n license key. - ai.h2o.mojos.runtime.license.filename - Override the default license\n file name. - ai.h2o.mojos.runtime.signature.filename - Override the default\n signature file name. - ai.h2o.mojos.runtime.watermark.filename - Override the default\n watermark file name. JVM Options for Access Control\n- ai.h2o.mojos.runtime.license.key - Specify a license key.", + "prompt_type": "plain" + }, + { + "output": "Machine Learning Interpretability\n\ninterpreting interpret-the-mli-page.rst interpret-non-ts interpret-ts\ninterpret-recipes", + "prompt_type": "plain" + }, + { + "output": "OpenID Connect Authentication Examples\nThis section describes how to enable OpenID Connect authentication in\nDriverless AI. It provides two examples. The first describes how to\nenable OpenID connect and log in to the Driverless AI UI. 
The second\ndescribes additional token-based authentication settings, which allows\nyou to run the Driverless AI Python client. (Note that token-based\nauthentication is not yet supported on the Driverless AI R client.) This\nsection assumes that you have an understanding of OpenID Connect. The OpenID Connect Protocol\nOpenID Connect follows a distinct protocol during the authentication\nprocess:\n1. A request is sent from the client (RP) to the OpenID provider (OP). 2. The OP authenticates the end user and obtains authorization. 3. The OP responds with an ID Token. (An Access Token is usually\n provided as well.) 4. The Relying Party (RP) can send a request with the Access Token to\n the UserInfo Endpoint. 5. The UserInfo Endpoint returns Claims about the End User.", + "prompt_type": "plain" + }, + { + "output": "This information is subsequently used to\nconfigure further interactions with the provider. The well-known endpoint is typically configured as follows:\n https://yourOpenIDProviderHostname/.well-known/openid-configuration\nConfiguration Options\nOpenID Configuration Options\nThe following options in the config.toml file are used for enabling\nOpenID-based authentication. Setting these options lets you log in to\nthe Driverless AI UI using OpenID. # The OpenID server URL. (Ex: https://oidp.ourdomain.com) Do not end with a \"/\"\n auth_openid_provider_base_uri= \"https://yourOpenIDProviderHostname\"\n # The uri to pull OpenID config data from. (You can extract most of required OpenID config from this URL.) 
# Usually located at: /auth/realms/master/.well-known/openid-configuration\n # Quote method from urllib.parse used to encode payload dict in Authentication Request\n auth_openid_urlencode_quote_via=\"quote\"\n # These endpoints are made available by the well-known endpoint of the OpenID provider\n # All endpoints should start with a \"/\"\n auth_openid_auth_uri=\"\"\n auth_openid_token_uri=\"\"\n auth_openid_userinfo_uri=\"\"\n auth_openid_logout_uri=\"\"\n # In most cases, these values are usually 'code' and 'authorization_code' (as shown below)\n # Supported values for response_type and grant_type are listed in the response of the well-known endpoint\n auth_openid_response_type=\"code\"\n auth_openid_grant_type=\"authorization_code\"\n # Scope values\u2014supported values are available in the response from the well-known endpoint\n # 'openid' is required\n # Additional scopes may be necessary if the response to the userinfo request\n # does not include enough information to use for authentication\n # Separate additional scopes with a blank space.", + "prompt_type": "plain" + }, + { + "output": "Token-based authentication allows\nclients to authenticate with the Driverless AI server by providing a\ntoken with each request. This is targeted for (but not limited to) the\nenvironments with OpenID Connect authentication. If these options are\nnot set, then clients are not able to authenticate with the server when\nOpenID Connect is configured as the authentication method. # Sets token introspection URL for OpenID Connect authentication. (needs to be an absolute URL)\n auth_openid_token_introspection_url = \"\"\n # Enables option to use Bearer token for authentication with the RPC endpoint. api_token_introspection_enabled = false\n # Sets the method that is used to introspect the bearer token. # OAUTH2_TOKEN_INTROSPECTION: Uses OAuth 2.0 Token Introspection (RFC 7662)\n # endpoint to introspect the bearer token. 
# This is useful when 'openid' is used as the authentication method. # Uses 'auth_openid_client_id' and 'auth_openid_client_secret' to\n # authenticate with the authorization server and\n # `auth_openid_token_introspection_url` to perform the introspection.", + "prompt_type": "plain" + }, + { + "output": "Space separated./\n # This is passed to the introspection endpoint and also verified after response\n # for the servers that don't enforce scopes. # Keeping this empty turns the verification off. # \n api_token_oauth2_scopes = \"\"\n # Which field of the response returned by the token introspection endpoint should be used as a username. api_token_oauth2_username_field_name = \"username\"\n # Enables the option to initiate a PKCE flow from the UI in order to obtain tokens usable with Driverless clients\n oauth2_client_tokens_enabled = false\n # Sets up client id that will be used in the OAuth 2.0 Authorization Code Flow to obtain the tokens. Client needs to be public and be able to use PKCE with S256 code challenge. oauth2_client_tokens_client_id = \"\"\n # Sets up the absolute url to the authorize endpoint. oauth2_client_tokens_authorize_url = \"\"\n # Sets up the absolute url to the token endpoint. oauth2_client_tokens_token_url = \"\"\n # Sets up the absolute url to the token introspection endpoint. It's displayed in the UI so that clients can inspect the token expiration.", + "prompt_type": "plain" + }, + { + "output": "this /oauth2/client_token\n oauth2_client_tokens_redirect_url = \"\"\n # Sets up the scope for the requested tokens. Space separated list. oauth2_client_tokens_scope = \"openid profile ai.h2o.storage\"\nExample 1: Enabling OpenID Connect\nThis example describes how to start Driverless AI in the Docker image\nand with native installs after OpenID has been configured. Note that\nthis example does not enable tokens, so the Driverless AI Python client\nwill be incompatible with this installation. Docker Image Installs\n1. 
Edit the OpenID configuration options in your config.toml file as\n described in the openid-config-options section. 2. Mount the edited config.toml file into the Docker container. The next step is to launch and log in to Driverless AI. Refer to\nlogging-in. Native Installs\n1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example:\n2. Edit the OpenID configuration properties in the config.toml file as\n described in the openid-config-options section.", + "prompt_type": "plain" + }, + { + "output": "Start (or restart) Driverless AI. The next step is to launch and log in to Driverless AI. Refer to\nlogging-in. Example 2: Enabling Token-based Authentication with OpenID Connect\nSimilar to Example 1, this example describes how to start Driverless AI\nin the Docker image and with native installs after OpenID has been\nconfigured. It also enables tokens for compatibility with the Driverless\nAI Python client. Docker Image Installs\n1. Edit the OpenID configuration options in your config.toml file as\n described in the openid-config-options section. Be sure to also\n enable the token-based authentication options described in the\n token_based_options options section. 2. Mount the edited config.toml file into the Docker container. The next step is to launch and log in to Driverless AI. Refer to\nlogging-in. Native Installs\n1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example:\n2. Edit the OpenID configuration properties in the config.toml file as\n described in the openid-config-options section.", + "prompt_type": "plain" + }, + { + "output": "3. Start (or restart) Driverless AI. The next step is to launch and log in to Driverless AI. Refer to\nlogging-in. 
Python Client\nThe following is an example of how to enable token-based authentication\nwith OpenID Connect for the Driverless AI Python Client:\n # setup a token provider with a refresh token from the Driverless AI web UI\n token_provider = driverlessai.token_providers.OAuth2TokenProvider(\n refresh_token=\"eyJhbGciOiJIUzI1N...\",\n client_id=\"python_client\",\n token_endpoint_url=\"https://keycloak-server/auth/realms/driverlessai/protocol/openid-connect/token\",\n token_introspection_url=\"https://keycloak-server/auth/realms/driverlessai/protocol/openid-connect/token/introspect\"\n )\n # use the token provider to get authorization to connect to the\n # Driverless AI server\n dai = driverlessai.Client(\n address=\"https://localhost:12345\",\n token_provider=token_provider.ensure_fresh_token\n )\nParameters:\n- refresh_token (str) \u2013 token from Driverless AI server web UI, used\n to obtain fresh access token when needed\n- client_id (str) \u2013 public ID for the Python client\n- token_endpoint_url (str) \u2013 Authorization server URL to get an access\n or refresh token\n- token_introspection_url (str) \u2013 Authorization server URL to get\n information about a token\n- access_token (Optional [str]) \u2013 token authorizing Python client\n access\n- client_secret (Optional [str]) \u2013 private secret for the Python\n client\nFor more information, see\nhttp://docs.h2o.ai/driverless-ai/pyclient/docs/html/utils.html#oauth-2-0-token-provider.", + "prompt_type": "plain" + }, + { + "output": "Image Processing in Driverless AI\nImage processing in Driverless AI is a powerful tool that can be used to\ngain insight from digital images. The following sections describe\nDriverless AI's image processing capabilities. - image-processing-supported-file-types\n- Uploading Image dataset to Driverless AI\n- Image Transformer : Use image transformers when a\n dataset contains both images and other feature types. 
- Image Model : Use an Image model when the only feature\n in the dataset is an image. - Deploying an Image Model to Production\nNote\n- Image models from Driverless AI version 1.9.x aren't supported in\n1.10.x. - Image and NLP use cases in Driverless AI benefit significantly\nfrom GPU usage. For more information, see GPU usage in DAI . Supported File Types for Image processing\nThe following is a list of supported file types for image processing in\nDriverless AI:\n- Windows bitmaps - .bmp\n- JPEG files - .jpeg, .jpg, .jpe\n- JPEG 2000 files - .jp2\n- Portable Network Graphics - .png\n- WebP - .webp\n- Portable image format - .pbm, .pgm, .ppm, .pnm\n- TIFF files - .tiff, .tif\n- OpenEXR Image files - .exr\n- Radiance HDR - .hdr\nDue to browser restrictions, images may not render for some formats\n(like .ppm, .tiff, .pnm and .exr) when viewing dataset rows from the\nGUI.", + "prompt_type": "plain" + }, + { + "output": "Ideally Driverless AI can support all OpenCV Image formats. Uploading Data for Image Processing\nDriverless AI supports multiple methods for uploading image datasets:\n- Archive with images in directories for each class. Labels for each\n class are automatically created based on directory hierarchy\n- Archive with images and a CSV file that contains at least one column\n with image names and a target column (best method for regression). Note that each image name must include the correct file extension. - CSV file with local paths to the images on the disk\n- CSV file with remote URLs to the images\nModeling Images\nDriverless AI features two different approaches to modeling images. 
Embeddings Transformer (Image Vectorizer)\nThe Image Vectorizer transformer utilizes TensorFlow\npre-trained ImageNet models to\nconvert a column with an image path or URI to an embeddings (vector)\nrepresentation that is derived from the last global average pooling\nlayer of the model.", + "prompt_type": "plain" + }, + { + "output": "There are several options in the Expert Settings panel that let you\nconfigure the Image Vectorizer transformer. This panel is available from\nwithin the experiment page above the Scorer knob. Refer to\nimage-settings for more information on these options. Notes:\n- This modeling approach supports classification and regression\n experiments. - This modeling approach supports the use of mixed data types (any\n number of image columns, text columns, numeric or categorical\n columns)\n- The Image Vectorizer transformer can also be enabled with the\n Pipeline Building Recipe expert setting,\n which is located in the Experiment tab. Automatic Image Model\nAutomatic Image Model is an AutoML model that accepts only an image and\na label as input features. This model automatically selects\nhyperparameters such as learning rate, optimizer, batch size, and image\ninput size. It also automates the training process by selecting the\nnumber of epochs, cropping strategy, augmentations, and learning rate\nscheduler.", + "prompt_type": "plain" + }, + { + "output": "The possible architectures list includes all\nthe well-known models: (SE)-ResNe(X)ts; DenseNets; EfficientNets; etc. Unique insights that provide information and sample images for the\ncurrent best individual model are available for Automatic Image Model. To view these insights, click on the Insights option while an experiment\nis running or after an experiment is complete. Refer to image-insights\nfor more information. Each individual model score (together with the neural network\narchitecture name) is available in the Iteration Data panel. 
The last\npoint in the Iteration Data is always called ENSEMBLE. This indicates\nthat the final model ensembles multiple individual models. Enabling Automatic Image Model\nTo enable Automatic Image Model, navigate to the\npipeline-building-recipe expert setting and select the image_model\noption:\nAfter confirming your selection, click Save. The experiment preview\nsection updates to include information about Automatic Image Model:\n[]\nNotes:\n- This modeling approach only supports a single image column as an\n input.", + "prompt_type": "plain" + }, + { + "output": "- This modeling approach supports classification and regression\n experiments. - This modeling approach does not support the use of mixed data types\n because of its limitation on input features. - This modeling approach does not use Genetic Algorithm (GA). - The use of one or more GPUs is strongly recommended for this\n modeling approach. - If an internet connection is available, ImageNet pretrained weights\n are downloaded automatically. If an internet connection is not\n available, weights must be downloaded from\n http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip\n and extracted into tensorflow_image_pretrained_models_dir\n (./pretrained/image/ by default). - If extensively running image models with Driverless AI\n Docker install , we recommend setting\n --shm-size=2g. Deploying an Image Model\nPython scoring and\nC++ MOJO scoring are both supported for the\nImage Vectorizer Transformer .", + "prompt_type": "plain" + }, + { + "output": "Data Recipe URL Setup\nDriverless AI lets you explore data recipe URL data sources from within\nthe Driverless AI application. This section provides instructions for\nconfiguring Driverless AI to work with data recipe URLs. When enabled\n(default), you will be able to modify datasets that have been added to\nDriverless AI. (Refer to modify_by_recipe for more information.) Notes:\n- This connector is enabled by default. 
These steps are provided in\n case this connector was previously disabled and you want to\n re-enable it. - Depending on your Docker install version, use either the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command when starting the Driverless AI Docker image. Use docker version to check which version of Docker you are using. Enable Data Recipe URL\nDocker Image Installs\nThis example enables the data recipe URL data connector. nvidia-docker run \\\n --shm-size=256m \\\n --add-host name.node:172.16.2.186 \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file, recipe_url\" \\\n -p 12345:12345 \\\n -it --rm \\\n -v /tmp/dtmp/:/tmp \\\n -v /tmp/dlog/:/log \\\n -v /tmp/dlicense/:/license \\\n -v /tmp/ddata/:/data \\\n -u $(id -u):$(id -g) \\\n h2oai/dai-ubi8-x86_64:|tag|\nDocker Image with the config.toml\nThis example shows how to enable the Data Recipe URL data connector in\nthe config.toml file, and then specify that file when starting\nDriverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "1. Configure the Driverless AI config.toml file. Set the following\n configuration options. - enabled_file_systems = \"file, upload, recipe_url\"\n2. Mount the config.toml file into the Docker container. nvidia-docker run \\\n --pid=host \\\n --rm \\\n --shm-size=256m \\\n --add-host name.node:172.16.2.186 \\\n -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\\n -p 12345:12345 \\\n -v /local/path/to/config.toml:/path/in/docker/config.toml \\\n -v /etc/passwd:/etc/passwd:ro \\\n -v /etc/group:/etc/group:ro \\\n -v /tmp/dtmp/:/tmp \\\n -v /tmp/dlog/:/log \\\n -v /tmp/dlicense/:/license \\\n -v /tmp/ddata/:/data \\\n -u $(id -u):$(id -g) \\\n h2oai/dai-ubi8-x86_64:|tag|\nNative Installs\nThis example enables the Data Recipe URL data connector. Note that\nrecipe_url is enabled by default. 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example:\n2. 
Specify the following configuration options in the config.toml file.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Workflow\n\nA typical Driverless AI workflow is to:\n\n1. Load data\n2. Visualize data\n3. Run an experiment\n4. Interpret the model\n5. Deploy the scoring pipeline\n\nIn addition, you can diagnose a model, transform another dataset, score\nthe model against another dataset, and manage your data in Projects.\n\nAlso see the dai_wizard, a question and answer workflow that helps\nautomatically set up use case specific experiment settings.\n\nThe image below describes a typical workflow.\n\n[]", + "prompt_type": "plain" + }, + { + "output": "Out of memory handling in Driverless AI\nThis page describes options for reducing memory usage to avoid out of\nmemory errors during the final model building stage. Reducing estimated memory usage and the number of cores used per\nexperiment\nTo avoid out of memory errors in situations where many different\ntransformers are used at the same time, set the following options as\nenvironment variables when starting DAI. Note that these configuration\noptions can also be set in the config.toml file. - final_munging_memory_reduction_factor: Specify a factor by which to\n reduce estimated memory usage during the final ensemble feature\n engineering stage. Larger values use less memory, with 1 using the\n highest amount of memory. - max_cores: Specify the number of cores to use per experiment. Note\n that if you specify 0, all available cores will be used. To reduce\n memory usage, lowering this value to \u00bd or \u00bc of the available\n physical cores is recommended.", + "prompt_type": "plain" + }, + { + "output": "max_workers_final_base_models = 1 to automatically limit the number of models built at the same time to 1. This option is useful in situations where a specific transformer or model uses more memory than expected. 
**Limiting the total number of features** You can limit the total number of features with the :ref:`config_nfeatures_max` configuration option. For example, if you encounter an out of memory error due to having a large number of features, you can set this option and refit the best model to see if the error is resolved. **Limiting the maximum number of genes per model** You can specify the maximum number of genes (transformer instances) per model with the :ref:`config_ngenes_max` configuration option. **Additional options** - :ref:`config_munging_memory_overhead_factor`: Specify memory usage per transformer per input data size. In cases where final model data munging uses too much memory due to parallel operations, setting munging_memory_overhead_factor = 10 is recommended to reduce memory usage.", + "prompt_type": "plain" + }, + { + "output": "AWS Role-Based Authentication\n\nIn Driverless AI, it is possible to enable role-based authentication via\nthe IAM role. This is a two-step process that involves setting up AWS\nIAM and then starting Driverless AI by specifying the role in the\nconfig.toml file or by setting the AWS_USE_EC2_ROLE_CREDENTIALS\nenvironment variable to", + "prompt_type": "plain" + }, + { + "output": "True. AWS IAM Setup ------------- 1. Create an IAM role. This IAM role should have a Trust Relationship with Principal Trust Entity set to your Account ID. For example: trust relationship for Account ID 524466471676 would look like: .. .. code:: bash { \"Version\": \"2012-10-17\", \"Statement\": [ { \"Effect\": \"Allow\", \"Principal\": { \"AWS\": \"arn:aws:iam::524466471676:root\" }, \"Action\": \"sts:AssumeRole\" } ] } .. image:: ../images/aws_iam_role_create.png :alt: image :align: center 2. Create a new policy that lets users assume the role: .. .. image:: ../images/aws_iam_policy_create.png :alt: image 3. Assign the policy to the user. .. .. image:: ../images/aws_iam_policy_assign.png :alt: image 4. 
Test role switching here: https://signin.aws.amazon.com/switchrole. (Refer to https://docs.aws.amazon.com/IAM/latest/UserGuide/troubleshoot_roles.html#troubleshoot_roles_cant-assume-role.)", + "prompt_type": "plain" + }, + { + "output": "AWS_USE_EC2_ROLE_CREDENTIALS`` environment variable.\n\nResources\n\n1. Granting a User Permissions to Switch Roles:\n https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use_permissions-to-switch.html\n2. Creating a Role to Delegate Permissions to an IAM User:\n https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user.html\n3. Assuming an IAM Role in the AWS CLI:\n https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-role.html", + "prompt_type": "plain" + }, + { + "output": "Driverless AI OpenID Connect Authentication\nThis page describes how to set up OpenID Connect (OIDC) authentication\nin Driverless AI (DAI). - oidc_setup\n- oidc_understanding\nSetting up OIDC authentication\nTo set up OIDC authentication locally (or in production), the following\nconfig.toml options must be specified:\n1. authentication_method = \"oidc\" - Specifies OIDC as the\n authentication method\n2. auth_oidc_issuer_url = \"https://login.microsoftonline.com//v2.0\"\n - Specifies the URL of the Identity Provider (IDP), which is also\n used for automatic provider discovery\n3. auth_oidc_identity_source = \"id_token\" - Specifies whether user\n identity is retrieved from ID Token or the UserInfo. The available\n options are [\"userinfo\", \"id_token\"]\n4. auth_oidc_username_claim = \"preferred_username\" - Specifies the\n claim in the user identity (ID Token or UserInfo response) that is\n used as the username in Driverless AI\n5. auth_openid_client_id = \"\" - Specifies the Client ID,\n which is provided by the IDP\n6. auth_openid_client_secret = \"\" - Specifies the Client\n secret created or given by the IDP\n7. 
auth_openid_redirect_uri = \"http://localhost:12345/oidc/callback\"\n - Specifies a redirection URL so that the IDP can redirect users\n back to the application after successfully logging in\n8. auth_oidc_post_logout_url = \"http://localhost:12345/login\"\n -Specifies the URL the user is directed to after logging out\nThis basic setup should be sufficient to use an IDP such as Azure AD.", + "prompt_type": "plain" + }, + { + "output": "The following example contains several overrides in addition to the\nrequired config.toml options:\n # AUTH\n authentication_method = \"oidc\"\n auth_oidc_id_token_username_key = \"preferred_username\"\n auth_oidc_identity_source = \"id_token\"\n auth_oidc_issuer_url = \"https://login.microsoftonline.com//v2.0\"\n auth_openid_client_id = \"\"\n auth_openid_client_secret = \"\"\n auth_openid_scope = \"openid profile email User.Read\"\n auth_openid_default_scopes = \"User.Read\"\n auth_openid_redirect_uri = \"http://localhost:12345/oidc/callback\"\n auth_oidc_post_logout_url = \"http://localhost:12345/login\"\nIn the preceding example, notice the usage of the following OIDC scopes:\n1. auth_openid_scope - Specifies the list of scopes requested at the\n authorization request\n2. auth_openid_default_scopes - Specifies a set of scopes that are\n requested when making an access token request\nHow does OIDC authentication work? The following sections describe how OIDC authentication is implemented\nin DAI.", + "prompt_type": "plain" + }, + { + "output": "As stated on the OpenID\nwebsite, the Authorization Code Flow returns an Authorization Code to\nthe Client, which can then exchange it for an ID Token and an Access\nToken directly. Note\nDAI mainly supports the client_secret_basic authentication method. Identity sources\nThe DAI OIDC authentication mechanism allows two different methods of\nretrieving a user identity from IDP. 
Note\nFor both of the following methods, the user must specify the\nauth_oidc_username_claim config.toml option, which controls which claim\nis used as a username in DAI. - userinfo: Makes a UserInfo endpoint request, which in response\n returns a set of claims that should contain the preferred username,\n which will be used as the DAI username. - id_token: Uses an ID Token introspection, which is typically\n acquired during the token exchange, to retrieve the claim holding\n the preferred username. Identity Validation\nDriverless AI allows two different methods of evaluating whether user\n(identity) has required privileges to access the DAI application.", + "prompt_type": "plain" + }, + { + "output": "- If auth_openid_use_objectpath_match is enabled, then the user must\n specify auth_openid_use_objectpath_expression, which evaluates\n ObjectPath against identity (UserInfo response or ID Token)\n- If auth_openid_use_objectpath_match is disabled, then the user may\n specify auth_openid_userinfo_auth_key and\n auth_openid_userinfo_auth_value to compare value with given key in\n identity against the configured value. Logging in using OIDC\nThe following steps describe the procedure of logging in using OIDC:\n1. The OIDC Client is initialized at server startup and performs\n Provider Discovery, which discovers all the Identity Provider (IDP)\n endpoints. 2. When a user enters the login page, authorization code flow is\n initialized and the IDP is requested for an authorization code. 3. The user is redirected to an OIDC callback URL, which processes the\n authorization response and retrieves the authorization code. 4. 
The OIDC callback handler performs the token exchange using the\n Token Endpoint and acquires the Access and ID Tokens (and when\n possible, the Refresh Token).", + "prompt_type": "plain" + }, + { + "output": "auth_oidc_post_logout_url`` needs to be specified in the config.toml\nfile, which by design should point to the absolute DAI login URL.", + "prompt_type": "plain" + }, + { + "output": "Using the config.toml File\nThe config.toml file is a configuration file that uses the TOML v0.5.0\nfile format. Administrators can customize various aspects of a\nDriverless AI (DAI) environment by editing the config.toml file before\nstarting DAI. Note\nFor information on configuration security, see configuration-security. Configuration Override Chain\nThe configuration engine reads and overrides variables in the following\norder:\n1. Driverless AI defaults: These are stored in a Python config module. 2. config.toml - Place this file in a folder or mount it in a Docker\n container and specify the path in the \"DRIVERLESS_AI_CONFIG_FILE\"\n environment variable. 3. Keystore file - Set the keystore_file parameter in the config.toml\n file or the environment variable \"DRIVERLESS_AI_KEYSTORE_FILE\" to\n point to a valid DAI keystore file generated using the\n h2oai.keystore tool. If an environment variable is set, the value in\n the config.toml for keystore_file is overridden.", + "prompt_type": "plain" + }, + { + "output": "Environment variable - Configuration variables can also be provided\n as environment variables. They must have the prefix DRIVERLESS_AI_\n followed by the variable name in all caps. For example,\n \"authentication_method\" can be provided as\n \"DRIVERLESS_AI_AUTHENTICATION_METHOD\". Setting environment variables\n overrides values from the keystore file. Docker Image Users\n1. Copy the config.toml file from inside the Docker image to your local\n filesystem. 2. Edit the desired variables in the config.toml file. Save your\n changes when you are done. 3. 
Start DAI with the DRIVERLESS_AI_CONFIG_FILE environment variable. Ensure that this environment variable points to the location of the\n edited config.toml file so that the software can locate the\n configuration file. Native Install Users\nNative installs include DEBs, RPMs, and TAR SH installs. 1. Export the DAI config.toml file or add it to ~/.bashrc. For example:\n2. Edit the desired variables in the config.toml file.", + "prompt_type": "plain" + }, + { + "output": "Free up space on a DAI instance\nThe following sections describe how to free up disk space on an instance\nof Driverless AI. Python API guide\nThis section describes how to free up disk space on an instance of\nDriverless AI (DAI) with the Python API. Note\n- The method described in this section is only available for H2O AI\nCloud customers. The following code sample lets you perform the following tasks:\n1. Link any of your experiments to a Project. Once an experiment is\n linked to a Project, it is automatically pushed to an external\n remote storage. 2. Delete the experiment from the DAI instance. Doing so frees up disk\n space on your DAI instance, and you can always import any experiment\n back into the DAI instance as needed. # Make a project called: \"Test\"\n project = dai.projects.create(name=\"Test\")\n # Link experiment to project to save it to remote storage\n project.link_experiment(experiment)\n # Delete experiment from instance\n experiment.delete()\nNote that when using this approach, the deleted experiment appears\ngrayed out in the Project.", + "prompt_type": "plain" + }, + { + "output": "Data leakage and shift detection in Driverless AI\nThis page describes data leakage and shift detection in Driverless AI\n(DAI). Overview\n- Data leakage: To detect data leakage, DAI runs a model (when\n available, LightGBM) to get the variable importance table, which\n determines the predictive power of each feature on the target\n variable. 
A simple model is then built on each feature with\n significant variable importance. The models with a high AUC (for\n classification) or R2 (for regression) score are reported to the\n user as potential leak features. - Shift detection: To detect shift in distribution between the\n training, validation or testing datasets, Driverless AI trains a\n binomial model to predict which dataset a row belongs to. For\n example, if a model is built using only a specific feature as a\n predictor and is able to separate the training and testing data with\n high accuracy (for example, an AUC of 0.9), then this indicates that\n there is a drift in the distribution of that feature in the training\n and testing data.", + "prompt_type": "plain" + }, + { + "output": "Enabling leakage detection\nTo enable leakage detection, set the config_check_leakage configuration\noption to on (default). When this option is enabled, Driverless AI runs\na model to determine the predictive power of each feature on the target\nvariable. If leakage detection has been enabled, then the\nconfig_detect_features_leakage_threshold_auc configuration option is\nused for per-feature leakage detection if AUC (or R2 for regression) on\noriginal data (label-encoded) is greater-than or equal to the specified\nvalue. By default, this option is set to 0.95. Identifying features responsible for leakage\nFor significant features (determined by feature importance), a simple\nmodel is built on each feature. The models with a high AUC\n(classification) or R2 (regression) score are reported to the user as\npotential leaks. 
If leakage detection is enabled, then the\nconfig_detect_features_per_feature_leakage_threshold_auc configuration\noption is used to notify users about features for which AUC or R2 is\ngreater-than or equal to the specific value.", + "prompt_type": "plain" + }, + { + "output": "Automatically drop features suspected in leakage\nA feature is dropped when the single feature model performance exceeds\nthe threshold for dropping features. You can specify this threshold with\nthe config_drop_features_leakage_threshold_auc configuration option,\nwhich has a default value of 0.999. When the AUC (or R2 for regression),\nGINI, or Spearman correlation is above the specified value, the feature\nis dropped. Shift detection\nDriverless AI can detect data distribution shifts between\ntrain/valid/test datasets when they are provided. Shift is detected by training a model to distinguish between\ntrain/validation/test datasets by assigning a unique target label to\neach of the datasets. If the model turns out to have high accuracy, data\nshift is reported with a notification. Shifted features can either be\ndropped or used to create more meaningful aggregate features by using\nthem as labels or bins. The following is a list of configuration options for shift detection:\n- config_check_distribution_shift: Specify whether to enable\n train/valid and train/test distribution shift detection.", + "prompt_type": "plain" + }, + { + "output": "fitted_model.pickle.meta.json`` file in the experiment summary zip\narchive.", + "prompt_type": "plain" + }, + { + "output": "Time Series in Driverless AI\nTime series forecasting is one of the most common and important tasks in\nbusiness analytics. There are many real-world applications like sales,\nweather, stock market, and energy demand, just to name a few. At H2O, we\nbelieve that automation can help our users deliver business value in a\ntimely manner. 
Therefore, we combined advanced time series analysis and\nour Kaggle Grand Masters\u2019 time series recipes into Driverless AI. The key features/recipes that make automation possible are:\n- Automatic handling of time groups (e.g., different stores and\n departments)\n- Robust time series validation\n - Accounts for gaps and forecast horizon\n - Uses past information only (i.e., no data leakage)\n- Time series-specific feature engineering recipes\n - Date features like day of week, day of month, etc. - AutoRegressive features, like optimal lag and lag-features\n interaction\n - Different types of exponentially weighted moving averages\n - Aggregation of past information (different time groups and time\n intervals)\n - Target transformations and differentiation\n- Integration with existing feature engineering functions (recipes and\n optimization)\n- Rolling-window based predictions for time series experiments with\n test-time augmentation or re-fit\n- Automatic pipeline generation (See \"From Kaggle Grand Masters'\n Recipes to Production Ready in a Few Clicks\" blog post.)", + "prompt_type": "plain" + }, + { + "output": "Converting datetime to a locale-independent format prior to running\nexperiments is recommended. For information on how to convert datetime\nformats so that they are accepted in DAI, refer to the final note in the\nmodify_by_recipe section. Understanding Time Series\nThe following is an in depth description of time series in Driverless\nAI. For an overview of best practices when running time series\nexperiments, see ts_bestpractices. Modeling Approach\nDriverless AI uses GBMs, GLMs and neural networks with a focus on time\nseries-specific feature engineering. 
The feature engineering includes:\n- Autoregressive elements: creating lag variables\n- Aggregated features on lagged variables: moving averages,\n exponential smoothing descriptive statistics, correlations\n- Date-specific features: week number, day of week, month, year\n- Target transformations: Integration/Differentiation, univariate\n transforms (like logs, square roots)\nThis approach is combined with AutoDL features as part of the genetic\nalgorithm.", + "prompt_type": "plain" + }, + { + "output": "In other\nwords, the same transformations/genes apply; plus there are new\ntransformations that come from time series. Some transformations (like\ntarget encoding) are deactivated. When running a time series experiment, Driverless AI builds multiple\nmodels by rolling the validation window back in time (and potentially\nusing less and less training data). User-Configurable Options\nGap\nThe guiding principle for properly modeling a time series forecasting\nproblem is to use the historical data in the model training dataset such\nthat it mimics the data/information environment at scoring time (i.e. deployed predictions). Specifically, you want to partition the training\nset to account for: 1) the information available to the model when\nmaking predictions and 2) the number of units out that the model should\nbe optimized to predict. Given a training dataset, the gap and forecast horizon are parameters\nthat determine how to split the training dataset into training samples\nand validation samples.", + "prompt_type": "plain" + }, + { + "output": "For example:\n- Assume there are daily data with days 1/1/2020, 2/1/2020, 3/1/2020,\n 4/1/2020 in train. There are 4 days in total for training. - In addition, the test data will start from 6/1/2020. There is only 1\n day in the test data. - The previous day (5/1/2020) does not belong to the train data. 
It is\n a day that cannot be used for training (i.e because information from\n that day may not be available at scoring time). This day cannot be\n used to derive information (such as historical lags) for the test\n data either. - Here the time bin (or time unit) is 1 day. This is the time interval\n that separates the different samples/rows in the data. - In summary, there are 4 time bins/units for the train data and 1\n time bin/unit for the test data plus the Gap. - In order to estimate the Gap between the end of the train data and\n the beginning of the test data, the following formula is applied. - Gap = min(time bin test) - max(time bin train) - 1.", + "prompt_type": "plain" + }, + { + "output": "This is the\n earliest (and only) day in the test data. - max(time bin train) is 4 (or 4/1/2020). This is the latest (or the\n most recent) day in the train data. - Therefore the GAP is 1 time bin (or 1 day in this case), because Gap\n = 6 - 4 - 1 or Gap = 1\n[]\nForecast Horizon\nIt's often not possible to have the most recent data available when\napplying a model (or it's costly to update the data table too often);\ntherefore some models need to be built accounting for a \u201cfuture gap\u201d. For example, if it takes a week to update a specific data table, you\nideally want to predict 7 days ahead with the data as it is \u201ctoday\u201d;\ntherefore a gap of 6 days is recommended. Not specifying a gap and\npredicting 7 days ahead with the data as it is is unrealistic (and\ncannot happen, as the data is updated on a weekly basis in this\nexample). Similarly, gap can be used if you want to forecast further in\nadvance. For example, if you want to know what will happen 7 days in the\nfuture, then set the gap to 6 days.", + "prompt_type": "plain" + }, + { + "output": "In other words it is\nthe future period that the model can make predictions for (or the number\nof units out that the model should be optimized to predict). 
Forecast\nhorizon is used in feature selection and engineering and in model\nselection. Note that forecast horizon might not equal the number of\npredictions. The actual predictions are determined by the test dataset. []\nThe periodicity of updating the data may require model predictions to\naccount for significant time in the future. In an ideal world where data\ncan be updated very quickly, predictions can always be made having the\nmost recent data available. In this scenario there is no need for a\nmodel to be able to predict cases that are well into the future, but\nrather focus on maximizing its ability to predict short term. However\nthis is not always the case, and a model needs to be able to make\npredictions that span deep into the future because it may be too costly\nto make predictions every single day after the data gets updated.", + "prompt_type": "plain" + }, + { + "output": "For example,\npredicting tomorrow with today\u2019s data is easier than predicting 2 days\nahead with today\u2019s data. Hence specifying the forecast horizon can\nfacilitate building models that optimize prediction accuracy for these\nfuture time intervals. Prediction Intervals\nFor regression problems, enable the compute-intervals expert setting to\nhave Driverless AI provide two additional columns y.lower and y.upper in\nthe prediction frame. The true target value y for a predicted sample is\nexpected to lie within [y.lower, y.upper] with a certain probability. The default value for this confidence level can be specified with the\nconfidence-level expert setting, which has a default value of 0.9. Driverless AI uses holdout predictions to determine intervals\nempirically (Williams, W.H. and Goodman, M.L. \"A Simple Method for the\nConstruction of Empirical Confidence Limits for Economic Forecasts.\" Journal of the American Statistical Association, 66, 752-754. 1971). 
This method makes no assumption about the underlying model or the\ndistribution of error and has been shown to outperform many other\napproaches (Lee, Yun Shin and Scholtes, Stefan.", + "prompt_type": "plain" + }, + { + "output": "num_prediction_periods``) needs to be in periods, and the size is\nunknown. To overcome this, you can use the optional", + "prompt_type": "plain" + }, + { + "output": "time_period_in_seconds`` parameter when running", + "prompt_type": "plain" + }, + { + "output": "start_experiment_sync (in Python) or train (in R). This is used to specify the forecast horizon in real time units (as well as for gap). If this parameter is not specified, then Driverless AI will automatically detect the period size in the experiment, and the forecast horizon value will respect this period. I.e., if you are sure that your data has a 1 week period, you can say ``num_prediction_periods=14``;\notherwise it is possible that the model will not work correctly. Groups\nGroups are categorical columns in the data that can significantly help\npredict the target variable in time series problems. For example, one\nmay need to predict sales given information about stores and products. Being able to identify that the combination of store and products can\nlead to very different sales is key for predicting the target variable,\nas a big store or a popular product will have higher sales than a small\nstore and/or with unpopular products. For example, if we don\u2019t know that the store is available in the data,\nand we try to see the distribution of sales along time (with all stores\nmixed together), it may look like this:\n[]\nThe same graph grouped by store gives a much clearer view of what the\nsales look like for different stores.", + "prompt_type": "plain" + }, + { + "output": "At a given sample with time stamp t, features at\nsome time difference T (lag) in the past are considered. 
For example, if\nthe sales today are 300, and sales of yesterday are 250, then the lag of\none day for sales is 250. Lags can be created on any feature as well as\non the target. []\nAs previously noted, the training dataset is appropriately split such\nthat the amount of validation data samples equals that of the testing\ndataset samples. If we want to determine valid lags, we must consider\nwhat happens when we will evaluate our model on the testing dataset. Essentially, the minimum lag size must be greater than the gap size. Aside from the minimum useable lag, Driverless AI attempts to discover\npredictive lag sizes based on auto-correlation. \"Lagging\" variables are important in time series because knowing what\nhappened in different time periods in the past can greatly facilitate\npredictions for the future. Consider the following example to see the\nlag of 1 and 2 days:\n+-----------+-------+------+------+\n| Date | Sales | Lag1 | Lag2 |\n+===========+=======+======+======+\n| 1/1/2020 | 100 | - | - |\n+-----------+-------+------+------+\n| 2/1/2020 | 150 | 100 | - |\n+-----------+-------+------+------+\n| 3/1/2020 | 160 | 150 | 100 |\n+-----------+-------+------+------+\n| 4/1/2020 | 200 | 160 | 150 |\n+-----------+-------+------+------+\n| 5/1/2020 | 210 | 200 | 160 |\n+-----------+-------+------+------+\n| 6/1/2020 | 150 | 210 | 200 |\n+-----------+-------+------+------+\n| 7/1/2020 | 160 | 150 | 210 |\n+-----------+-------+------+------+\n| 8/1/2020 | 120 | 160 | 150 |\n+-----------+-------+------+------+\n| 9/1/2020 | 80 | 120 | 160 |\n+-----------+-------+------+------+\n| 10/1/2020 | 70 | 80 | 120 |\n+-----------+-------+------+------+\nTime series target transformations\nThe following is a description of time series target transformations.", + "prompt_type": "plain" + }, + { + "output": "config.tomlfile. For more information, see :ref:`config_usage`. 
**Note:** Driverless AI does not attempt time series target transformations automatically; they must be set manually. :ref:`ts-target-transformation` (ts_lag_target_trafo): With this target transformation, you can select between the difference and ratio of the current and a lagged target. You can specify the corresponding lag size with the **Lag size used for time series target transformation** (ts_target_trafo_lag_size) setting. **Note:** This target transformation can be used together with the **Time series centering or detrending transformation** (ts_target_trafo) target transformation, but it is mutually exclusive with regular target transformations. :ref:`centering-detrending` (ts_target_trafo): With this target transformation, the free parameters of the trend model are fitted. The trend is removed from the target signal, and the pipeline is fitted on the residuals. Predictions are then made by adding back the trend.", + "prompt_type": "plain" + }, + { + "output": "- The centering (robust) and linear (robust) detrending variants use scikit-learn's implementation of random sample consensus (RANSAC) to achieve a higher tolerance with regard to outliers. As stated on scikit-learn's page on robust linear model estimation using RANSAC, \"The ordinary linear regressor is sensitive to outliers, and the fitted line can easily be skewed away from the true underlying relationship of data. 
The RANSAC regressor automatically splits the data into inliers and outliers, and the fitted line is determined only by the identified inliers.\" Settings Determined by Driverless AI ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Window/Moving Average ^^^^^^^^^^^^^^^^^^^^^ Using the above Lag table, a moving average of 2 would constitute the average of Lag1 and Lag2: +-----------+-------+------+------+------+ | Date | Sales | Lag1 | Lag2 | MA2 | +===========+=======+======+======+======+ | 1/1/2020 | 100 | - | - | - | +-----------+-------+------+------+------+ | 2/1/2020 | 150 | 100 | - | - | +-----------+-------+------+------+------+ | 3/1/2020 | 160 | 150 | 100 | 125 | +-----------+-------+------+------+------+ | 4/1/2020 | 200 | 160 | 150 | 155 | +-----------+-------+------+------+------+ | 5/1/2020 | 210 | 200 | 160 | 180 | +-----------+-------+------+------+------+ | 6/1/2020 | 150 | 210 | 200 | 205 | +-----------+-------+------+------+------+ | 7/1/2020 | 160 | 150 | 210 | 180 | +-----------+-------+------+------+------+ | 8/1/2020 | 120 | 160 | 150 | 155 | +-----------+-------+------+------+------+ | 9/1/2020 | 80 | 120 | 160 | 140 | +-----------+-------+------+------+------+ | 10/1/2020 | 70 | 80 | 120 | 100 | +-----------+-------+------+------+------+ Aggregating multiple lags together (instead of just one) can facilitate stability for defining the target variable.", + "prompt_type": "plain" + }, + { + "output": "Exponential Weighting ^^^^^^^^^^^^^^^^^^^^^ Exponential weighting is a form of weighted moving average where more recent values have higher weight than less recent values. That weight is exponentially decreased over time based on an **alpha** (a) (hyper) parameter (0,1), which is normally within the range of [0.9 - 0.99]. For example: - Exponential Weight = a**(time) - If sales 1 day ago = 3.0 and 2 days ago = 4.5 and a = 0.95: - Exp. smooth = (3.0*(0.95**1) + 4.5*(0.95**2)) / ((0.95**1) + (0.95**2)) = 3.73 approx. 
Rolling-Window-Based Predictions -------------------------------- Driverless AI supports rolling-window-based predictions for time series experiments with two options: `Test Time Augmentation `__ (TTA) or re-fit. Both options are useful to assess the performance of the pipeline for predicting not just a single forecast horizon, but many in succession.", + "prompt_type": "plain" + }, + { + "output": "Re-fit simulates the process of re-fitting the entire pipeline (including the model) once new data is available. This process is automated when the test set spans for a longer period than the forecast horizon and if the target values of the test set are known. If the user scores a test set that meets these conditions after the experiment is finished, rolling predictions with TTA will be applied. Re-fit, on the other hand, is only applicable for test sets provided during an experiment. TTA is the default option and can be changed with the `Method to Create Rolling Test Set Predictions `__ expert setting. .. figure:: images/time_series_rolling_window_tta.png :alt: .. figure:: images/time_series_rolling_window_refit.png :alt: Time Series Constraints ----------------------- Dataset Size ~~~~~~~~~~~~ Usually, the forecast horizon (prediction length) :math:`H` equals the number of time periods in the testing data :math:`N_{TEST}` (i.e.", + "prompt_type": "plain" + }, + { + "output": "You want to have enough training data time periods :math:`N_{TRAIN}` to score well on the testing dataset. At a minimum, the training dataset should contain at least three times as many time periods as the testing dataset (i.e. :math:`N_{TRAIN} >= 3 \u00d7 N_{TEST}`). This allows for the training dataset to be split into a validation set with the same amount of time periods as the testing dataset while maintaining enough historical data for feature engineering. .. 
_time-series-use-case: Time Series Use Case: Sales Forecasting --------------------------------------- Below is a typical example of sales forecasting based on the `Walmart competition on Kaggle `__. In order to frame it as a machine learning problem, we formulate the historical sales data and additional attributes as shown below: **Raw data** .. figure:: images/time_series_raw_data.png :alt: **Data formulated for machine learning** .. figure:: images/time_series_ml_data.png :alt: The additional attributes are attributes that we will know at time of scoring.", + "prompt_type": "plain" + }, + { + "output": "In this case, you can assume that you will know whether or not a Store and Department will be running a promotional markdown. Features like the temperature of the Week are not used because that information is not available at the time of scoring. Once you have your data prepared in tabular format (see raw data above), Driverless AI can formulate it for machine learning and sort out the rest. If this is your very first session, the Driverless AI assistant will guide you through the journey. .. figure:: images/first_time_user.png :alt: Similar to previous Driverless AI examples, you need to select the dataset for training/test and define the target. For time series, you need to define the time column (by choosing AUTO or selecting the date column manually). If weighted scoring is required (like the Walmart Kaggle competition), you can select the column with specific weights for different samples. .. figure:: images/time_series_experiment_settings.png :alt: If you prefer to use automatic handling of time groups, you can leave the setting for time groups columns as AUTO, or you can define specific time groups.", + "prompt_type": "plain" + }, + { + "output": "Once the experiment is finished, you can make new predictions and download the scoring pipeline just like any other Driverless AI experiments. .. 
_ucapt: More About Unavailable Columns at Time of Prediction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The **Unavailable Columns at Prediction Time** (UCAPT) option is a way to mark features that will not be available in the test dataset or at the time of prediction but might still be predictive when looking at historical values. These features will only be used in historical feature engineering recipes, such as Lagging or Exponential Weighted Moving Average. For example, if we were predicting the sales amount each day, we might have the number of customers each day as a feature in our training dataset. In the future, we won't know how many customers will be coming into the store, so this would be a leaky feature to use. However, the average number of customers last week might be predictive and is something that we could calculate ahead of time.", + "prompt_type": "plain" + }, + { + "output": "The default value for this setting is often--, meaning that all features can be used as they are. If you include a test dataset before selecting a time column, and that test dataset is missing any columns, then you will see a number as the default for **Unavailable Columns at Prediction Time**, which will be the number of columns that are in the training dataset but not the testing dataset. All of these features will only be looked at historically, and you can see a list of them by clicking on this setting. Using a Driverless AI Time Series Model to Forecast --------------------------------------------------- When you set the experiment's forecast horizon, you are telling the Driverless AI experiment the dates this model will be asked to forecast for. In the Walmart Sales example, we set the Driverless AI forecast horizon to 1 (1 week in the future). This means that Driverless AI expects this model to be used to forecast 1 week after training ends. 
Because the training data ends on 2020-10-26, this model should be used to score for the week of 2020-11-02.", + "prompt_type": "plain" + }, + { + "output": "There are two options: - Option 1: Trigger a Driverless AI experiment to be trained once the forecast horizon ends. A Driverless AI experiment will need to be re-trained every week. - Option 2: Use **Test Time Augmentation** (TTA) to update historical features so that we can use the same model to forecast outside of the forecast horizon. **Test Time Augmentation** (TTA) refers to the process where the model stays the same but the features are refreshed using the latest data. In our Walmart Sales Forecasting example, a feature that may be very important is the Weekly Sales from the previous week. Once we move outside of the forecast horizon, our model no longer knows the Weekly Sales from the previous week. By performing TTA, Driverless AI will automatically generate these historical features if new data is provided. In Option 1, we would launch a new Driverless AI experiment every week with the latest data and use the resulting model to forecast the next week. In Option 2, we would continue using the same Driverless AI experiment outside of the forecast horizon by using TTA.", + "prompt_type": "plain" + }, + { + "output": "By retraining an experiment with the latest data, Driverless AI has the ability to possibly improve the model by changing the features used, choosing a different algorithm, and/or selecting different parameters. As the data changes over time, for example, Driverless AI may find that the best algorithm for this use case has changed. There may be clear advantages for retraining an experiment after each forecast horizon or for using TTA. Refer to `this example `__ to see how to use the scoring pipeline to predict future data instead of using the prediction endpoint on the Driverless AI server. 
Using TTA to continue using the same experiment over a longer period of time means there is no longer any need to continually repeat a model review process. However, it is possible for the model to become out of date. The following is a table that lists several scoring methods and whether they support TTA: +-------------------------+--------------------------------+ | Scoring Method | Test Time Augmentation Support | +=========================+================================+ | Driverless AI Scorer | Supported | +-------------------------+--------------------------------+ | Python Scoring Pipeline | Supported | +-------------------------+--------------------------------+ | MOJO Scoring Pipeline | Not Supported | +-------------------------+--------------------------------+ For different use cases, there may be clear advantages for retraining an experiment after each forecast horizon or for using TTA.", + "prompt_type": "plain" + }, + { + "output": "**Notes**: - Scorers cannot refit or retrain a model. - To specify a method for creating rolling test set predictions, use :ref:`this expert setting `. Note that refitting performed with this expert setting is only applied to the test set that is provided by the user during an experiment. The final scoring pipeline always uses TTA. Triggering Test Time Augmentation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To perform Test Time Augmentation, create your forecast data to include any data that occurred after the training data ended up to the dates you want a forecast for. The dates that you want Driverless AI to forecast should have missing values (NAs) where the target column is. Target values for the remaining dates must be filled in. 
The following is an example of forecasting for 2020-11-23 and 2020-11-30 with the remaining dates being used for TTA: +------------+-------+------+-------------+-------------+--------------+ | Date | Store | Dept | Mark Down 1 | Mark Down 2 | Weekly_Sales | +============+=======+======+=============+=============+==============+ | 2020-11-02 | 1 | 1 | -1 | -1 | $35,000 | +------------+-------+------+-------------+-------------+--------------+ | 2020-11-09 | 1 | 1 | -1 | -1 | $40,000 | +------------+-------+------+-------------+-------------+--------------+ | 2020-11-16 | 1 | 1 | -1 | -1 | $45,000 | +------------+-------+------+-------------+-------------+--------------+ | 2020-11-23 | 1 | 1 | -1 | -1 | NA | +------------+-------+------+-------------+-------------+--------------+ | 2020-11-30 | 1 | 1 | -1 | -1 | NA | +------------+-------+------+-------------+-------------+--------------+ **Notes**: - Although TTA can span any length of time into the future, the dates that are being predicted cannot exceed the horizon.", + "prompt_type": "plain" + }, + { + "output": "Forecasting Future Dates ~~~~~~~~~~~~~~~~~~~~~~~~ To forecast or predict future dates, upload a dataset that contains the future dates of interest and provide additional information such as group IDs or features known in the future. The dataset can then be used to run and score your predictions. 
The following is an example of a model that was trained up to 2020-05-31: +------------+----------+-----------------+-----------------+ | Date | Group_ID | Known_Feature_1 | Known_Feature_2 | +============+==========+=================+=================+ | 2020-06-01 | A | 3 | 1 | +------------+----------+-----------------+-----------------+ | 2020-06-02 | A | 2 | 2 | +------------+----------+-----------------+-----------------+ | 2020-06-03 | A | 4 | 1 | +------------+----------+-----------------+-----------------+ | 2020-06-01 | B | 3 | 0 | +------------+----------+-----------------+-----------------+ | 2020-06-02 | B | 2 | 1 | +------------+----------+-----------------+-----------------+ | 2020-06-03 | B | 4 | 0 | +------------+----------+-----------------+-----------------+ Time Series Expert Settings --------------------------- The user may further configure the time series experiments with a dedicated set of options available through the **Expert Settings** panel.", + "prompt_type": "plain" + }, + { + "output": "Install on RHEL\nThis section describes how to install the Driverless AI Docker image on\nRHEL. The installation steps vary depending on whether your system has\nGPUs or if it is CPU only. Environment\n -------------------------------------------\n Operating System GPUs? Min Mem\n ------------------------- ------- ---------\n RHEL with GPUs Yes 64 GB\n RHEL with CPUs No 64 GB\n -------------------------------------------\nInstall on RHEL with GPUs\nNote: Refer to the following links for more information about using RHEL\nwith GPUs. These links describe how to disable automatic updates and\nspecific package updates. This is necessary in order to prevent a\nmismatch between the NVIDIA driver and the kernel, which can lead to the\nGPUs failures. - https://access.redhat.com/solutions/2372971\n - https://www.rootusers.com/how-to-disable-specific-package-updates-in-rhel-centos/\nWatch the installation video here. 
Note that some of the images in this\nvideo may change between releases, but the installation steps remain the\nsame.", + "prompt_type": "plain" + }, + { + "output": "Open a Terminal and ssh to the machine that will run Driverless AI. Once\nyou are logged in, perform the following steps. 1. Retrieve the Driverless AI Docker image from\n https://www.h2o.ai/download/. 2. Install and start Docker EE on RHEL (if not already installed). Follow the instructions on\n https://docs.docker.com/engine/installation/linux/docker-ee/rhel/. 3. Install nvidia-docker2 (if not already installed). More information\n is available at\n https://github.com/NVIDIA/nvidia-docker/blob/master/README.md. 4. Verify that the NVIDIA driver is up and running. If the driver is\n not up and running, log on to\n http://www.nvidia.com/Download/index.aspx?lang=en-us to get the\n latest NVIDIA Tesla V/P/K series driver. 5. Set up a directory for the version of Driverless AI on the host\n machine:\n6. Change directories to the new folder, then load the Driverless AI\n Docker image inside the new directory:\n7. Enable persistence of the GPU. Note that this needs to be run once\n every reboot.", + "prompt_type": "plain" + }, + { + "output": "8. Set up the data, log, and license directories on the host machine\n (within the new directory):\n9. At this point, you can copy data into the data directory on the host\n machine. The data will be visible inside the Docker container. 10. Run docker images to find the image tag. 11. Start the Driverless AI Docker image and replace TAG below with the\n image tag. Depending on your install version, use the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command. Note that from version 1.10 DAI docker image\n runs with internal tini that is equivalent to using --init from\n docker, if both are enabled in the launch command, tini will print a\n (harmless) warning message. 
For GPU users, as GPU needs --pid=host\n for nvml, which makes tini not use pid=1, so it will show the\n warning message (still harmless). 12. Connect to Driverless AI with your browser at\n http://Your-Driverless-AI-Host-Machine:12345. Install on RHEL with CPUs\nThis section describes how to install and start the Driverless AI Docker\nimage on RHEL.", + "prompt_type": "plain" + }, + { + "output": "Watch the installation video here. Note that some of the images in this\nvideo may change between releases, but the installation steps remain the\nsame. Note\nAs of this writing, Driverless AI has been tested on RHEL versions 7.4,\n8.3, and 8.4. Open a Terminal and ssh to the machine that will run Driverless AI. Once\nyou are logged in, perform the following steps. 1. Install and start Docker EE on RHEL (if not already installed). Follow the instructions on\n https://docs.docker.com/engine/installation/linux/docker-ee/rhel/. 2. On the machine that is running Docker EE, retrieve the Driverless AI\n Docker image from https://www.h2o.ai/download/. 3. Set up a directory for the version of Driverless AI on the host\n machine:\n4. Load the Driverless AI Docker image inside the new directory:\n5. Set up the data, log, license, and tmp directories (within the new\n directory):\n6. Copy data into the data directory on the host. The data will be\n visible inside the Docker container at //data.", + "prompt_type": "plain" + }, + { + "output": "Run docker images to find the image tag. 8. Start the Driverless AI Docker image. Note that GPU support will not\n be available. Note that from version 1.10 DAI docker image runs with\n internal tini that is equivalent to using --init from docker, if\n both are enabled in the launch command, tini will print a (harmless)\n warning message. 9. Connect to Driverless AI with your browser at\n http://Your-Driverless-AI-Host-Machine:12345. 
Stopping the Docker Image\nTo stop the Driverless AI Docker image, type Ctrl + C in the Terminal\n(Mac OS X) or PowerShell (Windows 10) window that is running the\nDriverless AI Docker image. Upgrading the Docker Image\nThis section provides instructions for upgrading Driverless AI versions\nthat were installed in a Docker container. These steps ensure that\nexisting experiments are saved. WARNING: Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\ndirectory and are not automatically upgraded when Driverless AI is\nupgraded. - Build MLI models before upgrading.", + "prompt_type": "plain" + }, + { + "output": "- Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. If you did not build MLI on a model before upgrading Driverless AI,\n then you will not be able to view MLI on that model after upgrading. Before upgrading, be sure to run MLI jobs on models that you want to\n continue to interpret in future releases. If that MLI job appears in\n the list of Interpreted Models in your current version, then it will\n be retained after upgrading. If you did not build a MOJO pipeline on a model before upgrading\n Driverless AI, then you will not be able to build a MOJO pipeline on\n that model after upgrading. Before upgrading, be sure to build MOJO\n pipelines on all desired models and then back up your Driverless AI\n tmp directory. Note: Stop Driverless AI if it is still running. Requirements\nWe recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere. Driverless AI ships with CUDA 11.2.2 for GPUs, but the driver\nmust exist in the host environment.", + "prompt_type": "plain" + }, + { + "output": "Overview\nH2O Driverless AI is an artificial intelligence (AI) platform for\nautomatic machine learning. 
Driverless AI automates some of the most\ndifficult data science and machine learning workflows, such as feature\nengineering, model validation, model tuning, model selection, and model\ndeployment. It aims to achieve the highest predictive accuracy,\ncomparable to expert data scientists, but in a much shorter time thanks\nto end-to-end automation. Driverless AI also offers automatic\nvisualization and machine learning interpretability (MLI). Especially in\nregulated industries, model transparency and explanation are just as\nimportant as predictive performance. Modeling pipelines (feature\nengineering and models) are exported (in full fidelity, without\napproximations) both as Python modules and as Java standalone scoring\nartifacts. Apart from the standard experiment workflow for\nmodel building, DAI offers an experiment setup wizard that\nmakes it simple for you to set up a Driverless AI experiment and ensure\nthat the experiment's settings are optimally configured for your\nspecific use case.", + "prompt_type": "plain" + }, + { + "output": "Unsupervised Algorithms in Driverless AI (Experimental)\nStarting with version 1.10, Driverless AI exposes unsupervised\ntransformers that you can use for unsupervised model building. The\nfollowing sections describe several unsupervised transformers and\ncontain information on support for custom recipes and expert control of\nunsupervised experiments. 1. Isolation Forest Anomaly detection \n2. K-Means Clustering \n3. Truncated SVD (Dimensionality Reduction) \n4. Full support for custom recipes \n5. Expert control over Unsupervised Experiments \nConceptually, the overall pipeline of an unsupervised experiment is\nsimilar to the pipeline of a regular supervised experiment. However,\nthere are a few notable differences:\n1. Only one unsupervised algorithm (model, pipeline) can be chosen\n (that is, either clustering or anomaly detection, but not both). 
In\n    other words, all individuals in the genetic algorithm are of the\n    same model type, but they can have different parameters (e.g., number of\n    clusters, columns used for clustering).", + "prompt_type": "plain" + }, + { + "output": "Each such unsupervised modeling pipeline consists of exactly one\n    pretransformer, one transformer and one model. No labels (y) are\n    required. 3. The unsupervised model has only one function: To list the included\n    pretransformer, the included transformer and any applicable scorers. The model itself is a pure pass-through function, the\n    models.predict() method returns the output of the transformer\n    pipeline (any features the transformers make). This also means that\n    the variable importance of the model is ill-defined, and uniformly\n    spread across features. For clustering, there will be only 1 feature\n    (the assigned cluster label), and it will have variable importance\n    of 1.0. 4. Automatic Machine Learning is only possible if there's a metric\n    (scorer) that assesses the quality of the transformation via\n    score(X, actual=None, predicted=transformed_X). For example, the\n    quality of the labels created by a K-Means clustering algorithm can\n    be evaluated for a given dataset, given labels, and a metric.", + "prompt_type": "plain" + }, + { + "output": "This value can be ignored, and signals Driverless AI\n    that the experiment is converged after the first iteration. 5. No MLI support in 1.10.0, but is planned for future releases. 6. No ensembles and cross-validation for final models for unsupervised\n    experiments (fixed_ensemble_level=0 is enforced). As a consequence,\n    creation of training holdout predictions is not possible (all data\n    is used for the final model). If predictions like cluster\n    assignments are desired for the training data, please make\n    predictions on the training data, with the usual caveats of\n    overfitting (due to heavy tuning during AutoML) since fit() and\n    predict() are performed with the same data. 
Isolation Forest Anomaly detection\nIsolation forest isolates or identifies the anomalous entries by\nrandomly splitting the decision trees. The idea is that an outlier will\nlie farther away from the regular observations in the feature space and\nhence will require fewer random splits to isolate to the terminal node\nof a tree.", + "prompt_type": "plain" + }, + { + "output": "The lower the score, the more likely it is that the row is an\nanomaly. Internally, Driverless AI runs sklearn's Isolation Forest\nimplementation. When building a model, the Accuracy and Time knobs of Driverless AI can\nbe toggled to adjust the effort spent on model tuning but presently as\nthere is no scorer being used for isolation forest, when doing\ngenetic algorithm , the model will converge immediately and use one\nof the models from the tuning phase as the final model. The\nInterpretability knob is ignored in the default set up. The number of\ntrees or n_estimators for the isolation forest model can be adjusted\nwith the isolation_forest_nestimators expert setting parameter. After building the model, the scores can be obtained by predicting on\nthe same dataset. Note that if you pass a test dataset, then you can\ndownload predictions immediately without predicting on the same dataset. If you don't pass a test dataset, then you must go to Model actions >\nPredict. The lower the scores of a row, the more likely it is an outlier\nor anomaly by the model.", + "prompt_type": "plain" + }, + { + "output": "To create labels from these scores, quantile value can be used as a\nthreshold. For example, if you know that 5% of the rows are anomalous in\nyour dataset, then this can be used to calculate the 95th quantile of\nthe scores. This quantile can act as a threshold to classify each row as\nbeing an anomaly or not. The Python scoring pipeline can be used to deploy the\nIsolation Forest model to production (currently no MOJO support). 
Use case idea: Given an anomaly detection experiment, you can create\npredictions on the training dataset, including all original columns, and\nre-upload into Driverless AI to run a supervised experiment. For a given\nsimilar dataset (in production), you now have an unsupervised scorer\nthat tells you the anomaly score for each row, and supervised scorer\nwhich makes Shapley per-feature contribution reason codes to explain why\neach row is an anomaly or not. Note: The following are some additional details on the transformers and\npretransformers that are relevant to IF.", + "prompt_type": "plain" + }, + { + "output": "- OrigFreqPreTransformer (pretransformer): Categoricals are frequency\n encoded with this pretransformer. Note that isolation forest itself\n only accepts numericals. KMeans Clustering\nClustering algorithms partition observations into clusters. Driverless\nAI uses sklearn KMeans clustering algorithm to partition the\nobservations so that they belong to the cluster with the nearest mean\n(centroid of the cluster). Driverless AI exposes the following unsupervised models that run on\nnumeric and categorical columns to build a K-Means clustering model. You\ncan either pick a model type based on the characteristics of your\ndataset, or run all of them (one by one) to decide which one works best\nfor your dataset. 
    - KMeans : This does K-Means clustering only on numeric columns\n    - KMeansFreq : This does K-Means clustering on numeric and\n        frequency transformed categorical (integer\n        columns are treated only as numeric)\n    - KMeansOHE : This does K-Means clustering on numeric and\n        one-hot-encoding transformed categorical columns\nDriverless AI provides the following scorers to enable automatic\nunsupervised clustering:\n    - CALINSKI HARABASZ : The Calinski-Harabasz index also known as the\n        Variance Ratio Criterion, is the ratio of the sum of\n        between-clusters dispersion and of within-cluster dispersion for\n        all clusters.", + "prompt_type": "plain" + }, + { + "output": "- DAVIES BOULDIN : The Davies-Bouldin Index signifies the average\n        'similarity' between clusters, where similarity is a measure that\n        compares distance between clusters with the size of the clusters\n        themselves. A lower Davies-Bouldin index relates to a model with\n        better separation between the clusters. - SILHOUETTE : The Silhouette Coefficient is defined for each sample\n        and is composed of two scores. The mean distance between a sample\n        and all other points in the same class. This score measures the\n        closeness of points in the same cluster. And the mean distance\n        between a sample and all other points in the next nearest cluster. This score measures the distance of points of different clusters. A\n        higher Silhouette Coefficient score relates to a model with better\n        defined clusters. This scorer can be slow for larger datasets. Ref\nWhile building a clustering model, Accuracy and Time knobs can be\ntoggled to adjust the effort spent on model tuning and validation.", + "prompt_type": "plain" + }, + { + "output": "unsupervised_clustering_max_clusters`` parameters can be used in the\nexpert panel to set the upper and lower bound on the number of clusters\nto build.\n\nDuring model building, Driverless AI creates KMeans Clustering model on\na subset of features (between 2 to 5). 
The feature subset size, columns\nto be used for clustering and the parameter tuning is decided during the\ngenetic algorithm process. User can set the feature subset size\n(dimensionality of space to cluster) by", + "prompt_type": "plain" + }, + { + "output": "fixed_interaction_depth parameter of the expert settings. The value should lie between 2 to 5. Say, fixed_interaction_depth=4, then clustering will be performed in 4D. If say, more than 4 features are present in the dataset (or after accounting for the pre-transformations like one-hot-encoding), then when doing genetic algorithm, DAI will select input features and model parameters (based on internal train/valid split(s)) to decide the best possible subset of 4 features and their parameter set to build the model that optimizes the scores. The **scorer** takes the *full dataset* (pre transformed with all features) and *labels* for the rows as created by the (subset of features) clustering model to give the scores. It compares the output of the unsupervised transformer to its input. The **Insights** tab of the experiment gives a peek into the working of clustering transformer on the subset of features to build the best model. It lists the cluster sizes and centroids for the features in the cluster.", + "prompt_type": "plain" + }, + { + "output": "Aggregator algorithm is used to reduce the datasize for the plot. This is a preview of the custom visualization capability (using Vega) that is coming soon to DAI. After building the model, the :ref:`Visualize Scoring Pipeline option ` can be used to inspect the **pre transformations** applied to the features, before building model (on subset of features) and scoring (on full set). It can also be used to inspect the features used to build the clustering model. The cluster **labels** can be created by predicting on the dataset. 
To get cluster label assignments for the training (or any) dataset, then the fitted model can be used to make predictions, just like any supervised model. Note that overfitting can occur anytime when fit and predict are performed on the same dataset. The clustering model produces :ref:`MOJOs ` and :ref:`Python scoring pipelines ` to deploy to :ref:`production `. .. figure:: images/clust_pipeline.png :alt: You can also write custom clustering recipes by defining your own pretransformer (i.e what columns with what encodings are fed in for clustering), clustering transformer, and scorer.", + "prompt_type": "plain" + }, + { + "output": "(For best results, use the release branch that corresponds with your version of Driverless AI.) .. _svd: Truncated SVD (Dimensionality Reduction) ---------------------------------------- `Truncated SVD `__ is a dimensionality reduction method and can be applied to a dataset to reduce the number of features before running say a supervised algorithm. It factorizes data matrix where the number of columns is equal to the specified truncation. It is useful in use cases where *sparse* data gets generated like recommender systems or in text processing like tfidf. Internally Driverless AI runs `sklearn Truncated SVD `__ implementation. .. raw:: html \"svd\" Driverless AI exposes the TRUNCSVD transformer to reduce the number of features. Presently, none of the parameters can be toggled by the user.", + "prompt_type": "plain" + }, + { + "output": "(Note that these are considered random mutations.) After building the model, :ref:`Visualizing scoring pipeline ` can be used to inspect the number of components created. Additionally, the dimensionality reduced dataset can be obtained by predicting on the dataset. Presently as there is no scorer being used for SVD experiment, when doing :ref:`genetic algorithm `, the model will converge immediately and use one of the models from the :ref:`tuning phase ` as the final model. 
The Dimensionality Reduction model produces :ref:`MOJOs ` and :ref:`Python ` scoring pipelines to deploy to :ref:`production `. .. _unsup_custom_recipes: Unsupervised Custom Recipes --------------------------- Driverless AI supports **custom Python recipes for unsupervised learning**. You can write custom unsupervised recipes by defining your own pretransformer, transformer, and scorer. To view examples, see the `official Driverless AI recipes repository `__.", + "prompt_type": "plain" + }, + { + "output": ".. _unsup_expert_control: Expert control over Unsupervised Experiments -------------------------------------------- You can control unsupervised experiments by selecting specific **pretransformers** and **transformers**. Pretransformers are equivalent to the first layer of a pipeline, and transformers are equivalent to the second layer of a pipeline. To specify pretransformers and transformers, use the Expert Settings window of an experiment. For more information, see :ref:`understanding-configs`. The following steps describe how to control unsupervised experiments with the Expert Settings window. 1. On the **Experiment Setup** page, select **Unsupervised**. 2. Click **Unsupervised learning model** and select **Unsupervised** from the list of options. The preview updates to display the transformers that are used by default. 3. On the Experiment Setup page, click **Expert Settings**. The Expert Settings window is displayed. a. **To select specific pretransformers:** In the **Training -> Feature Engineering** tab, click the **Select values** button for the **Include specific preprocessing transformers** (included_pretransformers) setting.", + "prompt_type": "plain" + }, + { + "output": "b. **To select specific transformers:** In the **Training -> Feature Engineering** tab, click the **Select values** button for the **Include specific transformers** (included_transformers). To confirm your selection, click **Done**. 
**Note:** Selecting pretransformers isn't required. If no pretransformers are selected, then the first layer is ignored. .. figure:: images/unsupervised-expert.png :alt: 4. To confirm your overall selection and exit out of the Expert Settings window, click the **Save** button. 5. In the **Training Settings** category on the Experiment Setup page, specify the **Unsupervised** scorer. Alternatively, select a custom scorer. .. figure:: images/unsup_expert.png :alt: Expert control example 1 ~~~~~~~~~~~~~~~~~~~~~~~~ The following list contains examples of how you can use expert control to configure unsupervised experiments. - Input text through **term frequency\u2013inverse document frequency (TFIDF)** by setting TextTransformer as a pretransformer, and then through K-Means clustering by setting ClusterIdAllNumTransformer as a transformer.", + "prompt_type": "plain" + }, + { + "output": "- Set one_hot_encoding_cardinality_threshold and one_hot_encoding_cardinality_threshold_default_use to a large value like 10,000,000 to allow all possible categorical levels to be included. Expert control example 2 ~~~~~~~~~~~~~~~~~~~~~~~~ The following example describes how you can use expert control to configure unsupervised experiments using a custom recipe for text handling. - Upload https://github.com/h2oai/driverlessai-recipes/blob/master/transformers/nlp/text_topic_modeling_transformer.py (Or choose the version for your DAI release by selecting the correct branch version.) - Upload https://github.com/h2oai/driverlessai-recipes/blob/master/models/unsupervised/TextKMeansIsolationForest.py (Or choose the version for your DAI release by selecting the correct branch version.) - Upload a dataset. On the Experiment Setup page, select **Unsupervised**, and then select KMeansFreqTextModel for the unsupervised model. 
You can select a variety of other models in the TextKMeansIsolationForest recipe.", + "prompt_type": "plain" + }, + { + "output": "- Upload https://github.com/h2oai/driverlessai-recipes/blob/master/transformers/nlp/text_topic_modeling_transformer.py (or choose the version for your DAI release) - Upload a dataset. On the Experiment Setup page, select **Unsupervised**, and then select **UnsupervisedModel** for the unsupervised model. - Click **Expert Settings**. The Expert Settings window is displayed. - In the **Training -> Feature Engineering** tab, select **Specific transformers to include** (TOMLincluded_transformers) and select only ClusterIdAllNumTransformer. - In the **Training -> Feature Engineering** tab, select **Specific pretransformers to include** (TOMLincluded_pretransformers) and select only TextLDATopicTransformer. - On the **Experiment Setup** page, click **Scorer** and select either UnsupervisedScorer (for one-shot model) or CalinskiHarabasz (for optimal clusters). Expert control example 4 ~~~~~~~~~~~~~~~~~~~~~~~~ In many cases, you may only want a single output from an unsupervised model.", + "prompt_type": "plain" + }, + { + "output": "UNSUPERVISEDscorer to just do single model. Another way to achieve a similar result in Driverless AI version 1.10.5 and beyond is to make the recipe match the following: .. code:: python from h2oaicore.models_custom import CustomModel # don't use CustomUnsupervisedModel from h2oaicore.models_unsupervised import UnsupervisedModel class MyUnsupervisedModel(UnsupervisedModel, CustomModel): _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] but then set expert optioncustom_unsupervised_expert_mode=true. This forces the experiment to use this custom unsupervised model as if it were likeUnsupervisedModelin terms of requiring you to go to the expert panel and select which scorers, transformers, and pretransformers to be used (like supervised experiments). 
However, by forcing this model to only havengenes_max=1, it ensures only a single instance of the transformer is produced. Note that in this case, onlyUnsupervisedScoreris available as an option. A slight deviation from the preceding example is to use a recipe like the following: .. code:: python from h2oaicore.models_custom import CustomModel # don't use CustomUnsupervisedModel from h2oaicore.models_unsupervised import UnsupervisedModel class MyUnsupervisedModel(UnsupervisedModel, CustomModel): _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] _included_scorers = ['UnsupervisedScorer', 'SilhouetteScorer', 'CalinskiHarabaszScorer', 'DaviesBouldinScorer'] and set expert optioncustom_unsupervised_expert_mode=true, which behaves like the prior example, but lets you select other scorers and still give single feature from the model.", + "prompt_type": "plain" + }, + { + "output": "Using License Manager (beta)\nThe following sections describe how to use License Manager with\nDriverless AI. Presently it is in beta state and is optional. Please\ncontact support@h2o.ai to get License manager artifacts. - understanding-lm\n- configure-lm\nUnderstanding License Manager\nLicense Manager is a software that is used to assist in the monitoring\nof license usage for H2O.ai products. It allows for the application of a\nsingle global license that can optionally implement specific\nrestrictions (for example, a restriction on the maximum number of\nconcurrent Driverless AI users can be specified). The license is applied\nto the License Management server, not to individual products. Configuring Driverless AI to Use License Manager\nAlthough Driverless AI can technically be started without the license\nmanager server running, you would not be able to log in and use the\nsoftware if Driverless AI is unable to communicate with a running\nlicense management server. 
Therefore, it is recommended that the License\nManager server be started before starting any Driverless AI instances.", + "prompt_type": "plain" + }, + { + "output": "Obtain a license manager install artifact from H2O.ai. Choose from\n the following:\n - DEB\n - RPM\n - Docker\n - Linux binary\n2. Install the artifact:\n - DEB - dpkg -i /path/to/lms.deb\n - RPM - rpm -ivh /path/to/lms.rpm\n - Docker - docker load < /path/to/lms.tar.gz\n - Linux binary - No install necessary. Only a Linux-based\n machine is required\n3. Start the License Manager server. This process may vary depending on\n the install type. systemd-based artifacts may require some changes\n to startup scripts if custom startup is needed. Custom startup can\n be performed with the application.properties file or environment\n variables. By default, the license manager UI is available at\n http://license-manager-ip-address:9999. License Manager Server Setup\n1. To acquire a license, contact support@h2o.ai. 2. Create a new project or use the default project with a\n useful/explicit name. 3. Enable the new project.", + "prompt_type": "plain" + }, + { + "output": "Navigate to the Licenses panel in License Manager UI and load the\n license to the License Manager server. Links to the Licenses panel\n are located in the left-hand side bar of the interface. []\nStarting Driverless AI with License Manager\nTo configure Driverless AI to use License Manager on startup, use the\nconfig.toml file. The following TOML options can also be\nset with environment variables. Note: The Driverless AI instance must have the ability to communicate\nwith the License Manager server over a network. 
Sample config.toml :\n # License Management\n enable_license_manager = true\n license_manager_address = \"http://127.0.0.1:9999\"\n license_manager_project_name = \"license-manager-test\"\n license_manager_lease_duration = 3600000\n license_manager_ssl_certs = \"/home/npng\"\n license_manager_worker_startup_timeout = 60000\nThe following are descriptions of the relevant settings:\n- enable_license_manager - In order for Driverless AI to use the\n license manager, this must be set to true\n- license_manager_address - The IP address and port of the license\n manager so that Driverless AI knows where to access the license\n manager\n- license_manager_project_name - Name of the newly created project\n with license loaded to it from above\n- license_manager_lease_duration (Optional) - How long (in\n milliseconds) the lease issued by the license manager remains active\n before requiring a renewal.", + "prompt_type": "plain" + }, + { + "output": "S3 Setup\n\nDriverless AI lets you explore S3 data sources from within the\nDriverless AI application. This section provides instructions for\nconfiguring Driverless AI to work with S3.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run\n--runtime=nvidia(>= Docker 19.03) ornvidia-docker(< Docker 19.03) command when starting the Driverless AI Docker image. Usedocker\nversionto check which version of Docker you are using. Description of Configuration Attributes --------------------------------------- -aws_access_key_id: The S3 access key ID -aws_secret_access_key: The S3 access key -aws_role_arn: The Amazon Resource Name -aws_default_region: The region to use when the aws_s3_endpoint_url option is not set. This is ignored when aws_s3_endpoint_url is set. -aws_s3_endpoint_url: The endpoint URL that will be used to access S3. 
-aws_use_ec2_role_credentials: If set to true, the S3 Connector will try to obtain credentials associated with the role attached to the EC2 instance. -s3_init_path: The starting S3 path that will be displayed in UI S3 browser. -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. Example 1: Enable S3 with No Authentication ------------------------------------------- .. container:: tabs .. group-tab:: Docker Image Installs This example enables the S3 data connector and disables authentication.", + "prompt_type": "plain" + }, + { + "output": "This allows users to reference data stored in S3 directly using the name node address, for example: s3://name.node/datasets/iris.csv. .. code:: bash nvidia-docker run \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,s3\" \\ -p 12345:12345 \\ --init -it --rm \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure S3 options in the config.toml file, and then specify that file when starting Driverless AI in Docker. Note that this example enables S3 with no authentication. 1. Configure the Driverless AI config.toml file. Set the following configuration options. .. -enabled_file_systems\n= \"file, upload,\ns3\"2. Mount the config.toml file into the Docker container.", + "prompt_type": "plain" + }, + { + "output": "It does not pass any S3 access key or secret. 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. .. 
:: # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, s3\" 3. Save the changes when you are done, then stop/restart Driverless AI. Example 2: Enable S3 with Authentication ---------------------------------------- .. container:: tabs .. group-tab:: Docker Image Installs This example enables the S3 data connector with authentication by passing an S3 access key ID and an access key. It also configures Docker DNS by passing the name and IP of the S3 name node. This allows users to reference data stored in S3 directly using the name node address, for example: s3://name.node/datasets/iris.csv. .. 
code:: bash nvidia-docker run \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,s3\" \\ -e DRIVERLESS_AI_AWS_ACCESS_KEY_ID=\"\" \\ -e DRIVERLESS_AI_AWS_SECRET_ACCESS_KEY=\"\" \\ -p 12345:12345 \\ --init -it --rm \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure S3 options with authentication in the config.toml file, and then specify that file when starting Driverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "Upgrading the Driverless AI Community Image\nWARNINGS:\n- This release deprecates experiments and MLI models from 1.7.0 and\n earlier. - Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded. We recommend you take the following steps before\n upgrading. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. Upgrading from Version 1.2.2 or Earlier\nThe following example shows how to upgrade from 1.2.2 or earlier to the\ncurrent version. Upgrading from these earlier versions requires an edit\nto the start and h2oai scripts. 1. SSH into the IP address of the image instance and copy the existing\n experiments to a backup location:\n2. 
wget the newer image.", + "prompt_type": "plain" + }, + { + "output": "Understanding the Model Interpretation Page\nThis document describes the various interpretations available from the\nMachine Learning Interpretability (MLI) explanations page for\nnon-time-series experiments. The explanations page is organized into four tabs:\n - Summary Tab \n - Interpretations Using Driverless AI Model - DAI Model Tab \n - Interpretations Using Surrogate Model - Surrogate Model Tab \n - Interpretations Using NLP Dataset - NLP Tab (Only\n visible for NLP problems)\nThe mli-dashboard button reveals a dashboard with an overview of the\ninterpretations built using surrogate models. The\nActions button on the MLI page can be used to download\nreason codes, scoring pipelines for productionization, and MLI logs. The task bar lists the status and logs of MLI\nexplainers . Summary Tab\nThe Summary tab provides an overview of the interpretation, including\nthe dataset and Driverless AI experiment name (if available) that were\nused for the interpretation along with the feature space (original or\ntransformed), target column, problem type, and k-Lime information.", + "prompt_type": "plain" + }, + { + "output": "[]\nInterpretations Using Driverless AI Model (DAI Model Tab)\nThe DAI Model tab is organized into tiles for each interpretation\nmethod. To view a specific plot, click the tile for the plot that you\nwant to view. For binary classification and regression experiments, this tab includes\nFeature Importance and Shapley (not supported for RuleFit and TensorFlow\nmodels) plots for original and transformed features as well as Partial\nDependence/ICE, Disparate Impact Analysis (DIA), Sensitivity Analysis,\nNLP Tokens and NLP LOCO (for text experiments), and Permutation Feature\nImportance (if the autodoc_include_permutation_feature_importance\nconfiguration option is enabled) plots. 
For multiclass classification\nexperiments, this tab includes Feature Importance and Shapley plots for\noriginal and transformed features. The following is a list of the interpretation plots available from the\nDriverless AI Model tab:\n - Feature Importance (Original and Transformed Features) \n - Shapley (Original and Transformed Features) \n - Shapley Summary Plot (Original Features) \n - Partial Dependence (PDP) and Individual Conditional Expectation (ICE) \n - Disparate Impact Analysis \n - Time Series Explainer \n - Sensitivity Analysis \n - NLP LOCO \n - Permutation Feature Importance \n[]\nNotes:\n - Shapley plots are not supported for RuleFit, FTRL, and TensorFlow\n models.", + "prompt_type": "plain" + }, + { + "output": "To enable the\n calculations using Kernel Explainer method, enable Original Kernel\n SHAP explainer in recipes . - Shapley plots are only supported for those BYOR (custom) models\n that implement the has_pred_contribs method (and return True) and\n implement proper handling of the argument pred_contribs=True in\n the predict method. - The Permutation-based feature importance plot is only available\n when the autodoc_include_permutation_feature_importance\n configuration option is enabled when starting Driverless AI or\n when starting the MLI experiment (enable AutoDoc from the recipe\n tab and include_permutation_feature_importance from MLI AutoDoc\n expert settings when launching the MLI job). - On the Feature Importance and Shapley plots, the transformed\n feature names are encoded as follows:\n _::<...>:.\n So in 32_NumToCatTE:BILL_AMT1:EDUCATION:MARRIAGE:SEX.0, for\n example:\n - 32_ is the transformation index for specific transformation\n parameters.", + "prompt_type": "plain" + }, + { + "output": "- BILL_AMT1:EDUCATION:MARRIAGE:SEX represent original features\n used. 
- 0 represents the likelihood encoding for target[0] after\n grouping by features (shown here as BILL_AMT1, EDUCATION,\n MARRIAGE and SEX) and making out-of-fold estimates. For\n multiclass experiments, this value is > 0. For binary\n experiments, this value is always 0. Interpretations Using Surrogate Model (Surrogate Model Tab)\nA surrogate model is a data mining and engineering technique in which a\ngenerally simpler model is used to explain another, usually more\ncomplex, model or phenomenon. For example, the decision tree surrogate\nmodel is trained to predict the predictions of the more complex\nDriverless AI model using the original model inputs. The trained\nsurrogate model enables a heuristic understanding (i.e., not a\nmathematically precise understanding) of the mechanisms of the highly\ncomplex and nonlinear Driverless AI model. The Surrogate Model tab is organized into tiles for each interpretation\nmethod.", + "prompt_type": "plain" + }, + { + "output": "For binary classification and regression experiments, this\ntab includes K-LIME/LIME-SUP and Decision Tree plots as well as Feature\nImportance, Partial Dependence, and LOCO plots for the Random Forest\nsurrogate model. For more information on these plots, see\nsurrogate-model-plots. The following is a list of the interpretation plots from Surrogate\nModels:\n - K-LIME and LIME-SUP \n - Random Forest Feature Importance \n - Random Forest Partial Dependence and Individual Conditional Expectation \n - Random Forest LOCO \n - Decision Tree \n - NLP Surrogate \n[]\nNote: For multiclass classification experiments, only the Decision Tree\nand Random Forest Feature Importance plots are available in this tab. Interpretations Using NLP Dataset (NLP Tab)\nThe NLP tab is only visible for natural language processing (NLP)\nproblems and is organized into tiles for each interpretation method. 
To\nview a specific plot, click the tile for the plot that you want to view\nThe following is a list of the interpretation plots available from the\nNLP tab:\n - dai-nlp-loco\n - mli-nlp-pdp\n - mli-nlp-tokens\n - mli-nlp-vlm\n[]\nSurrogate Models Dashboard\nTo view a dashboard with an overview of the interpretations built using\nsurrogate models, click the Surrogate Models Dashboard button.", + "prompt_type": "plain" + }, + { + "output": "[]\nFor binary classification and regression experiments, the Surrogate\nModels Dashboard page provides a single page with the following\nsurrogate plots. Note that the PDP and Feature Importance plots on this\npage are based on the Random Forest surrogate model. - Global Interpretable Model Explanations\n - Feature Importance\n - Decision Tree\n - Partial Dependence\nYou can also view explanations from this page by clicking the\nExplanations button located in the upper-right corner. Refer to the\nmli-explanations section for more information. Note: The Surrogate Models Dashboard is only available for binary\nclassification and regression experiments. []\nActions Button\nThe Actions button can be used to download reason codes, scoring\npipelines for productionization, and logs. Click this button to view the\nfollowing options:\n - MLI Docs: View the Machine Learning Interpretability section of\n the Driverless AI documentation. - Display MLI Java Logs: View MLI Java logs for the interpretation.", + "prompt_type": "plain" + }, + { + "output": "- Experiment: View the experiment that was used to generate the\n interpretation. - Download MLI Logs: Download a ZIP file of the logs that were\n generated during the interpretation. - Python Scoring Pipeline: For binomial and regression experiments,\n download the Python scoring pipeline for the interpretation. This\n option is not available for multiclass experiments. - Download k-LIME MOJO Reason Code Pipeline: Download the k-LIME\n MOJO Reason Code Pipeline. 
For more info, see klime-mojo. - Download Formatted Transformed Shapley Reason Codes: For\n regression, binary, and multiclass experiments, download a CSV\n file of formatted Shapley reason codes on transformed data. - Download Formatted LIME Reason Codes: For binomial experiments,\n download a CSV file of formatted LIME reason codes. - Download LIME Reason Codes: For binomial experiments, download a\n CSV file of LIME reason codes. - Download Formatted Original Shapley Reason Codes (Naive Shapley):\n For regression, binary, and multiclass experiments, download a CSV\n file of formatted Shapley reason codes for original data.", + "prompt_type": "plain" + }, + { + "output": "Feature Importance (Original and Transformed Features)\nThis plot is available for all models for binary classification,\nmulticlass classification, and regression experiments. This plot shows the Driverless AI feature importance. Driverless AI\nfeature importance is a measure of the contribution of an input variable\nto the overall predictions of the Driverless AI model. []\nShapley (Original and Transformed Features)\nThis plot is not available for RuleFit or TensorFlow models. For all\nother models, this plot is available for binary classification,\nmulticlass classification, and regression experiments. Shapley explanations are a technique with credible theoretical support\nthat presents consistent global and local variable contributions. Local\nnumeric Shapley values are calculated by tracing single rows of data\nthrough a trained tree ensemble and aggregating the contribution of each\ninput variable as the row of data moves through the trained ensemble. For regression tasks, Shapley values sum to the prediction of the\nDriverless AI model.", + "prompt_type": "plain" + }, + { + "output": "incomewill be 2.5 each. For ensembles, Shapley values (in the link space) are blended as per the model weights in the ensemble. 
Driverless AI :ref:`MOJO ` for productionization supports Naive Shapley (even split) approach for original features. Shapley values for original features can also be calculated with the **Kernel Explainer** method, which uses a special weighted linear regression to compute the importance of each feature. This can be enabled by using the :ref:`recipe ` Original Kernel SHAP explainer. More information about Kernel SHAP is available at http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf. .. figure:: images/shapley_original_features.png :alt: *Naive Shapley Original Feature Importance* *Naive Shapley Original Feature Importance* .. figure:: images/shapley_transformed.png :alt: *Transformed Shapley* *Transformed Shapley* The **Showing** :math:`n` **Features** dropdown for Feature Importance and Shapley plots lets you select between original and transformed features.", + "prompt_type": "plain" + }, + { + "output": "**Note**: The provided original values are approximations derived from the accompanying transformed values. For example, if the transformed feature :math:`feature1\\_feature2` has a value of 0.5, then the value of the original features (:math:`feature1` and :math:`feature2`) will be 0.25. .. _dai-shapley-summary: Shapley Summary Plot (Original Features) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The Shapley Summary Plot shows original features versus their local Shapley values on a sample of the dataset. Feature values are binned by Shapley values, and the average normalized feature value for each bin is plotted. To see the Shapley value, number of rows, and average normalized feature value for a particular feature bin, hold the pointer over the bin. The legend corresponds to numeric features and maps to their normalized value. Yellow is the lowest value, and deep orange is the highest. 
You can click on numeric features to see a scatter plot of the actual feature values versus their corresponding Shapley values.", + "prompt_type": "plain" + }, + { + "output": ".. raw:: html \"Shapley **Notes**: - The Shapley Summary Plot only shows original features that are used in the Driverless AI model. - The dataset sample size and the number of bins can be updated in the Interpretation Expert Settings. - For a list of Shapley Summary Plot explainer expert settings, see :ref:`interpretation-expert-settings-shapley`. .. _pdp-ice: Partial Dependence (PDP) and Individual Conditional Expectation (ICE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A Partial Dependence and ICE plot is available for both Driverless AI and surrogate models. The Partial Dependence Technique ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Partial dependence is a measure of the average model prediction with respect to an input variable. Partial dependence plots display how machine-learned response functions change based on the values of an input variable of interest while taking nonlinearity into consideration and averaging out the effects of all other input variables.", + "prompt_type": "plain" + }, + { + "output": "Partial dependence plots enable increased transparency in Driverless AI models and the ability to validate and debug Driverless AI models by comparing a variable's average predictions across its domain to known standards, domain knowledge, and reasonable expectations. The ICE Technique ^^^^^^^^^^^^^^^^^ This plot is available for binary classification and regression models. A newer adaptation of partial dependence plots called Individual conditional expectation (ICE) plots can be used to create more localized explanations for a single individual by using the same basic ideas as partial dependence plots. ICE Plots were described by Goldstein et al (2015). 
ICE values are disaggregated partial dependence, but ICE is also a type of nonlinear sensitivity analysis in which the model predictions for a single row are measured while a variable of interest is varied over its domain. ICE plots enable a user to determine whether the model's treatment of an individual row of data is outside one standard deviation from the average model behavior, whether the treatment of a specific row is valid in comparison to average model behavior, known standards, domain knowledge, and reasonable expectations, and how a model will behave in hypothetical situations where one variable in a selected row is varied across its domain.", + "prompt_type": "plain" + }, + { + "output": "Large differences in partial dependence and ICE are an indication that strong variable interactions may be present. In this case partial dependence plots may be misleading because average model behavior may not accurately reflect local behavior. .. _partial-dependence-plot: Partial Dependence Plot (PDP) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This plot is available for binary classification and regression models. Overlaying ICE plots onto partial dependence plots allow the comparison of the Driverless AI model's treatment of certain examples or individuals to the model's average predictions over the domain of an input variable of interest. This plot shows the partial dependence when a variable is selected and the ICE values when a specific row is selected. Users may select a point on the graph to see the specific value at that point. You can also focus the PDP plot on a specific subset of data by using the slider in the middle of the screen. 
Partial dependence (yellow) portrays the average prediction behavior of the Driverless AI model across the domain of an input variable along with +/- 1 standard deviation bands.", + "prompt_type": "plain" + }, + { + "output": "Currently, partial dependence and ICE plots are only available for the top ten most important original input variables. Categorical variables with 20 or more unique values are never included in these plots. .. figure:: images/mli-pdp.png :alt: **Notes**: - To use dynamic switching between PDP numeric and categorical binning and UI chart selection in cases where features were used both as numeric and categorical by the experiment, enable themli_pd_numcat_num_chart:ref:`config.toml ` setting. (This setting is enabled by default.) When this setting is enabled, you can specify the threshold for PDP binning and chart selection with themli_pd_numcat_thresholdsetting, which defaults to 11. - The number of out of range / unseen PD or ICE bins can be specified through the PDP explainer :ref:`oor_grid_resolution` expert setting: .. .. raw:: html \"PDP - For a list of PDP explainer expert settings, see :ref:`interpretation-expert-settings-pdp`.", + "prompt_type": "plain" + }, + { + "output": "With this method, PD/ICE is calculated by an ad hoc explainer, then run and merged to the original DAI PD/ICE representation. To use the PD on-demand option, click the interpretation you want to use, then click **DAI Partial Dependence Plot** from the **DAI Model** tab. On the PD plot page, click the **Add Feature** button and select the feature(s) you want to calculate PD for. Click **Done** to confirm your selection. A notification appears at the bottom of the screen once Driverless AI has finished the on-demand computation. To view the computed PD values for a particular feature, click **Feature** on the PD plot page, then select the feature you want to view PD values for. .. raw:: html \"PDP .. 
_dai-dia: Disparate Impact Analysis (DIA) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This plot is available for binary classification and regression models. DIA is a technique that is used to evaluate fairness.", + "prompt_type": "plain" + }, + { + "output": "DIA typically works by comparing aggregate measurements of unprivileged groups to a privileged group. For instance, the proportion of the unprivileged group that receives the potentially harmful outcome is divided by the proportion of the privileged group that receives the same outcome\u2014the resulting proportion is then used to determine whether the model is biased. Refer to the **Summary** section to determine if a categorical level (for example, Fairness Female) is fair in comparison to the specified reference level and user-defined thresholds. **Fairness All** is a true or false value that is only true if every category is fair in comparison to the reference level. Disparate impact testing is best suited for use with constrained models in Driverless AI, such as linear models, monotonic GBMs, or RuleFit. The average group metrics reported in most cases by DIA may miss cases of local discrimination, especially with complex, unconstrained models that can treat individuals very differently based on small changes in their data attributes.", + "prompt_type": "plain" + }, + { + "output": "Several tables are provided as part of the analysis: - **Group metrics**: The aggregated metrics calculated per group. For example, true positive rates per group. - **Group disparity**: This is calculated by dividing the metric_for_group by the reference_group_metric. Disparity is observed if this value falls outside of the user-defined thresholds. - **Group parity**: This builds on Group disparity by converting the above calculation to a true or false value by applying the user-defined thresholds to the disparity values. In accordance with the established four-fifths rule, user-defined thresholds are set to 0.8 and 1.25 by default. 
These thresholds will generally detect if the model is (on average) treating the non-reference group 20% more or less favorably than the reference group. Users are encouraged to set the user-defined thresholds to align with their organization's guidance on fairness thresholds. Run DIA on external datasets ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You can run DIA on a dataset that has predictions from an external source instead of getting predictions within Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "1. In the main navigation, click **MLI**. The **Interpreted Models** page is displayed. 2. Click the **New Interpretation** button, and then click **New Interpretation** from the list of available options. 3. In the **Interpretation Settings** section, click **Select dataset**, and then specify a dataset that has predictions from an external source. 4. In the **Interpretation Settings** section, click **Recipes**. Click the **Uncheck all** button, and then select only **Disparate Impact Analysis**. To confirm your selection, click **Done**. .. figure:: images/dia-external-select-recipe.png :alt: 5. In the **Interpretation Target** section, click **Select target column**, and then specify the target column. 6. In the **Interpretation Target** section, click **Select prediction column**, and then specify the prediction column. 7. Click the **Launch MLI** button. .. figure:: images/dia-external-launch.png :alt: Metrics - Binary Classification ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The following are formulas for error metrics and parity checks utilized by binary DIA.", + "prompt_type": "plain" + }, + { + "output": "- **ME** is the difference between the percent of the control group members receiving a favorable outcome and the percent of the protected class members receiving a favorable outcome: .. math:: \\text{ME} \\equiv 100 \\cdot (\\text{Pr}(\\hat{y} = 1 \\vert X_c = 1) - \\text{Pr}(\\hat{y} = 1 \\vert X_p = 1)) .. Where: - :math:`\\hat{y}` is the model decisions. 
- :math:`X_c` and :math:`X_p` are binary markers created from some demographic attribute. - :math:`c` is the control group. - :math:`p` is the protected group. - :math:`Pr(\\cdot)` is the operator for conditional probability. - **AIR** is equal to the ratio of the proportion of the protected class that receives a favorable outcome and the proportion of the control class that receives a favorable outcome: .. math:: \\text{AIR} \\equiv \\frac{Pr(\\hat{y} \\; = 1 \\vert X_p = 1)}{Pr(\\hat{y} \\; = 1 \\vert X_c = 1)} .. Where: - :math:`\\hat{y}` is the model decisions. - :math:`X_p` and :math:`X_c` are binary markers created from some demographic attribute.", + "prompt_type": "plain" + }, + { + "output": "- :math:`c` is the control group. - :math:`p` is the protected group. - :math:`Pr(\\cdot)` is the operator for conditional probability. - **SMD** is used to assess disparities in continuous features such as income differences in employment analyses or interest rate differences in lending: .. math:: \\text{SMD} \\equiv \\frac{\\bar{\\hat y_p} - \\bar{\\hat y_c}}{\\sigma_{\\hat y}} .. Where: - :math:`\\bar{\\hat y_p}` is the average protected class outcome. - :math:`\\bar{\\hat y_c}` is the average control class outcome. - :math:`\\sigma_{\\hat y}` is a measure of the standard deviation of the population. .. note:: - For more information on how DIA is implemented in Driverless AI, see https://www.frontiersin.org/articles/10.3389/frai.2021.695301/full. - Although the process of DIA is the same for both classification and regression experiments, the returned information is dependent on the type of experiment being interpreted. An analysis of a regression experiment returns an actual vs. predicted plot, while an analysis of a binary classification experiment returns confusion matrices.", + "prompt_type": "plain" + }, + { + "output": "In addition to its established use as a fairness tool, users may want to consider disparate impact for broader model debugging purposes. 
For example, users can analyze the supplied confusion matrices and group metrics for important, non-demographic features in the Driverless AI model. - For a list of DIA Summary Plot explainer expert settings, see :ref:`interpretation-expert-settings-dia`. - The mean prediction disparity is the average prediction for the group being considered divided by the average prediction for the reference group. - For more information on group disparity and parity, refer to https://h2oai.github.io/tutorials/disparate-impact-analysis/#5. .. figure:: images/disparate_impact_analysis.png :alt: *Classification Experiment* *Classification Experiment* .. figure:: images/dia_regression.png :alt: *Regression Experiment* *Regression Experiment* .. _dai-time-series: Time Series Explainer ~~~~~~~~~~~~~~~~~~~~~ For time series experiments, the following graphs are provided: - **Metric graph:** View a time series graph that uses the metric that your DAI experiment was optimized for.", + "prompt_type": "plain" + }, + { + "output": "Note that you can use the accompanying slider to view a specific range of dates. .. raw:: html \"Using - **Actual vs. Predicted:** View a graph that contrasts actual and predicted values. Note that this graph also features an accompanying slider that you can use to view a specific range of dates. In addition to the preceding graphs, the following additional information is provided: - **Group metrics:** Grouped metrics are based on an aggregation by group. For example, aggregate by store and department and get counts per group. You can also get the metric of interest, for example aggregate RMSE, etc. You can download all or specific group metrics by clicking the download button. - **Shapley values:** Based on the selected date, Shapley values for each feature are provided in this section. 
To view Value + Bias for each feature and definitions of the transformed feature, click the **Details** button.", + "prompt_type": "plain" + }, + { + "output": "Note that you can select a specific group and / or date by clicking **Group** or **Date**. .. figure:: images/interpret-time-series.png :alt: .. _dai-sa: Sensitivity Analysis (SA) ~~~~~~~~~~~~~~~~~~~~~~~~~ Overview ^^^^^^^^ **Note**: Sensitivity Analysis (SA) is only available for binary classification and regression experiments. Sensitivity Analysis (or \"What if?\") is a simple and powerful model debugging, explanation, fairness, and security tool. The idea behind SA is both direct and simple: Score your trained model on a single row, on multiple rows, or on an entire dataset of potentially interesting simulated values and compare the model\u2019s new outcome to the predicted outcome on the original data. Beyond traditional assessment practices, sensitivity analysis of machine learning model predictions is perhaps the most important validation technique for machine learning models. Sensitivity analysis investigates whether model behavior and outputs remain stable when data is intentionally perturbed or other changes are simulated in the data.", + "prompt_type": "plain" + }, + { + "output": "For example, when looking at predictions that determine financial decisions, SA can be used to help you understand the impact of changing the most important input variables and the impact of changing socially sensitive variables (such as Sex, Age, Race, etc.) in the model. If the model changes in reasonable and expected ways when important variable values are changed, this can enhance trust in the model. Similarly, if the model changes to sensitive variables have minimal impact on the model, then this is an indication of fairness in the model predictions. This page utilizes the `What If Tool `__ for displaying the SA information. 
The top portion of this page includes: - A summary of the experiment - Predictions for a specified column. Change the column on the Y axis to view predictions for that column. - The current working score set. This updates each time you rescore. The bottom portion of this page includes: - A filter tool for filtering the analysis.", + "prompt_type": "plain" + }, + { + "output": "Set the filter type (<,>, etc.). Choose to filter by False Positive, False Negative, True Positive, or True Negative. - Scoring chart. Click the **Rescore** button after applying a filter to update the scoring chart. This chart also lets you add or remove variables, toggle the main chart aggregation, reset the data, and delete the global history while resetting the data. - The current history of actions taken on this page. You can delete individual actions by selecting the action and then clicking the Delete button that appears. .. figure:: images/sensitivity_analysis.png :alt: Column actions ^^^^^^^^^^^^^^ When clicking a column in SA, the following actions are available: - **Absolute:** Change a column to a specific value for all rows. For example, you can set a column to have the value 5 for all observations. This is also possible for categorical columns. For example, you can set a categorical column to have the value \"foobar\" for all observations.", + "prompt_type": "plain" + }, + { + "output": "For example, you can add 9 to all observations in a numerical column. You can also pass in a negative number, for example, -9. The input must be numeric. - **Percentage:** Change a numeric column by some percentage. For example, passing 9 to this field changes all values to be 9% of its original value. For example, if the value is 2 and you pass in 9 as the percentage, then the value changes to be 0.18. The input must be an integer. - **Set:** Run the selected action with the valid value in the textbox. 
- **Randomize:** Randomly change the values in a column, irrespective of what is in the textbox. The change itself is absolute and based on the domain of the column. .. figure:: images/sa-column-actions.png :alt: Understand residuals ^^^^^^^^^^^^^^^^^^^^ Residuals are differences between observed and predicted values. In Sensitivity Analysis, the method used to calculate residuals varies depending on the type of problem. For classification problems, logloss residuals are calculated for the class of interest.", + "prompt_type": "plain" + }, + { + "output": "Use cases ^^^^^^^^^ **Use Case 1: Using SA on a Single Row or on a Small Group of Rows** This section describes scenarios for using SA for explanation, debugging, security, or fairness when scoring a trained model on a single row or on a small group of rows. - **Explanation**: Change values for a variable, and then rescore the model. View the difference between the original prediction and the new model prediction. If the change is big, then the changed variable is locally important. - **Debugging**: Change values for a variable, and then rescore the model. View the difference between the original prediction and the new model prediction and determine whether the change to variable made the model more or less accurate. - **Security**: Change values for a variable, and then rescore the model. View the difference between the original prediction and the new model prediction. If the change is big, then the user can, for example, inform their IT department that this variable can be used in an adversarial attack or inform the model makers that this variable should be more regularized.", + "prompt_type": "plain" + }, + { + "output": "View the difference between the original prediction and the new model prediction. If change is big, then the user can consider using a different model, regularizing the model more, or applying post-hoc bias remediation techniques. 
- **Random**: Set variables to random values, and then rescore the model. This can help you look for things that you might not have thought of. **Use Case 2: Using SA on an Entire Dataset and Trained Model** This section describes scenarios for using SA for explanation, debugging, security, or fairness when scoring a trained model for an entire dataset and trained predictive model. - **Financial Stress Testing**: Assume the user wants to see how their loan default rates will change (according to their trained probability of default model) when they change an entire dataset to simulate that all their customers are under more financial stress (such as lower FICO scores, lower savings balances, higher unemployment, etc.). Change the values of the variables in their entire dataset, and look at the **Percentage Change** in the average model score (default probability) on the original and new data.", + "prompt_type": "plain" + }, + { + "output": "- **Random**: Set variables to random values, and then rescore the model. This lets users look for things they may not have otherwise considered. Additional Resources ^^^^^^^^^^^^^^^^^^^^ `Sensitivity Analysis on a Driverless AI Model `__: This ipynb uses the `UCI credit card default data `__ to perform sensitivity analysis and test model performance. .. _dai-permutation-feature-importance: Permutation Feature Importance ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. note:: - This plot is only available for binary classification and regression experiments. - When permutation importance is enabled for interpretations, it is run as part of the interpretation process, regardless of whether it was run for the original experiment or AutoDoc. Permutation-based feature importance shows how much a model's performance would change if a feature's values were permuted.", + "prompt_type": "plain" + }, + { + "output": "If a feature is highly predictive, however, shuffling its values should decrease the model's performance. 
The difference between the model's performance before and after permuting the feature provides the feature's absolute permutation importance. .. figure:: images/permutation_feature_importance.png :alt: Surrogate Model Plots --------------------- This section describes the plots that are available in the Surrogate Model Tab. .. _klime-limesup: K-LIME and LIME-SUP ~~~~~~~~~~~~~~~~~~~ The MLI screen includes a :ref:`K-LIME ` (K local interpretable model-agnostic explanations) or :ref:`LIME-SUP ` (Locally Interpretable Models and Effects based on Supervised Partitioning) graph. A K-LIME graph is available by default when you interpret a model from the experiment page. When you create a new interpretation, you can instead choose to use LIME-SUP as the LIME method. Note that these graphs are essentially the same, but the K-LIME/LIME-SUP distinction provides insight into the LIME method that was used during model interpretation.", + "prompt_type": "plain" + }, + { + "output": "**Summary** K-LIME creates one global surrogate GLM on the entire training data and also creates numerous local surrogate GLMs on samples formed from *k*-means clusters in the training data. The parameters of the global K-LIME model give an indication of overall linear feature importance and the overall average direction in which an input variable influences the Driverless AI model predictions. The in-cluster linear model parameters can be used to profile the local region, to give an average description of the important variables in the local region, and to understand the average direction in which an input variable affects the Driverless AI model predictions. **Additional details** K-LIME is a variant of the LIME technique proposed by Ribeiro et al. (2016). 
K-LIME generates global and local explanations that increase the transparency of the Driverless AI model, and allow model behavior to be validated and debugged by analyzing the provided plots, and comparing global and local explanations to one-another, to known standards, to domain knowledge, and to reasonable expectations.", + "prompt_type": "plain" + }, + { + "output": "use_all_columns_klime_kmeansin the config.toml file totrue. All penalized GLM surrogates are trained to model the predictions of the Driverless AI model. The number of clusters for local explanations is chosen by a grid search in which the :math:`R^2` between the Driverless AI model predictions and all of the local K-LIME model predictions is maximized. The global and local linear model's intercepts, coefficients, :math:`R^2` values, accuracy, and predictions can all be used to debug and develop explanations for the Driverless AI model's behavior. In addition to the usage described in the preceding section, the global model is also used to generate explanations for very small clusters (:math:`N < 20`) where fitting a local linear model is inappropriate. As described in the preceding section, the in-cluster linear model parameters can be used to profile the local region, to give an average description of the important variables in the local region, and to understand the average direction in which an input variable affects the Driverless AI model predictions.", + "prompt_type": "plain" + }, + { + "output": "By disaggregating the K-LIME predictions into individual coefficient and input variable value products, the local linear impact of the variable can be determined. This product is sometimes referred to as a reason code and is used to create explanations for the Driverless AI model's behavior. .. raw:: html \"Recipe **Reason codes in K-LIME** The K-LIME plot includes a **Reason codes** page that can be accessed by clicking the **Explanations** button. 
From the **Reason codes** page, you can view information about both cluster-specific reason codes and global reason codes. In K-LIME, reason code values are calculated by determining each coefficient-feature product. Reason code values are also written into automatically generated reason codes, available in the local reason code section of the explanations dialog. In the following example, reason codes are created by evaluating and disaggregating a local linear model.", + "prompt_type": "plain" + }, + { + "output": "By taking into consideration the value of each contribution, reason codes for the Driverless AI decision can be derived. debt_to_income_ratio and credit_score would be the two largest negative reason codes, followed by savings_acct_balance. The local linear model intercept and the products of each coefficient and corresponding value sum to the K-LIME prediction. Moreover it can be seen that these linear explanations are reasonably representative of the nonlinear model's behavior for this individual because the K-LIME predictions are within 5.5% of the Driverless AI model prediction. This information is encoded into English language rules which can be viewed by clicking the **Explanations** button. Like all LIME explanations based on linear models, the local explanations are linear in nature and are offsets from the baseline prediction, or intercept, which represents the average of the penalized linear model residuals. Of course, linear approximations to complex non-linear response functions will not always create suitable explanations and users are urged to check the K-LIME plot, the local model :math:`R^2`, and the accuracy of the K-LIME prediction to understand the validity of the K-LIME local explanations.", + "prompt_type": "plain" + }, + { + "output": "In cases where K-LIME linear models are not fitting the Driverless AI model well, nonlinear LOCO feature importance values may be a better explanatory tool for local model behavior. 
As K-LIME local explanations rely on the creation of *k*-means clusters, extremely wide input data or strong correlation between input variables may also degrade the quality of K-LIME local explanations. .. _limesup_technique: The LIME-SUP Technique ^^^^^^^^^^^^^^^^^^^^^^ This plot is available for binary classification and regression models. LIME-SUP explains local regions of the trained Driverless AI model in terms of the original variables. Local regions are defined by each leaf node path of the decision tree surrogate model instead of simulated, perturbed observation samples - as in the original LIME. For each local region, a local GLM model is trained on the original inputs and the predictions of the Driverless AI model. Then the parameters of this local GLM can be used to generate approximate, local explanations of the Driverless AI model.", + "prompt_type": "plain" + }, + { + "output": "This graph is interactive. Hover over the **Model Prediction**, **LIME Model Prediction**, or **Actual Target** radio buttons to magnify the selected predictions. Or click those radio buttons to disable the view in the graph. You can also hover over any point in the graph to view LIME reason codes for that value. By default, this plot shows information for the global LIME model, but you can change the plot view to show local results from a specific cluster. The LIME plot also provides a visual indication of the linearity of the Driverless AI model and the trustworthiness of the LIME explanations. The closer the local linear model approximates the Driverless AI model predictions, the more linear the Driverless AI model and the more accurate the explanation generated by the LIME local linear models. .. figure:: images/global_interpretable.png :alt: .. 
_decision-tree: Surrogate Decision Tree ~~~~~~~~~~~~~~~~~~~~~~~ The decision tree surrogate model increases the transparency of the Driverless AI model by displaying an *approximate* flow-chart of the complex Driverless AI model's decision making process.", + "prompt_type": "plain" + }, + { + "output": "The decision tree surrogate model can be used for visualizing, validating, and debugging the Driverless AI model by comparing the displayed decision-process, important variables, and important interactions to known standards, domain knowledge, and reasonable expectations. It is known to date back at least to 1996 (Craven and Shavlik). A surrogate model is a data mining and engineering technique in which a generally simpler model is used to explain another usually more complex model or phenomenon. Given our learned function :math:`g` and set of predictions, :math:`g(X) = \\hat{Y}`, we can train a surrogate model :math:`h`: :math:`X,\\hat{Y} \\xrightarrow{\\mathcal{A}_{\\text{surrogate}}} h`, such that :math:`h(X)` is approximately equal to :math:`g(X)`. To preserve interpretability, the hypothesis set for :math:`h` is often restricted to linear models or decision trees. For the purposes of interpretation in Driverless AI, :math:`g` is considered to represent the entire pipeline, including both the feature transformations and model, and the surrogate model is a decision tree (:math:`h_{\\text{tree}}`).", + "prompt_type": "plain" + }, + { + "output": "The RMSE for :math:`h_{\\text{tree}}` is displayed for assessing the fit between :math:`h_{\\text{tree}}` and :math:`g`. :math:`h_{\\text{tree}}` is used to increase the transparency of :math:`g` by displaying an approximate flow chart of the decision making process of :math:`g` as displayed in the following image: .. figure:: images/dt_surrogate.png :alt: :math:`h_{\\text{tree}}` also shows the likely important features and the most important interactions in :math:`g`. 
:math:`h_{\\text{tree}}` can be used for visualizing, validating, and debugging :math:`g` by comparing the displayed decision-process, important features, and important interactions to known standards, domain knowledge, and reasonable expectations. The preceding image displays the decision tree surrogate, :math:`h_{\\text{tree}}`, for an example probability of default model, :math:`g`, created with Driverless AI using the UCI repository credit card default data (see https://www.kaggle.com/uciml/default-of-credit-card-clients-dataset).", + "prompt_type": "plain" + }, + { + "output": "First level interactions between PAY_0 and PAY_2 and between PAY_0 and PAY_5 are visible along with several second level interactions. Following the decision path to the lowest probability leaf node in :math:`h_{\\text{tree}}` (lower left in the preceding image) shows that customers who pay their first (PAY_0) and second (PAY_2) month bills on time are the least likely to default according to :math:`h_{\\text{tree}}`. The thickness of the edges in this path indicates that this is a very common decision path through :math:`h_{\\text{tree}}`. Following the decision path to the highest probability leaf node in :math:`h_{\\text{tree}}` (second from right in the preceding image) shows that customers who are late on their first (PAY_0) and fifth (PAY_5) month bills and who pay less than 16520 in their sixth payment (PAY_AMT6) are the most likely to default according to :math:`h_{\\text{tree}}`. The thinness of the edges in this path indicates that this is a relatively rare decision path through :math:`h_{\\text{tree}}`.", + "prompt_type": "plain" + }, + { + "output": "When a single observation, :math:`x^{(i)}`, is selected, its path through :math:`h_{\\text{tree}}` is highlighted. The path of :math:`x^{(i)}` through :math:`h_{\\text{tree}}` can be helpful when analyzing the logic or validity of :math:`g(x^{(i)})`. 
MLI Taxonomy: Decision Tree Surrogate Models ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - **Scope of Interpretability**: - (1) Generally, decision tree surrogates provide global interpretability. - (2) The attributes of a decision tree are used to explain global attributes of a complex Driverless AI model such as important features, interactions, and decision processes. - **Appropriate Response Function Complexity**: Decision tree surrogate models can create explanations for models of nearly any complexity. - **Understanding and Trust**: - (1) Decision tree surrogate models foster understanding and transparency because they provide insight into the internal mechanisms of complex models.", + "prompt_type": "plain" + }, + { + "output": "- **Application Domain**: Decision tree surrogate models are model agnostic. Surrogate Decision Tree Plot ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This plot is available for binary and multiclass classification models as well as regression models. In the Decision Tree plot, the highlighted row shows the path to the highest probability leaf node and indicates the globally important variables and interactions that influence the Driverless AI model prediction for that row. You can view rules for a specific path by clicking the path's terminal node. **Note**: For a list of Surrogate Decision Tree explainer expert settings, see :ref:`interpretation-expert-settings-surrogate-dt`. .. raw:: html \"Surrogate For multiclass models, decision trees are created for each class. To view a decision tree for a specific class, click **Class** in the upper-left corner of the page and select the class you want to view a decision tree for.", + "prompt_type": "plain" + }, + { + "output": "**Global Feature Importance vs Local Feature Importance** Global feature importance (yellow) is a measure of the contribution of an input variable to the overall predictions of the Driverless AI model. 
Global feature importance is calculated by aggregating the improvement in splitting criterion caused by a single variable across all of the decision trees in the Random Forest surrogate model. Local feature importance (grey) is a measure of the contribution of an input variable to a single prediction of the Driverless AI model. Local feature importance values for regression and binomial cases are calculated by tracing single rows of data through the random forest surrogate model and returning the absolute LOCO values. For the multiclass case, local feature importance values are calculated by re-scoring the trained supervised model and measuring the impact of setting each variable to missing. The absolute value of differences across classes is then calculated for each dropped or replaced column.", + "prompt_type": "plain" + }, + { + "output": "**Note**: Engineered features are used for MLI when a time series experiment is built. This is because munged time series features are more useful features for MLI than raw time series features, as raw time series features are not IID (Independent and Identically Distributed). .. figure:: images/rf_feature_importance.png :alt: .. _rf-pdp-ice: Random Forest Partial Dependence and Individual Conditional Expectation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A Partial Dependence and ICE plot is available for both Driverless AI and surrogate models. Refer to the previous :ref:`pdp-ice` section for more information about this plot. .. _rf-loco: Random Forest LOCO ~~~~~~~~~~~~~~~~~~ This plot is available for binary and multiclass classification models as well as regression models. 
Local feature importance describes how the combination of the learned model rules or parameters and an individual row's attributes affect a model's prediction for that row while taking nonlinearity and interactions into effect.", + "prompt_type": "plain" + }, + { + "output": "The LOCO-variant method for binary and regression models is calculated by traversing the random forest surrogate model and removing the prediction contribution of any rule containing the variable of interest for every tree from the original prediction. Local LOCO values are calculated by tracing single rows of data through the random forest surrogate model. Global LOCO values are the average of the LOCO values over every row of a dataset. The LOCO-variant method for multiclass models differs slightly in that it calculates row-wise local feature importance values by re-scoring the trained supervised model and measuring the impact of setting each variable to missing. The sum of the absolute value of differences across classes is then calculated for each dropped or replaced column. 
Given the row of input data with its corresponding Driverless AI and K-LIME predictions: +-------------+-----+----------+-----------+-----------+-------------+ | debt_ | cr | saving | o | H2OAI_pr | K-LIME_ | | to_income\\_ | edi | s_acct\\_ | bserved\\_ | edicted\\_ | predicted\\_ | | ratio | t\\_ | balance | default | default | default | | | sc | | | | | | | ore | | | | | +=============+=====+==========+===========+===========+=============+ | 30 | 600 | 1000 | 1 | 0.85 | 0.9 | +-------------+-----+----------+-----------+-----------+-------------+ Taking the Driverless AI model as F(**X**), LOCO-variant feature importance values are calculated as follows.", + "prompt_type": "plain" + }, + { + "output": ":math:`\\text{Scaled}(\\text{LOCO}_{debt\\_to\\_income\\_ratio}) = \\text{Abs}(\\text{LOCO}_{~debt\\_to\\_income\\_ratio}/0.14) = 1` :math:`\\text{Scaled}(\\text{LOCO}_{credit\\_score}) = \\text{Abs}(\\text{LOCO}_{~credit\\_score}/0.14) = 0.86` :math:`\\text{Scaled}(\\text{LOCO}_{savings\\_acct\\_balance}) = \\text{Abs}(\\text{LOCO}_{~savings\\_acct\\_balance} / 0.14) = 0.21` One drawback to these LOCO-variant feature importance values is, unlike K-LIME, it is difficult to generate a mathematical error rate to indicate when LOCO values may be questionable. .. figure:: images/loco_plot.png :alt: .. _nlp-surrogate: NLP Surrogate Models ~~~~~~~~~~~~~~~~~~~~ These plots are available for natural language processing (NLP) models. For NLP surrogate models, Driverless AI creates a TF-IDF matrix by tokenizing all text features. The resulting frame is appended to numerical or categorical columns from the training dataset, and the original text columns are removed. 
This frame is then used for training surrogate models that have prediction columns consisting of tokens and the original numerical or categorical features.", + "prompt_type": "plain" + }, + { + "output": "- Each row in the TF-IDF matrix contains :math:`N` columns, where :math:`N` is the total number of tokens in the corpus with values that are appropriate for that row (0 if absent). - Driverless AI does not currently generate a K-LIME scoring pipeline for MLI NLP problems. .. _surrogate-models-on-residuals: Running Surrogate Models on Residuals ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In Driverless AI, residuals (differences between observed and predicted values) can be used as targets in MLI surrogate models for the purpose of debugging models. The method used to calculate residuals varies depending on the type of problem. For classification problems, logloss residuals are calculated for a specified class. For regression problems, residuals are determined by calculating the square of the difference between targeted and predicted values. To run MLI surrogate models on residuals, enable the **Debug Model Residuals** interpretation expert setting. For classification experiments, specify a class to use as an outcome of interest with the **Class for Debugging Classification Model Logloss Residuals** interpretation expert setting (not visible for regression problems).", + "prompt_type": "plain" + }, + { + "output": ".. figure:: images/mli_surrogate_residuals.png :alt: .. _mli-nlp-plots: NLP Plots --------- This section describes the plots that are available in the NLP tab. - :ref:`dai-nlp-loco` - :ref:`mli-nlp-pdp` - :ref:`mli-nlp-tokens` - :ref:`mli-nlp-vlm` .. note:: - The following plots are only available for natural language processing (NLP) models. .. _dai-nlp-loco: NLP Leave-One-Covariate-Out (LOCO) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This plot is available for binomial, multiclass, and regression natural language processing (NLP) models. 
It is located in the **NLP** tab on the Model Interpretation page, which is only visible for NLP models. .. raw:: html \"NLP This plot applies a leave-one-covariate-out (LOCO) styled approach to NLP models by removing a specific token, which is obtained by TF-IDF, from only a single column where the token is occurring. For example, if there is a token foo in both column1 and column2, LOCO is computed for both columns separately, even though the token is the same.", + "prompt_type": "plain" + }, + { + "output": "In addition, if a token does **not** exist in a row, then it is appended before calculating LOCO to ensure the token was evaluated across all rows. The difference between the resulting score and the original score (token included) is useful when trying to determine how specific changes to text features alter the predictions made by the model. Driverless AI fits a separate TF-IDF vectorizer for each individual column and concatenates the results. The terms (tokens) in the resulting importance frames are then wrapped with column names: .. table:: Column Names Example +-----------------------+-----------------------+-----------------------+ | column1('and') | column1('apple') | column2('and') | +=======================+=======================+=======================+ | 0.1 | 0.0005 | 0.412512 | +-----------------------+-----------------------+-----------------------+ The NLP LOCO plot lets you view text for a specific row by specifying a row number.", + "prompt_type": "plain" + }, + { + "output": "You can switch between different text features and view their respective importances globally and locally. .. note:: - Due to computational complexity, the global importance value is only calculated for :math:`N` (20 by default) tokens. This value can be changed with the mli_nlp_top_n configuration option.
- A specific token selection method can be used by specifying one of the following options for the mli_nlp_min_token_mode configuration option: - linspace: Selects :math:`N` evenly spaced tokens according to their TF-IDF score (Default) - top: Selects top :math:`N` tokens by TF-IDF score - bottom: Selects bottom :math:`N` tokens by TF-IDF score - Local values for NLP LOCO can take a significant amount of time to calculate depending on the specifications of your hardware. - Driverless AI does not currently generate a K-LIME scoring pipeline for MLI NLP problems. .. _mli-nlp-pdp: NLP Partial Dependence Plot ~~~~~~~~~~~~~~~~~~~~~~~~~~~ This plot is available for binomial, multiclass, and regression natural language processing (NLP) models.", + "prompt_type": "plain" + }, + { + "output": "NLP partial dependence (yellow) portrays the average prediction behavior of the Driverless AI model when an input text token is left in its respective text and not included in its respective text along with +/- 1 standard deviation bands. ICE (grey) displays the prediction behavior for an individual row of data when an input text token is left in its respective text and not included in its respective text. The text tokens are generated from TF-IDF. .. raw:: html \"NLP .. _mli-nlp-tokens: NLP Tokenizer ~~~~~~~~~~~~~ This plot is available for natural language processing (NLP) models. It is located in the **NLP** tab on the Model Interpretation page, which is only visible for NLP models. .. raw:: html \"NLP This plot shows both the global and local importance values of each token in a corpus (a large and structured set of texts).", + "prompt_type": "plain" + }, + { + "output": "Local importance values are calculated by using the term frequency\u2013inverse document frequency (TF-IDF) as a weighting factor for each token in each row.
The TF-IDF increases proportionally to the number of times a token appears in a given document and is offset by the number of documents in the corpus that contain the token. Specify the row that you want to view, then click the **Search** button to see the local importance of each token in that row. Global importance values are calculated by using the inverse document frequency (IDF), which measures how common or rare a given token is across all documents. (Default View) You can download an archive of files relating to the NLP Tokenizer plot by clicking \"NLP Tokenizer ZIP Archive\" in the NLP tab. .. note:: - MLI for NLP does not currently feature the option to remove stop words. - By default, up to 10,000 tokens are created during the tokenization process. This value can be changed in the configuration. - By default, Driverless AI uses up to 10,000 documents to extract tokens from.", + "prompt_type": "plain" + }, + { + "output": "Downsampling is used for datasets that are larger than the default sample limit. - Driverless AI does not currently generate a K-LIME scoring pipeline for MLI NLP problems. - With the LOCO method, a specific token is removed from only a single column where the token is occurring. For example, if there is a token foo in both column1 and column2, LOCO is\n computed for both columns separately, even though the token is the\n same. The TF-IDF for the token differs in both columns. NLP Vectorizer + Linear Model (VLM) Text Feature Importance\nThis plot is available for binomial and regression natural language\nprocessing (NLP) models. It is located in the NLP tab on the Model\nInterpretation page, which is only visible for NLP models.
NLP Vectorizer + Linear Model (VLM) text feature importance uses TF-IDF\nof individual words as features from a text column of interest and\nbuilds a linear model (currently GLM) using those features and fits it\nto either the predicted class (binary classification) or the continuous\nprediction (regression) of the Driverless AI model.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Installation and Upgrade\n\nThe following sections describe how to install and upgrade Driverless\nAI.\n\nNote: Driverless AI is available as part of the H2O AI Cloud (HAIC)\nplatform or as a standalone offering. For information on HAIC, see the\nofficial documentation.\n\nsupported-environments installing-before-you-begin docker native cloud", + "prompt_type": "plain" + }, + { + "output": "Splitting Datasets\nDriverless AI lets you split a dataset into two subsets that can be used\nas training and validation/test datasets during modeling. When splitting\ndatasets for modeling, each split should have a similar distribution to\navoid over fitting on the training set. Depending on the use case, you\ncan either split the dataset randomly, perform a stratified sampling\nbased on the target column, perform a fold column-based split to keep\nrows belonging to the same group together, or perform a time\ncolumn-based split to train on past data and validate/test on future\ndata. Perform the following steps to split a dataset:\n1. Click the dataset or select the [Click for Actions] button next to\n the dataset that you want to split and select Split from the submenu\n that appears. 2. The Dataset Splitter form displays. Specify an Output Name 1 and an\n Output Name 2 for each segment of the split. (For example, you can\n name one segment test and the other validation.) 3. 
Optionally specify a Target column (for stratified sampling), a Fold\n column (to keep rows belonging to the same group together), a Time\n column, and/or a Random Seed (defaults to 1234).", + "prompt_type": "plain" + }, + { + "output": "MLI Custom Recipes\nThe techniques and methodologies used by Driverless AI for model\ninterpretation can be extended with recipes (Python code snippets). You\ncan use your own recipes in combination with or in place of DAI's\nbuilt-in recipes. This lets you extend the capabilities of MLI\nexplainers and out of the box interpretation techniques. The following\nsteps describe how to upload and enable custom recipes in the Machine\nLearning Interpretability (MLI) view. Note\nFor more information on MLI custom recipes including best practices,\ntutorials, explainer templates, and explainer examples, see the official\nRecipes for Machine Learning Interpretability in Driverless AI repository . To upload a custom recipe:\n 1. Navigate to the MLI page and click the New Interpretation button. Select Upload MLI Recipe from the drop-down menu. You can also\n select MLI Recipe URL to load a recipe from a raw file, a GitHub\n repository / tree, or a local directory.", + "prompt_type": "plain" + }, + { + "output": "Multinode Training (Alpha)\n\nDriverless AI can be configured to run in a multinode worker mode. This\ndocument describes the multinode training process and how to configure\nit.\n\nNotes: For more information on queuing in Driverless AI, see\ndai-queuing.\n\nredis_multinode dask_multinode multinode_example health_api", + "prompt_type": "plain" + }, + { + "output": "Using Driverless AI configuration options\nThis page describes how to use Driverless AI (DAI) configuration\noptions. 
- understanding-configs\n- understanding-expert-settings\n- toml_editor_using\n- expert-settings-use-case\nUnderstanding DAI configuration options\nDriverless AI features many different kinds of configuration options\nthat you can use to configure various aspects of your DAI environment,\nincluding authentication, data connectors, UI, experiments, and MLI. The\nfollowing methods can be used to control the available DAI configuration\noptions:\n- Administrators can edit the config.toml file, which is a\n configuration file that uses the TOML v0.5.0 file format. The\n config.toml file lets you control all of the configuration options\n documented in the dai_config page. For more information, see\n config_file. - Using the Expert Settings window, which is accessible from the\n Experiment Setup page by clicking Expert Settings. - Using the built-in TOML config editor, which is accessible from the\n Expert Settings window.", + "prompt_type": "plain" + }, + { + "output": "Note\nSome configuration options, such as those related to authentication and\ndata connectors, are applied when starting the DAI server and cannot be\nchanged without restarting the DAI server. Understanding Expert Settings\nWhen creating an experiment, you can specify basic\nsettings for the experiment such as whether to\ndrop specific columns or whether to include a validation dataset. However, you may want to customize the experiment in a manner that is\nbeyond the scope of these basic settings\u2014in this case, Expert Settings\ncan be used to further fine-tune the experiment. For example, you can\nuse Expert Settings to include specific models or transformers as part\nof the experiment. To open the Expert Settings window, click Expert\nSettings on the Experiment Setup page. []\nNotes:\n- For supervised experiments, the Expert Settings window cannot be\n accessed until a target column has been selected. 
- Some of the settings listed in the dai_config page are not exposed\n in the Expert Settings window.", + "prompt_type": "plain" + }, + { + "output": "Navigating the Expert Settings window\nThe following sections describe how to navigate the Expert Settings\nwindow. Tabbed view\nWhen the Tabbed view is selected, the available Expert Settings are\norganized into the following tabs and sub-tabs. For each sub-tab in the\nfollowing list, the available settings are organized into Common and\nAdvanced settings. - Training: Configure settings related to the model training process. - General\n - Data\n - Feature Engineering\n - Models\n - Genetic Algorithm\n - Validation\n - Deployment\n- Documentation: Configure settings related to AutoDoc, model\n performance, and model interpretation. - General\n - Data\n - Models\n - Model Performance\n - Interpretation\n- System: Configure system-related settings. (This tab has only one\n sub-tab that is also called System.) []\nTabbed view: sub-tabs\nThe following is a list of sub-tab level categories:\n- Common\n- Advanced\n- Image\n- NLP\n- Time Series\n- Unsupervised\nFlat view\nYou can also select the Flat view to view all of the available settings\nin a single searchable window.", + "prompt_type": "plain" + }, + { + "output": "Searching for specific settings\nTo locate a specific Expert Setting, click the search box and type the\nconfiguration name of the Expert Setting you want to locate. For some\nExpert Settings, additional results for related Expert Settings are also\ndisplayed. Filtering settings by tags\nTo filter the list of available settings by specific tags, click the\nFilter by Tags button and select the checkbox next to the tag(s) that\nyou want to filter the list of available settings by. Note that both\nglobal and sub-tab level filtering are supported. []\nAdding custom recipes\nYou can add custom recipes from the Expert Settings window by clicking\nthe Add Custom Recipes button. 
Select one of the following options:\n- From computer: Add a custom recipe as a Python or ZIP file from your\n local file system. - From URL: Add one or more custom recipes from a URL that points to\n one of the following locations:\n - A GitHub repository. For example, you can enter\n https://github.com/h2oai/driverlessai-recipes/ to add all the\n custom recipes contained in the official Recipes for\n Driverless AI repository.", + "prompt_type": "plain" + }, + { + "output": "For example, you can enter\n https://github.com/h2oai/driverlessai-recipes/tree/master/models\n to add only the custom model recipes contained in the official\n Recipes for Driverless AI repository, or enter\n https://github.com/h2oai/driverlessai-recipes/tree/master/models/algorithms\n to add only the custom algorithm recipes contained in the\n repository. - A file system path. This option is equivalent to the File\n System option when adding datasets. - From Bitbucket: Add a custom recipe from a Bitbucket repository. To\n use this option, your Bitbucket username and password must be\n provided along with the custom recipe Bitbucket URL. - With Editor: Add a custom recipe with a built-in code editor. []\nNote that you can also view the official Recipes for Driverless AI\nrepository from the Expert Settings window by clicking the Official\nRecipes button. Using the built-in TOML config editor\nThe TOML configuration editor lets you manually add, remove, or edit\nExpert Setting parameters.", + "prompt_type": "plain" + }, + { + "output": "To open the built-in TOML configuration\neditor, click Edit TOML in the Expert Settings window. Opening the\nbuilt-in TOML editor is currently the best way to review changed\nconfiguration items in a single location. []\nThe built-in TOML editor is synchronized with the Expert Settings\nwindow. This means that if you change the default value of an expert\nsetting from the Expert Settings window, that change is displayed in the\nTOML configuration editor. 
For example, if you set the Make MOJO scoring\npipeline setting in the Experiment tab to Off, then the line\nmake_mojo_scoring_pipeline = \"off\" is displayed in the TOML editor. Conversely, if you make changes using the TOML editor, those changes are\nalso visible from the Expert Settings window. You can confirm that your\nchanges have been correctly entered into the editor by checking whether\nthe relevant settings have also changed in the Expert Settings window. To confirm your changes, click Save. The experiment preview updates to\nreflect your specified configuration changes.", + "prompt_type": "plain" + }, + { + "output": "This section provides Driverless AI with\ninformation about which custom recipes can be used by the experiment. This is important for keeping experiments comparable when performing\nretrain / refit operations. Note\n- The settings listed in the dai_config page cannot be edited from the\nbuilt-in TOML editor unless they are exposed in the Expert Settings\nwindow. - For information on TOML, see TOML v0.5.0. Order of settings in the TOML editor\nWhen using the built-in TOML editor, ensure that settings are added in\nthe following order:\n1. Booleans, integers, strings, and lists\n2. Unprocessed dictionaries, which are automatically processed after\n clicking the Save button\n3. Processed dictionaries\nChecking TOML validity\nThe TOML Python library can be used to check the validity of your TOML\nto avoid errors when using the built-in TOML editor. To install the TOML\nPython library, run the following command:\n pip install toml\nThe following examples demonstrate how the TOML Python library can be\nused to check whether your TOML is valid.", + "prompt_type": "plain" + }, + { + "output": "The toml.loads() function is then used to\n convert the string into a dictionary. 
- Entering an invalid string: In the following example, an error is\n returned after attempting to convert the entered TOML string into a\n dictionary, which means that the entered string is not valid. Sample use case: Hyperparameter tuning\nThe following steps describe how to perform hyperparameter tuning by\nusing the params_tune_lightgbm Expert Setting. 1. On the Experiments page, click the New Experiment button and select\n a training dataset to use for the experiment. 2. Select a target column and specify a test dataset to use for the\n experiment. 3. Click Expert Settings to open the Expert Settings window. 4. Go to the Recipes tab. For the Include specific models setting,\n click Uncheck All and select LightGBM from the list of available\n models. Click Done to confirm your selection. Completing this step\n lets you view how only LightGBM mutates. 5. In the Expert Settings window, enter params_tune into the search box\n to view all of the available params_tune TOMLs.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Security\nObjective\nThis document describes different aspects of Driverless AI security and\nprovides guidelines to secure the system by reducing its surface of\nvulnerability. This section covers the following areas of the product:\n - security_user_access\n - security_auth (Also see dai_auth)\n - Authorization\n - security_data\n - security_data_import\n - security_data_export\n - security_logs\n - security_data_isolation\n - security_client_server\n - security_response_headers\n - security_recommended_headers\n - security_other_headers\n - security_web_ui\n - security_custom_recipe\n - security_config (Also see\n in depth documentation on configuration\n security in DAI)\nImportant things to know\nWarning\nWARNING Security in a default installation of Driverless AI is DISABLED! 
By default, a Driverless AI installation targets ease-of-use and does\nnot enable all security features listed in this document.", + "prompt_type": "plain" + }, + { + "output": "------------------------------------------------------------------------\nUser Access\nAuthentication\nDriverless AI supports Client Certificate, LDAP, Local, mTLS, OpenID,\nPAM, none, and unvalidated (default) authentication. These can be\nconfigured by specifying the environment variables when starting the\nDriverless AI Docker image or by specifying the appropriate\nconfiguration options in the config.toml file. For more info, see\ndai_auth. --------------------------------------------------------------------------------------------------------------\n Option D efa ult Va lue Recommended Value Description\n ----------------------------------------- ------------------- ------------------------------- ----------------\n a uthenticati on_method \"un val ida ted \" Any supported authentication Define user\n (e.g., LDAP, PAM) method except authentication\n \"unvalidated\" and \"none\".", + "prompt_type": "plain" + }, + { + "output": "authe ntication_d efault_time out_hours 7 2 Consult your security Number of hours\n requirements. after which a\n user has to\n relogin. --------------------------------------------------------------------------------------------------------------\nmTLS Authentication\nDriverless AI supports Mutual TLS authentication (mTLS) by setting a\nspecific verification mode along with a certificate authority file, an\nSSL private key, and an SSL certificate file. For more information, see\nthe mtls_auth. Authorization Methods\nDriverless AI does not currently perform any authorization. 
------------------------------------------------------------------------\nData Security\nData Import\n ----------------------------------------------------------------------------------------------------------------\n Op tion D efault Value Recommended Value Description\n --------------------------- ------------------------------ ----------------------------- -----------------------\n en able d_fi le_s yste ms \"u pload, file, hdfs, s3\" Configure only needed data Control list of\n sources.", + "prompt_type": "plain" + }, + { + "output": "ma x_fi le_u ploa d_si ze 104 857600 000B Configure based on expected Limit maximum size of\n file size and size of uploaded file. Driverless AI deployment. su ppor ted_ file _typ es see confi g.toml It is recommended to limit Supported file formats\n file types to extension used listed in filesystem\n in the target environment browsers. (e.g., parquet). sh ow_a ll_f iles yste ms true false Show all available data\n sources in WebUI (even\n though there are not\n configured).", + "prompt_type": "plain" + }, + { + "output": "----------------------------------------------------------------------------------------------------------------\nData Export\n ---------------------------------------------------------------------------------------------------------\n Option Def ault V alue Recommended Description\n Value \n ----------------------------------- ---------------- ---------------- -----------------------------------\n enab le_dataset_d ownloading tr ue false (disable Control ability to download any\n download of datasets (uploaded, predictions,\n datasets) MLI). Note: if dataset download is\n disabled, we strongly suggest to\n disable custom recipes as well to\n remove another way how data could\n be exported from the application.", + "prompt_type": "plain" + }, + { + "output": "(See notes below.) artif acts_store f ile_ syst em `file_system` Stores a MOJO on a file system\n directory denoted by\n artifac ts_file_system_directory. 
(See notes below.) artifacts _file_system _directory t mp tmp File system location where\n artifacts will be copied in case\n artifacts_store is set to\n file_system. (See notes below.) ---------------------------------------------------------------------------------------------------------\nNotes about Artifacts:\n- Currently, file_system is the only option that can be specified for\n artifacts_store. Additional options will be available in future\n releases.", + "prompt_type": "plain" + }, + { + "output": "- When these artifacts are enabled/configured, the menu options on the\n completed_experiment page change. Specifically, all \"Download\"\n options (with the exception of AutoDoc) change to \"Export.\" Refer to\n export_artifacts for more information. Logs\nThe Driverless AI produces several logs:\n - audit logs\n - server logs\n - experiment logs\nThe administrator of Driverless AI application (i.e., person who is\nresponsible for configuration and setup of the application) has control\nover content which is written to the logs. -------------------------------------------------------------------------------------------------------\n Option D ef au Reco Description\n lt V al mmended \n ue Value \n ------------------------------------------- ------- --------- -----------------------------------------\n audit_lo g_retentio n_period `5 ` (d 0 ( Number of days to keep audit logs.", + "prompt_type": "plain" + }, + { + "output": "audit log \n ro \n tation) \n do_not_ log_list s ee c --- Contain list of configuration options\n on fi which are not recorded in logs. g. to \n ml \n l og_level `1 ` see conf Define verbosity of logging\n ig.toml \n collect_se rver_logs_ in_experim ent_logs `f al false Dump server logs with experiment. se ` Dangerous because server logs can contain\n information about experiments of other\n users using Driverless AI. 
h2o _recipes_l og_level No ne --- Log level for OSS H2O instances used by\n custom recipes.", + "prompt_type": "plain" + }, + { + "output": "se ` \n write_ recipes_to _experimen t_logger `f al false Dump a custom recipe source code into\n se ` logs. -------------------------------------------------------------------------------------------------------\nUser Data Isolation\n+---------+---+----------------------+----------------------------------+\n| Option | D | Recommended Value | Description |\n| | e | | |\n| | f | | |\n| | a | | |\n| | u | | |\n| | l | | |\n| | t | | |\n| | V | | |\n| | a | | |\n| | l | | |\n| | u | | |\n| | e | | |\n+=========+===+======================+==================================+\n| da | | Specify proper name | Directory where Driverless AI |\n| ta_dir | \" | and location of | stores all computed experiments |\n| e ctory | | directory.", + "prompt_type": "plain" + }, + { + "output": "| | |\n| | | | |\n| | / | | |\n| | | | |\n| | t | | |\n| | | | |\n| | m | | |\n| | | | |\n| | p | | |\n| | | | |\n| | \" | | |\n| | | | |\n+---------+---+----------------------+----------------------------------+\n| file_ | | true | Hide data_directory in |\n| hide_da | t | | file-system browser.", + "prompt_type": "plain" + }, + { + "output": "|\n| | u | | |\n| | | | |\n| | e | | |\n| | | | |\n+---------+---+----------------------+----------------------------------+\n| f i | | true | Enable path filter for |\n| le_pat | f | | file-system browser (file data |\n| h_filte | | | source). By default the filter |\n| ring_e | a | | is disabled which means users |\n| n abled | | | can browse the entire |\n| | l | | application-local filesystem. 
|\n| | | | |\n| | s | | |\n| | | | |\n| | e | | |\n| | | | |\n+---------+---+----------------------+----------------------------------+\n| file_ | | Include a list of | List of absolute path prefixes |\n| path_fi | [ | folder paths or | to restrict access to in |\n| lter_i | | {{DAI_USERNAME}} for | file-browser.", + "prompt_type": "plain" + }, + { + "output": "For | |\n| | | example, | |\n| | | \"['/h | |\n| | | ome/{{DAI_USERNAME} | |\n| | | } /','/data/prod']\". | |\n+---------+---+----------------------+----------------------------------+\n| a ut | | \"\" | Directory where Driverless AI |\n| odoc_ a | \" | | searches for the updated AutoDoc |\n| dditio | | | templates. Providing empty value |\n| nal_tem | \" | | \"\" disables this functionality. |\n| plate_ | | | |\n| f older | | | |\n+---------+---+----------------------+----------------------------------+\n------------------------------------------------------------------------\nClient-Server Communication Security\n -----------------------------------------------------------------------------------------------\n Option Default Value Recommended Value Description\n ------------------ ------------------------------ ---------------------- ----------------------\n en able_h ttps false true Enable HTTPS\n ss l_key_ file \"/et c/dai/privat e_key.pem\" Correct private key.", + "prompt_type": "plain" + }, + { + "output": "ss l_crt_ file \"/etc/dai /cert.pem\" Correct public Public certificate to\n certificate. setup HTTPS/SSL. ss l_no_s slv2 true true Prevents an SSLv2\n connection. ss l_no_s slv3 true true Prevents an SSLv3\n connection. ss l_no_t lsv1 true true Prevents a TLSv1\n connection. ssl_ no_tls v1_1 true true Prevents a TLSv1.1\n connection.
ssl_ no_tls v1_2 false false (disable TLSv1.2 Prevents a TLSv1.2\n only if TLSv1.3 is connection.", + "prompt_type": "plain" + }, + { + "output": "-----------------------------------------------------------------------------------------------\nHTTP Cookie Attributes\nBy default, HTTP cookies used by Driverless AI are issued with the\nfollowing attributes:\n - HTTPOnly: True\n - SameSite: Lax\nIf either of these needs to be overridden, or if more custom attributes\nneed to be set, you can use the config http_cookie_attributes to specify\nkey-value pairs of so-called cookie morsels. For a list of supported\nkeys, see the official Python documentation. Response Headers\nThe response headers which are passed between Driverless AI server and\nclient (browser, Python/R clients) are controlled via the following\noption:\n ---------------------------------------------------------------------------\n Option Default Re Description\n Value commended \n Value \n --------------------- --------- ----------- -------------------------------\n extra_ht tp_headers \"{}\"`` See below Configure HTTP header returned\n in server response.", + "prompt_type": "plain" + }, + { + "output": "The | | |\n| | max-age | | |\n| | specifies | | |\n| | time, in | | |\n| | seconds, | | |\n| | that the | | |\n| | browser | | |\n| | should | | |\n| | remember | | |\n| | that a | | |\n| | site is | | |\n| | only to | | |\n| | be | | |\n| | accessed | | |\n| | using | | |\n| | HTTPS.", + "prompt_type": "plain" + }, + { + "output": "| c.mozilla.org/gu |\n| | certain | | idelines/web_sec |\n| | types of | | urity#Examples_5 |\n| | attacks, | | |\n| | including | | |\n| | Cross | | |\n| | Site | | |\n| | Scripting | | |\n| | and data | | |\n| | injection | | |\n| | attacks. 
| | |\n| | Controls | | |\n| | from | | |\n| | where the | | |\n| | page can | | |\n| | download | | |\n| | source.", + "prompt_type": "plain" + }, + { + "output": "| | |\n| | The value | | |\n| | here | | |\n| | overrides | | |\n| | the | | |\n| | default, | | |\n| | which is | | |\n| | SAM | | |\n| | E ORIGIN. | | |\n+------+-----------+--------------------------------+------------------+\n| X-C | Prevents | nosniff | https://develope |\n| o nt | the | | r.mozilla.org/en |\n| en t | browser | | -US/docs/Web/HTT |\n| -Ty | from | | P/Headers/X-Cont |\n| pe-O | trying to | | ent-Type-Options |\n| pti | determine | | |\n| o ns | the con | | |\n| | tent-type | | |\n| | of a | | |\n| | resource | | |\n| | that is | | |\n| | different | | |\n| | than the | | |\n| | declared | | |\n| | cont | | |\n| | ent-type.", + "prompt_type": "plain" + }, + { + "output": "|\n| Prot | rotection | | org/en-US/docs/W |\n| ect | response | | eb/HTTP/Headers/ |\n| i on | header is | | X-XSS-Protection |\n| | a feature | | |\n| | of | | |\n| | Internet | | |\n| | Explorer, | | |\n| | Chrome | | |\n| | and | | |\n| | Safari | | |\n| | that | | |\n| | stops | | |\n| | pages | | |\n| | from | | |\n| | loading | | |\n| | when they | | |\n| | detect | | |\n| | reflected | | |\n| | c | | |\n| | ross-site | | |\n| | scripting | | |\n| | (XSS) | | |\n| | attacks.", + "prompt_type": "plain" + }, + { + "output": "| | |\n+------+-----------+--------------------------------+------------------+\nOther Headers to Consider\n ------------------------------------------------------------------------\n Header Documentation\n ------------------ -----------------------------------------------------\n Pub lic-Key-Pins https://developer\n CORS-related .mozilla.org/en-US/docs/Web/HTTP/Public_Key_Pinning\n headers htt\n ps://developer.mozilla.org/en-US/docs/Web/HTTP/CORS\n 
------------------------------------------------------------------------\n------------------------------------------------------------------------\nWeb UI Security\nNote\nThe Driverless AI UI is design to be user-friendly, and by default all\nfeatures like auto-complete are enabled. Disabling the user-friendly\nfeatures increases security of the application, but impacts\nuser-friendliness and usability of the application. -------------------------------------------------------------------------------------\n Option Def Recom Description\n ault V mended \n alue Value \n ----------------------------- ------- -------- --------------------------------------\n all ow_form_aut ocomplete tr ue f alse Control auto-completion in Web UI\n elements (e.g., login inputs).", + "prompt_type": "plain" + }, + { + "output": "show_all_fi lesystems tr ue f alse Show all available data sources in\n WebUI (even though there are not\n configured). It is recommended to show\n only configured data sources. verify_s ession_ip `fal true Verifies each request IP against IP\n se` which initialized the session. allow _concurrent _sessions tr ue f alse Disable concurrent sessions (logins). en able_xsrf_p rotection tr ue true Enable XSRF (cross-site request\n forgery) protection. e nable_secur e_cookies `fal true Enable SECURE cookie flag. Note that\n se` HTTPS must be enabled. -------------------------------------------------------------------------------------\n------------------------------------------------------------------------\nCustom Recipe Security\nNote\nBy default Driverless AI enables custom recipes as a main route for the\nway data-science teams can extend the application capabilities.", + "prompt_type": "plain" + }, + { + "output": "and bundle only a pre-defined\nand approved set of custom Driverless AI extensions. 
--------------------------------------------------------------------------------------------\n Option De fault Reco Description\n Value mmended \n Value \n ------------------------------------------- -------- --------- -----------------------------\n ena ble_custom_recipes t rue false Enable custom Python recipes. enable_cus tom_recipes_upload t rue false Enable uploading of custom\n recipes. enable_custo m_recipes_from_url t rue false Enable downloading of custom\n recipes from external URL. include_custom_ recipes_by_default fa lse false Include custom recipes in\n default inclusion lists.", + "prompt_type": "plain" + }, + { + "output": "Launching H2O Flow\n\nIf you opened port 12348 when starting Driverless AI, then you can\nlaunch H2O Flow from within Driverless AI. Click the H2O-3 link in the\ntop menu.\n\n[]\n\nThis launches Flow on port 12348.\n\n[]", + "prompt_type": "plain" + }, + { + "output": "mTLS Authentication Example\nDriverless AI supports Mutual TLS authentication (mTLS) by setting a\nspecific verification mode along with a certificate authority file, an\nSSL private key, and an SSL certificate file. The diagram below is a\nvisual representation of the mTLS authentication process. []\nDescription of Configuration Attributes\nUse the following configuration options to configure mTLS. - ssl_client_verify_mode: Sets the client verification mode. Choose\n from the following verification modes:\n- ssl_ca_file: Specifies the path to the certification authority (CA)\n certificate file, provided by your organization. This certificate\n will be used to verify the client certificate when client\n authentication is enabled. If this is not specified, clients are\n verified using the default system certificates. - ssl_key_file: Specifies your web server private key file. This is\n normally created by your organization's sys admin. 
- ssl_crt_file: Specifies your web server public certificate file.", + "prompt_type": "plain" + }, + { + "output": "- ssl_client_key_file: Required if\n ssl_client_verify_mode = \"CERT_REQUIRED\". Specifies the private key\n file that Driverless AI uses to authenticate itself. This is\n normally created by your organization's sys admin. - ssl_client_crt_file: Required if\n ssl_client_verify_mode = \"CERT_REQUIRED\". Specifies the private\n client certificate file that Driverless AI will use to authenticate\n itself. This is normally created by your organization's sys admin. - auth_tls_crl_file: Specifies the path to the certificate revocation\n list file that will be used to verify the client certificate. This\n file contains a list of revoked user IDs. Configuration Scenarios\nThe table below describes user certificate behavior for mTLS\nauthentication based on combinations of the configuration options\ndescribed above. +--------------------+--------------+------------------+--------------+\n| config.toml | User does | User has a | User has a |\n| settings | not have a | correct and | revoked |\n| | certificate | valid | certificate |\n| | | certificate | |\n+====================+==============+==================+==============+\n| ssl_client_verify | User certs | User certs are | User revoked |\n| _ mode='CERT_NONE' | are ignored | ignored | certs are |\n| | | | ignored |\n+--------------------+--------------+------------------+--------------+\n| ssl_ | User certs | User certs are | User revoked |\n| client_verify_mod | are ignored | set to | certs are |\n| e ='CERT_OPTIONAL' | | Driverless AI | not |\n| | | but are not used | validated |\n| | | for validating | |\n| | | the certs | |\n+--------------------+--------------+------------------+--------------+\n| ssl_ | Not allowed | User provides a | User revoke |\n| client_verify_mod | | valid | lists are |\n| e ='CERT_REQUIRED' | | certificate used | not |\n| | | by Driverless AI | validated |\n| | | but does not 
| |\n| | | authenticate the | |\n| | | user | |\n+--------------------+--------------+------------------+--------------+\n| sl_ | Not allowed | User provides a | User revoked |\n| client_verify_mod | | valid | certs are |\n| e ='CERT_REQUIRED' | | certificate.", + "prompt_type": "plain" + }, + { + "output": "| |\n+--------------------+--------------+------------------+--------------+\nEnabling mTLS Authentication\nDocker Image Installs\nTo enable mTLS authentication in Docker images, specify the\nauthentication environment variable that you want to use. Each variable\nmust be prepended with DRIVERLESS_AI. nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -p 12345:12345 \\\n -u `id -u`:`id -g` \\\n -e DRIVERLESS_AI_ENABLE_HTTPS=true \\\n -e DRIVERLESS_AI_SSL_KEY_FILE=/etc/dai/private_key.pem \\\n -e DRIVERLESS_AI_SSL_CRT_FILE=/etc/dai/cert.pem \\\n -e DRIVERLESS_AI_AUTHENTICATION_METHOD=tls_certificate \\\n -e DRIVERLESS_AI_SSL_CLIENT_VERIFY_MODE=CERT_REQUIRED \\\n -e DRIVERLESS_AI_SSL_CA_FILE=/etc/dai/rootCA.pem \\\n -e DRIVERLESS_AI_SSL_CLIENT_KEY_FILE=/etc/dai/client_config_key.key \\\n -e DRIVERLESS_AI_SSL_CLIENT_CRT_FILE=/etc/dai/client_config_cert.pem \\\n -v /user/log:/log \\\n -v /user/tmp:/tmp \\\n -v /user/certificates/server_config_key.pem:/etc/dai/private_key.pem \\\n -v /user/certificates/server_config_cert.pem:/etc/dai/cert.pem \\\n -v /user/certificates/client_config_cert.pem:/etc/dai/client_config_cert.pem \\\n -v /user/certificates/client_config_key.key:/etc/dai/client_config_key.key \\\n -v /user/certificates/rootCA.pem:/etc/dai/rootCA.pem \\\n h2oai/dai-ubi8-x86_64:|tag|\nNote: When certificate verification is required, use the Docker\nparameter --hostname to ensure that the certificate hostname is\nresolvable from within the Docker container to the container's IP\naddress.", + "prompt_type": "plain" + }, + { + "output": "Checkpointing, Rerunning, and Retraining Experiments\nThe upper-right corner of the Driverless 
AI UI includes an Experiments\nlink. []\nClick this link to open the Experiments page. From this page, you can\nrename an experiment, view previous experiments, begin a new experiment,\nrerun an experiment, and delete an experiment. []\nCheckpointing, Rerunning, and Retraining\nIn Driverless AI, you can retry an experiment from the last checkpoint,\nyou can run a new experiment using an existing experiment's settings,\nand you can retrain an experiment's final pipeline. []\nCheckpointing Experiments\nIn real-world scenarios, data can change. For example, you may have a\nmodel currently in production that was built using 1 million records. At\na later date, you may receive several hundred thousand more records. Rather than building a new model from scratch, Driverless AI includes\nH2O.ai Brain, which enables caching and smart re-use of prior models to\ngenerate features for new models. You can configure one of the following Brain levels in the experiment's\nexpert-settings.", + "prompt_type": "plain" + }, + { + "output": "(default)\n- 3: Smart checkpoint like level #1, but for the entire population. Tune only if the brain population is of insufficient size. - 4: Smart checkpoint like level #2, but for the entire population. Tune only if the brain population is of insufficient size. - 5: Smart checkpoint like level #4, but will scan over the entire\n brain cache of populations (starting from resumed experiment if\n chosen) in order to get the best scored individuals. If you choose Level 2 (default), then Level 1 is also done when\nappropriate. To make use of smart checkpointing, be sure that the new data has:\n- The same data column names as the old experiment\n- The same data types for each column as the old experiment. (This\n won't match if, e.g., a column was all int and then had one string\n row.) 
- The same target as the old experiment\n- The same target classes (if classification) as the old experiment\n- For time series, all choices for intervals and gaps must be the same\nWhen the above conditions are met, then you can:\n- Start the same kind of experiment, just rerun for longer.", + "prompt_type": "plain" + }, + { + "output": "fewer or more rows). - Effectively do a final ensemble re-fit by varying the data rows and\n starting an experiment with a new accuracy, time=1, and\n interpretability. Check the experiment preview for what the ensemble\n will be. - Restart/Resume a cancelled, aborted, or completed experiment\nTo run smart checkpointing on an existing experiment, click the right\nside of the experiment that you want to retry, then select New /\nContinue -> From Last Checkpoint. The experiment settings page opens. Specify the new dataset. If desired, you can also change experiment\nsettings, though the target column must be the same. Click Launch\nExperiment to resume the experiment from the last checkpoint and build a\nnew experiment. The smart checkpointing continues by adding a prior model as another\nmodel used during tuning. If that prior model is better (which is likely\nif it was run for more iterations), then that smart checkpoint model\nwill be used during feature evolution iterations and final ensemble.", + "prompt_type": "plain" + }, + { + "output": "- The directory where the H2O.ai Brain meta model files are stored is\n tmp/H2O.ai_brain. In addition, the default maximum brain size is\n 20GB. Both the directory and the maximum size can be changed in the\n config.toml file. Rerunning Experiments\nTo run a new experiment using an existing experiment's settings, click\nthe right side of the experiment that you want to use as the basis for\nthe new experiment, then select New Experiment with Same Settings. This\nopens the experiment settings page. 
From this page, you can rerun the\nexperiment using the original settings, or you can specify to use new\ndata and/or specify different experiment settings. Click Launch\nExperiment to create a new experiment with the same options. Retrain / Refit\nTo retrain an experiment's final pipeline, click on the group of square\nicons next to the experiment that you want to use as the basis for the\nnew experiment and click Retrain / Refit, then select From Final\nCheckpoint. This opens the experiment settings page with the same\nsettings as the original experiment except that Time is set to 0.", + "prompt_type": "plain" + }, + { + "output": "This may include the addition of\nnew features, the exclusion of previously used features, a change in the\nhyperparameter search space, or finding new parameters for the existing\nmodel architecture. To retrain the final pipeline without adding new features, select the\nFrom Best Models option, which overrides the following config.toml\noptions:\n refit_same_best_individual=True\n brain_add_features_for_new_columns=False\n feature_brain_reset_score=\"off\"\n force_model_restart_to_defaults=False\nFor more information, refer to the feature_brain_level setting in the\nconfig.toml file. Note\nFor information on the equivalent Python client calls\nfor Retrain / Refit options, refer to the following list. - New / Continue - With Same Settings:\n retrain(...)\n- New / Continue - From Last Checkpoint:\n retrain(..., use_smart_checkpoint=True)\n- Retrain / Refit - From Final Checkpoint\n retrain(..., final_pipeline_only=True)\n- Retrain / Refit - From Best Models (1.10.1 client)\n retrain(..., final_models_only=True)\n\"Pausing\" an Experiment\nA trick for \"pausing\" an experiment is to:\n1.", + "prompt_type": "plain" + }, + { + "output": "Simple Configurations\nBelow is a list of some simple configurations that can be run with\ncopy/paste config.toml settings in Driverless AI GUI. 
Get a quick Final Model: no Genetic Algorithm no Ensembling\nThese settings can be copy pasted in the Toml editor in the Expert\nSettings. The experiment preview can be checked to make sure the changes\nhave taken effect. The Toml editor of a completed experiment will also\nlist them at the end of the experiment. Toml editor\n enable_genetic_algorithm = \"off\"\n fixed_ensemble_level = 0\nUse Original Features With Genetic Algorithm\nThis example does no transformations on numeric features and only a\nsingle simple encoding on categorical features, i.e. no interactions,\ntarget-encoding, dates, text, etc. It only does model selection and\ntuning via GA. The examples can be copy pasted in the Toml editor in the Expert\nSettings. The experiment preview gets modified and can be inspected to\nconfirm the changes have taken effect. 1) The example applies only identity or\n original transformation on numeric columns and\n Frequent Transformer on integer and categorical\n columns, i.e it does not do feature engineering or feature\n interactions (consider mutation_mode = \"full\" if set interaction\n depth >1).", + "prompt_type": "plain" + }, + { + "output": "Toml editor\n included_transformers = [\"OriginalTransformer\",\"OneHotEncodingTransformer\"]\n max_feature_interaction_depth = 1\n no_drop_features = true\nBuild models with your choice of algorithm and parameters\nThese settings can be copy pasted in the\nAdd to config.toml via toml string under the Expert Experiment settings\nof an experiment. Always check the Driverless preview to make sure the\nchanges have taken effect before launching the experiment. The Scores\ntab can be used to inspect the built model. 1) This example builds a single GBM model with 2 folds cross\n validation and user provided parameters with no genetic algorithm. 
Add to config.toml via toml string\n \"\" included_models = ['XGBOOSTGBM']\\n\n params_xgboost = \"{'max_depth': 2, 'max_leaves': 4, 'n_estimators': 50, 'learning_rate': 0.03}\"\\n\n fixed_num_folds = 2 \\n\n feature_brain_level = 0 \\n \n enable_genetic_algorithm = \"off\" \\n\n \"\"\n 2) This example builds a single TensorFlow model on original numeric\n features with user defined parameters.", + "prompt_type": "plain" + }, + { + "output": "The model\n is evaluated with a 4 fold cross validation scheme. Mojo creation,\n pipeline visualization and genetic algorithm are turned off. Experiment logs can be viewed to verify the parameters used by the\n TensorFlow model. Add to config.toml via toml string\n \"\" included_models = [\"TensorFlowModel\"] \\n\n included_transformers = [\"OriginalTransformer\"] \\n\n fixed_ensemble_level = 1 \\n\n fixed_num_folds = 4 \\n\n params_tensorflow = \"{'batch_size': 4096, 'epochs': 100, 'hidden': [1000, 1000]}\" \\n\n target_transformer = \"identity_noclip\" \\n\n make_mojo_scoring_pipeline = \"off\" \\n\n make_pipeline_visualization = \"off\" \\n\n enable_genetic_algorithm = \"off\" \\n\n \"\"\n 3) This example builds LightGBM models. During genetic algorithm, it\n does feature engineering and will do model tuning by toggling\n other params not set by the user. The Scores tab can be used to\n inspect the built models.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Standalone Python Scoring Pipeline\nA standalone Python scoring pipeline is available after successfully\ncompleting an experiment. This package contains an exported model and\nPython 3.8 source code examples for productionizing models built using\nH2O Driverless AI. The files in this package let you transform and score on new data in\nseveral different ways:\n- From Python 3.8, you can import a scoring module and use it to\n transform and score on new data. 
- From other languages and platforms, you can use the TCP/HTTP scoring\n service bundled with this package to call into the scoring pipeline\n module through remote procedure calls (RPC). For more information on the Python Scoring Pipeline, refer to the\nfollowing sections:\n- python-scoring-before\n- python-scoring-files\n- python-scoring-quick-start\n- python-scoring-module\n- python-scoring-service\n- python-scoring-shapley\n- python-scoring-faq\n- python-scoring-troubleshooting\nBefore You Begin\nRefer to the following notes for important information regarding the\nPython Scoring Pipeline.", + "prompt_type": "plain" + }, + { + "output": "For more information, see\ncuda-opencl-cudnn. Note\nThe downloaded scorer zip file contains a shell script called\nrun_example.sh, which is used to set up a virtual environment and run an\nexample Python script. If you use the pip-virtualenv mode for the\nrun_example.sh shell script, refer to the following examples to install\nprerequisites for Python scoring:\nDocker\nTo install the necessary prerequisites and activate a virtual\nenvironment using the run_example.sh shell script with Docker, refer to\nthe following examples:\nUbuntu 18.04 or later\n # replace with your license key\ndocker run -ti --entrypoint=bash --runtime nvidia -e\nDRIVERLESS_AI_LICENSE_KEY= -v /home/$USER/scorers:/scorers\ndocker.io/nvidia/cuda:11.2.2-base-ubuntu18.04 apt-get update apt-get\ninstall python3.8 virtualenv unzip git -y apt-get install libgomp1\nlibopenblas-base ocl-icd-libopencl1 -y # required at runtime apt install\nbuild-essential libssl-dev libffi-dev python3-dev python3.8-dev -y # to\ncompile some packages apt install language-pack-en -y # for proper\nencoding support apt-get install libopenblas-dev -y # for runtime mkdir\n-p /etc/OpenCL/vendors && echo \"libnvidia-opencl.so.1\" >\n/etc/OpenCL/vendors/nvidia.icd export LANG=\"en_US.UTF-8\" export\nLC_ALL=\"en_US.UTF-8\" unzip /scorers/scorer.zip cd scoring-pipeline # if\ndon't need h2o-3 
recipe server, then add dai_enable_h2o_recipes=0 before\nbash below bash run_example.sh\nRed Hat Enterprise Linux (Red Hat Universal Base Image 8 without GPUs)\n docker run -ti --entrypoint=bash -v /home/$USER/scorers:/scorers registry.access.redhat.com/ubi8/ubi:8.4\n dnf -y install python38 unzip virtualenv openblas libgomp\n unzip /scorers/scorer.zip\n cd scoring-pipeline\n bash run_example.sh\nCentOS 8\n docker run -ti --entrypoint=bash -v /home/$USER/Downloads/scorers:/scorers centos:8\n dnf -y install python38 unzip virtualenv openblas libgomp procps\n unzip /scorers/scorer.zip\n cd scoring-pipeline\n bash run_example.sh\nUbuntu 16.04\nTo install the necessary prerequisites and activate a virtual\nenvironment using the run_example.sh shell script on Ubuntu 16.04, run\nthe following commands:\n sudo apt-get update\n sudo apt-get install software-properties-common # Ubuntu 16.04 only\n sudo add-apt-repository ppa:deadsnakes/ppa # Ubuntu 16.04 only\n sudo apt-get update\n sudo apt-get install python3.8 virtualenv unzip -y\n sudo apt-get install libgomp1 libopenblas-base ocl-icd-libopencl1 -y # required at runtime\n unzip scorer.zip\n cd scoring-pipeline\n bash run_example.sh\nIf you need to be able to compile, also run the following command:\n sudo apt install build-essential libssl-dev libffi-dev python3-dev -y\nTo run a scoring job using the example.py file after the virtual\nenvironment has been activated, run the following command:\n export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\"\n python example.py\nUbuntu 18.04 or later\nTo install the necessary prerequisites and activate a virtual\nenvironment using the run_example.sh shell script on Ubuntu 18.04 or\nlater, run the following commands:\n sudo apt-get update\n sudo apt-get install python3.8 virtualenv unzip -y\n sudo apt-get install libgomp1 libopenblas-base ocl-icd-libopencl1 -y # required at runtime\n unzip scorer.zip\n cd scoring-pipeline\n bash run_example.sh\nIf you need to be able to 
compile, also run the following command:\n sudo apt install build-essential libssl-dev libffi-dev python3-dev -y\nTo run a scoring job using the example.py file after the virtual\nenvironment has been activated, run the following command:\n export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\"\n python example.py\nRHEL 8\nTo install the necessary prerequisites and activate a virtual\nenvironment using the run_example.sh shell script on Red Hat Enterprise\nLinux 8, run the following command:\n dnf -y install python38 unzip virtualenv openblas libgomp\n unzip /rpms/scorer.zip\n cd scoring-pipeline\n bash run_example.sh\nCentOS 8\nTo install the necessary prerequisites and activate a virtual\nenvironment using the run_example.sh shell script on CentOS 8, run the\nfollowing command:\n dnf -y install python38 unzip virtualenv openblas libgomp procps\n unzip /rpms/scorer.zip\n cd scoring-pipeline\n bash run_example.sh\nNote\nCustom Recipes and the Python Scoring Pipeline\nBy default, if a custom recipe has been uploaded into Driverless AI and\nis subsequently not used in the experiment, the Python Scoring Pipeline\nstill contains the H2O recipe server.", + "prompt_type": "plain" + }, + { + "output": "In addition, Java has to be installed in the container,\nwhich further increases the runtime storage and memory requirements. A\nworkaround is to set the following environment variable before running\nthe Python Scoring Pipeline:\n export dai_enable_custom_recipes=0\nCUDA, OpenCL, and cuDNN Install Instructions\nRefer to the following sections for instructions on installing CUDA,\nOpenCL, and cuDNN when using the virtualenv or pip run methods of Python\nscoring. 
Installing CUDA with NVIDIA Drivers\nBefore installing CUDA, make sure you have already installed wget, gcc,\nmake, and elfutils-libelf-devel:\n sudo yum -y install wget\n sudo yum -y install gcc\n sudo yum -y install make\n sudo yum -y install elfutils-libelf-devel\nNext, visit\nhttps://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html\nfor instructions on installing CUDA. It is recommended that you use the\nrunfile method of installation. If prompted to select what tools you would like to install, select\nDrivers only.", + "prompt_type": "plain" + }, + { + "output": "sudo yum -y clean all\n sudo yum -y makecache\n sudo yum -y update\n wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/c/clinfo-2.1.17.02.09-1.el7.x86_64.rpm\n wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/o/ocl-icd-2.2.12-1.el7.x86_64.rpm\n sudo rpm -if ocl-icd-2.2.12-1.el7.x86_64.rpm\n sudo rpm -if clinfo-2.1.17.02.09-1.el7.x86_64.rpm\n clinfo\n mkdir -p /etc/OpenCL/vendors && \\\n echo \"libnvidia-opencl.so.1\" > /etc/OpenCL/vendors/nvidia.icd\nInstalling cuDNN\nFor information on installing cuDNN on Linux, refer to\nhttps://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html. Note\ncuDNN 8 or later is required. Python Scoring Pipeline Files\nThe scoring-pipeline folder includes the following notable files:\n- example.py: An example Python script demonstrating how to import and\n score new records. - run_example.sh: Runs example.py (also sets up a virtualenv with\n prerequisite libraries). For more information, refer to the second\n note in the python-scoring-before section.", + "prompt_type": "plain" + }, + { + "output": "- http_server.py: A standalone HTTP server for hosting scoring\n services. - run_tcp_server.sh: Runs TCP scoring service (runs tcp_server.py). - run_http_server.sh: Runs HTTP scoring service (runs http_server.py). - example_client.py: An example Python script demonstrating how to\n communicate with the scoring server. 
- run_tcp_client.sh: Demonstrates how to communicate with the scoring\n service via TCP (runs example_client.py). - run_http_client.sh: Demonstrates how to communicate with the scoring\n service via HTTP (using curl). Quick Start\nThere are two methods for starting the Python Scoring Pipeline. Quick Start - Recommended Method\nThis is the recommended method for running the Python Scoring Pipeline. Use this method if:\n- You have an air gapped environment with no access to the Internet. - You want to use a quick start approach. Prerequisites\n- A valid Driverless AI license key. - A completed Driverless AI experiment. - Downloaded Python Scoring Pipeline.", + "prompt_type": "plain" + }, + { + "output": "1. Download the TAR SH version of Driverless AI from\n https://www.h2o.ai/download/. 2. Use bash to execute the download. This creates a new\n dai-<VERSION> folder, where <VERSION> represents your\n version of Driverless AI (for example, 1.7.1-linux-x86_64). 3. Change directories into the new Driverless AI folder. (Replace\n <VERSION> below with the version that was created in Step\n 2.) 4. Run the following to change permissions:\n5. Run the following to install the Python Scoring Pipeline for your\n completed Driverless AI experiment:\n6. Run the following command from the scoring-pipeline directory:\nQuick Start - Alternative Method\nThis section describes an alternative method for running the Python\nScoring Pipeline. This version requires Internet access. Note\nIf you use a scorer from a version prior to 1.10.4.1, you need to add\nexport SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True prior to\ncreating the new scorer python environment, either in run_example.sh or\nin the same terminal where the shell scripts are executed.", + "prompt_type": "plain" + }, + { + "output": "Prerequisites\n- The scoring module and scoring service are supported only on Linux\n with Python 3.8 and OpenBLAS. 
- The scoring module and scoring service download additional packages\n at install time and require Internet access. Depending on your\n network environment, you might need to set up internet access via a\n proxy. - Valid Driverless AI license. Driverless AI requires a license to be\n specified in order to run the Python Scoring Pipeline. - Apache Thrift (to run the scoring service in TCP mode)\n- Linux environment\n- Python 3.8\n- libopenblas-dev (required for H2O4GPU)\n- OpenCL\nFor info on how to install these prerequisites, refer to the following\nexamples. Installing Python 3.8 and OpenBLAS on Ubuntu 16.10 or Later:\n sudo apt install python3.8 python3.8-dev python3-pip python3-dev \\\n python-virtualenv python3-virtualenv libopenblas-dev\nInstalling Python 3.8 and OpenBLAS on Ubuntu 16.04:\n sudo add-apt-repository ppa:deadsnakes/ppa\n sudo apt-get update\n sudo apt-get install python3.8 python3.8-dev python3-pip python3-dev \\\n python-virtualenv python3-virtualenv libopenblas-dev\nInstalling Conda 3.6:\n You can install Conda using either Anaconda or Miniconda.", + "prompt_type": "plain" + }, + { + "output": "DRIVERLESS_AI_LICENSE_KEYwould be similar. **Installing the Thrift Compiler** Thrift is required to run the scoring service in TCP mode, but it is not required to run the scoring module. The following steps are available on the Thrift documentation site at: https://thrift.apache.org/docs/BuildingFromSource. :: sudo apt-get install automake bison flex g++ git libevent-dev \\ libssl-dev libtool make pkg-config libboost-all-dev ant wget https://github.com/apache/thrift/archive/0.10.0.tar.gz tar -xvf 0.10.0.tar.gz cd thrift-0.10.0 ./bootstrap.sh ./configure make sudo make install Run the following to refresh the runtime shared after installing Thrift: :: sudo ldconfig /usr/local/lib Running the Python Scoring Pipeline - Alternative Method ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1. 
On the completed Experiment page, click on the **Download Python Scoring Pipeline** button to download the **scorer.zip** file for this experiment onto your local machine.", + "prompt_type": "plain" + }, + { + "output": "Extract the scoring pipeline. You can run the scoring module and the scoring service after downloading and extracting the pipeline. **Score from a Python Program** If you intend to score from a Python program, run the scoring module example. (Requires Linux and Python 3.8.) :: export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\" bash run_example.sh **Score Using a Web Service** If you intend to score using a web service, run the HTTP scoring server example. (Requires Linux x86_64 and Python 3.8.) :: export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\" bash run_http_server.sh bash run_http_client.sh **Score Using a Thrift Service** If you intend to score using a Thrift service, run the TCP scoring server example. (Requires Linux x86_64, Python 3.8 and Thrift.) :: export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\" bash run_tcp_server.sh bash run_tcp_client.sh **Note**: By default, the run*.sh scripts mentioned above create a virtual environment using virtualenv and pip, within which the Python code is executed.", + "prompt_type": "plain" + }, + { + "output": "The package manager to use is provided as an argument to the script. :: # to use conda package manager export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\" bash run_example.sh --pm conda # to use pip package manager export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\" bash run_example.sh --pm pip If you experience errors while running any of the above scripts, check to make sure your system has a properly installed and configured Python 3.8 installation. 
Refer to the `Troubleshooting Python Environment Issues <#troubleshooting-python-environment-issues>`__ section that follows to see how to set up and test the scoring module using a cleanroom Ubuntu 16.04 virtual machine. .. _python-scoring-module: The Python Scoring Module ------------------------- The scoring module is a Python module bundled into a standalone wheel file (name `scoring <>`__\\ \\*.whl). All the prerequisites for the scoring module to work correctly are listed in the requirements.txt file.", + "prompt_type": "plain" + }, + { + "output": "from scoring_487931_20170921174120_b4066 import Scorer scorer = Scorer() # Create instance. score = scorer.score([ # Call score() 7.416, # sepal_len 3.562, # sepal_wid 1.049, # petal_len 2.388, # petal_wid ]) The scorer instance provides the following methods (and more): - score(list): Score one row (list of values). - score_batch(df): Score a Pandas dataframe. - fit_transform_batch(df): Transform a Pandas dataframe. - get_target_labels(): Get target column labels (for classification problems). The process of importing and using the scoring module is demonstrated by the bash scriptrun_example.sh, which effectively performs the following steps: :: # See 'run_example.sh' for complete example. virtualenv -p python3.8 env source env/bin/activate pip install --use-deprecated=legacy-resolver -r requirements.txt export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\" python example.py .. _python-scoring-service: The Scoring Service ------------------- The scoring service hosts the scoring module as an HTTP or TCP service.", + "prompt_type": "plain" + }, + { + "output": "In effect, this mechanism lets you invoke scoring functions from languages other than Python on the same computer or from another computer on a shared network or on the Internet. 
The scoring service can be started in two ways: - In TCP mode, the scoring service provides high-performance RPC calls via Apache Thrift (https://thrift.apache.org/) using a binary wire protocol. - In HTTP mode, the scoring service provides JSON-RPC 2.0 calls served by Tornado (http://www.tornadoweb.org). Scoring operations can be performed on individual rows (row-by-row) or in batch mode (multiple rows at a time). Scoring Service - TCP Mode (Thrift) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The TCP mode lets you use the scoring service from any language supported by Thrift, including C, C++, C#, Cocoa, D, Dart, Delphi, Go, Haxe, Java, Node.js, Lua, perl, PHP, Python, Ruby and Smalltalk. To start the scoring service in TCP mode, you will need to generate the Thrift bindings once, then run the server: :: # See 'run_tcp_server.sh' for complete example.", + "prompt_type": "plain" + }, + { + "output": "It is not a run time dependency, i.e. once the scoring services are built and tested, you do not need to repeat this installation process on the machines where the scoring services are intended to be deployed. To call the scoring service, generate the Thrift bindings for your language of choice, then make RPC calls via TCP sockets using Thrift's buffered transport in conjunction with its binary protocol. :: # See 'run_tcp_client.sh' for complete example. thrift --gen py scoring.thrift # See 'example_client.py' for complete example. 
socket = TSocket.TSocket('localhost', 9090) transport = TTransport.TBufferedTransport(socket) protocol = TBinaryProtocol.TBinaryProtocol(transport) client = ScoringService.Client(protocol) transport.open() row = Row() row.sepalLen = 7.416 # sepal_len row.sepalWid = 3.562 # sepal_wid row.petalLen = 1.049 # petal_len row.petalWid = 2.388 # petal_wid scores = client.score(row) transport.close() You can reproduce the exact same result from other languages, e.g.", + "prompt_type": "plain" + }, + { + "output": "This is usually less performant compared to Thrift, but has the advantage of being usable from any HTTP client library in your language of choice, without any dependency on Thrift. For JSON-RPC documentation, see http://www.jsonrpc.org/specification. To start the scoring service in HTTP mode: :: # See 'run_http_server.sh' for complete example. export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\" python http_server.py --port=9090 To invoke scoring methods, compose a JSON-RPC message and make a HTTP POST request to `http://host:port/rpc `__ as follows: :: # See 'run_http_client.sh' for complete example. curl http://localhost:9090/rpc \\ --header \"Content-Type: application/json\" \\ --data @- <` for **transformed features** and **original features** are **available** for XGBoost (GBM, GLM, RF, DART), LightGBM, Zero-Inflated, Imbalanced and DecisionTree models (and their ensemble). For ensemble with ExtraTrees meta learner (ensemble_meta_learner='extra_trees') models we suggest to use the Python scoring packages.", + "prompt_type": "plain" + }, + { + "output": "- The :ref:`Shapley fast approximation ` uses only one model (from the first fold) with no more than the first 50 trees. For details seefast_approx_num_treesandfast_approx_do_one_fold_one_model:ref:`config.toml settings `. .. 
_python-scoring-faq: Frequently asked questions -------------------------- **I'm getting GCC compile errors on Red Hat / CentOS when not using tar and**SCORING_PIPELINE_INSTALL_DEPENDENCIES\n=\n0. **How do I fix this? ** To fix this issue, run the following command: :: sudo yum -y install gcc **Why am I getting a \"TensorFlow is disabled\" message when I run the Python Scoring Pipeline? ** If you ran an experiment when TensorFlow was enabled and then attempt to run the Python Scoring Pipeline, you may receive a message similar to the following: :: TensorFlow is disabled. To enable, export DRIVERLESS_AI_ENABLE_TENSORFLOW=1 or set enable_tensorflow=true in config.toml. To successfully run the Python Scoring Pipeline, you must enable theDRIVERLESS_AI_ENABLE_TENSORFLOW``\nflag.", + "prompt_type": "plain" + }, + { + "output": "Using a Custom Transformer\nDriverless AI supports a number of feature transformers as described in\ntransformations. This example shows how you can include a custom\ntransformer in your experiment. Specifically, this example will show how\nto add the ExpandingMean transformer. 1. Start an experiment in Driverless AI by selecting your training\n dataset along with (optionally) validation and testing datasets and\n then specifying a Target Column. Notice the list of transformers\n that will be used in the Feature engineering search space (where\n applicable) section of the experiment summary. Driverless AI\n determines this list based on the dataset and experiment. 2. Click on Expert Settings. 3. Specify the custom recipe using one of the following methods:\n4. Navigate to the Expert Settings > Recipes tab and click the Include\n Specific Transformers button. 
Notice that all transformers are\n selected by default, including the new ExpandingMean transformer\n (bottom of page).", + "prompt_type": "plain" + }, + { + "output": "Google Cloud Storage Setup\nDriverless AI lets you explore Google Cloud Storage data sources from\nwithin the Driverless AI application. This section provides instructions\nfor configuring Driverless AI to work with Google Cloud Storage. This\nsetup requires you to enable authentication. If you enable GCS or GBP\nconnectors, those file systems will be available in the UI, but you will\nnot be able to use those connectors without authentication. In order to enable the GCS data connector with authentication, you must:\n1. Obtain a JSON authentication file from GCP. 2. Mount the JSON file to the Docker instance. 3. Specify the path to the /json_auth_file.json in the\n gcs_path_to_service_account_json config option. Notes:\n- The account JSON includes authentications as provided by the system\n administrator. You can be provided a JSON file that contains both\n Google Cloud Storage and Google BigQuery authentications, just one\n or the other, or none at all. - Depending on your Docker install version, use either the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command when starting the Driverless AI Docker image.", + "prompt_type": "plain" + }, + { + "output": "Description of Configuration Attributes\n- gcs_path_to_service_account_json: Specifies the path to the\n /json_auth_file.json file. - gcs_init_path: Specifies the starting GCS path displayed in the UI\n of the GCS browser. Start GCS with Authentication\nDocker Image Installs\nThis example enables the GCS data connector with authentication by\npassing the JSON authentication file. This assumes that the JSON file\ncontains Google Cloud Storage authentications. 
nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,gcs\" \\\n -e DRIVERLESS_AI_GCS_PATH_TO_SERVICE_ACCOUNT_JSON=\"/service_account_json.json\" \\\n -u `id -u`:`id -g` \\\n -p 12345:12345 \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n -v `pwd`/service_account_json.json:/service_account_json.json \\\n h2oai/dai-ubi8-x86_64:|tag|\nDocker Image with the config.toml\nThis example shows how to configure the GCS data connector options in\nthe config.toml file, and then specify that file when starting\nDriverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "Monitoring Pending Jobs\nDriverless AI features a Pending Jobs panel that lets you monitor the\nprogress of various long-running jobs that can be started from the\ncompleted_experiment page. To view this panel, click the group of square\nicons located in the upper-right corner. The following jobs are monitored in this panel:\n- Create AutoDoc\n- Create MOJO Scoring Pipeline\n- Create Python Scoring Pipeline\n- Create Test Set Predictions\n- Create Training Predictions\n- Score Model\n- Transform Data\nThe circular icon next to the description of a pending job indicates its\nstatus:\n+---------+------------+\n| Icon | Status |\n+=========+============+\n| [logo] | Complete |\n+---------+------------+\n| [logo2] | Failed |\n+---------+------------+\n| | Running |\n+---------+------------+\nNavigate to a completed job by clicking the Open icon. You can also\nclear a completed job from the panel by clicking Remove or cancel an\nongoing job by clicking Abort. 
Note: Certain jobs cannot be cancelled.", + "prompt_type": "plain" + }, + { + "output": "BlueData DataTap Setup\n\nThis section provides instructions for configuring Driverless AI to work\nwith BlueData DataTap.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run --runtime=nvidia(>= Docker 19.03) ornvidia-docker(< Docker 19.03) command when starting the Driverless AI Docker image. Usedocker versionto check which version of Docker you are using. Description of Configuration Attributes --------------------------------------- -dtap_auth_type: Selects DTAP authentication. Available values are: -noauth: No authentication needed -principal: Authenticate with DataTap with a principal user -keytab: Authenticate with a Key tab (recommended). If running Driverless AI as a service, then the Kerberos keytab needs to be owned by the Driverless AI user. -keytabimpersonation: Login with impersonation using a keytab -dtap_config_path: The location of the DTAP (HDFS) config folder path. This folder can contain multiple config files. **Note**: The DTAP config file core-site.xml needs to contain DTap FS configuration, for example: :: fs.dtap.impl com.bluedata.hadoop.bdfs.Bdfs The FileSystem for BlueData dtap: URIs. -dtap_key_tab_path: The path of the principal key tab file.", + "prompt_type": "plain" + }, + { + "output": "-dtap_app_principal_user: The Kerberos app principal user (recommended). -dtap_app_login_user: The user ID of the current user (for example, user@realm). -dtap_app_jvm_args: JVM args for DTap distributions. Separate each argument with spaces. -dtap_app_classpath: The DTap classpath. -dtap_init_path: Specifies the starting DTAP path displayed in the UI of the DTAP browser. -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. 
Example 1: Enable DataTap with No Authentication ------------------------------------------------ .. container:: tabs .. group-tab:: Docker Image Installs This example enables the DataTap data connector and disables authentication. It does not pass any configuration file; however it configures Docker DNS by passing the name and IP of the DTap name node. This lets users reference data stored in DTap directly using the name node address, for example:dtap://name.node/datasets/iris.csvordtap://name.node/datasets/.", + "prompt_type": "plain" + }, + { + "output": ".. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,dtap\" \\ -e DRIVERLESS_AI_DTAP_AUTH_TYPE='noauth' \\ -p 12345:12345 \\ -v /etc/passwd:/etc/passwd \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure DataTap options in the config.toml file, and then specify that file when starting Driverless AI in Docker. Note that this example enables DataTap with no authentication. 1. Configure the Driverless AI config.toml file. Set the following configuration options: .. -enabled_file_systems = \"file, upload, dtap\"2. Mount the config.toml file into the Docker container.", + "prompt_type": "plain" + }, + { + "output": "This allows users to reference data stored in DataTap directly using the name node address, for example:dtap://name.node/datasets/iris.csvordtap://name.node/datasets/. (**Note**: The trailing slash is currently required for directories.) 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. 
Specify the following configuration options in the config.toml file. .. :: # File System Support # upload : standard upload feature # dtap : Blue Data Tap file system, remember to configure the DTap section below enabled_file_systems = \"file, dtap\" 3. Save the changes when you are done, then stop/restart Driverless AI. Example 2: Enable DataTap with Keytab-Based Authentication ---------------------------------------------------------- **Notes**: - If using Kerberos Authentication, the time on the Driverless AI server must be in sync with the Kerberos server.", + "prompt_type": "plain" + }, + { + "output": "- If running Driverless AI as a service, then the Kerberos keytab needs to be owned by the Driverless AI user; otherwise Driverless AI will not be able to read/access the Keytab and will result in a fallback to simple authentication and, hence, fail. .. container:: tabs .. group-tab:: Docker Image Installs This example: - Places keytabs in the /tmp/dtmp folder on your machine and provides the file path as described below. - Configures the environment variable DRIVERLESS_AI_DTAP_APP_PRINCIPAL_USER to reference a user for whom the keytab was created (usually in the form of user@realm). .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,dtap\" \\ -e DRIVERLESS_AI_DTAP_AUTH_TYPE='keytab' \\ -e DRIVERLESS_AI_DTAP_KEY_TAB_PATH='tmp/<>' \\ -e DRIVERLESS_AI_DTAP_APP_PRINCIPAL_USER='<>' \\ -p 12345:12345 \\ -v /etc/passwd:/etc/passwd \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example: - Places keytabs in the /tmp/dtmp folder on your machine and provides the file path as described below.", + "prompt_type": "plain" + }, + { + "output": "1. Configure the Driverless AI config.toml file. Set the following configuration options: .. 
-enabled_file_systems = \"file, upload, dtap\"-dtap_auth_type = \"keytab\"-dtap_key_tab_path = \"/tmp/\"-dtap_app_principal_user = \"\"2. Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example: - Places keytabs in the/tmp/dtmpfolder on your machine and provides the file path as described below.", + "prompt_type": "plain" + }, + { + "output": "1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. .. :: # File System Support # file : local file system/server file system # dtap : Blue Data Tap file system, remember to configure the DTap section below enabled_file_systems = \"file, dtap\" # Blue Data DTap connector settings are similar to HDFS connector settings. # # Specify DTap Auth Type, allowed options are: # noauth : No authentication needed # principal : Authenticate with DTab with a principal user # keytab : Authenticate with a Key tab (recommended). If running # DAI as a service, then the Kerberos keytab needs to # be owned by the DAI user.", + "prompt_type": "plain" + }, + { + "output": "Save the changes when you are done, then stop/restart Driverless AI. 
Example 3: Enable DataTap with Keytab-Based Impersonation --------------------------------------------------------- **Notes**: - If using Kerberos, be sure that the Driverless AI time is synched with the Kerberos server. - If running Driverless AI as a service, then the Kerberos keytab needs to be owned by the Driverless AI user. .. container:: tabs .. group-tab:: Docker Image Installs This example: - Places keytabs in the /tmp/dtmp folder on your machine and provides the file path as described below. - Configures the DRIVERLESS_AI_DTAP_APP_PRINCIPAL_USER variable, which references a user for whom the keytab was created (usually in the form of user@realm). - Configures the DRIVERLESS_AI_DTAP_APP_LOGIN_USER variable, which references a user who is being impersonated (usually in the form of user@realm). .. code:: bash # Docker instructions nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,dtap\" \\ -e DRIVERLESS_AI_DTAP_AUTH_TYPE='keytabimpersonation' \\ -e DRIVERLESS_AI_DTAP_KEY_TAB_PATH='tmp/<>' \\ -e DRIVERLESS_AI_DTAP_APP_PRINCIPAL_USER='<>' \\ -e DRIVERLESS_AI_DTAP_APP_LOGIN_USER='<>' \\ -p 12345:12345 \\ -v /etc/passwd:/etc/passwd \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example: - Places keytabs in the /tmp/dtmp folder on your machine and provides the file path as described below.", + "prompt_type": "plain" + }, + { + "output": "- Configures the dtap_app_login_user variable, which references a user who is being impersonated (usually in the form of user@realm). 1. Configure the Driverless AI config.toml file. Set the following configuration options: .. - enabled_file_systems = \"file, upload, dtap\" - dtap_auth_type = \"keytabimpersonation\" - dtap_key_tab_path = \"/tmp/\" - dtap_app_principal_user = \"\" - dtap_app_login_user = \"\" 2. 
Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example: - Places keytabs in the/tmp/dtmpfolder on your machine and provides the file path as described below.", + "prompt_type": "plain" + }, + { + "output": "- Configures thedtap_app_login_user`` variable, which references\n a user who is being impersonated (usually in the form of\n user@realm). 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example:\n # DEB and RPM\n export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\"\n # TAR SH\n export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" \n 2. Specify the following configuration options in the config.toml\n file. 
# File System Support\n # upload : standard upload feature\n # file : local file system/server file system\n # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below\n # dtap : Blue Data Tap file system, remember to configure the DTap section below\n # s3 : Amazon S3, optionally configure secret and access key below\n # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below\n # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below\n # minio : Minio Cloud Storage, remember to configure secret and access key below\n # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password)\n # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args)\n # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key)\n # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs)\n # recipe_url: load custom recipe from URL\n # recipe_file: load custom recipe from local file system\n enabled_file_systems = \"file, dtap\"\n # Blue Data DTap connector settings are similar to HDFS connector settings. #\n # Specify DTap Auth Type, allowed options are:\n # noauth : No authentication needed\n # principal : Authenticate with DTab with a principal user\n # keytab : Authenticate with a Key tab (recommended). If running\n # DAI as a service, then the Kerberos keytab needs to\n # be owned by the DAI user. 
# keytabimpersonation : Login with impersonation using a keytab\n dtap_auth_type = \"keytabimpersonation\"\n # Path of the principal key tab file\n dtap_key_tab_path = \"/tmp/\"\n # Kerberos app principal user (recommended)\n dtap_app_principal_user = \"\"\n # Specify the user id of the current user here as user@realm\n dtap_app_login_user = \"\"\n 3.", + "prompt_type": "plain" + }, + { + "output": "Feature Count Control\nThis page describes how to control feature counts during the feature\nselection process in H2O Driverless AI (DAI). - original_feature_control\n- transformed_feature_control\n- individuals_control\n- feature_count_use_case\nOriginal Feature Control\nTo control the count of original features when creating an experiment,\nuse one of the following methods:\n- On the Experiment Setup page, click Dropped Columns to manually\n select specific columns to drop. - Use the Features to Drop Expert Setting to enter\n a list of features to drop. The list of features must be formatted\n as follows:\n- If you are unsure about which original columns are best, you can let\n DAI select the best features by setting the following configuration\n options, which use DAI's feature selection (FS) by permutation\n importance to determine which original features are beneficial to\n keep, and which features to remove if they negatively impact the\n model.", + "prompt_type": "plain" + }, + { + "output": "- max_orig_numeric_cols_selected: This option has the same\n functionality as max_orig_cols_selected, but for numeric\n columns. - max_orig_nonnumeric_cols_selected: This option has the same\n functionality as max_orig_cols_selected, but for non-numeric\n columns. - To view a report about original features without any action, set\n orig_features_fs_report = true. 
- In general, FS can be controlled by setting the following\n parameters:\n- If strategy is FS (for high interpretability dial) we will use FS to\n get rid of poor features that hurt the model, and this can be\n fine-tuned with the following parameters:\nTransformed Feature Control\nFor transformed features, the Experiment Setup page and expert-settings\ncontrol the genetic algorithm (GA) that decides how many features\nshould be present. In some cases, however, too few or too many features\nare made. To control the number of transformed features that are made during an\nexperiment, use the nfeatures_max and ngenes_max settings.", + "prompt_type": "plain" + }, + { + "output": "These\nsettings can be used to control the number of allowed transformers and\ntransformed features by setting a limit beyond which transformed\nfeatures or transformers are removed. (The transformed features or\ntransformers with the lowest variable importance are removed first.) In some cases, specifying nfeatures_max and ngenes_max may be sufficient\nto get a restricted model. However, the best practice when using these\nsettings is to first run an experiment without specifying any\nrestrictions, and then retrain the final pipeline with the restrictions\nenabled. You can retrain the final pipeline from the\ncompleted experiment page by clicking Tune\nExperiment > Retrain / Refit > From Final Checkpoint. For more\ninformation on retraining the final pipeline, see retrain. To force DAI to add more transformations, use the ngenes_min parameter. This can be useful if you want DAI to search more actively through all\nof the potential permutations of transformers and input features.", + "prompt_type": "plain" + }, + { + "output": "enable_genetic_algorithm='off'.) .. _individuals_control: Individuals Control ------------------- You can control the number or type of individuals that are tuned or evolved by using the following config.toml parameters: .. 
code:: parameter_tuning_num_models fixed_num_individuals .. _feature_count_use_case: Sample Use Case --------------- The following is a sample use case for controlling feature counts. **Example**: You want to limit the number of features used for scoring to 14. **Solution A**: - For transformed features, setnfeatures_max\n=\n14in the :ref:`Expert Settings window `. - For original features, set the following parameters: .. .. code:: max_orig_cols_selected max_orig_numeric_cols_selected max_orig_nonnumeric_cols_selected **Solution B** Without changing any parameters, let DAI complete the experiment. After the experiment is complete, inspect theensemble_features_orig`\nfiles in the :ref:`experiment_summary to see which original features\nwere not important, then decide whether to drop even more of them by\nperforming \"tune\" experiment and retrain final pipeline (You can also\nchoose to refit from best model for an even closer match to the original\nexperiment).", + "prompt_type": "plain" + }, + { + "output": "Experiment Queuing In Driverless AI\nDriverless AI supports automatic queuing of experiments to avoid system\noverload. You can launch multiple experiments simultaneously that are\nautomatically queued and run when the necessary resources become\navailable. The worker queue indicates the number of experiments that are waiting\nfor their turn on a CPU or GPU + CPU system. Significant jobs like\nrunning experiments and making predictions are distinguished from minor\ntasks. In the following image, 'GPU queue' indicates that there are two\nexperiments waiting in the worker queue on a GPU-enabled system, and not\nthat two workers are waiting for a GPU:\n[]\nNotes:\n- By default, each node runs two experiments at a time. This is\n controlled by the worker_remote_processors option in the\n config.toml file . 
Starting with version 1.10.4,\n Driverless AI automatically sets the maximum number of CPU cores to\n use per experiment and the maximum number of remote tasks to be\n processed at one time based on the number of CPU cores your system\n has.", + "prompt_type": "plain" + }, + { + "output": "enable_tensorflow_image``\n\nEnable Image Transformer for Processing of Image Data\n\nSpecify whether to use pretrained deep learning models for processing of\nimage data as part of the feature engineering pipeline. When this is\nenabled, a column of Uniform Resource Identifiers (URIs) to images is\nconverted to a numeric representation using ImageNet-pretrained deep\nlearning models. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_image_pretrained_models-------------------------------------- .. container:: dropdown **Supported ImageNet Pretrained Architectures for Image Transformer** Specify the supported `ImageNet `__ pretrained architectures for image transformer. Select from the following: - densenet121 - efficientnetb0 - efficientnetb2 - inception_v3 - mobilenetv2 - resnet34 - resnet50 - seresnet50 - seresnext50 - xception (Selected by default) **Notes**: - If an internet connection is available, non-default models are downloaded automatically. If an internet connection is not available, non-default models must be downloaded from http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip and extracted intotensorflow_image_pretrained_models_dir``. - Multiple transformers can be activated at the same time to allow\n the selection of multiple options.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_image_vectorization_output_dimension``\n\nDimensionality of Feature Space Created by Image Transformer\n\nSpecify the dimensionality of the feature (embedding) space created by\nImage Transformer. 
Select from the following:\n\n- 10\n- 25\n- 50\n- 100 (Default)\n- 200\n- 300\n\nNote: Multiple transformers can be activated at the same time to allow\nthe selection of multiple options.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_image_fine_tune``\n\nEnable Fine-Tuning of the Pretrained Models Used for the Image\nTransformer\n\nSpecify whether to enable fine-tuning of the ImageNet pretrained models\nused for the Image Transformer. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_image_fine_tuning_num_epochs``\n\nNumber of Epochs for Fine-Tuning Used for the Image Transformer\n\nSpecify the number of epochs for fine-tuning ImageNet pretrained models\nused for the Image Transformer. This value defaults to 2.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_image_augmentations``\n\nList of Augmentations for Fine-Tuning Used for the Image Transformer\n\nSpecify the list of possible image augmentations to apply while\nfine-tuning the ImageNet pretrained models used for the Image\nTransformer. Select from the following:\n\n- Blur\n- CLAHE\n- Downscale\n- GaussNoise\n- GridDropout\n- HorizontalFlip (Default)\n- HueSaturationValue\n- ImageCompression\n- OpticalDistortion\n- RandomBrightnessContrast\n- RandomRotate90\n- ShiftScaleRotate\n- VerticalFlip\n\nNote: For more information on individual augmentations, see\nhttps://albumentations.ai/docs/.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_image_batch_size``\n\nBatch Size for the Image Transformer\n\nSpecify the batch size for the Image Transformer. By default, the batch\nsize is set to -1 (selected automatically).\n\nNote: Larger architectures and batch sizes use more memory.", + "prompt_type": "plain" + }, + { + "output": "image_download_timeout``\n\nImage Download Timeout in Seconds\n\nWhen providing images through URLs, specify the maximum number of\nseconds to wait for an image to download. 
This value defaults to 60 sec.", + "prompt_type": "plain" + }, + { + "output": "string_col_as_image_max_missing_fraction``\n\nMaximum Allowed Fraction of Missing Values for Image Column\n\nSpecify the maximum allowed fraction of missing elements in a string\ncolumn for it to be considered as a potential image path. This value\ndefaults to 0.1.", + "prompt_type": "plain" + }, + { + "output": "string_col_as_image_min_valid_types_fraction------------------------------------------------ .. container:: dropdown **Minimum Fraction of Images That Need to Be of Valid Types for Image Column to Be Used** Specify the fraction of unique image URIs that need to have valid endings (as defined bystring_col_as_image_valid_types``) for a\n\n string column to be considered as image data. This value defaults to\n 0.8.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_image_use_gpu``\n\nEnable GPU(s) for Faster Transformations With the Image Transformer\n\nSpecify whether to use any available GPUs to transform images into\nembeddings with the Image Transformer. Enabling this setting can lead to\nsignificantly faster transformation speeds. This is enabled by default.\n\nNote: This setting only applies when scoring inside Driverless AI or\nwith Py Scoring.", + "prompt_type": "plain" + }, + { + "output": "This section provides instructions for upgrading Driverless AI versions\nthat were installed in a Docker container. These steps ensure that\nexisting experiments are saved. WARNING: Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\ndirectory and are not automatically upgraded when Driverless AI is\nupgraded. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. If you did not build MLI on a model before upgrading Driverless AI,\n then you will not be able to view MLI on that model after upgrading. 
Before upgrading, be sure to run MLI jobs on models that you want to\n continue to interpret in future releases. If that MLI job appears in\n the list of Interpreted Models in your current version, then it will\n be retained after upgrading. If you did not build a MOJO pipeline on a model before upgrading\n Driverless AI, then you will not be able to build a MOJO pipeline on\n that model after upgrading.", + "prompt_type": "plain" + }, + { + "output": "Note: Stop Driverless AI if it is still running. Requirements\nWe recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere. Driverless AI ships with CUDA 11.2.2 for GPUs, but the driver\nmust exist in the host environment. Go to NVIDIA download driver to get the latest NVIDIA Tesla A/T/V/P/K\nseries drivers. For reference on CUDA Toolkit and Minimum Required\nDriver Versions and CUDA Toolkit and Corresponding Driver Versions, see\nhere . Note\nIf you are using K80 GPUs, the minimum required NVIDIA driver version is\n450.80.02. Upgrade Steps\n1. SSH into the IP address of the machine that is running Driverless\n AI. 2. Set up a directory for the version of Driverless AI on the host\n machine:\n3. Retrieve the Driverless AI package from https://www.h2o.ai/download/\n and add it to the new directory. 4. Load the Driverless AI Docker image inside the new directory:\n5. Copy the data, log, license, and tmp directories from the previous\n Driverless AI directory to the new Driverless AI directory:\n6.", + "prompt_type": "plain" + }, + { + "output": "Using the Standalone Python Scoring Pipeline in a Different Docker Container\nThe Standalone Python Scoring Pipeline runs inside of the Driverless AI\nDocker container. This is the recommended method for running the Python\nScoring Pipeline. If necessary, though, this pipeline can also be run\ninside of a different Docker container. The following steps describe how\nto do this. 
This setup assumes that you have a valid Driverless AI\nlicense key, which will be required during setup. It also assumes that\nyou have completed a Driverless AI experiment and downloaded the Scoring\nPipeline. 1. On the machine where you want to run the Python Scoring Pipeline,\n create a new directory for Driverless AI (for example, dai-nnn.) 2. Download the TAR SH version of Driverless AI from\n https://www.h2o.ai/download/ (for either Linux or IBM Power). 3. Use bash to execute the download and unpack it into the new\n Driverless AI folder. 4. Change directories into the new Driverless AI folder. 5. Run the following to install the Python Scoring Pipeline for your\n completed Driverless AI experiment:\n6.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Health API\nThe following sections describe the Driverless AI Health API. - health-api-overview\n- retrieve-health-status\n- health-api-json-attributes\nOverview\nThe Driverless AI Health API is a publicly available API that exposes\nbasic system metrics and statistics. Its primary purpose is to provide\ninformation for resource monitoring and auto-scaling of\nDriverless AI multinode clusters. The API outputs a\nset of metrics in a JSON format so that they can be used by tools like\nKEDA or K8S Autoscaler. Notes:\n- The Health API is only available in multinode or singlenode mode. For more information, refer to the worker_mode\n config.toml option. - For security purposes, the Health API endpoint can be disabled by\n setting the enable_health_api config.toml option\n to false. This setting is enabled by default. 
- The Health API is designed with the intention to provide information\n that is needed by users to write their own autoscaling logic for\n Multinode Driverless AI .", + "prompt_type": "plain" + }, + { + "output": "Using the DAI Health API\nTo retrieve Driverless AI's health status, create a GET request:\n GET http://{driverless-ai-instance-address}/apis/health/v1\nThis returns the following JSON response:\n {\n \"api_version\": \"1.0\",\n \"server_version\": \"1.10\",\n \"application_id\": \"dai-12345\",\n \"timestamp\": \"ISO 8601 Datetime\",\n \"last_system_interaction\": \"ISO 8601 Datetime\",\n \"is_idle\": true,\n \"active_users\": 3,\n \"resources\": {\n \"cpu_cores\": 150,\n \"gpus\": 12,\n \"nodes\": 5,\n },\n \"tasks\": {\n \"running\": 45,\n \"scheduled\": 123,\n \"scheduled_on_gpu\": 10,\n \"scheduled_on_cpu\": 50,\n },\n \"utilization\": {\n \"cpu\": 0.12,\n \"gpu\": 0.45,\n \"memory\": 0.56,\n },\n \"workers\": [\n {\n \"name\": \"NODE:LOCAL1\",\n \"running_tasks\": 4,\n \"scheduled_tasks\": 0\n },\n {\n \"name\": \"NODE:REMOTE2\",\n \"running_tasks\": 4,\n \"scheduled_tasks\": 11\n }\n ]\n }\nAttribute Definitions\nThe following is a list of relevant JSON attribute definitions.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI with H2O-3 Algorithms\n\nDriverless AI already supports a variety of\nalgorithms . This example shows how you can use\nour h2o-3-models-py recipe to include H2O-3 supervised learning\nalgorithms in your experiment. The available H2O-3 algorithms in the\nrecipe include:\n\n- Naive Bayes\n- GBM\n- Random Forest\n- Deep Learning\n- GLM\n- AutoML\n\nCaution: Because AutoML is treated as a regular ML algorithm here, the\nruntime requirements can be large. We recommend that you adjust the", + "prompt_type": "plain" + }, + { + "output": "max_runtime_secs`` parameters as suggested here:\nhttps://github.com/h2oai/driverlessai-recipes/blob/rel-1.9.0/models/algorithms/h2o-3-models.py#L45\n1. 
Start an experiment in Driverless AI by selecting your training\n dataset along with (optionally) validation and testing datasets and\n then specifying a Target Column. Notice the list of algorithms that\n will be used in the Feature evolution section of the experiment\n summary. In the example below, the experiment will use LightGBM and\n XGBoostGBM. 2. Click on Expert Settings. 3. Specify the custom recipe using one of the following methods:\n4. In the Expert Settings page, specify any additional settings and\n then click Save. This returns you to the experiment summary. 5. To include each of the new models in your experiment, return to the\n Expert Settings option. Click the Recipes > Include Specific Models\n option. Select the algorithm(s) that you want to include. Click Done\n to return to the experiment summary.", + "prompt_type": "plain" + }, + { + "output": "Viewing Explanations\nNote: Not all explanatory functionality is available for multinomial\nclassification scenarios. Driverless AI provides explanations for completed models. You can view\nthese by clicking the Explanations button on the Model Interpretation >\nSurrogate Models Dashboard page for an interpreted model. The UI lets you view global, cluster-specific, and local reason codes. You can also export the explanations to CSV. - Global Reason Codes: To view global reason codes, click Cluster and\n select Global from the list of options. With Global selected, click\n the Explanations button located in the upper-right corner. - Cluster Reason Codes: To view reason codes for a specific cluster,\n click Cluster and select a specific cluster from the list of\n options. With a cluster selected, click the Explanations button. 
- Local Reason Codes by Row Number: To view local reason codes for a\n specific row, select a point on the graph or type a value in the Row\n Number or Feature Value field.", + "prompt_type": "plain" + }, + { + "output": "Configuration and Authentication\n\nconfig-usage config_docs/index\n\nconfig_toml setting-environment-variables user-settings connectors\nnotifications export-artifacts language multinode snowflake-integration\npip-install\n\nauthentication", + "prompt_type": "plain" + }, + { + "output": "Add Custom Recipes\nCustom recipes are Python code snippets that can be uploaded into\nDriverless AI at runtime like plugins. Restarting Driverless AI is not\nrequired. If you do not have a custom recipe, you can select from a\nnumber of recipes available in the Recipes for H2O Driverless AI\nrepository. For more information and examples, refer to custom-recipes. To add a custom recipe to Driverless AI, click Add Custom Recipe and\nselect one of the following options:\n- From computer: Add a custom recipe as a Python or ZIP file from your\n local file system. - From URL: Add a custom recipe from a URL. - From Bitbucket: Add a custom recipe from a Bitbucket repository. To\n use this option, your Bitbucket username and password must be\n provided along with the custom recipe Bitbucket URL. Official Recipes (Open Source)\nTo access H2O's official recipes repository, click Official Recipes\n(Open Source). Editing the TOML Configuration\nTo open the built-in TOML configuration editor, click TOML in the\nexpert-settings window.", + "prompt_type": "plain" + }, + { + "output": "make_mojo_scoring_pipeline = \"off\"is displayed in the TOML editor. The TOML configuration editor lets you manually add, remove, or edit expert setting parameters. To confirm your changes, click **Save**. The experiment preview updates to reflect your specified configuration changes. For a full list of available settings, see :ref:`expert-settings`. .. 
note:: Do not edit the section below the ``[recipe_activation]`` line. This\n\n section provides Driverless AI with information about which custom\n recipes can be used by the experiment. This is important for keeping\n experiments comparable when performing retrain / refit operations.\n\nNote\n\nFor information on TOML, see https://toml.io/en/v0.4.0.", + "prompt_type": "plain" + }, + { + "output": "Automated Model Documentation (AutoDoc)\n\nThis section describes Driverless AI's AutoDoc feature.\n\nautodoc-using autodoc-placeholders", + "prompt_type": "plain" + }, + { + "output": "MOJO Scoring Pipelines\n\nThe MOJO Scoring Pipeline provides a standalone scoring pipeline that\nconverts experiments to MOJOs, which can be scored in real time. The\nMOJO Scoring Pipeline is a scoring engine that can be deployed in any\nJava environment (Java Runtime) or in Python or R environment (C++\nruntime) for scoring in real time or batch. For deployment options see\nDeploying the MOJO Pipeline to production.\n\nscoring-mojo-scoring-pipeline scoring-pipeline-cpp mojo2_javadoc\nscoring-klime-mojo-scoring-pipeline", + "prompt_type": "plain" + }, + { + "output": "Scoring on Another Dataset\n\nAfter you generate a model, you can use that model to make predictions\non another dataset.\n\n1. Click the Experiments link in the top menu and select the experiment\n that you want to use.\n2. On the completed Experiment page, click Model Actions > Predict.\n3. Select the new dataset (test set) that you want to score on. Note\n that this new dataset must include the same columns as the dataset\n used in the selected experiment.\n4. Select the columns from the test set to include in the predictions\n frame.\n5. Click Done to start the scoring process.\n6. Click the Download Predictions button after scoring is complete.\n\nNote: This feature runs batch scoring on a new dataset. 
You may notice\nslow speeds if you attempt to perform single-row scoring.", + "prompt_type": "plain" + }, + { + "output": "Productionizing Your Model\n\nH2O.ai outputs the best model in an experiment. This model can then be\ndownloaded and then saved to a production environment.\n\nRun the following commands in Python 3.8 to save the displayed model as\na .csv. Note that Python 3.8 is the only supported Python version for\nuse with H2O.ai.\n\n ## final pipeline (logic, not state)\n pipe = population[best_id].get_pipe()\n\n ## final pipeline state, based on LARGE training data\n train_df_munged, y_munged = pipe.fit_transform(train_df, y)\n #train_df_munged.to_csv(\"munged_amazon_train.csv\", index=False)\n\n ## Load Kaggle test set without response, convert to munged state\n # test = \"../../../../h2oai-benchmarks/Data/Amazon/test.csv\"\n # test_df = dt.fread(test).topandas()\n test_df = train_df\n test_df_munged = pipe.transform(test_df)\n #test_df_munged.to_csv(\"munged_amazon_test.csv\", index=False)", + "prompt_type": "plain" + }, + { + "output": "Visualizing the Scoring Pipeline\n\nA visualization of the scoring pipeline is available for each completed\nexperiment.\n\nNotes:\n\n- This pipeline is best viewed in the latest version of Chrome.\n- A .png image of this pipeline is available in the AutoDoc \n and in the mojo.zip file ONLY with the Driverless AI Docker image.\n For tar, deb, and rpm installs, you must install Graphviz manually\n in order for the visualization pipeline to be included in the\n AutoDoc and mojo.zip.\n\nClick the Visualize Scoring Pipeline (Experimental) button on the\ncompleted experiment page to view the visualization.\n\n[]\n\nTo view a visual representation of a specific model, click on the oval\nthat corresponds with that model.\n\n[]\n\n[]\n\nTo change the orientation of the visualization, click the Transpose\nbutton in the bottom right corner of the screen.\n\n[]", + "prompt_type": "plain" + }, + { + "output": "Configuration 
Security\nDriverless AI provides the option to store sensitive or secure\nconfiguration information in an encrypted keystore as an alternative to\nkeeping security settings as clear text in the config.toml file. Updates to config override chain\nThe Configuration Override Chain has been updated to load the settings\nfrom the encrypted keystore after the settings are read from the plain\ntext config.toml file. The Environment Variable can still override the\nvalues from the keystore:\n 1. h2oai/config/config.toml\n [Internal, not visible to users]\n 2. config.toml\n [Place file in a folder/mount file in docker container and provide path\n in \"DRIVERLESS_AI_CONFIG_FILE\" environment variable]\n 3. Keystore file\n [Set keystore_file parameter in config.toml or environment variable\n \"DRIVERLESS_AI_KEYSTORE_FILE\" to point to a valid DAI keystore file \n generated using the h2oai.keystore tool. If env variable is set, the value\n in the config.toml for keystore_file path is overridden]\n 4.", + "prompt_type": "plain" + }, + { + "output": "They must have the prefix \"DRIVERLESS_AI_\" followed\n by the variable name in caps. For example, \"authentication_method\"\n can be provided as \"DRIVERLESS_AI_AUTHENTICATION_METHOD\"]\nKeystore setup workflow\nCreating the keystore\nAlthough the keystore file can contain any configuration parameter\nsupported by the config.toml, it is recommended to store only config\nparameters that contain secure/sensitive information in the keystore\nfile and use the regular config.toml file for other config parameters. Step 1: Create a cleartext config subset\nTo start, create a file config.clear that follows the TOML syntax of a\nregular config.toml file and contains the config parameters that you\nwant to store securely. 
For example:\n vagrant@ubuntu-bionic:~$ cat /home/vagrant/config.clear\n # ldap connection details\n ldap_bind_password = \"somepassword\"\n # Snowflake Connector credentials\n snowflake_url = \"https://sampleurl\"\n snowflake_user = \"sampleuser\"\n snowflake_password = \"samplepass\"\n snowflake_account = \"sampleaccount\"\n vagrant@ubuntu-bionic:~$\nStep 2: Using the h2oai.keystore tool to create keystore\nThe keystore should be placed so that it is accessible by root or the\nuser id with which the Driverless AI process is running.", + "prompt_type": "plain" + }, + { + "output": "h2oai.keystoretool: - The keystore tool needs to be run asrootand within the context of Driverless AI Python environment provided by thedai-env.shscript. - Theadd-keyscommand accepts the path to keystore as the first argument and the clear text config.toml subset as the second. - If the keystore does not exist, it is created. - All keys in theconfig.clearare either Inserted or Updated in the keystore. If a key already exists in the key store, it is updated. If the keystore contains any keys that are not inconfig.clear, they are not altered. - Once the keystore file is created, it is recommended to ensure the following: - Ownership is with root user with read and write permissions. - Change group ownership to the Driverless group (or the appropriate ID that matches the group ID with which the Driverless processes run in your system) with read only permissions. No other user or group should have read access to this file. 
- Theconfig.keystorefile is created along with the ownership permissions.", + "prompt_type": "plain" + }, + { + "output": "If root access shell is available; this step can be skipped (root) # /opt/h2oai/dai/dai-env.sh python -m h2oai.keystore add-keys /etc/dai/config.keystore /home/vagrant/config.clear ....some output here ====================================================================== Key: ldap_bind_password; Action: Inserted Key: snowflake_url; Action: Inserted Key: snowflake_user; Action: Inserted Key: snowflake_password; Action: Inserted Key: snowflake_account; Action: Inserted (root) # ls -l /etc/dai total 240 -rw-rw-r-- 1 root root 353 Jul 14 03:28 EnvironmentFile.conf -rw-r--r-- 1 root root 210 Jul 20 06:57 Group.conf -rw-r--r-- 1 root root 209 Jul 20 06:57 User.conf -rw-r----- 1 root dai 236 Jul 20 07:09 config.keystore -rw-r--r-- 1 root root 157135 Jul 20 07:17 config.toml -rw-rw-r-- 1 root root 347 Jul 14 03:28 jaas.conf -rw-r--r-- 1 root root 62206 Jul 20 06:57 redis.conf (root) # chown root:dai /etc/dai/config.keystore (root) # chmod 640 /etc/dai/config.keystore **Step 3: Using h2oai.keystore tool to manage keystore** Theh2oai.keystoretool provides three commands for keystore management: -add-keys: Adds or updates the Driverless AI secrets keystore with config.", + "prompt_type": "plain" + }, + { + "output": "Using AutoDoc\nThe following sections describe Driverless AI's AutoDoc feature. - understanding-autodoc\n- generate-autodoc\n- configure-autodoc\n- autodoc-custom\nUnderstanding AutoDoc\nThe AutoDoc feature is used to generate automated machine learning\ndocumentation for individual Driverless AI experiments. This editable\ndocument contains an overview of the experiment and includes other\nsignificant details like feature engineering and final model\nperformance. To download and view a sample experiment report in Word format,\nclick here . AutoDoc Support\nAutoDoc only supports resumed experiments for certain Driverless AI\nversions. 
See the following table to check the types of resumed\nexperiments that are supported for your version:\n ---------------------------------------------------------------------\n AutoDoc Support for Resumed 1.7.0 and 1 .7 1.9.0 and later\n Experiments Via older .1 \n ---------------------------------- ------------ ---- ----------------\n New experiment with same settings yes y es yes\n Restart from last checkpoint no y es yes\n Retrain final pipeline no no yes\n ---------------------------------------------------------------------\nNote\n- To ensure that AutoDoc pipeline visualizations are generated correctly\non native installations, installing fontconfig is recommended.", + "prompt_type": "plain" + }, + { + "output": "- Reports for unsupported resumed experiments\nwill still build, but they will only include the following text:\n\"AutoDoc not yet supported for resumed experiments.\" Custom AutoDocs\nAll Driverless AI experiments can generate either a standard or custom\nAutoDoc. A standard AutoDoc uses the default AutoDoc template that is\nincluded with Driverless AI, while a custom AutoDoc uses a\ncustomer-specific template that Driverless AI automatically populates. If you are interested in creating a custom AutoDoc, contact\nsupport@h2o.ai. If you have already purchased a custom AutoDoc template\nand want to learn how to generate custom AutoDocs from your experiments,\nsee autodoc-custom. Note\n- For a list of custom AutoDoc placeholders, see autodoc_placeholders. -\nCustom AutoDocs are Driverless AI version-specific. BYOR Recipes with AutoDoc\nThe experiment AutoDoc supports experiments that use custom scorers,\ntransformers, or models. Custom scorers and transformers are documented\nthe same as Driverless AI scorers and transformers.", + "prompt_type": "plain" + }, + { + "output": "(Note: custom-transformer descriptions are\ncurrently shown as \"None\" in this section.) 
For custom models, the\nstandard performance metrics and plots are included; however,\ninformation that Driverless AI cannot access is not included, or is\nshown as \"custom\", \"unavailable\", or \"auto.\" For example, in the Model\nTuning table, the booster is listed as \"custom\", and in the Alternative\nModels section, the model package documentation is listed as\n\"unavailable.\" Generating an AutoDoc\nThree different approaches can be used to generate an AutoDoc:\n- autodoc-experiment-ui\n- autodoc-mli-ui\n- autodoc-python-client\nNotes:\n- For more information on how to configure plots/tables and\n enable/disable specific sections in the AutoDoc, see\n configure-autodoc. - These approaches also apply to custom AutoDocs. For more\n information, see autodoc-custom. Experiment UI\nNavigate to the Experiments page and click on the completed experiment\nyou want to generate an AutoDoc for. If AutoDoc was not previously enabled for the experiment, click the\nBuild AutoDoc button.", + "prompt_type": "plain" + }, + { + "output": "[]\nMLI UI\nNavigate to the MLI page and click on the completed experiment you want\nto generate an AutoDoc for. Select AutoDoc from the MLI RECIPES's menu and optionally select\nexplainers that can be included in the AutoDoc (the standard AutoDoc\nsupports the k-LIME Explainer and DT Surrogate Explainer). []\nThe Standard AutoDoc with Explainers:\n[]\nPython Client\n- autodoc-generate-driverlessai\nAutoDoc Functions\n- create_and_download_autodoc()\n- make_autodoc_sync()\nFor local downloads:\n create_and_download_autodoc(\n model_key:str,\n template_path:str='',\n config_overrides:str='',\n dest_path:str='. 
',\n mli_key:str='',\n individual_rows:list=[], \n external_dataset_keys:list=[])\nTo save an AutoDoc to the DAI experiment directory (recommended if local\ndownloads are disabled):\n make_autodoc_sync(\n model_key:str,\n template_path:str='',\n config_overrides:str='',\n mli_key:str='',\n individual_rows:list=[], \n external_dataset_keys:list=[])\n- model_key: The experiment key string.", + "prompt_type": "plain" + }, + { + "output": "- config_overrides: The TOML string format with configurations\n overrides for the AutoDoc. - dest_path: The local path where the AutoDoc should be saved. - mli_key: The mli key string. - individual_rows: List of row indices for rows of interest in the\n training dataset, for which additional information can be shown\n (ICE, LOCO, KLIME). - external_dataset_keys: List of DAI dataset keys. driverlessai\nConnect to a running DAI instance:\n import driverlessai\n address = 'http://ip_where_driverless_is_running:12345'\n username = 'username'\n password = 'password'\n dai = driverlessai.Client(address=address, username=username, password=password)\nGenerate an AutoDoc and download it to your current working directory:\n report = dai._backend.create_and_download_autodoc(\n model_key=exp_key,\n dest_path:str='. 
',\n )\nConfiguring AutoDoc\nThe plots, tables, and sections of an AutoDoc can be configured through\nfour different workflows:\n- config-experiment-expert\n- config-mli-expert\n- config-python-client\n- config.toml file \nYou can also configure the font of an AutoDoc by setting\nthe H2O_AUTODOC_PLOTS_FONT_FAMILY environment variable.", + "prompt_type": "plain" + }, + { + "output": "The following are several commonly used\nconfiguration parameters:\n import toml\n # Set the document to limit features displayed to the top ten\n config_dict={\n \"autodoc_num_features\": 10\n }\n # Partial Dependence Plots (PDP) and ICE Plots\n config_dict[\"autodoc_pd_max_runtime\"] = 60\n config_dict[\"autodoc_num_rows\"] = 4\n # Prediction statistics\n config_dict[\"autodoc_prediction_stats\"] = True\n config_dict[\"autodoc_prediction_stats_n_quantiles\"] = 10\n # Population Stability Index (PSI)\n config_dict[\"autodoc_population_stability_index\"] = True\n config_dict[\"autodoc_population_stability_index_n_quantiles\"] = 10\n # Permutation feature importance\n config_dict[\"autodoc_include_permutation_feature_importance\"] = True\n config_dict[\"autodoc_feature_importance_scorer\"] = \"GINI\"\n config_dict[\"autodoc_feature_importance_num_perm\"] = 1\n # Response rates (only applicable to Binary classification)\n config_dict[\"autodoc_response_rate\"] = True\n config_dict[\"autodoc_response_rate_n_quantiles\"] = 10\n toml_string = toml.dumps(config_dict)\n print(toml_string)\nAfter setting these parameters, generate an AutoDoc and download it to\nyour current working directory:\ndriverlessai\n report = dai._backend.create_and_download_autodoc(\n model_key=exp_key,\n config_overrides=config_overrides,\n dest_path:str='.", + "prompt_type": "plain" + }, + { + "output": "Note: The following steps assume that DAI has been installed on an EC2\ninstance or an Ubuntu lab machine. 
These steps still apply if you are\nusing H2O Enterprise Puddle to run a DAI instance\u2014just log in to the EC2\ninstance where the DAI service is running using the provided SSH key. If the DAI service has not been started\n1. Create an EC2 instance with enough memory and storage to run DAI. 2. Install the font you want to use. In this example, the font\n TakaoPGothic is used. 3. Create and install the DAI debian file. 4. Set the font setting environment variable by adding the following\n line to the EnvironmentFile.conf file. 5. Start the DAI service. If the DAI service has already been started\n1. Ensure that the font is available on your system. In this example,\n the font TakaoPGothic is used. 2. Stop the DAI service. 3. Set the font setting environment variable by adding the following\n line to the EnvironmentFile.conf file. 4. Start the DAI service. Generating a Custom AutoDoc\nThis section describes how to generate an AutoDoc from a custom AutoDoc\ntemplate.", + "prompt_type": "plain" + }, + { + "output": "config.tomlsettings: -autodoc_template: Specify the path for the main template file. -autodoc_additional_template_folder: If you have additional custom sub-templates, use this setting to specify the location of additional AutoDoc templates. Note that if this field is left empty, only the default sub-templates folder is used. To generate custom AutoDocs, Driverless AI must have access to the custom template(s). To make sure that Driverless AI has access, update the path in the following example with your own path: .. code:: autodoc_template=\"/full/path/to/your/custom_autodoc_template.docx\" # Required if you have additional custom sub-templates. 
autodoc_additional_template_folder=\"/path/to/additional_templates_folder\" Custom AutoDoc for Individual Experiments ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can use the Python Client to generate standard or custom AutoDocs from an experiment by setting thetemplate_pathvariable to your custom AutoDoc's path: .. code:: template_path='/full/path/to/your/custom_autodoc_template.docx' **Python Client**:driverlessai``\n report = dai._backend.create_and_download_autodoc(\n model_key=exp_key,\n template_path=template_path,\n dest_path:str='.", + "prompt_type": "plain" + }, + { + "output": "Snowflake Setup\n\nDriverless AI allows you to explore Snowflake data sources from within\nthe Driverless AI application. This section provides instructions for\nconfiguring Driverless AI to work with Snowflake. This setup requires\nyou to enable authentication. If you enable Snowflake connectors, those\nfile systems will be available in the UI, but you will not be able to\nuse those connectors without authentication.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run --runtime=nvidia(>= Docker 19.03) ornvidia-docker(< Docker 19.03) command when starting the Driverless AI Docker image. Usedocker versionto check which version of Docker you are using. Description of Configuration Attributes --------------------------------------- -snowflake_account: The Snowflake account ID -snowflake_user: The username for accessing the Snowflake account -snowflake_password: The password for accessing the Snowflake account -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. Enable Snowflake with Authentication ------------------------------------ .. container:: tabs .. group-tab:: Docker Image Installs This example enables the Snowflake data connector with authentication by passing theaccount,user, andpasswordvariables. .. 
code:: bash nvidia-docker run \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,snow\" \\ -e DRIVERLESS_AI_SNOWFLAKE_ACCOUNT = \"\" \\ -e DRIVERLESS_AI_SNOWFLAKE_USER = \"\" \\ -e DRIVERLESS_AI_SNOWFLAKE_PASSWORD = \"\"\\ -u `id -u`:`id -g` \\ -p 12345:12345 \\ -v `pwd`/data:/data \\ -v `pwd`/log:/log \\ -v `pwd`/license:/license \\ -v `pwd`/tmp:/tmp \\ -v `pwd`/service_account_json.json:/service_account_json.json \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure Snowflake options in the config.toml file, and then specify that file when starting Driverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "Configure the Driverless AI config.toml file. Set the following configuration options. .. -enabled_file_systems = \"file, snow\"-snowflake_account = \"\"-snowflake_user = \"\"-snowflake_password = \"\"2. Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example enables the Snowflake data connector with authentication by passing theaccount,user, andpasswordvariables.", + "prompt_type": "plain" + }, + { + "output": "Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. 
.. :: # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, snow\" # Snowflake Connector credentials snowflake_account = \"\" snowflake_user = \"\" snowflake_password = \"\" 3. Save the changes when you are done, then stop/restart Driverless AI. Adding Datasets Using Snowflake ------------------------------- After the Snowflake connector is enabled, you can add datasets by selecting **Snowflake** from the **Add Dataset (or Drag and Drop)** drop-down menu. .. figure:: ../images/add_dataset_dropdown.png :alt: :width: 237px :height: 338px Specify the following information to add your dataset. 1. **Enter Database**: Specify the name of the Snowflake database that you are querying. 2. **Enter Warehouse**: Specify the name of the Snowflake warehouse that you are querying. 3. 
**Enter Schema**: Specify the schema of the dataset that you are querying.", + "prompt_type": "plain" + }, + { + "output": "**Enter Name for Dataset to Be Saved As**: Specify a name for the dataset to be saved as. Note that this can only be a CSV file (for example, **myfile.csv**). 5. **Enter Username**: (Optional) Specify the username associated with this Snowflake account. This can be left blank if snowflake_user was specified in the config.toml when starting Driverless AI; otherwise, this field is required. 6. **Enter Password**: (Optional) Specify the password associated with this Snowflake account. This can be left blank if snowflake_password was specified in the config.toml when starting Driverless AI; otherwise, this field is required. 7. **Enter Role**: (Optional) Specify your role as designated within Snowflake. See https://docs.snowflake.net/manuals/user-guide/security-access-control-overview.html for more information. 8. **Enter Region**: (Optional) Specify the region of the warehouse that you are querying. This can be found in the Snowflake-provided URL to access your database (as in **...snowflakecomputing.com**).", + "prompt_type": "plain" + }, + { + "output": "9. **Enter File Formatting Parameters**: (Optional) Specify any additional parameters for formatting your datasets. Available parameters are listed in https://docs.snowflake.com/en/sql-reference/sql/create-file-format.html#type-csv. (**Note**: Use only parameters for TYPE = CSV.) For example, if your dataset includes a text column that contains commas, you can specify a different delimiter using FIELD_DELIMITER='character'. Multiple parameters must be separated with spaces: .. :: FIELD_DELIMITER=',' FIELD_OPTIONALLY_ENCLOSED_BY=\"\" SKIP_BLANK_LINES=TRUE **Note**: Be sure that the specified delimiter is not also used as a character within a cell; otherwise an error will occur. 
For example, you might specify the following to load the \"AMAZON_REVIEWS\" dataset: - Database: UTIL_DB - Warehouse: DAI_SNOWFLAKE_TEST - Schema: AMAZON_REVIEWS_SCHEMA - Query: SELECT \\* FROM AMAZON_REVIEWS - Enter File Formatting Parameters (Optional): FIELD_OPTIONALLY_ENCLOSED_BY = '\"' In the above example, if the FIELD_OPTIONALLY_ENCLOSED_BY option is not set, the following row will result in a failure to import the dataset (as the dataset's delimiter is a comma by default): :: positive, 2012-05-03,Wonderful\\, tasty taffy,0,0,3,5,2012,Thu,0 **Note**: Numeric columns from Snowflake that have NULL values are sometimes converted to strings (for example, N).", + "prompt_type": "plain" + }, + { + "output": "H2O Drive setup\nH2O Drive is an object-store for H2O AI Cloud. This page describes how\nto configure Driverless AI to work with H2O Drive. Note: For more information on the H2O Drive, refer to the official\ndocumentation. Description of relevant configuration attributes\nThe following are descriptions of the relevant configuration attributes\nwhen enabling the H2O Drive data connector:\n- enabled_file_systems: A list of file systems you want to enable. To\n enable the H2O Drive data connector, h2o_drive must be added to\n this list of data sources. - h2o_drive_endpoint_url: The H2O Drive server endpoint URL. - h2o_drive_access_token_scopes: A space-separated list of OpenID\n scopes for the access token that are used by the H2O Drive\n connector. - h2o_drive_session_duration: The maximum duration in seconds for a\n session with the H2O Drive. - authentication_method: The authentication method used by DAI. When\n enabling the H2O Drive data connector, this must be set to\n OpenID Connect (authentication_method=\"oidc\").", + "prompt_type": "plain" + }, + { + "output": "Data Recipe File Setup\nDriverless AI lets you explore data recipe file data sources from within\nthe Driverless AI application. 
This section provides instructions for\nconfiguring Driverless AI to work with local data recipe files. When\nenabled (default), you will be able to modify datasets that have been\nadded to Driverless AI. (Refer to modify_by_recipe for more\ninformation.) Notes:\n- This connector is enabled by default. These steps are provided in\n case this connector was previously disabled and you want to\n re-enable it. - Depending on your Docker install version, use either the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command when starting the Driverless AI Docker image. Use docker version to check which version of Docker you are using. Enable Data Recipe File\nDocker Image Installs\nThis example enables the data recipe file data connector. nvidia-docker run \\\n --shm-size=256m \\\n --add-host name.node:172.16.2.186 \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,upload,recipe_file\" \\\n -p 12345:12345 \\\n --init -it --rm \\\n -v /tmp/dtmp/:/tmp \\\n -v /tmp/dlog/:/log \\\n -v /tmp/dlicense/:/license \\\n -v /tmp/ddata/:/data \\\n -u $(id -u):$(id -g) \\\n h2oai/dai-ubi8-x86_64:|tag|\nDocker Image with the config.toml\nThis example shows how to enable the Upload Data Recipe connector in the\nconfig.toml file, and then specify that file when starting Driverless AI\nin Docker.", + "prompt_type": "plain" + }, + { + "output": "1. Configure the Driverless AI config.toml file. Set the following\n configuration options. - enabled_file_systems = \"file, upload, recipe_file\"\n2. Mount the config.toml file into the Docker container. 
nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n --add-host name.node:172.16.2.186 \\\n -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\\n -p 12345:12345 \\\n -v /local/path/to/config.toml:/path/in/docker/config.toml \\\n -v /etc/passwd:/etc/passwd:ro \\\n -v /etc/group:/etc/group:ro \\\n -v /tmp/dtmp/:/tmp \\\n -v /tmp/dlog/:/log \\\n -v /tmp/dlicense/:/license \\\n -v /tmp/ddata/:/data \\\n -u $(id -u):$(id -g) \\\n h2oai/dai-ubi8-x86_64:|tag|\nNative Installs\nThis example enables the Upload Data Recipe data connector. Note that\nrecipe_file is enabled by default. 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example:\n2. Specify the following configuration options in the config.toml file.", + "prompt_type": "plain" + }, + { + "output": "Interpreting a Model\nModel interpretations can be run on a Driverless AI experiment or on the\npredictions created by an external model (that is, a model not created\nby Driverless AI). Use the Interpret This Model button on a completed experiment page to\ninterpret a Driverless AI model on original and transformed features. You can also click the MLI link from the top navigation menu to\ninterpret either a Driverless AI model or an external model. - Interpreting a Driverless AI Model \n- Interpreting Predictions From an External Model \nInterpreting a Driverless AI Model\nA completed Driverless AI model can be interpreted from either the\nInterpreted Models page or the completed_experiment. - from-mli-page\n- from-exp-page\nNote\n- This release deprecates experiments run in 1.8.9 and earlier. MLI\nmigration is not supported for experiments from versions <= 1.8.9. 
This\nmeans that you can't directly run interpretations on a Driverless AI\nmodel built using versions 1.8.9 and earlier, but you can still view\ninterpretations built using those versions.", + "prompt_type": "plain" + }, + { + "output": "- MLI is not supported for Image or\nmulticlass Time Series experiments. - MLI does not require an Internet\nconnection to run on current models. - To specify a port of a specific\nH2O instance for use by MLI, use the h2o_port\nconfig.toml setting. You can also specify an IP\naddress for use by MLI with the h2o_ip setting. Run Interpretations From Interpreted Models Page\nThe following steps describe how to run an interpretation from the\nInterpreted Models page. 1. Click the MLI link in the upper-right corner of the UI to view a\n list of interpreted models. 2. Click the New Interpretation button. The Interpretation Settings\n page is displayed. 3. Select a dataset to use for the interpretation. The selected\n dataset must contain the same columns as the training dataset used\n for the experiment. 4. Specify the Driverless AI model that you want to use for the\n interpretation. After you select a model, the Target Column used\n for the model is automatically selected.", + "prompt_type": "plain" + }, + { + "output": "Optionally specify which MLI recipes (or\n Explainers) to run. You can also change\n Explainer (recipe) specific settings when\n selecting which recipes to use for the interpretation. 6. Optionally specify any additional\n Interpretation Expert Settings to use when\n running this interpretation. 7. Optionally specify a weight column. 8. Optionally specify one or more dropped columns. Columns that were\n dropped when the model was created are automatically dropped for\n the interpretation. 9. Click the Launch MLI button. Run Interpretation From Completed Experiment Page\nThe following steps describe how to run an interpretation from the\ncompleted_experiment. 1. 
On the Completed Experiment page, click the Interpret This Model\n button. 2. Select a dataset to use for the interpretation. The selected dataset\n must contain the same columns as the training dataset used for the\n experiment.", + "prompt_type": "plain" + }, + { + "output": "Select one of the following options:\n - With Default Settings: Run an interpretation using the default\n settings. - With Custom Settings: Run an interpretation using custom\n settings. Selecting this option opens the Interpretation\n Settings page, where you can specify which\n MLI recipes (explainers) to use for the\n interpretation and change\n explainer-specific settings and\n interpretation expert settings . To run\n an interpretation with your specified custom settings, click\n the Launch MLI button. The interpretation includes a summary of the interpretation,\ninterpretations using the built Driverless AI model, and interpretations\nusing surrogate models that are built on the predictions from the\nDriverless AI model. For information on the available plots, see\ninterpret-regular-understand-model. The plots are interactive, and the logs / artifacts can be downloaded by\nclicking on the Actions button.", + "prompt_type": "plain" + }, + { + "output": "More information about this page is available in the\nUnderstanding the Model Interpretation Page \nsection later in this chapter. []\nInterpreting Predictions From an External Model\nModel Interpretation does not need to be run on a Driverless AI\nexperiment. You can train an external model and run Model\nInterpretability on the predictions from the model. This can be done\nfrom the MLI page. 1. Click the MLI link in the upper-right corner of the UI to view a\n list of interpreted models. 2. Click the New Interpretation button. 3. Leave the Select Model option to none\n 4. Select the dataset that you want to use for the model\n interpretation. This must include a prediction column that was\n generated by the external model. 
If the dataset does not have\n predictions, then you can join the external predictions. An\n example showing how to do this in Python is available in the Run\n Model Interpretation on External Model Predictions section of the\n Credit Card Demo.", + "prompt_type": "plain" + }, + { + "output": "Specify a Target Column (actuals) and the Prediction Column\n (scores from the external model). 6. Optionally specify any additional MLI\n Expert Settings to use when running this\n interpretation. 7. Optionally specify a weight column. 8. Optionally specify one or more dropped columns. Columns that were\n dropped when the model was created are automatically dropped for\n the interpretation. 9. Click the Launch MLI button. Note: When running interpretations on an external model, leave the\n Select Model option empty. That option is for selecting a Driverless\n AI model. The generated interpretation includes the plots and explanations created\nusing the surrogate models and a summary. For more information, see\ninterpret-regular-understand-model. Explainer Recipes\nDriverless AI Machine Learning Interpretability comes with a number of\nout-of-the-box explainer recipes for model interpretation that can be\nenabled when\nrunning a new interpretation from the MLI page .", + "prompt_type": "plain" + }, + { + "output": "And a list of explainer\nspecific expert settings can be found here . 
The following is a list of available recipes:\n- Absolute Permutation Feature Importance\n- AutoDoc\n- Disparate Impact Analysis\n- Interpretability Data ZIP (Surrogate and Shapley Techniques)\n- NLP Leave-one-covariate-out (LOCO)\n- NLP Partial Dependence Plot\n- NLP Tokenizer\n- NLP Vectorizer + Linear Model (VLM) Text Feature Importance\n- Original Feature Importance\n- Partial Dependence Plot\n- Relative Permutation Feature Importance\n- Sensitivity Analysis\n- Shapley Summary Plot for Original Features (Naive Shapley Method)\n- Shapley Values for Original Features (Kernel SHAP Method)\n- Shapley Values for Original Features (Naive Method)\n- Shapley Values for Transformed Features\n- Surrogate Decision Tree\n- Surrogate Random Forest Importance\n- Surrogate Random Forest Leave-one-covariate-out (LOCO)\n- Surrogate Random Forest Partial Dependence Plot\n- Transformed Feature Importance\n- k-LIME / LIME-SUP\n []\nThis recipe list is extensible, and users can create their own custom\nrecipes.", + "prompt_type": "plain" + }, + { + "output": "[]\nInterpretation Expert Settings\nWhen interpreting from the MLI page , a variety of\nconfiguration options are available in the Interpretation Expert\nSettings panel that let you customize interpretations. Recipe-specific\nsettings are also available for some recipes. Use the search bar to\nrefine the list of settings or locate a specific setting. For more information on each of these settings, see\ninterpretation-expert-settings. Also see for\nexplainer (recipe) specific expert settings. Notes:\n - The selection of available expert settings is determined by the\n type of model you want to interpret and the specified LIME method. - Expert settings are not available for time-series models. 
Expert Settings from Recipes (Explainers)\nFor some recipes like\nDriverless AI Partial dependence ,\nDisparate Impact Analysis (DIA) explainer and\nDT (Decision Tree) Surrogate explainer , some of the\nsettings can be toggled from the recipe page.", + "prompt_type": "plain" + }, + { + "output": "Before You Begin\nDriverless AI can run on machines with only CPUs or machines with CPUs\nand GPUs. For the best (and intended-as-designed) experience, install\nDriverless AI on modern data center hardware with GPUs and CUDA support. Feature engineering and model building are primarily performed on CPU\nand GPU respectively. For this reason, Driverless AI benefits from\nmulti-core CPUs with sufficient system memory and GPUs with sufficient\nRAM. For best results, we recommend GPUs that use the Pascal or Volta\narchitectures. The older K80 and M60 GPUs available in EC2 are supported\nand very convenient, but not as fast. Ampere-based NVIDIA GPUs are also\nsupported on x86, as Driverless AI ships with NVIDIA CUDA 11.2.2\ntoolkit. Image processing and NLP use cases in particular, benefit\nsignificantly from GPU usage. For details, see gpu_in_dai. Driverless AI supports local, LDAP, and PAM authentication. Authentication can be configured by setting environment variables or via\na config.toml file.", + "prompt_type": "plain" + }, + { + "output": "Note that the default authentication method is \"unvalidated.\" Driverless AI also supports HDFS, S3, Google Cloud Storage, Google Big\nQuery, KDB, MinIO, and Snowflake access. Support for these data sources\ncan be configured by setting environment variables for the data\nconnectors or via a config.toml file. Refer to the Data Connectors\nsection for more information. Sizing Requirements\nSizing Requirements for Native Installs\nDriverless AI requires a minimum of 5 GB of system memory in order to\nstart experiments and a minimum of 5 GB of disk space in order to run a\nsmall experiment. 
Note that these limits can be changed in the config.toml\nfile. We recommend that you have sufficient system CPU memory (64 GB or\nmore) and 1 TB of free disk space available. Sizing Requirements for Docker Installs\nFor Docker installs, we recommend 1 TB of free disk space. Driverless AI\nuses approximately 38 GB. In addition, the unpacking/temp files require\nspace on the same Linux mount /var during installation.", + "prompt_type": "plain" + }, + { + "output": "GPU Sizing Requirements\nIf you are running Driverless AI with GPUs, ensure that your GPU has\ncompute capability >=3.5 and at least 4GB of RAM. If these requirements\nare not met, then Driverless AI switches to CPU-only mode. Sizing Requirements for Storing Experiments\nWe recommend that your Driverless tmp directory has at least 500 GB to 1\nTB of space. The (Driverless) tmp directory holds all experiments and\nall datasets. We also recommend that you use SSDs (preferably NVMe). Virtual Memory Settings in Linux\nIf you are running Driverless AI on a Linux machine, we recommend\nsetting the overcommit memory to 0. The setting can be changed with the\nfollowing command:\n sudo sh -c \"/bin/echo 0 > /proc/sys/vm/overcommit_memory\"\nThis is the default value that indicates that the Linux kernel is free\nto overcommit memory. If this value is set to 2, then the Linux kernel\ndoes not overcommit memory. In the latter case, the memory requirements\nof Driverless AI may surpass the memory allocation limit and prevent the\nexperiment from completing.", + "prompt_type": "plain" + }, + { + "output": "--shm-size=2g``\n\nWithout this option, those packages will fail. Triton inference server\nalso requires this option be set, and if under heavy load, may require\neven larger values than 2g.\n\nDocker resource limits\n\nDAI controls various resources and needs more resources than what\nsystems typically set by default. 
You can use the following option to\nensure that DAI is given enough resources:", + "prompt_type": "plain" + }, + { + "output": "--ulimit nofile=131071:131071 --ulimit nproc=16384:16384``\n\nWithout this option, DAI crashes under load.\n\nDocker NICE\n\nAs stated in the official Docker documentation, the", + "prompt_type": "plain" + }, + { + "output": "--cap-add=SYS_NICE option grants the container the CAP_SYS_NICE capability, which lets the container raise process nice values, set real-time scheduling policies, set CPU affinity, and other operations. If this flag isn't passed when starting the container, DAI isn't able to control resources and can end up with all processes only using a single core. This is also required to use the built-in NVIDIA Triton Inference Server and its use of non-uniform memory access (NUMA) control. Memory Requirements per Experiment ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ As a rule of thumb, the memory requirement per experiment is approximately 5 to 10 times the size of the dataset. Dataset size can be estimated as the number of rows x columns x 4 bytes; if text is present in the data, then more bytes per element are needed. Backup Strategy --------------- The **Driverless AI tmp** directory is used to store all experiment artifacts such as deployment artifacts and MLIs. It also stores the master.db database that tracks users to Driverless artifacts.", + "prompt_type": "plain" + }, + { + "output": "We recommend periodically stopping Driverless AI and backing up the **Driverless AI** **tmp** directory to ensure that a copy of the Driverless AI state is available for instances where you may need to revert to a prior state. Upgrade Strategy ---------------- When upgrading Driverless AI, note that: - Image models from version 1.9.x aren't supported in 1.10.x. All other models from 1.9.x are supported in 1.10.x. - (**MLI**) Interpretations made in version 1.9.0 are supported in 1.9.x and later. 
- (**MLI**) Interpretations made in version 1.8.x aren't supported in 1.9.x and later. However, interpretations made in 1.8.x can still be viewed and rerun. - We recommend following these steps before upgrading: - *Build MLI models*: Before upgrading, run MLI jobs on models that you want to continue to interpret in future Driverless AI releases. If an MLI job appears in the list of Interpreted Models in your current version, then it is retained after upgrading.", + "prompt_type": "plain" + }, + { + "output": "- Stop Driverless AI and make a backup (copy) of the **Driverless AI** **tmp** directory. The upgrade process inherits the service user and group from /etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually specify the DAI_USER or DAI_GROUP environment variables during an upgrade. **Note**: Driverless AI does not support data migration from a newer version to an older version. If you rollback to an older version of Driverless AI after upgrading, newer versions of the **master.db** file will not work with the older Driverless AI version. For this reason, we recommend saving a copy of the older 'tmp' directory to fully restore the older Driverless AI version's state. Other Notes ----------- Supported Browsers ~~~~~~~~~~~~~~~~~~ Driverless AI is tested most extensively on Chrome and Firefox. For the best user experience, we recommend using the latest version of Chrome. You may encounter issues if you use other browsers or earlier versions of Chrome and/or Firefox.", + "prompt_type": "plain" + }, + { + "output": "ulimitoptions by using the--ulimitargument todocker\nrun. The following is an example of how to configure these options: :: --ulimit nproc=65535:65535 \\ --ulimit nofile=4096:8192 \\ Refer to https://docs.docker.com/engine/reference/commandline/run/#set-ulimits-in-container---ulimit for more information on these options. 
Note about nvidia-docker 1.0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If you have nvidia-docker 1.0 installed, you need to remove it and all existing GPU containers. Refer to https://github.com/NVIDIA/nvidia-docker/blob/master/README.md for more information. Deprecation ofnvidia-smi~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Thenvidia-smi``\ncommand has been deprecated by NVIDIA. Refer to\nhttps://github.com/nvidia/nvidia-docker#upgrading-with-nvidia-docker2-deprecated\nfor more information. The installation steps have been updated for\nenabling persistence mode for GPUs. Note About CUDA Versions\nDriverless AI ships with CUDA 11.2.2 for GPUs, but the driver must exist\nin the host environment.", + "prompt_type": "plain" + }, + { + "output": "NVIDIA driver >=\n471.68installed in your environment, for a seamless experience on all NVIDIA architectures, including Ampere. Go to `NVIDIA download driver `__ to get the latest NVIDIA Tesla A/T/V/P/K series driver. For reference on CUDA Toolkit and Minimum Required Driver Versions and CUDA Toolkit and Corresponding Driver Versions, see `here `__ . .. note:: If you are using K80 GPUs, the minimum required NVIDIA driver version is 450.80.02. Note About Authentication ~~~~~~~~~~~~~~~~~~~~~~~~~ The default authentication setting in Driverless AI is \"unvalidated.\" In this case, Driverless AI will accept any login and password combination, it will not validate whether the password is correct for the specified login ID, and it will connect to the system as the user specified in the login ID. This is true for all instances, including Cloud, Docker, and native instances.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI provides a number of authentication options, including LDAP, PAM, Local, and None. Refer to :ref:`dai_auth` for information on how to enable a different authentication method. **Note**: Driverless AI is also integrated with IBM Spectrum Conductor and supports authentication from Conductor. 
Contact sales@h2o.ai for more information about using IBM Spectrum Conductor authentication. Note About Shared File Systems ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If your environment uses a shared file system, then you must set the following configuration option: :: datatable_strategy='write' The above can be specified in the `config.toml file `__ (for native installs) or specified as an `environment variable `__ (Docker image installs). This configuration is required because, in some cases, Driverless AI can fail to read files during an experiment. The ``write``\noption lets Driverless AI properly read and write data from shared file\nsystems to disk.", + "prompt_type": "plain" + }, + { + "output": "Using the MOJO Scoring Pipeline with Spark/Sparkling Water\nNote: The Driverless AI 1.5 release will be the last release with\nTOML-based MOJO2. Releases after 1.5 will include protobuf-based MOJO2. MOJO scoring pipeline artifacts can be used in Spark to deploy\npredictions in parallel using the Sparkling Water API. This section\nshows how to load and run predictions on the MOJO scoring pipeline in\nSpark using Scala and the Python API. In the event that you upgrade H2O Driverless AI, we have good news! Sparkling Water is backwards compatible with MOJO versions produced by\nolder Driverless AI versions. Requirements\n- You must have a Spark cluster with the Sparkling Water JAR file\n passed to Spark. - To run with PySparkling, you must have the PySparkling zip file. The H2OContext does not have to be created if you only want to run\npredictions on MOJOs using Spark. This is because the scoring is\nindependent of the H2O run-time. Preparing Your Environment\nIn order to use the MOJO scoring pipeline, the Driverless AI license has to be\npassed to Spark.", + "prompt_type": "plain" + }, + { + "output": "Note: In Local Spark mode, use --driver-class-path to specify the path to\nthe license file. PySparkling\nFirst, start PySpark with PySparkling Python package and Driverless AI\nlicense. 
./bin/pyspark --jars license.sig --py-files pysparkling.zip\nor, you can download official Sparkling Water distribution from H2O\nDownload page. Follow the steps on the Sparkling Water download page. Once you are in the Sparkling Water directory, you can call:\n ./bin/pysparkling --jars license.sig\nAt this point, you should have available a PySpark interactive terminal\nwhere you can try out predictions. If you would like to productionalize\nthe scoring process, you can use the same configuration, except instead\nof using ./bin/pyspark, you would use ./bin/spark-submit to submit your\njob to a cluster. # First, specify the dependencies\n from pysparkling.ml import H2OMOJOPipelineModel, H2OMOJOSettings\n # The 'namedMojoOutputColumns' option ensures that the output columns are named properly. # If you want to use old behavior when all output columns were stored inside an array,\n # set it to False.", + "prompt_type": "plain" + }, + { + "output": "settings = H2OMOJOSettings(namedMojoOutputColumns = True)\n # Load the pipeline. 'settings' is an optional argument. If it's not specified, the default values are used. mojo = H2OMOJOPipelineModel.createFromMojo(\"file:///path/to/the/pipeline.mojo\", settings)\n # Load the data as Spark's Data Frame\n dataFrame = spark.read.csv(\"file:///path/to/the/data.csv\", header=True)\n # Run the predictions. The predictions contain all the original columns plus the predictions\n # added as new columns\n predictions = mojo.transform(dataFrame)\n # You can easily get the predictions for a desired column using the helper function as\n predictions.select(mojo.selectPredictionUDF(\"AGE\")).collect()\nSparkling Water\nFirst, start Spark with Sparkling Water Scala assembly and Driverless AI\nlicense. ./bin/spark-shell --jars license.sig,sparkling-water-assembly.jar\nor, you can download official Sparkling Water distribution from H2O\nDownload page. 
Follow the steps on the Sparkling Water download page.", + "prompt_type": "plain" + }, + { + "output": "Install on NVIDIA GPU Cloud/NGC Registry\nDriverless AI is supported on the following NVIDIA DGX products, and the\ninstallation steps for each platform are the same. - NVIDIA GPU Cloud\n- NVIDIA DGX-1\n- NVIDIA DGX-2\n- NVIDIA DGX Station\nEnvironment\n ---------------------------------------------------------------\n Provider GPUs Min Memory Suitable for\n ---------------------------- ------ ------------ --------------\n NVIDIA GPU Cloud Yes Serious use\n NVIDIA DGX-1/DGX-2 Yes 128 GB Serious use\n NVIDIA DGX Station Yes 64 GB Serious Use\n ---------------------------------------------------------------\nInstalling the NVIDIA NGC Registry\nNote: These installation instructions assume that you are running on an\nNVIDIA DGX machine. Driverless AI is only available in the NGC registry\nfor DGX machines. 1. Log in to your NVIDIA GPU Cloud account at\n https://ngc.nvidia.com/registry. (Note that NVIDIA Compute is no\n longer supported by NVIDIA.)", + "prompt_type": "plain" + }, + { + "output": "In the Registry > Partners menu, select h2oai-driverless. 3. At the bottom of the screen, select one of the H2O Driverless AI\n tags to retrieve the pull command. 4. On your NVIDIA DGX machine, open a command prompt and use the\n specified pull command to retrieve the Driverless AI image. For\n example:\n5. Set up a directory for the version of Driverless AI on the host\n machine:\n6. Set up the data, log, license, and tmp directories on the host\n machine:\n7. At this point, you can copy data into the data directory on the host\n machine. The data will be visible inside the Docker container. 8. Enable persistence of the GPU. Note that this only needs to be run\n once. Refer to the following for more information:\n http://docs.nvidia.com/deploy/driver-persistence/index.html. 9. Run docker images to find the new image tag. 10. 
Start the Driverless AI Docker image and replace TAG below with the\n image tag. Depending on your install version, use the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command.", + "prompt_type": "plain" + }, + { + "output": "11. Connect to Driverless AI with your browser:\nStopping Driverless AI\nUse Ctrl+C to stop Driverless AI. Upgrading Driverless AI\nThe steps for upgrading Driverless AI on an NVIDIA DGX system are\nsimilar to the installation steps. WARNINGS:\n- This release deprecates experiments and MLI models from 1.7.0 and\n earlier. - Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded. We recommend you take the following steps before\n upgrading. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. Note: Use Ctrl+C to stop Driverless AI if it is still running.", + "prompt_type": "plain" + }, + { + "output": "R Client\n\nThis section describes how to install the Driverless AI R client.\nSeveral end-to-end examples that demonstrate how to use the client are\nalso provided. For more information on the R client, see the Driverless\nAI R client documentation.\n\nr_install_client r_client_tutorial", + "prompt_type": "plain" + }, + { + "output": "Experiment Graphs\nThis section describes the dashboard graphs that display for running and\ncompleted experiments. These graphs are interactive. Hover over a point\non the graph for more details about the point. 
Binary Classification Experiments\nFor Binary Classification experiments, Driverless AI shows a ROC Curve,\na Precision-Recall graph, a Lift chart, a Kolmogorov-Smirnov chart, and\na Gains chart. []\n- ROC: This shows Receiver-Operator Characteristics curve stats on\n validation data along with the best Accuracy, MCC, and F1 values. An\n ROC curve is a useful tool because it only focuses on how well the\n model was able to distinguish between classes. Keep in mind, though,\n that for models where one of the classes happens rarely, a high AUC\n could provide a false sense that the model is correctly predicting\n the results. This is where the notion of precision and recall become\n important. - Precision-Recall: This shows the Precision-Recall curve on\n validation data along with the best Accuracy, MCC, and F1 values.", + "prompt_type": "plain" + }, + { + "output": "Prec-Recall is a\n complementary tool to ROC curves, especially when the dataset has a\n significant skew. The Prec-Recall curve plots the precision or\n positive predictive value (y-axis) versus sensitivity or true\n positive rate (x-axis) for every possible classification threshold. At a high level, you can think of precision as a measure of\n exactness or quality of the results and recall as a measure of\n completeness or quantity of the results obtained by the model. Prec-Recall measures the relevance of the results obtained by the\n model. - Lift: This chart shows lift stats on validation data. For example,\n \"How many times more observations of the positive target class are\n in the top predicted 1%, 2%, 10%, etc. (cumulative) compared to\n selecting observations randomly?\" By definition, the Lift at 100% is\n 1.0. Lift can help answer the question of how much better you can\n expect to do with the predictive model compared to a random model\n (or no model).", + "prompt_type": "plain" + }, + { + "output": "In other\n words, the ratio of gain % to the random expectation % at a given\n quantile. 
The random expectation of the xth quantile is x%. - Kolmogorov-Smirnov: This chart measures the degree of separation\n between positives and negatives for validation or test data. - Gains: This shows Gains stats on validation data. For example, \"What\n fraction of all observations of the positive target class are in the\n top predicted 1%, 2%, 10%, etc. (cumulative)?\" By definition, the\n Gains at 100% are 1.0. Multiclass Classification Experiments\nFor multiclass classification experiments, a Confusion Matrix is\navailable in addition to the ROC Curve, Precision-Recall graph, Lift\nchart, Kolmogorov-Smirnov chart, and Gains chart. Driverless AI\ngenerates these graphs by considering the multiclass problem as multiple\none-vs-all problems. These graphs and charts (Confusion Matrix excepted)\nare based on a method known as micro-averaging (reference:\nhttp://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html#multiclass-settings).", + "prompt_type": "plain" + }, + { + "output": "The\npredictions would look something like this:\n+--------------------+-----------------------+-----------------------+\n| class.Iris-setosa | class.Iris-versicolor | class.Iris-virginica |\n+--------------------+-----------------------+-----------------------+\n| 0.9628 | 0.021 | 0.0158 |\n+--------------------+-----------------------+-----------------------+\n| 0.0182 | 0.3172 | 0.6646 |\n+--------------------+-----------------------+-----------------------+\n| 0.0191 | 0.9534 | 0.0276 |\n+--------------------+-----------------------+-----------------------+\nTo create these charts, Driverless AI converts the results to 3\none-vs-all problems:\n+--------+--------+---+----------+-----------+---+---------+----------+\n| prob | actual | | prob-v | actual-v | | prob-v | actual-v |\n| - | - | | e | ersicolor | | i | irginica |\n| setosa | setosa | | rsicolor | | | rginica | |\n+--------+--------+---+----------+-----------+---+---------+----------+\n| 0.9628 | 1 | | 0.021 | 0 
| | 0.0158 | 0 |\n+--------+--------+---+----------+-----------+---+---------+----------+\n| 0.0182 | 0 | | 0.3172 | 1 | | 0.6646 | 0 |\n+--------+--------+---+----------+-----------+---+---------+----------+\n| 0.0191 | 0 | | 0.9534 | 1 | | 0.0276 | 0 |\n+--------+--------+---+----------+-----------+---+---------+----------+\nThe result is 3 vectors of predicted and actual values for binomial\nproblems.", + "prompt_type": "plain" + }, + { + "output": "predicted = [0.9628, 0.0182, 0.0191, 0.021, 0.3172, 0.9534, 0.0158, 0.6646, 0.0276]\n actual = [1, 0, 0, 0, 1, 1, 0, 0, 0]\nMulticlass Confusion Matrix\nA confusion matrix shows experiment performance in terms of false\npositives, false negatives, true positives, and true negatives. For each\nthreshold, the confusion matrix represents the balance between TPR and\nFPR (ROC) or Precision and Recall (Prec-Recall). In general, most useful\noperating points are in the top left corner. In this graph, the actual results display in the columns and the\npredictions display in the rows; correct predictions are highlighted. In\nthe example below, Iris-setosa was predicted correctly 30 times, while\nIris-virginica was predicted correctly 32 times, and Iris-versicolor was\npredicted as Iris-virginica 2 times (against the validation set). Note that while the experiment is running, the CM results are displayed\nonly for the first fold/validation split. A CM for all rows can't be\ndisplayed since, in general, DAI isn't performing k-fold CV but could be\nperforming 2 repeats of 1/3 validation splits with overlaps.", + "prompt_type": "plain" + }, + { + "output": "Install the Driverless AI AWS Community AMI\nWatch the installation video here. Note that some of the images in this\nvideo may change between releases, but the installation steps remain the\nsame. 
Environment\n+---------------------------+--------------+---------+----------------+\n| Provider | Instance | Num | Suitable for |\n| | Type | GPUs | |\n+===========================+==============+=========+================+\n| AWS | p2.xlarge | 1 | E |\n| | | | |\n| - | ---- | ---- | xperimentation |\n| - | -----------+ | ------+ | |\n| - | | | ---- |\n| - | p2.8xlarge | 8 | -------------+ |\n| - | | | |\n| - | ---- | ---- | Serious |\n| - | -----------+ | ------+ | use |\n| - | | | |\n| | | 16 | ---- |\n| | p2.16xlarge | | -------------+ |\n| | | ---- | |\n| | ---- | ------+ | Serious |\n| | -----------+ | | use |\n| | | 1 | |\n| | p3.2xlarge | | ---- |\n| | | ---- | -------------+ |\n| | ---- | ------+ | |\n| | -----------+ | | E |\n| | | 4 | |\n| | p3.8xlarge | | xperimentation |\n| | | ---- | |\n| | ---- | ------+ | ---- |\n| | -----------+ | | -------------+ |\n| | | 8 | |\n| | | | Serious |\n| | p3.16xlarge | ---- | use |\n| | | ------+ | |\n| | ---- | | ---- |\n| | -----------+ | 1 | -------------+ |\n| | | | |\n| | g3.4xlarge | ---- | Serious |\n| | | ------+ | use |\n| | ---- | | |\n| | -----------+ | 2 | ---- |\n| | | | -------------+ |\n| | g3.8xlarge | ---- | |\n| | | ------+ | E |\n| | ---- | | |\n| | -----------+ | 4 | xperimentation |\n| | | | |\n| | | | ---- |\n| | g3.16xlarge | | -------------+ |\n| | | | |\n| | | | E |\n| | | | |\n| | | | xperimentation |\n| | | | |\n| | | | ---- |\n| | | | -------------+ |\n| | | | |\n| | | | Serious |\n| | | | use |\n+---------------------------+--------------+---------+----------------+\nInstalling the EC2 Instance\n1.", + "prompt_type": "plain" + }, + { + "output": "2. In the upper right corner of the Amazon Web Services page, set the\n location drop-down. (Note: We recommend selecting the US East region\n because H2O's resources are stored there. It also offers more\n instance types than other regions.) 3. Select the EC2 option under the Compute section to open the EC2\n Dashboard. 4. 
Click the Launch Instance button under the Create Instance section. 5. Under Community AMIs, search for h2oai, and then select the version\n that you want to launch. 6. On the Choose an Instance Type page, select GPU compute in the\n Filter by dropdown. This will ensure that your Driverless AI\n instance will run on GPUs. Select a GPU compute instance from the\n available options. (We recommend at least 32 vCPUs.) Click the Next:\n Configure Instance Details button. 7. Specify the Instance Details that you want to configure. Create a\n VPC or use an existing one, and ensure that \"Auto-Assign Public IP\"\n is enabled and associated to your subnet.", + "prompt_type": "plain" + }, + { + "output": "8. Specify the Storage Device settings. Note again that Driverless AI\n requires 10 GB to run and will stop working if less than 10 GB is\n available. The machine should have a minimum of 30 GB of disk space. Click Next: Add Tags. 9. If desired, add unique Tag name to identify your instance. Click\n Next: Configure Security Group. 10. Add the following security rules to enable SSH access to Driverless\n AI, then click Review and Launch. --------------------------------------------------------------------\n Type Pro Port Range Source Description\n tocol \n ------------ ------- ---------- -------------- ---------------------\n SSH TCP 22 Anywhere \n 0.0.0.0/0 \n Custom TCP TCP 12345 Anywhere Launch DAI\n Rule 0.0.0.0/0 \n --------------------------------------------------------------------\n11. Review the configuration, and then click Launch.", + "prompt_type": "plain" + }, + { + "output": "A popup will appear prompting you to select a key pair. This is\n required in order to SSH into the instance. You can select your\n existing key pair or create a new one. Be sure to accept the\n acknowledgement, then click Launch Instances to start the new\n instance. 13. Upon successful completion, a message will display informing you\n that your instance is launching. 
Click the View Instances button to\n see information about the instance including the IP address. The\n Connect button on this page provides information on how to SSH into\n your instance. 14. Open a Terminal window and SSH into the IP address of the AWS\n instance. Replace the DNS name below with your instance DNS. 15. If you selected a GPU-compute instance, then you must enable\n persistence and optimizations of the GPU. The commands vary\n depending on the instance type. Note also that these commands need\n to be run once every reboot. Refer to the following for more\n information:\n16.", + "prompt_type": "plain" + }, + { + "output": "For example:\n17. Connect to Driverless AI with your browser. Sign in to Driverless AI\n with the username h2oai and use the AWS InstanceID as the password. You will be prompted to enter your Driverless AI license key when\n you log in for the first time. Stopping the EC2 Instance\nThe EC2 instance will continue to run even when you close the\naws.amazon.com portal. To stop the instance:\n1. On the EC2 Dashboard, click the Running Instances link under the\n Resources section. 2. Select the instance that you want to stop. 3. In the Actions drop down menu, select Instance State > Stop. 4. A confirmation page will display. Click Yes, Stop to stop the\n instance. Upgrading the Driverless AI Community Image\nWARNINGS:\n- This release deprecates experiments and MLI models from 1.7.0 and\n earlier. - Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded. We recommend you take the following steps before\n upgrading.", + "prompt_type": "plain" + }, + { + "output": "- Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. 
You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. Upgrading from Version 1.2.2 or Earlier\nThe following example shows how to upgrade from 1.2.2 or earlier to the\ncurrent version. Upgrading from these earlier versions requires an edit\nto the start and h2oai scripts. 1. SSH into the IP address of the image instance and copy the existing\n experiments to a backup location:\n2. wget the newer image. The command below retrieves version 1.2.2:\n3. In the /home/ubuntu/scripts/ folder, edit both the start.sh and\n h2oai.sh scripts to use the newer image. 4. Use the docker load command to load the image:\n5. Optionally run docker images to ensure that the new image is in the\n registry.", + "prompt_type": "plain" + }, + { + "output": "Internal Validation Technique\nThis section describes the technique behind internal validation in\nDriverless AI. For the experiment, Driverless AI will either:\n(1) split the data into a training set and internal validation set\n(2) use cross validation to split the data into n folds\nDriverless AI chooses the method based on the size of the data and the\nAccuracy setting. For method 1, part of the data is removed to be used\nfor internal validation. (Note: This train and internal validation split\nmay be repeated if the data is small so that more data can be used for\ntraining.) For method 2, however, no data is wasted for internal validation. With\ncross validation, the whole dataset is utilized, and each model is\ntrained on a different subset of the training data. The following\nvisualization shows an example of cross validation with 5 folds. []\nDriverless AI randomly splits the data into the specified number of\nfolds for cross validation. 
With cross validation, the whole dataset is\nutilized, and each model is trained on a different subset of the\ntraining data.", + "prompt_type": "plain" + }, + { + "output": "Linux x86_64 Installs\n\nThis section provides installation steps for RPM, deb, and tar installs\nin Linux x86_64 environments.\n\nlinux-rpm linux-deb linux-tarsh", + "prompt_type": "plain" + }, + { + "output": "General Considerations\nMachine Learning and Approximate Explanations\nFor years, common sense has deemed the complex, intricate formulas\ncreated by training machine learning algorithms to be uninterpretable. While great advances have been made in recent years to make these often\nnonlinear, non-monotonic, and non-continuous machine-learned response\nfunctions more understandable (Hall et al, 2017), it is likely that such\nfunctions will never be as directly or universally interpretable as more\ntraditional linear models. Why consider machine learning approaches for inferential purposes? In\ngeneral, linear models focus on understanding and predicting average\nbehavior, whereas machine-learned response functions can often make\naccurate, but more difficult to explain, predictions for subtler aspects\nof modeled phenomena. In a sense, linear models create very exact\ninterpretations for approximate models. The approach here seeks to make\napproximate explanations for very exact models. It is quite possible\nthat an approximate explanation of an exact model may have as much, or\nmore, value and meaning than the exact interpretations of an approximate\nmodel.", + "prompt_type": "plain" + }, + { + "output": "The Multiplicity of Good Models in Machine Learning\nIt is well understood that for the same set of input variables and\nprediction targets, complex machine learning algorithms can produce\nmultiple accurate models with very similar, but not exactly the same,\ninternal architectures (Breiman, 2001). 
This alone is an obstacle to\ninterpretation, but when using these types of algorithms as\ninterpretation tools or with interpretation tools it is important to\nremember that details of explanations will change across multiple\naccurate models. Expectations for Consistency Between Explanatory Techniques\n- The decision tree surrogate is a global, nonlinear description of\n the Driverless AI model behavior. Variables that appear in the tree\n should have a direct relationship with variables that appear in the\n global feature importance plot. For certain, more linear Driverless\n AI models, variables that appear in the decision tree surrogate\n model may also have large coefficients in the global K-LIME model.", + "prompt_type": "plain" + }, + { + "output": "LOCO\n importance values are nonlinear, do consider interactions, and do\n not explicitly consider a linear intercept or offset. LIME\n explanations and LOCO importance values are not expected to have a\n direct relationship but can align roughly as both are measures of a\n variable's local impact on a model's predictions, especially in more\n linear regions of the Driverless AI model's learned response\n function. - ICE is a type of nonlinear sensitivity analysis which has a complex\n relationship to LOCO feature importance values. Comparing ICE to\n LOCO can only be done at the value of the selected variable that\n actually appears in the selected row of the training data. When\n comparing ICE to LOCO the total value of the prediction for the row,\n the value of the variable in the selected row, and the distance of\n the ICE value from the average prediction for the selected variable\n at the value in the selected row must all be considered. 
- ICE curves that are outside the standard deviation of partial\n dependence would be expected to fall into less populated decision\n paths of the decision tree surrogate; ICE curves that lie within the\n standard deviation of partial dependence would be expected to belong\n to more common decision paths.", + "prompt_type": "plain" + }, + { + "output": "Upgrading the Driverless AI Image\nWARNINGS:\n- This release deprecates experiments and MLI models from 1.7.0 and\n earlier. - Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded. We recommend you take the following steps before\n upgrading. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. Upgrading from Version 1.2.2 or Earlier\nIt is not possible to upgrade from version 1.2.2 or earlier to the\nlatest version. You have to manually remove the 1.2.2 container and then\nreinstall the latest Driverless AI version. Be sure to backup your data\nbefore doing this.", + "prompt_type": "plain" + }, + { + "output": "SSH into the IP address of the image instance and copy the existing\n experiments to a backup location:\n2. wget the newer image. Replace VERSION and BUILD below with the\n Driverless AI version. 3. Use the docker load command to load the image:\n4. Run docker images to find the new image tag. 5. Start the Driverless AI Docker image and replace TAG below with the\n image tag. Depending on your install version, use the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command. 
Upgrading from version 1.5.2 or Later\nUpgrading to versions 1.5.2 and later is no longer done via Docker. Instead, perform the following steps if you are upgrading to version\n1.5.2 or later. Replace dai_NEWVERSION.deb below with the new Driverless\nAI version (for example, dai_1.8.4.1_amd64.deb). Note that this upgrade\nprocess inherits the service user and group from /etc/dai/User.conf and\n/etc/dai/Group.conf. You do not need to manually specify the DAI_USER or\nDAI_GROUP environment variables during an upgrade.", + "prompt_type": "plain" + }, + { + "output": "Hive Setup\n\nDriverless AI lets you explore Hive data sources from within the\nDriverless AI application. This section provides instructions for\nconfiguring Driverless AI to work with Hive.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run --runtime=nvidia(>= Docker 19.03) ornvidia-docker(< Docker 19.03) command when starting the Driverless AI Docker image. Usedocker versionto check which version of Docker you are using. Description of Configuration Attributes --------------------------------------- -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. -hive_app_configs: Configuration for Hive Connector. Inputs are similar to configuring the HDFS connector. Important keys include: -hive_conf_path: The path to Hive configuration. This can have multiple files (e.g. hive-site.xml, hdfs-site.xml, etc.) 
-auth_type: Specify one of noauth, keytab, or keytabimpersonation for Kerberos authentication -keytab_path: Specify the path to Kerberos keytab to use for authentication (this can be \"\" if using auth_type=\"noauth\") -principal_user: Specify the Kerberos app principal user (required when using auth_type=\"keytab\" or auth_type=\"keytabimpersonation\") **Notes:** - With Hive connectors, it is assumed that DAI is running on the edge node.", + "prompt_type": "plain" + }, + { + "output": "missing classes, dependencies, authorization errors). - Ensure the core-site.xml file (from e.g Hadoop conf) is also present in the Hive conf with the rest of the files (hive-site.xml, hdfs-site.xml, etc.). The core-site.xml file should have proxyuser configured (e.g. hadoop.proxyuser.hive.hosts & hadoop.proxyuser.hive.groups). - If you have tez as the Hive execution engine, make sure that the required tez dependencies (classpaths, jars, etc.) are available on the DAI node. Alternatively, you can use internal engines that come with DAI by changing your hive.execution.engine value in the hive-site.xml file to mr or spark. The configuration should be JSON/Dictionary String with multiple keys. For example: :: \"\"\"{ \"hive_connection_1\": { \"hive_conf_path\": \"/path/to/hive/conf\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"hive/node1.example.com@EXAMPLE.COM\", }, \"hive_connection_2\": { \"hive_conf_path\": \"/path/to/hive/conf_2\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"hive/node2.example.com@EXAMPLE.COM\", } }\"\"\" **Note**: The expected input of hive_app_configs is a `JSON string `__.", + "prompt_type": "plain" + }, + { + "output": "Depending on how the configuration value is applied, different forms of outer quotations may be required. The following examples show two unique methods for applying outer quotations. 
- Configuration value applied with the config.toml file: :: hive_app_configs = \"\"\"{\"my_json_string\": \"value\", \"json_key_2\": \"value2\"}\"\"\" - Configuration value applied with an environment variable: :: DRIVERLESS_AI_HIVE_APP_CONFIGS='{\"my_json_string\": \"value\", \"json_key_2\": \"value2\"}' -hive_app_jvm_args: Optionally specify additional Java Virtual Machine (JVM) args for the Hive connector. Each arg must be separated by a space. .. **Notes**: - If a custom `JAAS configuration file `__ is needed for your Kerberos setup, usehive_app_jvm_argsto specify the appropriate file: .. :: hive_app_jvm_args = \"-Xmx20g -Djava.security.auth.login.config=/etc/dai/jaas.conf\" Samplejaas.conffile: : :: com.sun.security.jgss.initiate { com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true useTicketCache=false principal=\"hive/localhost@EXAMPLE.COM\" [Replace this line] doNotPrompt=true keyTab=\"/path/to/hive.keytab\" [Replace this line] debug=true; }; -hive_app_classpath``: Optionally specify an alternative classpath\n for the Hive connector.", + "prompt_type": "plain" + }, + { + "output": "nvidia-docker run`` command or by editing the configuration options in\nthe config.toml file and then specifying that file in the", + "prompt_type": "plain" + }, + { + "output": "Introduction to Driverless AI\n\nintroduction_to_dai key-features supported-algorithms workflow", + "prompt_type": "plain" + }, + { + "output": "MLI for Time-Series Experiments\nThis section describes how to run MLI for time-series experiments. Refer\nto interpret-regular for MLI information with regular experiments. There are two methods you can use for interpreting time-series models:\n- Using the MLI link in the top main menu on the upper right corner of\n the UI to interpret either a Driverless AI model or an external\n model. This process is described in the\n Interpreting a Driverless AI Model and\n Interpreting Predictions from an External Model \n sections. 
- Using the Interpret this Model button on a completed experiment page\n to interpret a Driverless AI model on original and transformed\n features. Run Interpretation from Completed Experiment page\n (See below.) - interpret-ts-multi\n- interpret-ts-single\n- Run IID or regular explainers on a Time series experiment \nLimitations\n- This release deprecates experiments run in 1.8.9 and earlier.", + "prompt_type": "plain" + }, + { + "output": "- MLI is not available for multiclass Time Series. - When the test set contains actuals, you will see the time series\n metric plot and the group metrics table. If there are no actuals,\n MLI will run, but you will see only the prediction value time series\n and a Shapley table. - MLI does not require an Internet connection to run on current\n models. Multi-Group Time Series MLI\nThis section describes how to run MLI on time series data for multiple\ngroups. 1. Click the Interpret this Model button on a completed time series\n experiment to launch Model Interpretation for that experiment. This\n page includes the following:\n2. Scroll to the bottom of the panel and select a grouping in the Group\n Search field to view a graph of Actual vs. Predicted values for the\n group. The outputted graph can be downloaded to your local machine. 3. Click on a prediction point in the plot (white line) to view Shapley\n values for that prediction point. The Shapley values plot can also\n be downloaded to your local machine.", + "prompt_type": "plain" + }, + { + "output": "Click Add Panel to add a new MLI Time Series panel. This lets you\n compare different groups in the same model and also provides the\n flexibility to do a \"side-by-side\" comparison between different\n models. Single Time Series MLI\nTime Series MLI can also be run when only one group is available. 1. Click the Interpret this Model button on a completed time series\n experiment to launch Model Interpretation for that experiment. This\n page includes the following:\n2. 
Scroll to the bottom of the panel and select an option in the Group\n Search field to view a graph of Actual vs. Predicted values for the\n group. (Note that for Single Time Series MLI, there will only be one\n option in this field.) The outputted graph can be downloaded to your\n local machine. 3. Click on a prediction point in the plot (white line) to view Shapley\n values for that prediction point. The Shapley values plot can also\n be downloaded to your local machine. 4. Click Add Panel to add a new MLI Time Series panel.", + "prompt_type": "plain" + }, + { + "output": "Environment Variables and Configuration Options\nDriverless AI provides a number of environment variables that can be\npassed when starting Driverless AI or specified in a config.toml file. The complete list of variables is in the config_file section. The steps\nfor specifying variables vary depending on whether you installed a\nDriverless AI RPM, DEB, or TAR SH or whether you are running a Docker\nimage. Setting Environment Variables and Configuration Options\nDocker Image Installs\nEach property must be prepended with DRIVERLESS_AI. The example below\nstarts Driverless AI with environment variables that enable S3 and HDFS\naccess (without authentication). nvidia-docker run \\\n --pid=host \\\n --rm \\\n -u `id -u`:`id -g` \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,s3,hdfs\" \\\n -e DRIVERLESS_AI_AUTHENTICATION_METHOD=\"local\" \\\n -e DRIVERLESS_AI_LOCAL_HTPASSWD_FILE=\"\" \\\n -v /etc/passwd:/etc/passwd:ro \\\n -v /etc/group:/etc/group:ro \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\nNative Installs\nThe config.toml file is available in the etc/dai folder after the RPM,\nDEB, or TAR SH is installed.", + "prompt_type": "plain" + }, + { + "output": "Google BigQuery Setup\nDriverless AI lets you explore Google BigQuery (GBQ) data sources from\nwithin the Driverless AI application. 
This page provides instructions\nfor configuring Driverless AI to work with GBQ. Note\nThe setup described on this page requires you to enable authentication. Enabling the GCS and/or GBQ connectors causes those file systems to be\ndisplayed in the UI, but the GCS and GBQ connectors cannot be used\nwithout first enabling authentication. Before enabling the GBQ data connector with authentication, the\nfollowing steps must be performed:\n1. In the Google Cloud Platform (GCP), create a private key for your\n service account. To create a private key, click Service Accounts >\n Keys, and then click the Add Key button. When the Create private key\n dialog appears, select JSON as the key type. To finish creating the\n JSON private key and download it to your local file system, click\n Create. 2. Mount the downloaded JSON file to the Docker instance. 3.", + "prompt_type": "plain" + }, + { + "output": "Note\nDepending on your Docker install version, use either the\ndocker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (< Docker\n19.03) command when starting the Driverless AI Docker image. Use\ndocker version to check which version of Docker you are using. The following sections describe how to enable the GBQ data connector:\n- gbq-config-toml\n- gbq-environment-variable\n- gbq-workload-identity\nEnabling GBQ with the config.toml file\nDocker Image Installs\nThis example enables the GBQ data connector with authentication by\npassing the JSON authentication file. This assumes that the JSON file\ncontains Google BigQuery authentications. 
nvidia-docker run \\\n --pid=host \\\n --rm \\\n --shm-size=256m \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,gbq\" \\\n -e DRIVERLESS_AI_GCS_PATH_TO_SERVICE_ACCOUNT_JSON=\"/service_account_json.json\" \\\n -u `id -u`:`id -g` \\\n -p 12345:12345 \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n -v `pwd`/service_account_json.json:/service_account_json.json \\\n h2oai/dai-ubi8-x86_64:|tag|\nDocker Image with the config.toml\nThis example shows how to configure the GBQ data connector options in\nthe config.toml file, and then specify that file when starting\nDriverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "GOOGLE_APPLICATION_CREDENTIALSenvironment variable as follows: :: export GOOGLE_APPLICATION_CREDENTIALS=\"SERVICE_ACCOUNT_KEY_PATH\" In the preceding example, replaceSERVICE_ACCOUNT_KEY_PATHwith the path of the JSON file that contains your service account key. The following is an example of how this might look: :: export GOOGLE_APPLICATION_CREDENTIALS=\"/etc/dai/service-account.json\" To see how to set this environment variable with Docker, refer to the following example: .. code:: bash nvidia-docker run \\ --pid=host \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,gbq\" \\ -e GOOGLE_APPLICATION_CREDENTIALS=\"/service_account.json\" \\ -u `id -u`:`id -g` \\ -p 12345:12345 \\ -v `pwd`/data:/data \\ -v `pwd`/log:/log \\ -v `pwd`/license:/license \\ -v `pwd`/tmp:/tmp \\ -v `pwd`/service_account_json.json:/service_account_json.json \\ h2oai/dai-ubi8-x86_64:|tag| For more information on setting theGOOGLE_APPLICATION_CREDENTIALSenvironment variable, refer to the `official documentation on setting the environment variable `__.", + "prompt_type": "plain" + }, + { + "output": "For information on how to enable Workload Identity, refer to the `official documentation on enabling Workload Identity on a GKE cluster `__. .. 
note:: If Workload Identity is enabled, then theGOOGLE_APPLICATION_CREDENTIALSenvironment variable does not need to be set. Adding Datasets Using GBQ ------------------------- After Google BigQuery is enabled, you can add datasets by selecting **Google Big Query** from the **Add Dataset (or Drag and Drop)** drop-down menu. .. note:: To run a BigQuery query with Driverless AI, the associated service account must have the following Identity and Access Management (IAM) permissions: :: bigquery.jobs.create bigquery.tables.create bigquery.tables.delete bigquery.tables.export bigquery.tables.get bigquery.tables.getData bigquery.tables.list bigquery.tables.update bigquery.tables.updateData storage.buckets.get storage.objects.create storage.objects.delete storage.objects.list storage.objects.update For a list of all Identity and Access Management permissions, refer to the `IAM permissions reference `__ from the official Google Cloud documentation.", + "prompt_type": "plain" + }, + { + "output": "**Enter BQ Dataset ID with write access to create temporary table**: Enter a dataset ID in Google BigQuery that this user has read/write access to. BigQuery uses this dataset as the location for the new table generated by the query. .. **Note**: Driverless AI's connection to GBQ will inherit the top-level directory from the service JSON file. So if a dataset named \"my-dataset\" is in a top-level directory named \"dai-gbq\", then the value for the dataset ID input field would be \"my-dataset\" and not \"dai-gbq:my-dataset\". 2. **Enter Google Storage destination bucket**: Specify the name of Google Cloud Storage destination bucket. Note that the user must have write access to this bucket. 3. **Enter Name for Dataset to be saved as**: Specify a name for the dataset, for example,my_file. 4. **Enter BigQuery Query (Use StandardSQL)**: Enter a StandardSQL query that you want BigQuery to execute. For example:SELECT * FROM .. 5. 
(Optional) Specify a project to use with the GBQ connector.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Experiment Setup Wizard\nThe Driverless AI Experiment Setup Wizard makes it simple for you to set\nup a Driverless AI experiment and ensure that the experiment's settings\nare optimally configured for your specific use case. The Experiment\nSetup Wizard helps you learn about your data and lets you provide\ninformation about your use case that is used to determine the\nexperiment's settings. This Wizard covers topics such as data leakage,\nNLP handling, validation method, model reproducibility, and model\ndeployment. Notes:\n- This feature is currently in an experimental state. - A Dataset Join Wizard that makes it simple for you to join two\n datasets together is also available in Driverless AI. For more\n information, see join_dataset_wizard. The following sections describe how to access and use the Driverless AI\nWizard. - wizard-accessing\n- wizard-using\nAccessing the Driverless AI Wizard\nChoose one of the following methods to access the Driverless AI Wizard:\n- On the Datasets page, click the name of the dataset you want to use\n for the experiment and select Predict Wizard from the list of\n options.", + "prompt_type": "plain" + }, + { + "output": "If this method is used, then the Driverless AI Wizard\n prompts you to select a dataset to use for the experiment. []\nDriverless AI Wizard sample walkthrough\nThe following example walks through the Driverless AI Wizard. Note that\nthis walkthrough does not contain every possible step that the wizard\noffers. 1. Select the option that best describes your role and specify how many\n years of experience you have with machine learning and data science. In this example, the options Data Scientist and <1 year are\n selected. Click Continue to proceed. 2. Select a dataset. Select a tabular dataset with training data. 
Each\n    row in the dataset must contain predictor variables (features) that\n    can be used to predict the target column. In this example, the Rain\n    in Australia dataset is selected. 3. Select a problem type and target column. Specify a problem type and\n    a target column for that problem type. Note that you can select a\n    target column for only one of the available problem types.", + "prompt_type": "plain" + }, + { + "output": "Click Continue to proceed. 4. Target column analysis. The Driverless AI Wizard provides\n    information about the selected target column and prompts you to\n    confirm that the target column looks as expected. Click Yes to\n    proceed, or click No to return to the previous page and select a\n    different column. 5. Exclude columns. The Driverless AI Wizard prompts you to check for\n    columns to drop from the experiment. Dropped columns are not used as\n    predictors for the target column. If you already know which\n    column(s) you want to drop, then you can click the Yes, I want to\n    have a look button to select the column(s) you want to drop. If you\n    want to proceed without dropping any columns, click the No,\n    don't drop any columns button. 6. Model deployment. The Driverless AI Wizard prompts you to specify\n    how you plan to use the model. In this example, the I'm not ready\n    for production option is selected. 7. Importance of time order. If your dataset contains at least one date\n    or datetime column that doesn't contain missing values, the\n    Driverless AI Wizard prompts you to specify how important time order\n    is to the experiment.", + "prompt_type": "plain" + }, + { + "output": "8. Provide a test set. Specify a test set to use for the experiment. You can select an existing test set, create a test set from the\n    training data, or skip this step entirely. To refresh the list of\n    available datasets, click the Refresh dataset list button. In this\n    example, the Create test set from training data option is selected. 9. Split the training data. 
Use the slider to specify what fraction of\n the training dataset you want to use for testing. The Driverless AI\n Wizard automatically suggests a percentage based on the size of your\n training dataset. In this example, 15 percent of the training\n dataset is used for testing. Click Split my training data to\n proceed. 10. Confirm the train / test split. The Driverless AI Wizard lists the\n following information for both the training and testing data based\n on the percentage specified in the preceding step:\n - The size of each dataset. - The number of rows and columns in each dataset. - Whether either dataset has any temporal order.", + "prompt_type": "plain" + }, + { + "output": "Select a model type. Specify a model type based on settings for\n Accuracy, Time, and Interpretability, as well as training time and\n deployment size. You can also optionally specify whether you have\n strict runtime limits or if you want to limit the complexity of the\n model. In this example, the Keep it simple option is selected. Click\n Continue to proceed. 12. Select a scorer. Specify a scorer to optimize. In this example, Area\n under ROC Curve (AUC) is selected. Click Continue to proceed. 13. Experiment parameters. The Driverless AI Wizard lists all of the\n experiment parameters that have been configured up until this point. From this page, you can specify a name for the experiment and begin\n training, show additional details about the experiment (Python code\n and Expert Settings), or cancel the experiment and restart from the\n beginning of the wizard. In this example, Start Training is\n selected. 14. The experiment now appears on the Experiments page in Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Dataset Join Wizard\nThe Driverless AI Dataset Join Wizard makes it simple for you to join\ntwo datasets together. This wizard performs a left (outer) join. Note\nthat the join key column name(s) must match between both datasets. 
To\nrename columns, or to prepare datasets more generally, go to Dataset\nDetails and select Modify by Recipe -> Live Code, or use data recipes. If a model is trained on the resulting dataset, make sure to also\nperform the same join on testing or production data. To access the Dataset Join Wizard, navigate to the Datasets page and\nclick on the name of the dataset you want to join with another dataset. A list of dataset-specific options is displayed. Select Join Wizard to\nopen the wizard. []\nWhen using the Join Datasets wizard, you can either specify a dataset to\njoin, or first specify the join key column(s) to use. Notes:\n- This feature is currently in an experimental state. - An Experiment Setup Wizard that makes it simple for you to set up an\n experiment is also available in Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Client Certificate Authentication Example\nThis section describes how to configure client certificate\nauthentication in Driverless AI. Client Certificate and SSL Configuration Options\nThe following options can be specified when configuring client\ncertificate authentication. SSL Configuration Options\nMutual TLS authentication (mTLS) must be enabled in order to enable\nClient Certificate Authentication. Use the following configuration\noptions to configure mTLS. Refer to the mTLS Authentication topic for\nmore information on how to enable mTLS. - ssl_client_verify_mode: Sets the client verification mode. Choose\n from the following verification modes:\n- ssl_ca_file: Specifies the path to the certification authority (CA)\n certificate file. This certificate will be used to verify the client\n certificate when client authentication is enabled. If this is not\n specified, clients are verified using the default system\n certificates. 
- ssl_client_key_file: Required if\n ssl_client_verify_mode = \"CERT_REQUIRED\".", + "prompt_type": "plain" + }, + { + "output": "- ssl_client_crt_file: Required if\n ssl_client_verify_mode = \"CERT_REQUIRED\". Specifies the HTTPS\n settings path to the client certificate that Driverless AI will use\n to authenticate itself. Client Certificate Options\n- auth_tls_crl_file: The path to the certificate revocation list (CRL)\n file that is used to verify the client certificate. - auth_tls_user_lookup: Specifies how a user's identity is obtained. Choose from the following:\n - REGEXP_ONLY: Uses auth_tls_subject_field and\n auth_tls_field_parse_regexp to extract the username from the\n client certificate. - LDAP_LOOKUP: Uses the LDAP server to obtain the username. (Refer to the ldap_authentication section for information\n about additional LDAP Authentication configuration options.) Used with LDAP_LOOKUP:\n- auth_tls_ldap_server: Specifies the LDAP server hostname or IP\n address. - auth_tls_ldap_port: Specifies the LDAP server port number. This is\n 389 by default.", + "prompt_type": "plain" + }, + { + "output": "- auth_tls_ldap_tls_file: Specifies the path to the SSL certificate. - auth_tls_ldap_bind_dn: Specifies the complete DN of the LDAP bind\n user. - auth_tls_ldap_bind_password: Specifies the password for the LDAP\n bind. - auth_tls_subject_field: The subject field that is used as a source\n for a username or other values that provide further validation. - auth_tls_field_parse_regexp: The regular expression that is used to\n parse the subject field in order to obtain the username or other\n values that provide further validation. - auth_tls_ldap_search_base: Specifies the location in the Directory\n Information Tree (DIT) where the search will start. - auth_tls_ldap_search_filter: Specifies an LDAP search filter that is\n used to find a specific user with LDAP_LOOKUP when using the\n tls_certificate authentication method. 
This can be dynamically built\n  by using the named capturing groups from auth_tls_field_parse_regexp\n  for substitution:\n      auth_tls_field_parse_regexp = \"\\w+ (?P<id>\\d+)\"\n      auth_tls_ldap_search_filter = \"(&(objectClass=person)(id={{id}}))\"\n- auth_tls_ldap_username_attribute: Specifies the LDAP record\n  attribute that is used as a username.", + "prompt_type": "plain" + }, + { + "output": "auth_tls_ldap_authorization_lookup_filter option to determine whether individual users are members of the chemists group in an LDAP schema where group (organizational unit) membership is defined within group entries. :: # Specify to use email as username auth_tls_ldap_username_attribute = \"mail\" # Specify search string auth_tls_ldap_search_filter = \"(&(objectClass=inetOrgPerson)(uid={{username}}))\" # Specify the base DN to start the search from auth_tls_ldap_authorization_search_base=\"dc=example,dc=com\" # Filter the results of the search to determine which users are members of a specific group auth_tls_ldap_authorization_lookup_filter = \"(&(objectClass=groupOfUniqueNames)(uniqueMember=uid={{uid}},dc=example,dc=com)(ou=chemists))\" Enabling Client Certificate Authentication ------------------------------------------ .. container:: tabs .. group-tab:: Docker Image Installs To enable Client Certificate authentication in Docker images, specify the authentication environment variable that you want to use.", + "prompt_type": "plain" + }, + { + "output": "The following example enables Client Certificate authentication and uses LDAP_LOOKUP for the TLS user lookup method. .. 
code:: bash nvidia-docker run \\ --pid=host \\ --rm \\ --shm-size=256m \\ -p 12345:12345 \\ -u `id -u`:`id -g` \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,s3,hdfs\" \\ -e DRIVERLESS_AI_ENABLE_HTTPS=\"true\" \\ -e DRIVERLESS_AI_SSL_KEY_FILE=\"/etc/pki/dai-server.key\" \\ -e DRIVERLESS_AI_SSL_CRT_FILE=\"/etc/pki/dai-server.crt\" \\ -e DRIVERLESS_AI_SSL_CA_FILE=\"/etc/pki/ca.crt\" \\ -e DRIVERLESS_AI_SSL_CLIENT_VERIFY_MODE=\"CERT_REQUIRED\" \\ -e DRIVERLESS_AI_SSL_CLIENT_KEY_FILE=\"/etc/pki/dai-self.key\" \\ -e DRIVERLESS_AI_SSL_CLIENT_CRT_FILE=\"/etc/pki/dai-self.cert\" \\ -e DRIVERLESS_AI_AUTHENTICATION_METHOD=\"tls_certificate\" \\ -e DRIVERLESS_AI_AUTH_TLS_SUBJECT_FIELD=\"CN\" \\ -e DRIVERLESS_AI_AUTH_TLS_CRL_FILE=\"/etc/pki/crl.pem\" \\ -e DRIVERLESS_AI_AUTH_TLS_FIELD_PARSE_REGEXP=\"(?P.", + "prompt_type": "plain" + }, + { + "output": "Using a Custom Scorer\nDriverless AI supports a number of scorers, including:\n- Regression: GINI, MAE, MAPE, MER, MSE, R2, RMSE (default), RMSLE,\n  RMSPE, SMAPE, TOPDECILE\n- Classification: ACCURACY, AUC (default), AUCPR, F05, F1, F2, GINI,\n  LOGLOSS, MACROAUC, MCC\nThis example shows how you can include a custom scorer in your\nexperiment. This example will use the Explained Variance scorer, which\nis used for regression experiments. 1. Start an experiment in Driverless AI by selecting your training\n    dataset along with (optionally) validation and testing datasets and\n    then specifying a (regression) Target Column. 2. The scorer defaults to RMSE. Click on Expert Settings. 3. Specify the custom scorer recipe using one of the following methods:\n4. In the Experiment Summary page, select the new Explained Variance\n    (EXPVAR) scorer. 
(Note: If you do not see the EXPVAR option, return\n to the Expert Settings, select Recipes > Include Specific Scorers,\n then click the Enable Custom button in the top right corner.", + "prompt_type": "plain" + }, + { + "output": "Linux RPMs\nFor Linux machines that will not use the Docker image or DEB, an RPM\ninstallation is available for the following environments:\n- x86_64 RHEL 7 / RHEL 8\n- CentOS 7 / CentOS 8\nThe installation steps assume that you have a license key for Driverless\nAI. For information on how to obtain a license key for Driverless AI,\nvisit https://www.h2o.ai/products/h2o-driverless-ai/. Once obtained, you\nwill be prompted to paste the license key into the Driverless AI UI when\nyou first log in, or you can save it as a .sig file and place it in the\nlicense folder that you will create during the installation process. Note\n- To ensure that AutoDoc pipeline visualizations are generated\ncorrectly on native installations, installing fontconfig is recommended. - When using systemd, remove the dai-minio, dai-h2o, dai-redis,\n dai-procsy, and dai-vis-server services. 
When upgrading, you can use\n the following commands to deactivate these services:\n systemctl stop dai-minio\n systemctl disable dai-minio\n systemctl stop dai-h2o\n systemctl disable dai-h2o\n systemctl stop dai-redis\n systemctl disable dai-redis\n systemctl stop dai-procsy\n systemctl disable dai-procsy\n systemctl stop dai-vis-server\n systemctl disable dai-vis-server\nEnvironment\n -----------------------------------\n Operating System Min Mem\n ------------------------- ---------\n RHEL with GPUs 64 GB\n RHEL with CPUs 64 GB\n CentOS with GPUS 64 GB\n CentOS with CPUs 64 GB\n -----------------------------------\nRequirements\n- RedHat 7/RedHat 8/CentOS 7/CentOS 8\n- NVIDIA drivers >= recommended (GPU only).", + "prompt_type": "plain" + }, + { + "output": "About the Install\n- The 'dai' service user is created locally (in /etc/passwd) if it is\n not found by 'getent passwd'. You can override the user by providing\n the DAI_USER environment variable during rpm or dpkg installation. - The 'dai' service group is created locally (in /etc/group) if it is\n not found by 'getent group'. You can override the group by providing\n the DAI_GROUP environment variable during rpm or dpkg installation. - Configuration files are placed in /etc/dai and owned by the 'root'\n user:\n - /etc/dai/config.toml: Driverless AI config file (See config_file\n section for details). - /etc/dai/User.conf: systemd config file specifying the service\n user. - /etc/dai/Group.conf: systemd config file specifying the service\n group. - /etc/dai/EnvironmentFile.conf: systemd config file specifying\n (optional) environment variable overrides. 
- Software files are placed in /opt/h2oai/dai and owned by the 'root'\n user\n- The following directories are owned by the service user so that they\n can be updated by the running software:\n - /opt/h2oai/dai/home: The application's home directory (license\n key files are stored here).", + "prompt_type": "plain" + }, + { + "output": "- /opt/h2oai/dai/log: Log files go here if you are not using\n systemd (if you are using systemd, then the use the standard\n journalctl tool). - By default, for Docker or DEB/RPM installs, Driverless AI looks for\n a license key in /opt/h2oai/dai/home/.driverlessai/license.sig. If\n you are installing Driverless AI programmatically, you can copy a\n license key file to that location. For TAR SH installs, the\n equivalent location is /home/.driverlessai, and after\n the license is imported, it is copied under ~/.driverlessai. If no\n license key is found, the application guides you through the process\n of adding one through the UI. - systemd unit files are placed in /usr/lib/systemd/system. - Symbolic links to the configuration files in /etc/dai files are\n placed in /etc/systemd/system. If your environment is running an operational systemd, that is the\npreferred way to manage Driverless AI. The package installs the\nfollowing systemd services and a wrapper service:\n- dai: Wrapper service that starts/stops the other three services.", + "prompt_type": "plain" + }, + { + "output": "- dai-h2o: H2O-3 helper process used by Driverless AI. - dai-procsy: Procsy helper process used by Driverless AI. - dai-vis-server: Visualization server helper process used by\n Driverless AI. If you don't have systemd, refer to linux-tarsh for install\ninstructions. Installing OpenCL\nOpenCL is required for full LightGBM support on GPU-powered systems. 
To\ninstall OpenCL, run the following as root:\n mkdir -p /etc/OpenCL/vendors && echo \"libnvidia-opencl.so.1\" > /etc/OpenCL/vendors/nvidia.icd && chmod a+r /etc/OpenCL/vendors/nvidia.icd && chmod a+x /etc/OpenCL/vendors/ && chmod a+x /etc/OpenCL\nNote\nIf OpenCL is not installed, then CUDA LightGBM is automatically used. CUDA LightGBM is only supported on Pascal-powered (and later) systems,\nand can be enabled manually with the enable_lightgbm_cuda_support\nconfig.toml setting. Installing Driverless AI\nRun the following commands to install the Driverless AI RPM. # Install Driverless AI. sudo rpm -i |VERSION-rpm-lin|\nNote: For RHEL 7.5, it is necessary to upgrade library glib2:\n sudo yum upgrade glib2\nBy default, the Driverless AI processes are owned by the 'dai' user and\n'dai' group.", + "prompt_type": "plain" + }, + { + "output": "Replace and as appropriate. # Temporarily specify service user and group when installing Driverless AI. # rpm saves these for systemd in the /etc/dai/User.conf and /etc/dai/Group.conf files. sudo DAI_USER=myuser DAI_GROUP=mygroup rpm -i |VERSION-rpm-lin|\nYou may now optionally make changes to /etc/dai/config.toml. Starting Driverless AI\nIf you have systemd (preferred):\n # Start Driverless AI. sudo systemctl start dai\nIf you do not have systemd:\n # Start Driverless AI. sudo -H -u dai /opt/h2oai/dai/run-dai.sh\nStarting NVIDIA Persistence Mode\nIf you have NVIDIA GPUs, you must run the following NVIDIA command. This\ncommand needs to be run every reboot. For more information:\nhttp://docs.nvidia.com/deploy/driver-persistence/index.html. 
sudo nvidia-smi -pm 1\nLooking at Driverless AI log files\nIf you have systemd (preferred):\n sudo systemctl status dai-dai\n sudo journalctl -u dai-dai\nIf you do not have systemd:\n sudo less /opt/h2oai/dai/log/dai.log\n sudo less /opt/h2oai/dai/log/h2o.log\n sudo less /opt/h2oai/dai/log/procsy.log\n sudo less /opt/h2oai/dai/log/vis-server.log\nStopping Driverless AI\nIf you have systemd (preferred):\n # Stop Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Verify. sudo ps -u dai\nIf you do not have systemd:\n # Stop Driverless AI. sudo pkill -U dai\n # The processes should now be stopped. Verify. sudo ps -u dai\nUpgrading Driverless AI\nWARNINGS:\n- This release deprecates experiments and MLI models from 1.7.0 and\n earlier. - Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded. We recommend you take the following steps before\n upgrading. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. Requirements\nWe recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere.", + "prompt_type": "plain" + }, + { + "output": "Go to NVIDIA download driver to get the latest NVIDIA Tesla A/T/V/P/K\nseries drivers. For reference on CUDA Toolkit and Minimum Required\nDriver Versions and CUDA Toolkit and Corresponding Driver Versions, see\nhere . Note\nIf you are using K80 GPUs, the minimum required NVIDIA driver version is\n450.80.02. Upgrade Steps\nIf you have systemd (preferred):\n # Stop Driverless AI. 
sudo systemctl stop dai\n # The processes should now be stopped. Verify. sudo ps -u dai\n # Make a backup of /opt/h2oai/dai/tmp directory at this time. # Upgrade and restart. sudo rpm -U |VERSION-rpm-lin|\n sudo systemctl daemon-reload\n sudo systemctl start dai\nIf you do not have systemd:\n # Stop Driverless AI. sudo pkill -U dai\n # The processes should now be stopped. Verify. sudo ps -u dai\n # Make a backup of /opt/h2oai/dai/tmp directory at this time. # Upgrade and restart. sudo rpm -U |VERSION-rpm-lin|\n sudo -H -u dai /opt/h2oai/dai/run-dai.sh\nUninstalling Driverless AI\nIf you have systemd (preferred):\n # Stop Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Imbalanced modeling in Driverless AI\nThis page describes Driverless AI's imbalanced modeling capabilities. - imbalanced_modeling_overview\n- imbalanced_algorithms_enabling\nOverview\nDriverless AI offers imbalanced algorithms for use cases where there is\na binary, imbalanced target. These algorithms are enabled by default if\nthe target column is considered imbalanced. While they are enabled,\nDriverless AI may decide to not use them in the final model to avoid\npoor performance. Note\nWhile Driverless AI does try imbalanced algorithms by default, they have\nnot generally been found to improve model performance. Note that using\nimbalanced algorithms also results in a significantly larger final\nmodel, because multiple models are combined with different balancing\nratios. Imbalanced algorithms\nDriverless AI provides two types of imbalanced algorithms:\nImbalancedXGBoost and ImbalancedLightGBM. These imbalanced algorithms\ntrain an XGBoost or LightGBM model multiple times on different samples\nof data and then combine the predictions of these models together.", + "prompt_type": "plain" + }, + { + "output": "(By trying multiple ratios, DAI is more likely to come up with a\nrobust model.) 
Note\n- When your experiment is complete, you can find more details about what\nbagging was performed in the experiment AutoDoc . For a sample\nAutoDoc, view the blog post on this topic. - For more information on imbalanced modeling sampling methods, see\n imbalanced-sampling. Enabling imbalanced algorithms\nThe following steps describe how to enable only imbalanced algorithms:\n1. On the Experiment Setup page, click Expert Settings. 2. In the Expert Settings window, click on the Training -> Models\n subtab. 3. For the Include specific models setting, click the Select Values\n button. 4. On the Selected Included Models page, click Uncheck All, and then\n select only the imbalanced algorithms: ImbalancedXGBoost and\n ImbalancedLightGBM. Click Done to confirm your selection. 5. In the Expert Settings window, click the Save button. Additional tips\nThis section describes additional tips you can make use of when enabling\nimbalanced algorithms.", + "prompt_type": "plain" + }, + { + "output": "Custom Individual Recipe\nThe following sections describe Driverless AI's Individual Recipe\nfeature. - individual-recipe-understanding\n- individual-recipe-getting\n- individual-recipe-using\n- individual-recipe-including\n- individual-recipe-example\nUnderstanding the Individual Recipe\nIn Driverless AI, every completed experiment automatically generates\nPython code for the experiment that corresponds to the individual(s)\nused to build the final model. You can edit this auto-generated Python\ncode offline and upload it as a recipe, or edit and save it using the\nbuilt-in custom recipe management editor . This feature\ngives you code-first access to a significant portion of DAI's internal\ntransformer and model generation process. The Individual Recipe contains information about model type, model\nhyperparameters, data science types for input features, transformers\nused, and transformer parameters. 
It is an object that is evolved by\nmutation within the context of DAI's genetic algorithm .", + "prompt_type": "plain" + }, + { + "output": "This feature is supported for experiments made using DAI 1.7.2 and\nlater. Using custom individuals\nA custom individual can be run as is, evolved alongside other models or\nindividuals, or frozen to be included as is during the final evolution\nstage alongside other models from the experiment. - As is: To ensemble the custom individuals as they are, set\n enable_genetic_algorithm to off. Note\n that to get reproducible results, set reproducibility to on and make\n sure that the same accuracy knob settings are selected (as accuracy\n settings affects the internal cross validation fold data\n assignment). - Evolve alongside other models or individuals: This is the default\n behavior where a custom individual behaves like a standard internal\n DAI individual, which has its features and model hyperparameters\n mutated during the genetic algorithm process as per the\n experiment settings. - Frozen individuals: By default, a custom individual behaves like a\n standard internal DAI individual, which has its features and model\n hyperparameters mutated during evolution.", + "prompt_type": "plain" + }, + { + "output": "You can specify the number of such\n individuals to be included in an ensemble along with any other, by\n modifying the\n Ensemble Level for Final Modeling Pipeline \n expert setting. Getting the Individual Recipe from experiments\nIn Driverless AI, every experiment automatically generates editable\npython code for the best individuals (or models). The following sections\ndescribe how to get the Individual Recipe code for a completed\nexperiment. - From a completed experiment: From a completed experiment page, click\n Tune Experiment > Create Individual Recipe, then select Upload as\n Custom Recipe. 
When this option is selected, the Individual Recipe\n becomes available on the Recipes page and in the Expert Settings\n under the Include specific individuals setting. You can also select\n Download to download the Individual Recipe Python file directly to\n your local file system. You can then add the downloaded Individual\n Recipe to DAI by clicking Recipes in the main navigation, then\n clicking Add Custom Recipes > From Computer.", + "prompt_type": "plain" + }, + { + "output": "- From the Downloaded Summary: The Individual Recipe Python file is\n included as part of the summary file for every completed experiment. To download the summary file, click the Download Summary & Logs\n button of any completed experiment. The individual recipe filename\n is final_indiv0.py. Using the Individual Recipe\nThis section describes how you can use the Individual Recipe to view\ndetailed information about how the final model was built and make\nfine-tuned adjustments to the model by editing the auto-generated Python\ncode and using the edited Individual Recipe in a new experiment. - individual-recipe-transparency\n- individual-recipe-model-control\n- individual-recipe-feature-control\nModel Transparency\nThe following functions in the Individual Recipe provide significant\ntransparency for the final model:\n- The set_model function lets you view various details about the final\n model such as model type and the model's parameters. - The set_genes function lets you view each feature that is in the\n model and information about how each feature was transformed.", + "prompt_type": "plain" + }, + { + "output": "You can make minor modifications to these\nparameters by editing the self.model_params dictionary. This can be\nhelpful if you want to see whether minor changes to the parameters\nresult in more robust or accurate models or if you are required to\nchange the model parameters for business or regulatory purposes. 
Feature Control\nEach feature used in the model is listed in the set_genes function,\nbeginning with features that were not engineered and followed by\nengineered features. The following examples show original and\ntransformed features as they appear in the auto-generated Python code. Original features\nThe following example provides details on an original feature called\nHumidity3pm. Note\nOriginal features are labeled with the value OriginalTransformer in the\nadd_transformer() field. # Gene Normalized Importance: 1\n # Transformed Feature Names and Importances: {'3_Humidity3pm': 1.0}\n # Valid parameters: ['num_cols', 'random_state', 'output_features_to_drop', 'labels']\n params = {'num_cols': ['Humidity3pm'], 'random_state': 997149340}\n self.add_transformer('OriginalTransformer', col_type='numeric', gene_index=3, forced=False, mono=False, **params)\nEngineered features\nIn the following example, the Cross Validation Target Encoding\ntransformer was applied to the WindDir3pm column.", + "prompt_type": "plain" + }, + { + "output": "The following sections describe how to perform these actions\nusing the Individual Recipe. Adding features\nDuring the experiment, Driverless AI uses a Genetic Algorithm to\ndetermine which features to drop from the model. However, your use case\nmay require you to force a column to be used by the model. The following\nsteps describe how to force in a numeric column that was dropped by\nDriverless AI:\n1. Copy an OriginalTransformer feature that is already in the code and\n paste it below. 2. Specify the column you want to force in with the num_cols field. In\n the example below, Driverless AI dropped YearsSinceLastPromotion, so\n an OriginalTransformer example that was already present was copied\n and the value for num_cols was edited. 3. To ensure that the model uses the feature, set forced=True. 4. Change the gene_index to a value that is not used . 
The following is an example of how the final code appears:\n params = {'num_cols': ['YearsSinceLastPromotion'], 'random_state': 730763716}\n self.add_transformer('OriginalTransformer', col_type='numeric', gene_index=100, forced=True, mono=False, **params)\nDeleting features\nThe Experiment Setup page contains a dropped_columns setting that lets\nyou drop columns from an experiment so that they are not used by any\nmodel.", + "prompt_type": "plain" + }, + { + "output": "In this scenario, you can delete the unwanted feature from the\nIndividual Recipe code. Modifying features\nDriverless AI automatically creates engineered features that have a list\nof editable parameters that are specific to the transformer. Because\nthese are internal parameters, contacting support@h2o.ai is recommended\nwhen modifying these parameters. The following are two common use cases for modifying specific features\nin the Individual Recipe code:\n- Forcing features into a model: To force in a specific feature and\n ensure that it is not pruned, set forced=True. - Enforcing monotonicity: To enforce monotonicity for a specific\n feature, set mono=True. Using the edited Individual Recipe in a new experiment\nThe following steps describe how to use an edited Individual Recipe in a\nnew experiment from the built-in\ncustom recipe management editor . 1. On the Custom Recipes page, click the Individual Recipe you want to\n edit. 2. Use the built-in recipe editor to make changes to the Individual\n Recipe.", + "prompt_type": "plain" + }, + { + "output": "Click Save as New Recipe and Activate. 4. Click More Actions > Use in New Experiment. Including specific individuals in an experiment\nThe downloaded individual recipe (zip or Python file) can be directly\nuploaded from the computer via the expert settings when creating a new\nexperiment. 
You can also perform the following steps to include an Individual Recipe\nthat has already been uploaded by using the\nInclude specific individuals expert setting. 1. On the Experiment Setup page, click Expert Settings. The Expert\n Settings window is displayed. 2. Click the Recipes tab, then click Select Values for the Include\n specific individuals expert setting. 3. Select the custom individuals you want to include in the experiment,\n then click Done. 4. In the Expert Settings window, click Save. The experiment preview\n updates to reflect the inclusion of the selected custom individuals. Individual Recipe Example\nThis section contains a list of minimum required parameters for a custom\nIndividual Recipe, as well as an example of a custom Individual Recipe\nusing the Credit Card dataset.", + "prompt_type": "plain" + }, + { + "output": "Minimum required parameters\nThe following is a list of the minimum required parameters for a custom\nIndividual Recipe:\n- Model type: Specify the model type. For example:\n- Model parameters: Specify the parameters of the model. For example:\n- Genome: Specify all valid parameters for genes. For example:\nSample Individual Recipe\nThe following is an example of a custom Individual Recipe using the\nCredit Card dataset. Note\nThe following example does not contain all available parameters for\ncustom Individual Recipes. For an example Individual Recipe that\nfeatures all available parameters, see creditcard.py from the official\nDriverless AI recipes GitHub repository. from h2oaicore.ga import CustomIndividual\n # Custom wrapper class used to construct the DAI Individual. # Contains information related to model type, model parameters,\n # feature types, and feature parameters. 
class IndivCCsimple(CustomIndividual):\n # Function to set the model type and its parameters.", + "prompt_type": "plain" + }, + { + "output": "Security\n\nsecurity config-security", + "prompt_type": "plain" + }, + { + "output": "PAM Authentication Example\nThe following sections describe how to enable Pluggable Authentication\nModules (PAM) in Driverless AI. You can do this by specifying\nenvironment variables in the Docker image or by updating the config.toml\nfile. Note: This assumes that the user has an understanding of how to grant\npermissions in their own environment in order for PAM to work. Specifically for Driverless AI, be sure that the Driverless AI processes\nowner has access to /etc/shadow (without root); otherwise authentication\nwill fail. Docker Image Installs\nNote: The following instructions are only applicable with a CentOS 7\nhost. In this example, the host Linux system has PAM enabled for\nauthentication and Docker running on that Linux system. The goal is to\nenable PAM for Driverless AI authentication while the Linux system hosts\nthe user information. 1. Verify that the username (\"eric\" in this case) is defined in the\n Linux system. 2. Start Docker on the Linux Server and enable PAM in Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Obtain the Driverless AI container ID. This ID is required for the\n next step and will be different every time Driverless AI is started. 4. From the Linux Server, verify that the Docker Driverless AI instance\n can see the shadow file. The example below references 8e333475ffd8,\n which is the container ID obtained in the previous step. 5. Open a Web browser and navigate to port 12345 on the Linux system\n that is running the Driverless AI Docker Image. Log in with\n credentials known to the Linux system. The login information will\n now be validated using PAM. Native Installs\nIn this example, the host Linux system has PAM enabled for\nauthentication. 
The goal is to enable PAM for Driverless AI\nauthentication while the Linux system hosts the user information. This example shows how to edit the config.toml file to enable PAM. The\nconfig.toml file is available in the etc/dai folder after the RPM or DEB\nis installed. Edit the authentication_method variable in this file to\nenable PAM authentication, and then restart Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "time_series_recipe``\nTime-Series Lag-Based Recipe\nThis recipe specifies whether to include Time Series lag features when\ntraining a model with a provided (or autodetected) time column. This is\nenabled by default. Lag features are the primary automatically generated\ntime series features and represent a variable's past values. At a given\nsample with time stamp t, features at some time difference T (lag) in\nthe past are considered. For example, if the sales today are 300, and\nsales of yesterday are 250, then the lag of one day for sales is 250. Lags can be created on any feature as well as on the target. Lagging\nvariables are important in time series because knowing what happened in\ndifferent time periods in the past can greatly facilitate predictions\nfor the future. Note: Ensembling is disabled when the lag-based recipe\nwith time columns is activated because it only supports a single final\nmodel. Ensembling is also disabled if a time column is selected or if\ntime column is set to [Auto] on the experiment setup screen.", + "prompt_type": "plain" + }, + { + "output": "time_series_leaderboard_mode-------------------------------- .. container:: dropdown **Control the automatic time-series leaderboard mode** Select from the following options: - 'diverse': explore a diverse set of models built using various expert settings. Note that it's possible to rerun another such diverse leaderboard on top of the best-performing model(s), which will effectively help you compose these expert settings. 
- 'sliding_window': If the forecast horizon is N periods, create a separate model for each of the (gap, horizon) pairs of (0,n), (n,n), (2*n,n), ..., (2*N-1, n) in units of time periods. The number of periods to predict per model n is controlled by the expert setting ``time_series_leaderboard_periods_per_model``, which defaults\n\n to 1. This can help to improve short-term forecasting quality.", + "prompt_type": "plain" + }, + { + "output": "time_series_leaderboard_periods_per_model--------------------------------------------- .. container:: dropdown **Number of periods per model if time_series_leaderboard_mode is 'sliding_window'** Specify the number of periods per model if ``time_series_leaderboard_mode`` is set to ``sliding_window``. Larger\n\n values lead to fewer models.", + "prompt_type": "plain" + }, + { + "output": "time_series_merge_splits``\n\nLarger Validation Splits for Lag-Based Recipe\n\nSpecify whether to create larger validation splits that are not bound to\nthe length of the forecast horizon. This can help to prevent overfitting\non small data or short forecast horizons. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "merge_splits_max_valid_ratio``\n\nMaximum Ratio of Training Data Samples Used for Validation\n\nSpecify the maximum ratio of training data samples used for validation\nacross splits when larger validation splits are created (see\ntime_series_merge_splits setting). The default value (-1) will set the\nratio automatically depending on the total amount of validation splits.", + "prompt_type": "plain" + }, + { + "output": "fixed_size_splits``\n\nFixed-Size Train Timespan Across Splits\n\nSpecify whether to keep a fixed-size train timespan across time-based\nsplits during internal validation. That leads to roughly the same amount\nof train samples in every split. 
This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "time_series_validation_fold_split_datetime_boundaries``\n\nCustom Validation Splits for Time-Series Experiments\n\nSpecify date or datetime timestamps (in the same format as the time\ncolumn) to use for custom training and validation splits.", + "prompt_type": "plain" + }, + { + "output": "timeseries_split_suggestion_timeout``\n\nTimeout in Seconds for Time-Series Properties Detection in UI\n\nSpecify the timeout in seconds for time-series properties detection in\nDriverless AI's user interface. This value defaults to 30.", + "prompt_type": "plain" + }, + { + "output": "holiday_features``\n\nGenerate Holiday Features\n\nFor time-series experiments, specify whether to generate holiday\nfeatures for the experiment. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "holiday_countries``\n\nCountry code(s) for holiday features\n\nSpecify country codes in the form of a list that is used to look up\nholidays.\n\nNote: This setting is for migration purposes only.", + "prompt_type": "plain" + }, + { + "output": "override_lag_sizes``\n\nTime-Series Lags Override\n\nSpecify the override lags to be used. The lag values provided here are\nthe only set of lags to be explored in the experiment. 
The following\nexamples show the variety of different methods that can be used to\nspecify override lags:\n\n- \"[0]\" disable lags\n- \"[7, 14, 21]\" specifies this exact list\n- \"21\" specifies every value from 1 to 21\n- \"21:3\" specifies every value from 1 to 21 in steps of 3\n- \"5-21\" specifies every value from 5 to 21\n- \"5-21:3\" specifies every value from 5 to 21 in steps of 3", + "prompt_type": "plain" + }, + { + "output": "override_ufapt_lag_sizes``\n\nLags Override for Features That are not Known Ahead of Time\n\nSpecify lags override for non-target features that are not known ahead\nof time.\n\n- \"[0]\" disable lags\n- \"[7, 14, 21]\" specifies this exact list\n- \"21\" specifies every value from 1 to 21\n- \"21:3\" specifies every value from 1 to 21 in steps of 3\n- \"5-21\" specifies every value from 5 to 21\n- \"5-21:3\" specifies every value from 5 to 21 in steps of 3", + "prompt_type": "plain" + }, + { + "output": "override_non_ufapt_lag_sizes``\n\nLags Override for Features That are Known Ahead of Time\n\nSpecify lags override for non-target features that are known ahead of\ntime.\n\n- \"[0]\" disable lags\n- \"[7, 14, 21]\" specifies this exact list\n- \"21\" specifies every value from 1 to 21\n- \"21:3\" specifies every value from 1 to 21 in steps of 3\n- \"5-21\" specifies every value from 5 to 21\n- \"5-21:3\" specifies every value from 5 to 21 in steps of 3", + "prompt_type": "plain" + }, + { + "output": "min_lag_size``\n\nSmallest Considered Lag Size\n\nSpecify a minimum considered lag size. This value defaults to -1.", + "prompt_type": "plain" + }, + { + "output": "allow_time_column_as_feature``\n\nEnable Feature Engineering from Time Column\n\nSpecify whether to enable feature engineering based on the selected time\ncolumn, e.g. Date~weekday. 
This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "allow_time_column_as_numeric_feature``\n\nAllow Integer Time Column as Numeric Feature\n\nSpecify whether to enable feature engineering from an integer time\ncolumn. Note that if you are using a time series recipe, using a time\ncolumn (numeric time stamps) as an input feature can lead to a model\nthat memorizes the actual timestamps instead of features that generalize\nto the future. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "datetime_funcs------------------ .. container:: dropdown **Allowed Date and Date-Time Transformations** Specify the date or date-time transformations to allow Driverless AI to use. Choose from the following transformers: - year - quarter - month - week - weekday - day - dayofyear - num (direct numeric value representing the floating point value of time, disabled by default) - hour - minute - second Features in Driverless AI will appear as ``get_`` followed by the name of the transformation. Note that ``get_num`` can lead to\n\n overfitting if used on IID problems and is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "filter_datetime_funcs``\n\nAuto Filtering of Date and Date-Time Transformations\n\nWhether to automatically filter out date and date-time transformations\nthat would lead to unseen values in the future. This is enabled by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "allow_tgc_as_features``\n\nConsider Time Groups Columns as Standalone Features\n\nSpecify whether to consider time groups columns as standalone features.\nThis is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "allowed_coltypes_for_tgc_as_features``\n\nWhich TGC Feature Types to Consider as Standalone Features\n\nSpecify whether to consider time groups columns (TGC) as standalone\nfeatures. 
If \"Consider time groups columns as standalone features\" is\nenabled, then specify which TGC feature types to consider as standalone\nfeatures. Available types are numeric, categorical, ohe_categorical,\ndatetime, date, and text. All types are selected by default. Note that\n\"time_column\" is treated separately via the \"Enable Feature Engineering\nfrom Time Column\" option. Also note that if \"Time Series Lag-Based\nRecipe\" is disabled, then all time group columns are allowed features.", + "prompt_type": "plain" + }, + { + "output": "enable_time_unaware_transformers``\n\nEnable Time Unaware Transformers\n\nSpecify whether various transformers (clustering, truncated SVD) are\nenabled, which otherwise would be disabled for time series experiments\ndue to the potential to overfit by leaking across time within the fit of\neach fold. This is set to Auto by default.", + "prompt_type": "plain" + }, + { + "output": "tgc_only_use_all_groups``\n\nAlways Group by All Time Groups Columns for Creating Lag Features\n\nSpecify whether to group by all time groups columns for creating lag\nfeatures, instead of sampling from them. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "tgc_allow_target_encoding----------------------------- .. container:: dropdown **Allow Target Encoding of Time Groups Columns** Specify whether it is allowed to target encode the time groups columns. This is disabled by default. **Notes**: - This setting is not affected by ``allow_tgc_as_features``. - Subgroups can be encoded by disabling ``tgc_only_use_all_groups``.", + "prompt_type": "plain" + }, + { + "output": "time_series_holdout_preds``\n\nGenerate Time-Series Holdout Predictions\n\nSpecify whether to create diagnostic holdout predictions on training\ndata using moving windows. This is enabled by default. This can be\nuseful for MLI, but it will slow down the experiment considerably when\nenabled. 
Note that the model itself remains unchanged when this setting\nis enabled.", + "prompt_type": "plain" + }, + { + "output": "time_series_validation_splits``\n\nNumber of Time-Based Splits for Internal Model Validation\n\nSpecify a fixed number of time-based splits for internal model\nvalidation. Note that the actual number of allowed splits can be less\nthan the specified value, and that the number of allowed splits is\ndetermined at the time an experiment is run. This value defaults to -1\n(auto).", + "prompt_type": "plain" + }, + { + "output": "time_series_splits_max_overlap``\n\nMaximum Overlap Between Two Time-Based Splits\n\nSpecify the maximum overlap between two time-based splits. The amount of\npossible splits increases with higher values. This value defaults to\n0.5.", + "prompt_type": "plain" + }, + { + "output": "time_series_max_holdout_splits---------------------------------- .. container:: dropdown **Maximum Number of Splits Used for Creating Final Time-Series Model's Holdout Predictions** Specify the maximum number of splits used for creating the final time-series Model's holdout predictions. The default value (-1) will use the same number of splits that are used during model validation. Use ``time_series_validation_splits`` to control the amount of time-based\n\n splits used for model validation.", + "prompt_type": "plain" + }, + { + "output": "mli_ts_fast_approx``\n\nWhether to Speed up Calculation of Time-Series Holdout Predictions\n\nSpecify whether to speed up time-series holdout predictions for\nback-testing on training data. This setting is used for MLI and\ncalculating metrics. Note that predictions can be slightly less accurate\nwhen this setting is enabled. 
This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "mli_ts_fast_approx_contribs``\n\nWhether to Speed up Calculation of Shapley Values for Time-Series\nHoldout Predictions\n\nSpecify whether to speed up Shapley values for time-series holdout\npredictions for back-testing on training data. This setting is used for\nMLI. Note that predictions can be slightly less accurate when this\nsetting is enabled. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "mli_ts_holdout_contribs``\n\nGenerate Shapley Values for Time-Series Holdout Predictions at the Time\nof Experiment\n\nSpecify whether to enable the creation of Shapley values for holdout\npredictions on training data using moving windows at the time of the\nexperiment. This can be useful for MLI, but it can slow down the\nexperiment when enabled. If this setting is disabled, MLI will generate\nShapley values on demand. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "time_series_min_interpretability``\n\nLower Limit on Interpretability Setting for Time-Series Experiments\n(Implicitly Enforced)\n\nSpecify the lower limit on interpretability setting for time-series\nexperiments. Values of 5 (default) or more can improve generalization by\nmore aggressively dropping the least important features. To disable this\nsetting, set this value to 1.", + "prompt_type": "plain" + }, + { + "output": "lags_dropout``\n\nDropout Mode for Lag Features\n\nSpecify the dropout mode for lag features in order to achieve an equal\nn.a. ratio between train and validation/tests. Independent mode performs\na simple feature-wise dropout. Dependent mode takes the lag-size\ndependencies per sample/row into account. Dependent is enabled by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "prob_lag_non_targets``\n\nProbability to Create Non-Target Lag Features\n\nLags can be created on any feature as well as on the target. 
Specify a\nprobability value for creating non-target lag features. This value\ndefaults to 0.1.", + "prompt_type": "plain" + }, + { + "output": "rolling_test_method``\n\nMethod to Create Rolling Test Set Predictions\n\nSpecify the method used to create rolling test set predictions. Choose\nbetween test time augmentation (TTA) and a successive refitting of the\nfinal pipeline (Refit). TTA is enabled by default.\n\nNotes:\n\n- This setting only applies to the test set that is provided by the\n user during an experiment.\n- This setting only has an effect if the provided test set spans more\n periods than the forecast horizon and if the target values of the\n test set are known.", + "prompt_type": "plain" + }, + { + "output": "fast_tta_internal``\n\nFast TTA for Internal Validation\n\nSpecify whether the genetic algorithm applies Test Time Augmentation\n(TTA) in one pass instead of using rolling windows for validation splits\nlonger than the forecast horizon. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "prob_default_lags``\n\nProbability for New Time-Series Transformers to Use Default Lags\n\nSpecify the probability for new lags or the EWMA gene to use default\nlags. This is determined independently of the data by frequency, gap,\nand horizon. This value defaults to 0.2.", + "prompt_type": "plain" + }, + { + "output": "prob_lagsinteraction``\n\nProbability of Exploring Interaction-Based Lag Transformers\n\nSpecify the unnormalized probability of choosing other lag time-series\ntransformers based on interactions. This value defaults to 0.2.", + "prompt_type": "plain" + }, + { + "output": "prob_lagsaggregates``\n\nProbability of Exploring Aggregation-Based Lag Transformers\n\nSpecify the unnormalized probability of choosing other lag time-series\ntransformers based on aggregations. 
This value defaults to 0.2.", + "prompt_type": "plain" + }, + { + "output": "ts_target_trafo``\nTime Series Centering or Detrending Transformation\nSpecify whether to use centering or detrending transformation for time\nseries experiments. Select from the following:\n- None (Default)\n- Centering (Fast)\n- Centering (Robust)\n- Linear (Fast)\n- Linear (Robust)\n- Logistic\n- Epidemic (Uses the SEIRD model)\nThe fitted signal is removed from the target signal per individual time\nseries once the free parameters of the selected model are fitted. Linear\nor Logistic will remove the fitted linear or logistic trend, Centering\nwill only remove the mean of the target signal and Epidemic will remove\nthe signal specified by a Susceptible-Exposed-Infected-Recovered-Dead\n(SEIRD) epidemic model. Predictions are made by adding the previously\nremoved signal once the pipeline is fitted on the residuals. Notes:\n- MOJO support is currently disabled when this setting is enabled. - The Fast centering and linear detrending options use least squares\n fitting. - The Robust centering and linear detrending options use random sample\n consensus (RANSAC) to achieve higher tolerance w.r.t.", + "prompt_type": "plain" + }, + { + "output": "ts_target_trafo_epidemic_params_dict---------------------------------------- .. container:: dropdown **Custom Bounds for SEIRD Epidemic Model Parameters** Specify the custom bounds for controlling Susceptible-Exposed-Infected-Recovered-Dead (SEIRD) epidemic model parameters for detrending of the target for each time series group. The target column must correspond to *I(t)*, which represents infection cases as a function of time. For each training split and time series group, the SEIRD model is fit to the target signal by optimizing a set of free parameters for each time series group. The model's value is then subtracted from the training response, and the residuals are passed to the feature engineering and modeling pipeline. 
For predictions, the SEIRD model's value is added to the residual predictions from the pipeline for each time series group. The following is a list of free parameters: - **N**: Total population, *N = S+E+I+R+D* - **beta**: Rate of exposure (*S* -> *E*) - **gamma**: Rate of recovering (*I* -> *R*) - **delta**: Incubation period - **alpha**: Fatality rate - **rho**: Rate at which individuals expire - **lockdown**: Day of lockdown (-1 => no lockdown) - **beta_decay**: Beta decay due to lockdown - **beta_decay_rate**: Speed of beta decay Provide upper or lower bounds for each parameter you want to control.", + "prompt_type": "plain" + }, + { + "output": "ts_target_trafo_epidemic_target``\n\nWhich SEIRD Model Component the Target Column Corresponds To\n\nSpecify a SEIRD model component for the target column to correspond to.\nSelect from the following:\n\n- I (Default): Infected\n- R: Recovered\n- D: Deceased", + "prompt_type": "plain" + }, + { + "output": "ts_lag_target_trafo----------------------- .. container:: dropdown **Time Series Lag-Based Target Transformation** Specify whether to use either the difference between or ratio of the current target and a lagged target. Select from **None** (default), **Difference**, and **Ratio**. **Notes**: - MOJO support is currently disabled when this setting is enabled. - The corresponding lag size is specified with the ``ts_target_trafo_lag_size``\nexpert setting.", + "prompt_type": "plain" + }, + { + "output": "ts_target_trafo_lag_size---------------------------- .. container:: dropdown **Lag Size Used for Time Series Target Transformation** Specify the lag size used for time series target transformation. 
Specify this setting when using the ``ts_lag_target_trafo`` setting.\n\n This value defaults to -1.\n\n Note: The lag size should not be smaller than the sum of forecast\n horizon and gap.", + "prompt_type": "plain" + }, + { + "output": "UI Language\nThe Driverless AI UI is available in English (default), Japanese,\nChinese (Simplified), and Korean. This section describes how you can use\nthe app_language config setting/environment variable to change the\nlanguage of the UI before starting Driverless AI. When using app_language, the following options can be specified:\n- en: English (default)\n- ja: Japanese\n- cn: Chinese (Simplified)\n- ko: Korean\nExamples\nThe following examples show how to change the app language from English\nto Japanese. Docker Image Installs\nTo change the application language in Docker images, specify the\nAPP_LANGUAGE environment variable. Note that this variable must be\nprepended with DRIVERLESS_AI_. Replace nvidia-docker with docker in the\nexample below if necessary. nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -p 12345:12345 \\\n -u `id -u`:`id -g` \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,s3,hdfs\" \\\n -e DRIVERLESS_AI_APP_LANGUAGE=\"ja\" \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\nDocker Image with the config.toml\nThis example shows how to configure the app_language option in the config.toml\nfile, and then specify that file when starting Driverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "Configure the Driverless AI config.toml file. Set the following\n configuration option. - app_language=\"ja\"\n2. Mount the config.toml file into the Docker container. Replace\n nvidia-docker with docker if necessary. 
nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n --add-host name.node:172.16.2.186 \\\n -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\\n -p 12345:12345 \\\n -v /local/path/to/config.toml:/path/in/docker/config.toml \\\n -v /etc/passwd:/etc/passwd:ro \\\n -v /etc/group:/etc/group:ro \\\n -v /tmp/dtmp/:/tmp \\\n -v /tmp/dlog/:/log \\\n -v /tmp/dlicense/:/license \\\n -v /tmp/ddata/:/data \\\n -u $(id -u):$(id -g) \\\n h2oai/dai-ubi8-x86_64:|tag|\nNative Installs\nNative installs include DEBs, RPMs, and TAR SH installs. The example\nbelow shows how to use the app_language configuration option in the\nconfig.toml file to change the language to Japanese. 1. Export the Driverless AI config.toml file or add it to ~/.bashrc.", + "prompt_type": "plain" + }, + { + "output": "R Client Tutorial\nThis tutorial describes how to use the Driverless AI R client package to\nuse and control the Driverless AI platform. It covers the main\npredictive data-science workflow, including:\n1. Data load\n2. Automated feature engineering and model tuning\n3. Model inspection\n4. Predicting on new data\n5. Managing the datasets and models\nNote: These steps assume that you have entered your license key in the\nDriverless AI UI. 
Loading the Data\nBefore we can start working with the Driverless.ai platform (DAI), we\nhave to import the package and initialize the connection:\n library(dai)\n dai.connect(uri = 'http://localhost:12345', username = 'h2oai', password = 'h2oai')\n creditcard <- dai.create_dataset('/data/smalldata/kaggle/CreditCard/creditcard_train_cat.csv')\n #> \n | \n | | 0%\n | \n |================ | 24%\n | \n |=================================================================| 100%\nThe function dai.create_dataset() loads the data located at the machine\nthat hosts DAI.", + "prompt_type": "plain" + }, + { + "output": "dai.upload_dataset()`` instead.\n\nIf you already have the data loaded into R data.frame, you can convert\nit into a DAIFrame. For example:\n\n iris.dai <- as.DAIFrame(iris)\n #> \n | \n | | 0%\n | \n |=================================================================| 100%\n\n print(iris.dai)\n #> DAI frame '7c38cb84-5baa-11e9-a50b-b938de969cdb': 150 obs. of 5 variables\n #> File path: ./tmp/7c38cb84-5baa-11e9-a50b-b938de969cdb/iris9e1f15d2df00.csv.1554912339.9424415.bin\n\nYou can switch off the progress bar whenever it is displayed by setting", + "prompt_type": "plain" + }, + { + "output": "head, andformat. .. 
code:: r dim(creditcard) #> [1] 23999 25 head(creditcard, 10) #> ID LIMIT_BAL SEX EDUCATION MARRIAGE AGE PAY_1 PAY_2 PAY_3 PAY_4 #> 1 1 20000 female university married 24 2 2 -1 -1 #> 2 2 120000 female university single 26 -1 2 0 0 #> 3 3 90000 female university single 34 0 0 0 0 #> 4 4 50000 female university married 37 0 0 0 0 #> 5 5 50000 male university married 57 -1 0 -1 0 #> 6 6 50000 male graduate single 37 0 0 0 0 #> 7 7 500000 male graduate single 29 0 0 0 0 #> 8 8 100000 female university single 23 0 -1 -1 0 #> 9 9 140000 female highschool married 28 0 0 2 0 #> 10 10 20000 male highschool single 35 -2 -2 -2 -2 #> PAY_5 PAY_6 BILL_AMT1 BILL_AMT2 BILL_AMT3 BILL_AMT4 BILL_AMT5 BILL_AMT6 #> 1 -2 -2 3913 3102 689 0 0 0 #> 2 0 2 2682 1725 2682 3272 3455 3261 #> 3 0 0 29239 14027 13559 14331 14948 15549 #> 4 0 0 46990 48233 49291 28314 28959 29547 #> 5 0 0 8617 5670 35835 20940 19146 19131 #> 6 0 0 64400 57069 57608 19394 19619 20024 #> 7 0 0 367965 412023 445007 542653 483003 473944 #> 8 0 -1 11876 380 601 221 -159 567 #> 9 0 0 11285 14096 12108 12211 11793 3719 #> 10 -1 -1 0 0 0 0 13007 13912 #> PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4 PAY_AMT5 PAY_AMT6 #> 1 0 689 0 0 0 0 #> 2 0 1000 1000 1000 0 2000 #> 3 1518 1500 1000 1000 1000 5000 #> 4 2000 2019 1200 1100 1069 1000 #> 5 2000 36681 10000 9000 689 679 #> 6 2500 1815 657 1000 1000 800 #> 7 55000 40000 38000 20239 13750 13770 #> 8 380 601 0 581 1687 1542 #> 9 3329 0 432 1000 1000 1000 #> 10 0 0 0 13007 1122 0 #> DEFAULT_PAYMENT_NEXT_MONTH #> 1 TRUE #> 2 TRUE #> 3 FALSE #> 4 FALSE #> 5 FALSE #> 6 FALSE #> 7 FALSE #> 8 FALSE #> 9 FALSE #> 10 FALSE You cannot, however, useDAIFrameto access all its data, nor can you use it to modify the data.", + "prompt_type": "plain" + }, + { + "output": "The head function gives access only to example data: .. 
code:: r creditcard$example_data[1:10, ] #> ID LIMIT_BAL SEX EDUCATION MARRIAGE AGE PAY_1 PAY_2 PAY_3 PAY_4 #> 1 1 20000 female university married 24 2 2 -1 -1 #> 2 2 120000 female university single 26 -1 2 0 0 #> 3 3 90000 female university single 34 0 0 0 0 #> 4 4 50000 female university married 37 0 0 0 0 #> 5 5 50000 male university married 57 -1 0 -1 0 #> 6 6 50000 male graduate single 37 0 0 0 0 #> 7 7 500000 male graduate single 29 0 0 0 0 #> 8 8 100000 female university single 23 0 -1 -1 0 #> 9 9 140000 female highschool married 28 0 0 2 0 #> 10 10 20000 male highschool single 35 -2 -2 -2 -2 #> PAY_5 PAY_6 BILL_AMT1 BILL_AMT2 BILL_AMT3 BILL_AMT4 BILL_AMT5 BILL_AMT6 #> 1 -2 -2 3913 3102 689 0 0 0 #> 2 0 2 2682 1725 2682 3272 3455 3261 #> 3 0 0 29239 14027 13559 14331 14948 15549 #> 4 0 0 46990 48233 49291 28314 28959 29547 #> 5 0 0 8617 5670 35835 20940 19146 19131 #> 6 0 0 64400 57069 57608 19394 19619 20024 #> 7 0 0 367965 412023 445007 542653 483003 473944 #> 8 0 -1 11876 380 601 221 -159 567 #> 9 0 0 11285 14096 12108 12211 11793 3719 #> 10 -1 -1 0 0 0 0 13007 13912 #> PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4 PAY_AMT5 PAY_AMT6 #> 1 0 689 0 0 0 0 #> 2 0 1000 1000 1000 0 2000 #> 3 1518 1500 1000 1000 1000 5000 #> 4 2000 2019 1200 1100 1069 1000 #> 5 2000 36681 10000 9000 689 679 #> 6 2500 1815 657 1000 1000 800 #> 7 55000 40000 38000 20239 13750 13770 #> 8 380 601 0 581 1687 1542 #> 9 3329 0 432 1000 1000 1000 #> 10 0 0 0 13007 1122 0 #> DEFAULT_PAYMENT_NEXT_MONTH #> 1 TRUE #> 2 TRUE #> 3 FALSE #> 4 FALSE #> 5 FALSE #> 6 FALSE #> 7 FALSE #> 8 FALSE #> 9 FALSE #> 10 FALSE A dataset can be split into e.g.", + "prompt_type": "plain" + }, + { + "output": ".. code:: r creditcard.splits$train #> DAI frame '7cf3024c-5baa-11e9-a50b-b938de969cdb': 19199 obs. of 25 variables #> File path: ./tmp/7cf3024c-5baa-11e9-a50b-b938de969cdb/train.1554912341.0864356.bin creditcard.splits$test #> DAI frame '7cf613a6-5baa-11e9-a50b-b938de969cdb': 4800 obs. 
of 25 variables #> File path: ./tmp/7cf613a6-5baa-11e9-a50b-b938de969cdb/test.1554912341.0966916.bin By default it yields a random sample, but you can do stratified or time-based splits as well. See the function\u2019s documentation for more details. Automated Feature Engineering and Model Tuning ---------------------------------------------- One of the main strengths of Driverless AI is the fully automated feature engineering along with hyperparameter tuning, model selection and ensembling. The functiondai.train()executes the experiment that results in a DAIModel instance that represents the model. .. code:: r model <- dai.train(training_frame = creditcard.splits$train, testing_frame = creditcard.splits$test, target_col = 'DEFAULT_PAYMENT_NEXT_MONTH', is_classification = T, is_timeseries = F, accuracy = 1, time = 1, interpretability = 10, seed = 25) #> | | | 0% | |========================== | 40% | |=============================================== | 73% | |=========================================================== | 91% | |=================================================================| 100% If you do not specify the accuracy, time, or interpretability, they will be suggested by the DAI platform.", + "prompt_type": "plain" + }, + { + "output": "summary, andpredictwork with DAIModel: .. 
code:: r print(model) #> Status: Complete #> Experiment: 7e2b70ae-5baa-11e9-a50b-b938de969cdb, 2019-04-10 18:06, 1.7.0+local_0c7d019-dirty #> Settings: 1/1/10, seed=25, GPUs enabled #> Train data: train (19199, 25) #> Validation data: N/A #> Test data: test (4800, 24) #> Target column: DEFAULT_PAYMENT_NEXT_MONTH (binary, 22.366% target class) #> System specs: Linux, 126 GB, 40 CPU cores, 2/2 GPUs #> Max memory usage: 0.406 GB, 0.167 GB GPU #> Recipe: AutoDL (2 iterations, 2 individuals) #> Validation scheme: stratified, 1 internal holdout #> Feature engineering: 33 features scored (18 selected) #> Timing: #> Data preparation: 4.94 secs #> Model and feature tuning: 10.13 secs (3 models trained) #> Feature evolution: 5.54 secs (1 of 3 model trained) #> Final pipeline training: 7.85 secs (1 model trained) #> Python / MOJO scorer building: 42.05 secs / 0.00 secs #> Validation score: AUC = 0.77802 +/- 0.0077539 (baseline) #> Validation score: AUC = 0.77802 +/- 0.0077539 (final pipeline) #> Test score: AUC = 0.7861 +/- 0.0064711 (final pipeline) summary(model)$score #> [1] 0.7780229 Predicting on New Data ---------------------- New data can be scored in two different ways: - Callpredict()directly on the model in R session.", + "prompt_type": "plain" + }, + { + "output": "Predicting in R ~~~~~~~~~~~~~~~ Genericpredict()either directly returns an R data.frame with the results (by default) or it returns a URL pointing to a CSV file with the results (return_df=FALSE). The latter option may be useful when you predict on a large dataset. .. 
code:: r predictions <- predict(model, newdata = creditcard.splits$test) #> | | | 0% | |=================================================================| 100% #> Loading required package: bitops head(predictions) #> DEFAULT_PAYMENT_NEXT_MONTH.0 DEFAULT_PAYMENT_NEXT_MONTH.1 #> 1 0.8879988 0.11200116 #> 2 0.9289870 0.07101299 #> 3 0.9550328 0.04496716 #> 4 0.3513577 0.64864230 #> 5 0.9183724 0.08162758 #> 6 0.9154425 0.08455751 predict(model, newdata = creditcard.splits$test, return_df = FALSE) #> | | | 0% | |=================================================================| 100% #> [1] \"h2oai_experiment_7e2b70ae-5baa-11e9-a50b-b938de969cdb/7e2b70ae-5baa-11e9-a50b-b938de969cdb_preds_f854b49f.csv\" Downloading Python or MOJO Scoring Pipelines ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For productizing your model in a Python or Java, you can download full Python or MOJO pipelines, respectively.", + "prompt_type": "plain" + }, + { + "output": ".. code:: r dai.download_mojo(model, path = tempdir(), force = TRUE) #> | | | 0% | |=================================================================| 100% #> Downloading the pipeline: #> [1] \"/tmp/RtmppsLTZ9/mojo-7e2b70ae-5baa-11e9-a50b-b938de969cdb.zip\" dai.download_python_pipeline(model, path = tempdir(), force = TRUE) #> | | | 0% | |=================================================================| 100% #> Downloading the pipeline: #> [1] \"/tmp/RtmppsLTZ9/python-pipeline-7e2b70ae-5baa-11e9-a50b-b938de969cdb.zip\" Managing the Datasets and Models -------------------------------- After some time, you may have multiple datasets and models on your DAI server.", + "prompt_type": "plain" + }, + { + "output": "If you already have the dataset loaded into DAI, you can get the DAIFrame object by eitherdai.get_frame(if you know the frame\u2019s key) ordai.find_dataset(if you know the original path or at least a part of it): .. code:: r dai.get_frame(creditcard$key) #> DAI frame '7abe28b2-5baa-11e9-a50b-b938de969cdb': 23999 obs. 
of 25 variables #> File path: tests/smalldata/kaggle/CreditCard/creditcard_train_cat.csv dai.find_dataset('creditcard') #> DAI frame '7abe28b2-5baa-11e9-a50b-b938de969cdb': 23999 obs. of 25 variables #> File path: tests/smalldata/kaggle/CreditCard/creditcard_train_cat.csv The latter directly returns you the frame if there\u2019s only one match. Otherwise it let you select which frame to return from all the matching candidates. Furthermore, you can get a list of datasets or models: .. code:: r datasets <- dai.list_datasets() head(datasets) #> key name #> 1 7cf613a6-5baa-11e9-a50b-b938de969cdb test #> 2 7cf3024c-5baa-11e9-a50b-b938de969cdb train #> 3 7c38cb84-5baa-11e9-a50b-b938de969cdb iris9e1f15d2df00.csv #> 4 7abe28b2-5baa-11e9-a50b-b938de969cdb creditcard_train_cat.csv #> file_path #> 1 ./tmp/7cf613a6-5baa-11e9-a50b-b938de969cdb/test.1554912341.0966916.bin #> 2 ./tmp/7cf3024c-5baa-11e9-a50b-b938de969cdb/train.1554912341.0864356.bin #> 3 ./tmp/7c38cb84-5baa-11e9-a50b-b938de969cdb/iris9e1f15d2df00.csv.1554912339.9424415.bin #> 4 tests/smalldata/kaggle/CreditCard/creditcard_train_cat.csv #> file_size data_source row_count column_count import_status import_error #> 1 567584 upload 4800 25 0 #> 2 2265952 upload 19199 25 0 #> 3 7064 upload 150 5 0 #> 4 2832040 file 23999 25 0 #> aggregation_status aggregation_error aggregated_frame mapping_frame #> 1 -1 #> 2 -1 #> 3 -1 #> 4 -1 #> uploaded #> 1 TRUE #> 2 TRUE #> 3 TRUE #> 4 FALSE models <- dai.list_models() head(models) #> key description #> 1 7e2b70ae-5baa-11e9-a50b-b938de969cdb mupulori #> dataset_name parameters.dataset_key #> 1 train.1554912341.0864356.bin 7cf3024c-5baa-11e9-a50b-b938de969cdb #> parameters.resumed_model_key parameters.target_col #> 1 DEFAULT_PAYMENT_NEXT_MONTH #> parameters.weight_col parameters.fold_col parameters.orig_time_col #> 1 #> parameters.time_col parameters.is_classification parameters.cols_to_drop #> 1 [OFF] TRUE NULL #> parameters.validset_key parameters.testset_key #> 1 
7cf613a6-5baa-11e9-a50b-b938de969cdb #> parameters.enable_gpus parameters.seed parameters.accuracy #> 1 TRUE 25 1 #> parameters.time parameters.interpretability parameters.scorer #> 1 1 10 AUC #> parameters.time_groups_columns parameters.time_period_in_seconds #> 1 NULL NA #> parameters.num_prediction_periods parameters.num_gap_periods #> 1 NA NA #> parameters.is_timeseries parameters.config_overrides #> 1 FALSE NA #> log_file_path #> 1 h2oai_experiment_7e2b70ae-5baa-11e9-a50b-b938de969cdb/h2oai_experiment_logs_7e2b70ae-5baa-11e9-a50b-b938de969cdb.zip #> pickle_path #> 1 h2oai_experiment_7e2b70ae-5baa-11e9-a50b-b938de969cdb/best_individual.pickle #> summary_path #> 1 h2oai_experiment_7e2b70ae-5baa-11e9-a50b-b938de969cdb/h2oai_experiment_summary_7e2b70ae-5baa-11e9-a50b-b938de969cdb.zip #> train_predictions_path valid_predictions_path #> 1 #> test_predictions_path #> 1 h2oai_experiment_7e2b70ae-5baa-11e9-a50b-b938de969cdb/test_preds.csv #> progress status training_duration scorer score test_score deprecated #> 1 1 0 71.43582 AUC 0.7780229 0.7861 FALSE #> model_file_size diagnostic_keys #> 1 695996094 NULL If you know the key of the dataset or model, you can obtain the instance of DAIFrame or DAIModel bydai.get_modelanddai.get_frame: .. 
code:: r dai.get_model(models$key[1]) #> Status: Complete #> Experiment: 7e2b70ae-5baa-11e9-a50b-b938de969cdb, 2019-04-10 18:06, 1.7.0+local_0c7d019-dirty #> Settings: 1/1/10, seed=25, GPUs enabled #> Train data: train (19199, 25) #> Validation data: N/A #> Test data: test (4800, 24) #> Target column: DEFAULT_PAYMENT_NEXT_MONTH (binary, 22.366% target class) #> System specs: Linux, 126 GB, 40 CPU cores, 2/2 GPUs #> Max memory usage: 0.406 GB, 0.167 GB GPU #> Recipe: AutoDL (2 iterations, 2 individuals) #> Validation scheme: stratified, 1 internal holdout #> Feature engineering: 33 features scored (18 selected) #> Timing: #> Data preparation: 4.94 secs #> Model and feature tuning: 10.13 secs (3 models trained) #> Feature evolution: 5.54 secs (1 of 3 model trained) #> Final pipeline training: 7.85 secs (1 model trained) #> Python / MOJO scorer building: 42.05 secs / 0.00 secs #> Validation score: AUC = 0.77802 +/- 0.0077539 (baseline) #> Validation score: AUC = 0.77802 +/- 0.0077539 (final pipeline) #> Test score: AUC = 0.7861 +/- 0.0064711 (final pipeline) dai.get_frame(datasets$key[1]) #> DAI frame '7cf613a6-5baa-11e9-a50b-b938de969cdb': 4800 obs.", + "prompt_type": "plain" + }, + { + "output": "creditcard.splits$trainandcreditcard.splits$testobjects will not be removed from R session because they are actually function calls (recall that$``\nis a function).", + "prompt_type": "plain" + }, + { + "output": "Model Scores\nYou can view detailed information about model scores after an experiment\nis complete by clicking on the Scores option. []\nThe Model Scores page that opens includes the following tables:\n- Model and feature tuning leaderboard: This leaderboard shows scoring\n information based on the scorer that was selected in the experiment. This information is also available in the tuning_leaderboard.json\n file of the experiment_summary. You can download that file directly\n from the bottom of this table. 
- Final pipeline scores across cross-validation folds and models: This\n table shows the final pipeline scores across cross-validation folds\n and models. Note that if Constant Model was enabled (default), then\n that model is added in this table as a baseline (reference) only and\n will be dropped in most cases. This information is also included in\n the ensemble_base_learner_fold_scores.json file of the\n experiment_summary. You can download that file directly from a link\n at the bottom of this table.", + "prompt_type": "plain" + }, + { + "output": "Scoring Pipelines\n\npython-mojo-pipelines scoring_pipeline_visualize\nscoring-pipeline-which-to-use scoring-standalone-python\nscoring-mli-standalone-python scoring-mojo-pipelines", + "prompt_type": "plain" + }, + { + "output": "Driverless AI user settings\n\nYou can configure several user-specific settings from the UI by clicking\nUser -> User Settings. A window is displayed that lets you set user\nsettings for various connectors. You can also use the search box to\nlocate specific user settings. Click the Save button to confirm your\nchanges.\n\nAWS\n\nSpecify the following AWS-related user settings:\n\n- AWS Access Key ID\n- AWS Secret Access Key\n- AWS S3 Bucket name for artifact export\n\nAzure\n\nSpecify the following Azure-related user settings:\n\n- Azure Blob Store account name\n- Azure Blob Store account key\n- Azure Blob Store Connection String\n\nMinIO\n\nSpecify the following MinIO-related user settings:\n\n- MinIO Access Key ID\n- MinIO Secret Access Key", + "prompt_type": "plain" + }, + { + "output": "Driverless AI MOJO Scoring Pipeline - Java Runtime (With Shapley contribution)\nFor completed experiments, Driverless AI automatically converts models\nto MOJOs (Model Objects, Optimized). The MOJO Scoring Pipeline is a\nscoring engine that can be deployed in any Java environment for scoring\nin real time. 
(For information on the C++ scoring runtime with Python\nand R wrappers, see\nH2O MOJO C++ scoring pipeline .) For info on the\navailable deployment options, see H2O MOJO Deployment . MOJOs are tied to experiments. Experiments and MOJOs are not\nautomatically upgraded when Driverless AI is upgraded. Notes:\n- This scoring pipeline is not currently available for TensorFlow,\n BERT, RuleFit or Image models. TensorFlow/Bert are\n supported by C++ Runtime. - To disable the automatic creation of this scoring pipeline, set the\n Make MOJO Scoring Pipeline expert setting to Off while building an\n experiment. - You can have Driverless AI attempt to reduce the size of the MOJO\n scoring pipeline when the experiment is being built by enabling the\n Reduce MOJO Size expert setting also\n see .", + "prompt_type": "plain" + }, + { + "output": "- Shapley contributions for transformed features and\n original features are currently available for XGBoost (GBM, GLM, RF,\n DART), LightGBM, Zero-Inflated, Imbalanced and DecisionTree models\n (and their ensemble). For ensemble with ExtraTrees meta learner\n (ensemble_meta_learner='extra_trees') models, we suggest to use the\n MLI Python scoring package. Download\nBecause the Java MOJO runtime is backward compatible, we recommend using\nthe latest available version. You can download the latest Java MOJO\nruntime from https://mvnrepository.com/artifact/ai.h2o/mojo2-runtime. A Quick run\nTo get a quick output from the downloaded MOJO scoring pipeline in the\nconsole on the example test set:\n- Make sure Java7 or later is installed. - copy Driverless AI license file (say license.file) to the downloaded\n mojo-pipeline folder\n- cd into the mojo-pipeline folder\n- Score the rows of the example.csv file using the pipeline.mojo file(\n with the mojo2-runtime) created from the experiment to get the\n predictions.", + "prompt_type": "plain" + }, + { + "output": "Bigger test files/MOJOs may require\nmore memory (Xmx) to score. 
Notes:\n - Presently, Shapley contributions for transformed\n features and original features are available for XGBoost (GBM,\n GLM, RF, DART), LightGBM, Zero-Inflated, Imbalanced and\n DecisionTree models (and their ensemble). For ensemble with\n ExtraTrees meta learner (ensemble_meta_learner='extra_trees')\n models we suggest to use the MLI Python scoring package. - In MOJOs, Shapley values for original features are approximated\n from the accompanying Shapley values for transformed features with\n the Naive Shapley (even split ) method. - The Shapley fast approximation uses only\n one model (from the first fold) with no more than the first 50\n trees. For details see fast_approx_num_trees and\n fast_approx_do_one_fold_one_model\n config.toml settings . Prerequisites\nThe following are required in order to run the MOJO scoring pipeline.", + "prompt_type": "plain" + }, + { + "output": "NOTE: We recommend using Java 11+\n due to a bug in Java. (See\n https://bugs.openjdk.java.net/browse/JDK-8186464.) - Valid Driverless AI license. You can download the license.sig file\n from the machine hosting Driverless AI (usually in the license\n folder). Copy the license file into the downloaded mojo-pipeline\n folder. - mojo2-runtime.jar file. This is available from the top navigation\n menu in the Driverless AI UI and in the downloaded mojo-pipeline.zip\n file for an experiment. License Specification\nDriverless AI requires a license to be specified in order to run the\nMOJO Scoring Pipeline. 
The license can be specified in one of the\nfollowing ways:\n- Via an environment variable:\n - DRIVERLESS_AI_LICENSE_FILE: Path to the Driverless AI license\n file, or\n - DRIVERLESS_AI_LICENSE_KEY: The Driverless AI license key\n (Base64 encoded string)\n- Via a system property of JVM (-D option):\n - ai.h2o.mojos.runtime.license.file: Path to the Driverless AI\n license file, or\n - ai.h2o.mojos.runtime.license.key: The Driverless AI license\n key (Base64 encoded string)\n- Via an application classpath:\n - The license is loaded from a resource called /license.sig.", + "prompt_type": "plain" + }, + { + "output": "For example:\n # Specify the license via a temporary environment variable\n export DRIVERLESS_AI_LICENSE_FILE=\"path/to/license.sig\"\nMOJO Scoring Pipeline Files\nThe mojo-pipeline folder includes the following files:\n- run_example.sh: A bash script to score a sample test set. - pipeline.mojo: Standalone scoring pipeline in MOJO format. - mojo2-runtime.jar: MOJO Java runtime. - example.csv: Sample test set (synthetic, of the correct format). - DOT files: Text files that can be rendered as graphs that provide a\n visual representation of the MOJO scoring pipeline (can be edited to\n change the appearance and structure of a rendered graph). - PNG files: Image files that provide a visual representation of the\n MOJO scoring pipeline. Quickstart\nBefore running the quickstart examples, be sure that the MOJO scoring\npipeline is already downloaded and unzipped:\n1. On the completed Experiment page, click on the Download MOJO Scoring\n Pipeline button. 2. 
In the pop-up menu that appears, click on the Download MOJO Scoring\n Pipeline button once again to download the scorer.zip file for this\n experiment onto your local machine.", + "prompt_type": "plain" + }, + { + "output": "Run the following to score all rows in the sample test set with the\n file paths to the test set (example.csv), MOJO pipeline\n (pipeline.mojo) and license (license.sig) stored in environment\n variables TEST_SET_FILE, MOJO_PIPELINE_FILE,\n DRIVERLESS_AI_LICENSE_KEY:\n4. Run the following to score a specific test set (example.csv) with\n MOJO pipeline (pipeline.mojo) and the license file (license.sig):\n5. To run the Java application for data transformation directly:\nMOJO Scoring Command-Line Options\nExecuting the Java Runtime\nThe following are two general examples of how the Java runtime can be\nexecuted from the command-line. - With additional libraries:\n- Without additional libraries:\nSo, for example, the sys.ai.h2o.mojos.parser.csv.separator option can be\npassed with the following:\n java -Dsys.ai.h2o.mojos.parser.csv.separator='|' -Dai.h2o.mojos.runtime.license.file=../license.sig -jar mojo2-runtime.jar pipeline.mojo input.csv output.csv\nSimilarly, the sys.ai.h2o.mojos.exposedInputs option can be passed with:\n java -Xmx5g -Dsys.ai.h2o.mojos.exposedInputs=ALL -Dai.h2o.mojos.runtime.license.file= -cp mojo2-runtime.jar ai.h2o.mojos.ExecuteMojo pipeline.mojo example.csv\nNote: Data can be streamed from stdin to stdout by replacing both the\ninput and output CSV arguments with `-`.", + "prompt_type": "plain" + }, + { + "output": "This value\n defaults to True. - sys.ai.h2o.mojos.parser.csv.stripCrFromLastColumn (boolean)\n -Workaround for issues relating to the OpenCSV parser. This value\n defaults to True. - sys.ai.h2o.mojos.parser.csv.quotedHeaders (boolean) - Specify\n whether to quote header names in the output CSV file. This value\n defaults to False. 
- sys.ai.h2o.mojos.parser.csv.separator (char) - Specify the separator\n used between CSV fields. The special value `TAB` can be used for\n tab-separated values. This value defaults to `,`. - sys.ai.h2o.mojos.parser.csv.escapeChar (char) - Specify the escape\n character for parsing CSV fields. If this value is not specified,\n then no escaping is attempted. This value defaults to an empty\n string. - sys.ai.h2o.mojos.parser.csv.batch (int) - Specify the number of\n input records brought into memory for batch processing (determines\n consumed memory). This value defaults to 1000. - sys.ai.h2o.mojos.pipelineFormats (string) - When multiple formats\n are recognized, this option specifies the order in which they are\n tried.", + "prompt_type": "plain" + }, + { + "output": "- sys.ai.h2o.mojos.parser.csv.date.formats (string) - Specify a format\n for dates. This value defaults to an empty string. - sys.ai.h2o.mojos.exposedInputs (string) - Specify a comma separated\n list of input cols that are needed on output. The special value\n `ALL` takes all inputs. This defaults to a null value. - sys.ai.h2o.mojos.useWeakHash (boolean) - Specify whether to use\n WeakHashMap. This is set to False by default. Enabling this setting\n may improve MOJO loading times. JVM Options for Access Control\n- ai.h2o.mojos.runtime.license.key - Specify a license key. - ai.h2o.mojos.runtime.license.file - Specify the location of a\n license key. - ai.h2o.mojos.runtime.license.filename - Override the default license\n file name. - ai.h2o.mojos.runtime.signature.filename - Override the default\n signature file name. - ai.h2o.mojos.runtime.watermark.filename - Override the default\n watermark file name. Execute the MOJO from Java\n1. Open a new terminal window, create an experiment folder, and change\n directories to that new folder:\n2.", + "prompt_type": "plain" + }, + { + "output": "Include the following contents. 3. 
Compile the source code with the files of the MOJO runtime\n (mojo2-runtime.jar) and MOJO pipeline (pipeline.mojo) copied into\n the experiment:\n4. Run the MOJO example with the license (license.sig) copied into the\n experiment:\n5. The following output is displayed:\nUsing the MOJO Scoring Pipeline with Spark/Sparkling Water\nNote: The Driverless AI 1.5 release will be the last release with\nTOML-based MOJO2. Releases after 1.5 will include protobuf-based MOJO2. MOJO scoring pipeline artifacts can be used in Spark to deploy\npredictions in parallel using the Sparkling Water API. This section\nshows how to load and run predictions on the MOJO scoring pipeline in\nSpark using Scala and the Python API. In the event that you upgrade H2O Driverless AI, we have good news! Sparkling Water is backwards compatible with MOJO versions produced by\nolder Driverless AI versions. Requirements\n- You must have a Spark cluster with the Sparkling Water JAR file\n passed to Spark.", + "prompt_type": "plain" + }, + { + "output": "The H2OContext does not have to be created if you only want to run\npredictions on MOJOs using Spark. This is because the scoring is\nindependent of the H2O run-time. Preparing Your Environment\nIn order to use the MOJO scoring pipeline, Driverless AI license has to be\npassed to Spark. This can be achieved via --jars argument of the Spark\nlauncher scripts. Note: In Local Spark mode, use --driver-class-path to specify path to\nthe license file. PySparkling\nFirst, start PySpark with PySparkling Python package and Driverless AI\nlicense. ./bin/pyspark --jars license.sig --py-files pysparkling.zip\nor, you can download official Sparkling Water distribution from H2O\nDownload page. Follow the steps on the Sparkling Water download page. Once you are in the Sparkling Water directory, you can call:\n ./bin/pysparkling --jars license.sig\nAt this point, you should have available a PySpark interactive terminal\nwhere you can try out predictions. 
If you would like to productionalize\nthe scoring process, you can use the same configuration, except instead\nof using ./bin/pyspark, you would use ./bin/spark-submit to submit your\njob to a cluster.", + "prompt_type": "plain" + }, + { + "output": "# If you want to use old behavior when all output columns were stored inside an array,\n # set it to False. However we strongly encourage users to use True which is defined as a default value. settings = H2OMOJOSettings(namedMojoOutputColumns = True)\n # Load the pipeline. 'settings' is an optional argument. If it's not specified, the default values are used. mojo = H2OMOJOPipelineModel.createFromMojo(\"file:///path/to/the/pipeline.mojo\", settings)\n # Load the data as Spark's Data Frame\n dataFrame = spark.read.csv(\"file:///path/to/the/data.csv\", header=True)\n # Run the predictions. The predictions contain all the original columns plus the predictions\n # added as new columns\n predictions = mojo.transform(dataFrame)\n # You can easily get the predictions for a desired column using the helper function as\n predictions.select(mojo.selectPredictionUDF(\"AGE\")).collect()\nSparkling Water\nFirst, start Spark with Sparkling Water Scala assembly and Driverless AI\nlicense.", + "prompt_type": "plain" + }, + { + "output": "kdb+ Setup\n\nDriverless AI lets you explore kdb+ data sources from within the\nDriverless AI application. This section provides instructions for\nconfiguring Driverless AI to work with kdb+.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run\n--runtime=nvidia(>= Docker 19.03) ornvidia-docker(< Docker 19.03) command when starting the Driverless AI Docker image. Usedocker\nversionto check which version of Docker you are using. 
Description of Configuration Attributes --------------------------------------- -kdb_user: (Optional) User name -kdb_password: (Optional) User's password -kdb_hostname: IP address or host of the KDB server -kdb_port: Port on which the kdb+ server is listening -kdb_app_jvm_args: (Optional) JVM args for kdb+ distributions (for example,-Dlog4j.configuration). Separate each argument with spaces. -kdb_app_classpath: (Optional) The kdb+ classpath (or other if the jar file is stored elsewhere). -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. Example 1: Enable kdb+ with No Authentication --------------------------------------------- .. container:: tabs .. group-tab:: Docker Image Installs This example enables the kdb+ connector without authentication.", + "prompt_type": "plain" + }, + { + "output": ".. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,kdb\" \\ -e DRIVERLESS_AI_KDB_HOSTNAME=\"\" \\ -e DRIVERLESS_AI_KDB_PORT=\"\" \\ -p 12345:12345 \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure kdb+ options in the config.toml file, and then specify that file when starting Driverless AI in Docker. Note that this example enables kdb+ with no authentication. 1. Configure the Driverless AI config.toml file. Set the following configuration options. .. -enabled_file_systems\n= \"file, upload, kdb\"-kdb_hostname =\n\"-kdb_port =\n\"\"2.", + "prompt_type": "plain" + }, + { + "output": ".. .. 
code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example enables the kdb+ connector without authentication. The only required flags are the hostname and the port. 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2.", + "prompt_type": "plain" + }, + { + "output": ".. :: # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + 
"prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, kdb\" # KDB Connector credentials kdb_hostname = \" kdb_port = \"\" 3. Save the changes when you are done, then stop/restart Driverless AI. Example 2: Enable kdb+ with Authentication ------------------------------------------ .. container:: tabs .. group-tab:: Docker Image Installs This example provides users credentials for accessing a kdb+ server from Driverless AI. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,kdb\" \\ -e DRIVERLESS_AI_KDB_HOSTNAME=\"\" \\ -e DRIVERLESS_AI_KDB_PORT=\"\" \\ -e DRIVERLESS_AI_KDB_USER=\"\" \\ -e DRIVERLESS_AI_KDB_PASSWORD=\"\" \\ -p 12345:12345 \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure kdb+ options in the config.toml file, and then specify that file when starting Driverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "1. Configure the Driverless AI config.toml file. Set the following configuration options. .. -enabled_file_systems\n= \"file, upload, kdb\"-kdb_user = \"\"-kdb_password =\n\"\"-kdb_hostname = \"-kdb_port =\n\"\"-kdb_app_classpath = \"\"-kdb_app_jvm_args =\n\"\"2. Mount the config.toml file into the Docker container. .. .. 
code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example provides users credentials for accessing a kdb+ server from Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. .. :: # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, 
remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, kdb\" # kdb+ Connector credentials kdb_user = \"\" kdb_password = \"\" kdb_hostname = \" kdb_port = \"\" kdb_app_classpath = \"\" kdb_app_jvm_args = \"\" 3. Save the changes when you are done, then stop/restart Driverless AI. Adding Datasets Using kdb+ -------------------------- After the kdb+ connector is enabled, you can add datasets by selecting **kdb+** from the **Add Dataset (or Drag and Drop)** drop-down menu. .. figure:: ../images/add_dataset_dropdown.png :alt: :width: 237px :height: 338px Specify the following information to add your dataset. 1. **Enter filepath to save query**. Enter the local file path for storing your dataset. For example, **/home//myfile.csv**.", + "prompt_type": "plain" + }, + { + "output": "Deploying Driverless AI Models to Production\nBy default, each completed Driverless AI experiment (unless explicitly\ndisabled or not available due to modified expert settings) creates at\nleast one scoring pipeline for scoring in Python,\nC++, Java and R.\nThe H2O MLOps service provides a way to manage, collaborate, deploy and\nmonitor your experiments and models. This can be done in the cloud or as\na standalone service. In addition to the H2O MLOps service, here we list several other\ndeployment options and examples for deploying Driverless AI MOJO (Java\nand C++ with Python/R wrappers) and Python Scoring pipelines for\nproduction purposes. The deployment template documentation can be\naccessed from here. For more customized requirements, contact\nsupport@h2o.ai. 
- Deployment via H2O AI MLOps \n - MOJO with Java runtime \n - MOJO with C++ Runtime \n - Standalone Python Scoring Pipeline \n - Deployment options from within Driverless AI GUI \nDeployment With H2O MLOps\nH2O MLOps is a platform for model deployment, management, governance,\nmonitoring, and collaboration.", + "prompt_type": "plain" + }, + { + "output": "It can be deployed as a\nstandalone service or as an H2O AI Cloud Application. For details, see\nthe H2O MLOps Documentation. MOJO With Java Runtime Deployment Options\nThe following are several options for deploying Driverless AI MOJO with\nJava Runtime. The links in the diagram lead to code examples and\ntemplates. digraph \"example java\" {\n layout=\"circo\"; node [fontname=\"Verdana\",\n fontsize=\"30\",shape=plaintext]; edge [color=\"black\"]; b\n [label=\"Driverless AI MOJO Java Runtime\",\n href=\"https://docs.h2o.ai/driverless-ai/latest-stable/docs/userguide/scoring-mojo-scoring-pipeline.html\",target=\"_top\",fontcolor=\"black\"];\n af [label=\"As a library\",fontcolor=\"green\"]; aa [label=\"As REST\n Server\",\n href=\"https://h2oai.github.io/dai-deployment-templates/local-rest-scorer/\",target=\"_top\",fontcolor=\"green\"];\n ad [label=\"As AzureML\",fontcolor=\"green\"]; ab [label=\"As AWS\n Lambda\",\n href=\"https://h2oai.github.io/dai-deployment-templates/aws_lambda_scorer/\",target=\"_top\",fontcolor=\"green\"];\n ac [label=\"As Google Cloud Run\",\n href=\"https://h2oai.github.io/dai-deployment-templates/gcp/\",target=\"_top\",fontcolor=\"green\"];\n ae [label=\"As Apache Nifi\",\n href=\"https://github.com/h2oai/dai-deployment-examples/tree/master/mojo-nifi\",target=\"_top\",fontcolor=\"green\"];\n ag [label=\"As Snowflake Function\",\n href=\"https://docs.h2o.ai/driverless-ai/latest-stable/docs/userguide/snowflake-integration.html\",target=\"_top\",fontcolor=\"green\"];\n ah [label=\"As Apache Flink\",\n 
href=\"https://github.com/h2oai/dai-deployment-examples/tree/master/mojo-flink\",target=\"_top\",fontcolor=\"green\",fontcolor=\"green\"];\n ai [label=\"As Sagemaker\",fontcolor=\"red\"]; aj [label=\"As Hive\n UDF\",\n href=\"https://github.com/h2oai/dai-deployment-templates/tree/master/hive-mojo-scorer\",target=\"_top\",fontcolor=\"red\"];\n ak [label=\"As DB scorer\",\n href=\"https://h2oai.github.io/dai-deployment-templates/sql-jdbc-scorer/\",target=\"_top\",fontcolor=\"red\"];\n al [label=\"As Apache Spark Batch/Stream\",\n href=\"http://docs.h2o.ai/sparkling-water/3.0/latest-stable/doc/deployment/load_mojo_pipeline.html#loading-and-score-the-mojo\",target=\"_top\",fontcolor=\"red\"];\n am [label=\"As Apache Kafka Topic\",\n href=\"https://github.com/h2oai/dai-deployment-examples/blob/master/mojo-flink/daimojo-flink-kafka.md\",target=\"_top\",fontcolor=\"blue\"];\n an [label=\"As Active MQ\",fontcolor=\"blue\"]; ao [label=\"As Task\n Queue \",fontcolor=\"blue\"]; ap [label=\"KNIME\",fontcolor=\"blue\"];\n b -> aa; b -> ab; b -> ac; b -> ad; b -> ae; b -> af; b -> ag; b\n -> ah; b -> ai; b -> aj; b -> ak; b -> al; b -> am; b -> an; b ->\n ao; b -> ap;\n }\nThe Java MOJO scoring pipelines can also be deployed from within the\nDriverless AI GUI.", + "prompt_type": "plain" + }, + { + "output": "MOJO With C++ Runtime Deployment Options\nHere we list some example scenarios and platforms for deploying\nDriverless AI MOJO with C++ Runtime. MOJO C++ runtime can also be run\ndirectly from R/Python terminals. For more information, see\ncpp_scoring_pipeline. 
digraph \"example c++\" {\n layout=\"circo\"; node [fontname=\"Verdana\",\n fontsize=\"16\",shape=plaintext]; edge [color=\"black\"]; b\n [label=\"Driverless AI MOJO C++ Runtime\",\n href=\"https://docs.h2o.ai/driverless-ai/latest-stable/docs/userguide/scoring-pipeline-cpp.html\",target=\"_top\"];\n ab [label=\"As REST Server\",\n href=\"\",target=\"_top\",fontcolor=\"green\"]; ac [label=\"As AWS\n Lambda\", href=\"\",target=\"_top\",fontcolor=\"green\"]; ad [label=\"As\n AzureML\",fontcolor=\"green\"]; aa [label=\"As a\n library\",fontcolor=\"green\"]; ae [label=\"As Apache Nifi\",\n href=\"\",target=\"_top\",fontcolor=\"green\"]; ag [label=\"As Apache\n Spark Batch\", href=\"\",target=\"_top\",fontcolor=\"red\"]; af\n [label=\"As Sagemaker\",fontcolor=\"red\"];\n b -> aa; b -> ab; b -> ac; b -> ad; b -> ae; b -> af; b -> ag;\n }\nStandalone Python Scoring Pipeline Deployment Options\ndigraph \"example py\" {\n layout=\"circo\"; node [fontname=\"Verdana\",\n fontsize=\"20\",shape=plaintext]; edge [color=\"black\"]; b\n [label=\"Driverless AI Python Scoring Pipeline\",\n href=\"https://docs.h2o.ai/driverless-ai/latest-stable/docs/userguide/scoring-standalone-python.html\",target=\"_top\"];\n aa [label=\"As REST Server\",\n href=\"https://github.com/h2oai/driverlessai-tutorials/tree/master/scoring-pipeline-deployment/python/ubuntu/docker\",target=\"_top\",fontcolor=\"green\"];\n ac [label=\"As AWS Lambda\",fontcolor=\"green\"]; ad [label=\"As\n AzureML\",fontcolor=\"green\"]; ae [label=\"As Apache\n Nifi\",fontcolor=\"green\"]; ah [label=\"As a\n library\",fontcolor=\"green\"]; ab [label=\"As Docker Image\",\n href=\"https://github.com/h2oai/driverlessai-tutorials/tree/master/scoring-pipeline-deployment/python/ubuntu/docker\",\n target=\"_top\",fontcolor=\"red\"] af [label=\"As\n Sagemaker\",fontcolor=\"red\"]; ag [label=\"As Apache Spark Batch\",\n 
href=\"https://github.com/h2oai/driverlessai-tutorials/tree/master/scoring-pipeline-deployment/python/pyspark\",target=\"_top\",fontcolor=\"red\"];\n b -> aa; b -> ab; b -> ac; b -> ad; b -> ae; b -> af; b -> ag; b\n -> ah;\n }\nAvailable Deployments from within Driverless AI GUI\nThe following deployments are available in Driverless AI GUI.", + "prompt_type": "plain" + }, + { + "output": "- amazon-lambda\n- rest-server\nAll of the existing MOJO scoring pipeline deployments are available in\nthe Deployments Overview page, which is available from the top menu. This page lists all active deployments and the information needed to\naccess the respective endpoints. In addition, it lets you stop any\ndeployments that are no longer needed. []\nAmazon Lambda Deployment\nDriverless AI can deploy the trained MOJO scoring pipeline as an AWS\nLambda Function, i.e., a server-less scorer running in Amazon Cloud and\ncharged by the actual usage. Additional Resources\nRefer to the aws-lambda-scorer folder in the dai-deployment-templates\nrepository to see different deployment templates for AWS Lambda scorer. Driverless AI Prerequisites\n- Driverless AI MOJO Scoring Pipeline: To deploy a MOJO scoring\n pipeline as an AWS Lambda function, the MOJO pipeline archive has to\n be created first by choosing the Build MOJO Scoring Pipeline option\n on the completed experiment page. Refer to the\n mojo_scoring_pipelines section for information on how to build a\n MOJO scoring pipeline.", + "prompt_type": "plain" + }, + { + "output": "The Driverless AI deployment pipeline\n to AWS Lambdas explicitly sets the license key as an environment\n variable. You will not be able to use MOJOs if your Driverless AI\n license is expired. If you have an expired license, you can update\n this manually for each MOJO in AWS, or you can update all MOJOs for\n a deployment region using a script. Refer to\n update_license_in_production for more information. 
AWS Prerequisites\nUsage Plans\nUsage plans must be enabled in the target AWS region in order for API\nkeys to work when accessing the AWS Lambda via its REST API. Refer to\nhttps://aws.amazon.com/blogs/aws/new-usage-plans-for-amazon-api-gateway/\nfor more information. Access Permissions\nThe following AWS access permissions need to be provided to the role in\norder for Driverless AI Lambda deployment to succeed. - AWSLambdaFullAccess\n- IAMFullAccess\n- AmazonAPIGatewayAdministrator\n[]\nThe policy can be further stripped down to restrict Lambda and S3 rights\nusing the JSON policy definition as follows:\n {\n \"Version\": \"2012-10-17\",\n \"Statement\": [\n {\n \"Sid\": \"VisualEditor0\",\n \"Effect\": \"Allow\",\n \"Action\": [\n \"iam:GetPolicyVersion\",\n \"iam:DeletePolicy\",\n \"iam:CreateRole\",\n \"iam:AttachRolePolicy\",\n \"iam:ListInstanceProfilesForRole\",\n \"iam:PassRole\",\n \"iam:DetachRolePolicy\",\n \"iam:ListAttachedRolePolicies\",\n \"iam:GetRole\",\n \"iam:GetPolicy\",\n \"iam:DeleteRole\",\n \"iam:CreatePolicy\",\n \"iam:ListPolicyVersions\"\n ],\n \"Resource\": [\n \"arn:aws:iam::*:role/h2oai*\",\n \"arn:aws:iam::*:policy/h2oai*\"\n ]\n },\n {\n \"Sid\": \"VisualEditor1\",\n \"Effect\": \"Allow\",\n \"Action\": \"apigateway:*\",\n \"Resource\": \"*\"\n },\n {\n \"Sid\": \"VisualEditor2\",\n \"Effect\": \"Allow\",\n \"Action\": [\n \"lambda:CreateFunction\",\n \"lambda:ListFunctions\",\n \"lambda:InvokeFunction\",\n \"lambda:GetFunction\",\n \"lambda:UpdateFunctionConfiguration\",\n \"lambda:DeleteFunctionConcurrency\",\n \"lambda:RemovePermission\",\n \"lambda:UpdateFunctionCode\",\n \"lambda:AddPermission\",\n \"lambda:ListVersionsByFunction\",\n \"lambda:GetFunctionConfiguration\",\n \"lambda:DeleteFunction\",\n \"lambda:PutFunctionConcurrency\",\n \"lambda:GetPolicy\"\n ],\n \"Resource\": \"arn:aws:lambda:*:*:function:h2oai*\"\n },\n {\n \"Sid\": \"VisualEditor3\",\n \"Effect\": \"Allow\",\n \"Action\": \"s3:*\",\n \"Resource\": [\n 
\"arn:aws:s3:::h2oai*/*\",\n \"arn:aws:s3:::h2oai*\"\n ]\n }\n ]\n }\nDeploying on Amazon Lambda\nOnce the MOJO pipeline archive is ready, Driverless AI provides a Deploy\n(Local & Cloud) option on the completed experiment page.", + "prompt_type": "plain" + }, + { + "output": "[]\nThis option opens a new dialog for setting the AWS account credentials\n(or use those supplied in the Driverless AI configuration file or\nenvironment variables), AWS region, and the desired deployment name\n(which must be unique per Driverless AI user and AWS account used). []\nAmazon Lambda deployment parameters:\n - Deployment Name: A unique name of the deployment. By default,\n Driverless AI offers a name based on the name of the experiment\n and the deployment type. This has to be unique both for Driverless\n AI user and the AWS account used. - Region: The AWS region to deploy the MOJO scoring pipeline to. It\n makes sense to choose a region geographically close to any client\n code calling the endpoint in order to minimize request latency. (See also AWS Regions and Availability Zones.) - Use AWS environment variables: If enabled, the AWS credentials are\n taken from the Driverless AI configuration file (see records\n deployment_aws_access_key_id and deployment_aws_secret_access_key)\n or environment variables\n (DRIVERLESS_AI_DEPLOYMENT_AWS_ACCESS_KEY_ID and\n DRIVERLESS_AI_DEPLOYMENT_AWS_SECRET_ACCESS_KEY).", + "prompt_type": "plain" + }, + { + "output": "- AWS Access Key ID and AWS Secret Access Key: Credentials to access\n the AWS account. This pair of secrets identifies the AWS user and\n the account and can be obtained from the AWS account console. Testing the Lambda Deployment\nOn a successful deployment, all the information needed to access the new\nendpoint (URL and an API Key) is printed, and the same information is\navailable in the Deployments Overview Page after clicking on the\ndeployment row. []\nNote that the actual scoring endpoint is located at the path /score. 
In\naddition, to prevent DDoS and other malicious activities, the resulting\nAWS lambda is protected by an API Key, i.e., a secret that has to be\npassed in as a part of the request using the x-api-key HTTP header. The request is a JSON object containing attributes:\n - fields: A list of input column names that should correspond to the\n training data columns. - rows: A list of rows that are in turn lists of cell values to\n predict the target values for.", + "prompt_type": "plain" + }, + { + "output": "An example request providing 2 columns on the input and asking to get\none column copied to the output looks as follows:\n {\n \"fields\": [\n \"age\", \"salary\"\n ],\n \"includeFieldsInOutput\": [\n \"salary\"\n ],\n \"rows\": [\n [\n \"48.0\", \"15000.0\"\n ],\n [\n \"35.0\", \"35000.0\"\n ],\n [\n \"18.0\", \"22000.0\"\n ]\n ]\n }\nAssuming the request is stored locally in a file named test.json, the\nrequest to the endpoint can be sent, e.g., using the curl utility, as\nfollows:\n URL={place the endpoint URL here}\n API_KEY={place the endpoint API key here}\n curl \\\n -d @test.json \\\n -X POST \\\n -H \"x-api-key: ${API_KEY}\" \\\n ${URL}/score\nThe response is a JSON object with a single attribute score, which\ncontains the list of rows with the optional copied input values and the\npredictions. For the example above with a two class target field, the result is\nlikely to look something like the following snippet.", + "prompt_type": "plain" + }, + { + "output": "The bucket names\nhave to be unique throughout AWS S3, and one user can create a maximum\nof 100 buckets. Therefore, we recommend setting the bucket name used for\ndeployment with the deployment_aws_bucket_name config option. REST Server Deployment\nThis section describes how to deploy the trained MOJO scoring pipeline\nas a local Representational State Transfer (REST) Server. Note: For information on REST server deployment limitations, see\nrest_limitations. 
Additional Resources\nThe REST server deployment supports API endpoints such as model\nmetadata, file/CSV scoring, etc. It uses SpringFox for both programmatic\nand manual inspection of the API. Refer to the local-rest-scorer folder\nin the dai-deployment-templates repository to see different deployment\ntemplates for Local REST scorers. Prerequisites\n- Driverless AI MOJO Scoring Pipeline: To deploy a MOJO scoring\n pipeline as a Local REST Scorer, the MOJO pipeline archive has to be\n created first by choosing the Build MOJO Scoring Pipeline option on\n the completed experiment page.", + "prompt_type": "plain" + }, + { + "output": "- When using a firewall or a virtual private cloud (VPC), the ports\n that are used by the REST server must be exposed. - Ensure that you have enough memory and CPUs to run the REST scorer. Typically, a good estimation for the amount of required memory is 12\n times the size of the pipeline.mojo file. For example, a 100MB\n pipeline.mojo file will require approximately 1200MB of RAM. (Note:\n To conveniently view in-depth information about your system in\n Driverless AI, click on Resources at the top of the screen, then\n click System Info.) - When running Driverless AI in a Docker container, you must expose\n ports on Docker for the REST service deployment within the\n Driverless AI Docker container. For example, the following exposes\n the Driverless AI Docker container to listen to port 8094 for\n requests arriving at the host port at 18094. Deploying on REST Server\nOnce the MOJO pipeline archive is ready, Driverless AI provides a Deploy\n(Local & Cloud) option on the completed experiment page.", + "prompt_type": "plain" + }, + { + "output": "- This button is not available on PPC64LE environments. []\nThis option opens a new dialog for setting the REST Server deployment\nname, port number, and maximum heap size (optional). []\n1. Specify a name for the REST scorer in order to help track the\n deployed REST scorers. 2. 
Provide a port number on which the REST scorer will run. For\n example, if port number 8081 is selected, the scorer will be\n available at http://my-ip-address:8081/models\n3. Optionally specify the maximum heap size for the Java Virtual\n Machine (JVM) running the REST scorer. This can help constrain the\n REST scorer from overconsuming memory of the machine. Because the\n REST scorer is running on the same machine as Driverless AI, it may\n be helpful to limit the amount of memory that is allocated to the\n REST scorer. This option will limit the amount of memory the REST\n scorer can use, but it will also produce an error if the memory\n allocated is not enough to run the scorer. (The amount of memory\n required is mostly dependent on the size of MOJO.", + "prompt_type": "plain" + }, + { + "output": "Testing the REST Server Deployment\n[]\nNote that the actual scoring endpoint is located at the path /score. The request is a JSON object containing attributes:\n - fields: A list of input column names that should correspond to the\n training data columns. - rows: A list of rows that are in turn lists of cell values to\n predict the target values for. - optional includeFieldsInOutput: A list of input columns that\n should be included in the output. 
An example request providing 2 columns on the input and asking to get\none column copied to the output looks as follows:\n {\n \"fields\": [\n \"age\", \"salary\"\n ],\n \"includeFieldsInOutput\": [\n \"salary\"\n ],\n \"rows\": [\n [\n \"48.0\", \"15000.0\"\n ],\n [\n \"35.0\", \"35000.0\"\n ],\n [\n \"18.0\", \"22000.0\"\n ]\n ]\n }\nAssuming the request is stored locally in a file named test.json, the\nrequest to the endpoint can be sent, e.g., using the curl utility, as\nfollows:\n URL={place the endpoint URL here}\n curl \\\n -X POST \\\n -d @test.json \\\n -H \"Content-Type: application/json\" \\\n ${URL}/score\nThe response is a JSON object with a single attribute score, which\ncontains the list of rows with the optional copied input values and the\npredictions.", + "prompt_type": "plain" + }, + { + "output": "The particular\nvalues would of course depend on the scoring pipeline:\n {\n \"score\": [\n [\n \"48.0\",\n \"0.6240277982943945\",\n \"0.045458571508101536\"\n ],\n [\n \"35.0\",\n \"0.7209441819603676\",\n \"0.06299909138586585\"\n ],\n [\n \"18.0\",\n \"0.7209441819603676\",\n \"0.06299909138586585\"\n ]\n ]\n }\nREST Server Deployment Limitations\n- Local REST server deployments are useful for determining the\n behavioral characteristics of a MOJO that is intended for\n deployment. However, using the REST Server deployment as a\n production level scoring service is not recommended. The REST Server\n deployment runs in the same machine as the core of Driverless AI,\n and therefore has to share system resources with all other\n Driverless AI processes. 
This can lead to unexpected scenarios in\n which competition for compute resources causes the REST Server to\n fail.", + "prompt_type": "plain" + }, + { + "output": "Install on AWS\n\nDriverless AI can be installed on Amazon AWS using the AWS Marketplace\nAMI or the AWS Community AMI.\n\nchoose-AWS aws-marketplace-ami aws-community-ami\n\nWhen installing via AWS, you can also enable role-based authentication.\n\naws-role-based-authentication", + "prompt_type": "plain" + }, + { + "output": "Monotonicity Constraints\nMonotonicity can be enforced for the feature engineering pipeline, the\nfitted model(s), or the entire modeling pipeline. Monotonicity constraints enforce a monotonic relationship between a\nspecified feature and the target prediction. For example, given a model\ntrained to predict housing prices, you may want to enforce that the\nmodel predicts higher housing prices with increasing lot size and lower\nhousing prices with increasing neighborhood crime rate. When monotonicity constraints are enabled, Driverless AI automatically\ndetermines if monotonicity is present and then enforces it through all\nor part of the modeling pipelines. Depending on the level of correlation\nbetween a feature and the target, Driverless AI assigns positive,\nnegative, or no monotonicity constraints. Specifically, monotonicity is\nenforced if the absolute correlation is greater than a specific\nthreshold (default 0.1). To build an entire monotonic gbm modeling pipeline with a single click,\nuser can select the monotonic_gbm recipe from\nthe Experiment settings of the expert panel.", + "prompt_type": "plain" + }, + { + "output": "For details see\nMonotonic GBM in pipeline building recipe\nunder experiment expert settings. For more granular control, over thresholds, manual override of\nmonotonicity constraints etc, refer to\nthese settings under feature settings of the expert\npanel of an experiment. 
To build monotonic fitted models, ensure that:\n- The Interpretability setting for the experiment must be greater than\n or equal to the\n monotonicity_constraints_interpretability_switch\n (which has a default value of 7). So the Interpretability setting for the\n experiment and/or monotonicity_constraints_interpretability_switch\n can be toggled to achieve this. - The final model must be linear (for example, GLMModel) or otherwise\n support monotonic constraints (LightGBMModel, XGBoostGBMModel,\n XGBoostDartModel or Decision Tree models). These can be set to 'ON'\n from the Model settings of the expert panel. The ensemble level can\n be toggled by setting fixed_ensemble_level \n level.", + "prompt_type": "plain" + }, + { + "output": "- Drop features with low correlation to the target. See\n monotonicity constraints drop low correlation features . - For regression case, make sure the\n target_transformer is monotonic like 'identity'\n or 'identity_noclip'. This can be toggled under experiment settings\n of the expert panel. and for monotonic feature engineering:\n- Disable features engineered from multi-feature interaction i.e set\n max_feature_interaction_depth to 1\n in feature settings under expert settings panel. - Disable numerical to categorical feature transformations i.e set\n num_as_cat to False in the feature settings under\n expert settings panel. - For numeric features, allow only monotonic transformations i.e set\n included_transformers to\n ['OriginalTransformer'] only under recipe settings of the expert\n panel. 
The following table lists an example of settings to create a monotonic\nDriverless AI modeling pipeline.", + "prompt_type": "plain" + }, + { + "output": "Before You Begin\n\ndata-sampling missing-values-handling imputation-in-dai reproducibility\ntransformations internal-validation ensemble-learning\nmonotonicity-constraints leakage-shift-detection vi imbalanced-modeling\nwide gpu-dai queuing dai-free-space ts_bestpractices tips-n-tricks\nsimple_configs", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Logs\nDriverless AI provides several logs that can be viewed and/or retrieved\nwhen performing different tasks. All content in the logs are labeled\nwith INFO, DATA, WARNING and ERROR tags. Driverless AI Modeling and MLI\nexperiments also provide access to anonymized logs that do not contain\ncontents from the DATA tag. - logs-available\n- logs-sending\n- Obtaining System Log Files \nAvailable Log Files\nThe following is a list of available Driverless AI log files. - dai_log\n - exp_log\n - mli_log\n - auto_viz_log\n - h2oai_server_log\n - audit_log\ndai.log\ndai.log are part of Driverless AI System Logs . They are\ngenerated as part of stderr/stdout and are useful for debugging or\ndetailed support in case of issues. If needed, the verbosity or logging\nlevel of this log file can be toggled using config.toml settings. Admin access to Driverless AI installation location is required to\nobtain these logs. See System Logs section on steps to\nobtain them.", + "prompt_type": "plain" + }, + { + "output": "It helps with understanding the run details and\ndebugging experiment related issues. The log file naming convention is\nh2oai_experiment_{experiment_ID}.log and the content is labeled with\nINFO, DATA, WARNING and ERROR tags. Users can download these log directly from the experiment page of the\nDriverless AI GUI. For an experiment in progress, logs can be accessed\nfrom under the Log tab to the right. 
For completed experiments, the logs\nreside with the summary zip file. []\nThe zip also contains an anonymized version of experiment logs that does\nnot report any information relating to the data used in the experiment\n(i.e no DATA label), such as column names and individual data points. It also contains a details folder that comprises error stack traces that may help\nwith debugging. []\nMLI Logs\nThese logs cover the model interpretation \nprocess runs for surrogate models and explainer/recipe runs for\nDriverless AI Machine Learning Interpretability jobs. MLI surrogate model run logs can be downloaded from the Action button on\nthe MLI GUI page.", + "prompt_type": "plain" + }, + { + "output": "It contains three files, the\nstdout/stderr log for full MLI process run, an anonymized copy (i.e no\nDATA label) of the same log file and surrogate model run logs. []\nThe explainer or recipe logs are accessible from the task run button. []\nMLI uses H2O_3 (Java backend) to build surrogate models. Admins can\naccess the h2o_3 server logs using System Logs commands in\ncase of issues with starting the MLI server. The /tmp folder of DAI\ncontains h2o_mli.log, that keeps track of rolling mli logs and are also\nadmin accessible. Auto Visualization Logs\nThese logs store run information for automatic data visualization in\nDriverless AI. Users can obtain them from the Autoviz page of DAI GUI. []\nAdmins can access the viz-server logs using System Logs \ncommands in case of issues with starting of Viz server. The failure logs\nrelating to data visualization are also available from the /tmp folder\nas h2oai_server.log and require admin access. h2oai_server Log\nThese logs register all issues relating to datasets like Adding Datasets\nor viewing Dataset Details or Auto Visualization of datasets.", + "prompt_type": "plain" + }, + { + "output": "An anonymized copy (i.e no\nDATA label) of this log file is also available in the same folder. 
Accessing h2oai_server log requires admin access to Driverless AI. Audit Logs\nAudit logs register all user interactions with the Driverless AI system\nlike login/logout, downloads/uploads, experiment creation/deletion etc. Admins can access them from /tmp folder of Driverless AI. Sending Logs to support@H2O.ai\nThis section describes what logs to send in the event of failures when\nrunning Driverless AI. All content in the logs are labeled with INFO,\nDATA, WARNING and ERROR tags. Driverless AI Modeling and MLI experiments\nalso provides access to anonymized logs that do not contain contents\nfrom the DATA tag. - Driverless AI starting Failures: This requires inspection of\n System Logs like dai.log file. - Dataset Failures: A simple error stack trace is displayed on the GUI\n in case of datasets failures like Adding Datasets or viewing Dataset\n Details and detailed logs are registered as\n h2oai_server logs that requires admin access.", + "prompt_type": "plain" + }, + { + "output": "A full detailed stacktrace is also available in the\n h2oai_server.log file in ./tmp folder of DAI that\n requires admin access. - Experiment Failures: User needs to send the\n experiment logs . In some cases, for in depth analysis,\n support@h2o.ai may request dai.logs that requires admin\n access to retrieve. - MLI Failures: See MLI Logs for details. - Custom Recipes Failures: If a Custom Recipe is producing errors, the\n entire zip file obtained by clicking on the Download Summary & Logs\n button on the experiment page, can be sent for\n troubleshooting. Note that these files may contain information that\n is not anonymized. System Logs\nSystem logs include useful information about Driverless AI. 
The Driverless\nAI solution needs the following set of services to work:\n- Driverless AI server: This is a python code, that internally starts\n a local worker to start a web server for UI pages (DAI GUI) and runs\n the actual experiment work.", + "prompt_type": "plain" + }, + { + "output": "- procsy: This handles the communication between the DAI server\n (python code) and other binaries or java jar files, like data\n connectors or the vis-server. - vis-server: This is needed for Auto visualization of Datasets, DAI\n sends a request to procsy, which in turn will query the vis-server\n to make the computations necessary for autoviz. - redis-server: It is used as a communication bus between the backend\n (DAI) server and the local worker or remote workers (in case of DAI\n multinode set up). - minio: This is needed in multinode setup, and is used for data\n storage, for example, when running an experiment on a remote node,\n the remote worker gets the experiment configuration details via\n redis, and the actual dataset, is pushed to minio and the remote\n worker is instructed to fetch it. When experiment finishes, the\n model is sent back to the main server from the remote node via minio\n (upload and download). Each of these services creates a log file.", + "prompt_type": "plain" + }, + { + "output": "Transforming datasets\nWhen a training dataset is used in an experiment, Driverless AI\ntransforms the data into an improved, feature engineered dataset. (For\nmore information on the transformations that are provided in Driverless\nAI, see Transformations.) But what happens when new rows are added to\nyour dataset? In this case, you can specify to transform the new dataset\nafter adding it to Driverless AI, and the same transformations that\nDriverless AI applied to the original dataset are applied to these new\nrows. 
The following sections describe the two options for transforming\ndatasets that are available in Driverless AI:\n- transform_dataset\n- fit_and_transform_dataset\nNotes:\n- To avoid leakage, the result of transformations should not be used\n for training unless enable_target_encoding='off'. []\nTransform dataset\nThe following steps describe how to transform a dataset with the\nTransform dataset option, which transforms the dataset without fitting. Notes:\n- This transformation uses the experiment's full model pipeline,\n except instead of generating predictions, it generates the\n transformation before the model is applied.", + "prompt_type": "plain" + }, + { + "output": "1. Select the dataset that you want to transform. 2. Select the columns you want to include in the transformation frame. To confirm your selection, click Done. The dataset transformation\n job is added to the pending jobs queue. 3. When the transformed dataset is ready, click Download transformed\n dataset. Specify a filename for the dataset, then click the Download\n button to download the transformed dataset. Fit and transform dataset\nThe following steps describe how to transform a dataset with the Fit &\nTransform dataset option, which both fits and transforms the dataset. Notes:\n- This functionality is not available for Time Series experiments when\n time_series_recipe=true. (That is, when the lag-based recipe is\n used.) - This functionality provides the pipeline (engineered features) of\n the best individual model of the experiment, not the full pipeline\n of all models and folds. 1. On the completed experiment page for the original dataset, click\n Model Actions -> Fit & Transform Dataset.", + "prompt_type": "plain" + }, + { + "output": "Select the new training dataset that you want to transform. Note\n that this must have the same number of columns as the original\n dataset. 3. Select one of the following options:\n - Default: The validation split ratio is set to 0. 
- With validation dataset: Specify a validation dataset to use\n with this dataset. The validation split ratio is set to 0.2. - With training data split: Split the training data. The\n validation split ratio is set to 0.2. Note: To ensure that the transformed dataset respects the row\n order, choose a validation dataset instead of splitting the\n training data. Splitting the training data results in a shuffling\n of the row order. 4. Optionally specify a test dataset. If specified, then the output\n also includes the final test dataset for final scoring. 5. Click Launch Transformation. []\nThe following datasets are made available for download upon successful\ncompletion:\n- Training dataset (not for cross validation)\n- Validation dataset for parameter tuning\n- Test dataset for final scoring.", + "prompt_type": "plain" + }, + { + "output": "Native Installation\n\nThis section provides instructions for installing Driverless AI in\nnative Linux environments.\n\ninstall/x86-64\n\nFor instructions on installing the Driverless AI Docker image, refer to\ndocker_installs.", + "prompt_type": "plain" + }, + { + "output": "HDFS Setup\n\nDriverless AI lets you explore HDFS data sources from within the\nDriverless AI application. This section provides instructions for\nconfiguring Driverless AI to work with HDFS.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run\n--runtime=nvidia(>= Docker 19.03) ornvidia-docker(< Docker 19.03) command when starting the Driverless AI Docker image. Usedocker\nversionto check which version of Docker you are using. Description of Configuration Attributes --------------------------------------- -hdfs_config_path(Required): The location the HDFS config folder path. This folder can contain multiple config files. -hdfs_auth_type(Required): Specifies the HDFS authentication. Available values are: -principal: Authenticate with HDFS with a principal user. 
-keytab: Authenticate with a keytab (recommended). If running DAI as a service, then the Kerberos keytab needs to be owned by the DAI user. -keytabimpersonation: Login with impersonation using a keytab. -noauth: No authentication needed. -key_tab_path: The path of the principal key tab file. This is required whenhdfs_auth_type='principal'. -hdfs_app_principal_user: The Kerberos application principal user. This is required whenhdfs_auth_type='keytab'.", + "prompt_type": "plain" + }, + { + "output": "Separate each argument with spaces. --Djava.security.krb5.conf--Dsun.security.krb5.debug--Dlog4j.configuration-hdfs_app_classpath: The HDFS classpath. -hdfs_app_supported_schemes: The list of DFS schemas that is used to check whether a valid input to the connector has been established. For example: :: hdfs_app_supported_schemes = ['hdfs://', 'maprfs://', 'custom://'] The following are the default values for this option. Additional schemas can be supported by adding values that are not selected by default to the list. -hdfs://-maprfs://-swift://-hdfs_max_files_listed: Specifies the maximum number of files that are viewable in the connector UI. Defaults to 100 files. To view more files, increase the default value. -hdfs_init_path: Specifies the starting HDFS path displayed in the UI of the HDFS browser. -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. Example 1: Enable HDFS with No Authentication --------------------------------------------- .. container:: tabs .. group-tab:: Docker Image Installs This example enables the HDFS data connector and disables HDFS authentication.", + "prompt_type": "plain" + }, + { + "output": "This lets you reference data stored in HDFS directly using name node address, for example:hdfs://name.node/datasets/iris.csv. .. 
code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,hdfs\" \\ -e DRIVERLESS_AI_HDFS_AUTH_TYPE='noauth' \\ -e DRIVERLESS_AI_PROCSY_PORT=8080 \\ -p 12345:12345 \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure HDFS options in the config.toml file, and then specify that file when starting Driverless AI in Docker. Note that this example enables HDFS with no authentication.", + "prompt_type": "plain" + }, + { + "output": "Configure the Driverless AI config.toml file. Set the following configuration options. Note that the procsy port, which defaults to 12347, also has to be changed. .. -enabled_file_systems\n= \"file, upload, hdfs\" -procsy_ip = \"127.0.0.1\" -procsy_port =\n8080 2. Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example enables the HDFS data connector and disables HDFS authentication in the config.toml file.", + "prompt_type": "plain" + }, + { + "output": "1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. 
:: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. Note that the procsy port, which defaults to 12347, also has to be changed. .. :: # IP address and port of procsy process. procsy_ip = \"127.0.0.1\" procsy_port = 8080 # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, hdfs\" 3. Save the changes when you are done, then stop/restart Driverless AI. Example 2: Enable HDFS with Keytab-Based Authentication ------------------------------------------------------- **Notes**: - If using Kerberos Authentication, then the time on the Driverless AI server must be in sync with Kerberos server. 
If the time difference between clients and DCs is 5 minutes or higher, there will be Kerberos failures. - If running Driverless AI as a service, then the Kerberos keytab needs to be owned by the Driverless AI user; otherwise Driverless AI will not be able to read/access the Keytab and will result in a fallback to simple authentication and, hence, fail. .. container:: tabs .. group-tab:: Docker Image Installs This example: - Places keytabs in the /tmp/dtmp folder on your machine and provides the file path as described below.", + "prompt_type": "plain" + }, + { + "output": ".. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,hdfs\" \\ -e DRIVERLESS_AI_HDFS_AUTH_TYPE='keytab' \\ -e DRIVERLESS_AI_KEY_TAB_PATH='/tmp/<>' \\ -e DRIVERLESS_AI_HDFS_APP_PRINCIPAL_USER='<>' \\ -e DRIVERLESS_AI_PROCSY_PORT=8080 \\ -p 12345:12345 \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example: - Places keytabs in the /tmp/dtmp folder on your machine and provides the file path as described below. - Configures the option hdfs_app_principal_user to reference a user for whom the keytab was created (usually in the form of user@realm).", + "prompt_type": "plain" + }, + { + "output": "Configure the Driverless AI config.toml file. Set the following configuration options. Note that the procsy port, which defaults to 12347, also has to be changed. .. -enabled_file_systems\n= \"file, upload, hdfs\" -procsy_ip = \"127.0.0.1\" -procsy_port =\n8080 -hdfs_auth_type = \"keytab\" -key_tab_path =\n\"/tmp/\" -hdfs_app_principal_user =\n\"\" 2. Mount the config.toml file into the Docker container. .. .. 
code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example: - Places keytabs in the/tmp/dtmpfolder on your machine and provides the file path as described below.", + "prompt_type": "plain" + }, + { + "output": "1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. .. :: # IP address and port of procsy process. 
procsy_ip = \"127.0.0.1\" procsy_port = 8080 # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, hdfs\" # HDFS connector # Auth type can be Principal/keytab/keytabPrincipal # Specify HDFS Auth Type, allowed options are: # noauth : No authentication needed # principal : Authenticate with HDFS with a principal user # keytab : Authenticate with a Key tab (recommended) # keytabimpersonation : Login with impersonation using a keytab hdfs_auth_type = \"keytab\" # Path of the principal key tab file key_tab_path = \"/tmp/\" # Kerberos app principal user (recommended) hdfs_app_principal_user = \"\" 3. Save the changes when you are done, then stop/restart Driverless AI. 
Example 3: Enable HDFS with Keytab-Based Impersonation ------------------------------------------------------ **Notes**: - If using Kerberos, be sure that the Driverless AI time is synched with the Kerberos server.", + "prompt_type": "plain" + }, + { + "output": "- Logins are case sensitive when keytab-based impersonation is configured. .. container:: tabs .. group-tab:: Docker Image Installs The example: - Sets the authentication type tokeytabimpersonation. - Places keytabs in the/tmp/dtmpfolder on your machine and provides the file path as described below. - Configures theDRIVERLESS_AI_HDFS_APP_PRINCIPAL_USERvariable, which references a user for whom the keytab was created (usually in the form of user@realm). .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,hdfs\" \\ -e DRIVERLESS_AI_HDFS_AUTH_TYPE='keytabimpersonation' \\ -e DRIVERLESS_AI_KEY_TAB_PATH='/tmp/<>' \\ -e DRIVERLESS_AI_HDFS_APP_PRINCIPAL_USER='<>' \\ -e DRIVERLESS_AI_PROCSY_PORT=8080 \\ -p 12345:12345 \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example: - Sets the authentication type tokeytabimpersonation.", + "prompt_type": "plain" + }, + { + "output": "- Configures thehdfs_app_principal_uservariable, which references a user for whom the keytab was created (usually in the form of user@realm). 1. Configure the Driverless AI config.toml file. Set the following configuration options. Note that the procsy port, which defaults to 12347, also has to be changed. .. -enabled_file_systems\n= \"file, upload, hdfs\"-procsy_ip = \"127.0.0.1\"-procsy_port =\n8080-hdfs_auth_type = \"keytabimpersonation\"-key_tab_path =\n\"/tmp/\"-hdfs_app_principal_user =\n\"\"2. Mount the config.toml file into the Docker container. .. 
.. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example: - Sets the authentication type tokeytabimpersonation.", + "prompt_type": "plain" + }, + { + "output": "- Configures thehdfs_app_principal_uservariable, which references a user for whom the keytab was created (usually in the form of user@realm). 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. .. :: # IP address and port of procsy process. 
procsy_ip = \"127.0.0.1\" procsy_port = 8080 # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, hdfs\" # HDFS connector # Auth type can be Principal/keytab/keytabPrincipal # Specify HDFS Auth Type, allowed options are: # noauth : No authentication needed # principal : Authenticate with HDFS with a principal user # keytab : Authenticate with a Key tab (recommended) # keytabimpersonation : Login with impersonation using a keytab hdfs_auth_type = \"keytabimpersonation\" # Path of the principal key tab file key_tab_path = \"/tmp/\" # Kerberos app principal user (recommended) hdfs_app_principal_user = \"\" 3. Save the changes when you are done, then stop/restart Driverless AI. 
Specifying a Hadoop Platform ---------------------------- The following example shows how to build an H2O-3 Hadoop image and run Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Change theH2O_TARGETto specify a different platform. 1. Clone and then build H2O-3 for CDH 6.0. .. .. code:: bash git clone https://github.com/h2oai/h2o-3.git cd h2o-3 ./gradlew clean build -x test export H2O_TARGET=cdh6.0 export BUILD_HADOOP=true ./gradlew clean build -x test 2. Start H2O. .. .. code:: bash docker run -it --rm \\ -v `pwd`:`pwd` \\ -w `pwd` \\ --entrypoint bash \\ --network=host \\ -p 8020:8020 \\ docker.h2o.ai/cdh-6-w-hive \\ -c 'sudo -E startup.sh && \\ source /envs/h2o_env_python3.8/bin/activate && \\ hadoop jar h2o-hadoop-3/h2o-cdh6.0-assembly/build/libs/h2odriver.jar -libjars \"$(cat /opt/hive-jars/hive-libjars)\" -n 1 -mapperXmx 2g -baseport 54445 -notify h2o_one_node -ea -disown && \\ export CLOUD_IP=localhost && \\ export CLOUD_PORT=54445 && \\ make -f scripts/jenkins/Makefile.jenkins test-hadoop-smoke; \\ bash' 3.", + "prompt_type": "plain" + }, + { + "output": "Key Features\nBelow are some of the key features available in Driverless AI. Flexibility of Data and Deployment\nDriverless AI works across a variety of data sources, including Hadoop\nHDFS, Amazon S3, and more. Driverless AI can be deployed everywhere,\nincluding all clouds (Microsoft Azure, AWS, and Google Cloud),\non-premises, and can run on machines with only CPUs or machines with\nCPUs and GPUs. NVIDIA GPU Acceleration\nDriverless AI is optimized to take advantage of GPU acceleration to\nachieve up to 40X speedups for automatic machine learning. It includes\nmulti-GPU algorithms for XGBoost, GLM, K-Means, and more. GPUs allow for\nthousands of iterations of model features and optimizations and give\nsignificant speedups for use cases involving images and/or text. For\nmore information, see gpu_in_dai. 
Automatic Data Visualization\nFor datasets, Driverless AI automatically selects data plots based on\nthe most relevant data statistics and generates the visualizations that\nare most relevant from a statistical perspective.", + "prompt_type": "plain" + }, + { + "output": "They are also useful for understanding the composition\nof very large datasets and for seeing trends or even possible issues,\nsuch as large numbers of missing values or significant outliers that\ncould impact modeling results. For more information, see\nVisualizing Datasets. Automatic Feature Engineering\nFeature engineering is the secret weapon that advanced data scientists\nuse to extract the most accurate results from algorithms. H2O Driverless\nAI employs a library of algorithms and feature transformations to\nautomatically engineer new, high-value features for a given dataset. (See transformations for more information.) Included in the interface is\na variable importance chart that shows the significance of original and\nnewly engineered features. Automatic Model Documentation\nTo explain models to business users and regulators, data scientists and\ndata engineers must document the data, algorithms, and processes used to\ncreate machine learning models. Driverless AI provides an AutoDoc for\neach experiment, relieving the user from the time-consuming task of\ndocumenting and summarizing the workflow used when building machine\nlearning models.", + "prompt_type": "plain" + }, + { + "output": "With this capability in Driverless AI, practitioners can\nfocus more on drawing actionable insights from the models and save weeks\nor even months in development, validation, and deployment. Driverless AI also provides a number of autodoc_ configuration options,\ngiving users even more control over the output of the AutoDoc. (Refer to\nthe sample-configtoml topic for information about these configuration\noptions.) 
Click here to download and view a sample experiment\nreport in Word format. Time Series Forecasting\nTime series forecasting is one of the biggest challenges for data\nscientists. These models address key use cases, including demand\nforecasting, infrastructure monitoring, and predictive maintenance. Driverless AI delivers superior time series capabilities to optimize for\nalmost any prediction time window. Driverless AI incorporates data from\nnumerous predictors, handles structured character data and\nhigh-cardinality categorical variables, and handles gaps in time series\ndata and other missing values.", + "prompt_type": "plain" + }, + { + "output": "NLP with TensorFlow and PyTorch\nText data can contain critical information to inform better predictions. Driverless AI automatically converts text strings into features using\npowerful techniques like TFIDF and Embeddings. With TensorFlow and\nPyTorch, Driverless AI can process large text blocks and build models\nusing all the available data to solve business problems like sentiment\nanalysis, document classification, and content tagging. The Driverless\nAI platform has the ability to support both standalone text and text\nwith other columns as predictive features. For more information, see\nnlp-in-dai. Image Processing with TensorFlow\nDriverless AI can be used to gain insight from digital images. It\nsupports the use of both standalone images and images together with\nother data types as predictive features. For more information, see\nimage-processing-in-dai. 
Machine Learning Interpretability (MLI)\nDriverless AI provides robust interpretability of machine learning\nmodels to explain modeling results in a human-readable format.", + "prompt_type": "plain" + }, + { + "output": "A number of charts are generated automatically (depending on experiment\ntype), including K-LIME, Shapley, Variable Importance, Decision Tree\nSurrogate, Partial Dependence, Individual Conditional Expectation,\nSensitivity Analysis, NLP Tokens, NLP LOCO, and more. Additionally, you\ncan download a CSV of LIME and Shapley reasons codes from the MLI page. For more information, see interpreting_a_model. Automatic Reason Codes\nIn regulated industries, an explanation is often required for\nsignificant decisions relating to customers (for example, credit\ndenial). Reason codes show the key positive and negative factors in a\nmodel's scoring decision in a simple language. Reasons codes are also\nuseful in other industries, such as healthcare, because they can provide\ninsights into model decisions that can drive additional testing or\ninvestigation. For more information, see mli-explanations. Custom Recipe Support\nDriverless AI lets you import custom recipes for MLI algorithms, feature\nengineering (transformers), scorers, and configuration.", + "prompt_type": "plain" + }, + { + "output": "This lets you have greater influence over the Driverless AI\nAutomatic ML pipeline and gives you control over the optimization\nchoices that Driverless AI makes. For more information, see\ncustom-recipes. Automatic Scoring Pipelines\nFor completed experiments, Driverless AI automatically generates both\nPython scoring pipelines and new ultra-low-latency automatic scoring\npipelines (MOJO) for deploying the model to production. The new\nautomatic scoring pipeline is a unique technology that deploys all\nfeature engineering and the winning machine learning model in highly\noptimized, low-latency, production-ready Java or C++ code that can be\ndeployed anywhere. 
For more information, see Scoring_Pipeline. Experiment Setup Wizard\nThe Driverless AI Experiment Setup Wizard makes it simple for you to set\nup a Driverless AI experiment and ensure that the experiment's settings\nare optimally configured for your specific use case. The Experiment\nSetup Wizard helps you learn about your data and lets you provide\ninformation about your use case that is used to determine the\nexperiment's settings.", + "prompt_type": "plain" + }, + { + "output": "Introduction to H2O Driverless AI\nH2O Driverless AI is a high-performance, GPU-enabled, client-server\napplication for the rapid development and deployment of state-of-the-art\npredictive analytics models. It reads tabular data from various sources\nand automates data visualization, grand-master level automatic feature\nengineering, model validation (overfitting and leakage prevention),\nmodel parameter tuning, model interpretability, and model deployment. H2O Driverless AI is currently targeting common regression, binomial\nclassification, and multinomial classification applications, including\nloss-given-default, probability of default, customer churn, campaign\nresponse, fraud detection, anti-money-laundering, and predictive asset\nmaintenance models. It also handles time-series problems for individual\nor grouped time-series, such as weekly sales predictions per store and\ndepartment, with time-causal feature engineering and validation schemes. Driverless AI can also handle image and text data (NLP) use cases.", + "prompt_type": "plain" + }, + { + "output": "Visualizing Datasets\nPerform one of the following steps to visualize a dataset:\n- On the Datasets page, select the [Click for Actions] button beside\n the dataset that you want to view, and then click Visualize from the\n submenu that appears. - Click the Autoviz top menu link to go to the Visualizations list\n page, click the New Visualization button, then select or import the\n dataset that you want to visualize. 
The Visualization page shows all available graphs for the selected\ndataset. Note that the graphs on the Visualization page can vary based\non the information in your dataset. You can also view and download logs\nthat were generated during the visualization. Autoviz Recommendations\nFor some cases, Autoviz suggests certain recommended transformations to\nthe columns of the dataset. These recommendations can be directly applied to the experiment. This is\ndone internally by using the\nautoviz recommendation transformer . The following is a complete list of available graphs from Driverless AI\nAutoviz.", + "prompt_type": "plain" + }, + { + "output": "All possible scatterplots based on\npairs of features (variables) are examined for correlations. The\ndisplayed plots are ranked according to the correlation. Some of these\nplots may not look like textbook examples of correlation. The only\ncriterion is that they have a large value of squared Pearson's r\n(greater than .95). When modeling with these variables, you may want to\nleave out variables that are perfectly correlated with others. Note that points in the scatterplot can have different sizes. Because\n Driverless AI aggregates the data and does not display all points, the\n bigger the point is, the bigger number of exemplars (aggregated\n points) the plot covers. Spikey Histograms\nSpikey histograms are histograms with huge spikes. This often indicates\nan inordinate number of single values (usually zeros) or highly similar\nvalues. The measure of \"spikeyness\" is a bin frequency that is ten times\nthe average frequency of all the bins. You should be careful when\nmodeling (particularly regression models) with spikey variables.", + "prompt_type": "plain" + }, + { + "output": "The robust measure of skewness is derived from Groeneveld, R.A. and\nMeeden, G. (1984), \"Measuring Skewness and Kurtosis.\" The Statistician,\n33, 391-399. 
Highly skewed variables are often candidates for a\ntransformation (e.g., logging) before use in modeling. The histograms in\nthe output are sorted in descending order of skewness. Varying Boxplots\nVarying boxplots reveal unusual variability in a feature across the\ncategories of a categorical variable. The measure of variability is\ncomputed from a robust one-way analysis of variance (ANOVA). Sufficiently diverse variables are flagged in the ANOVA. A boxplot is a\ngraphical display of the fractiles of a distribution. The center of the\nbox denotes the median, the edges of a box denote the lower and upper\nquartiles, and the ends of the \"whiskers\" denote the range of values. Sometimes outliers occur, in which case the adjacent whisker is\nshortened to the next lower or upper value. For variables (features)\nhaving only a few values, the boxes can be compressed, sometimes into a\nsingle horizontal line at the median.", + "prompt_type": "plain" + }, + { + "output": "Heteroscedasticity is\ncalculated with a Brown-Forsythe test: Brown, M. B. and Forsythe, A. B. (1974), \"Robust tests for equality of variances.\" Journal of the American\nStatistical Association, 69, 364-367. Plots are ranked according to\ntheir heteroscedasticity values. A boxplot is a graphical display of the\nfractiles of a distribution. The center of the box denotes the median,\nthe edges of a box denote the lower and upper quartiles, and the ends of\nthe \"whiskers\" denote the range of values. Sometimes outliers occur, in\nwhich case the adjacent whisker is shortened to the next lower or upper\nvalue. For variables (features) having only a few values, the boxes can\nbe compressed, sometimes into a single horizontal line at the median. Biplots\nA Biplot is an enhanced scatterplot that uses both points and vectors to\nrepresent structure simultaneously for rows and columns of a data\nmatrix. Rows are represented as points (scores), and columns are\nrepresented as vectors (loadings). 
The plot is computed from the first\ntwo principal components of the correlation matrix of the variables\n(features).", + "prompt_type": "plain" + }, + { + "output": "And you\nshould look for purple vectors that are well-separated. Overlapping\nvectors can indicate a high degree of correlation between variables. Outliers\nVariables with anomalous or outlying values are displayed as red points\nin a dot plot. Dot plots are constructed using an algorithm in\nWilkinson, L. (1999). \"Dot plots.\" The American Statistician, 53,\n276\u2013281. Not all anomalous points are outliers. Sometimes the algorithm\nwill flag points that lie in an empty region (i.e., they are not near\nany other points). You should inspect outliers to see if they are\nmiscodings or if they are due to some other mistake. Outliers should\nordinarily be eliminated from models only when there is a reasonable\nexplanation for their occurrence. Correlation Graph\nThe correlation network graph is constructed from all pairwise squared\ncorrelations between variables (features). For continuous-continuous\nvariable pairs, the statistic used is the squared Pearson correlation. For continuous-categorical variable pairs, the statistic is based on the\nsquared intraclass correlation (ICC).", + "prompt_type": "plain" + }, + { + "output": "The\nformula is (MSbetween - MSwithin)/(MSbetween + (k - 1)MSwithin), where k\nis the number of categories in the categorical variable. For\ncategorical-categorical pairs, the statistic is computed from Cramer's V\nsquared. If the first variable has k1 categories and the second variable\nhas k2 categories, then a k1 x k2 table is created from the joint\nfrequencies of values. From this table, we compute a chi-square\nstatistic. Cramer's V squared statistic is then (chi-square / n) /\nmin(k1,k2), where n is the total of the joint frequencies in the table. Variables with large values of these respective statistics appear near\neach other in the network diagram. 
The color scale used for the\nconnecting edges runs from low (blue) to high (red). Variables connected\nby short red edges tend to be highly correlated. Parallel Coordinates Plot\nA Parallel Coordinates Plot is a graph used for comparing multiple\nvariables. Each variable has its own vertical axis in the plot. Each\nprofile connects the values on the axes for a single observation.", + "prompt_type": "plain" + }, + { + "output": "Radar Plot\nA Radar Plot is a two-dimensional graph that is used for comparing\nmultiple variables. Each variable has its own axis that starts from the\ncenter of the graph. The data are standardized on each variable between\n0 and 1 so that values can be compared across variables. Each profile,\nwhich usually appears in the form of a star, connects the values on the\naxes for a single observation. Multivariate outliers are represented by\nred profiles. The Radar Plot is the polar version of the popular\nParallel Coordinates plot. The polar layout enables us to represent more\nvariables in a single plot. Data Heatmap\nThe heatmap graphic is constructed from the transposed data matrix. Rows\nof the heatmap represent variables, and columns represent cases\n(instances). The data are standardized before display so that small\nvalues are yellow and large values are red. The rows and columns are\npermuted via a singular value decomposition (SVD) of the data matrix so\nthat similar rows and similar columns are near each other.", + "prompt_type": "plain" + }, + { + "output": "Also implemented\nare extensions of these three transformers that handle negative values,\nwhich are derived from I.K. Yeo and R.A. Johnson, \u201cA new family of power\ntransformations to improve normality or symmetry.\u201d Biometrika, 87(4),\n(2000). For each transformer, transformations are selected by comparing\nthe robust skewness of the transformed column with the robust skewness\nof the original raw column. 
When a transformation leads to a relatively\nlow value of skewness, it is recommended. Missing Values Heatmap\nThe missing values heatmap graphic is constructed from the transposed\ndata matrix. Rows of the heatmap represent variables and columns\nrepresent cases (instances). The data are coded into the values 0\n(missing) and 1 (nonmissing). Missing values are colored red and\nnonmissing values are left blank (white). The rows and columns are\npermuted via a singular value decomposition (SVD) of the data matrix so\nthat similar rows and similar columns are near each other. Gaps Histogram\nThe gaps index is computed using an algorithm of Wainer and Schacht\nbased on work by John Tukey.", + "prompt_type": "plain" + }, + { + "output": "Project Workspace\nDriverless AI provides a Project Workspace for managing datasets and\nexperiments related to a specific business problem or use case. Whether\nyou are trying to detect fraud or predict user retention, datasets and\nexperiments can be stored and saved in the individual projects. A\nLeaderboard on the Projects page lets you easily compare performance and\nresults and identify the best solution for your problem. The following sections describe how to create and manage projects. - create-project\n- link-datasets\n- link-experiments\n- experiments-list\nNote: For information on how to export Driverless AI experiments to H2O\nMLOps from the Projects page, see\nhttps://docs.h2o.ai/mlops-release/latest-stable/docs/userguide/using.html#exporting-experiments-from-driverless-ai-into-mlops. Creating a Project Workspace\nTo create a Project Workspace:\n1. Click the Projects option on the top menu. 2. Click New Project. 3. Specify a name for the project and provide a description.", + "prompt_type": "plain" + }, + { + "output": "Click Create Project. This creates an empty Project page. From the Projects page, you can link datasets and/or experiments, run\nnew experiments, and score experiments on a scoring dataset. 
When you\nlink an existing experiment to a Project, the datasets used for the\nexperiment are automatically linked to the project (if not already\nlinked). Linking Datasets\nAny dataset that has been added to Driverless AI can be linked to a\nproject. In addition, when you link an experiment, the datasets used for\nthat experiment are also automatically linked to the project. To link a dataset:\n1. Click the Link Dataset button, then select the type of dataset you\n want to upload. Choose from Training, Testing, and Validation. 2. Select the dataset(s) that you want to link. 3. (Optional) If there are any completed experiments that are based on\n the selected dataset(s), you can choose to link them as well. 4. (Optional) To filter the list of linked datasets by type, click\n Filter Dataset Type and select the type of dataset you want to view.", + "prompt_type": "plain" + }, + { + "output": "When datasets are linked, the same menu options are available here as on\nthe Datasets page. For more information, refer to Datasets. []\nSelecting Datasets\nIn the Datasets section, you can select a training, validation, or\ntesting dataset. The Experiments section shows experiments in the\nProject that use the selected dataset. Linking Experiments\nExisting experiments can be selected and linked to a Project. Additionally, you can run new experiments or checkpoint existing\nexperiments from this page. Experiments started from the Project page\nare automatically linked to the Project. To link an existing experiment to the project, click Link Experiments\nand select one of the following options:\n- By Selecting Experiments: Select one or more experiments to link to\n the Project. - By Selecting Dataset Used in Experiments: Upload all experiments\n that used the selected dataset as a Training, Testing, or Validation\n dataset. 
For example, if you select By Selecting Dataset Used in\n Experiments > Training and then select the dataset\n example-dataset.csv, all the experiments that used the\n example-dataset.csv as a training dataset are linked.", + "prompt_type": "plain" + }, + { + "output": "1. Click the New Experiment link to begin a new experiment. 2. Select your training data and optionally your validation and/or\n testing data. 3. Specify your desired experiment settings (refer to\n experiment_settings and expert-settings), and then click Launch\n Experiment. As the experiment is running, it will be listed at the top of the\nExperiments Leaderboard until it is completed. It will also be available\non the Experiments page. Checkpointing Experiments\nWhen experiments are linked to a Project, the same checkpointing options\nfor experiments are available here as on the Experiments page. Refer to\ncheckpointing for more information. []\nExperiments List\nWhen attempting to solve a business problem, a normal workflow will\ninclude running multiple experiments, either with different/new data or\nwith a variety of settings, and the optimal solution can vary for\ndifferent users and/or business problems. For some users, the model with\nthe highest accuracy for validation and test data could be the most\noptimal one.", + "prompt_type": "plain" + }, + { + "output": "For some, it could also mean how\nquickly the model could be trained with acceptable levels of accuracy. The Experiments list allows you to find the best solution for your\nbusiness problem. The list is organized based on experiment name. You can change the\nsorting of experiments by selecting the up/down arrows beside a column\nheading in the experiment menu. Hover over the right menu of an experiment to view additional\ninformation about the experiment, including the problem type, datasets\nused, and the target column. Experiment Scoring\nFinished experiments linked to the project show their validation and\ntest scores. 
You can also score experiments on other datasets. To do\nthis, you first need to add a dataset by clicking the Link Dataset\nbutton and choosing Testing from the drop-down menu. After the test\ndataset has been added, click the Score on Scoring Data button and\nchoose the experiment(s) that you want to score along with the test\ndataset to be applied. This triggers a diagnostics job, the results of\nwhich are located on the diagnostics page.", + "prompt_type": "plain" + }, + { + "output": "After the scoring process has completed, the\nresult appears in the Score and Scoring Time columns. The Score column\nshows results for the scorer specified by the Show Results for Scorer\npicker. Notes:\n- If an experiment has already been scored on a dataset, Driverless AI\n cannot score it again. The scoring step is deterministic, so for a\n particular test dataset and experiment combination, the score will\n be same regardless of how many times you repeat it. - The test dataset must have all the columns that are expected by the\n various experiments you are scoring it on. However, the columns of\n the test dataset need not be exactly the same as input features\n expected by the experiment. There can be additional columns in the\n test dataset. If these columns were not used for training, they will\n be ignored. This feature gives you the ability to train experiments\n on different training datasets (i.e., having different features),\n and if you have an \"uber test dataset\" that includes all these\n feature columns, then you can use the same dataset to score these\n experiments.", + "prompt_type": "plain" + }, + { + "output": "This\n value shows the total time (in seconds) that it took for calculating\n the experiment scores for all applicable scorers for the experiment\n type. This is valuable to users who need to estimate the runtime\n performance of an experiment. 
Comparing Experiments\nYou can compare two or three experiments and view side-by-side detailed\ninformation about each. 1. Select either two or three experiments that you want to compare. You\n cannot compare more than three experiments. 2. Click the Compare n Items button. This opens the Compare Experiments page. This page includes the\nexperiment summary and metric plots for each experiment. The metric\nplots vary depending on whether this is a classification or regression\nexperiment. For classification experiments, this page includes:\n - Variable Importance list\n - Confusion Matrix\n - ROC Curve\n - Precision Recall Curve\n - Lift Chart\n - Gains Chart\n - Kolmogorov-Smirnov Chart\nFor regression experiments, this page includes:\n- Variable Importance list\n- Actual vs.", + "prompt_type": "plain" + }, + { + "output": "The datasets and experiments will still be available on\nthe Datasets and Experiments pages. - Unlink a dataset by clicking on the dataset and selecting Unlink\n from the menu. Note: You cannot unlink datasets that are tied to\n experiments in the same project. - Unlink an experiment by selecting the experiment and clicking the\n Unlink Item button. Note that this will not automatically unlink\n datasets that were tied to the experiment. Deleting Projects\nTo delete a project, click the Projects option on the top menu to open\nthe main Projects page. Click the dotted menu in the right-most column, and\nthen select Delete. You will be prompted to confirm the deletion. Note that deleting projects does not delete datasets and experiments\nfrom Driverless AI. Any datasets and experiments from deleted projects\nwill still be available on the Datasets and Experiments pages. 
[]\nLeaderboard Wizard: Business value calculator\nFrom the Project page, you can access a business value calculator wizard\nby clicking the Analyze Results button.", + "prompt_type": "plain" + }, + { + "output": "Install the Google Cloud Platform Offering\nThis section describes how to install and start Driverless AI in a\nGoogle Compute environment using the GCP Marketplace. This assumes that\nyou already have a Google Cloud Platform account. If you don't have an\naccount, go to https://console.cloud.google.com/getting-started to\ncreate one. Before You Begin\nIf you are trying GCP for the first time and have just created an\naccount, check your Google Compute Engine (GCE) resource quota limits. By default, GCP allocates a maximum of 8 CPUs and no GPUs. Our default\nrecommendation for launching Driverless AI is 32 CPUs, 120 GB RAM, and 2\nP100 NVIDIA GPUs. You can change these settings to match your quota\nlimit, or you can request more resources from GCP. Refer to\nhttps://cloud.google.com/compute/quotas for more information, including\ninformation on how to check your quota and request additional quota. Installation Procedure\n1. In your browser, log in to the Google Compute Engine Console at\n https://console.cloud.google.com/.", + "prompt_type": "plain" + }, + { + "output": "In the left navigation panel, select Marketplace. 3. On the Marketplace page, search for Driverless and select the H2O.ai\n Driverless AI offering. The following page will display. 4. Click Launch on Compute Engine. (If necessary, refer to Google\n Compute Instance Types for information about machine and GPU types.) 5. A summary page displays when the compute engine is successfully\n deployed. This page includes the instance ID and the username\n (always h2oai) and password that will be required when starting\n Driverless AI. Click on the Instance link to retrieve the external\n IP address for starting Driverless AI. 6. 
In your browser, go to https://%5BExternal_IP%5D:12345 to start\n Driverless AI. 7. Agree to the Terms and Conditions. 8. Log in to Driverless AI using your user name and password. 9. Optionally enable GCS and Big Query access. Upgrading the Google Cloud Platform Offering\nPerform the following steps to upgrade the Driverless AI Google Platform\noffering.", + "prompt_type": "plain" + }, + { + "output": "NLP in Driverless AI\nThis section describes NLP (text) processing capabilities of Driverless\nAI. The Driverless AI platform has the ability to support both\nstandalone text and text with other column types as predictive features. TensorFlow based and PyTorch Transformer Architectures (for example,\nBERT) are used for Feature Engineering and Model Building. For details, see:\n - NLP Feature Engineering and Modeling \n - NLP Expert Settings \n - NLP Feature Naming Convention \n - nlp-explainers\n - An NLP example in Driverless AI \n - NLP Models to Production \nNote\n- NLP and image use cases in Driverless benefit significantly from\nGPU usage . - To download pretrained NLP models, visit\nhttp://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/bert_models.zip. You can use the pytorch_nlp_pretrained_models_dir configuration option\nto specify a path to pretrained PyTorch NLP models. This can be either a\npath in the local file system (/path/on/server/to/bert_models_folder), a\nURL, or an S3 location (s3://).", + "prompt_type": "plain" + }, + { + "output": "- You can use the Driverless AI Experiment Setup Wizard to guide you\nthrough the process of setting up NLP experiments. For more information,\nsee dai_wizard. NLP Feature Engineering and Modeling\n[]\nPretrained PyTorch Models in Driverless AI\n[]\nThe following NLP recipes are available for a text column. A full list\nof NLP Transformers is available here . 
- n-gram frequency/TF-IDF followed by Truncated SVD\n - n-gram frequency/TF-IDF followed by Linear/Logistic regression\n - Word embeddings followed by CNN model (TensorFlow)\n - Word embeddings followed by BiGRU model (TensorFlow)\n - Character embeddings followed by CNN model (TensorFlow)\n - BERT/DistilBERT based embeddings for Feature Engineering (PyTorch)\n - Support for multiple Transformer Architectures (eg.BERT) as\n modeling algorithms (PyTorch)\nn-gram\nAn n-gram is a contiguous sequence of n items from a given sample of\ntext or speech. n-gram Frequency\nFrequency-based features represent the count of each word from a given\ntext in the form of vectors.", + "prompt_type": "plain" + }, + { + "output": "For example, a one-gram is equivalent to a single word, a\ntwo-gram is equivalent to two consecutive words paired together, and so\non. Words and n-grams that occur more often will receive a higher\nweightage. The ones that are rare will receive a lower weightage. TF-IDF of n-grams\nFrequency-based features can be multiplied with the inverse document\nfrequency to get term frequency\u2013inverse document frequency (TF-IDF)\nvectors. Doing so also gives importance to the rare terms that occur in\nthe corpus, which may be helpful in certain classification tasks. []\nTruncated SVD Features\nTF-IDF and the frequency of n-grams both result in higher dimensions of\nthe representational vectors. To counteract this, Truncated SVD is\ncommonly used to decompose the vectorized arrays into lower dimensions. []\nLinear Models for TF-IDF Vectors\nLinear models are also available in the Driverless AI NLP recipe. These\ncapture linear dependencies that are crucial to the process of achieving\nhigh accuracy rates and are used as features in the base DAI model.", + "prompt_type": "plain" + }, + { + "output": "Representations are made so that\nwords with similar meanings are placed close to or equidistant from one\nanother. 
For example, the word \"king\" is closely associated with the\nword \"queen\" in this kind of vector representation. []\nTF-IDF and frequency-based models represent counts and significant word\ninformation, but they lack the semantic context for these words. Word\nembedding techniques are used to make up for this lack of semantic\ninformation. CNN Models for Word Embedding\nAlthough Convolutional Neural Network (CNN) models are primarily used on\nimage-level machine learning tasks, their use case on representing text\nas information has proven to be quite efficient and faster compared to\nRNN models. In Driverless AI, we pass word embeddings as input to CNN\nmodels, which return cross validated predictions that can be used as a\nnew set of features. []\nBi-directional GRU Models for Word Embedding\nRecurrent neural networks, like long short-term memory units (LSTM) and\ngated recurrent units (GRU), are state-of-the-art algorithms for NLP\nproblems.", + "prompt_type": "plain" + }, + { + "output": "For example, in the sentence \"John is walking on the golf course,\" a\nunidirectional model would represent states that represent \"golf\" based\non \"John is walking on,\" but would not represent \"course.\" Using a\nbi-directional model, the representation would also account the later\nrepresentations, giving the model more predictive power. In simple terms, a bi-directional GRU model combines two independent RNN\nmodels into a single model. A GRU architecture provides high speeds and\naccuracy rates similar to a LSTM architecture. As with CNN models, we\npass word embeddings as input to these models, which return cross\nvalidated predictions that can be used as a new set of features. []\nCNN Models for Character Embedding\nFor languages like Japanese and Mandarin Chinese, where characters play\na major role, character level embedding is available as an NLP recipe. In character embedding, each character is represented in the form of\nvectors rather than words. 
Driverless AI uses character level embedding\nas the input to CNN models and later extracts class probabilities to\nfeed as features for downstream models.", + "prompt_type": "plain" + }, + { + "output": "These models\ncapture the contextual relation between words by using an attention\nmechanism. Unlike directional models that read text sequentially, a\nTransformer-based model reads the entire sequence of text at once,\nallowing it to learn the context of the word based on all of its\nsurrounding words. The embeddings obtained by these models show improved\nresults in comparison to earlier embedding approaches. []\nBERT and DistilBERT models can be used for generating embeddings for any\ntext columns. These pretrained models are used to get embeddings for the\ntext followed by Linear/Logistic Regression to generate features that\ncan then be used for any downstream models in Driverless AI. Refer to\nnlp-settings in the Expert Settings topic for more information on how to\nenable these models for feature engineering. We recommend using GPU(s)\nto leverage the power of these models and accelerate the feature\nengineering process. PyTorch Transformer Architecture Models (e.g., BERT) as Modeling\nAlgorithms\nStarting with the Driverless AI 1.9 release, the Transformer-based\narchitectures shown in the diagram below are supported as models in\nDriverless AI.", + "prompt_type": "plain" + }, + { + "output": "DistilBERT is a distilled\nversion of BERT that has fewer parameters compared to BERT (40% less)\nand it is faster (60% speedup) while retaining 95% of BERT level\nperformance. The DistilBERT model can be useful when training time and\nmodel size are important. Refer to nlp-settings in the Expert Settings\ntopic for more information on how to enable these models as modeling\nalgorithms. We recommend using GPU(s) to leverage the power of these\nmodels and accelerate the model training time. 
In addition to these techniques, Driverless AI supports\ncustom NLP recipes using, for example, PyTorch or\nFlair. NLP Feature Naming Convention\nThe naming conventions of the NLP features help to understand the type\nof feature that has been created. The syntax for the feature names is as follows:\n[FEAT TYPE]:[COL]. [TARGET_CLASS]\n- [FEAT TYPE] represents one of the following:\n- [COL] represents the name of the text column. - [TARGET_CLASS] represents the target class for which the model\n predictions are made.", + "prompt_type": "plain" + }, + { + "output": "[]\nNLP Explainers\nThe following is a list of available NLP explainers. For more\ninformation, refer to mli_default_recipes and mli-nlp-plots. - NLP LOCO Explainer: The NLP LOCO plot applies a\n leave-one-covariate-out (LOCO) styled approach to NLP models by\n removing a specific token from all text features in a record and\n predicting local importance without that token. The difference\n between the resulting score and the original score (token included)\n is useful when trying to determine how specific changes to text\n features alter the predictions made by the model. - NLP Partial Dependence Plot Explainer: NLP partial dependence\n (yellow) portrays the average prediction behavior of the Driverless\n AI model when an input text token is left in its respective text and\n not included in its respective text along with +/- 1 standard\n deviation bands. ICE (grey) displays the prediction behavior for an\n individual row of data when an input text token is left in its\n respective text and not included in its respective text.", + "prompt_type": "plain" + }, + { + "output": "- NLP Tokenizer Explainer: NLP tokenizer plot shows both the global\n and local importance values of each token in a corpus (a large and\n structured set of texts). The corpus is automatically generated from\n text features used by Driverless AI models prior to the process of\n tokenization. 
Local importance values are calculated by using the\n term frequency-inverse document frequency (TF-IDF) as a weighting\n factor for each token in each row. The TF-IDF increases\n proportionally to the number of times a token appears in a given\n document and is offset by the number of documents in the corpus that\n contain the token. - NLP Vectorizer + Linear Model (VLM) Text Feature Importance\n Explainer: NLP Vectorizer + Linear Model (VLM) text feature\n importance uses TF-IDF of individual words as features from a text\n column of interest and builds a linear model (currently GLM) using\n those features and fits it to either the predicted class (binary\n classification) or the continuous prediction (regression) of the\n Driverless AI model.", + "prompt_type": "plain" + }, + { + "output": "Note that by default, this explainer uses\n the first text column based on alphabetical order. NLP Expert Settings\nA number of configurable settings are available for NLP in Driverless\nAI. For more information, refer to nlp-settings in the Expert Settings\ntopic. Also see nlp model and nlp transformer in\npipeline building recipes under experiment\nsettings. []\nAn NLP Example: Sentiment Analysis\nThe following section provides an NLP example. This information is based\non the Automatic Feature Engineering for Text Analytics blog post. A\nsimilar example using the Python Client is available in python_client. This example uses a classic example of sentiment analysis on tweets\nusing the US Airline Sentiment dataset. Note that the sentiment of each\ntweet has been labeled in advance and that our model will be used to\nlabel new tweets. We can split the dataset into training and test\n(80/20) with the random split in Driverless AI. 
We will use the tweets\nin the \u2018text\u2019 column and the sentiment (positive, negative or neutral)\nin the \u2018airline_sentiment\u2019 column for this demo.", + "prompt_type": "plain" + }, + { + "output": "Similar to other problems in the Driverless AI\nsetup, we need to choose the dataset, and then specify the target column\n(\u2018airline_sentiment\u2019). []\nBecause we don't want to use any other columns in the dataset, we need\nto click on Dropped Cols, and then exclude everything but text as shown\nbelow:\n[]\nNext, we will turn on our TensorFlow NLP recipes. We can go to the\nExpert Settings window, NLP and turn on the following:\nCNN TensorFlow models, BiGRU TensorFlow models, character-based\nTensorFlow models or pretrained PyTorch NLP models. []\nAt this point, we are ready to launch an experiment. Text features will\nbe automatically generated and evaluated during the feature engineering\nprocess. Note that some features such as TextCNN rely on TensorFlow\nmodels. We recommend using GPU(s) to leverage the power of TensorFlow or\nthe PyTorch Transformer models and accelerate the feature engineering\nprocess. []\nOnce the experiment is done, users can make new predictions and download\nthe scoring pipeline just like any other Driverless AI experiments.", + "prompt_type": "plain" + }, + { + "output": "Redis Multinode Training\n\nRedis Multinode training in Driverless AI can be used to run multiple\nexperiments at the same time. It is effective in situations where you\nneed to run and complete many experiments simultaneously in a short\namount of time without having to wait for each individual experiment to\nfinish.\n\nUnderstanding Redis Multinode Training\n\nRedis multinode training uses a load distribution technique in which a\nset of machines (worker nodes) are used to help a main server node\nprocess experiments. 
These machines can be CPU only or CPU + GPU, with\nexperiments being distributed accordingly.\n\n[]\n\nJobs (experiments) within the multinode setup are organized into a\nqueue . Jobs remain in this queue when no processor is\navailable. When a worker's processor becomes available, it asks the job\nqueue service to assign it a new job. By default, each worker node\nprocesses two jobs at a time (configured with the", + "prompt_type": "plain" + }, + { + "output": "worker_remote_processorsoption in the config.toml file). Each worker can process multiple jobs at the same time, but two workers cannot process the same experiment at the same time. Messaging and data exchange services are also implemented to allow the workers to effectively communicate with the main server node. **Notes**: - Redis multinode training in Driverless AI is currently in a preview stage. If you are interested in using multinode configurations, contact support@h2o.ai. - Redis multinode training requires the transfer of data to several different workers. For example, if an experiment is scheduled to be on a remote worker node, the datasets it is using need to be copied to the worker machine by using the MinIO service. The experiment can take longer to initialize depending on the size of the transferred objects. - The number of jobs that each worker node processes is controlled by theworker_remote_processors`\noption in the config.toml file. - Tasks are not distributed to best fit\nworkers.", + "prompt_type": "plain" + }, + { + "output": "- **A single experiment runs entirely on one machine (or\nnode)**. For this reason, using a large number of commodity-grade\nhardware is not useful in the context of multinode. - For more\ninformation on queuing in Driverless AI, see :ref:`dai-queuing. Requirements\n- Redis\nRedis Multinode Setup Example\nThe following example configures a two-node Redis Multinode Driverless\nAI cluster on AWS EC2 instances using bashtar distribution. 
This example\ncan be expanded to multiple worker nodes. This example assumes that you\nhave spun up two EC2 instances (Ubuntu 16.04) within the same VPC on\nAWS. VPC Settings\nIn the VPC settings, enable inbound rules to listen to TCP connections\non port 6379 for Redis and 9000 for MinIO. Install Driverless AI Natively\nInstall Driverless AI on the server node. Refer to one of the following\ndocuments for information on how to perform a native install on Linux\nsystems. - linux-deb\n- linux-rpms\n- linux-tarsh\nEdit the Driverless AI config.toml\nAfter Driverless AI is installed, edit the following configuration\noptions in the config.toml file.", + "prompt_type": "plain" + }, + { + "output": "enable_dask_cluster = false`` would not be done.\n\nStart the Driverless AI Server Node\n\n cd |VERSION-dir|-linux-x86_64\n ./run-dai.sh\n\nInstall the Linux deb/rpm/tar package on the EC2 instance to create a\nDriverless AI worker node. After the installation is complete, edit the\nfollowing in the config.toml.\n\n # Redis settings, point to the dai main server's redis server ip address\n redis_ip = \"\"\n\n # Redis settings\n redis_port = 6379\n\n # Redis settings, point to the dai main server's redis server password\n main_server_redis_password = \"\"\n\n # Location of the dai main server's minio server.\n main_server_minio_address = \":9000\"\n\n enable_dask_cluster = false\n\nTo use the full multinode with both redis and dask support, see the\nexample multinode-example, in which case", + "prompt_type": "plain" + }, + { + "output": "enable_dask_cluster = false`` would not be done.\n\nStart the Driverless AI Worker Node\n\n cd |VERSION-dir|-linux-x86_64\n ./run-dai.sh --worker\n\n # Note that when using rpm/deb you can run the following:\n sudo systemctl start dai-worker\n\nOnce the worker node starts, use the Driverless AI server IP to log into\nDriverless AI. Click on Resources > System Info to confirm that the\nnumber of workers is \"2\" if only one worker is used. 
(By default, each\nworker node processes two jobs at a time. This is configured with the", + "prompt_type": "plain" + }, + { + "output": "worker_remote_processorsoption in the config.toml file.) .. figure:: images/system_info_view.png :alt: .. _multinode-config-attributes: Description of Configuration Attributes --------------------------------------- -worker_mode: Specifies how the long-running tasks are scheduled. Available options include: -multiprocessing: Forks the current process immediately. -singlenode: Shares the task through Redis and needs a worker running. -multinode: Same assinglenode. Also shares the data through MinIO and allows the worker to run on the different machine. -redis_ip: Redis IP address. Defaults to 127.0.0.1 -redis_port: Redis port. Defaults to 6379. -redis_db: Redis database. Each DAI instance running on the Redis server should have unique integer. Defaults to 0. -main_server_redis_password: Main Server Redis password. Defaults to empty string. -local_minio_port: The port that MinIO will listen on. This only takes effect if the current system is a multinode main server.", + "prompt_type": "plain" + }, + { + "output": "check_distribution_shift``\n\nData Distribution Shift Detection\n\nSpecify whether Driverless AI should detect data distribution shifts\nbetween train/valid/test datasets (if provided). When train and test\ndataset differ (or train/valid or valid/test) in terms of distribution\nof data, then a model can be built with high accuracy that tells for\neach row, whether the row is in train or test. Currently, this\ninformation is only presented to the user and not acted upon.\n\nShifted features should either be dropped. 
Or more meaningful aggregate\nfeatures be created by using them as labels or bins.\n\nAlso see\ndrop_features_distribution_shift_threshold_auc \nand check_distribution_shift_drop .", + "prompt_type": "plain" + }, + { + "output": "check_distribution_shift_drop``\n\nData Distribution Shift Detection Drop of Features\n\nSpecify whether to drop high-shift features. This defaults to Auto. Note\nthat Auto for time series experiments turns this feature off.\n\nAlso see\ndrop_features_distribution_shift_threshold_auc \nand check_distribution_shift .", + "prompt_type": "plain" + }, + { + "output": "drop_features_distribution_shift_threshold_auc``\n\nMax Allowed Feature Shift (AUC) Before Dropping Feature\n\nSpecify the maximum allowed AUC value for a feature before dropping the\nfeature.\n\nWhen train and test dataset differ (or train/valid or valid/test) in\nterms of distribution of data, then a model can be built that tells for\neach row, whether the row is in train or test. This model includes an\nAUC value. If this AUC, GINI, or Spearman correlation of the model is\nabove the specified threshold, then Driverless AI will consider it a\nstrong enough shift to drop those features.\n\nThe default AUC threshold is 0.999.", + "prompt_type": "plain" + }, + { + "output": "check_leakage----------------- .. container:: dropdown **Data Leakage Detection** Specify whether to check for data leakage for each feature. Some of the features may contain over predictive power on the target column. This may affect model generalization. Driverless AI runs a model to determine the predictive power of each feature on the target variable. Then, a simple model is built on each feature with significant variable importance. The models with high AUC (for classification) or R2 score (regression) are reported to the user as potential leak. Note that this option is always disabled if the experiment is a time series experiment. This is set to **Auto** by default. 
The equivalent config.toml parameter is ``check_leakage``.\nAlso see :ref:`drop_features_leakage_threshold_auc`\n", + "prompt_type": "plain" + }, + { + "output": "drop_features_leakage_threshold_auc--------------------------------------- .. container:: dropdown **Data Leakage Detection Dropping AUC/R2 Threshold** If :ref:`Leakage Detection ` is enabled, specify the threshold for dropping features. When the AUC (or R2 for regression), GINI, or Spearman correlation is above this value, the feature is dropped. This value defaults to 0.999. The equivalent config.toml parameter is ``drop_features_leakage_threshold_auc``.", + "prompt_type": "plain" + }, + { + "output": "leakage_max_data_size``\n\nMax Rows X Columns for Leakage\n\nSpecify the maximum number of (rows x columns) to trigger sampling for\nleakage checks. This value defaults to 10,000,000.", + "prompt_type": "plain" + }, + { + "output": "max_features_importance``\n\nMax. num. features for variable importance\n\nSpecify the maximum number of features to use and show in importance\ntables. For any interpretability higher than 1, transformed or original\nfeatures with lower importance than the top max_features_importance features\nare always removed. Feature importances of transformed or original\nfeatures correspondingly will be pruned. Higher values can lead to lower\nperformance and larger disk space used for datasets with more than 100k\ncolumns.", + "prompt_type": "plain" + }, + { + "output": "enable_wide_rules--------------------- .. container:: dropdown **Enable Wide Rules** Enable various rules to handle wide datasets (i.e., no. of columns > no. of rows). The default value is \"auto\", which automatically enables the wide rules when it detects that the number of columns is greater than the number of rows. Setting \"on\" forces rules to be enabled regardless of any conditions. 
Enabling wide data rules sets all ``max_cols``, ``max_origcol``, and ``fs_orig`` tomls to large values, and enforces monotonicity to be disabled unless ``monotonicity_constraints_dict`` is set or the default value of ``monotonicity_constraints_interpretability_switch`` is changed. It also disables shift detection and data leakage checks, and enables the Xgboost Random Forest model\n for modeling. To disable wide rules, set enable_wide_rules to \"off\". For mostly or\n entirely numeric datasets, selecting only 'OriginalTransformer' for\n faster speed is recommended (see\n included_transformers).", + "prompt_type": "plain" + }, + { + "output": "orig_features_fs_report``\n\nReport Permutation Importance on Original Features\n\nSpecify whether Driverless AI reports permutation importance on original\nfeatures (represented as normalized change in the chosen metric) in logs\nand the report file. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "max_rows_fs``\n\nMaximum Number of Rows to Perform Permutation-Based Feature Selection\n\nSpecify the maximum number of rows when performing permutation feature\nimportance, reduced by (stratified) random sampling. This value defaults\nto 500,000.", + "prompt_type": "plain" + }, + { + "output": "max_orig_cols_selected``\n\nMax Number of Original Features Used\n\nSpecify the maximum number of columns to be selected from an existing\nset of columns using feature selection. This value defaults to\n10,000,000. For categorical columns, the selection is based upon how well\ntarget encoding (or frequency encoding if not available) on categoricals\nand numerics treated as categoricals helps. This is useful to reduce the\nfinal model complexity. 
First the best [max_orig_cols_selected] are\nfound through feature selection methods and then these features are used\nin feature evolution (to derive other features) and in modelling.", + "prompt_type": "plain" + }, + { + "output": "max_orig_nonnumeric_cols_selected``\n\nMax Number of Original Non-Numeric Features\n\nMaximum number of non-numeric columns selected, above which Driverless AI will do\nfeature selection on all features and avoid treating numerical as\ncategorical. Same as above (max_orig_numeric_cols_selected), but for\ncategorical columns. Feature selection is performed on all features when\nthis value is exceeded. This value defaults to 300.", + "prompt_type": "plain" + }, + { + "output": "fs_orig_cols_selected``\n\nMax Number of Original Features Used for FS Individual\n\nSpecify the maximum number of features you want to be selected in an\nexperiment. This value defaults to 10,000,000. Additional columns above\nthe specified value add a special individual with original columns\nreduced.", + "prompt_type": "plain" + }, + { + "output": "fs_orig_numeric_cols_selected``\n\nNumber of Original Numeric Features to Trigger Feature Selection Model\nType\n\nThe maximum number of original numeric columns, above which Driverless\nAI will do feature selection. Note that this is applicable only to\nspecial individuals with original columns reduced. A separate individual\nin the genetic algorithm is created by doing feature selection by\npermutation importance on original features. This value defaults to\n10,000,000.", + "prompt_type": "plain" + }, + { + "output": "fs_orig_nonnumeric_cols_selected``\n\nNumber of Original Non-Numeric Features to Trigger Feature Selection\nModel Type\n\nThe maximum number of original non-numeric columns, above which\nDriverless AI will do feature selection on all features. 
Note that this\nis applicable only to special individuals with original columns reduced.\nA separate individual in the genetic algorithm is created by doing\nfeature selection by permutation importance on original features. This\nvalue defaults to 200.", + "prompt_type": "plain" + }, + { + "output": "max_relative_cardinality``\n\nMax Allowed Fraction of Uniques for Integer and Categorical Columns\n\nSpecify the maximum fraction of unique values for integer and\ncategorical columns. If the column has a larger fraction of unique\nvalues than that, it will be considered an ID column and ignored. This\nvalue defaults to 0.95.", + "prompt_type": "plain" + }, + { + "output": "num_as_cat-------------- .. container:: dropdown **Allow Treating Numerical as Categorical** Specify whether to allow some numerical features to be treated as categorical features. This is enabled by default. The equivalent config.toml parameter isnum_as_cat``.", + "prompt_type": "plain" + }, + { + "output": "max_int_as_cat_uniques``\n\nMax Number of Unique Values for Int/Float to be Categoricals\n\nSpecify the number of unique values for integer or real columns to be\ntreated as categoricals. This value defaults to 50.", + "prompt_type": "plain" + }, + { + "output": "max_fraction_invalid_numeric``\n\nMax. fraction of numeric values to be non-numeric (and not missing) for\na column to still be considered numeric\n\nWhen the fraction of non-numeric (and non-missing) values is less or\nequal than this value, consider the column numeric. Can help with minor\ndata quality issues for experimentation, not recommended for production,\nsince type inconsistencies can occur. Note: Replaces non-numeric values\nwith missing values at start of experiment, so some information is lost,\nbut column is now treated as numeric, which can help. Disabled if < 0.", + "prompt_type": "plain" + }, + { + "output": "nfeatures_max----------------- .. 
container:: dropdown **Max Number of Engineered Features** Specify the maximum number of features to be included per model (and in each model within the final model if an ensemble). After each scoring, based on this parameter value, keeps top variable importance features, and prunes away rest of the features. Final ensemble will exclude any pruned-away features and only train on kept features, but may contain a few new features due to fitting on different data view (e.g. new clusters). Final scoring pipeline will exclude any pruned-away features, but may contain a few new features due to fitting on different data view (e.g. new clusters). The default value of **-1** means no restrictions are applied for this parameter except internally-determined memory and interpretability restrictions. Notes: - If ``interpretability`` > ``remove_scored_0gain_genes_in_postprocessing_above_interpretability`` (see config.toml for reference), then every GA (genetic algorithm) iteration post-processes features down to this value just after scoring them.", + "prompt_type": "plain" + }, + { + "output": "ngenes_max-------------- .. container:: dropdown **Max Number of Genes** Specify the maximum number of genes (transformer instances) kept per model (and per each model within the final model for ensembles). This controls the number of genes before features are scored, so Driverless AI will just randomly sample genes if pruning occurs. If restriction occurs after scoring features, then aggregated gene importances are used for pruning genes. Instances include all possible transformers, including original transformer for numeric features. A value of -1 means no restrictions except internally-determined memory and interpretability restriction. The equivalent config.toml parameter is ``ngenes_max``.", + "prompt_type": "plain" + }, + { + "output": "features_allowed_by_interpretability---------------------------------------- .. 
container:: dropdown **Limit Features by Interpretability** Specify whether to limit feature counts with the **Interpretability** training setting as specified by the ``features_allowed_by_interpretability``\nconfig.toml setting.", + "prompt_type": "plain" + }, + { + "output": "monotonicity_constraints_interpretability_switch``\n\nThreshold for Interpretability Above Which to Enable Automatic\nMonotonicity Constraints for Tree Models\n\nSpecify an Interpretability setting value equal to or above which to use\nautomatic monotonicity constraints in XGBoostGBM, LightGBM, or Decision\nTree models. This value defaults to 7.\n\nAlso see monotonic gbm recipe and\nMonotonicity Constraints in Driverless AI for reference.", + "prompt_type": "plain" + }, + { + "output": "monotonicity_constraints_correlation_threshold``\n\nCorrelation Beyond Which to Trigger Monotonicity Constraints (if\nenabled)\n\nSpecify the threshold of Pearson product-moment correlation coefficient\nbetween numerical or encoded transformed feature and target above (below\nnegative for) which to use positive (negative) monotonicity for\nXGBoostGBM, LightGBM and Decision Tree models. This value defaults to\n0.1.\n\nNote: This setting is only enabled when Interpretability is greater than\nor equal to the value specified by the\nmonotonicity_constraints_interpretability_switch setting and\nwhen the monotonicity_constraints_dict setting is not specified.\n\nAlso see monotonic gbm recipe and\nMonotonicity Constraints in Driverless AI for reference.", + "prompt_type": "plain" + }, + { + "output": "monotonicity_constraints_log_level``\n\nControl amount of logging when calculating automatic monotonicity\nconstraints (if enabled)\n\nFor models that support monotonicity constraints, and if enabled, show\nautomatically determined monotonicity constraints for each feature going\ninto the model based on its correlation with the target. 'low' shows\nonly monotonicity constraint direction. 
'medium' shows correlation of\npositively and negatively constrained features. 'high' shows all\ncorrelation values.\n\nAlso see monotonic gbm recipe and\nMonotonicity Constraints in Driverless AI for reference.", + "prompt_type": "plain" + }, + { + "output": "monotonicity_constraints_drop_low_correlation_features``\n\nWhether to drop features that have no monotonicity constraint applied\n(e.g., due to low correlation with target)\n\nIf enabled, only monotonic features with +1/-1 constraints will be\npassed to the model(s), and features without monotonicity constraints\n(0) will be dropped. Otherwise all features will be in the model. Only\nactive when interpretability >=\nmonotonicity_constraints_interpretability_switch or\nmonotonicity_constraints_dict is provided.\n\nAlso see monotonic gbm recipe and\nMonotonicity Constraints in Driverless AI for reference.", + "prompt_type": "plain" + }, + { + "output": "monotonicity_constraints_dict``\n\nManual Override for Monotonicity Constraints\n\nSpecify a list of features for which\nmonotonicity constraints are applied. Original numeric features are\nmapped to the desired constraint:\n\n- 1: Positive constraint\n- -1: Negative constraint\n- 0: Constraint disabled\n\nConstraint is automatically disabled (set to 0) for features that are\nnot in this list.\n\nThe following is an example of how this list can be specified:\n\n \"{'PAY_0': -1, 'PAY_2': -1, 'AGE': -1, 'BILL_AMT1': 1, 'PAY_AMT1': -1}\"\n\nNote: If a list is not provided, then the automatic correlation-based\nmethod is used when monotonicity constraints are enabled at high enough\ninterpretability settings.\n\nSee Monotonicity Constraints in Driverless AI for reference.", + "prompt_type": "plain" + }, + { + "output": "max_feature_interaction_depth--------------------------------- .. 
container:: dropdown **Max Feature Interaction Depth** Specify the maximum number of features to use for interaction features like grouping for target encoding, weight of evidence, and other likelihood estimates. Exploring feature interactions can be important in gaining better predictive performance. The interaction can take multiple forms (i.e. feature1 + feature2 or feature1 \\* feature2 + \u2026 featureN). Although certain machine learning algorithms (like tree-based methods) can do well in capturing these interactions as part of their training process, still generating them may help them (or other algorithms) yield better performance. The depth of the interaction level (as in \"up to\" how many features may be combined at once to create one single feature) can be specified to control the complexity of the feature engineering process. Higher values might be able to make more predictive models at the expense of time.", + "prompt_type": "plain" + }, + { + "output": "fixed_feature_interaction_depth``\n\nFixed Feature Interaction Depth\n\nSpecify a fixed non-zero number of features to use for interaction\nfeatures like grouping for target encoding, weight of evidence, and\nother likelihood estimates. To use all features for each transformer,\nset this to be equal to the number of columns. To do a 50/50 sample and\na fixed feature interaction depth of n features, set this to -n.", + "prompt_type": "plain" + }, + { + "output": "enable_target_encoding``\n\nEnable Target Encoding\n\nSpecify whether to use Target Encoding when building the model. Target\nencoding refers to several different feature transformations (primarily\nfocused on categorical data) that aim to represent the feature using\ninformation of the actual target variable. A simple example can be to\nuse the mean of the target to replace each unique category of a\ncategorical feature. 
These types of features can be very predictive but\nare prone to overfitting and require more memory as they need to store\nmappings of the unique categories and the target values.", + "prompt_type": "plain" + }, + { + "output": "cvte_cv_in_cv----------------- .. container:: dropdown **Enable Outer CV for Target Encoding** For target encoding, specify whether an outer level of cross-fold validation is performed in cases where GINI is detected to flip sign or have an inconsistent sign for weight of evidence between ``fit_transform`` (on training data) and ``transform`` (on training\n and validation data). The degree to which GINI is inaccurate is also\n used to perform fold-averaging of look-up tables instead of using\n global look-up tables. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "enable_lexilabel_encoding``\n\nEnable Lexicographical Label Encoding\n\nSpecify whether to enable lexicographical label encoding. This is\ndisabled by default.", + "prompt_type": "plain" + }, + { + "output": "enable_isolation_forest``\n\nEnable Isolation Forest Anomaly Score Encoding\n\nIsolation Forest is useful for identifying anomalies or outliers in\ndata. Isolation Forest isolates observations by randomly selecting a\nfeature and then randomly selecting a split value between the maximum\nand minimum values of that selected feature. This split depends on how\nlong it takes to separate the points. Random partitioning produces\nnoticeably shorter paths for anomalies. When a forest of random trees\ncollectively produces shorter path lengths for particular samples, they\nare highly likely to be anomalies.\n\nThis option lets you specify whether to return the anomaly score of each\nsample. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "enable_one_hot_encoding``\n\nEnable One-Hot Encoding\n\nSpecify whether one-hot encoding is enabled. 
The default Auto setting is\nonly applicable for small datasets and GLMs.", + "prompt_type": "plain" + }, + { + "output": "isolation_forest_nestimators``\n\nNumber of Estimators for Isolation Forest Encoding\n\nSpecify the number of estimators for Isolation Forest encoding. This\nvalue defaults to 200.", + "prompt_type": "plain" + }, + { + "output": "drop_constant_columns``\n\nDrop Constant Columns\n\nSpecify whether to drop columns with constant values. This is enabled by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "drop_id_columns``\n\nDrop ID Columns\n\nSpecify whether to drop columns that appear to be an ID. This is enabled\nby default.", + "prompt_type": "plain" + }, + { + "output": "no_drop_features``\n\nDon't Drop Any Columns\n\nSpecify whether to avoid dropping any columns (original or derived).\nThis is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "cols_to_drop``\n\nFeatures to Drop\n\nSpecify which features to drop. This setting allows you to select many\nfeatures at once by copying and pasting a list of column names (in\nquotes) separated by commas.", + "prompt_type": "plain" + }, + { + "output": "cols_to_force_in``\n\nFeatures to always keep or force in, e.g. \"G1\", \"G2\", \"G3\"\n\nControl over columns to force-in. Forced-in features are handled by the\nmost interpretable transformers allowed by the experiment options, and\nthey are never removed (even if the model assigns 0 importance to them).\nTransformers used by default includes:\n\n - OriginalTransformer for numeric,\n - CatOriginalTransformer or FrequencyTransformer for categorical,\n - TextOriginalTransformer for text,\n - DateTimeOriginalTransformer for date-times,\n - DateOriginalTransformer for dates,\n - ImageOriginalTransformer or ImageVectorizerTransformer for images,\n etc", + "prompt_type": "plain" + }, + { + "output": "cols_to_group_by``\n\nFeatures to Group By\n\nSpecify which features to group columns by. 
When this field is left\nempty (default), Driverless AI automatically searches all columns\n(either at random or based on which columns have high variable\nimportance).", + "prompt_type": "plain" + }, + { + "output": "sample_cols_to_group_by``\n\nSample from Features to Group By\n\nSpecify whether to sample from given features to group by or to always\ngroup all features. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "agg_funcs_for_group_by``\n\nAggregation Functions (Non-Time-Series) for Group By Operations\n\nSpecify whether to enable aggregation functions to use for group by\noperations. Choose from the following (all are selected by default):\n\n- mean\n- sd\n- min\n- max\n- count", + "prompt_type": "plain" + }, + { + "output": "folds_for_group_by``\n\nNumber of Folds to Obtain Aggregation When Grouping\n\nSpecify the number of folds to obtain aggregation when grouping.\nOut-of-fold aggregations will result in less overfitting, but they\nanalyze less data in each fold. The default value is 5.", + "prompt_type": "plain" + }, + { + "output": "mutation_mode``\n\nType of Mutation Strategy\n\nSpecify which strategy to apply when performing mutations on\ntransformers. Select from the following:\n\n- sample: Sample transformer parameters (Default)\n- batched: Perform multiple types of the same transformation together\n- full: Perform more types of the same transformation together than\n the above strategy", + "prompt_type": "plain" + }, + { + "output": "dump_varimp_every_scored_indiv``\n\nEnable Detailed Scored Features Info\n\nSpecify whether to dump every scored individual's variable importance\n(both derived and original) to a csv/tabulated/json file. If enabled,\nDriverless AI produces files such as\n\"individual_scored_id%d.iter%d*features*\". 
This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "dump_trans_timings``\n\nEnable Detailed Logs for Timing and Types of Features Produced\n\nSpecify whether to dump every scored fold's timing and feature info to a\ntimings.txt file. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "compute_correlation``\n\nCompute Correlation Matrix\n\nSpecify whether to compute training, validation, and test correlation\nmatrixes. When enabled, this setting creates table and heatmap PDF files\nthat are saved to disk. Note that this setting is currently a single\nthreaded process that may be slow for experiments with many columns.\nThis is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "interaction_finder_gini_rel_improvement_threshold``\n\nRequired GINI Relative Improvement for Interactions\n\nSpecify the required GINI relative improvement value for the\nInteractionTransformer. If the GINI coefficient is not better than the\nspecified relative improvement value in comparison to the original\nfeatures considered in the interaction, then the interaction is not\nreturned. If the data is noisy and there is no clear signal in\ninteractions, this value can be decreased to return interactions. This\nvalue defaults to 0.5.", + "prompt_type": "plain" + }, + { + "output": "interaction_finder_return_limit``\n\nNumber of Transformed Interactions to Make\n\nSpecify the number of transformed interactions to make from generated\ntrial interactions. (The best transformed interactions are selected from\nthe group of generated trial interactions.) This value defaults to 5.", + "prompt_type": "plain" + }, + { + "output": "enable_rapids_transformers------------------------------ .. container:: dropdown **Whether to enable RAPIDS cuML GPU transformers (no mojo)** Specify whether to enable GPU-based `RAPIDS cuML `__ transformers. 
Note that **no MOJO** support for deployment is available for this selection at this time, but Python scoring is supported and this is in beta testing status. The equivalent config.toml parameter is ``enable_rapids_transformers``\nand the default value is False.", + "prompt_type": "plain" + }, + { + "output": "varimp_threshold_at_interpretability_10``\n\nLowest allowed variable importance at interpretability 10\n\nSpecify the variable importance below which features are dropped (with\nthe possibility of a replacement being found that's better). This\nsetting also sets the overall scale for lower interpretability settings.\nSet this to a lower value if you're content with having many weak\nfeatures despite choosing high interpretability, or if you see a drop in\nperformance due to the need for weak features.", + "prompt_type": "plain" + }, + { + "output": "stabilize_fs``\n\nWhether to take minimum (True) or mean (False) of delta improvement in\nscore when aggregating feature selection scores across multiple\nfolds/depths\n\nWhether to take minimum (True) or mean (False) of delta improvement in\nscore when aggregating feature selection scores across multiple\nfolds/depths. Delta improvement of score corresponds to original metric\nminus metric of shuffled feature frame if maximizing metric, and\ncorresponds to negative of such a score difference if minimizing. Feature selection by permutation importance considers the change in\nscore after shuffling a feature, and using minimum operation ignores\noptimistic scores in favor of pessimistic scores when aggregating over\nfolds. Note, if using tree methods, multiple depths may be fitted, in\nwhich case regardless of this toml setting, only features that are kept\nfor all depths are kept by feature selection. 
If interpretability >=\nconfig toml value of fs_data_vary_for_interpretability, then half data\n(or setting of fs_data_frac) is used as another fit, in which case\nregardless of this toml setting, only features that are kept for all\ndata sizes are kept by feature selection.", + "prompt_type": "plain" + }, + { + "output": "The Interpreted Models Page\n\nClick the MLI link in the upper-right corner of the UI to view a list of\ninterpreted models.\n\nYou can sort this page by Name, Target, Model, Dataset, N-Folds, Feature\nSet, Cluster Col, LIME Method, Status, or ETA/Runtime. You can also use\nthe search bar to locate a specific interpreted model. To specify which\ncolumns are visible on this page, click the top right-most column, then\nselect Visible Columns.\n\nClick the right-most column of an interpreted model to view an\nadditional menu. This menu allows you to open, rename, or delete the\ninterpretation.\n\nNote: Driverless AI version 1.9 features a redesigned MLI page for\ninterpreted models. To view the legacy version of an interpreted model's\nMLI page, select Open Legacy from the menu.\n\nClick on an interpreted model to view the MLI page for that\ninterpretation. The MLI page that displays will vary depending on\nwhether the experiment was a regular experiment or a time series\nexperiment.", + "prompt_type": "plain" + }, + { + "output": "Docker Image Installation\n\nThis section provides instructions for installing the Driverless AI\nDocker image.\n\ninstall/linux-docker-images install/mac-osx install/windows\n\nFor instructions on installing Driverless AI in native Linux\nenvironments, refer to native_installs.\n\nNote that from version 1.10, DAI Docker image runs with internal", + "prompt_type": "plain" + }, + { + "output": "tini that is equivalent to using --init from Docker. If both are enabled in the launch command, tini prints a (harmless) warning message. 
For GPU users, the GPU needs --pid=host for nvml, which makes tini not use pid=1, so it will show the warning message (still harmless). We recommend --shm-size=256m\nin the Docker launch command. But if the user plans to build image auto\nmodel extensively, then", + "prompt_type": "plain" + }, + { + "output": "--shm-size=2g is recommended for Driverless AI Docker command.", + "prompt_type": "plain" + }, + { + "output": "Scoring Pipelines Overview\nDriverless AI provides Scoring Pipelines that can be deployed to\nproduction for experiments and/or\ninterpreted models. - A standalone Python Scoring Pipeline is available for experiments\n and interpreted models. - A low-latency, standalone MOJO Scoring Pipeline is available for\n experiments, with both Java and C++ backends. The Python Scoring Pipeline is implemented as a Python whl file. While\nthis allows for a single process scoring engine, the scoring service is\ngenerally implemented as a client/server architecture and supports\ninterfaces for TCP and HTTP. The MOJO (Model Objects, Optimized) Scoring Pipeline provides a\nstandalone scoring pipeline that converts experiments to MOJOs, which\ncan be scored in real time. The MOJO Scoring Pipeline is available as\neither a Java runtime or a\nC++ runtime. For the C++ runtime, both Python and\nR wrappers are provided.", + "prompt_type": "plain" + }, + { + "output": "Downloading Datasets\n\nIn Driverless AI, you can download datasets from the Datasets Overview\npage.\n\nTo download a dataset, click on the dataset or select the [Click for\nActions] button beside the dataset that you want to download, and then\nselect Download from the submenu that appears.\n\nNote: The option to download datasets will not be available if the", + "prompt_type": "plain" + }, + { + "output": "enable_dataset_downloading option is set to false when starting\nDriverless AI. 
This option can be specified in the :ref:`config.toml\n file.", + "prompt_type": "plain" + }, + { + "output": "MLI Overview\nDriverless AI provides robust interpretability of machine learning\nmodels to explain modeling results in a human-readable format. In the\nMachine Learning Interpretability (MLI) view, Driverless AI employs a\nhost of different techniques and methodologies for interpreting and\nexplaining the results of its models. A number of charts are generated\nautomatically (depending on experiment type), including K-LIME, Shapley,\nVariable Importance, Decision Tree Surrogate, Partial Dependence,\nIndividual Conditional Expectation, Sensitivity Analysis, NLP Tokens,\nNLP LOCO, and more. Additionally, you can download a CSV of LIME,\nShapley, and Original (Kernel SHAP) Shapley reason codes as well as text\nand Python files of Decision Tree Surrogate model rules from this view. The techniques and methodologies used by Driverless AI for model\ninterpretation can be extended with recipes (Python code snippets). For\nmore information on custom recipes for MLI, see\nhttps://github.com/h2oai/driverlessai-recipes/tree/rel-1.9.1/explainers.", + "prompt_type": "plain" + }, + { + "output": "Refer to the\nfollowing sections for more information:\n- interpreted-model-page\n- interpret-regular\n- interpret-ts\n- mli-byor\nNote\nMigration Information\n- Interpretations made in version 1.9.0 are supported in 1.9.x and\n later. - Interpretations made in version 1.8.x aren't supported in 1.9.x and\n later. However, interpretations made in 1.8.x can still be viewed\n and rerun. Note\n- MLI is not supported for unsupervised learning models. - MLI is not\nsupported for Image or multiclass Time Series experiments. - MLI does\nnot require an Internet connection to run on current models. - To\nspecify a port of a specific H2O instance for use by MLI, use the\nh2o_port config.toml setting. You can also specify\nan IP address for use by MLI with the h2o_ip setting. 
Additional Resources\n- Click here to download our MLI cheat sheet. - \"An Introduction to Machine Learning Interpretability\" book. - Click here to access the H2O.ai MLI Resources repository.", + "prompt_type": "plain" + }, + { + "output": "Quick-Start Tables by Environment\nUse the following tables for Cloud, Server, and Desktop to find the\nright setup instructions for your environment. Cloud\nRefer to the following for more information about instance types:\n- AWS Instance Types\n- Azure Instance Types\n- Google Compute Instance Types\n+-----------------+---------+------+----------+-----------------------+\n| Provider | I | Num | Suitable | Refer to Section |\n| | nstance | GPUs | for | |\n| | Type | | | |\n+=================+=========+======+==========+=======================+\n| NVIDIA GPU | | | Serious | i |\n| Cloud | | | use | nstall-on-nvidia-dgx |\n+-----------------+---------+------+----------+-----------------------+\n| AWS | p2 | 1 | Experim | install-on-aws |\n| | | | entation | |\n| - | .xlarge | --- | | |\n| - | | ---- | -- | |\n| - | -- | ---+ | -------- | |\n| - | ------- | | -------+ | |\n| - | ------+ | | | |\n| - | | 8 | | |\n| - | p2.", + "prompt_type": "plain" + }, + { + "output": "| | -------+ | |\n| | | | | |\n| | 2xlarge | 4 | | |\n| | | | Experim | |\n| | -- | --- | | |\n| | ------- | ---- | entation | |\n| | ------+ | ---+ | | |\n| | | | -- | |\n| | p3. 
| | -------- | |\n| | | 8 | -------+ | |\n| | 8xlarge | | | |\n| | | --- | | |\n| | -- | ---- | Serious | |\n| | ------- | ---+ | | |\n| | ------+ | | | |\n| | | | use | |\n| | | 1 | | |\n| | p3.1 | | -- | |\n| | | --- | -------- | |\n| | 6xlarge | ---- | -------+ | |\n| | | ---+ | | |\n| | -- | | | |\n| | ------- | | Serious | |\n| | ------+ | 2 | | |\n| | | | | |\n| | g3.", + "prompt_type": "plain" + }, + { + "output": "| | | |\n| | | | entation | |\n| | 8xlarge | | | |\n| | | | -- | |\n| | -- | | -------- | |\n| | ------- | | -------+ | |\n| | ------+ | | | |\n| | | | | |\n| | | | Experim | |\n| | g3.1 | | | |\n| | | | entation | |\n| | 6xlarge | | | |\n| | | | -- | |\n| | | | -------- | |\n| | | | -------+ | |\n| | | | | |\n| | | | | |\n| | | | Serious | |\n| | | | | |\n| | | | | |\n| | | | use | |\n+-----------------+---------+------+----------+-----------------------+\n| Azure | Stand | 1 | Experim | :r |\n| | ard_NV6 | | entation | ef:install-on-azure |\n| - | | --- | | |\n| - | -- | ---- | -- | |\n| - | ------- | ---+ | -------- | |\n| - | ------+ | | -------+ | |\n| - | | | | |\n| | | 2 | | |\n| | Standa | | Experim | |\n| | | --- | | |\n| | rd_NV12 | ---- | entation | |\n| | | ---+ | | |\n| | -- | | -- | |\n| | ------- | | -------- | |\n| | ------+ | 4 | -------+ | |\n| | | | | |\n| | | --- | | |\n| | Standa | ---- | Serious | |\n| | | ---+ | | |\n| | rd_NV24 | | | |\n| | | | use | |\n| | -- | 1 | | |\n| | ------- | | -- | |\n| | ------+ | --- | -------- | |\n| | | ---- | -------+ | |\n| | Stand | ---+ | | |\n| | | | | |\n| | ard_NC6 | | Experim | |\n| | | 2 | | |\n| | -- | | entation | |\n| | ------- | --- | | |\n| | ------+ | ---- | -- | |\n| | | ---+ | -------- | |\n| | | | -------+ | |\n| | Standa | | | |\n| | | 4 | | |\n| | rd_NC12 | | Experim | |\n| | | | | |\n| | -- | | entation | |\n| | ------- | | | |\n| | ------+ | | -- | |\n| | | | -------- | |\n| | | | -------+ | |\n| | Standa | | | |\n| | | | | |\n| | rd_NC24 | | Serious | |\n| | | | | |\n| | 
| | | |\n| | | | use | |\n+-----------------+---------+------+----------+-----------------------+\n| Google Compute | | | | insta |\n| | | | | ll-on-google-compute |\n+-----------------+---------+------+----------+-----------------------+\nServer\n --------------------------------------------------------------------\n Operating System GP Min Mem Refer to Section\n Us?", + "prompt_type": "plain" + }, + { + "output": "Datasets in Driverless AI\n\nThe Datasets Overview page is the Driverless AI home page. It displays\nthe datasets that have been imported into Driverless AI. Data Connectors\ncan be used to connect to various data sources.\n\ndatasets-import datasets-options datasets-download datasets-modify\ndatasets-join-wizard datasets-split\n\n[]", + "prompt_type": "plain" + }, + { + "output": "Experiment Summary\nAn experiment summary is available for each completed experiment. Click\nthe Download Summary & Logs button to download the\nh2oai_experiment_summary_.zip file. []\nThe files within the experiment summary zip provide textual explanations\nof the graphical representations that are shown on the Driverless AI UI. Details of each artifact are described below. Experiment AutoDoc\nA report file (AutoDoc) is included in the experiment summary. This\nreport provides insight into the training data and any detected shifts\nin distribution, the validation schema selected, model parameter tuning,\nfeature evolution and the final set of features chosen during the\nexperiment. For more information, see autodoc. Experiment Artifacts Overview\nThe Experiment Summary contains artifacts that provide overviews of the\nexperiment. - preview.txt: Provides a preview of the experiment. (This is the same\n information that was included on the UI before starting the\n experiment.)", + "prompt_type": "plain" + }, + { + "output": "(Available in txt or json.) - config.json: Provides a list of the settings used in the experiment. 
- config_overrides_toml_string.txt: Provides any overrides for this\n experiment that were made to the config.toml file. - args_do_auto_dl.json: The internal arguments used in the Driverless\n AI experiment based on the dataset and accuracy, time and\n interpretability settings. - experiment_column_types.json: Provides the column types for each\n column included in the experiment. - experiment_original_column.json: A list of all columns available in\n the dataset that was used in the experiment. - experiment_pipeline_original_required_columns.json: For columns used\n in the experiment, this includes the column name and type. - experiment_sampling_description.json: A description of the sampling\n performed on the dataset. - timing.json: The timing and number of models generated in each part\n of the Driverless AI pipeline. Tuning Artifacts\nDuring the Driverless AI experiment, model tuning is performed to\ndetermine the optimal algorithm and parameter settings for the provided\ndataset.", + "prompt_type": "plain" + }, + { + "output": "does taking\nthe log of the target column improve results). The results from these\ntuning steps are available in the Experiment Summary. - tuning_leaderboard: A table of the model tuning performed along with\n the score generated from the model and training time. (Available in\n txt or json.) - target_transform_tuning_leaderboard.txt: A table of the transforms\n applied to the target column along with the score generated from the\n model and training time. (This will be empty for binary and\n multiclass use cases.) Features Artifacts\nDriverless AI performs feature engineering on the dataset to determine\nthe optimal representation of the data. The top features used in the\nfinal model can be seen in the GUI. The complete list of features used\nin the final model is available in the Experiment Summary artifacts. The Experiment Summary also provides a list of the original features and\ntheir estimated feature importance. 
For example, given the features in\nthe final Driverless AI model, we can estimate the feature importance of\nthe original features.", + "prompt_type": "plain" + }, + { + "output": "- PAY_3: 0.92 * 1 (PAY_3 is the only variable used.) - ClusterDist9:BILL_AMT1:LIMIT_BAL:PAY_3: 0.90 * 1/3 (PAY_3 is one of\n three variables used.) Estimated Feature Importance = (1*0) + (0.92*1) + (0.9*(1/3)) = 1.22\nNote: The feature importance is converted to relative feature\nimportance. (The feature with the highest estimated feature importance\nwill have a relative feature importance of 1). - ensemble_features: A list of features used in the final model, a\n description of the feature, and the relative feature importance. Feature importances for multiple models are linearly blended with\n same weights as the final ensemble of models. (Available in txt,\n table, or json.) - ensemble_features_orig: A complete list of all original features\n used in the final model, a description of the feature, the relative\n feature importance, and the standard deviation of relative\n importance. (Available in txt or json.) - ensemble_features_orig_shift: A list of original user features used\n in the final model and the difference in relative feature importance\n between the final model and the corresponding feature importance of\n the final population.", + "prompt_type": "plain" + }, + { + "output": "- ensemble_features_prefit: A list of features used by the best\n individuals in the final population, each model blended with same\n weights as ensemble if ensemble used blending. (Available in txt,\n table, or json.) - ensemble_features_shift: A list of features used in the final model\n and the difference in relative feature importance between the final\n model and the corresponding feature importance of the final\n population. (Available in txt, table, or json.) 
- features: A list of features used by the best individual pipeline\n (identified by the genetic algorithm) and each feature's relative\n importance. (Available in txt, table, or json.) - features_orig: A list of original user features used by the best\n individual pipeline (identified by the genetic algorithm) and each\n feature's estimated relative importance. (Available in txt or json.) - leaked_features.json: A list of all leaked features provided along\n with the relative importance and the standard deviation of relative\n importance.", + "prompt_type": "plain" + }, + { + "output": "- leakage_features_orig.json: A list of leaked original features\n provided and an estimate of the relative feature importance of that\n leaked original feature in the final model. - shift_features.json: A list of all features provided along with the\n relative importance and the shift in standard deviation of relative\n importance of that feature. - shifit_features_orig.json: A list of original features provided and\n an estimate of the shift in relative feature importance of that\n original feature in the final model. Final Model Artifacts\nThe Experiment Summary includes artifacts that describe the final model. This is the model that is used to score new datasets and create the MOJO\nscoring pipeline. The final model may be an ensemble of models depending\non the Accuracy setting. - coefs: A list of coefficients and standard deviation of coefficients\n for features. (Available in txt or json.) - ensemble.txt: A summary of the final model which includes a\n description of the model(s), gains/lifts table, confusion matrix,\n and scores of the final model for our list of scorers.", + "prompt_type": "plain" + }, + { + "output": "(Available in table or json.) Note that this is not available for\n Time Series experiments. - ensemble_description.txt: A sentence describing the final model. 
(For example: \"Final TensorFlowModel pipeline with ensemble_level=0\n transforming 21 original features -> 54 features in each of 1 models\n each fit on full training data (i.e. no hold-out).\") - ensemble_coefs: The coefficient and standard deviation coefficient\n for each feature in the ensemble. (Available as txt or json.) - ensemble_coefs_shift: The coefficient and shift of coefficient for\n each feature in the ensemble. (Available as txt or json.) - ensemble_model_description.json/ensemble_model_extra_description: A\n json file describing the model(s) and for ensembles how the model\n predictions are weighted. - ensemble_model_params.json: A json file describing the parameters of\n the model(s). - ensemble_folds_data.json: A json file describing the folds used for\n the final model(s). This includes the size of each fold of data and\n the performance of the final model on each fold.", + "prompt_type": "plain" + }, + { + "output": "- ensemble_features_orig: A list of the original features provided and\n an estimate of the relative feature importance of that original\n feature in the ensemble of models. (Available in txt or json.) - ensemble_features: A complete list of all features used in the final\n ensemble of models, a description of the feature, and the relative\n feature importance. (Available in txt, table, or json.) - leakage_coefs.json: A list of coefficients and standard deviation of\n coefficients for leaked features. - pipeline: A visual representation of the experiment pipeline. - shift_coefs.json: A list of coefficients and the shift in standard\n deviation for those coefficients used in the experiment. The Experiment Summary also includes artifacts about the final model\nperformance. - ensemble_scores.json: The scores of the final model for our list of\n scorers. - ensemble_confusion_matrix_test: The confusion matrix for the test\n data if test data is provided. 
Note that this is not available for\n Time Series experiments.", + "prompt_type": "plain" + }, + { + "output": "Note that this is not available for\n Time Series experiments. - ensemble_confusion_matrix_stats_validation: The confusion matrix\n statistics on internal validation data. Note that this is not\n available for Time Series experiments. - ensemble_confusion_matrix_stats_test.json: Confusion matrix\n statistics on the test data. This is only available if test data is\n provided. Note that this is not available for Time Series\n experiments. - ensemble_gains_test: The lift and gains table for test data if test\n data is provided. (Visualization of lift and gains can be seen in\n the UI.) Note that this is not available for Time Series\n experiments. - ensemble_gains_with_validation: The lift and gains table for the\n internal validation data. (Visualization of lift and gains can be\n seen in the UI.) Note that this is not available for Time Series\n experiments. - ensemble_roc_test: The ROC and Precision Recall table for test data\n if test data is provided.", + "prompt_type": "plain" + }, + { + "output": "To stop the Driverless AI Docker image, type Ctrl + C in the Terminal\n(Mac OS X) or PowerShell (Windows 10) window that is running the\nDriverless AI Docker image.", + "prompt_type": "plain" + }, + { + "output": "Supported Algorithms\nConstant Model\nA Constant Model predicts the same constant value for any input data. The constant value is computed by optimizing the given scorer. For\nexample, for MSE/RMSE, the constant is the (weighted) mean of the target\ncolumn. For MAE, it is the (weighted) median. For other scorers like\nMAPE or custom scorers, the constant is found with an optimization\nprocess. For classification problems, the constant probabilities are the\nobserved priors. A constant model is meant as a baseline reference model. 
If it ends up\nbeing used in the final pipeline, a warning will be issued because that\nwould indicate a problem in the dataset or target column (e.g., when\ntrying to predict a random outcome). Decision Tree\nA Decision Tree is a single (binary) tree model that splits the training\ndata population into sub-groups (leaf nodes) with similar outcomes. No\nrow or column sampling is performed, and the tree depth and method of\ngrowth (depth-wise or loss-guided) is controlled by hyper-parameters.", + "prompt_type": "plain" + }, + { + "output": "This\nimplementation uses a hashing trick and Hogwild approach [3] for\nparallelization. FTRL supports binomial and multinomial classification\nfor categorical targets, as well as regression for continuous targets. GLM\nGeneralized Linear Models (GLM) estimate regression models for outcomes\nfollowing exponential distributions. GLMs are an extension of\ntraditional linear models. They have gained popularity in statistical\ndata analysis due to:\n- the flexibility of the model structure unifying the typical\n regression methods (such as linear regression and logistic\n regression for binary classification)\n- the recent availability of model-fitting software\n- the ability to scale well with large datasets\nDriverless AI uses the XGBoost GLM implementation (booster=gblinear) for\nmodeling. This GLM is subject to early stopping. Isolation Forest\nIsolation Forest is useful for identifying anomalies or outliers in\ndata. Isolation Forest isolates observations by randomly selecting a\nfeature and then randomly selecting a split value between the maximum\nand minimum values of that selected feature.", + "prompt_type": "plain" + }, + { + "output": "Random partitioning produces\nnoticeably shorter paths for anomalies. When a forest of random trees\ncollectively produces shorter path lengths for particular samples, they\nare highly likely to be anomalies. 
LightGBM\nLightGBM is a gradient boosting framework developed by Microsoft that\nuses tree based learning algorithms. It was specifically designed for\nlower memory usage and faster training speed and higher efficiency. Similar to XGBoost, it is one of the best gradient boosting\nimplementations available. It is also used for fitting Random Forest,\nDART (experimental), and Decision Tree models inside of Driverless AI. PyTorch Models\nPyTorch is an open source library used for deep learning tasks such as\nnatural language processing and computer vision. Driverless AI's NLP BERT models are implemented using PyTorch, for\ndetails see NLP in Driverless AI . PyTorch Grownet Model\nGradient Boosting Neural Networks or GrowNet applies gradient boosting\nto shallow neural networks.", + "prompt_type": "plain" + }, + { + "output": "Each model is fed the original features and the\npredictions of the previous model. The predictions of all the models are\nsummed to produce a final output. Every model can be as simple as having\nonly one hidden layer. As per the paper, GrowNet is easy to tune and\nrequires less computational cost and time to train, than deep neural\nnetworks and yet seems to outperform deep neural networks in regression,\nclassification, and ranking on multiple datasets. Driverless AI integrates the Pytorch implementation of Grownet. The\nmodel expert settings parameter enable_grownet controls\nthe run. Random Forest\nRandom Forest averages multiple deep decision trees on different parts\nof the same training data. Driverless AI supports both XGBoost RandomForest (XGBRF) and LightGBM\nRandomForest (boosting=rf) implementations for modeling. 
RuleFit\nThe RuleFit [2] algorithm creates an optimal set of decision rules by\nfirst fitting a tree model, and then fitting a Lasso (L1-regularized)\nGLM model to create a linear model consisting of the most important tree\nleaves (rules).", + "prompt_type": "plain" + }, + { + "output": "TensorFlow\nTensorFlow is an open source software library for performing high\nperformance numerical computation. Driverless AI includes\nTensorFlow NLP recipes based on CNN and BiGRU (RNN) deep learning\nmodels and TensorFlow ImageNet models for\nimage data. A TensorFlow model is a fully connected neural network with a few hidden\nlayers (that is, a multilayer perceptron). It has a few tuning\nparameters that can add wide and deep or attention. TensorFlow is considered a model like XGB, LGBM, or GLM. In many cases,\nit may not perform as well as the aforementioned models, but it can be\nuseful for ensembles and multiclass as well as for small data recipes\nsince there are many folds / repeats and models involved. Only C++ MOJOs are currently available for TensorFlow models. XGBoost\nXGBoost is a supervised learning algorithm that implements a process\ncalled boosting to yield accurate models. Boosting refers to the\nensemble learning technique of building many models sequentially, with\neach new model attempting to correct for the deficiencies in the\nprevious model.", + "prompt_type": "plain" + }, + { + "output": "XGBoost provides parallel tree boosting\n(also known as GBDT, GBM) that solves many data science problems in a\nfast and accurate way. For many problems, XGBoost is one of the best\ngradient boosting machine (GBM) frameworks today. Driverless AI supports XGBoost GBM and XGBoost DART models. Zero-Inflated Models\nZero-inflated models fit data with excess zero counts in the target\nvariable, for example in insurance claim use cases. In Driverless AI, this\nmodel trains a classifier that attempts to classify zero and non-zero\nvalues. 
It then trains a regression model that attempts to predict the\nnon-zero values. The classifier predictions are multiplied by the\nregression predictions to determine the final output. Driverless AI supports both LightGBM and XGBoost versions of\nzero-inflated models. References\n[1] DataTable for Python, https://github.com/h2oai/datatable\n[2] J. Friedman, B. Popescu. \"Predictive Learning via Rule Ensembles\". 2005. http://statweb.stanford.edu/~jhf/ftp/RuleFit.pdf\n[3] Niu, Feng, et al.", + "prompt_type": "plain" + }, + { + "output": "Passing additional pip install options\n\nYou can use the pip_install_options TOML option \nto pass additional pip install options formatted as a list. The\nfollowing are two examples that demonstrate how this option can be used.\n\n- When installing Python packages, you can use this TOML option to\n specify your organization's internal Python package index as\n follows:\n\n- You can use this TOML option to install Python packages with a proxy\n server as follows:\n\nPassing multiple pip install options to DAI\n\nThe following example demonstrates how to correctly pass multiple pip\ninstall options to DAI.\n\n pip_install_options=\"['--extra-index-url', 'http://my-own-repo1:port','--extra-index-url', 'http://my-own-repo2:port']\"", + "prompt_type": "plain" + }, + { + "output": "About Licenses\n\nDriverless AI is licensed per a single named user. Therefore, in order\nto have different users run experiments simultaneously, they would each\nneed a license. Driverless AI manages the GPU(s) that it is given and\nensures that different experiments from different users can run safely\nsimultaneously and don\u2019t interfere with each other. So when two licensed\nusers log in with different credentials, neither of them will see the\nother\u2019s experiment. 
Similarly, if a licensed user logs in using a\ndifferent set of credentials, that user will not see any previously run\nexperiments.", + "prompt_type": "plain" + }, + { + "output": "Genetic Algorithm in Driverless AI\nDriverless AI aims to determine the best pipeline for a dataset. This\ninvolves data transformation, feature engineering, model hyperparameter\ntuning, scoring and ensembling. The genetic algorithm process is a trial-and-error selection process,\nbut it is reproducible. In Driverless AI,\ngenetic algorithm is performed during the\nFeature Evolution stage of an experiment. Feature Evolution\nis a competition between slowly mutating parameters to find best\nindividuals. The Feature Evolution is not completely random and\nis informed from the variable importance interactions tables\nof the modeling algorithms. Driverless AI Brain caches\ninformation about the set of best genes, interactions and parameters in\nthe population and also information from previous experiments (if\nenabled), which can be used during genetic algorithm mutations. Driverless AI also integrates Optuna, which employs a Bayesian optimization\ntechnique for model hyperparameter search.", + "prompt_type": "plain" + }, + { + "output": "Custom code can also be written to toggle inbuilt mutation\nstrategy. For details see additional information section. During model building and feature tuning processes, overfitting is\nprevented by doing bootstrapping and cross validation, while\nunderfitting is prevented by balancing exploitation vs exploration in\ngenetic algorithm. - Understanding Genetic Algorithm and its Driverless AI\n equivalent. - The Full Picture : The end to end pipeline in Driverless\n AI. - Reading the logs : Workflow as seen in the Experiment\n logs. - Some additional details \nUnderstanding Genetic Algorithm\nGenetic Algorithm is a search heuristic inspired by the process of\nnatural selection where the fittest individuals are selected to produce\noffspring for the next generation. 
Some Driverless AI equivalent definitions to consider before the deep\ndive:\n - A gene stores information about type of and parameters for a\n feature transformation .", + "prompt_type": "plain" + }, + { + "output": "- A transformer is the actual code that applies the gene. - An individual consists of a genome that includes a set of genes,\n i.e. information about which transformations and with what\n parameters to perform. It also includes model hyperparameters and\n some additional information like the target transformations\n applied etc. - Individuals create a population that goes through a randomly\n chosen pair-wise tournament process to decide\n the winners. - Fitness score for an individual is model evaluation or scores\n based on the scoring metric. Below are the steps involved in a Genetic Algorithm and their Driverless\nAI equivalent:\nInitialization\nConsider all the probable solutions to the given problem. This creates\nthe population. The most popular technique for initialization is the use\nof random binary strings. Driverless AI : The individuals from the Tuning Phase are fed\nin as the random probable solutions for Feature evolution via genetic\nalgorithm.", + "prompt_type": "plain" + }, + { + "output": "The higher the fitness\nscore, the higher the chances of being chosen for reproduction. Driverless AI : Fitness score for an individual is model evaluation\nbased on the scoring metric. Selection\nIndividuals are selected for the reproduction of offspring. The selected\nindividuals are then arranged in pairs of two to enhance reproduction. These individuals pass on their genes to the next generation. The\ngenetic algorithm uses the fitness proportionate selection technique to\nensure that useful solutions are used for recombination. Driverless AI : A tournament is performed within the\npopulation to find the best subset (half) of the population. Reproduction : crossover mutation\nThis phase involves the creation of a child population. 
The algorithm\nemploys variation operators that are applied to the parent population. The two main operators in this phase include crossover and mutation. mutation : This operator adds new genetic information to the new child\n population.", + "prompt_type": "plain" + }, + { + "output": "Mutation solves the problem of local minimum and enhances\n diversification. crossover : This operator swaps the genetic information of two parents\n to reproduce an offspring. It is performed on parent pairs that are\n selected randomly to generate a child population of equal size as the\n parent population. Driverless AI : Winning sub population's genes, features and model\nhyperparameters are mutated into new offspring (asexual reproduction). Mutation involves adding, perturbing, or pruning\ngenes . The strategy for adding genes is based on balancing exploitation and\n exploration of importance of original variables. Genes are added that\n explore additional transformations for original variables with high\n importance. The best genes from prior winners become part of the pool of great\n genes that are used and can be shared amongst the offspring. Specific output features can be pruned. Features are pruned when\n variable importance is below a certain threshold (based upon\n interpretability settings).", + "prompt_type": "plain" + }, + { + "output": "For some like CUML RF, it is based upon Shapley\n Permutation Importance. Replacement\nGenerational replacement takes place in this phase, which is a\nreplacement of the old population with the new child population. The new\npopulation consists of higher fitness scores than the old population,\nDriverless AI : Mutate winning sub-population's Genes (add, prune and\nperturb), Features, Model hyper parameters to fill-up the population\nback to pre-tournament size. Termination\nAfter replacement has been done, a stopping criterion is used to provide\nthe basis for termination. 
The algorithm will terminate after the\nthreshold fitness solution has been attained. It will identify this\nsolution as the best solution in the population. Driverless AI: Score the individuals and either terminate the evolution\nif stopping criteria is reached or continue the selection process. The Full Picture\nHere we describe in detail the working of the different stages that\nDriverless AI performs in sequence during an experiment to output the best\npipeline for the dataset-\n1) Convert Accuracy, Time and Interpretability knob settings\n to number of iterations and models to be built.", + "prompt_type": "plain" + }, + { + "output": "This is achieved by building\n (LightGBM if available) models with simple allowed feature\n transformations and model parameters (chosen from the internal\n recipe pool) and choosing the target transformation with highest\n score. The target_transform_tuning_leaderboard_simple.json file in\n summary zip or Experiment GUI lists the built models with their\n scores and parameters. []\n3) Data Leakage and Shift Detection:\n A) Leakage Detection : To detect data leakage,\n Driverless AI runs a model (LightGBM if available) to get the\n variable importance table (that determines the predictive\n power of each feature on the target variable). Then, a simple\n model is built on each feature with significant variable\n importance. The models with high AUC (for classification) or\n R2 score (regression) are reported to the user as potential\n leak features. B) Shift Detection : To detect shift in\n distribution between the training, validation or testing\n datasets, Driverless AI trains a binomial model to predict\n which dataset a row belongs to.", + "prompt_type": "plain" + }, + { + "output": "Shifted\n features should either be dropped, or more meaningful\n aggregate features should be created by using them as labels/bins. These features are reported to the user as a notification and\n dropped if a threshold is set. 
4) Model and Feature Tuning Phase: Tuning is random selection of\n parameters to find best individuals . A) Driverless creates a diverse set of individuals. First, it\n goes through and creates a \"SEQUENCE\" of models (based on\n allowed algorithms), adding them with simple feature\n transformations and model parameters. These allowed algorithms\n and feature transformations are displayed in the preview of\n the experiment. The DEFAULT includes simple genes like\n original numeric, date, tfidf or bert embeddings for text\n data, Target encodings, Frequency encodings, Weight of\n evidence encodings, clustering, interactions, etc. These\n default features are simple and support MOJO creation.", + "prompt_type": "plain" + }, + { + "output": "Then, if more individuals are needed in the population,\n \"RANDOM\" models are added. These have same model types\n (algorithms) as in SEQUENCE but with mutated parameters calls\n to the model to get random hyper parameters and (default +\n extra) random features. A \"GLM ONE HOT ENCODED\" model is evaluated and if seem to be\n performing well on the dataset, is added as an individual. A reference individual \"CONSTANT MODEL\" is added to the mix,\n so that we know what best constant predictions (predict the\n same thing whatever the input data) would give for a score. This is how a diverse population of individuals is created. B) All individuals are scored :\n a) Batches (given hardware) of individuals are scored for\n every tuning iteration\n b) At higher accuracy, the original feature set is\n re-created, each batch passing feature importance to\n next batch so it can exploit the importance in order to\n create better features.", + "prompt_type": "plain" + }, + { + "output": "C) Then a tournament is performed amongst the\n individuals to get the best individuals to be passed on to the\n evolution phase. 
D) An \"EXTRA_FS\" model is added in case \"FS\" strategy (feature\n selection strategy) is chosen (for high interpretability\n settings) and it replaces one of the above non-reference\n individuals. This special individual has features that are\n pre-pruned based on the permutation importance of\n the dataset. The Tuning stage leaderboard of an experiment lists all the winning\n individuals (i.e., models that scored highest during the tournament). The summary zip artifact includes it as the\n tuning_leaderboard_simple.json or txt file. []\n5) Feature Evolution Phase: Evolution is competition between slowly\n mutating parameters to find best individuals. During\n evolution phase, we start off with the best individuals (highest\n score) from the tuning phase.", + "prompt_type": "plain" + }, + { + "output": "So first step\n is to either prune or add new individuals to create the desired\n population size. The evolution_begin_leaderboard_simple.json file\n lists these individuals (the unscored are the newly added individuals\n to bring the population to the right size). A) Every iteration of the experiment, each individual creates a\n new model based on its genes. B) Population of individuals is trained on the training data,\n with early stopping if available. C) Population is scored for given metric, with bootstrapping if\n chosen (default). D) Tournament is performed amongst the\n individuals based on the selected strategy, to decide winning\n subset of population\n E) Mutate winning sub-population's Genes, Features, Model to\n fill-up the population back to pre-tournament size (asexual\n reproduction). In the genetic algorithm, Mutation involves\n adding, pruning, or perturbing genes.", + "prompt_type": "plain" + }, + { + "output": "The strategy for adding genes is based on\n balancing exploitation and exploration of importance of\n original variables. Genes are added that explore additional\n transformations for original variables with high importance. 
Genes are pruned based on the Information Gain Variable\n Importance for most models, for some like CUML RF, it is based\n upon Shapley Permutation Importance. Features are pruned when\n variable importance is below a certain threshold (based upon\n interpretability settings). See also\n Mutation strategies . F) Back to A...\n6) Ensembling and Final Scoring Pipeline creation: Ensemble the final\n models and build Final Pipeline for production with a MOJO and/or\n Python scoring pipelines . Notes:\n - Feature and Model Tuning leaderboard table lists a parameter\n called feature cost of a model. Feature cost is not equal to the\n number of features used in the model but is based on their\n complexity (or interpretability) i.e.", + "prompt_type": "plain" + }, + { + "output": "For example a low cost model\n may have greater number of more interpretable features than a high\n cost model (i.e. cost number != number of feature used). This\n parameter is used in the workflow during genetic algorithm to\n decide if need to reduce feature count given interpretability dial\n settings of the experiment. - Certain individuals in the Evolution Begin leaderboard table are\n unscored. This can happen if:\n - They violated some constraint on feature counts imposed for\n given choice of interpretability settings and so were\n changed, and the score no longer applies. - They were added at end to fill-up the needed total number of\n individuals in the population and hence have not been scored\n yet. - Also see additional details. 
Reading the Logs\nThe Experiment preview gives an estimate of the number of iterations\ndone and the total number of models(including cross validation models)\nthat are built during the various stages of the experiment.", + "prompt_type": "plain" + }, + { + "output": "INFO | Number of individuals: 8\n INFO | Estimated target transform tuning iterations: 2\n INFO | Estimated model and feature parameter tuning iterations: 4\n INFO | Estimated total (tuning + feature evolution) number of iterations: 16\n INFO | Estimated total (backend + tuning + feature evolution + final) number of models to train: 598\n INFO | Backend tuning: 0 model(s)\n INFO | Target transform tuning: 18 model(s)\n INFO | Model and feature tuning: 48 model(s)\n INFO | Feature pre-pruning: 0 model(s)\n INFO | Feature evolution: 528 model(s)\n INFO | Final pipeline: 3 model(s)\n INFO | ACCURACY [7/10]:\n INFO | - Training data size: *1,000 rows, 11 cols*\n INFO | - Feature evolution: *LightGBM*, *3-fold CV**, 2 reps*\n INFO | - Final pipeline: *LightGBM, averaged across 3-fold CV splits*\n INFO | \n INFO | TIME [2/10]:\n INFO | - Feature evolution: *8 individuals*, up to *10 iterations*\n INFO | - Early stopping: After *5* iterations of no improvement\n INFO | \n INFO | INTERPRETABILITY [8/10]:\n INFO | - Feature pre-pruning strategy: Permutation Importance FS\n INFO | - Monotonicity constraints: enabled\n INFO | - Feature engineering search space: [Interactions, Original]\n INFO | \n INFO | LightGBM models to train:\n INFO | - Target transform tuning: *18*\n INFO | - Model and feature tuning: *48*\n INFO | - Feature evolution: *528*\n INFO | - Final pipeline: *3*\nThis experiment creates only LightGBM models.", + "prompt_type": "plain" + }, + { + "output": "As this is a regression problem, target tuning is performed and 18\nmodels are created to decide the best\ntarget transformation for the dataset. 
This creates\n3 models with 3 fold cross validation each with 2 repeats, i.e. two\ndifferent views of the dataset (in train/valid split). This is done in\ntwo iterations. The next 4 iterations are used for model and feature parameter tuning. This involves creation of approximately 8*3*2\n(individuals*folds*repeats) ~ 48 models. The output models from the tuning stage undergo Feature Evolution by genetic\nalgorithm. The genetic algorithm is performed on 8 individuals\n(population size). The next 10 iterations are used for feature evolution\nand around (10 * 8/2 [population subset] * (3*2) [folds*repeats]) ~ 240\nnew models are scored. The upper limit to it is 528 models. Early\nstopping is performed if the scores do not improve after 5 iterations. The final pipeline is created with a single individual with 3 fold\ncross validation. These estimates are based on Accuracy/Time/Interpretability dial\nsettings, types of models selected, and other expert settings for the\nexperiment.", + "prompt_type": "plain" + }, + { + "output": "WARNING| - Feature engineering search space: [CVCatNumEncode, CVTargetEncode, Frequent, Interactions, NumCatTE, OneHotEncoding, Original]\n DATA | LightGBMModel *default* feature->transformer map\n DATA | X_0 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer', 'InteractionsTransformer']\n DATA | X_1 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer', 'InteractionsTransformer']\n DATA | X_2 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\n DATA | X_3 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\n DATA | X_4 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\n DATA | X_5 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\n DATA | X_6 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\n DATA | X_7 :['OriginalTransformer', 
'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\n DATA | X_8 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\n DATA | X_9 :['OriginalTransformer', 'CVTargetEncodeTransformer', 'OneHotEncodingTransformer']\nValidation splits creation.", + "prompt_type": "plain" + }, + { + "output": "In this example,\nFeature evolution stage will require 3 folds for cross validation and\nand two repeats i.e data views are done. The for final pipeline will\nalso perform 3 folds cv. After splitting the datasets in to folds for\ninternal validations, a Kolmogorov-Smirnov statistics is calculated to\nsee if the folds have similar distribution of data. INFO | Preparing validation splits...\n INFO | [Feature evolution (repeat 1)] Optimized fold splits: Target fold mean (target transformed) stddev: 0.01329 | means: [14.346849, 14.358292, 14.362315, 14.327351, 14.342845, 14.366349]\n INFO | Kolmogorov-Smirnov statistics for splits of fold 0: KstestResult(statistic=0.02176727625829422, pvalue=0.9998424722802827)\n INFO | Kolmogorov-Smirnov statistics for splits of fold 1: KstestResult(statistic=0.025154089621855738, pvalue=0.9981216923269776)\n INFO | Kolmogorov-Smirnov statistics for splits of fold 2: KstestResult(statistic=0.02074638356497427, pvalue=0.9999414082418556)\n INFO | [Feature evolution (repeat 2)] Optimized fold splits: Target fold mean (target transformed) stddev: 0.01793 | means: [14.3447695, 14.362441, 14.366518, 14.318932, 14.340719, 14.370607]\n INFO | Kolmogorov-Smirnov statistics for splits of fold 0: KstestResult(statistic=0.024698351045656434, pvalue=0.9985813106473687)\n INFO | Kolmogorov-Smirnov statistics for splits of fold 1: KstestResult(statistic=0.027531279405342373, pvalue=0.9937850958604381)\n INFO | Kolmogorov-Smirnov statistics for splits of fold 2: KstestResult(statistic=0.02358730544637591, pvalue=0.9993204937887651)\n INFO | [Final pipeline ] Optimized fold splits: Target fold mean (target transformed) stddev: 
0.01329 | means: [14.346849, 14.358292, 14.362315, 14.327351, 14.342845, 14.366349]\n INFO | Kolmogorov-Smirnov statistics for splits of fold 0: KstestResult(statistic=0.02176727625829422, pvalue=0.9998424722802827)\n INFO | Kolmogorov-Smirnov statistics for splits of fold 1: KstestResult(statistic=0.025154089621855738, pvalue=0.9981216923269776)\n INFO | Kolmogorov-Smirnov statistics for splits of fold 2: KstestResult(statistic=0.02074638356497427, pvalue=0.9999414082418556)\n INFO | Feature engineering training / validation splits:\n INFO | split #1: 666 / 334 - target min -1.264726 / 0.766517, target mean: 14.346850 / 14.358292, target max: 27.710434 / 26.761804, target std: 4.981032 / 5.059986\n INFO | split #2: 667 / 333 - target min -1.264726 / 2.914631, target mean: 14.362315 / 14.327350, target max: 26.761804 / 27.710434, target std: 4.999868 / 5.022746\n INFO | split #3: 667 / 333 - target min 0.766517 / -1.264726, target mean: 14.342844 / 14.366349, target max: 27.710434 / 25.879954, target std: 5.037666 / 4.946448\n INFO | split #4: 666 / 334 - target min -1.264726 / 1.490552, target mean: 14.344769 / 14.362441, target max: 27.710434 / 25.997716, target std: 5.026847 / 4.968671\n INFO | split #5: 667 / 333 - target min -1.264726 / 1.101135, target mean: 14.366518 / 14.318931, target max: 26.492384 / 27.710434, target std: 4.981698 / 5.058766\n INFO | split #6: 667 / 333 - target min 1.101135 / -1.264726, target mean: 14.340719 / 14.370606, target max: 27.710434 / 26.492384, target std: 5.010135 / 5.002203\n INFO | Doing backend tuning on data of shape (666, 11) / (334, 11)\n INFO | Maximum number of rows (train or valid) for feature evolution: 667\n INFO | Final ensemble training / validation splits:\n INFO | split #1: 666 / 334 - target min -1.264726 / 0.766517, target mean: 14.346850 / 14.358292, target max: 27.710434 / 26.761804, target std: 4.981032 / 5.059986\n INFO | split #2: 667 / 333 - target min -1.264726 / 2.914631, target mean: 14.362315 / 
14.327350, target max: 26.761804 / 27.710434, target std: 4.999868 / 5.022746\n INFO | split #3: 667 / 333 - target min 0.766517 / -1.264726, target mean: 14.342844 / 14.366349, target max: 27.710434 / 25.879954, target std: 5.037666 / 4.946448\n INFO | Maximum number of rows (train or valid) for final model/ensemble: 667\nThe transformations and genes applicable and the\ntournament style for the genetic algorithm for\nfeature evolution is registered.", + "prompt_type": "plain" + }, + { + "output": "INFO | Auto-tuning modeling backend: start. INFO | Backend candidate Job# 0 Name: LightGBMModel using GPU (if applicable) with Booster: lightgbm\n INFO | Backend candidate Job# 1 Name: LightGBMModel using CPU with Booster: lightgbm\n ...\n INFO | Auto-tuning modeling backend: end : Duration: 299.8936 s\nLeakage detection A model is run to determine the predictive power of\neach feature on the target. Then, a simple model is built on each\nfeature with significant variable importance. The models with high AUC\n(for classification) or R2 score (regression) are reported to the user\nas potential leak. INFO | Checking for leakage...\n ...\n INFO | Time for leakage check for training and None: 30.6861 [secs]\n INFO | No significant leakage detected in training data ( R2: 0.7957284 )\nTarget tuning is performed for regression problems to find the best\ndistribution (log, unit box, square root, etc.) of the target variable\nto optimize for scorer So 3 models with 6 fold cross validation in 2\niterations.", + "prompt_type": "plain" + }, + { + "output": "INFO | Tuned 18/18 target transform tuning models. Tuned [LIGHTGBM] Tuning []\n INFO | Target transform search: end : Duration: 389.6202 s\n INFO | Target transform: TargetTransformer_identity_noclip\nParameter and feature tuning stage starts from 3rd iteration and 4\niterations are spent in building ~48 models (8*3*2). 
8 Individuals are built and made sure that the features included in the\nmodels satisfy the interpretablity conditions (see nfeatures_max and\nngenes_max). Also an additional FS individual is added during the 6th\niteration. See tuning phase for reference. Hence this stage\nbuilds greater than 48 models. INFO | Model and feature tuning scores (RMSE, less is better):\n INFO | Individual 0 : 1.638517 +/- 0.04910973 [Tournament: 1.638517 Model: LIGHTGBM Feature Cost: 17]\n INFO | Individual 1 : 1.638517 +/- 0.04910973 [Tournament: 1.638517 Model: LIGHTGBM Feature Cost: 17]\n INFO | Individual 2 : 1.638517 +/- 0.04910973 [Tournament: 1.638517 Model: LIGHTGBM Feature Cost: 17]\n INFO | Individual 3 : 1.643672 +/- 0.06142867 [Tournament: 1.643672 Model: LIGHTGBM Feature Cost: 14]\n INFO | Individual 4 : 1.66976 +/- 0.04171555 [Tournament: 1.66976 Model: LIGHTGBM Feature Cost: 13]\n INFO | Individual 5 : 1.683212 +/- 0.06572724 [Tournament: 1.683212 Model: LIGHTGBM Feature Cost: 14]\n INFO | Individual 6 : 1.690918 +/- 0.05417363 [Tournament: 1.690918 Model: LIGHTGBM Feature Cost: 16]\n INFO | Individual 7 : 1.692052 +/- 0.04037833 [Tournament: 1.692052 Model: LIGHTGBM Feature Cost: 17]\n INFO | Individual 8 : 2.080228 +/- 0.03523514 [Tournament: 2.080228 Model: LIGHTGBM Feature Cost: 13]\n INFO | Applying nfeatures_max and ngenes_max limits to tuning population\n INFO | Parameter tuning: end : Duration: 634.5521 s\n INFO | Prepare Feature Evolution\n INFO | Feature evolution has 0 brain cached individuals out of 8 individuals\n INFO | Making 1 new individuals during preparation for evolution\n INFO | Pre-pruning 1 gene(s) from 12 active base genes\n INFO | Starting search for statistically relevant features (FS scheme)\n INFO | FS Permute population of size 1 has 2 unique transformations that include: ['InteractionsTransformer', 'OriginalTransformer']\n INFO | Transforming FS train\n INFO | Using 2 parallel workers (1 parent workers) for fit_transform.", + "prompt_type": 
"plain" + }, + { + "output": "At the end of the 16th iteration, the experiment has not converged so\nthe Feature evolution is stopped. It is made sure that the features\nincluded in the models satisfy the interpretablity conditions and are\nless than the maximum allowed limits (see nfeatures_max and ngenes_max). Best individual and population is stored in the Driverless AI brain for\nrestart or refitting of the experiment. The best individual(s) is\nproceeded the next stage. INFO | Scored 283/310 models on 31 features. Last Scored [LIGHTGBM]\n INFO | Scores (RMSE, less is better):\n INFO | Individual 0 : 1.540669 +/- 0.07447481 [Tournament: 1.540669 Model: LIGHTGBM Feature Cost: 10]\n INFO | Individual 1 : 1.541396 +/- 0.07796533 [Tournament: 1.541396 Model: LIGHTGBM Feature Cost: 9]\n INFO | Individual 2 : 1.542085 +/- 0.07796533 [Tournament: 1.542085 Model: LIGHTGBM Feature Cost: 9]\n INFO | Individual 3 : 1.543484 +/- 0.07796533 [Tournament: 1.543484 Model: LIGHTGBM Feature Cost: 9]\n INFO | Individual 4 : 1.547386 +/- 0.08567484 [Tournament: 1.547386 Model: LIGHTGBM Feature Cost: 10]\n INFO | Individual 5 : 1.557151 +/- 0.08078833 [Tournament: 1.557151 Model: LIGHTGBM Feature Cost: 8]\n INFO | Individual 6 : 3.961817 +/- 0.08480774 [Tournament: 3.961817 Model: LIGHTGBM Feature Cost: 4]\n INFO | Individual 7 : 4.052189 +/- 0.05662354 [Tournament: 4.052189 Model: LIGHTGBM Feature Cost: 1]\n INFO | Best individual with LIGHTGBM model has 7 transformers creating 10 total features and 10 features for model: 1.540669 RMSE\n DATA | Top 10 variable importances of best individual:\n DATA | LInteraction LGain\n DATA | 0 3_X_3 1.000000\n DATA | 1 10_InteractionMul:X_0:X_1 0.570066\n DATA | 2 4_X_4 0.264919\n DATA | 3 10_InteractionAdd:X_0:X_1 0.225805\n DATA | 4 2_X_2 0.183059\n DATA | 5 0_X_0 0.130161\n DATA | 6 1_X_1 0.124281\n DATA | 7 10_InteractionDiv:X_0:X_1 0.032255\n DATA | 8 10_InteractionSub:X_0:X_1 0.013721\n DATA | 9 7_X_7 0.007424\n INFO | Experiment has not 
yet converged after 16 iteration(s).", + "prompt_type": "plain" + }, + { + "output": "After sampling expected population size: 1. INFO | Final population size after sampling: 1 (0 reference) with models_final=3 and num_ensemble_folds=3\n INFO | Final Model sampled population with population of 8 individuals (best scores=['1.540669'])\nIn iteration 17, three fold cross validation is performed on the final\nensemble model, a few checks are done on the features used, predictions\nand python and mojo scoring pipelines are created. Logs and summary\nartifacts are collected. INFO | Completed 3/3 final ensemble models. INFO | Model performance:\n INFO | fold: 0, model name: LightGBM, model iterations: 500, model transformed features: 10, total model time: 2.4198, fit+predict model time: 0.376, total pipeline time: 0.48786, fit pipeline time: 0.29738\n INFO | fold: 1, model name: LightGBM, model iterations: 500, model transformed features: 10, total model time: 3.343, fit+predict model time: 0.34681, total pipeline time: 0.43664, fit pipeline time: 0.24267\n INFO | fold: 2, model name: LightGBM, model iterations: 473, model transformed features: 10, total model time: 2.1446, fit+predict model time: 0.38534, total pipeline time: 0.41979, fit pipeline time: 0.23152\n INFO | Checking for shift in tuning model -> final model variable importances\n DATA | New features created only in final pipeline: Count: 0 List: []\n DATA | Extra features created in final pipeline compared to genetic algorithm population: Count: 0 List: []\n DATA | Missing features from final StackedEnsemble pipeline compared to genetic algorithm population: Count: 0 List: []\n INFO | Completed training of the final scoring pipeline\n INFO | Predictions and Scoring final pipeline...\n INFO | Scored 286/310 models on 31 features.", + "prompt_type": "plain" + }, + { + "output": "Reducing number of features for all models is applicable only when\n (one of below satisfied):\n - num. 
of columns, is greater than max_orig_cols_selected or,\n - num. of non-numeric columns, is greater than\n max_orig_nonnumeric_cols_selected or,\n - num. of numeric columns, is greater than\n max_orig_numeric_cols_selected\n Given the above requirements for all models are not satisfied;\n reducing number of features only for the FS individual (EXTRA_FS)\n is applicable only when (one of below satisfied) :\n - num. of columns, is greater than fs_orig_cols_selected or,\n - num. of non-numeric columns, is greater than\n fs_orig_nonnumeric_cols_selected or,\n - num. of numeric columns, is greater than\n fs_orig_numeric_cols_selected\n See tuning phase and permutation importance . 2) Tuning Phase Model Origins:\n - SEQUENCE and DefaultIndiv: Feature transformations and model\n hyper-parameters are chosen at random from the basic\n transformation sets and parameter lists as suggested by\n internal proprietary data science recipes.", + "prompt_type": "plain" + }, + { + "output": "- model_origin as RANDOM allows both features and model\n hyper-parameters to call their mutate lists or functions. - model_origin as EXTRA_FS is for the extra individuals added\n through Feature Selection (FS) based on permutation importance. - model_origin as REF# denotes reference individuals\n provided as a baseline (e.g. ConstantModel). - model_origin as GLM_OHE denotes features generated by GLM +\n OHE. 3) Driverless AI Brain: During experiment building, Brain caches the\n best iterations, parameters, models, genes and populations. These\n are used for informed lookups, cross overs during mutation,\n restarts and refits of experiment. For\n details see feature_brain_level . 4) Mutation strategy: Strategy to apply when doing mutations on\n transformers :\n - Sample mode is default, with tendency to sample transformer\n parameters. 
- Batched mode tends to do multiple types of the same\n transformation together.", + "prompt_type": "plain" + }, + { + "output": "5) Mutation via custom recipe: Users can control and specify their own\n mutation strategy and the list of parameters to mutate on, by\n writing their own custom Python code and hooking it up with the\n inbuilt Driverless AI Genetic Algorithm. Here is an example of such\n a recipe. The get_one function passes on the list of values to\n the genetic algorithm or Optuna for that parameter. Reach out to\n support@h2o.ai if you need more help with writing your own\n custom recipes . 6) Optuna: Driverless AI supports Optuna for model hyperparameter\n tuning during the Tuning phase of an experiment. Optuna\n employs a Bayesian optimization algorithm called Tree-structured\n Parzen Estimator for hyperparameter optimization. For details see\n enable_genetic_algorithm and tournament_style . When Optuna is selected, model hyperparameters are tuned with\n Optuna and the genetic algorithm is\n used for feature engineering.", + "prompt_type": "plain" + }, + { + "output": "Modifying Datasets\nViewing dataset details\nTo view a summary of a dataset or to preview the dataset, click on the\ndataset or select the [Click for Actions] button next to the dataset\nthat you want to view and select Details from the submenu that appears. This opens the Dataset Details page, which provides a summary of the\ndataset that lists each of the dataset's columns and displays\naccompanying rows for column name, feature engineering type\n(categorical, date, datetime, ID, numerical, text, or image), storage\ntype (integer, string, real, boolean, or time), count, number of missing\nvalues, mean, minimum, maximum, standard deviation, frequency, and\nnumber of unique values. Hover over the top of a column to view a summary of the first 20 rows of\nthat column. To view information for a specific column, type the column\nname in the field above the graph. 
To switch the view and preview the dataset, click the Dataset Rows\nbutton in the top right portion of the UI. Click the Dataset Overview\nbutton to return to the original view.", + "prompt_type": "plain" + }, + { + "output": "These are the same options that are available\nfrom the Datasets page. []\nChange column type\nDriverless AI also lets you change a column's type. If a column's data\ntype or distribution does not match the manner in which you want the\ncolumn to be handled during an experiment, changing the Logical Type can\nhelp to make the column fit better. For example, an integer zip code can\nbe changed into a categorical so that it is only used with\ncategorical-related feature engineering. For Date and Datetime columns,\nuse the Format option. To change the Logical Type or Format of a column,\nclick on the group of square icons located to the right of the words\nAuto-detect. (The squares light up when you hover over them with your\ncursor.) Then select the new column type for that column. Modify by custom data recipe\nThe option to create a new dataset by modifying an existing dataset with\ncustom recipes is also available from this page. Scoring pipelines can\nbe created on the new dataset by building an experiment.", + "prompt_type": "plain" + }, + { + "output": "For example, you\ncan change the target column from regression to classification, add a\nweight column to mark specific training rows as being more important, or\nremove outliers that you do not want to model on. Refer to the\ncustom_recipes_data_recipes section for more information. Click the Modify by Recipe drop-down menu in the top right portion of\nthe UI and select from the following options:\n- Data Recipe URL: Load a custom recipe from a URL to use to modify\n the dataset. The URL must point to either an HTML or raw version of\n the file, a GitHub repository or tree, or a local file. Sample\n custom data recipes are available in the\n driverlessai-recipes repository . 
- Upload Data Recipe: If you have a custom recipe available on your\n local system, click this button to upload that recipe. - Live Code: Manually enter custom recipe code that is used to modify\n the dataset. Click the Get Preview button to preview the code's\n effect on the dataset, then click Apply to create a new dataset.", + "prompt_type": "plain" + }, + { + "output": "- Apply Existing Data Recipe: Apply an existing data recipe to the\n dataset. For more information on adding recipes, see custom-recipes. Notes:\n- These options are enabled by default. You can disable them by\n removing recipe_file and recipe_url from the enabled_file_systems\n configuration option. - Modifying a dataset with a recipe does not overwrite the original\n dataset. The dataset that is selected for modification remains in\n the list of available datasets in its original form, and the\n modified dataset appears in this list as a new dataset. - Changes made to the original dataset through this feature are not\n applied to any new data that is scored. - Due to locale, parsing a datetime column with Live Code or a Data\n Recipe may result in an error or return different results when\n compared to running the same code outside of DAI. The following\n example illustrates the issue that might occur with certain datetime\n formats and describes how you can convert them so that they are\n accepted by DAI:\nRename datasets\nIn Driverless AI, you can rename datasets from the Datasets Overview\npage.", + "prompt_type": "plain" + }, + { + "output": "Exporting Artifacts\nIn some cases, you might find that you do not want your users to\ndownload artifacts directly to their machines. Driverless AI provides\nseveral configuration options/environment variables that enable\nexporting of artifacts instead of downloading. Artifacts can be exported\nto a file system directory, an Amazon S3 bucket, a Bitbucket repository,\nor Azure Blob storage. 
Note: The option to download artifacts is automatically disabled when\nexporting is enabled. Enabling Artifact Exports\nThe config.toml file exposes the following variables:\n- enable_artifacts_upload: Replaces all the downloads on the experiment\n page with exports, and lets users push to the artifact store with\n artifacts_store. This is disabled by default. - artifacts_store: Specify one of the following storage methods:\n - file_system: Store artifacts in the file system directory\n specified by the artifacts_file_system_directory setting. - S3: Store artifacts in the S3 bucket specified by the\n artifacts_s3_bucket setting.", + "prompt_type": "plain" + }, + { + "output": "- azure: Store artifacts in Azure Blob storage. Specify the following for the storage method you selected:\nFile System Directory\n- artifacts_file_system_directory: The file system location where\n artifacts will be copied. This is expected to be a directory on your\n server. AWS S3\n- artifacts_s3_bucket: The AWS S3 bucket where artifacts will be\n stored. Bitbucket\n- bitbucket_skip_cert_verification: Specify whether to skip\n certificate verification for Bitbucket when using a repository with\n HTTPS. This is disabled by default. - bitbucket_tmp_relative_dir: Specify a local temporary directory to\n clone artifacts to (relative to data_directory). Azure Blob Storage\n- artifacts_azure_blob_account_name: Specify your Azure Blob Storage\n account name. - artifacts_azure_blob_account_key: Specify your Azure Blob Storage\n account key. - artifacts_azure_connection_string: Specify your Azure Blob Storage\n connection string. - artifacts_azure_sas_token: Specify your Azure Blob Storage shared\n access signature (SAS) token.", + "prompt_type": "plain" + }, + { + "output": "``enable_dataset_downloading`` configuration option, which is set to", + "prompt_type": "plain" + }, + { + "output": "``true`` by default. Set this to ``false`` if you do not want users to download\ndatasets to their local machine. 
There is currently no configuration\noption that enables exporting datasets to a file system. Docker Image Installs\nThe following example shows how to enable artifact exporting to a file\nsystem when starting the Driverless AI Docker image. docker run \\\n --pid=host \\\n --rm \\\n --shm-size=256m \\\n -e DRIVERLESS_AI_ENABLE_ARTIFACTS_UPLOAD=\"true\" \\\n -e DRIVERLESS_AI_ARTIFACTS_STORE=\"file_system\" \\\n -e DRIVERLESS_AI_ARTIFACTS_FILE_SYSTEM_DIRECTORY=\"tmp\" \\\n -u `id -u`:`id -g` \\\n -p 12345:12345 \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\nNative Installs\nThe following example shows how to enable artifact exporting to a file\nsystem on native installs. 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example:\n # DEB and RPM\n export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\"\n # TAR SH\n export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\"\n 1.", + "prompt_type": "plain" + }, + { + "output": "Save your changes when you are done. # Replace all the downloads on the experiment page to exports and allow users to push to the artifact store configured with artifacts_store\n enable_artifacts_upload = true\n # Artifacts store. # file_system: stores artifacts on a file system directory denoted by artifacts_file_system_directory. #\n artifacts_store = \"file_system\"\n # File system location where artifacts will be copied in case artifacts_store is set to file_system\n artifacts_file_system_directory = \"tmp\"\n 1. Start Driverless AI. Note that the command used to start\n Driverless AI varies depending on your install type. # Deb or RPM with systemd (preferred for Deb and RPM):\n # Start Driverless AI. sudo systemctl start dai\n # Deb or RPM without systemd:\n # Start Driverless AI. 
sudo -H -u dai /opt/h2oai/dai/run-dai.sh\n # Tar.sh\n # Start Driverless AI\n ./run-dai.sh\nExporting an Artifact\nWhen the export artifacts options are enabled/configured, the menu\noptions on the completed_experiment page will change.", + "prompt_type": "plain" + }, + { + "output": "AutoDoc Custom Template Placeholders\nThe following sections describe placeholders for AutoDoc's custom\ntemplate feature. Using placeholders\nYou can customize the content that appears in an AutoDoc report by using\nplaceholders. When you insert a placeholder into a template, the content\nunique to that specific placeholder appears in the generated report in\nthe location where you inserted it. A placeholder is defined as follows:\n {{p section.render('placeholder_name')}}\nThe following example shows how to define the Experiment Overview.DAI\nExperiment Pipeline Column Types placeholder:\n {{p section.render('Experiment Overview.DAI Experiment Pipeline Column Types')}}\nList of placeholders\nThe following is a list of available placeholders categories:\n- placeholders_experiment_overview\n- placeholders_data_overview\n- placeholders_methodology\n- placeholders_data_sampling\n- placeholders_validation\n- placeholders_feature_evolution\n- placeholders_feature_transformations\n- placeholders_final_model\n- placeholders_glm\n- placeholders_literature\n- placeholders_mli\n- placeholders_model_tuning\n- placeholders_nlp\n- placeholders_pdp\n- placeholders_appendix\nExperiment Overview\nPlaceholders related to the Experiment Overview:\n -----------------------------------------------------------------------\n Name Description\n ----------------------------------- -----------------------------------\n Experiment Overview.DAI Experiment A table with different column types\n Pipeline Column Types and type descriptions for DAI\n Experiment Overview.DAI Experiment A table of the DAI time series\n Pipeline Time Series settings and definitions for each\n setting\n Experiment Overview.DAI GPU A 
sentence indicating whether DAI\n Specifications used available GPUs\n Experiment Overview.DAI Intro Model An introductory paragraph on the\n Goal scorer the model is trying to\n optimize\n Experiment Overview.DAI Iterative A section describing the different\n Tuning iterative steps in the DAI\n experiment pipeline (that is,\n model, feature, target tuning, and\n feature evolution)\n Experiment Overview.DAI Validation A documentation-type section that\n Schema Options defines the different types of\n validation strategies available to\n the user\n Experiment Overview.Performance A summary performance table.", + "prompt_type": "plain" + }, + { + "output": "This\n placeholder is used in the standard\n AutoDoc. The content is similar to\n Data Overview.DAI Training Data\n Detailed but has less descriptive\n text and does not include\n information about missing values\n -----------------------------------------------------------------------\nMethodology\nPlaceholders related to Methodology:\n -----------------------------------------------------------------------\n Name Description\n ----------------------------------- -----------------------------------\n Methodology.Assumptions A high-level overview of DAI's\n assumptions and limitations. 
This\n section includes details about\n whether a shift was detected\n between datasets\n Methodology.DAI Assumptions A section describing whether a user\n Detailed provided a validation dataset and\n whether a shift in distribution\n between datasets was detected.", + "prompt_type": "plain" + }, + { + "output": "Note, permutation feature\n importance must be enabled in the\n AutoDoc expert settings for this\n section to render information\n Feature Transformations.template This template is used to call\n placeholders: Feature\n Transformation.Intro, Feature\n Transformations.Permutation Feature\n Importance, NLP.DAI NLP Detail\n -----------------------------------------------------------------------\nFinal Model\nPlaceholders related to the Final Model:\n -----------------------------------------------------------------------\n Name Description\n ----------------------------------- -----------------------------------\n Final Model.DAI All Feature This placeholder is designed to go\n Transformations in an Appendix section.", + "prompt_type": "plain" + }, + { + "output": "Final Model.DAI Final Model A table with the final model's\n Performance Table performance across available\n scorers\n Final Model.DAI Final Model This template is meant to be called\n Performance Text directly after the Experiment\n Overview.DAI Iterative Tuning\n placeholder. 
This placeholder\n includes a short paragraph about\n final model selection and a\n performance table\n Final Model.DAI Model and Component This section includes the model\n Table component table (i.e., this\n placeholder calls the Final\n Model.DAI Final Model Components\n Table), which shows information\n like the model type, model weight,\n number of folds, etc.", + "prompt_type": "plain" + }, + { + "output": "This placeholder is\n called by the Final Model.DAI Loss\n Function placeholder\n Final Model.DAI Model Package A table that provides the algorithm\n Description name, package name, version of the\n package and the packages primary\n documentation string. This\n placeholder is called by the Final\n Model.DAI Model Components\n placeholder\n Final Model.DAI Models Evaluated A table with the algorithms\n Table available in DAI and the reason an\n algorithm was or wasn't selected\n for the final model. This\n placeholder is called by the Final\n Model.DAI Model Components\n placeholder\n Final Model.Pipeline Overview This placeholder is called by the\n Final Model.Pipeline placeholder\n and shows a table of the final\n model components.", + "prompt_type": "plain" + }, + { + "output": "Note the local\n interpretation based plots and\n table require that the user\n specifies individual records of\n interest with the Python client's\n individual_rows parameter\n MLI.KLIME Plot A description of kLIME with the\n kLIME plot\n MLI.KLIME Reason Code Text A documentation-type section that\n describes kLIME reason codes\n MLI.Local Interpretability Row This placeholder is only available\n Information if the user-specified\n individual_rows are provided. 
This\n placeholder is called by the DAI\n MLI Section placeholder\n MLI.Surrogate DT The surrogate Decision Tree plot.", + "prompt_type": "plain" + }, + { + "output": "This\n template is specific to the\n standard AutoDoc\n -----------------------------------------------------------------------\nNatural Language Processing (NLP)\nPlaceholders related to Natural Language Processing (NLP):\n -----------------------------------------------------------------------\n Name Description\n ----------------------------------- -----------------------------------\n NLP.DAI NLP Detail Similar to DAI NLP Assumption, but\n includes information about NLP\n transformer sampling and\n limitations and does not\n distinguish between image and NLP\n transformers (i.e., you will see\n NLP/Image in the body text of this\n sub template).", + "prompt_type": "plain" + }, + { + "output": "This sub\n template includes additional\n explanations about sensitivity\n analysis in general and shows a\n records original feature values\n along with the ICE overlaid PDP. This template expects a user to\n pass in the individual_rows\n parameter to the Python client with\n records of interest\n Partial Dependence Plots.template A section describing how partial\n dependence plots work and showing\n the partial dependence plots. This\n section is used in the standard\n AutoDoc template\n -----------------------------------------------------------------------\nAppendix\nPlaceholders related to the Appendix:\n -----------------------------------------------------------------------\n Name Description\n ----------------------------------- -----------------------------------\n Appendix.DAI Performance Metrics A glossary of DAI performance\n metrics\n Appendix.DAI References A reference for the standard\n AutoDoc.", + "prompt_type": "plain" + }, + { + "output": "Appendix.PSI_Appendix The table used to calculate PSI\n Appendix.Response_Rates_Appendix The quantile-base plots calculation\n table. 
Appendix.template This template points to the\n Appendix.PSI,\n Appendix.Response_Rates_Appendix,\n and the Appendix.NLP Appendix. If\n the final model is or includes a\n GLM this section also include the\n full GLM coefficients tables and\n the documentation on how to\n understand the GLM coefficients\n table. If a user has set the\n AutoDoc to show all configurations,\n the full configuration table will\n be shown in the appendix.", + "prompt_type": "plain" + }, + { + "output": "Adding datasets\nYou can add datasets using one of the following methods:\nDrag and drop files from your local machine directly onto this page. Note that this method currently works for files that are less than 10\nGB. or\nClick the Add Dataset (or Drag & Drop) button to upload or add a\ndataset. Notes:\n- Upload File, File System, HDFS, S3, Data Recipe URL, and Upload Data\n Recipe are enabled by default. These can be disabled by removing\n them from the enabled_file_systems setting in the config.toml file. (Refer to Using the config.toml file section for more information.) - If File System is disabled, Driverless AI will open a local\n filebrowser by default. - If Driverless AI was started with data connectors enabled for Azure\n Blob Store, BlueData Datatap, Google Big Query, Google Cloud\n Storage, KDB+, Minio, Snowflake, or JDBC, then these options will\n appear in the Add Dataset (or Drag & Drop) dropdown menu. 
Refer to\n the Enabling Data Connectors section for more information.", + "prompt_type": "plain" + }, + { + "output": "Choosing an Install Method\n\nConsider the following when choosing between the AWS Marketplace and AWS\nCommunity AMIs:\n\nDriverless AI AWS Marketplace AMI\n\n- Native (Debian) install based\n- Certified by AWS\n- Will typically lag behind our standard releases, and may require\n updates to work with the latest versions of Driverless AI\n- Features several default configurations like default password and\n HTTPS configuration, which are required by AWS\n\nDriverless AI AWS Community AMI\n\n- Docker based\n- Not certified by AWS\n- Will typically have an up-to-date version of Driverless AI for both\n LTS and latest stable releases\n- Base Driverless AI installation on Docker does not feature preset\n configurations", + "prompt_type": "plain" + }, + { + "output": "included_transformers------------------------- .. container:: dropdown **Include Specific Transformers** Select the :ref:`transformer(s) ` that you want to use in the experiment. Use the **Check All**/**Uncheck All** button to quickly add or remove all transformers at once. **Note**: If you uncheck all transformers so that none is selected, Driverless AI will ignore this and will use the default list of transformers for that experiment. This list of transformers will vary for each experiment. The equivalent config.toml parameter is ``included_transformers``.", + "prompt_type": "plain" + }, + { + "output": "included_scorers``\n\nInclude Specific Scorers\n\nSpecify the scorer(s) that you want Driverless AI to include when\nrunning the experiment.", + "prompt_type": "plain" + }, + { + "output": "included_pretransformers---------------------------- .. container:: dropdown **Include Specific Preprocessing Transformers** Specify which :ref:`transformers ` to use for preprocessing before other transformers are activated. 
Preprocessing transformers can take any original features and output arbitrary features that are used by the normal layer of transformers. **Notes**: - Preprocessing transformers and all other layers of transformers are part of the Python and (if applicable) MOJO scoring packages. - Any :ref:`custom transformer recipe ` or native DAI transformer can be used as a preprocessing transformer. For example, a preprocessing transformer can perform interactions, string concatenations, or date extractions as a preprocessing step before the next layer of Date and DateTime transformations are performed. Caveats: 1) one cannot currently do a time-series experiment on a time_column that hasn't yet been made (setup of experiment only knows about original data, not transformed).", + "prompt_type": "plain" + }, + { + "output": "num_pipeline_layers----------------------- .. container:: dropdown **Number of Pipeline Layers** Specify the number of pipeline layers. This value defaults to 1. The equivalent config.toml parameter isnum_pipeline_layers``.\n\n Note: This does not include the preprocessing layer specified by the\n included_pretransformers expert setting.", + "prompt_type": "plain" + }, + { + "output": "included_datas------------------ .. container:: dropdown **Include Specific Data Recipes During Experiment** Specify whether to include specific data recipes during the experiment. Avoids need for separate data preparation step, builds data preparation within experiment and within python scoring package. But Mojo will require data preparation applied before making predictions. The equivalent config.toml parameter isincluded_datas``.", + "prompt_type": "plain" + }, + { + "output": "included_individuals------------------------ .. container:: dropdown **Include Specific Individuals** In Driverless AI, every completed experiment automatically generates Python code for the experiment that corresponds to the individual(s) used to build the final model. 
You can edit this auto-generated Python code offline and upload it as a recipe, or edit and save it using the built-in :ref:`custom recipe management editor `. This feature gives you code-first access to a significant portion of DAI's internal transformer and model generation process. This expert setting lets you do one of the following: - Leave this field empty to have all individuals be freshly generated and treated by DAI's AutoML as a container of model and transformer choices. - Select recipe display names of custom individuals through the UI. If the number of included custom individuals is less than DAI needs, then the remaining individuals are freshly generated.", + "prompt_type": "plain" + }, + { + "output": "threshold_scorer``\n\nScorer to Optimize Threshold to Be Used in Other Confusion-Matrix Based\nScorers (For Binary Classification)\n\nSpecify the scorer used to optimize the binary probability threshold\nthat is being used in related Confusion Matrix based scorers such as\nPrecision, Recall, FalsePositiveRate, FalseDiscoveryRate,\nFalseOmissionRate, TrueNegativeRate, FalseNegativeRate, and\nNegativePredictiveValue. Select from the following:\n\n- Auto (Default): Use this option to sync the threshold scorer with\n the scorer used for the experiment. If this is not possible, F1 is\n used.\n- F05 More weight on precision, less weight on recall.\n- F1: Equal weight on precision and recall.\n- F2: Less weight on precision, more weight on recall.\n- MCC: Use this option when all classes are equally important.", + "prompt_type": "plain" + }, + { + "output": "prob_add_genes``\n\nProbability to Add Transformers\n\nSpecify the unnormalized probability to add genes or instances of\ntransformers with specific attributes. If no genes can be added, other\nmutations are attempted. 
This value defaults to 0.5.", + "prompt_type": "plain" + }, + { + "output": "prob_addbest_genes``\n\nProbability to Add Best Shared Transformers\n\nSpecify the unnormalized probability to add genes or instances of\ntransformers with specific attributes that have shown to be beneficial\nto other individuals within the population. This value defaults to 0.5.", + "prompt_type": "plain" + }, + { + "output": "prob_prune_genes``\n\nProbability to Prune Transformers\n\nSpecify the unnormalized probability to prune genes or instances of\ntransformers with specific attributes. This value defaults to 0.5.", + "prompt_type": "plain" + }, + { + "output": "prob_perturb_xgb``\n\nProbability to Mutate Model Parameters\n\nSpecify the unnormalized probability to change model hyper parameters.\nThis value defaults to 0.25.", + "prompt_type": "plain" + }, + { + "output": "prob_prune_by_features``\n\nProbability to Prune Weak Features\n\nSpecify the unnormalized probability to prune features that have low\nvariable importance instead of pruning entire instances of\ngenes/transformers. This value defaults to 0.25.", + "prompt_type": "plain" + }, + { + "output": "skip_transformer_failures``\n\nWhether to Skip Failures of Transformers\n\nSpecify whether to avoid failed transformers. This is enabled by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "skip_model_failures``\n\nWhether to Skip Failures of Models\n\nSpecify whether to avoid failed models. Failures are logged according to\nthe specified level for logging skipped failures. 
This is enabled by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "detailed_skip_failure_messages_level``\n\nLevel to Log for Skipped Failures\n\nSpecify one of the following levels for the verbosity of log failure\nmessages for skipped transformers or models:\n\n- 0 = Log simple message\n- 1 = Log code line plus message (Default)\n- 2 = Log detailed stack traces", + "prompt_type": "plain" + }, + { + "output": "notify_failures------------------- .. container:: dropdown **Whether to Notify About Failures of Transformers or Models or Other Recipe Failures** Specify whether to display notifications in the GUI about recipe failures. This is enabled by default. The equivalent config.toml parameter isnotify_failures``.", + "prompt_type": "plain" + }, + { + "output": "acceptance_test_timeout``\n\nTimeout in Minutes for Testing Acceptance of Each Recipe\n\nSpecify the number of minutes to wait until a recipe's acceptance\ntesting is aborted. A recipe is rejected if acceptance testing is\nenabled and it times out. This value defaults to 20.0.", + "prompt_type": "plain" + }, + { + "output": "Experiment Settings\nThis section describes the settings that are available when running an\nexperiment. Display Name\nOptional: Specify a display name for the new experiment. There are no\ncharacter or length restrictions for naming. If this field is left\nblank, Driverless AI will automatically generate a name for the\nexperiment. Dropped Columns\nDropped columns are columns that you do not want to be used as\npredictors in the experiment. Note that Driverless AI will automatically\ndrop ID columns and columns that contain a significant number of unique\nvalues (above max_relative_cardinality in the config.toml file or Max. allowed fraction of uniques for integer and categorical cols in Expert\nsettings). Validation Dataset\nThe validation dataset is used for tuning the modeling pipeline. 
If\nprovided, the entire training data will be used for training, and\nvalidation of the modeling pipeline is performed with only this\nvalidation dataset. When you do not include a validation dataset,\nDriverless AI will do K-fold cross validation for I.I.D.", + "prompt_type": "plain" + }, + { + "output": "For this reason it is not generally recommended to include a validation\ndataset as you are then validating on only a single dataset. Note that\ntime series experiments cannot be used with a validation dataset:\nincluding a validation dataset will disable the ability to select a time\ncolumn and vice versa. This dataset must have the same number of columns (and column types) as\nthe training dataset. Also note that if provided, the validation set is\nnot sampled down, so it can lead to large memory usage, even if\naccuracy=1 (which reduces the train size). Test Dataset\nThe test dataset is used for testing the modeling pipeline and creating\ntest predictions. The test set is never used during training of the\nmodeling pipeline. (Results are the same whether a test set is provided\nor not.) If a test dataset is provided, then test set predictions will\nbe available at the end of the experiment. Weight Column\nOptional: Column that indicates the observation weight (a.k.a. sample or\nrow weight), if applicable.", + "prompt_type": "plain" + }, + { + "output": "Rows with higher weights have higher importance. The weight affects\nmodel training through a weighted loss function and affects model\nscoring through weighted metrics. The weight column is not used when\nmaking test set predictions, but a weight column (if specified) is used\nwhen computing the test score. Note: The weight column is not used as a feature in modeling. Fold Column\nOptional: Rows with the same value in the fold column represent groups\nthat should be kept together in the training, validation, or\ncross-validation datasets. 
This can prevent data leakage and improve\ngeneralization for data that is naturally grouped and not i.i.d. (identically and independently distributed). This column must be an\ninteger or categorical variable, and it cannot be specified if a\nvalidation set is used or if a Time Column is specified. By default, Driverless AI assumes that the dataset is i.i.d. and creates\nvalidation datasets randomly for regression or with stratification of\nthe target variable for classification.", + "prompt_type": "plain" + }, + { + "output": "This can prevent data leakage and improve generalization. For example,\nwhen viewing data for a pneumonia dataset, person_id would be a good\nFold Column. This is because the data may include multiple diagnostic\nsnapshots per person, and we want to ensure that the same person\u2019s\ncharacteristics show up only in either the training or validation\nframes, but not in both to avoid data leakage. This column must be an integer or categorical variable and cannot be\nspecified if a validation set is used or if a Time Column is specified. Note: The fold column is not used as a feature in modeling. Time Column\nOptional: Specify a column that provides a time order (time stamps for\nobservations), if applicable. This can improve model performance and\nmodel validation accuracy for problems where the target values are\nauto-correlated with respect to the ordering (per time-series group). The values in this column must be a datetime format understood by\npandas.to_datetime(), like \"2017-11-29 00:30:35\" or \"2017/11/29\", or\ninteger values.", + "prompt_type": "plain" + }, + { + "output": "If a time column is found, feature engineering and model\nvalidation will respect the causality of time. If [OFF] is selected, no\ntime order is used for modeling and data may be shuffled randomly (any\npotential temporal causality will be ignored). When your data has a date column, then in most cases, specifying [AUTO]\nfor the Time Column will be sufficient. 
However, if you select a\nspecific date column, then Driverless AI will provide you with an\nadditional side menu. From this side menu, you can specify Time Group\ncolumns or specify [Auto] to let Driverless AI determine the best time\ngroup columns. You can also specify the columns that will be unavailable\nat prediction time (see ucapt for more information), the Forecast\nHorizon (in a unit of time identified by Driverless AI), and the Gap\nbetween the train and test periods. Refer to time-series-in-dai for more information about time series\nexperiments in Driverless AI and to see a time series example. []\nNotes:\n- Engineered features will be used for MLI when a time series\n experiment is built.", + "prompt_type": "plain" + }, + { + "output": "- A Time Column cannot be specified if a Fold Column is specified. This is because both fold and time columns are only used to split\n training datasets into training/validation, so once you split by\n time, you cannot also split with the fold column. If a Time Column\n is specified, then the time group columns play the role of the fold\n column for time series. - A Time Column cannot be specified if a validation dataset is used. - A column that is specified as being unavailable at prediction time\n will only have lag-related features created for (or with) it. - Unavailable Columns at Time of Prediction will only have lag-related\n features created for (or with) it, so this option is only used when\n time-series-lag-based-recipe is enabled. Accuracy, Time, and Interpretability Knobs\nThe experiment preview describes what the Accuracy, Time, and\nInterpretability settings mean for your specific experiment. 
This\npreview automatically updates when any of the experiment's settings\nchange (including the knobs).", + "prompt_type": "plain" + }, + { + "output": "Usually\n achieved through the use of larger data (less sampling), more\n modeling effort (more tuning, higher accuracy settings), more\n statistical calculations (cross-validation, bootstrapping). Doesn't always mean that the final model is better, but generally\n means that the final estimate is more accurate. If in doubt, trust\n the results of the experiment with higher accuracy settings. - The Time knob stands for relative time tolerance: Higher values\n generally lead to longer run times. Indicates patience to wait for\n convergence of the experiment score. Larger values mean higher\n chance of getting a better model. If it takes too long, just click\n on 'Finish' button and it will finish the experiment as if\n convergence was achieved. - The Interpretability knob stands for relative interpretability:\n Higher values favor more interpretable models (e.g. linear models,\n decision trees, single models) with less complex feature\n engineering (fewer features, simple features).", + "prompt_type": "plain" + }, + { + "output": "neural networks, GBMs, ensembles) and\n more complex feature pipelines (more features, higher-order\n interaction features). Note\n- You can manually select individual features to force into an\nexperiment\u2014regardless of Accuracy, Time, and Interpretability\nlevels\u2014with the Features to Force In expert setting. - To adjust the lowest allowed variable importance that features can\nhave before being dropped, use the\nLowest Allowed Variable Importance at Interpretability 10 \nexpert setting. [Accuracy, Time, and Interpretability Knobs]\n[Experiment Preview]\nAccuracy\nAs accuracy increases, Driverless AI gradually adjusts the method for\nperforming the evolution and ensemble. At low accuracy, Driverless AI\nvaries features and models, but they all compete evenly against each\nother. 
At higher accuracy, each independent main model will evolve\nindependently and be part of the final ensemble as an ensemble over\ndifferent main models.", + "prompt_type": "plain" + }, + { + "output": "Finally, at highest accuracies, Driverless AI\nperforms both model and feature tracking and ensembles all those\nvariations. Changing this value affects the feature evolution and final pipeline. Note: A check for a shift in the distribution between train and test is\ndone for accuracy >= 5. Training data size: Displays the number of rows and columns in the\ntraining data. Feature evolution: This represents the algorithms used to create the\nexperiment. If a test set is provided without a validation set, then\nDriverless AI will perform a 1/3 validation split during the experiment. If a validation set is provided, then the experiment will perform\nexternal validation. Final pipeline: This represents the number of models and the validation\nmethod used in the final pipeline. For ensemble modeling, information\nabout how models are combined is also shown here. Time\nThis specifies the relative time for completing the experiment (that is,\nhigher settings take longer). Feature Brain Level: Displays the feature brain level for the\nexperiment.", + "prompt_type": "plain" + }, + { + "output": "Feature evolution: Displays the number of individuals and maximum number\nof iterations that will be run in this experiment. Early stopping: Early stopping will take place if the experiment doesn't\nimprove the score for the specified amount of iterations. Interpretability\nSpecify the relative interpretability for this experiment. Higher values\nfavor more interpretable models. Changing the interpretability level\naffects the feature pre-pruning strategy, monotonicity constraints, and\nthe feature engineering search space. Feature pre-pruning strategy: This represents the feature selection\nstrategy (to prune-away features that do not clearly give improvement to\nmodel score). 
Strategy = \u201cPermutation Importance FS\u201d if interpretability\n>= 6; otherwise strategy is None. Monotonicity constraints: If Monotonicity Constraints are enabled, the\nmodel will satisfy knowledge about monotonicity in the data and monotone\nrelationships between the predictors and the target variable. For\nexample, in house price prediction, the house price should increase with\nlot size and number of rooms, and should decrease with crime rate in the\narea.", + "prompt_type": "plain" + }, + { + "output": "Depending on the correlation, Driverless AI will assign positive,\nnegative, or no monotonicity constraints. Monotonicity is enforced if\nthe absolute correlation is greater than 0.1. All other predictors will\nnot have monotonicity enforced. For more information, see mc. Note: Monotonicity constraints are used in XGBoost GBM, XGBoost Dart,\n LightGBM, and Decision Tree models. Feature engineering search space: This represents the transformers that\nwill be used during the experiment. [...] Models to Train\nFor the listed models:\n Model and feature tuning: Represents the number of validation splits\n multiplied by the tuning population size. Feature evolution: Represents the number of models trained in order to\n evaluate engineered features. Final pipeline: Represents the number of final models. Per-model hyperparameter optimization trials:\n - evolution - Represents the number of trials performed for\n hyperparameter optimization for tuning models. - final - Represents the number of trials performed for\n hyperparameter optimization for final models.", + "prompt_type": "plain" + }, + { + "output": "Though not recommended, you can override this setting\nby clicking this button. Reproducible\nThe Reproducible toggle lets you build an experiment with a random seed\nand get reproducible results. If this is disabled (default), then\nresults vary between runs, which can give a good sense of variance among\nexperiment results. 
When enabling this option, keep the following notes in mind:\n- Experiments are only reproducible when run on the same hardware\n (that is, using the same number and type of GPUs/CPUs and the same\n architecture). For example, you will not get the same results if you\n try an experiment on a GPU machine, and then attempt to reproduce\n the results on a CPU-only machine or on a machine with a different\n number and type of GPUs. - This option should be used with the reproducibility_level expert\n setting option, which ensures different degrees of reproducibility\n based on the OS and environment architecture. Keep in mind that when\n Reproducibility is enabled, then reproducibility_level=1 by default.", + "prompt_type": "plain" + }, + { + "output": "Dask Redis Multinode Example\nDask Multinode Example running docker\nOn main server with public IP address 172.16.2.210:\n mkdir -p /home/$USER/docker/data ; chmod u+rwx /home/$USER/docker/data\n mkdir -p /home/$USER/docker/log ; chmod u+rwx /home/$USER/docker/log\n mkdir -p /home/$USER/docker/tmp ; chmod u+rwx /home/$USER/docker/tmp\n mkdir -p /home/$USER/docker/license ; chmod u+rwx /home/$USER/docker/license\n mkdir -p /home/$USER/docker/jupyter/notebooks\n cp /home/$USER/.driverlessai/license.sig /home/$USER/docker/license/\n export server=172.16.2.210\n docker run \\\n --net host \\\n --runtime nvidia \\\n --rm \\\n --init \\\n --pid=host \\\n --gpus all \\\n --ulimit core=-1 \\\n --shm-size=2g \\\n -u `id -u`:`id -g` \\\n -v /etc/passwd:/etc/passwd:ro \\\n -v /etc/group:/etc/group:ro \\\n -v /home/$USER/docker/license:/license \\\n -v /home/$USER/docker/data:/data \\\n -v /home/$USER/docker/log:/log \\\n -v /home/$USER/docker/tmp:/tmp \\\n -v /home/$USER/docker/jupyter:/jupyter \\\n -e dai_dask_server_ip=$server \\\n -e dai_redis_ip=$server \\\n -e dai_redis_port=6379 \\\n -e dai_main_server_minio_address=$server:9001 \\\n -e dai_local_minio_port=9001 \\\n -e dai_ip=$server \\\n -e 
dai_main_server_redis_password=\"\" \\\n -e dai_worker_mode='multinode' \\\n -e dai_enable_dask_cluster=1 \\\n -e dai_enable_jupyter_server=1 \\\n -e dai_enable_jupyter_server_browser=1 \\\n -e NCCL_SOCKET_IFNAME=\"enp5s0\" \\\n -e NCCL_DEBUG=WARN \\\n -e NCCL_P2P_DISABLE=1 \\\n docker_image\nThe preceding example launches the following:\n- DAI main server on 12345\n- MinIO data server on 9001\n- Redis server on 6379\n- H2O-3 MLI server on 12348\n- H2O-3 recipe server on 50361\n- Jupyter on 8889\n- Dask CPU scheduler on 8786\n- Dask CPU scheduler's dashboard on 8787\n- Dask GPU scheduler on 8790\n- Dask GPU scheduler's dashboard on 8791\n- LightGBM Dask listening port on 12400\nNotes:\n- (1) $USER in bash gives the username.", + "prompt_type": "plain" + }, + { + "output": "- (3) Replace various ports with alternative values if required. - (4) Replace docker_image with the image (include repository if\n remote image). - (5) For GPU usage, --runtime nvidia is required. Systems without\n GPUs should remove this line. - (6) Dask on cluster can be disabled by passing\n dai_enable_dask_cluster=0. If Dask on cluster is disabled, then\n dai_dask_server_ip does not need to be set. - (7) Dask dashboard ports (for example, 8787 and 8791) and H2O-3\n ports 12348, 50361, and 50362 are not required to be exposed. These are for user-level access to H2O-3 or Dask behavior. - (8) Jupyter can be disabled by passing dai_enable_jupyter_server=0\n and dai_enable_jupyter_server_browser=0. - (9) Dask requires the host network be used so scheduler can tell\n workers where to find other workers, so a subnet on new IP\n cannot be used, e.g. with\n docker network create --subnet=192.169.0.0/16 dainet. 
- (10) To isolate user access to single user, instead of doing\n -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro one\n can map to user files with the same required information.", + "prompt_type": "plain" + }, + { + "output": "- (11) Directories created should not have existed or should be from a\n prior run by same user. Pre-existing directories should be\n moved or names changed to avoid conflicts. - (12) Services like the Procsy server, H2O-3 MLI and Recipe servers,\n and Vis-data server are only used internally for each node. - (13) The option -p 12400:12400 is only required for LightGBM Dask. - (14) NCCL_SOCKET_IFNAME should specify the actual hardware device to\n use, as required due to issues with NCCL obtaining the correct\n device automatically from IP. On any number of workers for server with public IP address 172.16.2.210:\n mkdir -p /home/$USER/docker/log ; chmod u+rwx /home/$USER/docker/log\n mkdir -p /home/$USER/docker/tmp ; chmod u+rwx /home/$USER/docker/tmp\n export server=172.16.2.210\n docker run \\\n --runtime nvidia \\\n --gpus all \\\n --rm \\\n --init \\\n --pid=host \\\n --net host \\\n --ulimit core=-1 \\\n --shm-size=2g \\\n -u `id -u`:`id -g` \\\n -v /etc/passwd:/etc/passwd:ro \\\n -v /etc/group:/etc/group:ro \\\n -v /home/$USER/docker/log:/log \\\n -v /home/$USER/docker/tmp:/tmp \\\n -e dai_dask_server_ip=$server \\\n -e dai_redis_ip=$server \\\n -e dai_redis_port=6379 \\\n -e dai_main_server_minio_address=$server:9001 \\\n -e dai_local_minio_port=9001 \\\n -e dai_ip=$server \\\n -e dai_main_server_redis_password=\"\" \\\n -e dai_worker_mode='multinode' \\\n -e dai_enable_dask_cluster=1 \\\n -e NCCL_SOCKET_IFNAME=\"enp4s0\" \\\n -e NCCL_DEBUG=WARN \\\n -e NCCL_P2P_DISABLE=1 \\\n docker_image --worker\nNotes:\n- (1) If same disk is used for main server and worker, change \"docker\"\n to \"docker_w1\" for worker 1, etc.", + "prompt_type": "plain" + }, + { + "output": "Dask Multinode Example running tar\nOn main server with public 
IP address 172.16.2.210:\n export DRIVERLESS_AI_LICENSE_FILE=/home/$USER/.driverlessai/license.sig\n export server=172.16.2.210\n NCCL_SOCKET_IFNAME=\"enp5s0\" \\\n NCCL_DEBUG=WARN \\\n NCCL_P2P_DISABLE=1 \\\n dai_dask_server_ip=$server dai_redis_ip=$server dai_redis_port=6379 \\\n dai_main_server_minio_address=$server:9001 dai_ip=$server dai_main_server_redis_password=\"\" \\\n dai_worker_mode='multinode' dai_enable_dask_cluster=1 \\\n dai_enable_jupyter_server=1 dai_enable_jupyter_server_browser=1 \\\n /opt/h2oai/dai/dai-env.sh python -m h2oai &> multinode_main.txt\nOn each worker node, run the exact same command but with --worker added\nat the end, i.e.:\n export DRIVERLESS_AI_LICENSE_FILE=/home/$USER/.driverlessai/license.sig\n export server=172.16.2.210\n NCCL_SOCKET_IFNAME=\"enp4s0\" \\\n NCCL_DEBUG=WARN \\\n NCCL_P2P_DISABLE=1 \\\n dai_dask_server_ip=$server dai_redis_ip=$server dai_redis_port=6379 \\\n dai_main_server_minio_address=$server:9001 dai_ip=$server dai_main_server_redis_password=\"\" \\\n dai_worker_mode='multinode' dai_enable_dask_cluster=1 \\\n /opt/h2oai/dai/dai-env.sh python -m h2oai --worker &> multinode_worker.txt\nNotes:\n- (1) In this example, address 172.16.2.210 needs to be the public IP\n associated with the network device to use for communication.", + "prompt_type": "plain" + }, + { + "output": "MLI for Regular (Non-Time-Series) Experiments\n\nThis section describes MLI functionality and features for regular\nexperiments. 
Refer to interpret-ts for MLI information with time-series\nexperiments.\n\ninterpret-a-model interpret-expert-settings\ninterpret-explainer-expert-settings interpret-understanding\nviewing-explanations interpret-general-considerations", + "prompt_type": "plain" + }, + { + "output": "Updating Licenses\nIf your current Driverless AI license has expired, you will be required\nto update it in order to continue running Driverless AI, in order to run\nthe scoring pipeline, in order to access deployed pipelines to AWS\nLambdas, etc. Updating the License for Driverless AI\nSimilar to adding a license for the first time, you can update your\nlicense for running Driverless AI either by replacing your current\nlicense.sig file or via the Web UI. Updating the license.sig File\nUpdate the license key in your\n/opt/h2oai/dai/home/.driverlessai/license.sig file by replacing the\nexisting license with your new one. Updating the License in the Web UI\nIf your license is expired, the Web UI will prompt you to enter a new\none. The steps are the same as adding a license for the first time via\nthe Driverless AI Web UI. Updating the License for Scoring Pipelines\nFor the Python Scoring Pipeline, include the updated license file when\nsetting the environment variable in Python. Refer to the above\npython_scoring_license section for adding licenses.", + "prompt_type": "plain" + }, + { + "output": "This is the same as adding a license for the\nfirst time. Refer to the above mojo_scoring_license section for adding\nlicenses. Updating Driverless AI Licenses on AWS Lambda\nUsers can manually update each of their Driverless AI licenses deployed\nin production on AWS Lambda. For users with many MOJOs in production,\nthough, H2O provides a script that will update Driverless AI licenses\nfor all of your MOJOs currently deployed on AWS Lambda. Manual Update\nThe Driverless AI deployment pipeline to AWS Lambdas explicitly sets the\nlicense key as an environment variable. 
Replace the expired license key\nwith your updated one. []\nAutomatic Update\nH2O provides a script that can be used to update Driverless AI licenses\nfor all of your MOJOs deployed on a specific AWS Lambda region. This\nscript can be run from any machine. Requirements\n- New Driverless AI license\n- The following Python packages are required for this script:\n - boto3\n - argparse\n - os\nUpdate Steps\nPerform the following steps to update your Driverless AI license for\nMOJOs on AWS Lambda.", + "prompt_type": "plain" + }, + { + "output": "Variable importance in Driverless AI\nGlobal Feature Importance\n- Model Specific Feature Importance: After completion of an experiment,\n Driverless AI reports the variable importance that is model or\n algorithm specific. For example for Tree based models, this\n importance is gain based, i.e., it computes the average reduction in\n impurity across all trees in the forest due to each feature. Features that tend to split nodes closer to the root of a tree have\n a larger importance value. For say an n fold model the variable\n importance is averaged across the folds, normalized and reported. For an ensemble model, the importance is multiplied by the\n respective model weights and normalized. - Permutation Feature Importance: Permutation-based feature importance\n is a model-agnostic approach. After evaluating the performance or\n scoring a model, if you permute (shuffle) the values of a feature of\n interest and re-evaluate model performance, the observed mean\n difference in performance indicates the feature\u2019s absolute permutation\n importance.", + "prompt_type": "plain" + }, + { + "output": "If a\n feature is highly predictive, however, shuffling its values should\n decrease the model\u2019s performance. ref. Driverless AI applies permutation based feature importance for\n upfront feature selection before genetic algorithm when the\n feature space is large. 
Local Feature Importance\n- LIME: Local interpretable model-agnostic explanations (LIME) is a\n model agnostic technique aiming to explain which features are most\n important in specific areas of the feature space. The main idea of\n LIME is to compute a local surrogate model in the area of interest. This surrogate model is an easily interpretable model such as a\n linear model or a decision tree trained to mimic the behavior of the\n more complex model of interest. For a specific prediction you want\n to explain, LIME slightly changes the values to create new data\n points that are similar. By feeding these perturbed data points to\n the complex model a relation between the perturbed features and\n the model prediction emerges which is then captured by the surrogate\n model.", + "prompt_type": "plain" + }, + { + "output": "- Shapley: Shapley values can be used for local feature importance. They can be used to explain which feature(s) contribute most to a\n specific prediction, say fraud or not fraud. Shapley values are not\n designed to answer the \"what if\" questions that LIME\u2019s local\n surrogate models are designed for. Shapley has its origin in game theory where the problem at hand is\n to determine a fair payoff for all players in the team based on\n their individual capabilities or performance. Shapley value is\n defined as an average expected marginal contribution of one player\n after all possible combinations have been considered. A marginal\n contribution is defined as a value of the group with the player as a\n member minus the value of the group without the player minus the\n value created by the player working alone. 
As considering all possible subsets (or combinations) of features is\n computationally prohibitive in most realistic models with many\n features, Shapley value approximations are computed based on\n sampling.", + "prompt_type": "plain" + }, + { + "output": "Adding Licenses for the First Time\nSpecifying a License File for the Driverless AI Application\nA license file to run Driverless AI can be added in one of three ways\nwhen starting Driverless AI. - Specifying the license.sig file during launch in native installs\n- Using the DRIVERLESS_AI_LICENSE_FILE and DRIVERLESS_AI_LICENSE_KEY\n environment variables when starting the Driverless AI Docker image\n- Uploading your license in the Web UI\nSpecifying the license.sig File During Launch\nBy default, Driverless AI looks for a license key in\n/opt/h2oai/dai/home/.driverlessai/license.sig. If you are installing\nDriverless AI programmatically, you can copy a license key file to that\nlocation. If no license key is found, the application will prompt you to\nadd one via the Web UI. Specifying Environment Variables\nYou can use the DRIVERLESS_AI_LICENSE_FILE or DRIVERLESS_AI_LICENSE_KEY\nenvironment variable when starting the Driverless AI Docker image. 
For\nexample:\n nvidia-docker run \\\n --pid=host \\\n --rm \\\n --shm-size=256m \\\n -u `id -u`:`id -g` \\\n -p 12345:12345 \\\n -e DRIVERLESS_AI_LICENSE_FILE=\"/license/license.sig\" \\\n -v `pwd`/config:/config \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\nor\n nvidia-docker run \\\n --pid=host \\\n --rm \\\n --shm-size=256m \\\n -u `id -u`:`id -g` \\\n -p 12345:12345 \\\n -e DRIVERLESS_AI_LICENSE_KEY=\"Y0uRl1cens3KeyH3re\" \\\n -v `pwd`/config:/config \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\nUploading Your License in the Web UI\nIf Driverless AI does not locate a license.sig file during launch, then\nthe UI will prompt you to enter your license key after you log in the\nfirst time.", + "prompt_type": "plain" + }, + { + "output": "Click Save when you are done. Upon\nsuccessful completion, you will be able to begin using Driverless AI. []\nSpecifying a License for Scoring Pipelines\nWhen deploying models to production, Driverless AI requires a license to\nbe specified in order to run both the Python and MOJO Scoring Pipelines. Python Scoring Pipeline\nThe license can be specified via an environment variable in Python:\n # Set DRIVERLESS_AI_LICENSE_FILE, the path to the Driverless AI license file\n %env DRIVERLESS_AI_LICENSE_FILE=\"/home/ubuntu/license/license.sig\"\n # Set DRIVERLESS_AI_LICENSE_KEY, the Driverless AI license key (Base64 encoded string)\n %env DRIVERLESS_AI_LICENSE_KEY=\"oLqLZXMI0y...\"\nYou can also export the license file when running the scoring pipeline:\n export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\"\n bash run_example.sh\nMOJO Scoring Pipeline\nDriverless AI requires a license to be specified in order to run the\nMOJO Scoring Pipeline. 
The license can be specified in one of the\nfollowing ways:\n- Via an environment variable:\n - DRIVERLESS_AI_LICENSE_FILE: Path to the Driverless AI license\n file, or\n - DRIVERLESS_AI_LICENSE_KEY: The Driverless AI license key\n (Base64 encoded string)\n- Via a system property of JVM (-D option):\n - ai.h2o.mojos.runtime.license.file: Path to the Driverless AI\n license file, or\n - ai.h2o.mojos.runtime.license.key: The Driverless AI license\n key (Base64 encoded string)\n- Via an application classpath:\n - The license is loaded from a resource called /license.sig.", + "prompt_type": "plain" + }, + { + "output": "Enabling Notifications\nDriverless AI can be configured to trigger a user-defined script at the\nbeginning and end of an experiment. This functionality can be used to\nsend notifications to services like Slack or to trigger a machine\nshutdown. The config.toml file exposes the following variables:\n- listeners_experiment_start: Registers an absolute location of a\n script that gets executed at the start of an experiment. - listeners_experiment_done: Registers an absolute location of a\n script that gets executed when an experiment is finished\n successfully. Driverless AI accepts any executable as a script. (For example, a script\ncan be implemented in Bash or Python.) There are only two requirements:\n- The specified script can be executed. (i.e., The file has executable\n flag.) - The script should be able to accept command line parameters. Script Interfaces\nWhen Driverless AI executes a script, it passes the following parameters\nas a script command line:\n- Application ID: A unique identifier of a running Driverless AI\n instance.", + "prompt_type": "plain" + }, + { + "output": "setuidbit set up together with executable bit. For more info, visit: https://unix.stackexchange.com/questions/85663/poweroff-or-reboot-as-normal-user.) Theon_startScript ~~~~~~~~~~~~~~~~~~~~~~~ This script increases the counter of running experiments. 
:: #!/usr/bin/env bash app_id=\"${1}\" experiment_id=\"${3}\" tmp_dir=\"${TMPDIR:-/tmp}/${app_id}\" exp_file=\"${tmp_dir}/${experiment_id}\" mkdir -p \"${tmp_dir}\" touch \"${exp_file}\" The on_done Script ~~~~~~~~~~~~~~~~~~~~~~ This script decreases the counter and executes machine shutdown when the counter reaches 0-value. :: #!/usr/bin/env bash app_id=\"${1}\" experiment_id=\"${3}\" tmp_dir=\"${TMPDIR:-/tmp}/${app_id}\" exp_file=\"${tmp_dir}/${experiment_id}\" if [ -f \"${exp_file}\" ]; then rm -f \"${exp_file}\" fi running_experiments=$(ls -1 \"${tmp_dir}\" | wc -l) if [ \"${running_experiments}\" -gt 0 ]; then echo \"There is still ${running_experiments} running experiments!\"", + "prompt_type": "plain" + }, + { + "output": "Machine is going to shutdown!\" # Use instance meta-data API to get instance ID and then use AWS CLI to shutdown the machine # This expects, that AWS CLI is properly configured and has capability to shutdown instances enabled. aws ec2 stop-instances --instance-ids $(curl http://169.254.169.254/latest/meta-data/instance-id) fi .. container:: tabs .. group-tab:: Docker Image Installs 1. Copy the config.toml file from inside the Docker image to your local filesystem. (Change nvidia-docker run to docker run for non-GPU environments.) .. .. code:: bash # In your Driverless AI folder (for example, dai_1.5.1), # make config and scripts directories mkdir config mkdir scripts # Copy the config.toml file to the new config directory. nvidia-docker run \\ --pid=host \\ --rm \\ -u `id -u`:`id -g` \\ -v `pwd`/config:/config \\ --entrypoint bash \\ h2oai/dai-ubi8-x86_64:|tag| -c \"cp /etc/dai/config.toml /config\" 2.
:: # Notification scripts # - the variable points to a location of script which is executed at given event in experiment lifecycle # - the script should have executable flag enabled # - use of absolute path is suggested # The on experiment start notification script location listeners_experiment_start = \"dai_VERSION/scripts/on_start.sh\" # The on experiment finished notification script location listeners_experiment_done = \"dai_VERSION/scripts/on_done.sh\" 3. Start Driverless AI with the DRIVERLESS_AI_CONFIG_FILE environment variable. Make sure this points to the location of the edited config.toml file so that the software finds the configuration file. (Changenvidia-docker runtodocker run`` for non-GPU\n environments.) nvidia-docker run \\\n --pid=host \\\n --rm \\\n -u `id -u`:`id -g` \\\n -e DRIVERLESS_AI_CONFIG_FILE=\"/config/config.toml\" \\\n -v `pwd`/config:/config \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n -v `pwd`/scripts:/scripts \\\n h2oai/dai-ubi8-x86_64:|tag|\n Native Installs\n 4.", + "prompt_type": "plain" + }, + { + "output": "For example:\n # DEB and RPM\n export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\"\n # TAR SH\n export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\"\n 2. Edit the Notification scripts section in the config.toml file to\n point to the new scripts. Save your changes when you are done. # Notification scripts\n # - the variable points to a location of script which is executed at given event in experiment lifecycle\n # - the script should have executable flag enabled\n # - use of absolute path is suggested\n # The on experiment start notification script location\n listeners_experiment_start = \"/opt/h2oai/dai/scripts/on_start.sh\"\n # The on experiment finished notification script location\n listeners_experiment_done = \"/opt/h2oai/dai/scripts/on_done.sh\"\n 3. Start Driverless AI. 
Note that the command used to start\n Driverless AI varies depending on your install type. # Deb or RPM with systemd (preferred for Deb and RPM):\n # Start Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Supported file types\nDriverless AI supports the following dataset file formats:\n- arff\n- avro\n- bin\n- bz2\n- csv (See note below)\n- dat\n- feather\n- gz\n- jay (See note below)\n- orc (See notes below)\n- parquet (See notes below)\n- pickle / pkl (See note below)\n- tgz\n- tsv\n- txt\n- xls\n- xlsx\n- xz\n- zip\nNote\n- Compressed Parquet files are typically the most efficient file type to\nuse with Driverless AI. - CSV in UTF-16 encoding is only supported when\nimplemented with a byte order mark (BOM). If a BOM is not present, the\ndataset is read as UTF-8. - For ORC and Parquet file formats, if you\nselect to import multiple files, those files will be imported as\nmultiple datasets. If you select a folder of ORC or Parquet files, the\nfolder will be imported as a single dataset. Tools like Spark/Hive\nexport data as multiple ORC or Parquet files that are stored in a\ndirectory with a user-defined name. For example, if you export with\nSpark dataFrame.write.parquet(\"/data/big_parquet_dataset\"), Spark\ncreates a folder /data/big_parquet_dataset, which will contain multiple\nParquet files (depending on the number of partitions in the input\ndataset) and metadata.", + "prompt_type": "plain" + }, + { + "output": "-\nFor ORC and Parquet file formats, you may receive a \"Failed to ingest\nbinary file with ORC / Parquet: lists with structs are not supported\"\nerror when ingesting an ORC or Parquet file that has a struct as an\nelement of an array. This is because PyArrow cannot handle a struct\nthat's an element of an array. - A workaround to flatten Parquet files\nis provided in Sparkling Water. Refer to our Sparkling Water solution\nfor more information. 
- To use Parquet files that have columns with list\ntype, the data_import_explode_list_type_columns_in_parquet\nconfig.toml option must be set to true. (Note that\nthis setting is disabled by default.) When this option is enabled,\ncolumns with list type are \"exploded\" into separate new columns. That\nis, each list in a cell is split into separate items which are then used\nto create new columns. Refer to the following image for a visual\nrepresentation of this process:\n[]\n- You can create new datasets from Python script files (custom\n recipes) by selecting Data Recipe URL or Upload Data Recipe from the\n Add Dataset (or Drag & Drop) dropdown menu.", + "prompt_type": "plain" + }, + { + "output": "Snowflake Integration\nOverview\nThis document describes how to use the external function feature of\nSnowflake to invoke Driverless AI models as HTTP REST API endpoints. Using the external function requires some setup and configuration in\nSnowflake and Amazon. For more information, refer to the Snowflake\ndocumentation on external functions. Note\nDownloads:\n- Download the Driverless AI Snowflake Java UDF. - Download the Driverless AI Snowflake external function\n (dai-snowflake-integration.tgz). The setup process for the Java UDF is typically easier than for the\nexternal function. []\nRequirements\n1. Snowflake login credentials\n2. Amazon EC2 login credentials\n3. Driverless AI MOJO (pipelineSF.mojo)\n - Included in the demo files\n4. DAIMojoRestServer\n - Included in the demo files\n5. Driverless AI license\n - Provided through the partnership portal\n - Copy the license to the Snowflake_H2Oai directory. Name the file\n license.sig. 6. 
Java JDK 1.8\n - An open source JDK is included in the demo zip file and the demo\n scripts use that as the default.", + "prompt_type": "plain" + }, + { + "output": "The output of the\n command should indicate JDK 1.8, for example:\n - If the output does not show JDK 1.8, download a 1.8 JDK\n for your environment from one of the following sites:\n - https://www.azul.com/downloads/zulu-community/\n - https://openjdk.java.net/install/\nSecurity\nWhen using the external function, a call is made from Snowflake to the\nAWS API Gateway. This requires the configuration of trust relationships\nin AWS so that the call can be made. The H2O REST Server only accepts calls from the AWS Gateway endpoint. When the parameter\n-DSecureModelAllowAgent=\"AmazonAPIGateway.|snowflake.\" is added to the\ncommand line, it\u2019s even possible to further limit this to a specific AWS\nfunction. Enabling -DModelSecureEndPoints=/** protects the Rest Server by\nrequiring full authentication, effectively blocking requests. Installation\nDownloads\nDownload the Driverless AI Snowflake Java UDF. Download the Driverless AI Snowflake external function\n(dai-snowflake-integration.tgz).", + "prompt_type": "plain" + }, + { + "output": "The following installation includes steps in Snowflake, AWS, and an EC2\ninstance where the H2O REST server is installed. The following steps outline the REST server installation:\n1. Create an EC2 Instance, a demo system should have the following\n minimum specification:\n - Operating System: Linux\n - CPU: 2\n - Memory: 16GB\n - Disk: 500MB\n2. Copy the distribution to the EC2 instance and extract the file. 3. Create the database. 4. Populate the table with the sample data. 5. Verify that the data is available. Starting the REST Server\nUse the following steps to start the H2O REST server on the EC2\ninstance. 1. Ensure the current working directory is Snowflake-H2Oai/Function. 2. Press ENTER to background the program. The log is written to\n nohup.log. 3. 
The REST server initiates after several seconds have passed. Check\n for a ready message similar to the following:\nVerify REST Server Installation\nTo verify that the REST server and its model components were installed\nsuccessfully and that the server initialized correctly:\n1.", + "prompt_type": "plain" + }, + { + "output": "Run the following script from a separate terminal window:\nStopping the REST Server\nTo stop the H2O REST server on the EC2 instance, run the following\ncommands:\n cd Snowflake-H2Oai/Function\n ./stopServer.sh\nExternal Function Example\nThe following is an example of an external function:\n create or replace api integration demonstration_external_api_integration_01\n api_provider=aws_api_gateway \n api_aws_role_arn='arn:aws:iam::nnnnnnnn:role/snowflake' \n api_allowed_prefixes=('https://aaaaaaaa.execute-api.us-east-1.amazonaws.com/MojoTest') \n enabled=true;\n create or replace external function H2OPredict(v varchar, v0 number, v1 varchar, v2 number, v3 number, v4 number, v5 number, v6 varchar, v7 varchar, v8 number, v9 number, v10 number, v11 number)\n returns variant\n api_integration = demonstration_external_api_integration_01\n as 'https://aaaaaaaa.execute-api.us-east-1.amazonaws.com/MojoTest';\nFunction Data Types\nThe preceding function passes 13 parameters (v to v11).", + "prompt_type": "plain" + }, + { + "output": "- If the data in the table is a float and the function uses the\nSQL Examples\nOnce the Snowflake and AWS Gateway have been configured, the following\nexample SQL statements return predictions:\n select H2OPredict('Modelname=pipelineSF.mojo', LOAN_AMNT, TERM, INT_RATE, INSTALLMENT, EMP_LENGTH, ANNUAL_INC, VERIFICATION_STATUS, ADDR_STATE, DTI, DELINQ_2YRS, REVOL_BAL, REVOL_UTIL ) from LENDINGCLUB where ADDR_STATE='NJ' order by ID;\nPassing Runtime Parameters\nThe following is a list of parameters used to pass specific values to\nthe REST server:\n- Modelname: The name of the Driverless AI MOJO file that exists in\n 
the REST server ModelDirectory. This is pipeline.mojo by default. - Prediction: The numeric prediction to use. This is 0 by default. Sample parameter usage:\n select *, H2OPredict('Modelname=pipelineSF.mojo Prediction=0',LOAN_AMNT, TERM, INT_RATE, INSTALLMENT, EMP_LENGTH, ANNUAL_INC, VERIFICATION_STATUS, \n ADDR_STATE, DTI, DELINQ_2YRS, REVOL_BAL, REVOL_UTIL ) from LENDINGCLUB;\n Request: 10625, 36 months,6.62,326.23,4,33000,VERIFIED - income,WA,27.38,0,6290,46.3 \n Response: [\"bad_loan.0 : 0.917305\",\"bad_loan.1 : 0.08269503\"]\n 0.917305\nAdvanced Setup\nThe Snowflake External Function allows custom HTTP headers to be\ndefined.", + "prompt_type": "plain" + }, + { + "output": "create or replace external function H2OPredictHDR(v0 number, v1 varchar, v2 number, v3 number, v4 number, v5 number, v6 varchar, v7 varchar, v8 number, v9 number, v10 number, v11 number)\n returns variant\n HEADERS=('modelname' = 'pipelineSF.mojo')\n api_integration = demonstration_external_api_integration_01\n as 'https://aaaaaaaa.execute-api.us-east-1.amazonaws.com/production'; \nThis allows function calls to not require any parameters. A function by\nitself is enough for each model:\n select id, H2OPredictHDR(LOAN_AMNT, TERM, INT_RATE, INSTALLMENT, EMP_LENGTH, ANNUAL_INC, VERIFICATION_STATUS, \n ADDR_STATE, DTI, DELINQ_2YRS, REVOL_BAL, REVOL_UTIL ) from LENDINGCLUB;\nThe prediction can also be passed if required. Otherwise, a probability\nof 0 is returned. Building Models\nThe Snowflake external function feature lets you build Driverless AI\nmodels from a Snowflake worksheet. When requesting Driverless AI to\nbuild a model from a worksheet, the build status is updated in a table\ncalled MODELBUILD so that the build can be monitored.", + "prompt_type": "plain" + }, + { + "output": "Note: When the build finishes, the build experiment UUID is reported for\nauditability purposes. 
Define build function example:\n create or replace external function H2OBuild(v varchar)\n returns variant\n api_integration = demonstration_external_api_integration_01\n as 'https://bbbbb.execute-api.us-east-1.amazonaws.com/production';\nDefine Snowflake Table\nA Snowflake table is used to track the status of the model build that\nRequesting a Build Example\nUse the function H2OBuild to change the requesting parameters:\n select H2OBuild('Build --Table=LENDINGCLUB2 --Target=BAD_LOAN --Modelname=custchurn.mojo') ;\nFor more information on the parameters to the build request, see the\nfollowing table:\n ----------------------------------------------------------------------\n Parameter Optional Description\n ------------- ------------------------------------- ------------------\n Table no Defines which\n Snowflake table to\n use for the model\n build\n Target no The column\n (feature) name to\n use as the models\n target from\n training\n Modelname no The name the model\n will have when\n deployed\n Accuracy yes Model accuracy\n setting\n Time yes Model experiment\n time\n Inter yes Model\n pretability interpretability\n setting\n User yes Username required\n to access\n Snowflake table\n Password yes Password required\n to access\n Snowflake table\n Warehouse yes Snowflake\n warehouse\n Database yes Snowflake database\n Schema yes Snowflake schema\n ----------------------------------------------------------------------\n : Build Parameters\nDeployment\nOnce the model has finished building, it is copied to the REST server\nand becomes available for the H2OPredict scoring function.", + "prompt_type": "plain" + }, + { + "output": "By default,\nthis is /home/ec2-user/Snowflake-H2Oai/Function. Note: The script code must be updated based on the environment you are\nusing. Driverless AI Snowflake Configuration\nThe Driverless AI configuration uses the standard default settings\nexcept for settings related to user security. 
Use the authentication\nmethod that is best suited to the environment that you are using. For\nmore information, see config_file and dai_auth. authentication_method = \"local\"\n local_htpasswd_file = \"/home/ec2-user/dai-1.8.5.1-linux-x86_64/.htpasswd\" \n This resource must be secured from unauthorized access and use. To create a username and password using local authentication:\n sudo htpasswd -B -c .htpasswd snowflake \n Password yourpassword\nRequirements\nThe build functionality invokes a Python program that uses the\nDriverless AI Python Client to create an experiment. The following\npackages must be available:\n- sudo yum install httpd\n- sudo yum install python3\n- sudo pip3 install driverlessai\n- sudo pip3 install --upgrade snowflake-connector-python\nSample Workbook\nThe following example shows how to use the functions once the initial\nsetup has been completed.", + "prompt_type": "plain" + }, + { + "output": "Authentication Methods\nDriverless AI supports Client Certificate, LDAP, Local, mTLS, OpenID,\nPAM, none, and unvalidated (default) authentication. These can be\nconfigured by specifying the environment variables when starting the\nDriverless AI Docker image or by specifying the appropriate\nconfiguration options in the config.toml file. Notes:\n- You can enable multiple authentication methods with the\n additional_authentication_methods config.toml setting. These are\n enabled alongside the default method specified with the\n authentication_method config.toml setting. Login forms for each\n additional method are available on the\n /login/ path. - If multiple authentication methods are enabled, each method must be\n set up so that it results in the same username to provide access to\n the same resources. - Driverless AI is also integrated with IBM Spectrum Conductor and\n supports authentication from Conductor. 
Contact sales@h2o.ai for\n more information about using IBM Spectrum Conductor authentication.", + "prompt_type": "plain" + }, + { + "output": "Dataset Options\nThe following is a list of options that are available for every dataset\non the Datasets page. To view these options, click Click for Actions\nnext to any dataset listed on the Datasets page. - Details: View detailed information about the dataset. For more\n information, see view_dataset. - Visualize: View a variety of visualizations generated by Driverless\n AI using the dataset. For more information, see visualize_dataset. - Split: Split the dataset into two subsets. For more information, see\n split_dataset. - Predict: Opens the Experiment Setup page and automatically specifies\n the selected dataset as the training dataset. - Predict Wizard: Opens the Driverless AI experiment setup wizard. For\n more information, see dai_wizard. - Join Wizard: Opens the Driverless AI dataset join wizard. - Rename: Rename the dataset. - Download: Download the dataset to your local file system. - Display Logs: View logs relating to the dataset. - Delete: Delete the dataset from the list of datasets on the Datasets\n page.", + "prompt_type": "plain" + }, + { + "output": "Install on Ubuntu\nThis section describes how to install the Driverless AI Docker image on\nUbuntu. The installation steps vary depending on whether your system has\nGPUs or if it is CPU only. Environment\n -------------------------------------------\n Operating System GPUs? Min Mem\n ------------------------- ------- ---------\n Ubuntu with GPUs Yes 64 GB\n Ubuntu with CPUs No 64 GB\n -------------------------------------------\nInstall on Ubuntu with GPUs\nNote: Driverless AI is supported on Ubuntu 16.04 or later. Open a Terminal and ssh to the machine that will run Driverless AI. Once\nyou are logged in, perform the following steps. 1. Retrieve the Driverless AI Docker image from\n https://www.h2o.ai/download/. 
(Note that the contents of this Docker\n image include a CentOS kernel and CentOS packages.) 2. Install and run Docker on Ubuntu (if not already installed):\n3. Install nvidia-docker2 (if not already installed). More information\n is available at\n https://github.com/NVIDIA/nvidia-docker/blob/master/README.md.", + "prompt_type": "plain" + }, + { + "output": "Verify that the NVIDIA driver is up and running. If the driver is\n not up and running, log on to\n http://www.nvidia.com/Download/index.aspx?lang=en-us to get the\n latest NVIDIA Tesla V/P/K series driver:\n5. Set up a directory for the version of Driverless AI on the host\n machine:\n6. Change directories to the new folder, then load the Driverless AI\n Docker image inside the new directory:\n7. Enable persistence of the GPU. Note that this needs to be run once\n every reboot. Refer to the following for more information:\n http://docs.nvidia.com/deploy/driver-persistence/index.html. 8. Set up the data, log, and license directories on the host machine:\n9. At this point, you can copy data into the data directory on the host\n machine. The data will be visible inside the Docker container. 10. Run docker images to find the image tag. 11. Start the Driverless AI Docker image and replace TAG below with the\n image tag. Depending on your install version, use the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command.", + "prompt_type": "plain" + }, + { + "output": "nvidia-docker. GPU support will not be available. **Watch the installation video** `here `__. Note that some of the images in this video may change between releases, but the installation steps remain the same. Open a Terminal and ssh to the machine that will run Driverless AI. Once you are logged in, perform the following steps. 1. Retrieve the Driverless AI Docker image from https://www.h2o.ai/download/. 2. Install and run Docker on Ubuntu (if not already installed): .. .. 
code:: bash # Install and run Docker on Ubuntu curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - sudo apt-key fingerprint 0EBFCD88 sudo add-apt-repository \\ \"deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable\" sudo apt-get update sudo apt-get install docker-ce sudo systemctl start docker 3. Set up a directory for the version of Driverless AI on the host machine: .. .. code:: bash # Set up directory with the version name mkdir |VERSION-dir| 4.", + "prompt_type": "plain" + }, + { + "output": "Set up the data, log, license, and tmp directories on the host machine (within the new directory): .. .. code:: bash # Set up the data, log, license, and tmp directories mkdir data mkdir log mkdir license mkdir tmp 6. At this point, you can copy data into the data directory on the host machine. The data will be visible inside the Docker container. 7. Run docker images to find the new image tag. 8. Start the Driverless AI Docker image. Note that GPU support will not be available. Note that from version 1.10, the DAI Docker image runs with an internal tini that is equivalent to using --init from Docker; if both are enabled in the launch command, tini will print a (harmless) warning message. .. We recommend --shm-size=256m in the docker launch command. But if the user plans to build :ref:`image auto model ` extensively, then --shm-size=2g is recommended for the Driverless AI docker command. .. 
code:: bash # Start the Driverless AI Docker image docker run \\ --pid=host \\ --rm \\ --shm-size=256m \\ -u `id -u`:`id -g` \\ -p 12345:12345 \\ -v `pwd`/data:/data \\ -v `pwd`/log:/log \\ -v `pwd`/license:/license \\ -v `pwd`/tmp:/tmp \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ h2oai/dai-ubi8-x86_64:|tag| Driverless AI will begin running: :: -------------------------------- Welcome to H2O.ai's Driverless AI --------------------------------- - Put data in the volume mounted at /data - Logs are written to the volume mounted at /log/20180606-044258 - Connect to Driverless AI on port 12345 inside the container - Connect to Jupyter notebook on port 8888 inside the container 9.", + "prompt_type": "plain" + }, + { + "output": "Upgrading the Docker Image -------------------------- This section provides instructions for upgrading Driverless AI versions that were installed in a Docker container. These steps ensure that existing experiments are saved. **WARNING**: Experiments, MLIs, and MOJOs reside in the Driverless AI tmp directory and are not automatically upgraded when Driverless AI is upgraded. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp directory before upgrading. If you did not build MLI on a model before upgrading Driverless AI, then you will not be able to view MLI on that model after upgrading. Before upgrading, be sure to run MLI jobs on models that you want to continue to interpret in future releases. If that MLI job appears in the list of Interpreted Models in your current version, then it will be retained after upgrading. If you did not build a MOJO pipeline on a model before upgrading Driverless AI, then you will not be able to build a MOJO pipeline on that model after upgrading.", + "prompt_type": "plain" + }, + { + "output": "**Note**: Stop Driverless AI if it is still running. 
Requirements ~~~~~~~~~~~~ We recommend to have NVIDIA driver >= installed (GPU only) in your host environment for a seamless experience on all architectures, including Ampere. Driverless AI ships with CUDA 11.2.2 for GPUs, but the driver must exist in the host environment. Go to `NVIDIA download driver `__ to get the latest NVIDIA Tesla A/T/V/P/K series drivers. For reference on CUDA Toolkit and Minimum Required Driver Versions and CUDA Toolkit and Corresponding Driver Versions, see `here `__ . .. note:: .. If you are using K80 GPUs, the minimum required NVIDIA driver version is 450.80.02. Upgrade Steps ~~~~~~~~~~~~~ 1. SSH into the IP address of the machine that is running Driverless AI. 2. Set up a directory for the version of Driverless AI on the host machine: .. .. code:: bash # Set up directory with the version name mkdir |VERSION-dir| # cd into the new directory cd |VERSION-dir| 3.", + "prompt_type": "plain" + }, + { + "output": "Azure Blob Store Setup\n\nDriverless AI lets you explore Azure Blob Store data sources from within\nthe Driverless AI application.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run\n--runtime=nvidia(>= Docker 19.03) ornvidia-docker(< Docker 19.03) command when starting the Driverless AI Docker image. Usedocker\nversionto check which version of Docker you are using. Supported Data Sources Using the Azure Blob Store Connector ----------------------------------------------------------- The following data sources can be used with the Azure Blob Store connector. - :ref:`Azure Blob Storage (general purpose v1)` - Blob Storage - :ref:`Azure Files (File Storage)` - :ref:`Azure Data Lake Storage Gen 2 (Storage V2)` The following data sources can be used with the Azure Blob Store connector when also using the HDFS connector. 
- :ref:`Azure Data Lake Gen 1 (HDFS connector required)` - :ref:`Azure Data Lake Gen 2 (HDFS connector optional)` Description of Configuration Attributes --------------------------------------- The following configuration attributes are specific to enabling Azure Blob Storage. -azure_blob_account_name: The Microsoft Azure Storage account name.", + "prompt_type": "plain" + }, + { + "output": "-azure_blob_account_key: Specify the account key that maps to your account name. -azure_connection_string: Optionally specify a new connection string. With this option, you can include an override for a host, port, and/or account name. For example, .. code:: bash azure_connection_string = \"DefaultEndpointsProtocol=http;AccountName=;AccountKey=;BlobEndpoint=http://:/;\" -azure_blob_init_path: Specifies the starting Azure Blob store path displayed in the UI of the Azure Blob store browser. -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. The following additional configuration attributes can be used for enabling an HDFS Connector to connect to Azure Data Lake Gen 1 (and optionally with Azure Data Lake Gen 2). -hdfs_config_path: The location the HDFS config folder path. This folder can contain multiple config files. -hdfs_app_classpath: The HDFS classpath.", + "prompt_type": "plain" + }, + { + "output": ".. _example1: Example 1: Enabling the Azure Blob Store Data Connector ------------------------------------------------------- .. container:: tabs .. group-tab:: Docker Image Installs This example enables the Azure Blob Store data connector by specifying environment variables when starting the Driverless AI Docker image. This lets users reference data stored on your Azure storage account using the account name, for example:https://mystorage.blob.core.windows.net. .. 
code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,azrbs\" \\ -e DRIVERLESS_AI_AZURE_BLOB_ACCOUNT_NAME=\"mystorage\" \\ -e DRIVERLESS_AI_AZURE_BLOB_ACCOUNT_KEY=\"\" \\ -p 12345:12345 \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure Azure Blob Store options in the config.toml file, and then specify that file when starting Driverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "Configure the Driverless AI config.toml file. Set the following configuration options: .. -enabled_file_systems\n= \"file, upload, azrbs\"-azure_blob_account_name =\n\"mystorage\"-azure_blob_account_key =\n\"\"2. Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example shows how to enable the Azure Blob Store data connector in the config.toml file when starting Driverless AI in native installs.", + "prompt_type": "plain" + }, + { + "output": "1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. .. 
:: # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, azrbs\" # Azure Blob Store Connector credentials azure_blob_account_name = \"mystorage\" azure_blob_account_key = \"\" 3. Save the changes when you are done, then stop/restart Driverless AI. .. _example2: Example 2: Mount Azure File Shares to the Local File System ----------------------------------------------------------- Supported Data Sources Using the Local File System ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Azure Files (File Storage) Mounting Azure File Shares ~~~~~~~~~~~~~~~~~~~~~~~~~~ Azure file shares can be mounted into the Local File system of Driverless AI. To mount the Azure file share, follow the steps listed on https://docs.microsoft.com/en-us/azure/storage/files/storage-how-to-use-files-linux. .. 
_example3: Example 3: Enable HDFS Connector to Connect to Azure Data Lake Gen 1 -------------------------------------------------------------------- This example enables the HDFS Connector to connect to Azure Data Lake Gen1.", + "prompt_type": "plain" + }, + { + "output": ".. container:: tabs .. group-tab:: Docker Image with the config.toml 1. Create an Azure AD web application for service-to-service authentication: https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-service-to-service-authenticate-using-active-directory 2. Add the information from your web application to the Hadoopcore-site.xmlconfiguration file: .. .. code:: bash fs.adl.oauth2.access.token.provider.type ClientCredential fs.adl.oauth2.refresh.url Token endpoint created in step 1. fs.adl.oauth2.client.id Client ID created in step 1 fs.adl.oauth2.credential Client Secret created in step 1 fs.defaultFS ADL URIt 3.", + "prompt_type": "plain" + }, + { + "output": "This file can found on any Hadoop version in:$HADOOP_HOME/share/hadoop/tools/lib/*. .. .. code:: bash echo \"$HADOOP_CLASSPATH:$HADOOP_HOME/share/hadoop/tools/lib/*\" 4. Configure the Driverless AI config.toml file. Set the following configuration options: .. .. code:: bash enabled_file_systems = \"upload, file, hdfs, azrbs, recipe_file, recipe_url\" hdfs_config_path = \"/path/to/hadoop/conf\" hdfs_app_classpath = \"/hadoop/classpath/\" hdfs_app_supported_schemes = \"['adl://']\" 5. Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. 
container:: group-tab Native Installs 1.", + "prompt_type": "plain" + }, + { + "output": "https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-service-to-service-authenticate-using-active-directory 2. Add the information from your web application to the Hadoop core-site.xml configuration file: .. .. code:: bash fs.adl.oauth2.access.token.provider.type ClientCredential fs.adl.oauth2.refresh.url Token endpoint created in step 1. fs.adl.oauth2.client.id Client ID created in step 1 fs.adl.oauth2.credential Client Secret created in step 1 fs.defaultFS ADL URI 3.", + "prompt_type": "plain" + }, + { + "output": "abfs[s]://file_system@account_name.dfs.core.windows.net///. .. container:: tabs .. group-tab:: Docker Image with the config.toml 1. Create an Azure Service Principal: https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal 2. Grant permissions to the Service Principal created in step 1 to access blobs: https://docs.microsoft.com/en-us/azure/storage/common/storage-auth-aad 3. Add the information from your web application to the Hadoop core-site.xml configuration file: .. .. code:: bash fs.azure.account.auth.type OAuth fs.azure.account.oauth.provider.type org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider fs.azure.account.oauth2.client.endpoint Token endpoint created in step 1. fs.azure.account.oauth2.client.id Client ID created in step 1 fs.azure.account.oauth2.client.secret Client Secret created in step 1 4.", + "prompt_type": "plain" + }, + { + "output": "These files can be found on any Hadoop version 3.2 or higher at:$HADOOP_HOME/share/hadoop/tools/lib/*.. .. code:: bash echo \"$HADOOP_CLASSPATH:$HADOOP_HOME/share/hadoop/tools/lib/*\" **Note**: ABFS is only supported for Hadoop version 3.2 or higher. 5. Configure the Driverless AI config.toml file. Set the following configuration options: .. .. 
code:: bash enabled_file_systems = \"upload, file, hdfs, azrbs, recipe_file, recipe_url\" hdfs_config_path = \"/path/to/hadoop/conf\" hdfs_app_classpath = \"/hadoop/classpath/\" hdfs_app_supported_schemes = \"['abfs://']\" 6. Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs 1.", + "prompt_type": "plain" + }, + { + "output": "https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal 2. Grant permissions to the Service Principal created on step 1 to access blobs: https://docs.microsoft.com/en-us/azure/storage/common/storage-auth-aad 3. Add the information from your web application to the hadoopcore-site.xmlconfiguration file: .. .. code:: bash fs.azure.account.auth.type OAuth fs.azure.account.oauth.provider.type org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider fs.azure.account.oauth2.client.endpoint Token endpoint created in step 1. fs.azure.account.oauth2.client.id Client ID created in step 1 fs.azure.account.oauth2.client.secret Client Secret created in step 1 4.", + "prompt_type": "plain" + }, + { + "output": "These files can found on any hadoop version 3.2 or higher at:$HADOOP_HOME/share/hadoop/tools/lib/*.. .. code:: bash echo \"$HADOOP_CLASSPATH:$HADOOP_HOME/share/hadoop/tools/lib/*\" **Note**: ABFS is only supported for hadoop version 3.2 or higher 5. Configure the Driverless AI config.toml file. Set the following configuration options: .. .. 
code:: bash enabled_file_systems = \"upload, file, hdfs, azrbs, recipe_file, recipe_url\" hdfs_config_path = \"/path/to/hadoop/conf\" hdfs_app_classpath = \"/hadoop/classpath/\" hdfs_app_supported_schemes = \"['abfs://']\" 6. Save the changes when you are done, then stop/restart Driverless AI. Export MOJO artifact to Azure Blob Storage ------------------------------------------ In order to export the MOJO artifact to Azure Blob Storage, you must enable support for the shared access signatures (SAS) token.", + "prompt_type": "plain" + }, + { + "output": "Install on Google Compute\n\nDriverless AI can be installed on Google Compute using one of two\nmethods:\n\n- Install the Google Cloud Platform offering. This installs Driverless\n AI via the available GCP Marketplace offering.\n- Install and Run in a Docker Container on Google Compute Engine. This\n installs and runs Driverless AI from scratch in a Docker container\n on Google Compute Engine.\n\nSelect your desired installation procedure below:\n\ngoogle-cloud-platform google-docker-container", + "prompt_type": "plain" + }, + { + "output": "Automatic Visualization\n\ndatasets-viewing custom_viz", + "prompt_type": "plain" + }, + { + "output": "Supported Environments\nThe following tables list the environments that support Driverless AI. 
Linux\n ---------------------------------------------------------------------\n P ackage OS GPU C PU\n Type \n -------- ----------------------------------- ------------------- ----\n RPM RHEL 7 & 8/CentOS 7 & 8 CUDA 11.2 and x8 6\n above/CPU only 64\n DEB Ubuntu 16.04/Ubuntu 18.04/Ubuntu CUDA 11.2 and x8 6\n 20.04/Ubuntu 22.04 above/CPU only 64\n TAR SH Most Linux CUDA 11.2 and x8 6\n above/CPU only 64\n Docker Docker CE CUDA 11.2 and x8 6\n above/CPU only 64\n ---------------------------------------------------------------------\nNote\nUsing TensorFlow requires your CPUs to support Advanced Vector\nExtensions (AVX).", + "prompt_type": "plain" + }, + { + "output": "For install instructions, refer to linux. Windows 10 Pro, Enterprise, or Education\nCaution: Windows computers (laptops in particular) should only be used\nwith small datasets for the purpose of exploring the software. For\nserious use, server hardware is required. Consider spinning up a more\npowerful instance in the cloud instead of using a laptop. Avoid laptops\nwith less than 16 GB of RAM. GPUs are not supported on Windows. --------------------------------------------------------------------\n Package OS GPU CPU Min\n Type Support? Memory\n ---------- ------------------------------- ---------- ----- --------\n DEB Ubuntu 18.04 for WSL (not fully No x86 16 GB\n tested) _64 \n Docker Docker Desktop for Win 2.2.0.3 No x86 16 GB\n (42716) _64 \n --------------------------------------------------------------------\nFor install instructions, refer to install-on-windows.", + "prompt_type": "plain" + }, + { + "output": "Windows 10\nThis section describes how to install, start, stop, and upgrade\nDriverless AI on a Windows 10 machine. The installation steps assume\nthat you have a license key for Driverless AI. For information on how to\nobtain a license key for Driverless AI, visit\nhttps://h2o.ai/o/try-driverless-ai/. 
Once obtained, you will be prompted\nto paste the license key into the Driverless AI UI when you first log\nin, or you can save it as a .sig file and place it in the license folder\nthat you will create during the installation process. Overview of Installation on Windows\nTo install Driverless AI on Windows, use a Driverless AI Docker image. Notes:\n- GPU support is not available on Windows. - Scoring is not available on Windows. Caution: Installing Driverless AI on Windows 10 is not recommended for\nserious use. Environment\n -------------------------------------------------------------------\n Operating System GPU Support? Min Mem Suitable for\n ----------------------- --------------- --------- -----------------\n Windows 10 Pro No 16 GB Experimentation\n Windows 10 Enterprise No 16 GB Experimentation\n Windows 10 Education No 16 GB Experimentation\n -------------------------------------------------------------------\nNote: Driverless AI cannot be installed on versions of Windows 10 that\ndo not support Hyper-V.", + "prompt_type": "plain" + }, + { + "output": "Docker Image Installation\nNotes:\n- Be aware that there are known issues with Docker for Windows. More\n information is available here:\n https://github.com/docker/for-win/issues/188. - Consult with your Windows System Admin if\n - Your corporate environment does not allow third-party software\n installs\n - You are running Windows Defender\n - Your machine is not running with\n Enable-WindowsOptionalFeature -Online -FeatureName Microsoft-Windows-Subsystem-Linux. Watch the installation video here. Note that some of the images in this\nvideo may change between releases, but the installation steps remain the\nsame. Requirements\n- Windows 10 Pro / Enterprise / Education\n- Docker Desktop for Windows 2.2.0.3 (42716)\nNote: As of this writing, Driverless AI has only been tested on Docker\nDesktop for Windows version 2.2.0.3 (42716). Installation Procedure\n1. 
Retrieve the Driverless AI Docker image from\n https://www.h2o.ai/download/. 2. Download, install, and run Docker for Windows from\n https://docs.docker.com/docker-for-windows/install/.", + "prompt_type": "plain" + }, + { + "output": "Note that you may have to reboot after\n installation. 3. Before running Driverless AI, you must:\n4. Open a PowerShell terminal and set up a directory for the version of\n Driverless AI on the host machine:\n5. With Docker running, navigate to the location of your downloaded\n Driverless AI image. Move the downloaded Driverless AI image to your\n new directory. 6. Change directories to the new directory, then load the image using\n the following command:\n7. Set up the data, log, license, and tmp directories (within the new\n directory). 8. Copy data into the /data directory. The data will be visible inside\n the Docker container at /data. 9. Run docker images to find the image tag. 10. Start the Driverless AI Docker image. Be sure to replace path_to_\n below with the entire path to the location of the folders that you\n created (for example,\n \"c:/Users/user-name/driverlessai_folder/data\"). Note that this is\n regular Docker, not NVIDIA Docker. GPU support will not be\n available.", + "prompt_type": "plain" + }, + { + "output": "11. Connect to Driverless AI with your browser at\n http://localhost:12345. Stopping the Docker Image\nTo stop the Driverless AI Docker image, type Ctrl + C in the Terminal\n(Mac OS X) or PowerShell (Windows 10) window that is running the\nDriverless AI Docker image. Upgrading the Docker Image\nThis section provides instructions for upgrading Driverless AI versions\nthat were installed in a Docker container. These steps ensure that\nexisting experiments are saved. WARNING: Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\ndirectory and are not automatically upgraded when Driverless AI is\nupgraded. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. 
- Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. If you did not build MLI on a model before upgrading Driverless AI,\n then you will not be able to view MLI on that model after upgrading. Before upgrading, be sure to run MLI jobs on models that you want to\n continue to interpret in future releases.", + "prompt_type": "plain" + }, + { + "output": "If you did not build a MOJO pipeline on a model before upgrading\n Driverless AI, then you will not be able to build a MOJO pipeline on\n that model after upgrading. Before upgrading, be sure to build MOJO\n pipelines on all desired models and then back up your Driverless AI\n tmp directory. Note: Stop Driverless AI if it is still running. Requirements\nWe recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere. Driverless AI ships with CUDA 11.2.2 for GPUs, but the driver\nmust exist in the host environment. Go to NVIDIA download driver to get the latest NVIDIA Tesla A/T/V/P/K\nseries drivers. For reference on CUDA Toolkit and Minimum Required\nDriver Versions and CUDA Toolkit and Corresponding Driver Versions, see\nhere . Note\nUpgrade Steps\n1. SSH into the IP address of the machine that is running Driverless\n AI. 2. Set up a directory for the version of Driverless AI on the host\n machine:\n3.", + "prompt_type": "plain" + }, + { + "output": "reproducibility_level=1`` by default. []\nThe following section describes the different levels of reproducibility\nin more detail. Reproducibility levels\nYou can manually specify one of the four available levels of\nreproducibility with the reproducibility_level config option. The\nfollowing list describes how these levels of reproducibility are\ndistinct from one another. - 1 (default): Same experiment results for same operating system, same\n CPU(s), and same GPU(s). 
- 2: Same experiment results for same operating system, same CPU\n architecture, and same GPU architecture. - 3: Same experiment results for same operating system and same CPU\n architecture. Note that this reproducibility level excludes GPUs. - 4: Same experiment results for same operating system. This level is\n considered to be the best effort approximation. Notes:\n- Experiments are only reproducible when run on the same hardware\n (that is, when using the same number and type of GPUs/CPUs and the\n same architecture).", + "prompt_type": "plain" + }, + { + "output": "Custom Recipe Management\nThe following sections describe custom recipe management in Driverless\nAI. Understanding Custom Recipes\nCustom recipes are Python code snippets that can be uploaded into\nDriverless AI at runtime like plugins. Restarting Driverless AI is not\nrequired. Custom recipes can be provided for transformers, models, and\nscorers. During training of a supervised machine learning modeling\npipeline, Driverless AI can use these code snippets as building blocks\nin combination with or in place of built-in code pieces. When selecting\nrecipes for an experiment in the expert-settings panel, only custom\nrecipes that are currently active are visible. New datasets can be created by\nmodifying an existing dataset with a data recipe . You\ncan also apply data recipes as standalone recipes. Additionally, the set\nof MLI techniques and methodologies used in Driverless AI can be\nextended with recipes. For more information on MLI explainer recipes,\nsee mli-byor. Note\n- The Python Scoring Pipeline for deployment features full support for\ncustom recipes.", + "prompt_type": "plain" + }, + { + "output": "For\ncomplete examples that demonstrate how to download these dependencies\nand run the Python Scoring Pipeline, see Python_Pipeline. - In most cases, and especially for complex recipes, MOJO for model\n deployment is not available out of the box. However, it is possible\n to get the MOJO. 
Contact support@h2o.ai for more information about\n creating MOJOs for custom recipes. - To enable Shapley calculations in MLI, custom model recipes must use\n the has_pred_contribs method. Refer to the model recipe template for\n more info. - When enabling recipes, you can use the pip_install_options\n TOML option to specify your organization's\n internal Python package index as follows:\nAdding Custom Recipes\nTo add a custom recipe, go to the recipe management page by clicking\nRecipes in the top navigation, then click the Add Custom Recipes button. Select one of the following options from the drop-down menu that\nappears:\n[]\n- From computer: Add a custom recipe as a Python or ZIP file from your\n local file system.", + "prompt_type": "plain" + }, + { + "output": "For example, you can enter\n https://github.com/h2oai/driverlessai-recipes/ to add all the\n custom recipes contained in the official Recipes for\n Driverless AI repository. - A GitHub tree. For example, you can enter\n https://github.com/h2oai/driverlessai-recipes/tree/master/models\n to add only the custom model recipes contained in the official\n Recipes for Driverless AI repository, or enter\n https://github.com/h2oai/driverlessai-recipes/tree/master/models/algorithms\n to add only the custom algorithm recipes contained in the\n repository. - A file system path. This option is equivalent to the File\n System option when adding datasets. - From Bitbucket: Add a custom recipe from a Bitbucket repository. To\n use this option, your Bitbucket username and password must be\n provided along with the custom recipe Bitbucket URL. - With Editor: Add a custom recipe with a built-in code editor.", + "prompt_type": "plain" + }, + { + "output": "Note\nIf you set the _global_modules_needed_by_name parameter in a custom\nrecipe, then ensure that it is set on a single line before uploading it. 
Using line breaks when setting the _global_modules_needed_by_name\nparameter results in a syntax error when attempting to upload the custom\nrecipe. Managing Recipes\nTwo distinct views are available on this page:\n- List view: This view displays all available custom recipes. Only\n active recipes are listed by default, but deactivated recipes can\n also be viewed. For more information, see list-view. - Detail view: This view lets you edit custom recipe code in\n Driverless AI and save the edited code. The detail view is available\n for both active and deactivated recipes. For more information, see\n detail-view. List View\nThe following is a list of actions that you can take from the recipe\nlist view:\nGeneral actions:\n- View deactivated recipes by selecting Include inactive recipes. - Deactivate a recipe by selecting it and clicking Deactivate x\n Item(s).", + "prompt_type": "plain" + }, + { + "output": "Note that recipes can only be deactivated, not deleted. - Search and sort recipes. Note that if enough recipes are uploaded,\n they are listed on multiple pages. - Select which columns are visible on the list view. Recipe-specific actions:\n- Open: View a specific recipe in detail. - Edit note: Create or edit a note for a recipe to keep track of its\n functionality. - Deactivate: Deactivate the selected recipe. - Apply on Dataset (For data recipes only): Apply an existing data\n recipe to the dataset. For more information on modifying datasets\n with data recipes, see modify_by_recipe. - Apply Without Dataset (For data recipes only): Apply the selected\n data recipe as a standalone recipe. Detail View\nThe following is a list of actions that you can take from the recipe\ndetail view:\n- Edit custom recipe code:\n - You can toggle an in-code search feature by pressing Control+F\n (or Command+F on Mac). 
- To save the edited recipe, click the Save as New Recipe and\n Activate button.", + "prompt_type": "plain" + }, + { + "output": "If you don't change both the ClassName and _display_name\n defined in the recipe, the old version of the recipe is\n automatically deactivated when a new version is saved and\n activated. New versions of existing recipes keep references to\n the original recipes, letting you keep track of changes\n throughout multiple versions. - You can download recipe code and deactivate recipes from this\n view. - View the recipe's name, type, ID, filename, creation date, and\n whether the recipe is currently active. - (For data recipes only) Apply the data recipe on a dataset or as a\n standalone recipe. - If a recipe was downloaded from an external URL, the link is\n displayed under Original URL. - (For Individual recipes only) View a link to the experiment from\n which the Individual recipe was derived. - More Actions drop-down:\n - (For Individual recipes only) To create a new experiment using\n the Individual recipe, click Use in New Experiment.", + "prompt_type": "plain" + }, + { + "output": "- Download the recipe by clicking Download. - Deactivate the recipe by clicking Deactivate. Note that\n recipes can only be deactivated, not deleted. []\nNote\nIf _display_name is not defined in a recipe, then that recipe's display\nname is derived from the ClassName defined in the recipe. Examples\ncustom-recipes-data-recipes custom-recipes-h2o-3-algos\ncustom-recipes-scorer custom-recipes-transformers\nAdditional Resources\n- Custom Recipes FAQ :\n For answers to common questions about custom recipes. - How to Write a Recipe :\n A guide for writing your own recipes. - Data Template :\n A template for creating your own Data recipe. - Model Template :\n A template for creating your own Model recipe. 
- Scorer Template :\n A template for creating your own Scorer recipe.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Clients\n\npython_client r_client", + "prompt_type": "plain" + }, + { + "output": "Monitoring and Logging\n\npending-jobs logging", + "prompt_type": "plain" + }, + { + "output": "GPUs in Driverless AI\nDriverless AI can run on machines with only CPUs or machines with CPUs\nand GPUs. For the best (and intended-as-designed) experience, install\nDriverless AI on modern data center hardware with GPUs and CUDA support. Feature engineering and model building are primarily performed on CPU\nand GPU respectively. For this reason, Driverless AI benefits from\nmulti-core CPUs with sufficient system memory and GPUs with sufficient\nRAM. For best results, we recommend GPUs that use the Pascal or Volta\narchitectures. Ampere-based NVIDIA GPUs are also supported on x86\nmachines (requires NVIDIA CUDA Driver 11.2 or later). Driverless AI ships with NVIDIA CUDA 11.2.2 and cuDNN. Image and NLP use cases in\nDriverless AI benefit significantly from GPU usage. Model building algorithms, namely, XGBoost (GBM/DART/RF/GLM), LightGBM\n(GBM/DART/RF), PyTorch (BERT models) and TensorFlow (CNN/BiGRU/ImageNet)\nmodels utilize GPU. Model scoring on GPUs can be enabled by selecting\nnon-zero number of GPUs for prediction/scoring via\nnum_gpus_for_prediction system expert setting\nof the experiment.", + "prompt_type": "plain" + }, + { + "output": "MOJO\nscoring for productionizing models on GPUs can be enabled for some use\ncases. See tensorflow_nlp_have_gpus_in_production in\nconfig.toml . Driverless AI Tensorflow, BERT and\nImage models support C++ MOJO scoring for\nproduction. Feature engineering transformers such as\nClusterDist cuML Transformer, TruncSVDNum cuML Transformer, DBSCAN cuML\nTransformer run on GPUs. With Driverless AI Dask multinode setup, GPUs\ncan be used for extensive model hyperparameter search. 
For details see -\nDriverless AI & NVIDIA cuDNN\nNVIDIA cuDNN is a library for deep neural nets built using CUDA and\noptimized for GPUs. For NLP data modeling and feature\nengineering, Driverless AI uses cuDNN PyTorch (BERT models) and\nTensorFlow NLP recipe based on CNN and BiGRU (RNN) deep learning models. For modeling Image data, TensorFlow (ImageNet\nmodels) are used. Driverless AI & NVIDIA RAPIDS\nNVIDIA RAPIDS provides PyData APIs that are GPU-accelerated. Driverless\nAI integrates RAPIDS cuML (scikit-learn)\ntransformers namely ClusterDist cuML Transformer,\nTruncSVDNum cuML Transformer, DBSCAN cuML Transformer for feature\nengineering and RAPIDS cuDF extension to\nXGBoost GBM / DART for building machine learning\nmodels on GPUs.", + "prompt_type": "plain" + }, + { + "output": "Automatic Feature Engineering\nDriverless AI performs automatic feature engineering as part of an\nexperiment's model building process. New features are created by\nperforming transformations and/or\ninteractions on the dataset columns. The\ndefault transformers picked up by Driverless AI depend on the interpretability\nsettings of an experiment. For more interpretable models, simpler\ntransformations are applied. This can be seen in the preview of the\nexperiment. Feature engineering expert settings like include/exclude\ntransformers can be used to control the applied transformations. Transformers like binning, target encoding, weight of evidence,\nclustering, dimensionality reduction, autoencoders, TensorFlow, NLP BERT\nmodels, lags, aggregates, can be used to create Feature interactions. Feature creation and selection is evolutionary (based on variable\nimportance of previous iteration) in nature and uses a\ngenetic algorithm to find the best set of feature transformations\nand model parameters for an experiment/dataset.", + "prompt_type": "plain" + }, + { + "output": "Launching Driverless AI\nDriverless AI is tested on Chrome and Firefox but is supported on all\nmajor browsers. 
For the best user experience, we recommend using Chrome. 1. After Driverless AI is installed and started, open a browser and\n navigate to :12345. 2. The first time you log in to Driverless AI, you will be prompted to\n read and accept the Evaluation Agreement. You must accept the terms\n before continuing. Review the agreement, then click I agree to these\n terms to continue. 3. Log in by entering unique credentials. For example:\n Username: h2oai Password: h2oai\n4. As with accepting the Evaluation Agreement, the first time you log\n in, you will be prompted to enter your License Key. Click the Enter\n License button, then paste the License Key into the License Key\n entry field. Click Save to continue. This license key will be saved\n in the host machine's /license folder. Upon successful completion, you will be ready to add datasets and run\nexperiments.", + "prompt_type": "plain" + }, + { + "output": "- Standard output (stdout) log files: These log files are the standard\n output for different servers (given as prefix). - Standard error (stderr) log files: These log files are standard\n error for different servers (given as prefix). - TMPDIR directories: These are temporary directories used by various\n packages or servers. - uploads directory: This directory is where files are uploaded by the\n web server. - funnels directory: This directory is where certain forked processes\n store stderr or stdout files. - sys directory: This directory is used by the system to perform\n various generic tasks. - startup_job_user directory: This directory is used by the system to\n perform various startup tasks. Note\nServer logs and pid files are located in separate directories\n(server_logs and pids, respectively). Resources\n[]\nThe Resources drop-down menu lets you view system information, download\nDAI clients, and view DAI-related tutorials and guides. 
- System Info: View information relating to hardware utilization and\n worker activity.", + "prompt_type": "plain" + }, + { + "output": "- Python Client: Download the Driverless AI Python client. For more\n information, see python_client. - R Client: Download the Driverless AI R client. For more information,\n see r_client. - MOJO Java Runtime: Download the MOJO Java Runtime. For more\n information, see Mojo_Pipeline. - MOJO Py Runtime: Download the MOJO Python Runtime. For more\n information, see cpp_scoring_pipeline. - MOJO R Runtime: Download the MOJO R Runtime. For more information,\n see cpp_scoring_pipeline. - Documentation: View the DAI documentation. - About: View version, current user, and license information for your\n Driverless AI install. - API Token: Click to retrieve an access token for authentication\n purposes. []\nUser Options\nTo view news and announcements relating to Driverless AI, click User in\nthe top navigation bar, then click Messages. To log out of Driverless\nAI, click User, then click Logout. You can also configure various\nuser-specific settings by clicking User Settings.", + "prompt_type": "plain" + }, + { + "output": "Data Connectors\nDriverless AI provides a number of data connectors for accessing\nexternal data sources. The following data connection types are enabled\nby default:\n- upload: The standard upload feature of Driverless AI. - file: Local file system or server file system. - hdfs: Hadoop file system. Remember to configure the HDFS config\n folder path and keytab. - s3: Amazon S3. Optionally configure secret and access key. - recipe_file: Custom recipe file upload. - recipe_url: Custom recipe upload via URL. 
Additionally, the following connections types can be enabled by\nmodifying the enabled_file_systems configuration option (Native\ninstalls) or environment variable (Docker image installs):\n- dtap: Blue Data Tap file system, remember to configure the DTap\n section\n- gcs: Google Cloud Storage, remember to configure\n gcs_path_to_service_account_json\n- gbq: Google Big Query, remember to configure\n gcs_path_to_service_account_json\n- hive: Hive Connector, remember to configure Hive\n- minio: Minio Cloud Storage, remember to configure\n secret and access key\n- snow: Snowflake Data Warehouse, remember to configure Snowflake\n credentials\n- kdb: KDB+ Time Series Database, remember to configure KDB\n credentials\n- azrbs: Azure Blob Storage, remember to configure Azure credentials\n- jdbc: JDBC Connector, remember to configure JDBC\n- h2o_drive: H2O Drive, remember to configure h2o_drive_endpoint_url\n- feature_store: Feature Store, remember to configure\n feature_store_endpoint_url below\nThese data sources are exposed in the form of the file systems, and each\nfile system is prefixed by a unique prefix.", + "prompt_type": "plain" + }, + { + "output": "Cloud Installation\n\nDriverless AI is available on the following cloud platforms:\n\n- H2O AI Cloud (HAIC)\n- AWS - Amazon Machine Image (AMI) \n- Azure \n- Google Cloud \n\nThe installation steps for AWS, Azure, and Google Cloud assume that you\nhave a license key for Driverless AI. For information on how to obtain a\nlicense key for Driverless AI, visit\nhttps://h2o.ai/o/try-driverless-ai/. 
Once obtained, you will be prompted\nto paste the license key into the Driverless AI UI when you first log\nin, or you can save it as a .sig file and place it in the license folder\nthat you will create during the installation process.\n\ninstall/aws install/azure install/google-compute", + "prompt_type": "plain" + }, + { + "output": "Sampling in Driverless AI\n\nData Sampling\n\nDriverless AI does not perform any type of data sampling unless the\ndataset is big or highly imbalanced (for improved accuracy). What is\nconsidered big is dependent on your accuracy setting and the", + "prompt_type": "plain" + }, + { + "output": "statistical_threshold_data_size_largeparameter in the :ref:`config.toml file ` or in the Expert Settings. You can see if the data will be sampled by viewing the Experiment Preview when you set up the experiment. In the experiment preview below, I can see that my data was sampled down to 5 million rows for the final model, and to 100k rows for the feature evolution part of the experiment. .. figure:: images/experiment-settings-summary.png :alt: If Driverless AI decides to sample the data based on these settings and the data size, then Driverless AI performs the following types of sampling at the start of (and/or throughout) the experiment: - Random sampling for regression problems - Stratified sampling for classification problems - Imbalanced sampling for binary problems where the target distribution is considered imbalanced and imbalanced sampling methods are enabled (imbalance_sampling_methodnot set to\"off\"``)\nImbalanced Model Sampling Methods\nImbalanced sampling techniques can help in binary classification use\ncases with highly imbalanced outcomes (churn, fraud, rare event\nmodeling, etc.)", + "prompt_type": "plain" + }, + { + "output": "ImbalancedLightGBMModelandImbalancedXGBoostGBMModel. 
Both perform repeated stratified sampling (bagging) inside their fit() method in an attempt to speed up modeling and to improve the resolution of the decision boundary between the two classes. Because these models are presented a training dataset with a different prior than the original data, they require a probability correction that is performed as part of postprocessing in the predict() method. When imbalanced sampling is enabled, no sampling is performed at the start of the experiment for either the feature evolution phase or the final model pipeline. Instead, sampling (with replacement) is performed during model fitting, and the model is presented a more balanced target class distribution than the original data. Because the sample is usually much smaller than the original data, this process can be repeated many times and each internal model's prediction can be averaged to improve accuracy (bagging). By default, the number of bags is automatically determined, but this value can be specified in expert settings (imbalance_sampling_number_of_bags=-1``\nmeans automatic).", + "prompt_type": "plain" + }, + { + "output": "\"over_under_sampling\", each bag can have a slightly different balance between minority and majority classes. There are multiple settings for imbalanced sampling: - Disabled (imbalance_sampling_method=\"off\", the default) - Automatic (imbalance_sampling_method=\"auto\"). A combination of the two methods below. - Under- and over-sample both minority and majority classes to reach roughly class balance in each sampled bag (imbalance_sampling_method=\"over_under_sampling\"). If original data has 500:10000 imbalance, this method could sample 1000:1500 samples for the first bag, 500:400 samples for the second bag, and so on. - Under-sample the majority class to reach exact class balance in each sampled bag (imbalance_sampling_method=\"under_sampling\"). Would create 500:500 samples per bag for the same example imbalance ratio . 
Each bag would then sample the 500 rows from each class with replacement, so each bag is still different. The amount of imbalance controls how aggressively imbalanced models are used for the experiment (ifimbalance_sampling_method is not \"off\"): - By default, imbalanced is defined as when the majority class is 5 times more common than the minority class (imbalance_ratio_sampling_threshold=5, configurable).", + "prompt_type": "plain" + }, + { + "output": "- By default, heavily imbalanced is defined as when the majority class is 25 times more common than the minority class (heavy_imbalance_ratio_sampling_threshold=25, configurable). In highly imbalanced cases, imbalanced models are used exclusively. Notes: - The binary imbalanced sampling techniques and settings described in this section apply only to the **Imbalanced Model** types listed above. - The data has to be large enough to enable imbalanced sampling: by default,imbalance_sampling_threshold_min_rows_originalis set to 100,000 rows. - Ifimbalance_sampling_number_of_bags=-1(automatic) andimbalance_sampling_method=\"auto\", the number of bags will be automatically determined by the experiment's accuracy settings and by the total size of all bags together, controlled byimbalance_sampling_max_multiple_data_size, which defaults to1. So all bags together will be no larger than 1x the original data by default. For an imbalance of 1:19, each balanced 1:1 sample would be as large as 10% of the data, so it would take up to 10 such 1:1 bags (or approximately 10 if the balance is different or slightly random) to reach that limit.", + "prompt_type": "plain" + }, + { + "output": "That's why the other limit of 3 (by default) for feature evolution exists. Feel free to adjust to your preferences. - Ifimbalance_sampling_number_of_bags=-1(automatic) andimbalance_sampling_method=\"over_under_sampling\"or\"under_sampling\", the number of bags will be equal to the experiment's accuracy settings (accuracy 7 will use 7 bags). 
- The upper limit for the number of bags can be specified separately for feature evolution (imbalance_sampling_max_number_of_bags_feature_evolution) and globally (i.e., final model) set by (imbalance_sampling_max_number_of_bags) and both will be strictly enforced. - Instead of balancing the target class distribution via default value ofimbalance_sampling_target_minority_fraction=-1(same as setting it to 0.5), one can control the target fraction of the minority class. So if the data starts with a 1:1000 imbalance and you wish to model with a 1:9 imbalance, specifyimbalance_sampling_target_minority_fraction=0.1.", + "prompt_type": "plain" + }, + { + "output": "Ensemble Learning in Driverless AI\nThis section describes Driverless AI's ensemble learning capabilities. Ensemble Method\nAn ensemble is a hierarchical composition of multiple models, where\nevery level in the hierarchy uses the output of the previous level as\ninput. The simplest ensemble is a 2-layer architecture with a single\nlinear model (the meta model or meta learner) combining the predictions\nfrom several first layer models (base models). This is the default\nensemble model in Driverless AI due to its robustness and linear\nproperties that allow Shapley contributions to be fully interpretable\neven for ensembles. By default, the meta learner is a linear blender that assigns\nnon-negative weights (that sum to 1) to all the base models. The weights\nare assigned at the model level and obtained using cross-validation (to\navoid overfitting of the meta learner). When making prediction on a test\nset, the predictions from all cross-validation models are averaged. 
For\nexample, if 2 models are ensembled together (e.g., a LightGBM model and\nan XGBoost model, each doing 4-fold cross validation), then the linear\nblender will find a weight for all 4 LightGBM models (e.g., 0.37) and a\nweight for all 4 XGBoost models (e.g., 0.63).", + "prompt_type": "plain" + }, + { + "output": "When Driverless AI ensembles a single model\n(level 1), then it is simply taking the average of the CV model\npredictions (the model itself is assigned a weight of 1). Ensemble Levels\nDriverless AI has multiple ensemble levels that are tied to the accuracy\nknob. As accuracy increases, the ensemble level increases. Ensemble level can also be controlled using\nEnsemble Level for Final Modeling Pipeline from\nthe Model settings of the expert settings panel. The following is a\ndescription of each ensemble level:\n- level 0: No ensemble, only a final single model. Cross validation is\n only used to determine the model validation performance. The final\n model is trained on the whole dataset. - level 1: Cross validation is performed for 1 model and the CV model\n predictions are ensembled. - level 2: Cross validation is performed for 2 models and the CV model\n predictions are ensembled. For example, Driverless AI may choose to\n ensemble an XGBoost model and a LightGBM model.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI MLI Standalone Python Scoring Package\nThis package contains an exported model and Python 3.8 source code\nexamples for productionizing models built using H2O Driverless AI\nMachine Learning Interpretability (MLI) tool. This is only available for\ninterpreted models and can be downloaded by clicking the Scoring\nPipeline button on the Interpreted Models page. The files in this package let you obtain reason codes for a given row of\ndata in a couple of different ways:\n- From Python 3.8, you can import a scoring module and use it to\n transform and score on new data. 
- From other languages and platforms, you can use the TCP/HTTP scoring\n service bundled with this package to call into the scoring pipeline\n module through remote procedure calls (RPC). MLI Python Scoring Package Files\nThe scoring-pipeline-mli folder includes the following notable files:\n- example.py: An example Python script demonstrating how to import and\n interpret new records. - run_example.sh: Runs example.py (This also sets up a virtualenv with\n prerequisite libraries.)", + "prompt_type": "plain" + }, + { + "output": "This compares\n K-LIME and Driverless AI Shapley reason codes. - tcp_server.py: A standalone TCP server for hosting MLI services. - http_server.py: A standalone HTTP server for hosting MLI services. - run_tcp_server.sh: Runs the TCP scoring service (specifically,\n tcp_server.py). - run_http_server.sh: Runs HTTP scoring service (runs http_server.py). - example_client.py: An example Python script demonstrating how to\n communicate with the MLI server. - example_shapley.py: An example Python script demonstrating how to\n compare K-LIME and Driverless AI Shapley reason codes. - run_tcp_client.sh: Demonstrates how to communicate with the MLI\n service via TCP (runs example_client.py). - run_http_client.sh: Demonstrates how to communicate with the MLI\n service via HTTP (using curl). Quick Start\nThere are two methods for starting the MLI Standalone Scoring Pipeline. Quick Start - Recommended Method\nThis is the recommended method for running the MLI Scoring Pipeline.", + "prompt_type": "plain" + }, + { + "output": "- You want to use a quick start approach. Prerequisites\n- A valid Driverless AI license key. - A completed Driverless AI experiment. - Downloaded MLI Scoring Pipeline. Running the MLI Scoring Pipeline - Recommended\n1. Download the TAR SH version of Driverless AI from\n https://www.h2o.ai/download/. 2. Use bash to execute the download. This creates a new dai-nnn folder. 3. Change directories into the new Driverless AI folder. 4. 
Run the following to install the Python Scoring Pipeline for your\n completed Driverless AI experiment:\n5. Run the following command to run the included scoring pipeline\n example:\nQuick Start - Alternative Method\nThis section describes an alternative method for running the MLI\nStandalone Scoring Pipeline. This version requires Internet access. Note\nIf you use a scorer from a version prior to 1.10.4.1, you need to add\nexport SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True prior to\ncreating the new scorer python environment, either in run_example.sh or\nin the same terminal where the shell scripts are executed.", + "prompt_type": "plain" + }, + { + "output": "Prerequisites\n- Valid Driverless AI license. - The scoring module and scoring service are supported only on Linux\n with Python 3.8 and OpenBLAS. - The scoring module and scoring service download additional packages\n at install time and require internet access. Depending on your\n network environment, you might need to set up internet access via a\n proxy. - Apache Thrift (to run the scoring service in TCP mode)\nExamples of how to install these prerequisites are below. Installing Python 3.8 on Ubuntu 16.10 or Later:\n sudo apt install python3.8 python3.8-dev python3-pip python3-dev \\\n python-virtualenv python3-virtualenv\nInstalling Python 3.8 on Ubuntu 16.04:\n sudo add-apt-repository ppa:deadsnakes/ppa\n sudo apt-get update\n sudo apt-get install python3.8 python3.8-dev python3-pip python3-dev \\\n python-virtualenv python3-virtualenv\nInstalling Conda 3.6:\n You can install Conda using either Anaconda or Miniconda. 
Refer to the\n links below for more information:\n - Anaconda - https://docs.anaconda.com/anaconda/install.html\n - Miniconda - https://docs.conda.io/en/latest/miniconda.html\nInstalling the Thrift Compiler\nRefer to Thrift documentation at\nhttps://thrift.apache.org/docs/BuildingFromSource for more information.", + "prompt_type": "plain" + }, + { + "output": "sudo ldconfig /usr/local/lib\nRunning the MLI Scoring Pipeline - Alternative Method\n1. On the MLI page, click the Scoring Pipeline button. 2. Unzip the scoring pipeline, and run the following examples in the\n scoring-pipeline-mli folder. MLI Python Scoring Module\nThe MLI scoring module is a Python module bundled into a standalone\nwheel file (name scoring*.whl). All the prerequisites for the scoring\nmodule to work correctly are listed in the 'requirements.txt' file. To\nuse the scoring module, all you have to do is create a Python\nvirtualenv, install the prerequisites, and then import and use the\nscoring module as follows:\n ----- See 'example.py' for complete example. -----\n from scoring_487931_20170921174120_b4066 import Scorer\n scorer = KLimeScorer() # Create instance. score = scorer.score_reason_codes([ # Call score_reason_codes()\n 7.416, # sepal_len\n 3.562, # sepal_wid\n 1.049, # petal_len\n 2.388, # petal_wid\n ])\nThe scorer instance provides the following methods:\n- score_reason_codes(list): Get K-LIME reason codes for one row (list\n of values).", + "prompt_type": "plain" + }, + { + "output": "-----\n virtualenv -p python3.8 env\n source env/bin/activate\n pip install --use-deprecated=legacy-resolver -r requirements.txt\n python example.py\nK-LIME vs Shapley Reason Codes\nThere are times when the K-LIME model score is not close to the\nDriverless AI model score. In this case it may be better to use reason\ncodes using the Shapley method on the Driverless AI model. Note that the\nreason codes from Shapley will be in the transformed feature space. 
To see an example of using both K-LIME and Driverless AI Shapley reason\ncodes in the same Python session, run:\n bash run_example_shapley.sh\nFor this batch script to succeed, MLI must be run on a Driverless AI\nmodel. If you have run MLI in standalone (external model) mode, there\nwill not be a Driverless AI scoring pipeline. If MLI was run with transformed features, the Shapley example scripts\nwill not be exported. You can generate exact reason codes directly from\nthe Driverless AI model scoring pipeline. MLI Scoring Service Overview\nThe MLI scoring service hosts the scoring module as a HTTP or TCP\nservice.", + "prompt_type": "plain" + }, + { + "output": "score_batch``. Both functions let you specify", + "prompt_type": "plain" + }, + { + "output": "pred_contribs=[True|False]`` to get MLI predictions (KLime/Shapley) on a\nnew dataset. See the example_shapley.py file for more information. MLI Scoring Service - TCP Mode (Thrift)\nThe TCP mode lets you use the scoring service from any language\nsupported by Thrift, including C, C++, C#, Cocoa, D, Dart, Delphi, Go,\nHaxe, Java, Node.js, Lua, perl, PHP, Python, Ruby and Smalltalk. To start the scoring service in TCP mode, you will need to generate the\nThrift bindings once, then run the server:\n ----- See 'run_tcp_server.sh' for complete example. -----\n thrift --gen py scoring.thrift\n python tcp_server.py --port=9090\nNote that the Thrift compiler is only required at build-time. It is not\na run time dependency, i.e. once the scoring services are built and\ntested, you do not need to repeat this installation process on the\nmachines where the scoring services are intended to be deployed. 
To call the scoring service, generate the Thrift bindings for your\nlanguage of choice, then make RPC calls via TCP sockets using Thrift's\nbuffered transport in conjunction with its binary protocol.", + "prompt_type": "plain" + }, + { + "output": "-----\n thrift --gen py scoring.thrift\n\n ----- See 'example_client.py' for complete example. -----\n socket = TSocket.TSocket('localhost', 9090)\n transport = TTransport.TBufferedTransport(socket)\n protocol = TBinaryProtocol.TBinaryProtocol(transport)\n client = ScoringService.Client(protocol)\n transport.open()\n row = Row()\n row.sepalLen = 7.416 # sepal_len\n row.sepalWid = 3.562 # sepal_wid\n row.petalLen = 1.049 # petal_len\n row.petalWid = 2.388 # petal_wid\n scores = client.score_reason_codes(row)\n transport.close()\nYou can reproduce the exact same result from other languages, e.g. Java:\n thrift --gen java scoring.thrift\n // Dependencies: \n // commons-codec-1.9.jar\n // commons-logging-1.2.jar\n // httpclient-4.4.1.jar\n // httpcore-4.4.1.jar\n // libthrift-0.10.0.jar\n // slf4j-api-1.7.12.jar\n import ai.h2o.scoring.Row;\n import ai.h2o.scoring.ScoringService;\n import org.apache.thrift.TException;\n import org.apache.thrift.protocol.TBinaryProtocol;\n import org.apache.thrift.transport.TSocket;\n import org.apache.thrift.transport.TTransport;\n import java.util.List;\n public class Main {\n public static void main(String[] args) {\n try {\n TTransport transport = new TSocket(\"localhost\", 9090);\n transport.open();\n ScoringService.Client client = new ScoringService.Client(\n new TBinaryProtocol(transport));\n Row row = new Row(7.642, 3.436, 6.721, 1.020);\n List scores = client.score_reason_codes(row);\n System.out.println(scores);\n transport.close();\n } catch (TException ex) {\n ex.printStackTrace();\n }\n }\n }\nScoring Service - HTTP Mode (JSON-RPC 2.0)\nThe HTTP mode lets you use the scoring service using plaintext JSON-RPC\ncalls.", + "prompt_type": "plain" + }, + { + "output": "MinIO 
Setup\n\nThis section provides instructions for configuring Driverless AI to work\nwith MinIO. Note that unlike S3, authentication must also be configured\nwhen the MinIO data connector is specified.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run\n--runtime=nvidia(>= Docker 19.03) ornvidia-docker(< Docker 19.03) command when starting the Driverless AI Docker image. Usedocker\nversionto check which version of Docker you are using. Description of Configuration Attributes --------------------------------------- -minio_endpoint_url: The endpoint URL that will be used to access MinIO. -minio_access_key_id: The MinIO access key. -minio_secret_access_key: The MinIO secret access key. -minio_skip_cert_verification: If this is set to true, then MinIO connector will skip certificate verification. This is set to false by default. -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. Enable MinIO with Authentication -------------------------------- .. container:: tabs .. group-tab:: Docker Image Installs This example enables the MinIO data connector with authentication by passing an endpoint URL, access key ID, and an access key.", + "prompt_type": "plain" + }, + { + "output": "This lets you reference data stored in MinIO directly using the endpoint URL, for example: http://\\ //datasets/iris.csv. .. code:: bash nvidia-docker run \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,minio\" \\ -e DRIVERLESS_AI_MINIO_ENDPOINT_URL=\"\" -e DRIVERLESS_AI_MINIO_ACCESS_KEY_ID=\"\" \\ -e DRIVERLESS_AI_MINIO_SECRET_ACCESS_KEY=\"\" \\ -e DRIVERLESS_AI_MINIO_SKIP_CERT_VERIFICATION=\"false\" \\ -p 12345:12345 \\ --init -it --rm \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. 
container:: group-tab Docker Image with the config.toml This example shows how to configure MinIO options in the config.toml file, and then specify that file when starting Driverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "Python Client Admin API\nThe following sections describe Driverless AI's Admin API. Note\nThe Admin API is currently only available through the DAI Python client. Understanding the Admin API\nThe Driverless AI Admin API lets you manage entities created by other\nusers by providing options for listing, deleting, or transferring them. The primary component of the Admin API is the new user role called\nAdmin. Driverless AI currently supports only local Admin user\nauthorization, which is defined through the local_administrator_list\nconfig parameter. For example, to promote UserA and UserB to\nadministrator, add the following config override to the config.toml\nfile:\n local_administrator_list = ['UserA', 'UserB']\nAdmin API methods\nThe following is a list of DAI Admin API methods. Note\nThe following examples assume that you have initialized the h2oai Python\nclient and are logged in with a user that has the Admin role. 
Listing entities\nTo list the datasets of a particular user, use the following client\nmethod:\n # cli = h2oai_client.Client(...)\n cli.admin.list_entities(\n username=\"other-user-name\",\n kind=\"dataset\",\n )\nThe following is a list of entities that can be listed with the\npreceding method:\n- model: Experiments\n- dataset: Datasets\n- project: Projects\n- deployment: Deployments\n- interpretation: MLI interpretations\n- model_diagnostic: Model diagnostics\nDeleting entities\nIf you know the kind and key associated with an entity, you can delete\nthat entity with the following client method:\n # cli = h2oai_client.Client(...)\n cli.admin.delete_entity(\n username=\"other-user-name\",\n kind=\"model\",\n key=\"model-key\",\n )\nNote\nAn entity's kind and key can be obtained through the listing API.", + "prompt_type": "plain" + }, + { + "output": "Linux Docker Images\nTo simplify local installation, Driverless AI is provided as a Docker\nimage for the following system combinations:\n ---------------------------------------------------------------------\n Host OS Docker Version Host Architecture Min Mem\n --------------------------- -------------- ----------------- --------\n Ubuntu 16.04 or later Docker CE x86_64 64 GB\n RHEL or CentOS 7.4 or later Docker CE x86_64 64 GB\n NVIDIA DGX Registry x86_64 \n ---------------------------------------------------------------------\nNote: CUDA 11.2.2 or later with NVIDIA drivers >= is recommended (GPU\nonly). Note that if you are using K80 GPUs, the minimum required NVIDIA\ndriver version is 450.80.02. For the best performance, including GPU support, use nvidia-docker. For\na lower-performance experience without GPUs, use regular docker (with\nthe same docker image). 
These installation steps assume that you have a license key for\nDriverless AI.", + "prompt_type": "plain" + }, + { + "output": "--shm-size=2g`` is recommended for Driverless AI docker command.\n\nubuntu rhel nvidia-dgx", + "prompt_type": "plain" + }, + { + "output": "Install the Driverless AI AWS Marketplace AMI\nA Driverless AI AMI is available in the AWS Marketplace beginning with\nDriverless AI version 1.5.2. This section describes how to install and\nrun Driverless AI through the AWS Marketplace. Environment\n+---------------------------+--------------+---------+----------------+\n| Provider | Instance | Num | Suitable for |\n| | Type | GPUs | |\n+===========================+==============+=========+================+\n| AWS | p2.xlarge | 1 | E |\n| | | | |\n| - | ---- | ---- | xperimentation |\n| - | -----------+ | ------+ | |\n| - | | | ---- |\n| - | p2.8xlarge | 8 | -------------+ |\n| - | | | |\n| - | ---- | ---- | Serious |\n| - | -----------+ | ------+ | use |\n| - | | | |\n| | | 16 | ---- |\n| | p2.16xlarge | | -------------+ |\n| | | ---- | |\n| | ---- | ------+ | Serious |\n| | -----------+ | | use |\n| | | 1 | |\n| | p3.2xlarge | | ---- |\n| | | ---- | -------------+ |\n| | ---- | ------+ | |\n| | -----------+ | | E |\n| | | 4 | |\n| | p3.8xlarge | | xperimentation |\n| | | ---- | |\n| | ---- | ------+ | ---- |\n| | -----------+ | | -------------+ |\n| | | 8 | |\n| | | | Serious |\n| | p3.16xlarge | ---- | use |\n| | | ------+ | |\n| | ---- | | ---- |\n| | -----------+ | 1 | -------------+ |\n| | | | |\n| | g3.4xlarge | ---- | Serious |\n| | | ------+ | use |\n| | ---- | | |\n| | -----------+ | 2 | ---- |\n| | | | -------------+ |\n| | g3.8xlarge | ---- | |\n| | | ------+ | E |\n| | ---- | | |\n| | -----------+ | 4 | xperimentation |\n| | | | |\n| | | | ---- |\n| | g3.16xlarge | | -------------+ |\n| | | | |\n| | | | E |\n| | | | |\n| | | | xperimentation |\n| | | | |\n| | | | ---- |\n| | | | -------------+ |\n| | | | |\n| | | | Serious |\n| | | | use 
|\n+---------------------------+--------------+---------+----------------+\nInstallation Procedure\n1.", + "prompt_type": "plain" + }, + { + "output": "2. Search for Driverless AI. 3. Select the version of Driverless AI that you want to install. 4. Scroll down to review/edit your region and the selected\n infrastructure and pricing. 5. Return to the top and select Continue to Subscribe. 6. Review the subscription, then click Continue to Configure. 7. If desired, change the Fullfillment Option, Software Version, and\n Region. Note that this page also includes the AMI ID for the\n selected software version. Click Continue to Launch when you are\n done. 8. Review the configuration and choose a method for launching\n Driverless AI. Click the Usage Instructions button in AWS to review\n your Driverless AI username and password. Scroll down to the bottom\n of the page and click Launch when you are done. You will receive a \"Success\" message when the image launches\nsuccessfully. []\nStarting Driverless AI\nThis section describes how to start Driverless AI after the Marketplace\nAMI has been successfully launched. 1. Navigate to the EC2 Console.", + "prompt_type": "plain" + }, + { + "output": "Select your instance. 3. Open another browser and launch Driverless AI by navigating to\n https://\\ :12345. 4. Sign in to Driverless AI with the username h2oai and use the AWS\n InstanceID as the password. You will be prompted to enter your\n Driverless AI license key when you log in for the first time. Stopping the EC2 Instance\nThe EC2 instance will continue to run even when you close the\naws.amazon.com portal. To stop the instance:\n1. On the EC2 Dashboard, click the Running Instances link under the\n Resources section. 2. Select the instance that you want to stop. 3. In the Actions drop down menu, select Instance State > Stop. 4. A confirmation page will display. Click Yes, Stop to stop the\n instance. 
Upgrading the Driverless AI Marketplace Image\nNote that the first offering of the Driverless AI Marketplace image was\n1.5.2. As such, it is only possible to upgrade to versions greater than\nthat. Perform the following steps if you are upgrading to a Driverless AI\nMarketplace image version greater than 1.5.2.", + "prompt_type": "plain" + }, + { + "output": "Replace ``dai_NEWVERSION.deb`` below with the new Driverless AI version (for example, ``dai_1.5.4_amd64.deb``).\nNote that this upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade.\n\n # Stop Driverless AI.\n sudo systemctl stop dai\n\n # Make a backup of /opt/h2oai/dai/tmp directory at this time.\n\n # Upgrade Driverless AI.\n sudo dpkg -i dai_NEWVERSION.deb\n sudo systemctl daemon-reload\n sudo systemctl start dai", + "prompt_type": "plain" + }, + { + "output": "exclusive_mode------------------ .. container:: dropdown **Exclusive level of access to node resources** There are three levels of access: - safe: this level assumes that there might be another experiment also running on same node. - moderate: this level assumes that there are no other experiments or tasks running on the same node, but still only uses physical core counts. - max: this level assumes that there is absolutely nothing else running on the node except the experiment. The default level is \"safe\" and the equivalent config.toml parameter is ``exclusive_mode``. If multinode is\n enabled, this option has no effect, unless\n worker_remote_processors=1 when it will still be applied. Each\n exclusive mode can be chosen, and then fine-tuned using each expert\n settings. 
Changing the exclusive mode will reset all exclusive mode\n related options back to default and then re-apply the specific rules\n for the new mode, which will undo any fine-tuning of expert options\n that are part of exclusive mode rules.", + "prompt_type": "plain" + }, + { + "output": "max_cores``\n\nNumber of Cores to Use\n\nSpecify the number of cores to use per experiment. Note that if you\nspecify 0, all available cores will be used. Lower values can reduce\nmemory usage but might slow down the experiment. This value defaults to\n0(all). One can also set it using the environment variable\nOMP_NUM_THREADS or OPENBLAS_NUM_THREADS (e.g., in bash: 'export\nOMP_NUM_THREADS=32' or 'export OPENBLAS_NUM_THREADS=32')", + "prompt_type": "plain" + }, + { + "output": "max_fit_cores``\n\nMaximum Number of Cores to Use for Model Fit\n\nSpecify the maximum number of cores to use for a model's fit call. Note\nthat if you specify 0, all available cores will be used. This value\ndefaults to 10.", + "prompt_type": "plain" + }, + { + "output": "use_dask_cluster-------------------- .. container:: dropdown **If full dask cluster is enabled, use full cluster** Specify whether to use full multinode distributed cluster (True) or single-node dask (False). In some cases, using entire cluster can be inefficient. E.g. several DGX nodes can be more efficient, if used one DGX at a time for medium-sized data. The equivalent config.toml parameter isuse_dask_cluster``.", + "prompt_type": "plain" + }, + { + "output": "max_predict_cores``\n\nMaximum Number of Cores to Use for Model Predict\n\nSpecify the maximum number of cores to use for a model's predict call.\nNote that if you specify 0, all available cores will be used. 
This value\ndefaults to 0(all).", + "prompt_type": "plain" + }, + { + "output": "max_predict_cores_in_dai``\n\nMaximum Number of Cores to Use for Model Transform and Predict When\nDoing MLI, AutoDoc\n\nSpecify the maximum number of cores to use for a model's transform and\npredict call when doing operations in the Driverless AI MLI GUI and the\nDriverless AI R and Python clients. Note that if you specify 0, all\navailable cores will be used. This value defaults to 4.", + "prompt_type": "plain" + }, + { + "output": "batch_cpu_tuning_max_workers``\n\nTuning Workers per Batch for CPU\n\nSpecify the number of workers used in CPU mode for tuning. A value of 0\nuses the socket count, while a value of -1 uses all physical cores\ngreater than or equal to 1. This value defaults to 0(socket count).", + "prompt_type": "plain" + }, + { + "output": "cpu_max_workers``\n\nNumber of Workers for CPU Training\n\nSpecify the number of workers used in CPU mode for training:\n\n- 0: Use socket count (Default)\n- -1: Use all physical cores >= 1 that count", + "prompt_type": "plain" + }, + { + "output": "num_gpus_per_experiment``\n\n#GPUs/Experiment\n\nSpecify the number of GPUs to use per experiment. A value of -1\n(default) specifies to use all available GPUs. Must be at least as large\nas the number of GPUs to use per model (or -1). In multinode context\nwhen using dask, this refers to the per-node value.", + "prompt_type": "plain" + }, + { + "output": "min_num_cores_per_gpu``\n\nNum Cores/GPU\n\nSpecify the number of CPU cores per GPU. In order to have a sufficient\nnumber of cores per GPU, this setting limits the number of GPUs used.\nThis value defaults to 2.", + "prompt_type": "plain" + }, + { + "output": "num_gpus_per_model---------------------- .. container:: dropdown **#GPUs/Model** Specify the number of GPUs to use per model. The equivalent config.toml parameter is ``num_gpus_per_model`` and the default value\n\n is 1. 
Currently num_gpus_per_model other than 1 disables GPU\n locking, so is only recommended for single experiments and single\n users. Setting this parameter to -1 means use all GPUs per model. In\n all cases, XGBoost tree and linear models use the number of GPUs\n specified per model, while LightGBM and Tensorflow revert to using 1\n GPU/model and run multiple models on multiple GPUs. FTRL does not\n use GPUs. Rulefit uses GPUs for parts involving obtaining the tree\n using LightGBM. In multinode context when using dask, this parameter\n refers to the per-node value.", + "prompt_type": "plain" + }, + { + "output": "num_gpus_for_prediction--------------------------- .. container:: dropdown **Num. of GPUs for Isolated Prediction/Transform** Specify the number of GPUs to use forpredictfor models andtransformfor transformers when running outside offit/fit_transform. Ifpredictortransformare called in the same process asfit/fit_transform, the number of GPUs will match. New processes will use this count for applicable models and transformers. Note that enablingtensorflow_nlp_have_gpus_in_productionwill override this setting for relevant TensorFlow NLP transformers. The equivalent config.toml parameter isnum_gpus_for_prediction`` and the default value is\n\n \"0\".\n\n Note: When GPUs are used, TensorFlow, PyTorch models and\n transformers, and RAPIDS always predict on GPU. And RAPIDS requires\n Driverless AI python scoring package also to be used on GPUs. In\n multinode context when using dask, this refers to the per-node\n value.", + "prompt_type": "plain" + }, + { + "output": "gpu_id_start---------------- .. container:: dropdown **GPU Starting ID** Specify Which gpu_id to start with. If using CUDA_VISIBLE_DEVICES=... to control GPUs (preferred method), gpu_id=0 is the first in that restricted list of devices. For example, ifCUDA_VISIBLE_DEVICES='4,5'thengpu_id_start=0`` will refer to\n device #4. 
From expert mode, to run 2 experiments, each on a distinct GPU out\n of 2 GPUs, then:\n - Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=1,\n gpu_id_start=0\n - Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=1,\n gpu_id_start=1\n From expert mode, to run 2 experiments, each on a distinct GPU out\n of 8 GPUs, then:\n - Experiment#1: num_gpus_per_model=1, num_gpus_per_experiment=4,\n gpu_id_start=0\n - Experiment#2: num_gpus_per_model=1, num_gpus_per_experiment=4,\n gpu_id_start=4\n To run on all 4 GPUs/model, then\n - Experiment#1: num_gpus_per_model=4, num_gpus_per_experiment=4,\n gpu_id_start=0\n - Experiment#2: num_gpus_per_model=4, num_gpus_per_experiment=4,\n gpu_id_start=4\n If num_gpus_per_model!=1, global GPU locking is disabled.", + "prompt_type": "plain" + }, + { + "output": "assumed_simultaneous_dt_forks_munging``\n\nAssumed/Expected number of munging forks\n\nExpected maximum number of forks, used to ensure datatable doesn't\noverload system. For actual use beyond this value, system will start to\nhave slow-down issues. 
The default value is 3.", + "prompt_type": "plain" + }, + { + "output": "max_max_dt_threads_munging``\n\nMaximum of threads for datatable for munging\n\nMaximum number of threads for datatable for munging.", + "prompt_type": "plain" + }, + { + "output": "max_dt_threads_munging``\n\nMax Number of Threads to Use for datatable and OpenBLAS for Munging and\nModel Training\n\nSpecify the maximum number of threads to use for datatable and OpenBLAS\nduring data munging (applied on a per process basis):\n\n- 0 = Use all threads\n- -1 = Automatically select number of threads (Default)", + "prompt_type": "plain" + }, + { + "output": "max_dt_threads_readwrite``\n\nMax Number of Threads to Use for datatable Read and Write of Files\n\nSpecify the maximum number of threads to use for datatable during data\nreading and writing (applied on a per process basis):\n\n- 0 = Use all threads\n- -1 = Automatically select number of threads (Default)", + "prompt_type": "plain" + }, + { + "output": "max_dt_threads_stats_openblas``\n\nMax Number of Threads to Use for datatable Stats and OpenBLAS\n\nSpecify the maximum number of threads to use for datatable stats and\nOpenBLAS (applied on a per process basis):\n\n- 0 = Use all threads\n- -1 = Automatically select number of threads (Default)", + "prompt_type": "plain" + }, + { + "output": "allow_reduce_features_when_failure``\nWhether to reduce features when model fails (GPU OOM Protection)\nBig models (on big data or with a lot of features) can run out of memory\non GPUs. This option is primarily useful for avoiding model building\nfailure due to GPU Out Of Memory (OOM). Currently is applicable to all\nnon-dask XGBoost models (i.e. GLMModel, XGBoostGBMModel,\nXGBoostDartModel, XGBoostRFModel), during normal fit or when using\nOptuna. This is achieved by reducing features until model does not fail. 
For\nexample, if XGBoost runs out of GPU memory, this is detected, and\n(regardless of setting of skip_model_failures), we perform feature\nselection using XGBoost on subsets of features. The dataset is\nprogressively reduced by factor of 2 with more models to cover all\nfeatures. This splitting continues until no failure occurs. Then all\nsub-models are used to estimate variable importance by absolute\ninformation gain, in order to decide which features to include. Finally,\na single model with the most important features is built using the\nfeature count that did not lead to OOM.", + "prompt_type": "plain" + }, + { + "output": "reduce_repeats_when_failure``\n\nNumber of repeats for models used for feature selection during failure\nrecovery\n\nWith\nallow_reduce_features_when_failure ,\nthis controls how many repeats of sub-models are used for feature\nselection. A single repeat only has each sub-model consider a single\nsub-set of features, while repeats shuffle which features are considered\nallowing more chance to find important interactions. More repeats can\nlead to higher accuracy. The cost of this option is proportional to the\nrepeat count. The default value is 1.", + "prompt_type": "plain" + }, + { + "output": "fraction_anchor_reduce_features_when_failure``\n\nFraction of features treated as anchor for feature selection during\nfailure recovery\n\nWith\nallow_reduce_features_when_failure ,\nthis controls the fraction of features treated as an anchor that are\nfixed for all sub-models. Each repeat gets new anchors. For tuning and\nevolution, the probability depends upon any prior importance (if\npresent) from other individuals, while final model uses uniform\nprobability for anchor features. The default fraction is 0.1.", + "prompt_type": "plain" + }, + { + "output": "xgboost_reduce_on_errors_list``\n\nErrors From XGBoost That Trigger Reduction of Features\n\nError strings from XGBoost that are used to trigger re-fit on reduced\nsub-models. 
See allow_reduce_features_when_failure.", + "prompt_type": "plain" + }, + { + "output": "lightgbm_reduce_on_errors_list``\n\nErrors From LightGBM That Trigger Reduction of Features\n\nError strings from LightGBM that are used to trigger re-fit on reduced\nsub-models. See allow_reduce_features_when_failure.", + "prompt_type": "plain" + }, + { + "output": "num_gpus_per_hyperopt_dask``\n\nGPUs / HyperOptDask\n\nSpecify the number of GPUs to use per model hyperopt training task. To\nuse all GPUs, set this to -1. For example, when this is set to -1 and\nthere are 4 GPUs available, all of them can be used for the training of\na single model across a Dask cluster. Ignored if GPUs are disabled or if\nthere are no GPUs on system. In multinode context, this refers to the\nper-node value.", + "prompt_type": "plain" + }, + { + "output": "detailed_traces``\n\nEnable Detailed Traces\n\nSpecify whether to enable detailed tracing in Driverless AI trace when\nrunning an experiment. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "debug_log``\n\nEnable Debug Log Level\n\nIf enabled, the log files will also include debug logs. This is disabled\nby default.", + "prompt_type": "plain" + }, + { + "output": "log_system_info_per_experiment``\n\nEnable Logging of System Information for Each Experiment\n\nSpecify whether to include system information such as CPU, GPU, and disk\nspace at the start of each experiment log. Note that this information is\nalready included in system logs. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "AutoDoc Settings\n\nThis section includes settings that can be used to configure AutoDoc.", + "prompt_type": "plain" + }, + { + "output": "make_autoreport``\n\nMake AutoDoc\n\nSpecify whether to create an AutoDoc for the experiment after it has\nfinished running. 
This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_report_name``\n\nAutoDoc Name\n\nSpecify a name for the AutoDoc report. This is set to \"report\" by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "autodoc_template``\n\nAutoDoc Template Location\n\nSpecify a path for the AutoDoc template:\n\n- To generate a custom AutoDoc template, specify the full path to your\n custom template.\n- To generate the standard AutoDoc, specify the default value for this\n setting.", + "prompt_type": "plain" + }, + { + "output": "autodoc_output_type``\n\nAutoDoc File Output Type\n\nSpecify the AutoDoc output type. Choose from the following file types:\n\n- docx (Default)\n- md", + "prompt_type": "plain" + }, + { + "output": "autodoc_subtemplate_type``\n\nAutoDoc SubTemplate Type\n\nSpecify the type of sub-templates to use. Choose from the following:\n\n- auto (Default)\n- md\n- docx", + "prompt_type": "plain" + }, + { + "output": "autodoc_max_cm_size``\n\nConfusion Matrix Max Number of Classes\n\nSpecify the maximum number of classes in the confusion matrix. This\nvalue defaults to 10.", + "prompt_type": "plain" + }, + { + "output": "autodoc_num_features``\n\nNumber of Top Features to Document\n\nSpecify the number of top features to display in the document. To\ndisable this setting, specify -1. This is set to 50 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_min_relative_importance``\n\nMinimum Relative Feature Importance Threshold\n\nSpecify the minimum relative feature importance in order for a feature\nto be displayed. This value must be a float >= 0 and <= 1. This is set\nto 0.003 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_include_permutation_feature_importance``\n\nPermutation Feature Importance\n\nSpecify whether to compute permutation-based feature importance. 
This is\ndisabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_feature_importance_num_perm``\n\nNumber of Permutations for Feature Importance\n\nSpecify the number of permutations to make per feature when computing\nfeature importance. This is set to 1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_feature_importance_scorer``\n\nFeature Importance Scorer\n\nSpecify the name of the scorer to be used when calculating feature\nimportance. Leave this setting unspecified to use the default scorer for\nthe experiment.", + "prompt_type": "plain" + }, + { + "output": "autodoc_pd_max_rows``\n\nPDP Max Number of Rows\n\nSpecify the number of rows for Partial Dependence Plots.", + "prompt_type": "plain" + }, + { + "output": "autodoc_pd_max_runtime``\n\nPDP Max Runtime in Seconds\n\nSpecify the maximum number of seconds Partial Dependency computation can\ntake when generating a report. Set this value to -1 to disable the time\nlimit. This is set to 20 seconds by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_out_of_range``\n\nPDP Out of Range\n\nSpecify the number of standard deviations outside of the range of a\ncolumn to include in partial dependence plots. This shows how the model\nreacts to data it has not seen before. This is set to 3 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_num_rows``\n\nICE Number of Rows\n\nSpecify the number of rows to include in PDP and ICE plots if individual\nrows are not specified. This is set to 0 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_population_stability_index``\n\nPopulation Stability Index\n\nSpecify whether to include a population stability index if the\nexperiment is a binary classification or regression problem. 
This is\ndisabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_population_stability_index_n_quantiles``\n\nPopulation Stability Index Number of Quantiles\n\nSpecify the number of quantiles to use for the population stability\nindex. This is set to 10 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_prediction_stats``\n\nPrediction Statistics\n\nSpecify whether to include prediction statistics information if the\nexperiment is a binary classification or regression problem. This value\nis disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_prediction_stats_n_quantiles``\n\nPrediction Statistics Number of Quantiles\n\nSpecify the number of quantiles to use for prediction statistics. This\nis set to 20 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_response_rate``\n\nResponse Rates Plot\n\nSpecify whether to include response rates information if the experiment\nis a binary classification problem. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_response_rate_n_quantiles``\n\nResponse Rates Plot Number of Quantiles\n\nSpecify the number of quantiles to use for response rates information.\nThis is set to 10 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_gini_plot``\n\nShow GINI Plot\n\nSpecify whether to show the GINI plot. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_enable_shapley_values``\n\nEnable Shapley Values\n\nSpecify whether to show Shapley values results in the AutoDoc. This is\nenabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_data_summary_col_num``\n\nNumber of Features in Data Summary Table\n\nSpecify the number of features to be shown in the data summary table.\nThis value must be an integer. To show all columns, specify any value\nlower than 1. 
This is set to -1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_list_all_config_settings``\n\nList All Config Settings\n\nSpecify whether to show all config settings. If this is disabled, only\nsettings that have been changed are listed. All settings are listed when\nenabled. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_keras_summary_line_length``\n\nKeras Model Architecture Summary Line Length\n\nSpecify the line length of the Keras model architecture summary. This\nvalue must be either an integer greater than 0 or -1. To use the default\nline length, set this value to -1 (default).", + "prompt_type": "plain" + }, + { + "output": "autodoc_transformer_architecture_max_lines``\n\nNLP/Image Transformer Architecture Max Lines\n\nSpecify the maximum number of lines shown for advanced transformer\narchitecture in the Feature section. Note that the full architecture can\nbe found in the appendix.", + "prompt_type": "plain" + }, + { + "output": "autodoc_full_architecture_in_appendix``\n\nAppendix NLP/Image Transformer Architecture\n\nSpecify whether to show the full NLP/Image transformer architecture in\nthe appendix. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_appendix_results_table``\n\nFull GLM Coefficients Table in the Appendix\n\nSpecify whether to show the full GLM coefficient table(s) in the\nappendix. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_models``\n\nGLM Coefficient Tables Number of Models\n\nSpecify the number of models for which a GLM coefficients table is shown\nin the AutoDoc. This value must be -1 or an integer >= 1. Set this value\nto -1 to show tables for all models. 
This is set to 1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_folds``\n\nGLM Coefficient Tables Number of Folds Per Model\n\nSpecify the number of folds per model for which a GLM coefficients table\nis shown in the AutoDoc. This value must be -1 (default) or an\ninteger >= 1 (-1 shows all folds per model).", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_coef``\n\nGLM Coefficient Tables Number of Coefficients\n\nSpecify the number of coefficients to show within a GLM coefficients\ntable in the AutoDoc. This is set to 50 by default. Set this value to -1\nto show all coefficients.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_classes``\n\nGLM Coefficient Tables Number of Classes\n\nSpecify the number of classes to show within a GLM coefficients table in\nthe AutoDoc. Set this value to -1 to show all classes. This is set to 9\nby default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_num_histogram_plots``\n\nNumber of Histograms to Show\n\nSpecify the number of top features for which to show histograms. This is\nset to 10 by default.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI MOJO Scoring Pipeline - C++ Runtime with Python (Supports Shapley) and R Wrappers\nThe C++ Scoring Pipeline is provided as R and Python packages for the\nprotobuf-based MOJO2 protocol. Use your preferred method once the MOJO\nScoring Pipeline has been built. Notes:\n - These scoring pipelines are currently not available for RuleFit\n models. - Unlike the Java Runtime, TensorFlow/Bert are supported by C++\n Runtime MOJO. - You can have Driverless AI attempt to reduce the size of the MOJO\n scoring pipeline when the experiment is being built by enabling\n the Reduce MOJO Size expert setting also\n see . - Shapley contributions come with the downloaded experiment MOJO\n scoring pipeline. See cpp_scoring_shapley for scoring example. 
- Shapley contributions for transformed\n features and original features are currently available for XGBoost\n (GBM, GLM, RF, DART), LightGBM, Zero-Inflated, Imbalanced and\n DecisionTree models (and their ensemble).", + "prompt_type": "plain" + }, + { + "output": "libopenblas-dev, run the following command: :: sudo apt install libopenblas-dev .. _cpp-mojo-downloads: Downloads --------- This section contains download links for the C++ MOJO runtime and its Python and R wrappers. **Python:** - :mojo-runtime38:C++ MOJO runtime (Python 3.8) - :mojo-runtime37:C++ MOJO runtime (Python 3.7) - :mojo-runtime36:C++ MOJO runtime (Python 3.6) **R**: - :daimojo-r:`C++ MOJO runtime ` .. note:: The Python and R packages can also be downloaded from within the Driverless AI application by clicking **Resources**, and then clicking **MOJO Py Runtime** or **MOJO R Runtime** from the drop-down menu. Examples -------- The following examples show how to use the R and Python APIs of the C++ MOJO runtime.", + "prompt_type": "plain" + }, + { + "output": "--with-prediction-interval.. code:: bash java -Xmx5g -Dai.h2o.mojos.runtime.license.file=license.file -jar mojo2-runtime.jar --with-prediction-interval pipeline.mojo example.csv .. _cpp_scoring_shapley: C++ MOJO runtime Shapley values support --------------------------------------- The C++ MOJO runtime and its Python wrapper support Shapley contributions for transformed features and original features. The following example demonstrates how to retrieve Shapley contributions for transformed and original features when making predictions: .. 
code:: python import datatable as dt import daimojo X = dt.Frame(\"example.jay\") m = daimojo.model(\"pipeline.mojo\") m.predict(X) # Prediction call that returns regular predictions m.predict(X, pred_contribs=True) # Prediction call that returns Shapley contributions for transformed features m.predict(X, pred_contribs=True, pred_contribs_original=True) # Prediction call that returns Shapley contributions for original features .. note:: - Settingpred_contribs_original=Truerequires thatpred_contribsis also set toTrue.", + "prompt_type": "plain" + }, + { + "output": "Python Client\n\nThis section describes how to install the Driverless AI Python client.\nSeveral end-to-end examples that demonstrate how to use the client are\nalso provided. Additional examples are available in the Driverless AI\nCode Samples and Tutorials GitHub repository.\n\nFor more information on the Python client, see the Driverless AI Python\nclient documentation.\n\nNote\n\nThe Python client does not currently support the following Driverless AI\nfeatures:\n\n- Diagnostics\n- Deployments\n- MLI Bring Your Own Recipe (BYOR)\n- mTLS authentication\n\npython_install_client python_client_admin\nexamples/credit_card/credit_card_default.ipynb\nexamples/walmart_timeseries_experiment/training_timeseries_model.ipynb\nexamples/stock_timeseries_experiment/demo_stock_timeseries.ipynb\nexamples/nlp_airline_sentiment/demo_nlp_airline_sentiment.ipynb", + "prompt_type": "plain" + }, + { + "output": "enable_constant_model``\n\nConstant Models\n\nSpecify whether to enable constant models . This is set\nto Auto (enabled) by default.", + "prompt_type": "plain" + }, + { + "output": "enable_decision_tree------------------------ .. container:: dropdown **Decision Tree Models** Specify whether to build Decision Tree models as part of the experiment. This is set to **Auto** by default. 
In this case, Driverless AI will build Decision Tree models if interpretability is greater than or equal to the value ofdecision_tree_interpretability_switch(which defaults to 7) and accuracy is less than or equal todecision_tree_accuracy_switch``\n\n (which defaults to 7).", + "prompt_type": "plain" + }, + { + "output": "enable_glm``\n\nGLM Models\n\nSpecify whether to build GLM models (generalized linear models) as part\nof the experiment (usually only for the final model unless it's used\nexclusively). GLMs are very interpretable models with one coefficient\nper feature, an intercept term and a link function. This is set to Auto\nby default (enabled if accuracy <= 5 and interpretability >= 6).", + "prompt_type": "plain" + }, + { + "output": "enable_xgboost_gbm``\n\nXGBoost GBM Models\n\nSpecify whether to build XGBoost models as part of the experiment (for\nboth the feature engineering part and the final model). XGBoost is a\ntype of gradient boosting method that has been widely successful in\nrecent years due to its good regularization techniques and high\naccuracy. This is set to Auto by default. In this case, Driverless AI\nwill use XGBoost unless the number of rows * columns is greater than a\nthreshold. This threshold is a config setting that is 100M by default\nfor CPU and 30M by default for GPU.", + "prompt_type": "plain" + }, + { + "output": "enable_lightgbm``\n\nLightGBM Models\n\nSpecify whether to build LightGBM models as part of the experiment.\nLightGBM Models are the default models. This is set to Auto (enabled) by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "enable_xgboost_dart``\n\nXGBoost Dart Models\n\nSpecify whether to use XGBoost's Dart method when building models for\nexperiment (for both the feature engineering part and the final model).\nThis is set to Auto (disabled) by default.", + "prompt_type": "plain" + }, + { + "output": "enable_xgboost_rapids------------------------- .. 
container:: dropdown **Enable RAPIDS-cuDF extensions to XGBoost GBM/Dart** Specify whether to enable RAPIDS extensions to XGBoost GBM/Dart. **If selected, python scoring package can only be used on GPU system**. The equivalent config.toml parameter isenable_xgboost_rapids`` and\n\n the default value is False. Disabled for dask multinode models due\n to bug in dask_cudf and xgboost.", + "prompt_type": "plain" + }, + { + "output": "enable_xgboost_rf``\n\nEnable XGBoost RF model\n\nSpecify whether to enable XGBoost RF mode without early stopping. This\nsetting is disabled unless switched on.", + "prompt_type": "plain" + }, + { + "output": "enable_xgboost_gbm_dask--------------------------- .. container:: dropdown **Enable Dask_cuDF (multi-GPU) XGBoost GBM** Specify whether to enable Dask_cudf (multi-GPU) version of XGBoost GBM. Disabled unless switched on. Only applicable for single final model without early stopping. **No Shapley possible**. The equivalent config.toml parameter isenable_xgboost_gbm_dask`` and the default\n\n value is \"auto\".", + "prompt_type": "plain" + }, + { + "output": "enable_xgboost_dart_dask---------------------------- .. container:: dropdown **Enable Dask_cuDF (multi-GPU) XGBoost Dart** Specify whether to enable Dask_cudf (multi-GPU) version of XGBoost GBM/Dart. This option is disabled unless switched on. Only applicable for single final model without early stopping. **No Shapley is possible**. The equivalent config.toml parameter isenable_xgboost_dart_daskand the default value is \"auto\". It is recommended to run Dask_cudf on multi gpus; if for say debugging purposes, user would like to enable them on 1 GPU, then setuse_dask_for_1_gpu``\nto True via config.toml setting.", + "prompt_type": "plain" + }, + { + "output": "enable_lightgbm_dask------------------------ .. container:: dropdown **Enable Dask (multi-node) LightGBM** Specify whether to enable multi-node LightGBM. It is disabled by default unless switched on. 
The equivalent config.toml parameter isenable_lightgbm_dask``\nand default value is \"auto\".\n\nTo enable multinode Dask see\nDask Multinode Training .", + "prompt_type": "plain" + }, + { + "output": "enable_hyperopt_dask------------------------ .. container:: dropdown **Enable Dask (multi-node/multi-GPU) hyperparameter search** Specify whether to enable Dask (multi-node/multi-GPU) version of hyperparameter search. \"auto\" and \"on\" are same currently. Dask mode for hyperparameter search is enabled if: 1) Have a :ref:`Dask multinode cluster ` or multi-GPU node and model uses 1 GPU for each model( see :ref:`num-gpus-per-model`). 2) Not already using a Dask model. The equivalent config.toml parameter isenable_hyperopt_dask`` and\n\n the default value is \"auto\".", + "prompt_type": "plain" + }, + { + "output": "num_inner_hyperopt_trials_prefinal-------------------------------------- .. container:: dropdown **Number of trials for hyperparameter optimization during model tuning only** Specify the number of trials for **Optuna** hyperparameter optimization for tuning and evolution of models. If using **RAPIDS** or **DASK**, this parameter specifies the number of trials for hyperparameter optimization within XGBoost GBM/Dart and LightGBM and hyperparameter optimization keeps data on GPU entire time. 0 means no trials. For small data, 100 is fine, while for larger data smaller values are reasonable if need results quickly. If using RAPIDS or DASK, hyperparameter optimization stays on GPU the entire time. The equivalent config.toml parameter isnum_inner_hyperopt_trials_prefinal`` and the default value is\n 0. Note that, this is useful when there is high overhead of DAI outside\n inner model fit/predict (i.e the various file, process, and other\n DAI management processes), so this tunes without that overhead.", + "prompt_type": "plain" + }, + { + "output": "num_inner_hyperopt_trials_final----------------------------------- .. 
container:: dropdown **Number of trials for hyperparameter optimization for final model only** Number of trials for **Optuna** hyperparameter optimization for final models. If using **RAPIDS** or **DASK**, this is the number of trials for rapids-cudf hyperparameter optimization within XGBoost GBM/Dart and LightGBM, and hyperparameter optimization keeps data on GPU the entire time. 0 means no trials. For small data, 100 is an OK choice, while for larger data smaller values are reasonable if results are needed quickly. This setting applies to the final model only, even if num_inner_hyperopt_trials=0. The equivalent config.toml parameter is ``num_inner_hyperopt_trials_final``\nand the default value is 0.", + "prompt_type": "plain" + }, + { + "output": "num_hyperopt_individuals_final---------------------------------- .. container:: dropdown **Number of individuals in final ensemble to use Optuna on** Number of individuals in final model (all folds/repeats for given base model) to optimize with **Optuna** hyperparameter tuning. The default value is -1, which means all. 0 is the same as choosing no Optuna trials. It might only be beneficial to optimize hyperparameters of the best individual (i.e. value of 1) in the ensemble. The default value is -1, which means all. The equivalent config.toml parameter is ``num_hyperopt_individuals_final``", + "prompt_type": "plain" + }, + { + "output": "optuna_pruner----------------- .. container:: dropdown **Optuna Pruners** `Optuna Pruner `__ algorithm to use for early stopping of unpromising trials (applicable to XGBoost and LightGBM that support Optuna callbacks). The default is **MedianPruner**. To disable, choose None. The equivalent config.toml parameter is ``optuna_pruner``", + "prompt_type": "plain" + }, + { + "output": "optuna_sampler------------------ .. container:: dropdown **Optuna Samplers** `Optuna Sampler `__ algorithm to use for narrowing down and optimizing the search space (applicable to XGBoost and LightGBM that support Optuna callbacks). The default is **TPESampler**. 
To disable, choose None. The equivalent config.toml parameter is ``optuna_sampler``", + "prompt_type": "plain" + }, + { + "output": "enable_xgboost_hyperopt_callback------------------------------------ .. container:: dropdown **Enable Optuna XGBoost Pruning callback** Specify whether to enable Optuna's XGBoost Pruning callback to abort unpromising runs. This is True by default. This is not enabled when tuning learning rate. The equivalent config.toml parameter is ``enable_xgboost_hyperopt_callback``", + "prompt_type": "plain" + }, + { + "output": "enable_lightgbm_hyperopt_callback------------------------------------- .. container:: dropdown **Enable Optuna LightGBM Pruning callback** Specify whether to enable Optuna's LightGBM Pruning callback to abort unpromising runs. This is True by default. This is not enabled when tuning learning rate. The equivalent config.toml parameter is ``enable_lightgbm_hyperopt_callback``", + "prompt_type": "plain" + }, + { + "output": "enable_tensorflow``\n\nTensorFlow Models\n\nSpecify whether to build TensorFlow models as part of the experiment\n(usually only for text features engineering and for the final model\nunless it's used exclusively). Enable this option for NLP experiments.\nThis is set to Auto by default (not used unless the number of classes is\ngreater than 10).\n\nTensorFlow models are not yet supported by Java MOJOs (only Python\nscoring pipelines and C++ MOJOs are supported).", + "prompt_type": "plain" + }, + { + "output": "enable_grownet``\n\nPyTorch GrowNet Models\n\nSpecify whether to enable PyTorch-based GrowNet models. By\ndefault, this parameter is set to auto, i.e., Driverless AI decides internally\nwhether to use the algorithm for the experiment. Set it to on to force\nthe experiment to build a GrowNet model.", + "prompt_type": "plain" + }, + { + "output": "enable_ftrl``\n\nFTRL Models\n\nSpecify whether to build Follow the Regularized Leader (FTRL) models as\npart of the experiment. 
Note that MOJOs are not yet supported (only\nPython scoring pipelines). FTRL supports binomial and multinomial\nclassification for categorical targets, as well as regression for\ncontinuous targets. This is set to Auto (disabled) by default.", + "prompt_type": "plain" + }, + { + "output": "enable_rulefit``\n\nRuleFit Models\n\nSpecify whether to build RuleFit models as part of the experiment. Note\nthat MOJOs are not yet supported (only Python scoring pipelines). Note\nthat multiclass classification is not yet supported for RuleFit models.\nRules are stored to text files in the experiment directory for now. This\nis set to Auto (disabled) by default.", + "prompt_type": "plain" + }, + { + "output": "enable_zero_inflated_models``\n\nZero-Inflated Models\n\nSpecify whether to enable the automatic addition of\nzero-inflated models for regression problems with\nzero-inflated target values that meet certain conditions:\n\n y >= 0, y.std() > y.mean()\")\n\nThis is set to Auto by default.", + "prompt_type": "plain" + }, + { + "output": "enable_lightgbm_boosting_types``\n\nLightGBM Boosting Types\n\nSpecify which boosting types to enable for LightGBM. Select one or more\nof the following:\n\n- gbdt: Boosted trees\n- rf_early_stopping: Random Forest with early stopping\n- rf: Random Forest\n- dart: Dropout boosted trees with no early stopping\n\ngbdt and rf are both enabled by default.", + "prompt_type": "plain" + }, + { + "output": "enable_lightgbm_cat_support``\n\nLightGBM Categorical Support\n\nSpecify whether to enable LightGBM categorical feature support. This is\ndisabled by default.\n\nNotes:\n\n- Only supported for CPU.\n- A MOJO is not built when this is enabled.", + "prompt_type": "plain" + }, + { + "output": "enable_lightgbm_cuda_support``\n\nLightGBM CUDA Support\n\nSpecify whether to enable LightGBM CUDA implementation instead of\nOpenCL. 
LightGBM CUDA is supported on Linux x86-64 environments.", + "prompt_type": "plain" + }, + { + "output": "show_constant_model``\n\nWhether to Show Constant Models in Iteration Panel\n\nSpecify whether to show constant models in the iteration panel. This is\ndisabled by default.", + "prompt_type": "plain" + }, + { + "output": "params_tensorflow``\n\nParameters for TensorFlow\n\nSpecify specific parameters for TensorFlow to override Driverless AI\nparameters. The following is an example of how the parameters can be\nconfigured:\n\n params_tensorflow = '{'lr': 0.01, 'add_wide': False, 'add_attention': True, 'epochs': 30,\n 'layers': [100, 100], 'activation': 'selu', 'batch_size': 64, 'chunk_size': 1000, 'dropout': 0.3,\n 'strategy': 'one_shot', 'l1': 0.0, 'l2': 0.0, 'ort_loss': 0.5, 'ort_loss_tau': 0.01, 'normalize_type': 'streaming'}'\n\nThe following is an example of how layers can be configured:\n\n [500, 500, 500], [100, 100, 100], [100, 100], [50, 50]\n\nMore information about TensorFlow parameters can be found in the Keras\ndocumentation. Different strategies for using TensorFlow parameters can\nbe viewed here.", + "prompt_type": "plain" + }, + { + "output": "max_nestimators``\n\nMax Number of Trees/Iterations\n\nSpecify the upper limit on the number of trees (GBM) or iterations\n(GLM). This defaults to 3000. Depending on accuracy settings, a fraction\nof this limit will be used.", + "prompt_type": "plain" + }, + { + "output": "n_estimators_list_no_early_stopping--------------------------------------- .. container:: dropdown **n_estimators List to Sample From for Model Mutations for Models That Do Not Use Early Stopping** For LightGBM, the dart and normal random forest modes do not use early stopping. 
This setting lets you specify the ``n_estimators``\n\n (number of trees in the forest) list to sample from for model\n mutations for these types of models.", + "prompt_type": "plain" + }, + { + "output": "min_learning_rate_final``\n\nMinimum Learning Rate for Final Ensemble GBM Models\n\nThis value defaults to 0.01. This is the lower limit on learning rate\nfor final ensemble GBM models. In some cases, the maximum number of\ntrees/iterations is insufficient for the final learning rate, which can\nlead to no early stopping getting triggered and poor final model\nperformance. Then, one can try increasing the learning rate by raising\nthis minimum, or one can try increasing the maximum number of\ntrees/iterations.", + "prompt_type": "plain" + }, + { + "output": "max_learning_rate_final``\n\nMaximum Learning Rate for Final Ensemble GBM Models\n\nSpecify the maximum (upper limit) learning rate for final ensemble GBM\nmodels. This value defaults to 0.05.", + "prompt_type": "plain" + }, + { + "output": "max_nestimators_feature_evolution_factor``\n\nReduction Factor for Max Number of Trees/Iterations During Feature\nEvolution\n\nSpecify the factor by which the value specified by the\nmax-trees-iterations setting is reduced for tuning and feature\nevolution. This option defaults to 0.2. So by default, Driverless AI\nwill produce no more than 0.2 * 3000 trees/iterations during feature\nevolution.", + "prompt_type": "plain" + }, + { + "output": "max_abs_score_delta_train_valid``\n\nMax. absolute delta between training and validation scores for tree\nmodels\n\nModify early stopping behavior for tree-based models (LightGBM,\nXGBoostGBM, CatBoost) such that training score (on training data, not\nholdout) and validation score differ no more than this absolute value\n(i.e., stop adding trees once abs(train_score - valid_score) >\nmax_abs_score_delta_train_valid). 
Keep in mind that the meaning of this\nvalue depends on the chosen scorer and the dataset (i.e., 0.01 for\nLogLoss is different than 0.01 for MSE). This option is Experimental,\nand only for expert use to keep model complexity low. To disable, set to\n0.0. By default this option is disabled.", + "prompt_type": "plain" + }, + { + "output": "max_rel_score_delta_train_valid``\n\nMax. relative delta between training and validation scores for tree\nmodels\n\nModify early stopping behavior for tree-based models (LightGBM,\nXGBoostGBM, CatBoost) such that training score (on training data, not\nholdout) and validation score differ no more than this relative value\n(i.e., stop adding trees once abs(train_score - valid_score) >\nmax_rel_score_delta_train_valid * abs(train_score)). Keep in mind that\nthe meaning of this value depends on the chosen scorer and the dataset\n(i.e., 0.01 for LogLoss is different than 0.01 for MSE etc). This option\nis Experimental, and only for expert use to keep model complexity low.\nTo disable, set to 0.0. By default this option is disabled.", + "prompt_type": "plain" + }, + { + "output": "min_learning_rate``\n\nMinimum Learning Rate for Feature Engineering GBM Models\n\nSpecify the minimum learning rate for feature engineering GBM models.\nThis value defaults to 0.05.", + "prompt_type": "plain" + }, + { + "output": "max_learning_rate``\n\nMax Learning Rate for Tree Models\n\nSpecify the maximum learning rate for tree models during feature\nengineering. Higher values can speed up feature engineering but can hurt\naccuracy. This value defaults to 0.5.", + "prompt_type": "plain" + }, + { + "output": "max_epochs``\n\nMax Number of Epochs for TensorFlow/FTRL\n\nWhen building TensorFlow or FTRL models, specify the maximum number of\nepochs to train models with (it might stop earlier). This value defaults\nto 10. 
This option is ignored if TensorFlow models and/or FTRL models is\ndisabled.", + "prompt_type": "plain" + }, + { + "output": "max_max_depth----------------- .. container:: dropdown **Max Tree Depth** Specify the maximum tree depth. The corresponding maximum value formax_leaves`` is double the specified value. This value defaults to\n\n 12.", + "prompt_type": "plain" + }, + { + "output": "max_max_bin--------------- .. container:: dropdown **Max max_bin for Tree Features** Specify the maximummax_bin`` for tree features. This value\n\n defaults to 256.", + "prompt_type": "plain" + }, + { + "output": "rulefit_max_num_rules``\n\nMax Number of Rules for RuleFit\n\nSpecify the maximum number of rules to be used for RuleFit models. This\ndefaults to -1, which specifies to use all rules.", + "prompt_type": "plain" + }, + { + "output": "ensemble_meta_learner``\n\nEnsemble Level for Final Modeling Pipeline\n\nModel to combine base model predictions, for experiments that create a\nfinal pipeline consisting of multiple base models:\n\n- blender: Creates a linear blend with non-negative weights that add\n to 1 (blending) - recommended\n- extra_trees: Creates a tree model to non-linearly combine the base\n models (stacking) - experimental, and recommended to also set enable\n cross_validate_meta_learner.", + "prompt_type": "plain" + }, + { + "output": "fixed_ensemble_level------------------------ .. container:: dropdown **Ensemble Level for Final Modeling Pipeline** Specify one of the following ensemble levels: - -1 = auto, based upon ensemble_accuracy_switch, accuracy, size of data, etc. (Default) - 0 = No ensemble, only final single model on validated iteration/tree count. Note that holdout predicted probabilities will not be available. (For more information, refer to this :ref:`FAQ `.) 
- 1 = 1 model, multiple ensemble folds (cross-validation) - 2 = 2 models, multiple ensemble folds (cross-validation) - 3 = 3 models, multiple ensemble folds (cross-validation) - 4 = 4 models, multiple ensemble folds (cross-validation) The equivalent config.toml parameter is ``fixed_ensemble_level``.", + "prompt_type": "plain" + }, + { + "output": "cross_validate_meta_learner``\n\nEnsemble Level for Final Modeling Pipeline\n\nIf enabled, use cross-validation to create an ensemble for the meta\nlearner itself. Especially recommended for\nensemble_meta_learner='extra_trees', to make unbiased training holdout\npredictions. No MOJO will be created if this setting is enabled. Not\nneeded for ensemble_meta_learner='blender'.", + "prompt_type": "plain" + }, + { + "output": "cross_validate_single_final_model``\n\nCross-Validate Single Final Model\n\nDriverless AI normally produces a single final model for low accuracy\nsettings (typically, less than 5). When the Cross-validate single final\nmodel option is enabled (default for regular experiments), Driverless AI\nwill perform cross-validation to determine optimal parameters and early\nstopping before training the final single modeling pipeline on the\nentire training data. The final pipeline will build N\u2005+\u20051 models, with\nN-fold cross validation for the single final model. This also creates\nholdout predictions for all non-time-series experiments with a single\nfinal model.\n\nNote that the setting for this option is ignored for time-series\nexperiments or when a validation dataset is provided.", + "prompt_type": "plain" + }, + { + "output": "parameter_tuning_num_models``\n\nNumber of Models During Tuning Phase\n\nSpecify the number of models to tune during pre-evolution phase. Specify\na lower value to avoid excessive tuning, or specify a higher value to perform\nenhanced tuning. 
This option defaults to -1 (auto).", + "prompt_type": "plain" + }, + { + "output": "imbalance_sampling_method``\nSampling Method for Imbalanced Binary Classification Problems\nSpecify the sampling method for imbalanced binary classification\nproblems. This is set to off by default. Choose from the following\noptions:\n- auto: sample both classes as needed, depending on data\n- over_under_sampling: over-sample the minority class and under-sample\n the majority class, depending on data\n- under_sampling: under-sample the majority class to reach class\n balance\n- off: do not perform any sampling\nThis option is closely tied with the Imbalanced Light GBM and Imbalanced\nXGBoost GBM models, which can be enabled/disabled on the Recipes tab\nunder included_models. Specifically:\n- If this option is ENABLED (set to a value other than off) and the\n ImbalancedLightGBM and/or ImbalancedXGBoostGBM models are ENABLED,\n then Driverless AI will check your target imbalance fraction. If the\n target fraction proves to be above the allowed imbalance threshold,\n then sampling will be triggered.", + "prompt_type": "plain" + }, + { + "output": "imbalance_sampling_threshold_min_rows_original``\n\nThreshold for Minimum Number of Rows in Original Training Data to Allow\nImbalanced Sampling\n\nSpecify a threshold for the minimum number of rows in the original\ntraining data that allow imbalanced sampling. This value defaults to\n100,000.", + "prompt_type": "plain" + }, + { + "output": "imbalance_ratio_sampling_threshold``\n\nRatio of Majority to Minority Class for Imbalanced Binary Classification\nto Trigger Special Sampling Techniques (if Enabled)\n\nFor imbalanced binary classification problems, specify the ratio of\nmajority to minority class. Special imbalanced models with sampling\ntechniques are enabled when the ratio is equal to or greater than the\nspecified ratio. 
This value defaults to 5.", + "prompt_type": "plain" + }, + { + "output": "heavy_imbalance_ratio_sampling_threshold``\n\nRatio of Majority to Minority Class for Heavily Imbalanced Binary\nClassification to Only Enable Special Sampling Techniques (if Enabled)\n\nFor heavily imbalanced binary classification, specify the ratio of the\nmajority to minority class equal and above which to enable only special\nimbalanced models on the full original data without upfront sampling.\nThis value defaults to 25.", + "prompt_type": "plain" + }, + { + "output": "imbalance_sampling_number_of_bags``\n\nNumber of Bags for Sampling Methods for Imbalanced Binary Classification\n(if Enabled)\n\nSpecify the number of bags for sampling methods for imbalanced binary\nclassification. This value defaults to -1.", + "prompt_type": "plain" + }, + { + "output": "imbalance_sampling_max_number_of_bags``\n\nHard Limit on Number of Bags for Sampling Methods for Imbalanced Binary\nClassification\n\nSpecify the limit on the number of bags for sampling methods for\nimbalanced binary classification. This value defaults to 10.", + "prompt_type": "plain" + }, + { + "output": "imbalance_sampling_max_number_of_bags_feature_evolution``\n\nHard Limit on Number of Bags for Sampling Methods for Imbalanced Binary\nClassification During Feature Evolution Phase\n\nSpecify the limit on the number of bags for sampling methods for\nimbalanced binary classification. This value defaults to 3. Note that\nthis setting only applies to shift, leakage, tuning, and feature\nevolution models. To limit final models, use the Hard Limit on Number of\nBags for Sampling Methods for Imbalanced Binary Classification setting.", + "prompt_type": "plain" + }, + { + "output": "imbalance_sampling_max_multiple_data_size``\n\nMax Size of Data Sampled During Imbalanced Sampling\n\nSpecify the maximum size of the data sampled during imbalanced sampling\nin terms of the dataset's size. 
This setting controls the approximate\nnumber of bags and is only active when the \"Hard limit on number of bags\nfor sampling methods for imbalanced binary classification during feature\nevolution phase\" option is set to -1. This value defaults to 1.", + "prompt_type": "plain" + }, + { + "output": "imbalance_sampling_target_minority_fraction``\n\nTarget Fraction of Minority Class After Applying Under/Over-Sampling\nTechniques\n\nSpecify the target fraction of a minority class after applying\nunder/over-sampling techniques. A value of 0.5 means that\nmodels/algorithms will be given a balanced target class distribution.\nWhen starting from an extremely imbalanced original target, it can be\nadvantageous to specify a smaller value such as 0.1 or 0.01. This value\ndefaults to -1.", + "prompt_type": "plain" + }, + { + "output": "ftrl_max_interaction_terms_per_degree``\n\nMax Number of Automatic FTRL Interactions Terms for 2nd, 3rd, 4th order\ninteractions terms (Each)\n\nSamples the number of automatic FTRL interaction terms to no more than\nthis value (for each of 2nd, 3rd, 4th order terms). This value defaults\nto 10000.", + "prompt_type": "plain" + }, + { + "output": "enable_bootstrap``\n\nWhether to Enable Bootstrap Sampling for Validation and Test Scores\n\nSpecify whether to enable bootstrap sampling. When enabled, this setting\nprovides error bars to validation and test scores based on the standard\nerror of the bootstrap mean. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_num_classes_switch``\n\nFor Classification Problems with This Many Classes, Default to\nTensorFlow\n\nSpecify the number of classes above which to use TensorFlow when it is\nenabled. Other models that are set to Auto will not be used above this\nnumber. (Models set to On, however, are still used.) 
This value defaults\nto 10.", + "prompt_type": "plain" + }, + { + "output": "prediction_intervals``\n\nCompute Prediction Intervals\n\nSpecify whether to compute empirical prediction intervals based on\nholdout predictions. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "prediction_intervals_alpha``\n\nConfidence Level for Prediction Intervals\n\nSpecify a confidence level for prediction intervals. This value defaults\nto 0.9.", + "prompt_type": "plain" + }, + { + "output": "dump_modelparams_every_scored_indiv``\n\nEnable detailed scored model info\n\nWhether to dump every scored individual's model parameters to\ncsv/tabulated/json file produces files. For example:\nindividual_scored.params.[txt, csv, json]", + "prompt_type": "plain" + }, + { + "output": "Linux DEBs\nFor Linux machines that will not use the Docker image or RPM, a deb\ninstallation is available for x86_64 Ubuntu 16.04/18.04/20.04/22.04. The following installation steps assume that you have a valid license\nkey for Driverless AI. For information on how to obtain a license key\nfor Driverless AI, visit https://www.h2o.ai/products/h2o-driverless-ai/. Once obtained, you will be prompted to paste the license key into the\nDriverless AI UI when you first log in, or you can save it as a .sig\nfile and place it in the license folder that you will create during the\ninstallation process. Note\n- To ensure that AutoDoc pipeline visualizations are generated\ncorrectly on native installations, installing fontconfig is recommended. - When using systemd, remove the dai-minio, dai-h2o, dai-redis,\n dai-procsy, and dai-vis-server services. 
When upgrading, you can use\n the following commands to deactivate these services:\n systemctl stop dai-minio\n systemctl disable dai-minio\n systemctl stop dai-h2o\n systemctl disable dai-h2o\n systemctl stop dai-redis\n systemctl disable dai-redis\n systemctl stop dai-procsy\n systemctl disable dai-procsy\n systemctl stop dai-vis-server\n systemctl disable dai-vis-server\nEnvironment\n -----------------------------------\n Operating System Min Mem\n ------------------------- ---------\n Ubuntu with GPUs 64 GB\n Ubuntu with CPUs 64 GB\n -----------------------------------\nRequirements\n- Ubuntu 16.04/Ubuntu 18.04/Ubuntu 20.04/Ubuntu 22.04\n- NVIDIA drivers >= is recommended (GPU only).", + "prompt_type": "plain" + }, + { + "output": "About the Install\n- The 'dai' service user is created locally (in /etc/passwd) if it is\n not found by 'getent passwd'. You can override the user by providing\n the DAI_USER environment variable during rpm or dpkg installation. - The 'dai' service group is created locally (in /etc/group) if it is\n not found by 'getent group'. You can override the group by providing\n the DAI_GROUP environment variable during rpm or dpkg installation. - Configuration files are placed in /etc/dai and owned by the 'root'\n user:\n - /etc/dai/config.toml: Driverless AI config file (See config_file\n section for details). - /etc/dai/User.conf: systemd config file specifying the service\n user. - /etc/dai/Group.conf: systemd config file specifying the service\n group. - /etc/dai/EnvironmentFile.conf: systemd config file specifying\n (optional) environment variable overrides. 
- Software files are placed in /opt/h2oai/dai and owned by the 'root'\n user\n- The following directories are owned by the service user so that they\n can be updated by the running software:\n - /opt/h2oai/dai/home: The application's home directory (license\n key files are stored here).", + "prompt_type": "plain" + }, + { + "output": "- /opt/h2oai/dai/log: Log files go here if you are not using\n systemd (if you are using systemd, then use the standard\n journalctl tool). - By default, for Docker or DEB/RPM installs, Driverless AI looks for\n a license key in /opt/h2oai/dai/home/.driverlessai/license.sig. If\n you are installing Driverless AI programmatically, you can copy a\n license key file to that location. For TAR SH installs, the\n equivalent location is /home/.driverlessai, and after\n the license is imported, it is copied under ~/.driverlessai. If no\n license key is found, the application guides you through the process\n of adding one through the UI. - systemd unit files are placed in /usr/lib/systemd/system. - Symbolic links to the configuration files in /etc/dai are\n placed in /etc/systemd/system. If your environment is running an operational systemd, that is the\npreferred way to manage Driverless AI. The package installs the\nfollowing systemd services and a wrapper service:\n- dai: Wrapper service that starts/stops the other three services.", + "prompt_type": "plain" + }, + { + "output": "- dai-h2o: H2O-3 helper process used by Driverless AI. - dai-procsy: Procsy helper process used by Driverless AI. - dai-vis-server: Visualization server helper process used by\n Driverless AI. If you don't have systemd, refer to linux-tarsh for install\ninstructions. Starting NVIDIA Persistence Mode (GPU only)\nIf you have NVIDIA GPUs, you must run the following NVIDIA command. This\ncommand needs to be run after every reboot. For more information:\nhttp://docs.nvidia.com/deploy/driver-persistence/index.html. 
sudo nvidia-smi -pm 1\nInstalling OpenCL\nOpenCL is required for full LightGBM support on GPU-powered systems. To\ninstall OpenCL, run the following as root:\n mkdir -p /etc/OpenCL/vendors && echo \"libnvidia-opencl.so.1\" > /etc/OpenCL/vendors/nvidia.icd && chmod a+r /etc/OpenCL/vendors/nvidia.icd && chmod a+x /etc/OpenCL/vendors/ && chmod a+x /etc/OpenCL\nNote\nIf OpenCL is not installed, then CUDA LightGBM is automatically used. CUDA LightGBM is only supported on Pascal-powered (and later) systems,\nand can be enabled manually with the enable_lightgbm_cuda_support\nconfig.toml setting.", + "prompt_type": "plain" + }, + { + "output": "# Install Driverless AI. sudo dpkg -i |VERSION-deb-lin|\nBy default, the Driverless AI processes are owned by the 'dai' user and\n'dai' group. You can optionally specify a different service user and\ngroup as shown below. Replace and as appropriate. # Temporarily specify service user and group when installing Driverless AI. # dpkg saves these for systemd in the /etc/dai/User.conf and /etc/dai/Group.conf files. sudo DAI_USER=myuser DAI_GROUP=mygroup dpkg -i |VERSION-deb-lin|\nYou may now optionally make changes to /etc/dai/config.toml. Starting Driverless AI\nTo start Driverless AI, use the following command:\n # Start Driverless AI. sudo systemctl start dai\nNote: If you don't have systemd, refer to linux-tarsh for install\ninstructions. Viewing Driverless AI Log Files\nIf you have systemd (preferred):\n sudo systemctl status dai-dai\n sudo journalctl -u dai-dai\nIf you do not have systemd:\n sudo less /opt/h2oai/dai/log/dai.log\n sudo less /opt/h2oai/dai/log/h2o.log\n sudo less /opt/h2oai/dai/log/procsy.log\n sudo less /opt/h2oai/dai/log/vis-server.log\nStopping Driverless AI\nIf you have systemd (preferred):\n # Stop Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Verify. sudo ps -u dai\nIf you do not have systemd:\n # Stop Driverless AI. sudo pkill -U dai\n # The processes should now be stopped. Verify. 
sudo ps -u dai\nUpgrading Driverless AI\nWARNINGS:\n- This release deprecates experiments and MLI models from 1.7.0 and\n earlier. - Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded. We recommend you take the following steps before\n upgrading. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. Requirements\nWe recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere.", + "prompt_type": "plain" + }, + { + "output": "Go to NVIDIA download driver to get the latest NVIDIA Tesla A/T/V/P/K\nseries drivers. For reference on CUDA Toolkit and Minimum Required\nDriver Versions and CUDA Toolkit and Corresponding Driver Versions, see\nhere . Note\nIf you are using K80 GPUs, the minimum required NVIDIA driver version is\n450.80.02. Upgrade Steps\nIf you have systemd (preferred):\n # Stop Driverless AI. sudo systemctl stop dai\n # Make a backup of /opt/h2oai/dai/tmp directory at this time. # Upgrade Driverless AI. sudo dpkg -i |VERSION-deb-lin|\n sudo systemctl daemon-reload\n sudo systemctl start dai\nIf you do not have systemd:\n # Stop Driverless AI. sudo pkill -U dai\n # The processes should now be stopped. Verify. sudo ps -u dai\n # Make a backup of /opt/h2oai/dai/tmp directory at this time. If you do not, all previous data will be lost. # Upgrade and restart. 
sudo dpkg -i |VERSION-deb-lin|\n sudo -H -u dai /opt/h2oai/dai/run-dai.sh\nUninstalling Driverless AI\nIf you have systemd (preferred):\n # Stop Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Verify. sudo ps -u dai\n # Uninstall Driverless AI. sudo dpkg -r dai\n # Purge Driverless AI. sudo dpkg -P dai\nIf you do not have systemd:\n # Stop Driverless AI. sudo pkill -U dai\n # The processes should now be stopped. Verify. sudo ps -u dai\n # Uninstall Driverless AI. sudo dpkg -r dai\n # Purge Driverless AI. sudo dpkg -P dai\nCAUTION! At this point you can optionally completely remove all\nremaining files, including the database (this cannot be undone):\n sudo rm -rf /opt/h2oai/dai\n sudo rm -rf /etc/dai\nNote: The UID and GID are not removed during the uninstall process. These can be removed with userdel and groupdel. However, we DO NOT\nrecommend removing the UID and GID if you plan to re-install Driverless\nAI. If you remove the UID and GID and then reinstall Driverless AI, the\nUID and GID will likely be re-assigned to a different (unrelated)\nuser/group in the future; this may cause confusion if there are any\nremaining files on the filesystem referring to the deleted user or\ngroup.", + "prompt_type": "plain" + }, + { + "output": "pip\ninstall command. Once installed, you can launch a Jupyter notebook and begin using the Driverless AI Python client. 
Installing from Python Package Index (PyPI) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The latest release of the client is available on PyPI and can be installed to your desired Python environment withpip``.\nThe following command installs the latest version of the Python Client:\n\n pip install driverlessai\n\nTo upgrade when new versions of the client are released, run the\nfollowing command:\n\n pip install --upgrade driverlessai\n\nInstalling from Anaconda Cloud\n\nTo install the Python Client as a conda package, use the following\ncommand:\n\n conda install -c h2oai driverlessai", + "prompt_type": "plain" + }, + { + "output": "Wide Datasets in Driverless AI\nA wide dataset with many features comes with its own challenges for\nfeature engineering and model building. In Driverless AI, datasets where number of columns > number of rows are\nconsidered as wide. When running experiments on such datasets,\nDriverless AI automatically enables wide rules that\nextend the limits on the maximum number of allowed features (that can be\nselected for feature evolution and selection) to a large number,\ndisables certain checks like data leakage and shift detection,\nmonotonicity constraints, AutoDoc and pipeline visualization creation. It also enables XGBoost random forest model for modeling, which helps to\navoid overfitting on wide datasets with few rows. See\nenable_wide_rules . A big-wide dataset can result in large models that can run out of memory\non GPUs. To avoid such model failures for XGBoost models (GBM, GLM, RF,\nDART), Driverless AI provides protection against GPU OOM by performing\nautomatic feature selection by building sub-models (with repeats) to\nselect features.", + "prompt_type": "plain" + }, + { + "output": "See\nallow_reduce_features_when_failure \nfor details. Here is an example of config.toml settings for a quick model run on a\nwide dataset. This disables genetic algorithm/tuning/evolution to get a quick final\nmodel. 
It also uses (XGBoost) random forest that is best to avoid\noverfit on wide data with few rows. The following config settings can be\ncopy/pasted in the expert settings GUI TOML to run this model. num_as_cat=false\n target_transformer=\"identity_noclip\"\n included_models=[\"XGBoostRFModel\"]\n included_transformers=[\"OriginalTransformer\"]\n fixed_ensemble_level=1\n make_mojo_scoring_pipeline=\"off\"\n make_pipeline_visualization=\"off\"\n n_estimators_list_no_early_stopping=[200]\n fixed_num_folds=2\n enable_genetic_algorithm=\"off\"\n max_max_bin=128\n reduce_repeats_when_failure=1\nThe reduce_repeats_when_failure controls the repeats, 1 is default. A\nvalue of 3 or more can take longer but can give more accuracy by finding\nthe best features to build a final model on.", + "prompt_type": "plain" + }, + { + "output": "Install on Azure\nThis section describes how to install the Driverless AI image from\nAzure. Note: Prior versions of the Driverless AI installation and upgrade on\nAzure were done via Docker. This is no longer the case as of version\n1.5.2. Watch the installation video here. Note that some of the images in this\nvideo may change between releases, but the installation steps remain the\nsame. 
Environment\n+---------------------------+--------------+---------+----------------+\n| Provider | Instance | Num | Suitable for |\n| | Type | GPUs | |\n+===========================+==============+=========+================+\n| Azure | Standard_NV6 | 1 | E |\n| | | | |\n| - | ---- | ---- | xperimentation |\n| - | -----------+ | ------+ | |\n| - | | | ---- |\n| - | S | 2 | -------------+ |\n| - | | | |\n| | tandard_NV12 | ---- | E |\n| | | ------+ | |\n| | ---- | | xperimentation |\n| | -----------+ | 4 | |\n| | | | ---- |\n| | S | ---- | -------------+ |\n| | | ------+ | |\n| | tandard_NV24 | | Serious |\n| | | 1 | use |\n| | ---- | | |\n| | -----------+ | ---- | ---- |\n| | | ------+ | -------------+ |\n| | Standard_NC6 | | |\n| | | 2 | E |\n| | ---- | | |\n| | -----------+ | ---- | xperimentation |\n| | | ------+ | |\n| | S | | ---- |\n| | | 4 | -------------+ |\n| | tandard_NC12 | | |\n| | | | E |\n| | ---- | | |\n| | -----------+ | | xperimentation |\n| | | | |\n| | S | | ---- |\n| | | | -------------+ |\n| | tandard_NC24 | | |\n| | | | Serious |\n| | | | use |\n+---------------------------+--------------+---------+----------------+\nAbout the Install\n- The 'dai' service user is created locally (in /etc/passwd) if it is\n not found by 'getent passwd'.", + "prompt_type": "plain" + }, + { + "output": "- The 'dai' service group is created locally (in /etc/group) if it is\n not found by 'getent group'. You can override the group by providing\n the DAI_GROUP environment variable during rpm or dpkg installation. - Configuration files are placed in /etc/dai and owned by the 'root'\n user:\n - /etc/dai/config.toml: Driverless AI config file (See config_file\n section for details). - /etc/dai/User.conf: systemd config file specifying the service\n user. - /etc/dai/Group.conf: systemd config file specifying the service\n group. - /etc/dai/EnvironmentFile.conf: systemd config file specifying\n (optional) environment variable overrides. 
- Software files are placed in /opt/h2oai/dai and owned by the 'root'\n user\n- The following directories are owned by the service user so that they\n can be updated by the running software:\n - /opt/h2oai/dai/home: The application's home directory (license\n key files are stored here). - /opt/h2oai/dai/tmp: Experiments and imported data are stored\n here.", + "prompt_type": "plain" + }, + { + "output": "- By default, for Docker or DEB/RPM installs, Driverless AI looks for\n a license key in /opt/h2oai/dai/home/.driverlessai/license.sig. If\n you are installing Driverless AI programmatically, you can copy a\n license key file to that location. For TAR SH installs, the\n equivalent location is /home/.driverlessai, and after\n the license is imported, it is copied under ~/.driverlessai. If no\n license key is found, the application guides you through the process\n of adding one through the UI. - systemd unit files are placed in /usr/lib/systemd/system. - Symbolic links to the configuration files in /etc/dai files are\n placed in /etc/systemd/system. If your environment is running an operational systemd, that is the\npreferred way to manage Driverless AI. The package installs the\nfollowing systemd services and a wrapper service:\n- dai: Wrapper service that starts/stops the other three services. - dai-dai: Main Driverless AI process. - dai-h2o: H2O-3 helper process used by Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "- dai-vis-server: Visualization server helper process used by\n Driverless AI. If you don't have systemd, refer to linux-tarsh for install\ninstructions. Installing the Azure Instance\n1. Log in to your Azure portal at https://portal.azure.com, and click\n the Create a Resource button. 2. Search for and select H2O DriverlessAI in the Marketplace. 3. Click Create. This launches the H2O DriverlessAI Virtual Machine\n creation process. 4. On the Basics tab:\n5. On the Size tab, select your virtual machine size. 
Specify the HDD\n disk type and select a configuration. We recommend using an N-Series\n type, which comes with a GPU. Also note that Driverless AI requires\n 10 GB of free space in order to run and will stop working if less\n than 10 GB is available. We recommend a minimum of 30 GB of disk\n space. Click OK when you are done. 6. On the Settings tab, select or create the Virtual Network and Subnet\n where the VM is going to be located and then click OK.\n7.", + "prompt_type": "plain" + }, + { + "output": "When the validation passes\n successfully, click Create to create the VM. 8. After the VM is created, it will be available under the list of\n Virtual Machines. Select this Driverless AI VM to view the IP\n address of your newly created machine. 9. Connect to Driverless AI with your browser using the IP address\n retrieved in the previous step. Stopping the Azure Instance\nThe Azure instance will continue to run even when you close the Azure\nportal. To stop the instance:\n1. Click the Virtual Machines left menu item. 2. Select the checkbox beside your DriverlessAI virtual machine. 3. On the right side of the row, click the ... button, then select\n Stop. (Note that you can then restart this by selecting Start.) [image]\nUpgrading the Driverless AI Image\nWARNINGS:\n- This release deprecates experiments and MLI models from 1.7.0 and\n earlier. - Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded.", + "prompt_type": "plain" + }, + { + "output": "- Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. 
Upgrading from Version 1.2.2 or Earlier\nIt is not possible to upgrade from version 1.2.2 or earlier to the\nlatest version. You have to manually remove the 1.2.2 container and then\nreinstall the latest Driverless AI version. Be sure to backup your data\nbefore doing this. Upgrading from Version 1.3.0 to 1.5.1\n1. SSH into the IP address of the image instance and copy the existing\n experiments to a backup location:\n2. wget the newer image. Replace VERSION and BUILD below with the\n Driverless AI version. 3. Use the docker load command to load the image:\n4. Run docker images to find the new image tag.", + "prompt_type": "plain" + }, + { + "output": "Start the Driverless AI Docker image and replace TAG below with the\n image tag. Depending on your install version, use the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command. Upgrading from version 1.5.2 or Later\nUpgrading to versions 1.5.2 and later is no longer done via Docker. Instead, perform the following steps if you are upgrading to version\n1.5.2 or later. Replace dai_NEWVERSION.deb below with the new Driverless\nAI version (for example, dai_1.8.4.1_amd64.deb). Note that this upgrade\nprocess inherits the service user and group from /etc/dai/User.conf and\n/etc/dai/Group.conf. You do not need to manually specify the DAI_USER or\nDAI_GROUP environment variables during an upgrade. We recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere. Driverless AI ships with CUDA 11.2.2 for GPUs, but the driver\nmust exist in the host environment. Go to NVIDIA download driver to get the latest NVIDIA Tesla A/T/V/P/K\nseries drivers.", + "prompt_type": "plain" + }, + { + "output": "Sharing Experiments\nThis page describes how to share Driverless AI (DAI) experiments by\nexporting and importing experiments or by using Remote Storage. 
- export_import\n- remote_storage\n -----------------------------------------------------------------------\n Sharing Method Requirements\n ----------------------------------- -----------------------------------\n Exporting and Importing Experiments Requires only DAI\n Experiments \n Remote Storage Requires H2O AI Cloud (HAIC) __\n -----------------------------------------------------------------------\nExporting and Importing Experiments\nAs of version 1.10, DAI supports exporting and importing DAI\nexperiments. You can download experiments as a .dai file that can be\nimported by other DAI users. Exporting an Experiment\nAn experiment can be exported either from the main Experiment listing\npage by clicking the three dot icons to the right of the experiment name\nand selecting Export or from the\ncompleted experiment page by clicking Model\nActions > Export.", + "prompt_type": "plain" + }, + { + "output": "Importing an Experiment\nTo import an experiment, click the Import Experiment button on the\nExperiment listing page, and then select the DAI experiment file you\nwant to import from your local file system. You can also drag the DAI\nexperiment file from your local file system to the Experiment listing\npage. If the selected experiment used custom recipes, the custom recipes\nassociated with the experiment are also imported. Datasets associated with imported experiments are not imported as part\nof the experiment import process. Instead, only a minimal set of\nmetadata is imported. To take advantage of certain features such as\ninterpreting experiments and previewing datasets, you must manually\nimport the datasets associated with the imported experiment. Warning\nTo ensure that the import process is not interrupted, do not refresh the\npage while the experiment is being imported. 
Note\nWhen projects are shared with users, the users with whom the project is\nshared must import the experiments and datasets associated with the\nshared project.", + "prompt_type": "plain" + }, + { + "output": "For more information on HAIC,\nsee the HAIC documentation. Note\nUnsupervised experiments are not currently supported by both Remote\nStorage and H2O MLOps. Remote storage is only available to H2O AI Cloud (HAIC) users. In most\ncases, experiments that are placed in a Project are automatically added\nto Remote Storage. However, if the Project is created by clicking New\nExperiment > Create Leaderboard, the experiments in that Project are not\nautomatically added to Remote Storage. To add an experiment in a\nLeaderboard Project to Remote Storage, navigate to the Project and open\nthe drop-down options menu for the experiment, and then click Link\nRemotely. If a project is shared with you by another DAI user, the experiments and\ndatasets associated with that project are initially greyed out,\nindicating that they live only in the Remote Storage. Before they can be\nviewed and used, you must import them. This can be done by either\nclicking on the IMPORT button at a given row or by clicking the row menu\nand choosing the IMPORT option.", + "prompt_type": "plain" + }, + { + "output": "Both the\nexperiment and its datasets must be imported to use all of the\nexperiment's functionalities. Experiments in Remote Storage are made available in H2O MLOps and can be\nshared with other users. If a DAI instance is terminated and deleted,\nthe Projects associated with that instance of DAI remain saved in Remote\nStorage. Projects saved in Remote Storage are made available in newly\ncreated instances of DAI. This means that in cases where you need to\nkeep an old experiment, model interpretation, or AutoDoc for reference\npurposes, keeping the specific DAI instance containing them isn't\nnecessary. 
Instead, you can create a project, link the relevant\nexperiment and data, and delete the DAI instance. The model can then be\ndeployed to H2O MLOps, from which you can download the AutoDoc\nassociated with the model. In addition, you can create a new DAI\ninstance, import the project, and run and view the model interpretation. Following this practice can help lower costs by eliminating the need to\nkeep specific instances of DAI.", + "prompt_type": "plain" + }, + { + "output": "Sharing With Other Users\nTo share your project with other users, go to the Projects page and open\nthe drop-down menu for the project you want to share, then click Share. In the Sharing window, you can select a specific user and their role\nbefore adding them to the list of users your project is shared with. Select one of the following roles:\n- Default: This role is equivalent to granting write access to a user. Users with this role can make any modification to the shared\n project, including renaming the project, adding datasets, adding\n experiments, adding a note, and rerunning experiments. Users that\n are granted this role can perform any action that they are able to\n perform on projects they create and own. Warning\n Users with the Default role can delete projects that have been\n shared with them. If a user with the Default role deletes a project,\n it is also deleted for both the original owner and other shared\n users. - Reader: This role is equivalent to granting read-only access to a\n user.", + "prompt_type": "plain" + }, + { + "output": "Linux TAR SH\nThe Driverless AI software is available for use in pure user-mode\nenvironments as a self-extracting TAR SH archive. This form of\ninstallation does not require a privileged user to install or to run. This artifact has the same compatibility matrix as the RPM and DEB\npackages (combined), it just comes packaged slightly differently. See\nthose sections for a full list of supported environments. 
The installation steps assume that you have a valid license key for\nDriverless AI. For information on how to obtain a license key for\nDriverless AI, visit https://www.h2o.ai/products/h2o-driverless-ai/. Once obtained, you will be prompted to paste the license key into the\nDriverless AI UI when you first log in. Note\nTo ensure that AutoDoc pipeline visualizations are generated\ncorrectly on native installations, installing fontconfig is recommended. Requirements\n- RedHat 7/RedHat 8 or Ubuntu 16.04/Ubuntu 18.04/Ubuntu 20.04/Ubuntu\n 22.04\n- NVIDIA drivers >= recommended (GPU only).", + "prompt_type": "plain" + }, + { + "output": "Installing OpenCL\nOpenCL is required for full LightGBM support on GPU-powered systems. To\ninstall OpenCL, run the following as root:\n mkdir -p /etc/OpenCL/vendors && echo \"libnvidia-opencl.so.1\" > /etc/OpenCL/vendors/nvidia.icd && chmod a+r /etc/OpenCL/vendors/nvidia.icd && chmod a+x /etc/OpenCL/vendors/ && chmod a+x /etc/OpenCL\nNote\nIf OpenCL is not installed, then CUDA LightGBM is automatically used. CUDA LightGBM is only supported on Pascal-powered (and later) systems,\nand can be enabled manually with the enable_lightgbm_cuda_support\nconfig.toml setting. Installing Driverless AI\nRun the following commands to install the Driverless AI TAR SH. # Install Driverless AI. chmod 755 |VERSION-tar-lin|\n ./|VERSION-tar-lin|\nYou may now cd to the unpacked directory and optionally make changes to\nconfig.toml. Starting Driverless AI\n # Start Driverless AI. ./run-dai.sh\nStarting NVIDIA Persistence Mode\nIf you have NVIDIA GPUs, you must run the following NVIDIA command. This\ncommand needs to be run every reboot.", + "prompt_type": "plain" + }, + { + "output": "sudo nvidia-smi -pm 1\nInstall OpenCL\nOpenCL is required in order to run LightGBM on GPUs. Run the following\nfor Centos7/RH7 based systems using yum and x86. 
yum -y clean all\n yum -y makecache\n yum -y update\n wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/c/clinfo-2.1.17.02.09-1.el7.x86_64.rpm\n wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/o/ocl-icd-2.2.12-1.el7.x86_64.rpm\n rpm -if clinfo-2.1.17.02.09-1.el7.x86_64.rpm\n rpm -if ocl-icd-2.2.12-1.el7.x86_64.rpm\n clinfo\n mkdir -p /etc/OpenCL/vendors && \\\n echo \"libnvidia-opencl.so.1\" > /etc/OpenCL/vendors/nvidia.icd\nLooking at Driverless AI log files\n less log/dai.log\n less log/h2o.log\n less log/procsy.log\n less log/vis-server.log\nStopping Driverless AI\n # Stop Driverless AI. ./kill-dai.sh\nUninstalling Driverless AI\nTo uninstall Driverless AI, just remove the directory created by the\nunpacking process. By default, all files for Driverless AI are contained\nwithin this directory.", + "prompt_type": "plain" + }, + { + "output": "- Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded. We recommend you take the following steps before\n upgrading. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. Requirements\nWe recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere. Driverless AI ships with CUDA 11.2.2 for GPUs, but the driver\nmust exist in the host environment. Go to NVIDIA download driver to get the latest NVIDIA Tesla A/T/V/P/K\nseries drivers. 
For reference on CUDA Toolkit and Minimum Required\nDriver Versions and CUDA Toolkit and Corresponding Driver Versions, see\nhere .", + "prompt_type": "plain" + }, + { + "output": "Importing Datasets\nSupported file types\nDriverless AI supports the following dataset file formats:\n- arff\n- avro\n- bin\n- bz2\n- csv (See note below)\n- dat\n- feather\n- gz\n- jay (See note below)\n- orc (See notes below)\n- parquet (See notes below)\n- pickle / pkl (See note below)\n- tgz\n- tsv\n- txt\n- xls\n- xlsx\n- xz\n- zip\nNote\nAdding datasets\nYou can add datasets using one of the following methods:\nDrag and drop files from your local machine directly onto this page. Note that this method currently works for files that are less than 10\nGB. or\nClick the Add Dataset (or Drag & Drop) button to upload or add a\ndataset. Notes:\n- Upload File, File System, HDFS, S3, Data Recipe URL, and Upload Data\n Recipe are enabled by default. These can be disabled by removing\n them from the enabled_file_systems setting in the config.toml file. (Refer to Using the config.toml file section for more information.) - If File System is disabled, Driverless AI will open a local\n filebrowser by default.", + "prompt_type": "plain" + }, + { + "output": "Refer to\n the Enabling Data Connectors section for more information. - When specifying to add a dataset using Data Recipe URL, the URL must\n point to either an HTML or raw version of the file, a GitHub\n repository or tree, or a local file. When adding or uploading\n datasets via recipes, the dataset will be saved as a .jay file. - Datasets must be in delimited text format. - Driverless AI can detect the following separators: ,|;t\n- When importing a folder, the entire folder and all of its contents\n are read into Driverless AI as a single file. - When importing a folder, all of the files in the folder must have\n the same columns. 
- If you try to import a folder via a data connector on Windows, the\n import will fail if the folder contains files that do not have file\n extensions (the resulting error is usually related to the above\n note). Upon completion, the datasets will appear in the Datasets Overview page. Click on a dataset to open a submenu.", + "prompt_type": "plain" + }, + { + "output": "Tips 'n Tricks\nThis section includes Arno\u2019s tips for running Driverless AI. Pipeline Tips\nGiven training data and a target column to predict, H2O Driverless AI\nproduces an end-to-end pipeline tuned for high predictive performance\n(and/or high interpretability) for general classification and regression\ntasks. The pipeline has only one purpose: to take a test set, row by\nrow, and turn its feature values into predictions. A typical pipeline creates dozens or even hundreds of derived features\nfrom the user-given dataset. Those transformations are often based on\nprecomputed lookup tables and parameterized mathematical operations that\nwere selected and optimized during training. It then feeds all these\nderived features to one or several machine learning algorithms such as\nlinear models, deep learning models, or gradient boosting models (and\nseveral more derived models). If there are multiple models, then their\noutput is post-processed to form the final prediction (either\nprobabilities or target values).", + "prompt_type": "plain" + }, + { + "output": "It is important to note that the training dataset is processed as a\nwhole for better results (e.g., aggregate statistics). For scoring,\nhowever, every row of the test dataset must be processed independently\nto mimic the actual production scenario. To facilitate deployment to various production environments, there are\nmultiple ways to obtain predictions from a completed Driverless AI\nexperiment, either from the GUI, from the R or Python client API, or\nfrom a standalone pipeline. 
GUI\n- Score on Another Dataset - Convenient, parallelized, ideal for\n imported data\n- Download Predictions - Available if a test set was provided during\n training\n- Deploy - Creates an Amazon Lambda endpoint (more endpoints coming\n soon)\n- Diagnostics - Useful if the test set includes a target column\nClient APIs\n- Python client - Use the make_prediction_sync() method. An optional\n argument can be used to get per-row and per-feature 'Shapley'\n prediction contributions. (Pass pred_contribs=True.)", + "prompt_type": "plain" + }, + { + "output": "An optional argument can be\n used to get per-row and per-feature 'Shapley' prediction\n contributions. (Pass pred_contribs=True.) Standalone Pipelines\n- Python - Supports all models and transformers, and supports\n 'Shapley' prediction contributions and MLI reason codes\n- Java - Most portable, low latency, supports all models and\n transformers that are enabled by default (except TensorFlow NLP\n transformers), can be used in Spark/H2O-3/SparklingWater for scale\n- C++ - Highly portable, low latency, standalone runtime with a\n convenient Python and R wrapper\nTime Series Tips\nH2O Driverless AI handles time-series forecasting problems out of the\nbox. All you need to do when starting a time-series experiment is to provide\na regular columnar dataset containing your features. Then pick a target\ncolumn and also pick a \"time column\" - a designated column containing\ntime stamps for every record (row) such as \"April 10 2019 09:13:41\" or\n\"2019/04/10\". If you have a test set for which you want predictions for\nevery record, make sure to provide future time stamps and features as\nwell.", + "prompt_type": "plain" + }, + { + "output": "You can launch the experiment and let\nDriverless AI do the rest. It will even auto-detect multiple time series\nin the same dataset for different groups such as weekly sales for stores\nand departments (by finding the columns that identify stores and\ndepartments to group by). 
Driverless AI will also auto-detect the time\nperiod including potential gaps during weekends, as well as the forecast\nhorizon, a possible time gap between training and testing time periods\n(to optimize for deployment delay) and even keeps track of holiday\ncalendars. Of course, it automatically creates multiple causal\ntime-based validation splits (sliding time windows) for proper\nvalidation, and incorporates many other related grand-master recipes\nsuch as automatic target and non-target lag feature generation as well\nas interactions between lags, first and second derivatives and\nexponential smoothing. - If you find that the automatic lag-based time-series recipe isn't\n performing well for your dataset, we recommend that you try to\n disable the creation of lag-based features by disabling \"Time-series\n lag-based recipe\" in the expert settings.", + "prompt_type": "plain" + }, + { + "output": "Especially for small datasets and short forecast periods, this can\n lead to better results. - If the target column is present in the test set and has partially\n filled information (non-missing values), then Driverless AI will\n automatically augment the model with those future target values to\n make better predictions. This can be used to extend the usable\n lifetime of the model into the future without the need for\n retraining by providing past known outcomes. Contact us if you're\n interested in learning more about test-time augmentation. - For now, training and test datasets should have the same input\n features available, so think about which of the predictors (input\n features) will be available during production time and drop the rest\n (or create your own lag features that can be available to both train\n and test sets). 
- For datasets that are non-stationary in time, create a test set from\n the last temporal portion of data, and create time-based features.", + "prompt_type": "plain" + }, + { + "output": "- We are working on further improving many aspects of our time-series\n recipe. For example, we will add support to automatically generate\n lags for features that are only available in the training set, but\n not in the test set, such as environmental or economic factors. We'll also improve the performance of back-testing using rolling\n windows. Scorer Tips\nA core capability of H2O Driverless AI is the creation of automatic\nmachine learning modeling pipelines for supervised problems. In addition\nto the data and the target column to be predicted, the user can pick a\nscorer. A scorer is a function that takes actual and predicted values\nfor a dataset and returns a number. Looking at this single number is the\nmost common way to estimate the generalization performance of a\npredictive model on unseen data by comparing the model's predictions on\nthe dataset with its actual values. There are more detailed ways to\nestimate the performance of a machine learning model such as residual\nplots (available on the Diagnostics page in Driverless AI), but we will\nfocus on scorers here.", + "prompt_type": "plain" + }, + { + "output": "The default scorer for\nregression problems is RMSE (root mean squared error), where 0 is the\nbest possible value. For example, for a dataset containing 4 rows, if\nactual target values are [1, 1, 10, 0], but predictions are [2, 3, 4,\n-1], then the RMSE is sqrt((1+4+36+1)/4) and the largest misprediction\ndominates the overall score (quadratically). Driverless AI will focus on\nimproving the predictions for the third data point, which can be very\ndifficult when hard-to-predict outliers are present in the data. If\noutliers are not that important to get right, a metric like the MAE\n(mean absolute error) can lead to better results. 
For this case, the MAE\nis (1+2+6+1)/4 and the optimization process will consider all errors\nequally (linearly). Another scorer that is robust to outliers is RMSLE\n(root mean square logarithmic error), which is like RMSE but after\ntaking the logarithm of actual and predicted values - however, it is\nrestricted to positive values. For price predictions, scorers such as\nMAPE (mean absolute percentage error) or MER (median absolute percentage\nerror) are useful, but have problems with zero or small positive values.", + "prompt_type": "plain" + }, + { + "output": "For classification problems, the default scorer is either the AUC (area\nunder the receiver operating characteristic curve) or LOGLOSS\n(logarithmic loss) for imbalanced problems. LOGLOSS focuses on getting\nthe probabilities right (strongly penalizes wrong probabilities), while\nAUC is designed for ranking problems. Gini is similar to the AUC, but\nmeasures the quality of ranking (inequality) for regression problems. For general imbalanced classification problems, AUCPR and MCC are good\nchoices, while F05, F1 and F2 are designed to balance recall against\nprecision. We highly suggest experimenting with different scorers and to study\ntheir impact on the resulting models. Using the Diagnostics page in\nDriverless AI, all applicable scores can be computed for any given\nmodel, no matter which scorer was used during training. Knob Settings Tips\nH2O Driverless AI lets you customize every experiment in great detail\nvia the expert settings. The most important controls however are the\nthree knobs for accuracy, time and interpretability.", + "prompt_type": "plain" + }, + { + "output": "Higher time\nsettings mean the experiment is given more time to converge to an\noptimal solution. Higher interpretability settings reduce the model's\ncomplexity through less feature engineering and using simpler models. 
In\ngeneral, a setting of 1/1/10 will lead to the simplest and usually least\naccurate modeling pipeline, while a setting of 10/10/1 will lead to the\nmost complex and most time consuming experiment possible. Generally, it\nis sufficient to use settings of 7/5/5 or similar, and we recommend to\nstart with the default settings. We highly recommend studying the\nexperiment preview on the left-hand side of the GUI before each\nexperiment - it can help you fine-tune the settings and save time\noverall. Note that you can always finish an experiment early, either by clicking\n'Finish' to get the deployable final pipeline out, or by clicking\n'Abort' to instantly terminate the experiment. In either case, the\nexperiment can be continued seamlessly at a later time with 'Restart\nfrom last Checkpoint' or 'Retrain Final Pipeline', and you can always\nturn the knobs (or modify the expert settings) to adapt to your\nrequirements.", + "prompt_type": "plain" + }, + { + "output": "The predictive performance of the pipeline is a function of both the\ntraining data and the parameters of the pipeline (details of feature\nengineering and modeling). During an experiment, Driverless AI\nautomatically tunes these parameters by scoring candidate pipelines on\nheld out (\"validation\") data. This important validation data is either\nprovided by the user (for experts) or automatically created (random,\ntime-based or fold-based) by Driverless AI. Once a final pipeline has\nbeen created, it should be scored on yet another held out dataset (\"test\ndata\") to estimate its generalization performance. Understanding the\norigin of the training, validation and test datasets (\"the validation\nscheme\") is critical for success with machine learning, and we welcome\nyour feedback and suggestions to help us create the right validation\nschemes for your use cases. Expert Settings Tips\nH2O Driverless AI offers a range of 'Expert Settings' that let you\ncustomize each experiment. 
For example, you can limit the amount of\nfeature engineering by reducing the value for 'Feature engineering\neffort' or 'Max.", + "prompt_type": "plain" + }, + { + "output": "You can also select the model types to be used for training\non the engineered features (such as XGBoost, LightGBM, GLM, TensorFlow,\nFTRL, or RuleFit). For time-series problems where the selected\ntime_column leads to an error message (this can currently happen if the\ntime structure is not regular enough - we are working on an improved\nversion), you can disable the 'Time-series lag-based recipe' and\nDriverless AI will create train/validation splits based on the time\norder instead, which can increase the model's performance if the time\ncolumn is important. Checkpointing Tips\nDriverless AI provides the option to checkpoint experiments to speed up\nfeature engineering and model tuning when running multiple experiments\non the same dataset. By default, H2O Driverless AI automatically scans\nall prior experiments (including aborted ones) for an optimal checkpoint\nto restart from. You can select a specific prior experiment to restart a\nnew experiment from with \u201cRestart from Last Checkpoint\u201d in the\nexperiment listing page (click on the 3 yellow bars on the right).", + "prompt_type": "plain" + }, + { + "output": "Time Series Best Practices\nThis document describes best practices for running time series\nexperiments in Driverless AI. Preparing Your Data\nThe goal for a time series use case is to use historical data to\nforecast. The manner in which the data for forecasting is formatted\ndepends on what we want to do with this forecast. To format your data\nfor forecasting, aggregate the data for each group you are interested in\nfor a specific period of time. The following are three use cases in which the volume of stocks sold in\nthe S&P 500 is predicted. Each use case provides a unique scenario that\ndetermines how the data is formatted. 
Our raw data looks like this:\n[]\n- Use Case 1: Forecast the total volume for a stock tomorrow. - Use Case 2: Forecast the total volume for a stock next month. - Use Case 3: Forecast the total volume of all S&P 500 stocks next\n year. Experiment Setup\nOnce your data is formatted to match your use case, you can begin\nsetting up your experiment. Enabling the Time Series Recipe\nTo begin setting up your experiment, provide the following:\n- Training data\n- Target column\n- Time column (providing the time column enables the Time Series\n recipe)\n[]\nTime Series Settings\nOnce you have provided the time column, you are asked to fill in time\nseries-specific configurations.", + "prompt_type": "plain" + }, + { + "output": "In this example, there is one time series\n per stock (column: Name ), so Name is selected as the time group\n column. - Unavailable Columns at Prediction Time: The columns that are not\n known at time of prediction. In the S&P 500 data example, the\n independent variables are open, high, low, and close. Any variables\n that are not known in advance must be marked as columns that are\n unavailable at prediction time. Driverless AI only uses historical\n values for the independent variables that are marked. - Forecast Horizon: How far in advance you want to forecast. - Gap: Specify whether there is any gap between the training data and\n when you want to start forecasting. For example, if on Monday you\n want to predict the volume of a stock for Wednesday and Thursday,\n then you must provide the following configurations:\nValidation and Testing\nFor a time series use case, always validate and test the models on more\nrecent data. 
In Driverless AI, validation data is automatically created\nby default, and this data is used to evaluate the performance of each\nmodel.", + "prompt_type": "plain" + }, + { + "output": "It is\nnot used by Driverless AI until after the final model has already been\nchosen to prevent any accidental overfitting on the test data. Validation Data\nValidation data is automatically generated by Driverless AI using a\nrolling window approach. The number of time units contained in the\nvalidation data matches the forecast horizon and gap configurations. If\nyou want to forecast the next day, the validation data must consist of\none day's worth of data. If you want to forecast the next five days, the\nvalidation data must consist of five days' worth of data. In the first\nuse case, Driverless AI internally creates splits where the validation\ndata always consists of one day of data. []\nThe total number of data points used to validate models is:\nNumber of validation splits\u2005*\u2005Number of Time Group Columns\u2005*\u2005Forecast Horizon\nIn a use case where the number of Time Group Columns is small and you\nonly want to forecast stock volume for a specific stock, the validation\ndata can become very small.", + "prompt_type": "plain" + }, + { + "output": "There are generally two ways to do this: increase the number of\nvalidation splits done by Driverless AI, or increase the number of Time\nGroup Columns in the dataset. You can increase the number of validation\nsplits performed by Driverless AI by going to the Expert Settings under\nthe Time Series tab:\n[]\nBy default, Driverless AI automatically determines the number of\nvalidation splits based on the Accuracy setting (higher accuracy leads\nto more validation splits). You can override this to a larger number if\nyou know that the number of rows for each validation split will be small\n(that is, a small number of Time Group Columns and/or a small Forecast\nHorizon). 
If you override this, you can see the change reflected in the experiment\npreview. In the following experiment, the number of validation splits\nhas been increased to 20 in the expert settings panel. This change is\nreflected in the experiment preview. []\nAnother way to prevent small validation data is to consider including\nmore Time Group Columns.", + "prompt_type": "plain" + }, + { + "output": "Test Data\nThe test data is an optional dataset provided by the user. Driverless AI\nautomatically calculates the performance of the final model on this\ndataset but does not use it for model selection. The test dataset can be\nlarger than the Forecast Horizon. The first use case involves\nforecasting the next day's stock volume. You can, however, provide\nDriverless AI with one month of test data. In this scenario, Driverless\nAI evaluates how the model does at forecasting the next day's stock\nvolume over the one month period. Scorers\nThe scorer determines how Driverless AI evaluates the success of each\nmodel. []\nThe following is a list of popular scorers with information about which\nuse cases they excel in. []\nInterpreting Models with MLI\nBy clicking on Interpret this Model once an experiment has completed,\nyou can gather more information about how your final model performed on\nthe validation and test data. The first graph in the Model Interpretability module shows the error for\neach date in the validation and test data:\n[]\nYou can also see groups with very high error and very low error:\n[]\nYou can search for a specific group to see the actual time series vs\npredicted:\n[]\nBy clicking on a specific forecasted point, you can see the Shapley\ncontributions for that point.", + "prompt_type": "plain" + }, + { + "output": "[]\nThe Shapley contributions also show the strength and direction of each\npredictor for the selected date. 
Scoring\nBecause Driverless AI is building a traditional machine learning model\n(such as GLM, GBM, Random Forest), it requires a record to score on to\ngenerate a prediction. If you want to use the model to forecast, you\nhave three different scoring options:\n- Using Driverless AI\n- The Python Scoring pipeline\n - Independent of Driverless AI\n - Python whl with scoring function inside\n- The MOJO Scoring pipeline\n - Independent of Driverless AI\n - Java runtime or C++ runtime\nIf you want to use the model to score past the Forecast Horizon, then\nyou can only use Driverless AI or the Python Scoring pipeline for\nscoring. This means that if you provide Driverless AI with training data\nup to 2018-02-07 and ask it to build a model to predict tomorrow's\nvolume, the MOJO can only be used to score for 2018-02-08. The MOJO is stateless. It takes a single record and provides a\nprediction.", + "prompt_type": "plain" + }, + { + "output": "If a\nDriverless AI model shows that the previous day's stock volume is very\nimportant, then once the MOJO is used to start scoring past 2018-02-08,\nit no longer has information about the previous day's stock volume. Predicting Within Forecast Horizon\nIf you want to predict within the Forecast Horizon, you can provide\nDriverless AI, the Python Scoring pipeline, or the MOJO scoring pipeline\nwith the record that you want to predict for. Consider the following\nexample:\nThe training data ends on Friday 2018-01-05 and you want to forecast the\nnext business day's stock volume. Therefore, Monday 2018-01-08 is within\nthe Forecast Horizon. To predict the Stock volume for Stock: AAL on\n2018-01-08, provide any scoring method with the following data. []\nThe output is the volume prediction. Note: Because open, high, low, and close are not known at the time of\nprediction, these are filled in with NAs. 
Predicting Outside Forecast Horizon\nIf you now want to use the model to predict past 2018-01-08, then you\ncan only use Driverless AI or the Python scoring pipeline to score\nbecause the MOJO is stateless and cannot be used outside of the Forecast\nHorizon.", + "prompt_type": "plain" + }, + { + "output": "In the case where\nyou want to forecast for 2018-01-09, you must tell the model what\nhappened on 2018-01-08 (this date was not in the training data, so\nDriverless AI does not know what ended up happening on that date). In order to score for 2018-01-09, provide Driverless AI with the\nfollowing data. []\nThe model now returns two predictions: one for 2018-01-08 and one for\n2018-01-09 (the prediction of interest). Other Approaches\nUsing the IID Recipe\nSometimes it can be helpful to try building an experiment without the\nTime Series recipe even if you have a forecasting use case. The Time\nSeries recipe relies heavily on lagging the data, which means that it is\nmost helpful for cases where the past behavior is predictive. If you\nhave a use case where there is no strong temporal trend, then it may be\nhelpful to use Driverless AI without the Time Series recipe turned on. You can do this by simply not providing a Time Column when setting up\nthe experiment. Notes:\n- If you decide to try the model without Time Series turned on, make\n sure to provide a test dataset that is out of time.", + "prompt_type": "plain" + }, + { + "output": "Model Performance on Another Dataset\nThe Diagnose Model on New Dataset option lets you view model performance\nfor multiple scorers based on existing model and dataset. On the completed experiment page, click the Diagnose Model on New\nDataset button. Note: You can also diagnose a model by selecting Diagnostics from the\ntop menu, then selecting an experiment and test dataset. []\nSelect a dataset to use when diagnosing this experiment. Note that the\ndataset must include the target column that is in the original dataset. 
At this point, Driverless AI will begin calculating all available scores\nfor the experiment. When the diagnosis is complete, it will be available on the Model\nDiagnostics page. Click on the new diagnosis. From this page, you can\ndownload predictions. You can also view scores and metric plots. The\nplots are interactive. Click a graph to enlarge. In the enlarged view,\nyou can hover over the graph to view details for a specific point. You\ncan also download the graph in the enlarged view.", + "prompt_type": "plain" + }, + { + "output": "New Experiments\nThis page describes how to start a new experiment in Driverless AI. Note\nAn experiment setup wizard that guides you through the process of\nsetting up an experiment is also available. For more information, see\ndai_wizard. 1. Run an experiment by selecting [Click for Actions] button beside the\n training dataset that you want to use. Click Predict to begin an\n experiment. Alternatively, you can click the New Experiment ->\n Standard Setup button on the Experiments page, which prompts you to\n select a training dataset. (To go to the _dai_wizard, click New\n Experiment -> Wizard Setup.) Clicking Standard Setup takes you\n directly to the dataset list page:\nYou can also get to the dataset list page from the Experiment Setup page\nby clicking Training Dataset, Test Dataset, or Validation Dataset. The\ndataset list page lets you view a list of datasets that are available\nfor selection. You can also click the link icon next to a particular\ndataset to open the Dataset Details page for that dataset in a new\nbrowser tab.", + "prompt_type": "plain" + }, + { + "output": "2. The Experiment Settings form displays and auto-fills with the\n selected dataset. Optionally enter a custom name for this\n experiment. If you do not add a name, Driverless AI will create one\n for you. 3. Optionally specify a validation dataset and/or a test dataset. 4. Specify the target (response) column. 
Note that not all explanatory\n functionality will be available for multiclass classification\n scenarios (scenarios with more than two outcomes). When the target\n column is selected, Driverless AI automatically provides the target\n column type and the number of rows. If this is a classification\n problem, then the UI shows unique and frequency statistics (Target\n Freq/Most Freq) for numerical columns. If this is a regression\n problem, then the UI shows the dataset mean and standard deviation\n values. 5. The next step is to set the parameters and settings for the\n experiment. (Refer to the Experiment Settings section for more\n information about these settings.)", + "prompt_type": "plain" + }, + { + "output": "Available parameters and\n settings include the following:\n6. After your settings are made, review the Experiment Preview to learn\n what each of the settings means. Note: When changing the algorithms\n used via expert-settings, you may notice that those changes are not\n applied. Driverless AI determines whether to include models and/or\n recipes based on a hierarchy of those expert settings. Refer to the\n Why do my selected algorithms not show up in the Experiment Preview?\n FAQ for more information. 7. Click Launch Experiment to start the experiment. Understanding the Experiment Page\nIn addition to the status, as an experiment is running, the UI also\ndisplays the following:\n- Details about the dataset. - The iteration data (internal validation) for each cross validation\n fold along with the specified scorer value. Click on a specific\n iteration or drag to view a range of iterations. Double click in the\n graph to reset the view.", + "prompt_type": "plain" + }, + { + "output": "During the iteration, Driverless AI\n will train n models. (This is called individuals in the experiment\n preview.) So for any column, you may see the score value for those n\n models for each iteration on the graph. - The variable importance values. 
To view variable importance for a\n specific iteration, just select that iteration in the Iteration Data\n graph. The Variable Importance list will automatically update to\n show variable importance information for that iteration. Hover over\n an entry to view more info. - CPU/Memory information along with Insights (for\n time-series experiments), Scores, Notifications, Logs, and\n Trace info. (Note that Trace is used for development/debugging and\n to show what the system is doing at that moment.) - For classification problems, the lower right section includes a\n toggle between an ROC curve, Precision-Recall graph, Lift chart,\n Gains chart, and GPU Usage information (if GPUs are available).", + "prompt_type": "plain" + }, + { + "output": "Predicted chart, and GPU\n Usage information (if GPUs are available). (Refer to the Experiment\n Graphs section for more information.) Upon completion, an Experiment\n Summary section will populate in the lower right section. - The bottom portion of the experiment screen will show any warnings\n that Driverless AI encounters. You can hide this pane by clicking\n the x icon. []\nFinishing/Aborting Experiments\nYou can finish and/or abort experiments that are currently running. - Finish: Click the Finish button to stop a running experiment. Driverless AI will end the experiment and then complete the\n ensembling and the deployment package. - Abort: After clicking Finish, you have the option to click Abort,\n which terminates the experiment. (You will be prompted to confirm\n the abort.) Aborted experiments will display on the Experiments\n page as Failed. 
You can restart aborted experiments by clicking\n the right side of the experiment, then selecting Restart from Last\n Checkpoint.", + "prompt_type": "plain" + }, + { + "output": "Experiment Settings\n\nThis section includes settings that can be used to customize the\nexperiment like total runtime, reproducibility level, pipeline building,\nfeature brain control, adding config.toml settings and more.", + "prompt_type": "plain" + }, + { + "output": "max_runtime_minutes``\n\nMax Runtime in Minutes Before Triggering the Finish Button\n\nSpecify the maximum runtime in minutes for an experiment. This is\nequivalent to pushing the Finish button once half of the specified time\nvalue has elapsed. Note that the overall enforced runtime is only an\napproximation.\n\nThis value defaults to 1440, which is the equivalent of a 24 hour\napproximate overall runtime. The Finish button will be automatically\nselected once 12 hours have elapsed, and Driverless AI will subsequently\nattempt to complete the overall experiment in the remaining 12 hours.\nSet this value to 0 to disable this setting.\n\nNote that this setting applies per experiment, so if building\nleaderboard models (n), it will apply to each experiment separately (i.e.,\ntotal allowed runtime will be n*24hrs. This time estimate assumes\nrunning each experiment one at a time, sequentially)", + "prompt_type": "plain" + }, + { + "output": "max_runtime_minutes_until_abort``\n\nMax Runtime in Minutes Before Triggering the Abort Button\n\nSpecify the maximum runtime in minutes for an experiment before\ntriggering the abort button. This option preserves experiment artifacts\nthat have been generated for the summary and log zip files while\ncontinuing to generate additional artifacts. This value defaults to\n10080 mins (7 days).\n\nNote that this setting applies per experiment, so if building\nleaderboard models (say n), it will apply to each experiment\nseparately (i.e., total allowed runtime will be n*7days. 
This time estimate\nassumes running each experiment one at a time, sequentially). Also see\ntime_abort .", + "prompt_type": "plain" + }, + { + "output": "pipeline-building-recipe---------------------------- .. container:: dropdown **Pipeline Building Recipe** Specify the Pipeline Building recipe type (overrides GUI settings). Select from the following: - **Auto**: Specifies that all models and features are automatically determined by experiment settings, config.toml settings, and the feature engineering effort. (Default) - **Compliant**: Similar to **Auto** except for the following: - Interpretability is set to 10. - Only uses GLM or booster as 'gblinear'. - :ref:`Fixed ensemble level ` is set to 0. - :ref:`Feature brain level ` is set to 0. - Max feature interaction depth is set to 1 i.e no interactions. - Target transformers is set to 'identity' for regression. - Does not use :ref:`distribution shift ` detection. - :ref:`monotonicity_constraints_correlation_threshold ` is set to 0.", + "prompt_type": "plain" + }, + { + "output": "- Drops features that are not correlated with target by at least 0.01. See :ref:`monotonicity-constraints-drop-low-correlation-features ` and :ref:`monotonicity-constraints-correlation-threshold `. - Does not build an ensemble model i.e. set fixed_ensemble_level=0 - No :ref:`feature brain ` is used to ensure every restart is identical. - :ref:`Interaction depth ` is set to 1 i.e no multi-feature interactions done to avoid complexity. - No target transformations applied for regression problems i.e sets :ref:`target_transformer ` to 'identity'. The equivalent config.toml parameter is recipe=['monotonic_gbm']. - :ref:`num_as_cat ` feature transformation is disabled. 
- List of included_transformers | 'OriginalTransformer', #numeric (no clustering, no interactions, no num->cat) | 'CatOriginalTransformer', 'RawTransformer','CVTargetEncodeTransformer', 'FrequentTransformer','WeightOfEvidenceTransformer','OneHotEncodingTransformer', #categorical (but no num-cat) | 'CatTransformer','StringConcatTransformer', # big data only | 'DateOriginalTransformer', 'DateTimeOriginalTransformer', 'DatesTransformer', 'DateTimeDiffTransformer', 'IsHolidayTransformer', 'LagsTransformer', 'EwmaLagsTransformer', 'LagsInteractionTransformer', 'LagsAggregatesTransformer',#dates/time | 'TextOriginalTransformer', 'TextTransformer', 'StrFeatureTransformer', 'TextCNNTransformer', 'TextBiGRUTransformer', 'TextCharCNNTransformer', 'BERTTransformer',#text | 'ImageOriginalTransformer', 'ImageVectorizerTransformer'] #image For reference also see :ref:`Monotonicity Constraints in Driverless AI `.", + "prompt_type": "plain" + }, + { + "output": "- The test set is concatenated with the train set, with the target marked as missing - Transformers that do not use the target are allowed tofit_transform`` across the entirety of the train,\n validation, and test sets. - Has several config.toml expert options\n open-up limits. - nlp_model: Only enable NLP BERT models based on PyTorch to process\n pure text. To avoid slowdown when using this recipe, enabling one or\n more GPUs is strongly recommended. For more information, see\n nlp-in-dai. - included_models = ['TextBERTModel', 'TextMultilingualBERTModel',\n 'TextXLNETModel', 'TextXLMModel','TextRoBERTaModel',\n 'TextDistilBERTModel', 'TextALBERTModel', 'TextCamemBERTModel',\n 'TextXLMRobertaModel'] - enable_pytorch_nlp_transformer = 'off' -\n enable_pytorch_nlp_model = 'on'\n - nlp_transformer: Only enable PyTorch based BERT transformers that\n process pure text. 
To avoid slowdown when using this recipe,\n enabling one or more GPUs is strongly recommended.", + "prompt_type": "plain" + }, + { + "output": "- included_transformers = ['BERTTransformer']\n - excluded_models = ['TextBERTModel', 'TextMultilingualBERTModel',\n 'TextXLNETModel', 'TextXLMModel','TextRoBERTaModel',\n 'TextDistilBERTModel', 'TextALBERTModel', 'TextCamemBERTModel',\n 'TextXLMRobertaModel'] - enable_pytorch_nlp_transformer = 'on' -\n enable_pytorch_nlp_model = 'off'\n - image_model: Only enable image models that process pure images\n (ImageAutoModel). To avoid slowdown when using this recipe, enabling\n one or more GPUs is strongly recommended. For more information, see\n image-model. Notes:\n - This option disables the Genetic Algorithm (GA). - Image insights are only available when this option is selected. - image_transformer: Only enable the ImageVectorizer transformer,\n which processes pure images. For more information, see\n image-embeddings. - unsupervised: Only enable unsupervised transformers, models and\n scorers. See for reference. - gpus_max: Maximize use of GPUs (e.g.", + "prompt_type": "plain" + }, + { + "output": "enable_genetic_algorithm---------------------------- .. container:: dropdown **Enable Genetic Algorithm for Selection and Tuning of Features and Models** Specify whether to enable :ref:`genetic algorithm ` for selection and hyper-parameter tuning of features and models: - **auto**: Default value is 'auto'. This is same as 'on' unless it is a pure NLP or Image experiment. - **on**: Driverless AI genetic algorithm is used for feature engineering and model tuning and selection. - **Optuna**: When 'Optuna' is selected, model hyperparameters are tuned with :ref:`Optuna ` and Driverless AI genetic algorithm is used for feature engineering. In the Optuna case, the scores shown in the iteration panel are the best score and trial scores. Optuna mode currently only uses Optuna for XGBoost, LightGBM, and CatBoost (custom recipe). 
If Pruner is enabled, as is default, Optuna mode disables mutations of evaluation metric (eval_metric) so pruning uses same metric across trials to compare.", + "prompt_type": "plain" + }, + { + "output": "tournament_style``\nTournament Model for Genetic Algorithm\nSelect a method to decide which models are best at each iteration. This\nis set to Auto by default. Choose from the following:\n- auto: Choose based upon accuracy and interpretability\n- uniform: all individuals in population compete to win as best (can\n lead to all, e.g. LightGBM models in final ensemble, which may not\n improve ensemble performance due to lack of diversity)\n- fullstack: Choose from optimal model and feature types\n- feature: individuals with similar feature types compete (good if\n target encoding, frequency encoding, and other feature sets lead to\n good results)\n- model: individuals with same model type compete (good if multiple\n models do well but some models that do not do as well still\n contribute to improving ensemble)\nFor each case, a round robin approach is used to choose best scores\namong type of models to choose from. If enable_genetic_algorithm=='Optuna', then every individual is\nself-mutated without any tournament during the genetic algorithm .", + "prompt_type": "plain" + }, + { + "output": "make_python_scoring_pipeline``\n\nMake Python Scoring Pipeline\n\nSpecify whether to automatically build a Python Scoring Pipeline for the\nexperiment. Select On or Auto (default) to make the Python Scoring\nPipeline immediately available for download when the experiment is\nfinished. Select Off to disable the automatic creation of the Python\nScoring Pipeline.", + "prompt_type": "plain" + }, + { + "output": "make_mojo_scoring_pipeline``\n\nMake MOJO Scoring Pipeline\n\nSpecify whether to automatically build a MOJO (Java) Scoring Pipeline\nfor the experiment. Select On to make the MOJO Scoring Pipeline\nimmediately available for download when the experiment is finished. 
With\nthis option, any capabilities that prevent the creation of the pipeline\nare dropped. Select Off to disable the automatic creation of the MOJO\nScoring Pipeline. Select Auto (default) to attempt to create the MOJO\nScoring Pipeline without dropping any capabilities.", + "prompt_type": "plain" + }, + { + "output": "mojo_for_predictions------------------------ .. container:: dropdown **Allow Use of MOJO for Making Predictions** Specify whether to use MOJO for making fast, low-latency predictions after the experiment has finished. When this is set to **Auto** (default), the MOJO is only used if the number of rows is equal to or below the value specified bymojo_for_predictions_max_rows``.", + "prompt_type": "plain" + }, + { + "output": "reduce_mojo_size-------------------- .. container:: dropdown **Attempt to Reduce the Size of the MOJO (Small MOJO)** Specify whether to attempt to create a small MOJO scoring pipeline when the experiment is being built. A smaller MOJO leads to less memory footprint during scoring. This setting attempts to reduce the mojo size by limiting experiment's maximum :ref:`interaction depth ` to **3**, setting :ref:`ensemble level ` to **0** i.e no ensemble model for final pipeline and limiting the :ref:`maximum number of features ` in the model to **200**. Note that these settings in some cases can affect the overall model's predictive accuracy as it is limiting the complexity of the feature engineering and model building space. This is disabled by default. The equivalent config.toml setting isreduce_mojo_size``", + "prompt_type": "plain" + }, + { + "output": "make_pipeline_visualization``\n\nMake Pipeline Visualization\n\nSpecify whether to create a visualization of the scoring pipeline at the\nend of an experiment. This is set to Auto by default. Note that the\nVisualize Scoring Pipeline feature is experimental and is not available\nfor deprecated models. 
Visualizations are available for all newly\ncreated experiments.", + "prompt_type": "plain" + }, + { + "output": "benchmark_mojo_latency``\n\nMeasure MOJO Scoring Latency\n\nSpecify whether to measure the MOJO scoring latency at the time of MOJO\ncreation. This is set to Auto by default. In this case, MOJO scoring\nlatency will be measured if the pipeline.mojo file size is less than 100\nMB.", + "prompt_type": "plain" + }, + { + "output": "mojo_building_timeout``\n\nTimeout in Seconds to Wait for MOJO Creation at End of Experiment\n\nSpecify the amount of time in seconds to wait for MOJO creation at the\nend of an experiment. If the MOJO creation process times out, a MOJO can\nstill be made from the GUI or the R and Python clients (the timeout\nconstraint is not applied to these). This value defaults to 1800 sec (30\nminutes).", + "prompt_type": "plain" + }, + { + "output": "mojo_building_parallelism``\n\nNumber of Parallel Workers to Use During MOJO Creation\n\nSpecify the number of parallel workers to use during MOJO creation.\nHigher values can speed up MOJO creation but use more memory. Set this\nvalue to -1 (default) to use all physical cores.", + "prompt_type": "plain" + }, + { + "output": "kaggle_username``\n\nKaggle Username\n\nOptionally specify your Kaggle username to enable automatic submission\nand scoring of test set predictions. If this option is specified, then\nyou must also specify a value for the Kaggle Key option. If you don't\nhave a Kaggle account, you can sign up at https://www.kaggle.com.", + "prompt_type": "plain" + }, + { + "output": "kaggle_key``\n\nKaggle Key\n\nSpecify your Kaggle API key to enable automatic submission and scoring\nof test set predictions. If this option is specified, then you must also\nspecify a value for the Kaggle Username option. 
For more information on\nobtaining Kaggle API credentials, see\nhttps://github.com/Kaggle/kaggle-api#api-credentials.", + "prompt_type": "plain" + }, + { + "output": "kaggle_timeout``\n\nKaggle Submission Timeout in Seconds\n\nSpecify the Kaggle submission timeout in seconds. This value defaults to\n120 sec.", + "prompt_type": "plain" + }, + { + "output": "min_num_rows``\n\nMin Number of Rows Needed to Run an Experiment\n\nSpecify the minimum number of rows that a dataset must contain in order\nto run an experiment. This value defaults to 100.", + "prompt_type": "plain" + }, + { + "output": "reproducibility_level``\n\nReproducibility Level\n\nSpecify one of the following levels of reproducibility. Note that this\nsetting is only used when the reproducible option is enabled in the\nexperiment:\n\n- 1 = Same experiment results for same O/S, same CPU(s), and same\n GPU(s) (Default)\n- 2 = Same experiment results for same O/S, same CPU architecture, and\n same GPU architecture\n- 3 = Same experiment results for same O/S, same CPU architecture\n (excludes GPUs)\n- 4 = Same experiment results for same O/S (best approximation)\n\nThis value defaults to 1.", + "prompt_type": "plain" + }, + { + "output": "seed``\n\nRandom Seed\n\nSpecify a random seed for the experiment. When a seed is defined and the\nreproducible button is enabled (not by default), the algorithm will\nbehave deterministically.", + "prompt_type": "plain" + }, + { + "output": "allow_different_classes_across_fold_splits``\n\nAllow Different Sets of Classes Across All Train/Validation Fold Splits\n\n(Note: Applicable for multiclass problems only.) Specify whether to\nenable full cross-validation (multiple folds) during feature evolution\nas opposed to a single holdout split. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "save_validation_splits``\n\nStore Internal Validation Split Row Indices\n\nSpecify whether to store internal validation split row indices. 
This\nincludes pickles of (train_idx, valid_idx) tuples (numpy row indices for\noriginal training data) for all internal validation folds in the\nexperiment summary ZIP file. Enable this setting for debugging purposes.\nThis setting is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "max_num_classes``\n\nMax Number of Classes for Classification Problems\n\nSpecify the maximum number of classes to allow for a classification\nproblem. A higher number of classes may make certain processes more\ntime-consuming. Memory requirements also increase with a higher number\nof classes. This value defaults to 200.", + "prompt_type": "plain" + }, + { + "output": "max_num_classes_compute_roc------------------------------- .. container:: dropdown **Max Number of Classes to Compute ROC and Confusion Matrix for Classification Problems** Specify the maximum number of classes to use when computing the ROC and CM. When this value is exceeded, the reduction type specified byroc_reduce_type`` is applied. This value defaults to 200 and cannot\n\n be lower than 2.", + "prompt_type": "plain" + }, + { + "output": "max_num_classes_client_and_gui---------------------------------- .. container:: dropdown **Max Number of Classes to Show in GUI for Confusion Matrix** Specify the maximum number of classes to show in the GUI for CM, showing firstmax_num_classes_client_and_gui`` labels. This value\n\n defaults to 10, but any value beyond 6 will result in visually\n truncated diagnostics. Note that if this value is changed in the\n config.toml and the server is restarted, then this setting will only\n modify client-GUI launched diagnostics. To control experiment plots,\n this value must be changed in the expert settings panel.", + "prompt_type": "plain" + }, + { + "output": "roc_reduce_type------------------- .. 
container:: dropdown **ROC/CM Reduction Technique for Large Class Counts** Specify the ROC confusion matrix reduction technique used for large class counts: - **Rows** (Default): Reduce by randomly sampling rows - **Classes**: Reduce by truncating classes to no more than the value specified bymax_num_classes_compute_roc``", + "prompt_type": "plain" + }, + { + "output": "max_rows_cm_ga``\n\nMaximum Number of Rows to Obtain Confusion Matrix Related Plots During\nFeature Evolution\n\nSpecify the maximum number of rows to obtain confusion matrix related\nplots during feature evolution. Note that this doesn't limit final model\ncalculation.", + "prompt_type": "plain" + }, + { + "output": "use_feature_brain_new_experiments``\n\nWhether to Use Feature Brain for New Experiments\n\nSpecify whether to use feature_brain results even if running new\nexperiments. Feature brain can be risky with some types of changes to\nexperiment setup. Even rescoring may be insufficient, so by default this\nis False. For example, one experiment may have training=external\nvalidation by accident, and get high score, and while\nfeature_brain_reset_score='on' means we will rescore, it will have\nalready seen during training the external validation and leak that data\nas part of what it learned from. If this is False, feature_brain_level\njust sets possible models to use and logs/notifies, but does not use\nthese feature brain cached models.", + "prompt_type": "plain" + }, + { + "output": "feature_brain_level``\nModel/Feature Brain Level\nSpecify whether to use H2O.ai brain, which enables local caching and\nsmart re-use (checkpointing) of prior experiments to generate useful\nfeatures and models for new experiments. It can also be used to control\ncheckpointing for experiments that have been paused or interrupted. 
When enabled, this will use the H2O.ai brain cache if the cache file:\n - has any matching column names and types for a similar experiment\n type\n - has classes that match exactly\n - has class labels that match exactly\n - has basic time series choices that match\n - the interpretability of the cache is equal or lower\n - the main model (booster) is allowed by the new experiment\n- -1: Don't use any brain cache (default)\n- 0: Don't use any brain cache but still write to cache. Use case:\n Want to save the model for later use, but we want the current model\n to be built without any brain models. - 1: Smart checkpoint from the latest best individual model.", + "prompt_type": "plain" + }, + { + "output": "The match may not be precise,\n so use with caution. - 2: Smart checkpoint if the experiment matches all column names,\n column types, classes, class labels, and time series options\n identically. Use case: Driverless AI scans through the H2O.ai brain\n cache for the best models to restart from. - 3: Smart checkpoint like level #1 but for the entire population. Tune only if the brain population is of insufficient size. Note that\n this will re-score the entire population in a single iteration, so\n it appears to take longer to complete first iteration. - 4: Smart checkpoint like level #2 but for the entire population. Tune only if the brain population is of insufficient size. Note that\n this will re-score the entire population in a single iteration, so\n it appears to take longer to complete first iteration. - 5: Smart checkpoint like level #4 but will scan over the entire\n brain cache of populations to get the best scored individuals. 
Note\n that this can be slower due to brain cache scanning if the cache is\n large.", + "prompt_type": "plain" + }, + { + "output": "feature_brain2``\nFeature Brain Save Every Which Iteration\nSave feature brain iterations every iter_num %\nfeature_brain_iterations_save_every_iteration == 0, to be able to\nrestart/refit with which_iteration_brain >= 0. This is disabled (0) by\ndefault. - -1: Don't use any brain cache. - 0: Don't use any brain cache but still write to cache. - 1: Smart checkpoint if an old experiment_id is passed in (for\n example, via running \"resume one like this\" in the GUI). - 2: Smart checkpoint if the experiment matches all column names,\n column types, classes, class labels, and time series options\n identically. (default)\n- 3: Smart checkpoint like level #1 but for the entire population. Tune only if the brain population is of insufficient size. - 4: Smart checkpoint like level #2 but for the entire population. Tune only if the brain population is of insufficient size. - 5: Smart checkpoint like level #4 but will scan over the entire\n brain cache of populations (starting from resumed experiment if\n chosen) in order to get the best scored individuals.", + "prompt_type": "plain" + }, + { + "output": "feature_brain3``\n\nFeature Brain Restart from Which Iteration\n\nWhen performing restart or re-fit of type feature_brain_level with a\nresumed ID, specify which iteration to start from instead of only last\nbest. Available options include:\n\n- -1: Use the last best\n- 1: Run one experiment with\n feature_brain_iterations_save_every_iteration=1 or some other number\n- 2: Identify which iteration brain dump you want to restart/refit\n from\n- 3: Restart/Refit from the original experiment, setting\n which_iteration_brain to that number here in expert settings.\n\nNote: If restarting from a tuning iteration, this will pull in the\nentire scored tuning population and use that for feature evolution. 
This\nvalue defaults to -1.", + "prompt_type": "plain" + }, + { + "output": "feature_brain4``\n\nFeature Brain Refit Uses Same Best Individual\n\nSpecify whether to use the same best individual when performing a refit.\nDisabling this setting allows the order of best individuals to be\nrearranged, leading to a better final result. Enabling this setting lets\nyou view the exact same model or feature with only one new feature\nadded. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "feature_brain5``\n\nFeature Brain Adds Features with New Columns Even During Retraining of\nFinal Model\n\nSpecify whether to add additional features from new columns to the\npipeline, even when performing a retrain of the final model. Use this\noption if you want to keep the same pipeline regardless of new columns\nfrom a new dataset. New data may lead to new dropped features due to\nshift or leak detection. Disable this to avoid adding any columns as new\nfeatures so that the pipeline is perfectly preserved when changing data.\nThis is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "force_model_restart_to_defaults``\n\nRestart-Refit Use Default Model Settings If Model Switches\n\nWhen restarting or refitting, specify whether to use the model class's\ndefault settings if the original model class is no longer available. If\nthis is disabled, the original hyperparameters will be used instead.\n(Note that this may result in errors.) This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "min_dai_iterations``\n\nMin DAI Iterations\n\nSpecify the minimum number of Driverless AI iterations for an\nexperiment. This can be used during restarting, when you want to\ncontinue for longer despite a score not improving. This value defaults\nto 0.", + "prompt_type": "plain" + }, + { + "output": "target_transformer---------------------- .. 
container:: dropdown **Select Target Transformation of the Target for Regression Problems** Specify whether to automatically select target transformation for regression problems. Available options include: - auto - identity - identity_noclip - center - standardize - unit_box - log - log_noclip - square - sqrt - double_sqrt - inverse - logit - sigmoid If set to **auto** (default), Driverless AI will automatically pick the best target transformer if the **Accuracy** is set to the value of thetune_target_transform_accuracy_switchconfiguration option (defaults to 5) or larger. Selecting **identity_noclip** automatically turns off any target transformations. All transformers except for **center**, **standardize**, **identity_noclip** and **log_noclip** perform clipping to constrain the predictions to the domain of the target in the training data, so avoid them if you want to enable extrapolations.", + "prompt_type": "plain" + }, + { + "output": "fixed_num_folds_evolution``\n\nNumber of Cross-Validation Folds for Feature Evolution\n\nSpecify the fixed number of cross-validation folds (if >= 2) for feature\nevolution. Note that the actual number of allowed folds can be less than\nthe specified value, and that the number of allowed folds is determined\nat the time an experiment is run. This value defaults to -1 (auto).", + "prompt_type": "plain" + }, + { + "output": "fixed_num_folds``\n\nNumber of Cross-Validation Folds for Final Model\n\nSpecify the fixed number of cross-validation folds (if >= 2) for the\nfinal model. Note that the actual number of allowed folds can be less\nthan the specified value, and that the number of allowed folds is\ndetermined at the time an experiment is run. This value defaults to -1\n(auto).", + "prompt_type": "plain" + }, + { + "output": "fixed_only_first_fold_model``\n\nForce Only First Fold for Models\n\nSpecify whether to force only the first fold for models. Select from\nAuto (Default), On, or Off. 
Set \"on\" to force only first fold for\nmodels. This is useful for quick runs regardless of data.", + "prompt_type": "plain" + }, + { + "output": "feature_evolution_data_size``\n\nMax Number of Rows Times Number of Columns for Feature Evolution Data\nSplits\n\nSpecify the maximum number of rows allowed for feature evolution data\nsplits (not for the final pipeline). This value defaults to 100,000,000.", + "prompt_type": "plain" + }, + { + "output": "final_pipeline_data_size``\n\nMax Number of Rows Times Number of Columns for Reducing Training Dataset\n\nSpecify the upper limit on the number of rows times the number of\ncolumns for training the final pipeline. This value defaults to\n500,000,000.", + "prompt_type": "plain" + }, + { + "output": "max_validation_to_training_size_ratio_for_final_ensemble``\n\nMaximum Size of Validation Data Relative to Training Data\n\nSpecify the maximum size of the validation data relative to the training\ndata. Smaller values can make the final pipeline model training process\nquicker. Note that final model predictions and scores will always be\nprovided on the full dataset provided. This value defaults to 2.0.", + "prompt_type": "plain" + }, + { + "output": "force_stratified_splits_for_imbalanced_threshold_binary``\n\nPerform Stratified Sampling for Binary Classification If the Target Is\nMore Imbalanced Than This\n\nFor binary classification experiments, specify a threshold ratio of\nminority to majority class for the target column beyond which stratified\nsampling is performed. If the threshold is not exceeded, random sampling\nis performed. This value defaults to 0.01. 
You can choose to always\nperform random sampling by setting this value to 0, or to always perform\nstratified sampling by setting this value to 1.", + "prompt_type": "plain" + }, + { + "output": "last_recipe``\n\nlast_recipe\n\nInternal helper to allow memory of if changed recipe", + "prompt_type": "plain" + }, + { + "output": "feature_brain_save_every_iteration``\n\nFeature Brain Save every which iteration\n\nSpecify whether to save feature brain iterations every iter_num %\nfeature_brain_iterations_save_every_iteration == 0, to be able to\nrestart/refit with which_iteration_brain >= 0. Set to 0 to disable this\nsetting.", + "prompt_type": "plain" + }, + { + "output": "which_iteration_brain``\n\nFeature Brain Restart from which iteration\n\nWhen performing restart or re-fit type feature_brain_level with\nresumed_experiment_id, choose which iteration to start from, instead of\nonly last best -1 means just use last best.\n\nUsage:\n\n - 1) Run one experiment with\n feature_brain_iterations_save_every_iteration=1 or some other\n number\n\n - 2) Identify which iteration brain dump one wants to restart/refit\n from\n\n - 3) Restart/Refit from original experiment, setting\n which_iteration_brain to that number in expert settings\n\nNote: If restart from a tuning iteration, this will pull in entire\nscored tuning population and use that for feature evolution.", + "prompt_type": "plain" + }, + { + "output": "refit_same_best_individual``\n\nFeature Brain refit uses same best individual\n\nWhen doing re-fit from feature brain, if change columns or features,\npopulation of individuals used to refit from may change order of which\nwas best, leading to better result chosen (False case). But sometimes\nyou want to see exact same model/features with only one feature added,\nand then would need to set this to True case. 
That is, if refit with\njust 1 extra column and have interpretability=1, then final model will\nbe same features, with one more engineered feature applied to that new\noriginal feature.", + "prompt_type": "plain" + }, + { + "output": "brain_add_features_for_new_columns``\n\nFeature Brain adds features with new columns even during retraining\nfinal model\n\nWhether to take any new columns and add additional features to pipeline,\neven if doing retrain final model. In some cases, one might have a new\ndataset but only want to keep same pipeline regardless of new columns,\nin which case one sets this to False. For example, new data might lead\nto new dropped features, due to shift or leak detection. To avoid change\nof feature set, one can disable all dropping of columns, but set this to\nFalse to avoid adding any columns as new features, so pipeline is\nperfectly preserved when changing data.", + "prompt_type": "plain" + }, + { + "output": "force_model_restart_to_defaults``\n\nRestart-refit use default model settings if model switches\n\nIf restart/refit and no longer have the original model class available,\nbe conservative and go back to defaults for that model class. If False,\nthen try to keep original hyperparameters, which can fail to work in\ngeneral.", + "prompt_type": "plain" + }, + { + "output": "dump_modelparams_every_scored_indiv``\n\nEnable detailed scored model info\n\nWhether to dump every scored individual's model parameters to a\ncsv/tabulated/json file. This produces files such as:\nindividual_scored.params.[txt, csv, json]", + "prompt_type": "plain" + }, + { + "output": "fast_approx_num_trees------------------------- .. container:: dropdown **Max number of trees to use for fast approximation** When fast_approx=True, specify the maximum number of trees to use. By default, this value is 250. .. 
note:: By default,fast_approx`` is enabled for MLI and AutoDoc and\n\n disabled for Experiment predictions.", + "prompt_type": "plain" + }, + { + "output": "fast_approx_do_one_fold--------------------------- .. container:: dropdown **Whether to use only one fold for fast approximation** Whenfast_approx=True, specify whether to speed up fast approximation further by using only one fold out of all cross-validation folds. By default, this setting is enabled. .. note:: By default,fast_approx`` is enabled for MLI and AutoDoc and\n\n disabled for Experiment predictions.", + "prompt_type": "plain" + }, + { + "output": "fast_approx_do_one_model---------------------------- .. container:: dropdown **Whether to use only one model for fast approximation** Whenfast_approx=True, specify whether to speed up fast approximation further by using only one model out of all ensemble models. By default, this setting is disabled. .. note:: By default,fast_approx`` is enabled for MLI and AutoDoc and\n\n disabled for Experiment predictions.", + "prompt_type": "plain" + }, + { + "output": "fast_approx_contribs_num_trees---------------------------------- .. container:: dropdown **Maximum number of trees to use for fast approximation when making Shapley predictions** Whenfast_approx_contribs=True, specify the maximum number of trees to use for 'Fast Approximation' in GUI when making Shapley predictions and for AutoDoc/MLI. By default, this value is 50. .. note:: By default,fast_approx_contribs`` is enabled for MLI and\n\n AutoDoc.", + "prompt_type": "plain" + }, + { + "output": "fast_approx_contribs_do_one_fold------------------------------------ .. container:: dropdown **Whether to use only one fold for fast approximation when making Shapley predictions** Whenfast_approx_contribs=True, specify whether to speed upfast_approx_contribsfurther by using only one fold out of all cross-validation folds for 'Fast Approximation' in GUI when making Shapley predictions and for AutoDoc/MLI. 
By default, this setting is enabled. .. note:: By default,fast_approx_contribs`` is enabled for MLI and\n\n AutoDoc.", + "prompt_type": "plain" + }, + { + "output": "fast_approx_contribs_do_one_model------------------------------------- .. container:: dropdown **Whether to use only one model for fast approximation when making Shapley predictions** Whenfast_approx_contribs=True, specify whether to speed upfast_approx_contribsfurther by using only one model out of all ensemble models for 'Fast Approximation' in GUI when making Shapley predictions and for AutoDoc/MLI. By default, this setting is enabled. .. note:: By default,fast_approx_contribs`` is enabled for MLI and\n\n AutoDoc.", + "prompt_type": "plain" + }, + { + "output": "autoviz_recommended_transformation``\n\nAutoviz Recommended Transformations\n\nKey-value pairs of column names and transformations that\nAutoviz recommended. Also see\nAutoviz Recommendation Transformer\n.", + "prompt_type": "plain" + }, + { + "output": "Appendix A: Third-Party Integrations\nH2O Driverless AI integrates with a (continuously growing) number of\nthird-party products. Please contact sales@h2o.ai to schedule a\ndiscussion with one of our Solution Engineers for more information. If you are interested in a product not yet listed here, please ask us\nabout it! 
Instance Life-Cycle Management\nThe following products are able to manage (start and stop) Driverless AI\ninstances themselves:\n ---------------------------------------------------------------------\n Name Notes\n ------------------------- -------------------------------------------\n BlueData DAI runs in a BlueData container\n Domino DAI runs in a Domino container\n IBM Spectrum Conductor DAI runs in user mode via TAR SH\n distribution\n IBM Cloud Private (ICP) Uses Kubernetes underneath; DAI runs in a\n docker container; requires HELM chart\n Kubernetes DAI runs in as a long running service via\n Docker container\n Kubeflow Abstraction of Kubernetes; allows\n additional monitoring and management of\n Kubernetes deployments.", + "prompt_type": "plain" + }, + { + "output": "Puddle (from H2O.ai) Multi-tenant orchestration platform for DAI\n instances (not a third party, but listed\n here for completeness)\n SageMaker Bring your own algorithm docker container\n ---------------------------------------------------------------------\nAPI Clients\nThe following products have Driverless AI client API integrations:\n ---------------------------------------------------------------------\n Name Notes\n ---------------- ----------------------------------------------------\n Alteryx Lets users interact with a remote DAI server from\n Alteryx Designer\n Cinchy Data collaboration for the Enterprise, use MOJOs to\n enrich data and use Cinchy data network to train\n models\n Jupyter/Python DAI Python API client library can be downloaded from\n the Web UI of a running instance\n KDB Use KDB as a data source in Driverless AI for\n training\n RStudio/R DAI R API client library can be downloaded from the\n Web UI of a running instance.", + "prompt_type": "plain" + }, + { + "output": "Appendix C: Installed Components and Dependencies\nH2O Driverless AI is an artificial intelligence (AI) platform that\nautomates some of the most difficult data science and machine learning\nworkflows 
such as feature engineering, model validation, model tuning,\nmodel selection and model deployment. It aims to achieve the highest\npredictive accuracy, comparable to expert data scientists, but in much\nshorter time thanks to end-to-end automation. Driverless AI also offers\nautomatic visualizations and machine learning interpretability (MLI). Especially in regulated industries, model transparency and explanation\nare just as important as predictive performance. This section describes components that are included with the Driverless AI\nDocker image and information on additional Driverless AI dependencies. Installed Components\nh2oaicore--cp38-cp38-linux_x86_64.whl\nH2O-3: H2O is an open source, in-memory, distributed, fast, and scalable\nmachine learning and predictive analytics platform that allows you to\nbuild machine learning models on big data and provides easy\nproductionalization of those models in an enterprise environment.", + "prompt_type": "plain" + }, + { + "output": "It provides a high-performance version of base R's\u00a0data.frame\u00a0with\nsyntax and feature enhancements for ease of use, convenience, and\nprogramming speed. h2o4gpu-0.2.0+master.b1ef476-cp38-cp38-linux_x86_64.whl: H2O4GPU\u00a0is a\ncollection of GPU solvers provided by\u00a0H2Oai\u00a0with APIs in Python and R.\nThe Python API builds upon the easy-to-use\u00a0scikit-learn\u00a0API and its\nwell-tested CPU-based algorithms. It can be used as a drop-in\nreplacement for scikit-learn (i.e. import h2o4gpu as sklearn) with\nsupport for GPUs on selected (and ever-growing) algorithms. H2O4GPU\ninherits all the existing scikit-learn algorithms and falls back to CPU\nalgorithms when the GPU algorithm does not support an important existing\nscikit-learn class option. The R package is a wrapper around the H2O4GPU\nPython package, and the interface follows standard R conventions for\nmodeling. The DAAL library added for CPU is currently only supported on\nx86_64 architecture. 
Python and Other Dependencies for Driverless AI\nPython 3.6: Python is a programming language that lets you work more\nquickly and integrate your systems more effectively.", + "prompt_type": "plain" + }, + { + "output": "pycrypto 2.6.1: The Python Cryptography Toolkit (pycrypto) is a\ncollection of both secure hash functions (such as SHA256 and RIPEMD160)\nand various encryption algorithms (AES, DES, RSA, ElGamal, etc.). The\npackage is structured to make adding new modules easy. This section is\nessentially complete, and the software interface will almost certainly\nnot change in an incompatible way in the future; all that remains to be\ndone is to fix any bugs that show up. If you encounter a bug, please\nreport it in the Launchpad bug tracker. filelock 2.0.13: This package contains a single module that implements a\nplatform-independent file lock in Python, which provides a simple method\nof inter-process communication. numpy 1.14.0 NumPy is the fundamental package for scientific computing\nwith Python. It contains among other components:\n - A powerful N-dimensional array object\n - Sophisticated (broadcasting) functions\n - Tools for integrating C/C++ and Fortran code\n - Useful linear algebra, Fourier transform, and random number\n capabilities\n Besides its obvious scientific uses, NumPy can also be used as an\n efficient multi-dimensional container of generic data.", + "prompt_type": "plain" + }, + { + "output": "This allows NumPy to seamlessly and\n speedily integrate with a wide variety of databases. NumPy is licensed\n under the\u00a0BSD license, enabling reuse with few restrictions. pandas 0.22.0: The Python Data Analysis Library, pandas\u00a0is an open\nsource, BSD-licensed library providing high-performance, easy-to-use\ndata structures and data analysis tools for the\u00a0Python\u00a0programming\nlanguage. requests 2.13.0: Requests\u00a0allows you to send\u00a0organic, grass-fed\u00a0HTTP/1.1\nrequests without the need for manual labor. 
There's no need to manually\nadd query strings to your URLs or to form-encode your POST data. Keep-alive and HTTP connection pooling are 100% automatic, thanks\nto\u00a0urllib3. scikit-learn 0.19.1: Simple and efficient tools for data mining and data\nanalysis, accessible to everybody, and reusable in various contexts. scikit-learn is built on NumPy, SciPy, and matplotlib open source,\ncommercially usable BSD license. scipy 1.0.0: SciPy (pronounced \u201cSigh Pie\u201d) is a Python-based ecosystem\nof open-source software for mathematics, science, and engineering.", + "prompt_type": "plain" + }, + { + "output": "Changing\nthe title is mostly useful in multi-process systems, for example when a\nmaster process is forked: changing the children\u2019s title allows to\nidentify the task each process is busy with. The technique is used\nby\u00a0PostgreSQL\u00a0and the\u00a0OpenSSH Server\u00a0for example. statsmodels 0.8.0: statsmodels\u00a0is a Python module that provides classes\nand functions for the estimation of many different statistical models,\nas well as for conducting statistical tests, and statistical data\nexploration. An extensive list of result statistics are available for\neach estimator. The results are tested against existing statistical\npackages to ensure that they are correct. The package is released under\nthe open source Modified BSD (3-clause) license. toml 0.9.3.1: This is a Python library for parsing and creating\u00a0TOML. The module passes\u00a0the TOML test suite\u00a0which is a fork of\u00a0BurntSushi\u2019s\nTOML test suite. 
TOML\u00a0is a\u00a0configuration file\u00a0format that is easy to\nread due to obvious semantics and aims to be \"minimal\".", + "prompt_type": "plain" + }, + { + "output": "clang: Python bindings for clang from clang release branches\nclang+llvm-4.0.0-x86_64-linux-gnu-ubuntu-16.04/ clang: The LLVM compiler\ninfrastructure supports a wide range of projects, from industrial\nstrength compilers to specialized JIT applications to small research\nprojects. apt-get: This\u00a0is a tool to automatically update your Debian machine and\nget and install debian packages/programs. This tool is a part of\nthe\u00a0DebianPackageManagement\u00a0system. curl: PycURL is a Python interface to\u00a0libcurl, the multiprotocol file\ntransfer library. Similar to the\u00a0urllib\u00a0Python module, PycURL can be\nused to fetch objects identified by a URL from a Python program. Beyond\nsimple fetches however PycURL exposes most of the functionality of\nlibcurl. apt-utils: A package management related utility program. This package\ncontains some less used command line utilities related to package\nmanagement with APT. python-software-properties: This manages the repositories that you\ninstall software from (universe).", + "prompt_type": "plain" + }, + { + "output": "iputils-ping: The iputils package is set of small useful utilities for\nLinux networking. wget: GNU Wget is a\u00a0free software\u00a0package for retrieving files using\nHTTP, HTTPS, FTP and FTPS - the most widely-used Internet protocols. It\nis a non-interactive command line tool, so it can easily be called from\nscripts,\u00a0cron\u00a0jobs, terminals without X-Windows support, etc. cpio: GNU cpio copies files into or out of a cpio or tar archive. The\narchive can be another file on the disk, a magnetic tape, or a pipe. GNU\ncpio supports the following archive formats: binary, old ASCII, new\nASCII, crc, HPUX binary, HPUX old ASCII, old tar, and POSIX.1 tar. 
The\ntar format is provided for compatibility with the\u00a0tar\u00a0program. By\ndefault, cpio creates binary format archives, for compatibility with\nolder cpio programs. When extracting from archives, cpio automatically\nrecognizes which kind of archive it is reading and can read archives\ncreated on machines with a different byte-order. net-tools: A collection of programs that form the base set of the NET-3\nnetworking distribution for the Linux operating system.", + "prompt_type": "plain" + }, + { + "output": "git: Git is a\u00a0free and open source\u00a0distributed version control system\ndesigned to handle everything from small to very large projects with\nspeed and efficiency. zip: zip\u00a0is a compression and file packaging utility for Unix, VMS,\nMSDOS, OS/2, Windows 9x/NT/XP, Minix, Atari, Macintosh, Amiga, and Acorn\nRISC OS. It is analogous to a combination of the Unix commands\u00a0tar(1)\nand\u00a0compress(1) and is compatible with PKZIP (Phil Katz's ZIP for MSDOS\nsystems). dirmngr: Dirmngr is a server for managing and downloading certificate\nrevocation lists (CRLs) for X.509 certificates and for downloading the\ncertificates themselves. Dirmngr also handles OCSP requests as an\nalternative to CRLs. Dirmngr is either invoked internally by gpgsm (from\nGnuPG 2) or when running as a system daemon through\nthe\u00a0dirmngr-client\u00a0tool. curl -sL\u00a0https://deb.nodesource.com/setup_15.x\u00a0| bash - &&: This\nrepository contains the source of\nthe\u00a0NodeSource\u00a0Node.js\u00a0and\u00a0io.js\u00a0Binary Distributions setup and support\nscripts. nodejs: Node.js is a JavaScript runtime built on\u00a0Chrome's V8 JavaScript\nengine.", + "prompt_type": "plain" + }, + { + "output": "The node.js package ecosystem,\u00a0npm, is the\nlargest ecosystem of open source libraries in the world. build-essential: An informational list of build-essential packages. ccache: ccache is a compiler cache. 
It\u00a0speeds up recompilation\u00a0by\ncaching previous compilations and detecting when the same compilation is\nbeing done again. Supported languages are C, C++, Objective-C and\nObjective-C++. ccache is free software, released under the\u00a0GNU General\nPublic License version 3\u00a0or later. libopenblas-dev: Optimized BLAS (linear algebra) library (development\nfiles)\nPBZip2: PBZIP2 is a parallel implementation of the\u00a0bzip2\u00a0block-sorting\nfile compressor that uses pthreads and achieves near-linear speedup on\nSMP machines. The output of this version is fully compatible with bzip2\nv1.0.2 or newer\u00a0(ie: anything compressed with pbzip2 can be decompressed\nwith bzip2). PBZIP2 should work on any system that has a pthreads\ncompatible C++ compiler (such as gcc). It has been tested on: Linux,\nWindows (cygwin & MinGW), Solaris, Tru64/OSF1, HP-UX, OS/2, OSX, and\nIrix.", + "prompt_type": "plain" + }, + { + "output": "Python\u00a02.7.9 and later (on the\npython2 series), and Python\u00a03.4 and later include\u00a0pip\u00a0(pip3\nfor\u00a0Python\u00a03) by default. pip\u00a0is a recursive acronym that can stand for\neither \"Pip\u00a0Installs Packages\" or \"Pip\u00a0Installs\u00a0Python\". setuptools: Allows you to easily download, build, install, upgrade, and\nuninstall Python packages. tensorflow-gpu: An open source machine learning framework for numerical\ncomputation using data flow graphs. psutil: psutil (process and system utilities) is a cross-platform\nlibrary for retrieving information on\u00a0running processes\u00a0and\u00a0system\nutilization\u00a0(CPU, memory, disks, network, sensors) in Python. It is\nuseful mainly for\u00a0system monitoring,\u00a0profiling and limiting process\nresources\u00a0and\u00a0management of running processes. It implements many\nfunctionalities offered by UNIX command line tools such as: ps, top,\nlsof, netstat, ifconfig, who, df, kill, free, nice, ionice, iostat,\niotop, uptime, pidof, tty, taskset, pmap. 
jupyter: The\u00a0Jupyter\u00a0Notebook is an open-source web application that\nallows you to create and share documents that contain live code,\nequations, visualizations and narrative text.", + "prompt_type": "plain" + }, + { + "output": "Interpretation Expert Settings\n\nThe following is a list of the Interpretation expert settings that are\navailable when setting up a new interpretation from the\nMLI page . The name of each setting is preceded by its\nconfig.toml label. For info on explainer-specific expert\nsettings, see explainer-expert-settings.\n\n- interpretation-expert-settings-mli\n- interpretation-expert-settings-nlp\n- interpretation-expert-settings-surrogate\n\nMLI Tab", + "prompt_type": "plain" + }, + { + "output": "mli_sample~~~~~~~~~~~~~~ .. container:: dropdown **Sample All Explainers** Specify whether to perform the interpretation on a sample of the training data. By default, MLI will sample the training dataset if it is greater than 100k rows. (The equivalent config.toml setting ismli_sample_size``.) This is enabled by default. Turn this toggle\n\n off to run MLI on the entire dataset.", + "prompt_type": "plain" + }, + { + "output": "mli_enable_mojo_scorer``\n\nAllow Use of MOJO Scoring Pipeline\n\nUse this option to disable MOJO scoring pipeline. Scoring pipeline is\nchosen automatically (from MOJO and Python pipelines) by default. In\ncase of certain models, MOJO vs. Python choice can impact pipeline\nperformance and robustness.", + "prompt_type": "plain" + }, + { + "output": "mli_fast_approx``\n\nSpeed up predictions with a fast approximation\n\nSpecify whether to speed up predictions with a fast approximation. 
When\nenabled, this setting can reduce the number of trees or cross-validation\nfolds and ultimately reduce the time needed to complete interpretations.\nThis setting is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "mli_custom``\n\nAdd to config.toml via TOML String\n\nUse this input field to add to the Driverless AI server config.toml\nconfiguration file with TOML string.\n\nMLI NLP Tab", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_top_n``\n\nNumber of Tokens Used for MLI NLP Explanations\n\nSpecify the number of tokens used for MLI NLP explanations. To use all\navailable tokens, set this value to -1. By default, this value is set to\n20.", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_sample_limit``\n\nSample Size for NLP Surrogate Models\n\nSpecify the maximum number of records used by MLI NLP explainers. The\ndefault value is 10000.", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_min_df``\n\nMinimum Number of Documents in Which Token Has to Appear\n\nSpecify the minimum number of documents in which token has to appear.\nUse integer values to denote absolute counts and floating-point values\nto denote percentages. 
By default, this value is set to 3.", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_max_df``\n\nMaximum Number of Documents in Which Token Has to Appear", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_min_ngram``\n\nMinimum Value in n-gram Range", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_max_ngram``\n\nMaximum Value in n-gram Range", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_min_token_mode``\n\nMode Used to Choose N Tokens for MLI NLP", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_tokenizer_max_features``\n\nNumber of Top Tokens to Use as Features (Token-based Feature Importance)", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_loco_max_features``\n\nNumber of Top Tokens to Use as Features (LOCO)", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_surrogate_tokens``\n\nNumber of Top Tokens to Use as Features (Surrogate Model)", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_use_stop_words``\n\nStop Words for MLI NLP", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_stop_words``\n\nList of Words to Filter Before Generating Text Tokens", + "prompt_type": "plain" + }, + { + "output": "mli_nlp_append_to_english_stop_words``\n\nAppend List of Custom Stop Words to Default Stop Words\n\nMLI Surrogate Models Tab", + "prompt_type": "plain" + }, + { + "output": "mli_lime_method~~~~~~~~~~~~~~~~~~~ .. container:: dropdown **LIME Method** Select a LIME method of either K-LIME (default) or LIME-SUP. - **K-LIME** (default): creates one global surrogate GLM on the entire training data and also creates numerous local surrogate GLMs on samples formed from *k*-means clusters in the training data. The features used for *k*-means are selected from the Random Forest surrogate model's variable importance. 
The number of features used for *k*-means is the minimum of the top 25% of variables from the Random Forest surrogate model's variable importance and the max number of variables that can be used for *k*-means, which is set by the user in the config.toml setting formli_max_number_cluster_vars. (Note, if the number of features in the dataset are less than or equal to 6, then all features are used for *k*-means clustering.) The previous setting can be turned off to use all features for k-means by settinguse_all_columns_klime_kmeansin the config.toml file totrue`.", + "prompt_type": "plain" + }, + { + "output": "mli_use_raw_features``\n\nUse Original Features for Surrogate Models\n\nSpecify whether to use original features or transformed features in the\nsurrogate model for the new interpretation. This is enabled by default.\n\nNote: When this setting is disabled, the K-LIME clustering column and\nquantile binning options are unavailable.", + "prompt_type": "plain" + }, + { + "output": "mli_vars_to_pdp``\n\nNumber of Features for Partial Dependence Plot\n\nSpecify the maximum number of features to use when building the Partial\nDependence Plot. Use -1 to calculate Partial Dependence Plot for all\nfeatures. By default, this value is set to 10.", + "prompt_type": "plain" + }, + { + "output": "mli_nfolds``\n\nCross-validation Folds for Surrogate Models\n\nSpecify the number of surrogate cross-validation folds to use (from 0 to\n10). When running experiments, Driverless AI automatically splits the\ntraining data and uses the validation data to determine the performance\nof the model parameter tuning and feature engineering steps. For a new\ninterpretation, Driverless AI uses 3 cross-validation folds by default\nfor the interpretation.", + "prompt_type": "plain" + }, + { + "output": "mli_qbin_count``\n\nNumber of Columns to Bin for Surrogate Models\n\nSpecify the number of columns to bin for surrogate models. 
This value\ndefaults to 0.", + "prompt_type": "plain" + }, + { + "output": "mli_sample_size``\n\nSample Size for Surrogate Models\n\nWhen the number of rows is above this limit, sample for surrogate\nmodels. The default value is 100000.", + "prompt_type": "plain" + }, + { + "output": "mli_num_quantiles``\n\nNumber of Bins for Quantile Binning\n\nSpecify the number of bins for quantile binning. By default, this value\nis set to -10.", + "prompt_type": "plain" + }, + { + "output": "mli_dia_sample_size``\n\nSample Size for Disparate Impact Analysis\n\nWhen the number of rows is above this limit, sample for Disparate Impact\nAnalysis (DIA). The default value is 100000.", + "prompt_type": "plain" + }, + { + "output": "mli_pd_sample_size``\n\nSample Size for Partial Dependence Plot\n\nWhen number of rows is above this limit, sample for the Driverless AI\npartial dependence plot. The default value is 25000.", + "prompt_type": "plain" + }, + { + "output": "mli_pd_numcat_num_chart``\n\nUnique Feature Values Count Driven Partial Dependence Plot Binning and\nChart Selection\n\nSpecify whether to use dynamic switching between PDP numeric and\ncategorical binning and UI chart selection in cases where features were\nused both as numeric and categorical by the experiment. This is enabled\nby default.", + "prompt_type": "plain" + }, + { + "output": "mli_pd_numcat_threshold~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. container:: dropdown **Threshold for PD/ICE Binning and Chart Selection** Ifmli_pd_numcat_num_chart`` is enabled, and if the number of\n\n unique feature values is greater than the threshold, then numeric\n binning and chart is used. Otherwise, categorical binning and chart\n is used. The default threshold value is 11.", + "prompt_type": "plain" + }, + { + "output": "mli_sa_sampling_limit``\n\nSample Size for Sensitivity Analysis (SA)\n\nWhen the number of rows is above this limit, sample for Sensitivity\nAnalysis (SA). 
The default value is 500000.", + "prompt_type": "plain" + }, + { + "output": "klime_cluster_col``\n\nk-LIME Clustering Columns\n\nFor k-LIME interpretations, optionally specify which columns to have\nk-LIME clustering applied to.\n\nNote: This setting is not found in the config.toml file.", + "prompt_type": "plain" + }, + { + "output": "qbin_cols``\n\nQuantile Binning Columns\n\nFor k-LIME interpretations, specify one or more columns to generate\ndecile bins (uniform distribution) to help with MLI accuracy. Columns\nselected are added to top n columns for quantile binning selection. If a\ncolumn is not numeric or not in the dataset (transformed features), then\nthe column will be skipped.\n\nNote: This setting is not found in the config.toml file.", + "prompt_type": "plain" + }, + { + "output": "Mac OS X\nThis section describes how to install, start, stop, and upgrade the\nDriverless AI Docker image on Mac OS X. Note that this uses regular\nDocker and not NVIDIA Docker. Note: Support for GPUs and MOJOs is not available on Mac OS X. The installation steps assume that you have a license key for Driverless\nAI. For information on how to obtain a license key for Driverless AI,\nvisit https://h2o.ai/o/try-driverless-ai/. Once obtained, you will be\nprompted to paste the license key into the Driverless AI UI when you\nfirst log in, or you can save it as a .sig file and place it in the\nlicense folder that you will create during the installation process. Caution:\n- This is an extremely memory-constrained environment for experimental\n purposes only. Stick to small datasets! For serious use, please use\n Linux. - Be aware that there are known performance issues with Docker for\n Mac. 
More information is available here:\n https://docs.docker.com/docker-for-mac/osxfs/#technology.", + "prompt_type": "plain" + }, + { + "output": "Min Mem Suitable for\n ----------------------- --------------- --------- -----------------\n Mac OS X No 16 GB Experimentation\n -------------------------------------------------------------------\nInstalling Driverless AI\n1. Retrieve the Driverless AI Docker image from\n https://www.h2o.ai/download/. 2. Download and run Docker for Mac from\n https://docs.docker.com/docker-for-mac/install. 3. Adjust the amount of memory given to Docker to be at least 10 GB. Driverless AI won't run at all with less than 10 GB of memory. You\n can optionally adjust the number of CPUs given to Docker. You will\n find the controls by clicking on (Docker\n Whale)->Preferences->Advanced as shown in the following screenshots. (Don't forget to Apply the changes after setting the desired memory\n value.) [image]\n[image]\n4. On the File Sharing tab, verify that your macOS directories (and\n their subdirectories) can be bind mounted into Docker containers.", + "prompt_type": "plain" + }, + { + "output": "[image]\n5. Set up a directory for the version of Driverless AI within the\n Terminal:\n6. With Docker running, open a Terminal and move the downloaded\n Driverless AI image to your new directory. 7. Change directories to the new directory, then load the image using\n the following command:\n8. Set up the data, log, license, and tmp directories (within the new\n Driverless AI directory):\n9. Optionally copy data into the data directory on the host. The data\n will be visible inside the Docker container at /data. You can also\n upload data after starting Driverless AI. 10. Run docker images to find the image tag. 11. Start the Driverless AI Docker image (still within the new\n Driverless AI directory). Replace TAG below with the image tag. Note\n that GPU support will not be available. 
Note that from version 1.10\n DAI docker image runs with internal tini that is equivalent to using\n --init from docker, if both are enabled in the launch command, tini\n prints a (harmless) warning message.", + "prompt_type": "plain" + }, + { + "output": "Connect to Driverless AI with your browser at\n http://localhost:12345. Stopping the Docker Image\nTo stop the Driverless AI Docker image, type Ctrl + C in the Terminal\n(Mac OS X) or PowerShell (Windows 10) window that is running the\nDriverless AI Docker image. Upgrading the Docker Image\nThis section provides instructions for upgrading Driverless AI versions\nthat were installed in a Docker container. These steps ensure that\nexisting experiments are saved. WARNING: Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\ndirectory and are not automatically upgraded when Driverless AI is\nupgraded. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. If you did not build MLI on a model before upgrading Driverless AI,\n then you will not be able to view MLI on that model after upgrading. Before upgrading, be sure to run MLI jobs on models that you want to\n continue to interpret in future releases.", + "prompt_type": "plain" + }, + { + "output": "Creating Custom Plots\nTo create a custom plot, click the Add Graph button in the upper-right\ncorner and select one of the available plot types. After selecting a\nplot, configure the available settings for that plot type and click\nSave. The custom plot appears on the Visualization page once it has been\ncreated. The following example creates a custom histogram plot for the\nCreditCard-Train dataset:\nThe following is a complete list of available graph types. Bar chart\nThis plot presents categorical data with rectangular bars that are\nproportional to the values they represent. 
The type of marker used to\nrepresent bars determines the bar chart type. The most common marker is\nthe bar marker, which ranges from a lower value (usually zero) to an\nupper value. Also available are the Cleveland dot plot (replaces the bar\nwith a dot located at the upper value) and the area chart (covers the\nbars with a solid area marker). Bars are always plotted against the\ncategories of a categorical variable.", + "prompt_type": "plain" + }, + { + "output": "When creating a bar chart, specify the following options:\n - x variable name: Specify the name of the x variable\n - y variable name: Specify the name of the y variable\n - Transpose: Specify whether to switch the X-axis and Y-axis\n - Sort: Specify whether to sort bars alphabetically by x values\n - Mark: Specify a marker type. Select point to create a Cleveland\n dot plot\nBoxplot\nThis plot presents the fractiles of a distribution. The center of the\nbox represents the median, the edges of a box represent the lower and\nupper quartiles, and the ends of the \"whiskers\" represent that range of\nvalues. When outliers occur, the adjacent whisker is shortened to the\nnext lower or upper value. For variables having only a few values, the\nboxes can be compressed. When creating a boxplot, specify the following options:\n - Variable name: Specify the variable that you want the box to\n represent\n - Transpose: Specify whether to switch the X-axis and Y-axis\nDotplot\nThis plot represents individual data values with dots.", + "prompt_type": "plain" + }, + { + "output": "When creating a dotplot, specify the following options:\n - Variable name: Specify the name of the variable on which dots\n are calculated\n - Mark: Specify a marker type\nGrouped Boxplot\nThis plot is a boxplot where categories are organized into groups and\nsubgroups. 
When creating a grouped boxplot, specify the following options:\n - Variable name: Specify the variable that you want the box to\n represent\n - Group variable name: Specify the name of the grouping variable\n - Transpose: Specify whether to switch the X-axis and Y-axis\nHeatmap\nSee data heatmap. When creating a heatmap, specify the following\noptions:\n - Variable names: Specify one or more variables to use. If none are\n specified, all the variables in the dataset are used\n - Permute: Specify whether to reorder variables using singular value\n decomposition (SVD)\n - Transpose: Specify whether to switch the X-axis and Y-axis\n - Matrix type: Specify a matrix type.", + "prompt_type": "plain" + }, + { + "output": "Each bar groups numbers into ranges by its width, and taller\nbars show that more data falls within a specific range. This plot is\noften used to display the shape and spread of a continuous variable. When creating a histogram, specify the following options:\n - Variable name: Specify the variable name\n - Transformation: Specify whether to use a transformation. Choose\n from log and square root\n - Number of bars: Specify the number of bars to use\n - Mark: Specify a marker type. Use area to create a density\n polygon\nLinear Regression\nThis plot predicts a set of values on a variable y from values on a\nvariable x by fitting a linear function (ax\u2005+\u2005b) so that for any value\non the x variable, this function yields the most probable value on the y\nvariable. The effectiveness of this prediction in a sample of values is\nrepresented by the discrepancies between the y values and their\ncorresponding predicted values. 
When creating a linear regression plot, specify the following options:\n - x variable name: Specify the name of the x variable\n - y variable name: Specify the name of the y variable\n - Mark: Specify a marker type.", + "prompt_type": "plain" + }, + { + "output": "The effectiveness of this prediction in a sample of values is\nrepresented by the discrepancies between the y values and their\ncorresponding predicted values. When creating a LOESS regression plot, specify the following options:\n - x variable name: Specify the name of the x variable\n - y variable name: Specify the name of the y variable\n - Mark: Specify a marker type. Choose from point and square\n - Bandwidth: Specify the interval that represents the proportion\n of cases during the smoothing window. This is set to 0.5 by\n default\nParallel Coordinates Plot\nThis plot is used for comparing multiple variables. Each variable has\nits own vertical axis in the plot, and each profile connects the values\non the axes for a single observation. If the data contains clusters,\nthese profiles are color-coded by their cluster number. When creating a parallel coordinates plot, specify the following\n options:\n - Variable names: Specify one or more variables to use.", + "prompt_type": "plain" + }, + { + "output": "Unique colors are assigned for each cluster ID\nProbability Plot\nThis plot evaluates the skewness of a distribution by plotting two\ncumulative distribution functions against each other. When creating a probability plot, specify the following options:\n - x variable name: Specify the name of the x variable\n - Distribution: Specify a distribution type. Choose from normal\n and uniform\n - Mark: Specify a marker type. Choose from point and square\n - Transpose: Specify whether to switch the X-axis and Y-axis\nQuantile Plot\nThis plot compares two probability distributions by plotting their\nquantiles against each other. 
When creating a quantile plot, specify the following options:\n - x variable name: Specify the name of the x variable\n - y variable name: Specify the name of the y variable\n - Distribution: Specify a distribution type. Choose from normal\n and uniform\n - Mark: Specify a marker type. Choose from point and square\n - Transpose: Specify whether to switch the X-axis and Y-axis\nScatterplot\nThis plot represents the values of two variables (y and x) in a frame\nthat contains one point for each row of the input sample data.", + "prompt_type": "plain" + }, + { + "output": "About Version Support\n\nEach X.Y.Z long-term support (LTS) release of Driverless AI is supported\nfor 18 months. For example, the end of support date for 1.10.4 is April\n13, 2024, which is 18 months after the release date of October 13, 2022.\nNote that the end of support date for each base version is also applied\nto each X.Y.Z.{1,2,3...} release.\n\nTo view end of support dates for recent DAI LTS releases, see the\nDriverless AI prior releases page.", + "prompt_type": "plain" + }, + { + "output": "Explainer (Recipes) Expert Settings\n\nThe following is a list of the explainer-specific expert settings that\nare available when setting up a new interpretation. 
These settings can\nbe accessed when running interpretation from the\nMLI page under recipes tab.\nFor info on general MLI expert settings, see\ninterpretation-expert-settings.\n\n- interpretation-expert-settings-absolute-permutation\n- interpretation-expert-settings-autodoc\n- interpretation-expert-settings-dia\n- interpretation-expert-settings-nlp-pdp\n- interpretation-expert-settings-nlp-vectorizer\n- interpretation-expert-settings-pdp\n- interpretation-expert-settings-sa\n- interpretation-expert-settings-shapley\n- interpretation-expert-settings-shapley-values\n- interpretation-expert-settings-surrogate-dt\n\nAbsolute Permutation Feature Importance Explainer Settings", + "prompt_type": "plain" + }, + { + "output": "mli_sample_size``\n\nSample size\n\nSpecify the sample size for the absolute permutation feature importance\nexplainer. This value defaults to 100000.", + "prompt_type": "plain" + }, + { + "output": "missing_values~~~~~~~~~~~~~~~~~~ .. container:: dropdown **List of values that should be interpreted as missing values** Specify the list of values that should be interpreted as missing values during data import. This applies to both numeric and string columns. Note that 'nan' is always interpreted as a missing value for numeric columns. Example:\"\"\"['',\n'?', 'None', 'nan', 'N/A', 'unknown', 'inf']\"\"``", + "prompt_type": "plain" + }, + { + "output": "autodoc_feature_importance_num_perm``\n\nNumber of Permutations for Feature Importance\n\nSpecify the number of permutations to make per feature when computing\nfeature importance. This is set to 1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_feature_importance_scorer``\n\nFeature Importance Scorer\n\nSpecify the name of the scorer to be used when calculating feature\nimportance. 
Leave this setting unspecified to use the default scorer for\nthe experiment.\n\nMLI AutoDoc Explainer Settings", + "prompt_type": "plain" + }, + { + "output": "autodoc_report_name``\n\nAutoDoc Name\n\nSpecify the name of the AutoDoc.", + "prompt_type": "plain" + }, + { + "output": "autodoc_template``\n\nAutoDoc Template Location\n\nSpecify the AutoDoc template path. Provide the full path to your custom\nAutoDoc template. To generate the standard AutoDoc, leave this field\nempty.", + "prompt_type": "plain" + }, + { + "output": "autodoc_output_type~~~~~~~~~~~~~~~~~~~~~~~ .. container:: dropdown **AutoDoc File Output Type** Specify the AutoDoc file output type. Choose fromdocx(the default value) andmd``.", + "prompt_type": "plain" + }, + { + "output": "autodoc_subtemplate_type``\n\nAutoDoc Sub-Template Type\n\nSpecify the type of sub-templates to use. Choose from the following:\n\n- auto (Default)\n- md\n- docx", + "prompt_type": "plain" + }, + { + "output": "autodoc_max_cm_size``\n\nConfusion Matrix Max Number of Classes\n\nSpecify the maximum number of classes in the confusion matrix. This\nvalue defaults to 10.", + "prompt_type": "plain" + }, + { + "output": "autodoc_num_features``\n\nNumber of Top Features to Document\n\nSpecify the number of top features to display in the document. To\ndisable this setting, specify -1. This is set to 50 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_min_relative_importance``\n\nMinimum Relative Feature Importance Threshold\n\nSpecify the minimum relative feature importance in order for a feature\nto be displayed. This value must be a float >= 0 and <= 1. This is set\nto 0.003 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_include_permutation_feature_importance``\n\nPermutation Feature Importance\n\nSpecify whether to compute permutation-based feature importance. 
This is\ndisabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_feature_importance_num_perm``\n\nNumber of Permutations for Feature Importance\n\nSpecify the number of permutations to make per feature when computing\nfeature importance. This is set to 1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_feature_importance_scorer``\n\nFeature Importance Scorer\n\nSpecify the name of the scorer to be used when calculating feature\nimportance. Leave this setting unspecified to use the default scorer for\nthe experiment.", + "prompt_type": "plain" + }, + { + "output": "autodoc_pd_max_rows~~~~~~~~~~~~~~~~~~~~~~~ .. container:: dropdown **PDP and Shapley Summary Plot Max Rows** Specify the number of rows shown for the partial dependence plots (PDP) and Shapley values summary plot in the AutoDoc. Random sampling is used for datasets with more than theautodoc_pd_max_rows``\n\n limit. This value defaults to 10000.", + "prompt_type": "plain" + }, + { + "output": "autodoc_pd_max_runtime``\n\nPDP Max Runtime in Seconds\n\nSpecify the maximum number of seconds Partial Dependency computation can\ntake when generating a report. Set to -1 for no time limit.", + "prompt_type": "plain" + }, + { + "output": "autodoc_out_of_range``\n\nPDP Out of Range\n\nSpecify the number of standard deviations outside of the range of a\ncolumn to include in partial dependence plots. This shows how the model\nreacts to data it has not seen before. This is set to 3 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_num_rows``\n\nICE Number of Rows\n\nSpecify the number of rows to include in PDP and ICE plots if individual\nrows are not specified. This is set to 0 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_population_stability_index``\n\nPopulation Stability Index\n\nSpecify whether to include a population stability index if the\nexperiment is a binary classification or regression problem. 
This is\ndisabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_population_stability_index_n_quantiles``\n\nPopulation Stability Index Number of Quantiles\n\nSpecify the number of quantiles to use for the population stability\nindex. This is set to 10 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_prediction_stats``\n\nPrediction Statistics\n\nSpecify whether to include prediction statistics information if the\nexperiment is a binary classification or regression problem. This value\nis disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_prediction_stats_n_quantiles``\n\nPrediction Statistics Number of Quantiles\n\nSpecify the number of quantiles to use for prediction statistics. This\nis set to 20 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_response_rate``\n\nResponse Rates Plot\n\nSpecify whether to include response rates information if the experiment\nis a binary classification problem. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_response_rate_n_quantiles``\n\nResponse Rates Plot Number of Quantiles\n\nSpecify the number of quantiles to use for response rates information.\nThis is set to 10 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_gini_plot``\n\nShow GINI Plot\n\nSpecify whether to show the GINI plot. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_enable_shapley_values``\n\nEnable Shapley Values\n\nSpecify whether to show Shapley values results in the AutoDoc. This is\nenabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_global_klime_num_features``\n\nGlobal k-LIME Number of Features\n\nSpecify the number of features to show in a k-LIME global GLM\ncoefficients table. 
This value must be an integer greater than 0 or -1.\nTo show all features, set this value to -1.", + "prompt_type": "plain" + }, + { + "output": "autodoc_global_klime_num_tables``\n\nGlobal k-LIME Number of Tables\n\nSpecify the number of k-LIME global GLM coefficients tables to show in\nthe AutoDoc. Set this value to 1 to show one table with coefficients\nsorted by absolute value. Set this value to 2 to show two tables - one\nwith the top positive coefficients and another with the top negative\ncoefficients. This value is set to 1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_data_summary_col_num``\n\nNumber of Features in Data Summary Table\n\nSpecify the number of features to be shown in the data summary table.\nThis value must be an integer. To show all columns, specify any value\nlower than 1. This is set to -1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_list_all_config_settings``\n\nList All Config Settings\n\nSpecify whether to show all config settings. If this is disabled, only\nsettings that have been changed are listed. All settings are listed when\nenabled. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_keras_summary_line_length``\n\nKeras Model Architecture Summary Line Length\n\nSpecify the line length of the Keras model architecture summary. This\nvalue must be either an integer greater than 0 or -1. To use the default\nline length, set this value to -1 (default).", + "prompt_type": "plain" + }, + { + "output": "autodoc_transformer_architecture_max_lines``\n\nNLP/Image Transformer Architecture Max Lines\n\nSpecify the maximum number of lines shown for advanced transformer\narchitecture in the Feature section. 
Note that the full architecture can\nbe found in the appendix.", + "prompt_type": "plain" + }, + { + "output": "autodoc_full_architecture_in_appendix``\n\nAppendix NLP/Image Transformer Architecture\n\nSpecify whether to show the full NLP/Image transformer architecture in\nthe appendix. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_appendix_results_table``\n\nFull GLM Coefficients Table in the Appendix\n\nSpecify whether to show the full GLM coefficient table(s) in the\nappendix. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_models``\n\nGLM Coefficient Tables Number of Models\n\nSpecify the number of models for which a GLM coefficients table is shown\nin the AutoDoc. This value must be -1 or an integer >= 1. Set this value\nto -1 to show tables for all models. This is set to 1 by default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_folds``\n\nGLM Coefficient Tables Number of Folds Per Model\n\nSpecify the number of folds per model for which a GLM coefficients table\nis shown in the AutoDoc. This value must be be -1 (default) or an\ninteger >= 1 (-1 shows all folds per model).", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_coef``\n\nGLM Coefficient Tables Number of Coefficients\n\nSpecify the number of coefficients to show within a GLM coefficients\ntable in the AutoDoc. This is set to 50 by default. Set this value to -1\nto show all coefficients.", + "prompt_type": "plain" + }, + { + "output": "autodoc_coef_table_num_classes``\n\nGLM Coefficient Tables Number of Classes\n\nSpecify the number of classes to show within a GLM coefficients table in\nthe AutoDoc. Set this value to -1 to show all classes. This is set to 9\nby default.", + "prompt_type": "plain" + }, + { + "output": "autodoc_num_histogram_plots``\n\nNumber of Histograms to Show\n\nSpecify the number of top features for which to show histograms. 
This is\nset to 10 by default.\n\nDisparate Impact Analysis Explainer Settings\n\nFor information on Disparate Impact Analysis in Driverless AI, see\ndai-dia. The following is a list of parameters that can be toggled from\nthe recipes tab of the MLI page when running a new interpretation.", + "prompt_type": "plain" + }, + { + "output": "dia_cols``\n\nList of Features for Which to Compute DIA\n\nSpecify a list of specific features for which to compute DIA.", + "prompt_type": "plain" + }, + { + "output": "cut_off``\n\nCut Off\n\nSpecify a cut off when performing DIA.", + "prompt_type": "plain" + }, + { + "output": "maximize_metric``\n\nMaximize Metric\n\nSpecify a metric to use when computing DIA. Choose from the following:\n\n- F1\n- F05\n- F2\n- MCC", + "prompt_type": "plain" + }, + { + "output": "use_holdout_preds``\n\nUse Internal Holdout Predictions\n\nSpecify whether to use internal holdout predictions when computing DIA.\nThis is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "sample_size``\n\nSample Size for Disparate Impact Analysis\n\nSpecify the sample size for Disparate Impact Analysis. By default, this\nvalue is set to 100000.", + "prompt_type": "plain" + }, + { + "output": "max_card``\n\nMax Cardinality for Categorical Variables\n\nSpecify the max cardinality for categorical variables. By default, this\nvalue is set to 10.", + "prompt_type": "plain" + }, + { + "output": "min_card``\n\nMinimum Cardinality for Categorical Variables\n\nSpecify the minimum cardinality for categorical variables. By default,\nthis value is set to 2.", + "prompt_type": "plain" + }, + { + "output": "num_card``\n\nMax Cardinality for Numeric Variables to be Considered Categorical\n\nSpecify the max cardinality for numeric variables to be considered\ncategorical. 
By default, this value is set to 25.", + "prompt_type": "plain" + }, + { + "output": "fast_approx``\n\nSpeed Up Predictions With a Fast Approximation\n\nSpecify whether to increase the speed of predictions with a fast\napproximation. This is enabled by default.\n\nNLP Partial Dependence Plot Explainer Settings", + "prompt_type": "plain" + }, + { + "output": "max_tokens``\n\nNumber of text tokens\n\nSpecify the number of text tokens for the NLP Partial Dependence plot.\nThis value defaults to 20.", + "prompt_type": "plain" + }, + { + "output": "custom_tokens~~~~~~~~~~~~~~~~~ .. container:: dropdown **List of custom text tokens** Specify a list of custom text tokens for which to compute NLP partial dependence. For example,[\"text_feature('word_1')\"], wheretext_feature``\nis the name of the model text feature.\n\nNLP Vectorizer + Linear Model Text Feature Importance Explainer Settings", + "prompt_type": "plain" + }, + { + "output": "txt_cols``\n\nText feature for which to compute explanation\n\nSpecify the text feature for which to compute explanation.", + "prompt_type": "plain" + }, + { + "output": "cut_off``\n\nCut off for deciphering binary class outcome\n\nSpecify the cut off for deciphering binary class outcome based on DAI\nmodel predictions. Any DAI prediction greater than the cut off is the\ntarget label and any DAI prediction less than the cut off is the\nnon-target label.", + "prompt_type": "plain" + }, + { + "output": "maximize_metric``\n\nCut off based on a metric to maximize\n\nCalculate cut off based on a metric to maximize, which will decipher\nbinary class outcome based on DAI model predictions. Any DAI prediction\ngreater than the cut off is the target label and any DAI prediction less\nthan the cut off is the non-target label. 
It should be noted that\nspecifying a cut off AND a max metric will give precedence to the cut\noff.\n\nPartial Dependence Plot Explainer Settings\n\nFor information on Partial Dependence Plots in Driverless AI, see\npartial-dependence-plot. The following is a list of parameters that can\nbe toggled from the recipes tab of the MLI page when running a new\ninterpretation.", + "prompt_type": "plain" + }, + { + "output": "sample_size``\n\nSample Size for Partial Dependence Plot\n\nWhen the number of rows is above this limit, sample for the Driverless AI\npartial dependence plot.", + "prompt_type": "plain" + }, + { + "output": "max_features``\n\nPartial Dependence Plot Number of Features\n\nSpecify the number of features that can be viewed on the partial\ndependence plot. By default, this is set to 10. To view all features,\nset this value to -1.", + "prompt_type": "plain" + }, + { + "output": "features``\n\nPartial Dependence Plot Feature List\n\nSpecify a list of features for the partial dependence plot.", + "prompt_type": "plain" + }, + { + "output": "oor_grid_resolution``\n\nPDP Number of Out of Range Bins\n\nSpecify the number of out of range bins for the partial dependence plot.\nBy default, this is set to 0.", + "prompt_type": "plain" + }, + { + "output": "qtile_grid_resolution``\n\nPDP Quantile Binning\n\nSpecify the total quantile points used to create bins. By default, this\nis set to 0.", + "prompt_type": "plain" + }, + { + "output": "grid_resolution``\n\nPDP Observations Per Bin\n\nSpecify the number of equally spaced points used to create bins. By\ndefault, this is set to 20.", + "prompt_type": "plain" + }, + { + "output": "center``\n\nCenter PDP Using ICE Centered at 0\n\nSpecify whether to center the partial dependence plot using ICE centered at\n0. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "sort_bins``\n\nEnsure Bin Values Sorting\n\nSpecify whether to ensure bin values sorting. 
This is enabled by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "histograms``\n\nEnable Histograms\n\nSpecify whether to enable histograms for the partial dependence plot.\nThis is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "qtile-bins~~~~~~~~~~~~~~ .. container:: dropdown **Per-Feature Quantile Binning** Specify per-feature quantile binning. For example, if you select features F1 and F2, this parameter can be specified as'{\"F1\":\n2,\"F2\": 5}'``.\n\n Note: You can set all features to use the same quantile binning with\n the quantile-bins parameter and then adjust the quantile binning for a\n subset of PDP features with this parameter.", + "prompt_type": "plain" + }, + { + "output": "1_frame``\n\nEnable PDP Calculation Optimization\n\nSpecify whether to enable PDP calculation optimization, which minimizes\nthe number of predictions by combining per-bin frames together. By\ndefault, this is set to 'Auto'.", + "prompt_type": "plain" + }, + { + "output": "numcat_num_chart``\n\nUnique Feature Values Count-Driven PDP Binning and Chart Selection\n\nSpecify whether to use dynamic switching between PDP numeric and\ncategorical binning and UI chart selection in cases where features were\nused both as numeric and categorical by the experiment. This is enabled\nby default.", + "prompt_type": "plain" + }, + { + "output": "numcat_threshold~~~~~~~~~~~~~~~~~~~~ .. container:: dropdown **Threshold for PD/ICE Binning and Chart Selection** Ifmli_pd_numcat_num_chart`` is enabled, and if the number of\n\n unique feature values is greater than the threshold, then numeric\n binning and chart is used. Otherwise, categorical binning and chart\n is used. The default threshold value is 11.\n\nSensitivity Analysis Explainer Settings", + "prompt_type": "plain" + }, + { + "output": "sample_size``\n\nSample Size for Sensitivity Analysis (SA)\n\nWhen the number of rows is above this limit, sample for Sensitivity\nAnalysis (SA). 
The default value is 500000.\n\nShapley Summary Plot Explainer Settings\n\nFor information on Shapley Summary Plots in Driverless AI, see\ndai-shapley-summary. The following is a list of parameters that can be\ntoggled from the recipes tab of the MLI page when running a new\ninterpretation.", + "prompt_type": "plain" + }, + { + "output": "max_features``\n\nMaximum Number of Features to be Shown\n\nSpecify the maximum number of features that are shown in the plot. By\ndefault, this value is set to 50.", + "prompt_type": "plain" + }, + { + "output": "sample_size``\n\nSample Size\n\nSpecify the sample size for the plot. By default, this value is set to\n20000.", + "prompt_type": "plain" + }, + { + "output": "x_resolution``\n\nX-Axis Resolution\n\nSpecify the number of Shapley value bins. By default, this value is set\nto 500.", + "prompt_type": "plain" + }, + { + "output": "drilldown_charts``\n\nEnable Creation of Per-Feature Shapley / Feature Value Scatter Plots\n\nSpecify whether to enable the creation of per-feature Shapley or feature\nvalue scatter plots. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "fast_approx``\n\nSpeed Up Predictions With a Fast Approximation\n\nSpecify whether to increase the speed of predictions with a fast\napproximation. This is enabled by default.\n\nShapley Values for Original Features Settings", + "prompt_type": "plain" + }, + { + "output": "sample_size``\n\nSample Size for Naive Shapley\n\nWhen the number of rows is above this limit, sample for Naive Shapley.\nBy default, this value is set to 100000.", + "prompt_type": "plain" + }, + { + "output": "fast_approx``\n\nSpeed Up Predictions With a Fast Approximation\n\nSpecify whether to increase the speed of predictions with a fast\napproximation. This is enabled by default.\n\nSurrogate Decision Tree Explainer Settings\n\nFor information on Surrogate Decision Tree Plots in Driverless AI, see\ndecision-tree. 
The following is a list of parameters that can be toggled\nfrom the recipes tab of the MLI page when running a new interpretation.", + "prompt_type": "plain" + }, + { + "output": "dt_tree_depth``\n\nDecision Tree Depth\n\nSpecify the depth of the decision tree. By default, this value is set to\n3.", + "prompt_type": "plain" + }, + { + "output": "nfolds``\n\nNumber of CV Folds\n\nSpecify the number of CV folds to use. By default, this value is set to\n0.", + "prompt_type": "plain" + }, + { + "output": "qbin_cols``\n\nQuantile Binning Columns\n\nSpecify quantile binning columns.", + "prompt_type": "plain" + }, + { + "output": "qbin_count``\n\nQuantile Bins Count\n\nSpecify the number of quantile bins. By default, this value is set to 0.", + "prompt_type": "plain" + }, + { + "output": "Building Models in Driverless AI\n\nlaunching ga modeling_before_you_begin running-experiment time-series\nnlp image-processing unsupervised", + "prompt_type": "plain" + }, + { + "output": "References\nAdebayo, Julius A. \"Fairml: Toolbox for diagnosing bias in predictive\nmodeling.\" Master\u2019s Thesis, MIT, 2016. Breiman, Leo. \"Statistical Modeling: The Two Cultures (with comments and\na rejoinder by the author).\" Statistical Science 16, no. 3, 2001. Craven, Mark W. and Shavlik, Jude W. \"Extracting tree structured\nrepresentations of trained networks.\" Advances in Neural Information\nProcessing Systems, 1996. Goldstein, Alex, Kapelner, Adam, Bleich, Justin, and Pitkin, Emil. \"Peeking inside the black box: Visualizing statistical learning with\nplots of individual conditional expectation.\" Journal of Computational\nand Graphical Statistics, no. 24, 2015. Groeneveld, R.A. and Meeden, G. (1984), \u201cMeasuring Skewness and\nKurtosis.\u201d The Statistician, 33, 391-399. Hall, Patrick, Wen Phan, and SriSatish Ambati. \u201cIdeas for Interpreting\nMachine Learning.\u201d O\u2019Reilly Ideas. O\u2019Reilly Media, 2017. Hartigan, J. A. and Mohanty, S. 
(1992), \u201cThe RUNT test for\nmultimodality,\u201d Journal of Classification, 9, 63\u201370.", + "prompt_type": "plain" + }, + { + "output": "Local Authentication Example\nThis section describes how to enable local authentication in Driverless\nAI. Docker Image Installs\nTo enable authentication in Docker images, specify the authentication\nenvironment variable that you want to use. Each variable must be\nprepended with DRIVERLESS_AI_. The example below starts Driverless AI\nwith environment variables that enable the following:\n- Local authentication when starting Driverless AI\n- S3 and HDFS access (without authentication)\n nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -p 12345:12345 \\\n -u `id -u`:`id -g` \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,s3,hdfs\" \\\n -e DRIVERLESS_AI_AUTHENTICATION_METHOD=\"local\" \\\n -e DRIVERLESS_AI_LOCAL_HTPASSWD_FILE=\"\" \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\nNative Installs\nNative installs include DEBs, RPMs, and TAR SH installs.", + "prompt_type": "plain" + }, + { + "output": "Completed Experiment Page\nThe following sections describe the completed experiment page. - completed-actions\n- completed-insights-scores\nCompleted Experiment Actions\nThe following is a description of the actions that can be performed\nafter the status of an experiment changes from Running to Complete. []\n- Interpret This Model: Create an interpretation for the model. For\n more information, see interpreting_a_model. - Diagnose Model on New Dataset: For more information, see\n diagnosing_a_model. - Model Actions drop-down:\n - Predict: See Score_On_Another_Dataset. - Transform Dataset: See transform_dataset. (Not available for\n Time Series experiments.) - Fit & Transform Dataset: See fit_and_transform_dataset. (Not\n available for Time Series experiments.) 
- Shapley Values drop-down: Download\n Shapley values for original or transformed\n features. Driverless AI calls XGBoost and LightGBM SHAP\n functions to get contributions for transformed features.", + "prompt_type": "plain" + }, + { + "output": "For more information, see\n Shapley values in DAI . Select Fast Approximation\n to make Shapley predictions using only a single fold and model\n from all of the available folds and models in the ensemble. For more information on the fast approximation options, refer\n to the fast_approx_num_trees and\n fast_approx_do_one_fold_one_model\n config.toml settings . - Original Features (Fast Approximation)\n - Original Features\n - Transformed Features (Fast Approximation)\n - Transformed Features\n - Export: Export the experiment. For more information, see\n export_import. - Visualize Scoring Pipeline (Experimental): View a visualization of\n the experiment scoring pipeline. For more information, refer to\n visualize_scoring_pipeline. - Download Scoring Pipeline drop-down:\n - Download Python Scoring Pipeline: Download a standalone Python\n scoring pipeline for H2O Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "- Download MOJO Scoring Pipeline: A standalone Model Object,\n Optimized scoring pipeline. For more information, refer to\n mojo_scoring_pipelines. (Note that this option is not\n available for TensorFlow or RuleFit models.) - (If h2o_mlops_ui_url is specified) Go to MLOps: When this button is\n clicked, a prompt is displayed on the screen. To open H2O MLOps in a\n new tab, click OK.\n- (If gui_enable_deploy_button=true) Deploy: Deploy the model. Note\n that by default, this button is disabled, and that the Completed\n Experiment -> Deploy functionality will be deprecated in version\n 1.10.5. For more information, refer to deployment. - Download Predictions: For regression experiments, output includes\n predictions with lower and upper bounds. 
For classification\n experiments, output includes probability for each class and labels\n created by using the threshold_scorer. For binary problems, F1 is\n the default threshold_scorer, so if a validation set is provided,\n then the threshold for max F1 on the validation set is used to\n create the labels.", + "prompt_type": "plain" + }, + { + "output": "For multiclass problems, argmax is used to create the\n labels. - Training (Holdout) Predictions: In CSV format, available if a\n validation set was not provided. - Validation Set Predictions: In CSV format, available if a\n validation set was provided. - Test Set Predictions: In CSV format, available if a test\n dataset is used. - Download Summary & Logs: Download a zip file containing the\n following files. For more information, refer to the\n experiment_summary section. - Experiment logs (regular and anonymized)\n - A summary of the experiment\n - The experiment features along with their relative importance\n - The individual_recipe for the experiment\n - Ensemble information\n - An experiment preview\n - Word version of an auto-generated report for the experiment\n - A target transformations tuning leaderboard\n - A tuning leaderboard\n- Download AutoDoc: Download an auto-generated report for the\n experiment as a Word (DOCX) document.", + "prompt_type": "plain" + }, + { + "output": "Note that this option is not\n available for deprecated models. For more information, see autodoc. - Tune Experiment drop-down: Tune the completed experiment by using\n the following options:\n - New / Continue: Select one of the following options:\n - With same settings: Create a new experiment that copies\n the setup of the original experiment. Selecting this\n option takes you to the Experiment Setup page, where you\n can change any parameter of the original experiment. 
- From last checkpoint: Create a new experiment that\n copies the setup of the original experiment and\n continues from the last iteration's checkpoint of models\n and features. Selecting this option takes you to the\n Experiment Setup page, where you can change any\n parameter of the original experiment. - Retrain / Refit: Retrain the experiment\u2019s final pipeline. For\n more information, see retrain.", + "prompt_type": "plain" + }, + { + "output": "Experiment performance\nThis page describes the factors that contribute to the performance of\nDriverless AI experiments. Each completed experiment iteration in Driverless AI experiments is a\nfitted model, but you can control the number of iterations with the time\ndial and the parameter_tuning_num_models TOML config mentioned in the\nfollowing section. Additionally, each model takes some number of model\niterations. XGBoost builds trees with a default up to about 3000 trees,\nbut this can be modified with the max_nestimators TOML config mentioned\nin the following section. List of TOML configs that can affect performance\nThe following list describes a variety of controls over the experiment\nand model runtimes:\n- Set max_runtime_minutes to a smaller number of minutes, e.g. 60 for\n 1 hour allowed. By default, DAI uses minimum of its estimate of an\n experiment runtime and max_runtime_minutes, or greater than 1 hour\n as chosen by min_auto_runtime_minutes. - Some algorithms perform much better on GPUs, like XGBoost, Bert, and\n Image models.", + "prompt_type": "plain" + }, + { + "output": "- Set the time dial to a lower value, which will do fewer models in\n tuning and evolution phases. - Set the interpretability dial to a larger value, which will more\n aggressively prune weak features, prune weak base models in\n ensemble, and avoid high-order feature interactions (interaction\n depth). You can also set fixed_feature_interaction_depth to control\n interaction depth directly. 
- Set parameter_tuning_num_models to a fixed non-zero but small value,\n to directly control number of tuning models instead of having it set\n automatically by the dials. - Set the max_nestimators TOML config to a lower value (for example,\n 500, 1000, 1500, or 2000) instead of the default value of\n 3000. This controls the final model, and via\n max_nestimators_feature_evolution_factor (default 0.2), controls the\n max for tuning and evolution models. Sometimes the data and model\n are such that many trees continue to learn, but the gains are\n minimal for the metric chosen.", + "prompt_type": "plain" + }, + { + "output": "For RF and Dart, change n_estimators_list_no_early_stopping instead. - If the system is used by a single user, set exclusive_mode to\n moderate. - Set enable_early_stopping_threshold to 0.01-0.1, which for (only)\n LightGBM will avoid using too many trees when evaluation metric for\n tree building has relative change less than this value. - Set max_abs_score_delta_train_valid and\n max_rel_score_delta_train_valid to a non-zero value to limit the\n number of trees by difference between train and valid scores on\n metric chosen to optimize. - Set reduce_mojo_size=True. In cases where the MOJO is too large or\n slow, you can also set the nfeatures_max TOML config to a value that\n is lower than the number of features you have. This lets you avoid\n too many features. - Set the min_learning_rate_final to a higher value (for example,\n 0.03). You can set max_learning_rate_final equal to\n min_learning_rate_final to force a fixed learning rate in final\n model.", + "prompt_type": "plain" + }, + { + "output": "- Set nfeatures_max to limit the number of features. This is useful in\n conjunction with ngenes_max to control the maximum number of\n transformations (each could make 1 or more features). - Set ensemble_level and fixed_ensemble_level to smaller values, e.g. 0 or 1, to limit the number of base models in final model. 
- Set fixed_fold_reps to a smaller value, e.g. 1, to limit the number\n of repeats. - Set max_max_depth to a smaller value, e.g. 8, to avoid trying larger\n depths for tree models. - Set max_max_bin to a smaller value, e.g. 128, to avoid larger\n max_bin values for tree models. - If TensorFlow MLP model is used and reproducible is set, only 1 core\n is used, unless you set\n tensorflow_use_all_cores_even_if_reproducible_true to true. This\n loses reproducibility for the TensorFlow model, but the rest of DAI\n will be reproducible. Note that the runtime estimate doesn't take into account the number of\ntrees needed for your data. The more trees needed by your data, the\ngreater the amount of time needed to complete an experiment.", + "prompt_type": "plain" + }, + { + "output": "The F0.5 score is the weighted harmonic mean of the precision and recall\n(given a threshold value). Unlike the F1 score, which gives equal weight\nto precision and recall, the F0.5 score gives more weight to precision\nthan to recall. More weight should be given to precision for cases where\nFalse Positives are considered worse than False Negatives. For example,\nif your use case is to predict which products you will run out of, you\nmay consider False Positives worse than False Negatives. In this case,\nyou want your predictions to be very precise and only capture the\nproducts that will definitely run out. If you predict a product will\nneed to be restocked when it actually doesn't, you incur cost by having\npurchased more inventory than you actually need. 
F05 equation:\n$$F0.5 = 1.25 \\;\\Big(\\; \\frac{(precision) \\; (recall)}{((0.25) \\; (precision)) + recall}\\; \\Big)$$\nWhere:\n- precision is the positive observations (true positives) the model\n correctly identified from all the observations it labeled as\n positive (the true positives + the false positives).", + "prompt_type": "plain" + }, + { + "output": "Missing and Unseen Levels Handling\nThis section describes how missing and unseen levels are handled by each\nalgorithm during training and scoring. How Does the Algorithm Handle Missing Values During Training? LightGBM, XGBoost, RuleFit\nDriverless AI treats missing values natively. (I.e., a missing value is\ntreated as a special value.) Experiments rarely benefit from imputation\ntechniques, unless the user has a strong understanding of the data. GLM\nDriverless AI automatically performs mean value imputation (equivalent\nto setting the value to zero after standardization). TensorFlow\nDriverless AI provides an imputation setting for TensorFlow in the\nconfig.toml file: tf_nan_impute_value (post-normalization). If you set\nthis option to 0, then missing values will be imputed by the mean. Setting it to (for example) +5 will specify 5 standard deviations above\nthe mean of the distribution. The default value in Driverless AI is -5,\nwhich specifies that TensorFlow will treat missing values as outliers on\nthe negative end of the spectrum.", + "prompt_type": "plain" + }, + { + "output": "FTRL\nIn FTRL, missing values have their own representation for each datable\ncolumn type. These representations are used to hash the missing value,\nwith their column's name, to an integer. This means FTRL replaces\nmissing values with special constants that are the same for each column\ntype, and then treats these special constants like a normal data value. 
Unsupervised Algorithms\nFor unsupervised algorithms , standardization in the\npre-transformation layer (where it is decided which columns and column\nencodings are fed in for clustering) is performed by ignoring any\nmissing values. Scikit-learn\u2019s StandardScaler is used internally during\nthe standardization process. Missing values are then replaced with 0 for\nfurther calculations or clustering. How Does the Algorithm Handle Missing Values During Scoring (Production)? LightGBM, XGBoost, RuleFit\nIf missing data is present during training, these tree-based algorithms\nlearn the optimal direction for missing data for each split (left or\nright).", + "prompt_type": "plain" + }, + { + "output": "If no missing data is present during training (for a particular\nfeature), then the majority path is followed if the value is missing. GLM\nMissing values are replaced by the mean value (from training), same as\nin training. TensorFlow\nMissing values are replaced by the same value as specified during\ntraining (parameterized by tf_nan_impute_value). FTRL\nTo ensure consistency, FTRL treats missing values during scoring in\nexactly the same way as during training. Clustering in Transformers\nMissing values are replaced with the mean along each column. This is\nused only on numeric columns. Isolation Forest Anomaly Score Transformer\nIsolation Forest uses out-of-range imputation that fills missing values\nwith the values beyond the maximum. What Happens When You Try to Predict on a Categorical Level Not Seen During Training? 
XGBoost, LightGBM, RuleFit, TensorFlow, GLM\nDriverless AI's feature engineering pipeline will compute a numeric\nvalue for every categorical level present in the data, whether it's a\npreviously seen value or not.", + "prompt_type": "plain" + }, + { + "output": "enable_tensorflow_textcnn``\n\nEnable Word-Based CNN TensorFlow Models for NLP\n\nSpecify whether to use out-of-fold predictions from Word-based CNN\nTensorFlow models as transformers for NLP. This option is ignored if\nTensorFlow is disabled. We recommend that you disable this option on\nsystems that do not use GPUs.", + "prompt_type": "plain" + }, + { + "output": "enable_tensorflow_textbigru``\n\nEnable Word-Based BiGRU TensorFlow Models for NLP\n\nSpecify whether to use out-of-fold predictions from Word-based BiG-RU\nTensorFlow models as transformers for NLP. This option is ignored if\nTensorFlow is disabled. We recommend that you disable this option on\nsystems that do not use GPUs.", + "prompt_type": "plain" + }, + { + "output": "enable_tensorflow_charcnn``\n\nEnable Character-Based CNN TensorFlow Models for NLP\n\nSpecify whether to use out-of-fold predictions from Character-level CNN\nTensorFlow models as transformers for NLP. This option is ignored if\nTensorFlow is disabled. We recommend that you disable this option on\nsystems that do not use GPUs.", + "prompt_type": "plain" + }, + { + "output": "enable_pytorch_nlp_model``\n\nEnable PyTorch Models for NLP\n\nSpecify whether to enable pretrained PyTorch models and fine-tune them\nfor NLP tasks. This is set to Auto by default. You need to set this to\nOn if you want to use the PyTorch models like BERT for modeling. Only\nthe first text column will be used for modeling with these models. 
We\nrecommend that you disable this option on systems that do not use GPUs.", + "prompt_type": "plain" + }, + { + "output": "enable_pytorch_nlp_transformer``\n\nEnable pre-trained PyTorch Transformers for NLP\n\nSpecify whether to enable pretrained PyTorch models for NLP tasks. This\nis set to Auto by default, and is enabled for text-dominated problems\nonly. You need to set this to On if you want to use the PyTorch models\nlike BERT for feature engineering (via fitting a linear model on top of\npretrained embeddings). We recommend that you disable this option on\nsystems that do not use GPUs.\n\nNotes:\n\n- This setting requires an Internet connection.", + "prompt_type": "plain" + }, + { + "output": "pytorch_nlp_pretrained_models``\n\nSelect Which Pretrained PyTorch NLP Models to Use\n\nSpecify one or more pretrained PyTorch NLP models to use. Select from\nthe following:\n\n- bert-base-uncased (Default)\n- distilbert-base-uncased (Default)\n- xlnet-base-cased\n- xlm-mlm-enfr-1024\n- roberta-base\n- albert-base-v2\n- camembert-base\n- xlm-roberta-base\n\nNotes:\n\n- This setting requires an Internet connection.\n- Models that are not selected by default may not have MOJO support.\n- Using BERT-like models may result in a longer experiment completion\n time.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_max_epochs_nlp``\n\nMax TensorFlow Epochs for NLP\n\nWhen building TensorFlow NLP features (for text data), specify the\nmaximum number of epochs to train feature engineering models with (it\nmight stop earlier). The higher the number of epochs, the higher the run\ntime. This value defaults to 2 and is ignored if TensorFlow models is\ndisabled.", + "prompt_type": "plain" + }, + { + "output": "enable_tensorflow_nlp_accuracy_switch``\n\nAccuracy Above Enable TensorFlow NLP by Default for All Models\n\nSpecify the accuracy threshold. 
Values equal and above will add all\nenabled TensorFlow NLP models at the start of the experiment for\ntext-dominated problems when the following NLP expert settings are set\nto Auto:\n\n- Enable word-based CNN TensorFlow models for NLP\n- Enable word-based BigRU TensorFlow models for NLP\n- Enable character-based CNN TensorFlow models for NLP\n\nIf the above transformations are set to ON, this parameter is ignored.\n\nAt lower accuracy, TensorFlow NLP transformations will only be created\nas a mutation. This value defaults to 5.", + "prompt_type": "plain" + }, + { + "output": "pytorch_nlp_fine_tuning_num_epochs``\n\nNumber of Epochs for Fine-Tuning of PyTorch NLP Models\n\nSpecify the number of epochs used when fine-tuning PyTorch NLP models.\nThis value defaults to 2.", + "prompt_type": "plain" + }, + { + "output": "pytorch_nlp_fine_tuning_batch_size``\n\nBatch Size for PyTorch NLP Models\n\nSpecify the batch size for PyTorch NLP models. This value defaults to\n10.\n\nNote: Large models and batch sizes require more memory.", + "prompt_type": "plain" + }, + { + "output": "pytorch_nlp_fine_tuning_padding_length``\n\nMaximum Sequence Length for PyTorch NLP Models\n\nSpecify the maximum sequence length (padding length) for PyTorch NLP\nmodels. This value defaults to 100.\n\nNote: Large models and padding lengths require more memory.", + "prompt_type": "plain" + }, + { + "output": "pytorch_nlp_pretrained_models_dir``\n\nPath to Pretrained PyTorch NLP Models\n\nSpecify a path to pretrained PyTorch NLP models. To get all available\nmodels, download\nhttp://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/bert_models.zip,\nthen extract the folder and store it in a directory on the instance\nwhere Driverless AI is installed:\n\n pytorch_nlp_pretrained_models_dir = /path/on/server/to/bert_models_folder", + "prompt_type": "plain" + }, + { + "output": "tensorflow_nlp_pretrained_embeddings_file_path-------------------------------------------------- .. 
container:: dropdown **Path to Pretrained Embeddings for TensorFlow NLP Models** Specify a path to pretrained embeddings that will be used for the TensorFlow NLP models. Note that this can be either a path in the local file system (/path/on/server/to/file.txt) or an S3 location (s3://``). Notes:\n - If an S3 location is specified, an S3 access key ID and S3 secret\n access key can also be specified with the\n tensorflow_nlp_pretrained_s3_access_key_id and\n tensorflow_nlp_pretrained_s3_secret_access_key expert settings\n respectively. - You can download the Glove embeddings from here and specify the\n local path in this box. - You can download the fasttext embeddings from here and specify the\n local path in this box. - You can also train your own custom embeddings. Please refer to\n this code sample for creating custom embeddings that can be passed\n on to this option.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_nlp_pretrained_s3_access_key_id---------------------------------------------- .. container:: dropdown **S3 access key ID to use when**tensorflow_nlp_pretrained_embeddings_file_path**is set to an S3 location** Specify an S3 access key ID to use whentensorflow_nlp_pretrained_embeddings_file_path` is set to an S3 location. For more information, see :ref:`the entry on the tensorflow_nlp_pretrained_embeddings_file_path \n\n expert setting.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_nlp_pretrained_s3_secret_access_key-------------------------------------------------- .. container:: dropdown **S3 secret access key to use when**tensorflow_nlp_pretrained_embeddings_file_path**is set to an S3 location** Specify an S3 secret access key to use whentensorflow_nlp_pretrained_embeddings_file_path` is set to an S3 location. 
For more information, see :ref:`the entry on the tensorflow_nlp_pretrained_embeddings_file_path \n\n expert setting.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_nlp_pretrained_embeddings_trainable``\n\nFor TensorFlow NLP, Allow Training of Unfrozen Pretrained Embeddings\n\nSpecify whether to allow training of all weights of the neural network\ngraph, including the pretrained embedding layer weights. If this is\ndisabled, the embedding layer will be frozen. All other weights,\nhowever, will still be fine-tuned. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "text_fraction_for_text_dominated_problem``\n\nFraction of Text Columns Out of All Features to be Considered a\nText-Dominated Problem\n\nSpecify the fraction of text columns out of all features to be\nconsidered as a text-dominated problem. This value defaults to 0.3.\n\nSpecify when a string column will be treated as text (for an NLP\nproblem) or just as a standard categorical variable. Higher values will\nfavor string columns as categoricals, while lower values will favor\nstring columns as text. This value defaults to 0.3.", + "prompt_type": "plain" + }, + { + "output": "text_transformer_fraction_for_text_dominated_problem``\n\nFraction of Text per All Transformers to Trigger That Text Dominated\n\nSpecify the fraction of text columns out of all features to be\nconsidered a text-dominated problem. This value defaults to 0.3.", + "prompt_type": "plain" + }, + { + "output": "string_col_as_text_threshold``\n\nThreshold for String Columns to be Treated as Text\n\nSpecify the threshold value (from 0 to 1) for string columns to be\ntreated as text (0.0 - text; 1.0 - string). This value defaults to 0.3.", + "prompt_type": "plain" + }, + { + "output": "text_transformers_max_vocabulary_size``\n\nMax Size of the Vocabulary for Text Transformers\n\nMax number of tokens created during fitting of Tfidf/Count based text\ntransformers. 
If multiple values are provided, will use the first one\nfor initial models, and use remaining values during parameter tuning and\nfeature evolution. The default value is [1000, 5000]. Values smaller\nthan 10000 are recommended for speed.", + "prompt_type": "plain" + }, + { + "output": "Which Pipeline Should I Use? Driverless AI Python Scoring Pipeline\nDriverless AI Python Scoring Pipeline is implemented as a Python whl\nfile. While this allows for a single process scoring engine, the scoring\nservice is generally implemented as a client/server architecture and\nsupports interfaces for TCP and HTTP. When running the Python Scoring\nPipeline:\n - HTTP is supported by virtually any language. HTTP supports RESTful\n calls via curl, wget, or supported packages in various scripting\n languages. - TCP is a bit more complex, though faster. TCP also requires\n Thrift, which currently does not handle NAs. k-LIME reason codes and Shapley reason codes whl file can be obtained\nfor all models from MLI Standalone Python Scoring Pipeline from the MLI\nexperiment page. Driverless AI MOJO Scoring Pipeline\nDriverless AI MOJO Scoring Pipeline is flexible and is faster than the\nPython Scoring Pipeline. It requires some coding. The MOJO Scoring\nPipeline is available as either a Java runtime or a\nC++ runtime (with R and Python wrappers).", + "prompt_type": "plain" + }, + { + "output": "JDBC Setup\n\nDriverless AI lets you explore Java Database Connectivity (JDBC) data\nsources from within the Driverless AI application. This section provides\ninstructions for configuring Driverless AI to work with JDBC.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run --runtime=nvidia(>= Docker 19.03) ornvidia-docker(< Docker 19.03) command when starting the Driverless AI Docker image. Usedocker versionto check which version of Docker you are using. 
Tested Databases ---------------- The following databases have been tested for minimal functionality. Note that JDBC drivers that are not included in this list should work with Driverless AI. We recommend that you test out your JDBC driver even if you do not see it on the list of tested databases. See the :ref:`untested-jdbc-driver` section at the end of this chapter for information on how to try out an untested JDBC driver. - Oracle DB - PostgreSQL - Amazon Redshift - Teradata Description of Configuration Attributes --------------------------------------- -jdbc_app_configs: Configuration for the JDBC connector. This is a JSON/Dictionary String with multiple keys. **Note**: This requires a JSON key (typically the name of the database being configured) to be associated with a nested JSON that contains the url, jarpath, and classpath fields.", + "prompt_type": "plain" + }, + { + "output": "Double quotation marks (\"...\") must be used to denote keys and values *within* the JSON dictionary, and *outer* quotations must be formatted as either \"\"\", ''', or '. Depending on how the configuration value is applied, different forms of outer quotations may be required. The following examples show two unique methods for applying outer quotations. 
- Configuration value applied with the config.toml file: :: jdbc_app_configs = \"\"\"{\"my_json_string\": \"value\", \"json_key_2\": \"value2\"}\"\"\" - Configuration value applied with an **environment variable**: :: DRIVERLESS_AI_JDBC_APP_CONFIGS='{\"my_json_string\": \"value\", \"json_key_2\": \"value2\"}' For example: :: DRIVERLESS_AI_JDBC_APP_CONFIGS='{ \"postgres\": {\"url\": \"jdbc:postgresql://192.xxx.x.xxx:aaaa:/name_of_database;user=name_of_user;password=your_password\",\"jarpath\": \"/config/postgresql-xx.x.x.jar\",\"classpath\": \"org.postgresql.Driver\"}, \"postgres-local\": {\"url\": \"jdbc:postgresql://123.xxx.xxx.xxx:aaaa/name_of_database\",\"jarpath\": \"/config/postgresql-xx.x.x.jar\",\"classpath\": \"org.postgresql.Driver\"}, \"ms-sql\": {\"url\": \"jdbc:sqlserver://192.xxx.x.xxx:aaaa;databaseName=name_of_database;user=name_of_user;password=your_password\",\"Username\":\"your_username\",\"passsword\":\"your_password\",\"jarpath\": \"/config/sqljdbc42.jar\",\"classpath\": \"com.microsoft.sqlserver.jdbc.SQLServerDriver\"}, \"oracle\": {\"url\": \"jdbc:oracle:thin:@192.xxx.x.xxx:aaaa/orclpdb1\",\"jarpath\": \"ojdbc7.jar\",\"classpath\": \"oracle.jdbc.OracleDriver\"}, \"db2\": {\"url\": \"jdbc:db2://127.x.x.x:aaaaa/name_of_database\",\"jarpath\": \"db2jcc4.jar\",\"classpath\": \"com.ibm.db2.jcc.DB2Driver\"}, \"mysql\": {\"url\": \"jdbc:mysql://192.xxx.x.xxx:aaaa;\",\"jarpath\": \"mysql-connector.jar\",\"classpath\": \"com.mysql.jdbc.Driver\"}, \"Snowflake\": {\"url\": \"jdbc:snowflake://.snowflakecomputing.com/?\",\"jarpath\": \"/config/snowflake-jdbc-x.x.x.jar\",\"classpath\": \"net.snowflake.client.jdbc.SnowflakeDriver\"}, \"Derby\": {\"url\": \"jdbc:derby://127.x.x.x:aaaa/name_of_database\",\"jarpath\": \"/config/derbyclient.jar\",\"classpath\": \"org.apache.derby.jdbc.ClientDriver\"} }'\\ -jdbc_app_jvm_args: Extra jvm args for JDBC connector.", + "prompt_type": "plain" + }, + { + "output": "-jdbc_app_classpath: Optionally specify an alternative 
classpath for the JDBC connector. -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. Retrieve the JDBC Driver ------------------------ 1. Download JDBC Driver JAR files: .. - `Oracle DB `__ - `PostgreSQL `__ - `Amazon Redshift `__ - `Teradata `__ **Note**: Remember to take note of the driver classpath, as it is needed for the configuration steps (for example, org.postgresql.Driver). 2. Copy the driver JAR to a location that can be mounted into the Docker container. .. **Note**: The folder storing the JDBC jar file must be visible/readable by the dai process user.", + "prompt_type": "plain" + }, + { + "output": "Note that the JDBC connection strings will vary depending on the database that is used. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,hdfs,jdbc\" \\ -e DRIVERLESS_AI_JDBC_APP_CONFIGS='{\"postgres\": {\"url\": \"jdbc:postgres://localhost:5432/my_database\", \"jarpath\": \"/path/to/postgresql/jdbc/driver.jar\", \"classpath\": \"org.postgresql.Driver\"}}' \\ -e DRIVERLESS_AI_JDBC_APP_JVM_ARGS=\"-Xmx2g\" \\ -p 12345:12345 \\ -v /path/to/local/postgresql/jdbc/driver.jar:/path/to/postgresql/jdbc/driver.jar \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure JDBC options in the config.toml file, and then specify that file when starting Driverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "Configure the Driverless AI config.toml file. Set the following configuration options: .. .. 
code:: bash enabled_file_systems = \"file, upload, jdbc\" jdbc_app_configs = \"\"\"{\"postgres\": {\"url\": \"jdbc:postgres://localhost:5432/my_database\", \"jarpath\": \"/path/to/postgresql/jdbc/driver.jar\", \"classpath\": \"org.postgresql.Driver\"}}\"\"\" 2. Mount the config.toml file and requisite JAR files into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/jdbc/driver.jar:/path/in/docker/jdbc/driver.jar \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example enables the JDBC connector for PostgresQL.", + "prompt_type": "plain" + }, + { + "output": "- The configuration requires a JSON key (typically the name of the database being configured) to be associated with a nested JSON that contains theurl,jarpath, andclasspathfields. In addition, this should take the format: :: \"\"\"{\"my_jdbc_database\": {\"url\": \"jdbc:my_jdbc_database://hostname:port/database\", \"jarpath\": \"/path/to/my/jdbc/database.jar\", \"classpath\": \"com.my.jdbc.Driver\"}}\"\"\" 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Edit the following values in the config.toml file. .. 
:: # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"upload, file, hdfs, jdbc\" # Configuration for JDBC Connector. # JSON/Dictionary String with multiple keys. # Format as a single line without using carriage returns (the following example is formatted for readability). # Use triple quotations to ensure that the text is read as a single string. 
# Example: # \"\"\"{ # \"postgres\": { # \"url\": \"jdbc:postgresql://ip address:port/postgres\", # \"jarpath\": \"/path/to/postgres_driver.jar\", # \"classpath\": \"org.postgresql.Driver\" # }, # \"mysql\": { # \"url\":\"mysql connection string\", # \"jarpath\": \"/path/to/mysql_driver.jar\", # \"classpath\": \"my.sql.classpath.Driver\" # } # }\"\"\" jdbc_app_configs = \"\"\"{\"postgres\": {\"url\": \"jdbc:postgres://localhost:5432/my_database\", \"jarpath\": \"/path/to/postgresql/jdbc/driver.jar\", \"classpath\": \"org.postgresql.Driver\"}}\"\"\" # extra jvm args for jdbc connector jdbc_app_jvm_args = \"\" # alternative classpath for jdbc connector jdbc_app_classpath = \"\" 3.", + "prompt_type": "plain" + }, + { + "output": "Adding Datasets Using JDBC -------------------------- After the JDBC connector is enabled, you can add datasets by selecting **JDBC** from the **Add Dataset (or Drag and Drop)** drop-down menu. .. figure:: ../images/jdbc.png :alt: 1. Click on the **Add Dataset** button on the Datasets page. 2. Select **JDBC** from the list that appears. 3. Click on the **Select JDBC Connection** button to select a JDBC configuration. 4. The form will populate with the JDBC Database, URL, Driver, and Jar information. Complete the following remaining fields: .. - **JDBC Username**: Enter your JDBC username. - **JDBC Password**: Enter your JDBC password. (See the *Notes* section) - **Destination Name**: Enter a name for the new dataset. - (Optional) **ID Column Name**: Enter a name for the ID column. Specify this field when making large data queries. **Notes**: - Do not include the password as part of the JDBC URL. Instead, enter the password in the **JDBC Password** field.", + "prompt_type": "plain" + }, + { + "output": "- Due to resource sharing within Driverless AI, the JDBC Connector is only allocated a relatively small amount of memory. - When making large queries, the ID column is used to partition the data into manageable portions. 
This ensures that the maximum memory allocation is not exceeded. - If a query that is larger than the maximum memory allocation is made without specifying an ID column, the query will not complete successfully. 5. Write a SQL Query in the format of the database that you want to query. (See the `Query Examples <#queryexamples>`__ section below.) The format will vary depending on the database that is used. 6. Click the **Click to Make Query** button to execute the query. The time it takes to complete depends on the size of the data being queried and the network speeds to the database. On a successful query, you will be returned to the datasets page, and the queried data will be available as a new dataset.", + "prompt_type": "plain" + }, + { + "output": "Configuration: .. :: jdbc_app_configs = \"\"\"{\"oracledb\": {\"url\": \"jdbc:oracle:thin:@localhost:1521/oracledatabase\", \"jarpath\": \"/home/ubuntu/jdbc-jars/ojdbc8.jar\", \"classpath\": \"oracle.jdbc.OracleDriver\"}}\"\"\" 2. Sample Query: .. - Select **oracledb** from the **Select JDBC Connection** dropdown menu. - **JDBC Username**:oracleuser- **JDBC Password**:oracleuserpassword- **ID Column Name**: - **Query**: .. :: SELECT MIN(ID) AS NEW_ID, EDUCATION, COUNT(EDUCATION) FROM my_oracle_schema.creditcardtrain GROUP BY EDUCATION **Note**: Because this query does not specify an **ID Column Name**, it will only work for small data. However, the **NEW_ID** column can be used as the ID Column if the query is for larger data. 3. Click the **Click to Make Query** button to execute the query. .. container:: group-tab PostgreSQL 1. Configuration: .. 
:: jdbc_app_configs = \"\"\"{\"postgres\": {\"url\": \"jdbc:postgresql://localhost:5432/postgresdatabase\", \"jarpath\": \"/home/ubuntu/postgres-artifacts/postgres/Driver.jar\", \"classpath\": \"org.postgresql.Driver\"}}\"\"\" 2.", + "prompt_type": "plain" + }, + { + "output": "- **JDBC Username**:postgres_user- **JDBC Password**:pguserpassword- **ID Column Name**:id``\n - Query:\n 3. Click the Click to Make Query button to execute the query. Adding an Untested JDBC Driver\nWe encourage you to try out JDBC drivers that are not tested in house. Docker Image Installs\n1. Download the JDBC jar for your database. 2. Move your JDBC jar file to a location that DAI can access. 3. Start the Driverless AI Docker image using the JDBC-specific\n environment variables. nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n --add-host name.node:172.16.2.186 \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"upload,file,hdfs,s3,recipe_file,jdbc\" \\\n -e DRIVERLESS_AI_JDBC_APP_CONFIGS=\"\"\"{\"my_jdbc_database\": {\"url\": \"jdbc:my_jdbc_database://hostname:port/database\",\n \"jarpath\": \"/path/to/my/jdbc/database.jar\", \n \"classpath\": \"com.my.jdbc.Driver\"}}\"\"\"\\ \n -e DRIVERLESS_AI_JDBC_APP_JVM_ARGS=\"-Xmx2g\" \\\n -p 12345:12345 \\\n -v /path/to/local/postgresql/jdbc/driver.jar:/path/to/postgresql/jdbc/driver.jar \\\n -v /etc/passwd:/etc/passwd:ro \\\n -v /etc/group:/etc/group:ro \\\n -v /tmp/dtmp/:/tmp \\\n -v /tmp/dlog/:/log \\\n -v /tmp/dlicense/:/license \\\n -v /tmp/ddata/:/data \\\n -u $(id -u):$(id -g) \\\n h2oai/dai-ubi8-x86_64:|tag|\nDocker Image with the config.toml\n1.", + "prompt_type": "plain" + }, + { + "output": "Install and Run in a Docker Container on Google Compute Engine\nThis section describes how to install and start Driverless AI from\nscratch using a Docker container in a Google Compute environment. This installation assumes that you already have a Google Cloud Platform\naccount. 
If you don't have an account, go to\nhttps://console.cloud.google.com/getting-started to create one. In\naddition, refer to Google's Machine Types documentation for information\non Google Compute machine types. Watch the installation video here. Note that some of the images in this\nvideo may change between releases, but the installation steps remain the\nsame. Before You Begin\nIf you are trying GCP for the first time and have just created an\naccount, check your Google Compute Engine (GCE) resource quota limits. By default, GCP allocates a maximum of 8 CPUs and no GPUs. You can\nchange these settings to match your quota limit, or you can request more\nresources from GCP. Refer to https://cloud.google.com/compute/quotas for\nmore information, including information on how to check your quota and\nrequest additional quota.", + "prompt_type": "plain" + }, + { + "output": "In your browser, log in to the Google Compute Engine Console at\n https://console.cloud.google.com/. 2. In the left navigation panel, select Compute Engine > VM Instances. 3. Click Create Instance. 4. Specify the following at a minimum:\n5. Create a Firewall rule for Driverless AI. On the Google Cloud\n Platform left navigation panel, select VPC network > Firewall rules. Specify the following settings:\n6. On the VM Instances page, SSH to the new VM Instance by selecting\n Open in Browser Window from the SSH dropdown. 7. H2O provides a script for you to run in your VM instance. Open an\n editor in the VM instance (for example, vi). Copy one of the scripts\n below (depending on whether you are running GPUs or CPUs). Save the\n script as install.sh. 8. Type the following commands to run the install script. 9. In your user folder, create the following directories as your user. 10. Add your Google Compute user name to the Docker container. 11. 
Reboot the system to enable NVIDIA drivers.", + "prompt_type": "plain" + }, + { + "output": "Retrieve the Driverless AI Docker image from\n https://www.h2o.ai/download/. 13. Load the Driverless AI Docker image. The following example shows how\n to load Driverless AI. Replace VERSION with your image. 14. If you are running CPUs, you can skip this step. Otherwise, you must\n enable persistence of the GPU. Note that this needs to be run once\n every reboot. Refer to the following for more information:\n http://docs.nvidia.com/deploy/driver-persistence/index.html. 15. Start the Driverless AI Docker image and replace TAG below with the\n image tag. Depending on your install version, use the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command. Refer to Data Connectors for information on\n how to add the GCS and GBQ data connectors to your Driverless AI\n instance. 16. Connect to Driverless AI with your browser:\nStopping the GCE Instance\nThe Google Compute Engine instance will continue to run even when you\nclose the portal.", + "prompt_type": "plain" + }, + { + "output": "On the VM Instances page, click on the VM instance that you want to\n stop. 2. Click Stop at the top of the page. 3. A confirmation page will display. Click Stop to stop the instance. Stopping in Terminal\nSSH into the machine that is running Driverless AI, and then run the\nfollowing:\n h2oai stop\nUpgrading Driverless AI\nThis section provides instructions for upgrading Driverless AI versions\nthat were installed in a Docker container. These steps ensure that\nexisting experiments are saved. WARNING: Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\ndirectory and are not automatically upgraded when Driverless AI is\nupgraded. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. 
If you did not build MLI on a model before upgrading Driverless AI,\n then you will not be able to view MLI on that model after upgrading. Before upgrading, be sure to run MLI jobs on models that you want to\n continue to interpret in future releases.", + "prompt_type": "plain" + }, + { + "output": "If you did not build a MOJO pipeline on a model before upgrading\n Driverless AI, then you will not be able to build a MOJO pipeline on\n that model after upgrading. Before upgrading, be sure to build MOJO\n pipelines on all desired models and then back up your Driverless AI\n tmp directory. Note: Stop Driverless AI if it is still running. Requirements\nWe recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere. Driverless AI ships with CUDA 11.2.2 for GPUs, but the driver\nmust exist in the host environment. Go to NVIDIA download driver to get the latest NVIDIA Tesla A/T/V/P/K\nseries drivers. For reference on CUDA Toolkit and Minimum Required\nDriver Versions and CUDA Toolkit and Corresponding Driver Versions, see\nhere . Note\nUpgrade Steps\n1. SSH into the IP address of the machine that is running Driverless\n AI. 2. Set up a directory for the version of Driverless AI on the host\n machine:\n3.", + "prompt_type": "plain" + }, + { + "output": "Scorers\nClassification or Regression\nGINI (Gini Coefficient)\nThe Gini index is a well-established method to quantify the inequality\namong values of a frequency distribution, and can be used to measure the\nquality of a binary classifier. A Gini index of zero expresses perfect\nequality (or a totally useless classifier), while a Gini index of one\nexpresses maximal inequality (or a perfect classifier). The Gini index is based on the Lorenz curve. The Lorenz curve plots the\ntrue positive rate (y-axis) as a function of percentiles of the\npopulation (x-axis). 
The Lorenz curve represents a collective of models represented by the\nclassifier. The location on the curve is given by the probability\nthreshold of a particular model. (i.e., Lower probability thresholds for\nclassification typically lead to more true positives, but also to more\nfalse positives.) The Gini index itself is independent of the model and only depends on\nthe Lorenz curve determined by the distribution of the scores (or\nprobabilities) obtained from the classifier.", + "prompt_type": "plain" + }, + { + "output": "The R2 value varies between 0 and 1 where 0\nrepresents no correlation between the predicted and actual value and 1\nrepresents complete correlation. Calculating the R2 value for linear models is mathematically equivalent\nto 1\u2005\u2212\u2005SSE/SST (or 1\u2005\u2212\u2005residual sum of squares/total sum of squares). For all other models, this equivalence does not hold, so the 1\u2005\u2212\u2005SSE/SST\nformula cannot be used. In some cases, this formula can produce negative\nR2 values, which is mathematically impossible for the square of a real number. Because\nDriverless AI does not necessarily use linear models, the R2 value is\ncalculated using the squared Pearson correlation coefficient. R2 equation:\n$$R2 = \\Big(\\frac{\\sum_{i=1}^{n}(x_i-\\bar{x})(y_i-\\bar{y})}{\\sqrt{\\sum_{i=1}^{n}(x_i-\\bar{x})^2\\sum_{i=1}^{n}(y_i-\\bar{y})^2}}\\Big)^2$$\nWhere:\n- x is the predicted target value\n- y is the actual target value\nMSE (Mean Squared Error)\nThe MSE metric measures the average of the squares of the errors or\ndeviations. MSE takes the distances from the points to the regression\nline (these distances are the \u201cerrors\u201d) and squares them to remove any\nnegative signs.", + "prompt_type": "plain" + }, + { + "output": "MSE also gives more weight to larger differences. The bigger the error,\nthe more it is penalized. 
For example, if your correct answers are 2,3,4\nand the algorithm guesses 1,4,3, then the absolute error on each one is\nexactly 1, so squared error is also 1, and the MSE is 1. But if the\nalgorithm guesses 2,3,6, then the errors are 0,0,2, the squared errors\nare 0,0,4, and the MSE is a higher 1.333. The smaller the MSE, the\nbetter the model's performance. (Tip: MSE is sensitive to outliers. If\nyou want a more robust metric, try mean absolute error (MAE).) MSE equation:\n$$MSE = \\frac{1}{N} \\sum_{i=1}^{N}(y_i -\\hat{y}_i)^2$$\nRMSE (Root Mean Squared Error)\nThe RMSE metric evaluates how well a model can predict a continuous\nvalue. The RMSE units are the same as the predicted target, which is\nuseful for understanding if the size of the error is of concern or not. The smaller the RMSE, the better the model's performance. (Tip: RMSE is\nsensitive to outliers. If you want a more robust metric, try mean\nabsolute error (MAE).)", + "prompt_type": "plain" + }, + { + "output": "- y is the actual target value. - y\u0302 is the predicted target value. RMSLE (Root Mean Squared Logarithmic Error)\nThis metric measures the ratio between actual values and predicted\nvalues and takes the log of the predictions and actual values. Use this\ninstead of RMSE if an under-prediction is worse than an over-prediction. You can also use this when you don't want to penalize large differences\nwhen both of the values are large numbers. RMSLE equation:\n$$RMSLE = \\sqrt{\\frac{1}{N} \\sum_{i=1}^{N} \\big(ln \\big(\\frac{y_i +1} {\\hat{y}_i +1}\\big)\\big)^2 }$$\nWhere:\n- N is the total number of rows (observations) of your corresponding\n dataframe. - y is the actual target value. - y\u0302 is the predicted target value. RMSPE (Root Mean Square Percentage Error)\nThis metric is the RMSE expressed as a percentage. The smaller the\nRMSPE, the better the model performance. 
RMSPE equation:\n$$RMSPE = \\sqrt{\\frac{1}{N} \\sum_{i=1}^{N} \\frac{(y_i -\\hat{y}_i)^2 }{(y_i)^2}}$$\nMAE (Mean Absolute Error)\nThe mean absolute error is an average of the absolute errors.", + "prompt_type": "plain" + }, + { + "output": "The\nsmaller the MAE the better the model's performance. (Tip: MAE is robust\nto outliers. If you want a metric that is sensitive to outliers, try\nroot mean squared error (RMSE).) MAE equation:\n$$MAE = \\frac{1}{N} \\sum_{i=1}^{N} | x_i - x |$$\nWhere:\n- N is the total number of errors\n- |x_(i)\u2005\u2212\u2005x| equals the absolute errors. MAPE (Mean Absolute Percentage Error)\nMAPE measures the size of the error in percentage terms. It is\ncalculated as the average of the unsigned percentage error. MAPE equation:\n$$MAPE = \\big(\\frac{1}{N} \\sum \\frac {|Actual - Forecast |}{|Actual|} \\big) * 100$$\nBecause the MAPE measure is in percentage terms, it gives an indication\nof how large the error is across different scales. Consider the\nfollowing example:\n --------------------------------------------------------------------\n Actual Predicted Absolute Error Absolute Percentage Error\n ---------- ------------ ---------------- ---------------------------\n 5 1 4 80%\n 15,000 15,004 4 0.03%\n --------------------------------------------------------------------\nBoth records have an absolute error of 4, but this error could be\nconsidered \"small\" or \"big\" when you compare it to the actual value.", + "prompt_type": "plain" + }, + { + "output": "This is important when the actual values\ncan be 0 or near 0. Actual values near 0 cause the MAPE value to become\ninfinitely high. Because SMAPE includes both the actual and the\npredicted values, the SMAPE value can never be greater than 200%. Consider the following example:\n -----------------------\n Actual Predicted\n ---------- ------------\n 0.01 0.05\n 0.03 0.04\n -----------------------\nThe MAPE for this data is 216.67% but the SMAPE is only 80.95%. 
Both records have a small absolute error (0.04 and 0.01), but because the\nactual values are close to 0, the MAPE is inflated while the SMAPE stays bounded. MER (Median Error Rate or Median Absolute Percentage Error)\nMER measures the median size of the error in percentage terms. It is\ncalculated as the median of the unsigned percentage error. MER equation:\n$$MER = \\big(median \\frac {|Actual - Forecast |}{|Actual|} \\big) * 100$$\nBecause the MER is the median, half the scored population has a lower\nabsolute percentage error than the MER, and half the population has a\nlarger absolute percentage error than the MER.", + "prompt_type": "plain" + }, + { + "output": "The MCC metric combines the true positives,\nfalse positives, true negatives, and false negatives using the equation\ndescribed below. A Driverless AI model will return probabilities, not predicted classes. To convert probabilities to predicted classes, a threshold needs to be\ndefined. Driverless AI iterates over possible thresholds to calculate a\nconfusion matrix for each threshold. It does this to find the maximum\nMCC value. Driverless AI's goal is to continue increasing this maximum\nMCC. Unlike metrics like Accuracy, MCC is a good scorer to use when the\ntarget variable is imbalanced. In the case of imbalanced data, high\nAccuracy can be found by predicting the majority class. Metrics like\nAccuracy and F1 can be misleading, especially in the case of imbalanced\ndata, because they do not consider the relative size of the four\nconfusion matrix categories. MCC, on the other hand, takes the\nproportion of each class into account. The MCC value ranges from -1 to 1\nwhere -1 indicates a classifier that predicts the opposite class from\nthe actual value, 0 means the classifier does no better than random\nguessing, and 1 indicates a perfect classifier.", + "prompt_type": "plain" + }, + { + "output": "To\nconvert probabilities to predicted classes, a threshold needs to be\ndefined. 
Driverless AI iterates over possible thresholds to calculate a\nconfusion matrix for each threshold. It does this to find the maximum F\nmetric value. Driverless AI's goal is to continue increasing this\nmaximum F metric. The F1 score provides a measure for how well a binary classifier can\nclassify positive cases (given a threshold value). The F1 score is\ncalculated from the harmonic mean of the precision and recall. An F1\nscore of 1 means both precision and recall are perfect and the model\ncorrectly identified all the positive cases and didn't mark a negative\ncase as a positive case. If either precision or recall are very low it\nwill be reflected with a F1 score closer to 0. F1 equation:\n$$F1 = 2 \\;\\Big(\\; \\frac{(precision) \\; (recall)}{precision + recall}\\; \\Big)$$\nWhere:\n- precision is the positive observations (true positives) the model\n correctly identified from all the observations it labeled as\n positive (the true positives + the false positives).", + "prompt_type": "plain" + }, + { + "output": "The F2 score is the weighted harmonic mean of the precision and recall\n(given a threshold value). Unlike the F1 score, which gives equal weight\nto precision and recall, the F2 score gives more weight to recall than\nto precision. More weight should be given to recall for cases where\nFalse Negatives are considered worse than False Positives. For example,\nif your use case is to predict which customers will churn, you may\nconsider False Negatives worse than False Positives. In this case, you\nwant your predictions to capture all of the customers that will churn. Some of these customers may not be at risk for churning, but the extra\nattention they receive is not harmful. More importantly, no customers\nactually at risk of churning have been missed. 
F2 equation:\n$$F2 = 5 \\;\\Big(\\; \\frac{(precision) \\; (recall)}{((4)\\;(precision)) + recall}\\; \\Big)$$\nWhere:\n- precision is the positive observations (true positives) the model\n correctly identified from all the observations it labeled as\n positive (the true positives + the false positives).", + "prompt_type": "plain" + }, + { + "output": "Accuracy\nIn binary classification, Accuracy is the number of correct predictions\nmade as a ratio of all predictions made. In multiclass classification,\nthe set of labels predicted for a sample must exactly match the\ncorresponding set of labels in y_true. A Driverless AI model will return probabilities, not predicted classes. To convert probabilities to predicted classes, a threshold needs to be\ndefined. Driverless AI iterates over possible thresholds to calculate a\nconfusion matrix for each threshold. It does this to find the maximum\nAccuracy value. Driverless AI's goal is to continue increasing this\nmaximum Accuracy. Accuracy equation:\n$$Accuracy = \\Big(\\; \\frac{\\text{number correctly predicted}}{\\text{number of observations}}\\; \\Big)$$\nLogloss\nThe logarithmic loss metric can be used to evaluate the performance of a\nbinomial or multinomial classifier. Unlike AUC which looks at how well a\nmodel can classify a binary target, logloss evaluates how close a\nmodel's predicted values (uncalibrated probability estimates) are to the\nactual target value.", + "prompt_type": "plain" + }, + { + "output": "Logloss can be any value greater than or equal to 0,\nwith 0 meaning that the model correctly assigns a probability of 0% or\n100%. Binary classification equation:\n$$Logloss = - \\;\\frac{1}{N} \\sum_{i=1}^{N}w_i(\\;y_i \\ln(p_i)+(1-y_i)\\ln(1-p_i)\\;)$$\nMulticlass classification equation:\n$$Logloss = - \\;\\frac{1}{N} \\sum_{i=1}^{N}\\sum_{j=1}^{C}w_i(\\;y_i,_j \\; \\ln(p_i,_j)\\;)$$\nWhere:\n- N is the total number of rows (observations) of your corresponding\n dataframe. 
- w is the per row user-defined weight (default is 1). - C is the total number of classes (C=2 for binary classification). - p is the predicted value (uncalibrated probability) assigned to a\n given row (observation). - y is the actual target value. AUC (Area Under the Receiver Operating Characteristic Curve)\nThis model metric is used to evaluate how well a binary classification\nmodel is able to distinguish between true positives and false positives. For multi-class problems, this score is computed by micro-averaging the\nROC curves for each class.", + "prompt_type": "plain" + }, + { + "output": "An AUC of 1 indicates a perfect classifier, while an AUC of .5 indicates\na poor classifier whose performance is no better than random guessing. AUCPR (Area Under the Precision-Recall Curve)\nThis model metric is used to evaluate how well a binary classification\nmodel is able to distinguish between precision recall pairs or points. These values are obtained using different thresholds on a probabilistic\nor other continuous-output classifier. AUCPR is an average of the\nprecision-recall weighted by the probability of a given threshold. The main difference between AUC and AUCPR is that AUC calculates the\narea under the ROC curve and AUCPR calculates the area under the\nPrecision Recall curve. The Precision Recall curve does not care about\nTrue Negatives. For imbalanced data, a large quantity of True Negatives\nusually overshadows the effects of changes in other metrics like False\nPositives. The AUCPR will be much more sensitive to True Positives,\nFalse Positives, and False Negatives than AUC.", + "prompt_type": "plain" + }, + { + "output": "MACROAUC (Macro Average of Areas Under the Receiver Operating Characteristic Curves)\nFor multiclass classification problems, this score is computed by\nmacro-averaging the ROC curves for each class (one per class). The area\nunder the curve is a constant. 
A MACROAUC of 1 indicates a perfect\nclassifier, while a MACROAUC of .5 indicates a poor classifier whose\nperformance is no better than random guessing. This option is not\navailable for binary classification problems. Scorer Best Practices - Regression\nWhen deciding which scorer to use in a regression problem, consider the\nfollowing:\n- Do you want your scorer to be sensitive to outliers? - What unit should the scorer be in? Sensitive to Outliers\nCertain scorers are more sensitive to outliers. When a scorer is\nsensitive to outliers, it means that it is important that the model\npredictions are never exceedingly inaccurate. For example, say you have\nan experiment predicting the number of days until an event. The graph\nbelow shows the absolute error in your predictions.", + "prompt_type": "plain" + }, + { + "output": "RMSEdrops down significantly. **Performance Units** Different scorers show the performance of the Driverless AI experiment in different units. This section continues with the previous example where the target is to predict the number of days until an event. 
Some possible performance units are: - Same as target: The unit of the scorer is in days - ex: MAE = 5 means the model predictions are off by 5 days on average - Percent of target: The unit of the scorer is the percent of days - ex: MAPE = 10% means the model predictions are off by 10 percent on average - Square of target: The unit of the scorer is in days squared - ex: MSE = 25 means the model predictions are off by 5 days on average (square root of 25 = 5) **Comparison** +-------------+----------+--------------------------+-------------+ | Metric | Units | Sensitive to Outliers | Tip | +=============+==========+==========================+=============+ | R2 | Scaled | No | Use when | | | between | | you want | | | 0 and 1 | | performance | | | | | scaled | | | | | between 0 | | | | | and 1 | +-------------+----------+--------------------------+-------------+ | MSE | Square | Yes | | | | of | | | | | target | | | +-------------+----------+--------------------------+-------------+ | RMSE | Same as | Yes | | | | target | | | +-------------+----------+--------------------------+-------------+ | RMSLE | Log of | Yes | | | | target | | | +-------------+----------+--------------------------+-------------+ | RMSPE | Percent | Yes | Use when | | | of | | target | | | target | | values are | | | | | across | | | | | different | | | | | scales | +-------------+----------+--------------------------+-------------+ | MAE | Same as | No | | | | target | | | +-------------+----------+--------------------------+-------------+ | MAPE | Percent | No | Use when | | | of | | target | | | target | | values are | | | | | across | | | | | different | | | | | scales | +-------------+----------+--------------------------+-------------+ | SMAPE | Percent | No | Use when | | | of | | target | | | target | | values are | | | divided | | close to 0 | | | by 2 | | | +-------------+----------+--------------------------+-------------+ Scorer Best Practices - Classification 
-------------------------------------- When deciding which scorer to use in a classification problem, consider the following: - Do you want the scorer to evaluate the predicted probabilities or the classes that those probabilities can be converted to?", + "prompt_type": "plain" + }, + { + "output": "**Scorer Evaluates Probabilities or Classes** The final output of a Driverless AI model is a predicted probability that a record is in a particular class. The scorer you choose either evaluates how accurate the probability is or how accurate the assigned class is from that probability. Choosing this depends on the use of the Driverless AI model. Do you want to use the probabilities, or do you want to convert those probabilities into classes? For example, if you are predicting whether a customer will churn, you may take the predicted probabilities and turn them into distinct classes\u2014customers who will churn vs customers who will not churn. If you are predicting the expected loss of revenue, use the predicted probabilities instead (predicted probability of churn \\* value of customer). If your use case requires a class assigned to each record, select a scorer that evaluates the model's performance based on how well it classifies the records. If your use case uses the probabilities, select a scorer that evaluates the model's performance based on the predicted probability.", + "prompt_type": "plain" + }, + { + "output": "Modifying Datasets With Recipes\nDriverless AI lets you create a new dataset by\nmodifying an existing dataset with a data recipe . This example shows you how to create a new dataset with the Live Code\noption. 1. Navigate to the Datasets page, then click on the dataset you want to\n modify. 2. Click Details from the submenu that appears to open the Dataset\n Details page. 3. Click the Modify by Recipe button in the top right portion of the\n UI, then click Live Code from the submenu that appears. 4. 
Enter the code for the data recipe you want to use to modify the\n    dataset. Click the Get Preview button to see a preview of how the\n    data recipe will modify the dataset. In this example, the data\n    recipe modifies the number of rows and columns in the dataset. 5. To download the entered code script as a .py file, click the\n    Download button. 6. Click the Apply button to confirm the changes and create a new\n    dataset. (The original dataset is still available on the Datasets\n    page.)", + "prompt_type": "plain" + }, + { + "output": "Using Multiple Authenticators\n\nDriverless AI lets you enable multiple authentication methods at the\nsame time. The following are some examples of when this can be useful:\n\n- When you want to use single sign-on (SSO) options for the front-end\n  and also give users direct access with credentials for headless\n  setups like the Driverless AI Python client.\n- When you want to allow access to users that are not managed by the\n  provider of the primary authentication option.\n\nTo enable additional authentication methods, use the", + "prompt_type": "plain" + }, + { + "output": "additional_authentication_methods :ref:`config.toml ` setting. **Note**: In order to let users access their data when using multiple authenticators, usernames for all of the enabled authentication methods need to match one another. Multiple Authentication Methods Example --------------------------------------- In this example, a user wants to use OpenID Connect authentication on the front-end and also let users use LDAP credentials to gain access with the Driverless AI Python client. To enable both authentication methods, use the :ref:`config.toml file ` to set the following parameters: :: authentication_method = \"openid\" additional_authentication_methods = \"['ldap']\" # Configure OpenID Connect auth_openid_provider_base_uri = ... # Configure LDAP ldap_server = ... The primary authentication method's login page is available on the standard /login path. 
All of the enabled authentication methods can be used on path/login/``.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Transformations\nTransformations in Driverless AI are applied to columns in the data. The\ntransformers create the engineered features in\nexperiments. Driverless AI provides a number of transformers. The downloaded\nexperiment logs include the transformations that were applied to your\nexperiment. Notes:\n- You can include or exclude specific transformers in your Driverless\n AI environment using the included_transformers or\n excluded_transformers config options. - You can control which transformers to use in individual experiments\n with the included_transformers Expert Setting in Recipe panel. - You can set transformers to be used as pre-processing transformers\n with the included_pretransformers Expert Setting in Recipe panel. Additional layers can be added with the num_pipeline_layers Expert\n Setting in Recipe panel. - An alternative to transformers that gives more flexibility (but has\n no fitted state) are data recipes, controlled by the included_datas\n Expert Setting in Recipe panel.", + "prompt_type": "plain" + }, + { + "output": "- Numeric Transformers \n- Categorical Transformers \n- Time and Date Transformers \n- Time Series Transformers \n- NLP (text) Transformers \n- Image Transformers \n- Autoviz Recommendation Transformer \nTransformed Feature Naming Convention\nTransformed feature names are encoded as follows:\n _::<...>:.\nFor example in 32_NumToCatTE:BILL_AMT1:EDUCATION:MARRIAGE:SEX.0 :\n - 32_ is the transformation index for specific transformation\n parameters. - NumToCatTE is the transformer name. - BILL_AMT1:EDUCATION:MARRIAGE:SEX represents original features\n used. 
- 0 is the extra and represents the likelihood encoding for\n target[0] after grouping by features (shown here as BILL_AMT1,\n EDUCATION, MARRIAGE and SEX) and making out-of-fold estimates.", + "prompt_type": "plain" + }, + { + "output": "For binary experiments,\n this value is always 0. Numeric Transformers (Integer, Real, Binary)\n- ClusterDist Transformer\n The Cluster Distance Transformer clusters selected numeric columns\n and uses the distance to a specific cluster as a new feature. - ClusterDist cuML Transformer\n The Cluster Distance cuML Transformer runs on GPUs to train cuML\n accelerated k-means clustering to create clusters on selected\n numeric columns and uses the distance to a specific cluster as a\n new feature. - ClusterTE Transformer\n The Cluster Target Encoding Transformer clusters selected numeric\n columns and calculates the mean of the response column for each\n cluster. The mean of the response is used as a new feature. Cross\n Validation is used to calculate mean response to prevent\n overfitting. - DBSCAN cuML Transformer\n DBSCAN cuML Transformer runs on GPUs to train cuML accelerated\n DBSCAN model on selected numeric columns and uses the output\n cluster label as a new feature.", + "prompt_type": "plain" + }, + { + "output": "This transformation uses a smart search to identify which feature\n pairs to transform. Only interactions that improve the baseline\n model score are kept. - InteractionsSimple Transformer\n The InteractionsSimple Transformer adds, divides, multiplies, and\n subtracts two numeric columns in the data to create a new feature. This transformation randomly selects pairs of features to\n transform. - NumCatTE Transformer\n The Numeric Categorical Target Encoding Transformer calculates the\n mean of the response column for several selected columns. If one\n of the selected columns is numeric, it is first converted to\n categorical by binning. The mean of the response column is used as\n a new feature. 
Cross Validation is used to calculate mean response\n to prevent overfitting. - NumToCatTE Transformer\n The Numeric to Categorical Target Encoding Transformer converts\n numeric columns to categoricals by binning and then calculates the\n mean of the response column for each group.", + "prompt_type": "plain" + }, + { + "output": "Cross Validation is\n used to calculate mean response to prevent overfitting. - NumToCatWoEMonotonic Transformer\n The Numeric to Categorical Weight of Evidence Monotonic\n Transformer converts a numeric column to categorical by binning\n and then calculates Weight of Evidence for each bin. The monotonic\n constraint ensures the bins of values are monotonically related to\n the Weight of Evidence value. The Weight of Evidence is used as a\n new feature. Weight of Evidence measures the \u201cstrength\u201d of a\n grouping for separating good and bad risk and is calculated by\n taking the log of the ratio of distributions for a binary response\n column. - NumToCatWoE Transformer\n The Numeric to Categorical Weight of Evidence Transformer converts\n a numeric column to categorical by binning and then calculates\n Weight of Evidence for each bin. The Weight of Evidence is used as\n a new feature. Weight of Evidence measures the \u201cstrength\u201d of a\n grouping for separating good and bad risk and is calculated by\n taking the log of the ratio of distributions for a binary response\n column.", + "prompt_type": "plain" + }, + { + "output": "- TruncSVDNum Transformer\n Truncated SVD Transformer trains a Truncated SVD model on selected\n numeric columns and uses the components of the truncated SVD\n matrix as new features. - TruncSVDNum cuML Transformer\n The Truncated SVD cuML Transformer runs on GPUs to train cuML\n accelerates Truncated SVD model on selected numeric columns and\n uses the components of the truncated SVD matrix as new features. 
Time Series Experiments Transformers\n- DateOriginal Transformer\n The Date Original Transformer retrieves date values such as year,\n quarter, month, day, day of the year, week, and weekday values. - DateTimeOriginal Transformer\n The Date Time Original Transformer retrieves date and time values\n such as year, quarter, month, day, day of the year, week, weekday,\n hour, minute, and second values. - EwmaLags Transformer\n The Exponentially Weighted Moving Average (EWMA) Transformer\n calculates the exponentially weighted moving average of target or\n feature lags.", + "prompt_type": "plain" + }, + { + "output": "The aggregation\n is used as a new feature. - LagsInteraction Transformer\n The Lags Interaction Transformer creates target/feature lags and\n calculates interactions between the lags (lag2 - lag1, for\n instance). The interaction is used as a new feature. - Lags Transformer\n The Lags Transformer creates target/feature lags, possibly over\n groups. Each lag is used as a new feature. Lag transformers may\n apply to categorical (strings) features or binary/multiclass\n string valued targets after they have been internally numerically\n encoded. - LinearLagsRegression Transformer\n The Linear Lags Regression transformer trains a linear model on\n the target or feature lags to predict the current target or\n feature value. The linear model prediction is used as a new\n feature. Categorical Transformers (String)\n- Cat Transformer\n The Cat Transformer sorts a categorical column in lexicographical\n order and uses the order index created as a new feature.", + "prompt_type": "plain" + }, + { + "output": "- CatOriginal Transformer\n The Categorical Original Transformer applies an identity\n transformation that leaves categorical features as they are. This\n transformer works with models that can handle non-numeric feature\n values. 
- CVCatNumEncode Transformer\n The Cross Validation Categorical to Numeric Encoding Transformer\n calculates an aggregation of a numeric column for each value in a\n categorical column (ex: calculate the mean Temperature for each\n City) and uses this aggregation as a new feature. - CVTargetEncode Transformer\n The Cross Validation Target Encoding Transformer calculates the\n mean of the response column for each value in a categorical column\n and uses this as a new feature. Cross Validation is used to\n calculate mean response to prevent overfitting. - Frequent Transformer\n The Frequent Transformer calculates the frequency for each value\n in categorical column(s) and uses this as a new feature.", + "prompt_type": "plain" + }, + { + "output": "- LexiLabelEncoder Transformer\n The Lexi Label Encoder sorts a categorical column in\n lexicographical order and uses the order index created as a new\n feature. - NumCatTE Transformer\n The Numeric Categorical Target Encoding Transformer calculates the\n mean of the response column for several selected columns. If one\n of the selected columns is numeric, it is first converted to\n categorical by binning. The mean of the response column is used as\n a new feature. Cross Validation is used to calculate mean response\n to prevent overfitting. - OneHotEncoding Transformer\n The One-hot Encoding transformer converts a categorical column to\n a series of Boolean features by performing one-hot encoding. The\n Boolean features are used as new features. If there are more than\n a specific number of unique values in the column, then they will\n be binned to the max number (10 by default) in lexicographical\n order. This value can be changed with the ohe_bin_list config.toml\n configuration option.", + "prompt_type": "plain" + }, + { + "output": "- WeightOfEvidence Transformer\n The Weight of Evidence Transformer calculates Weight of Evidence\n for each value in categorical column(s). 
The Weight of Evidence is\n    used as a new feature. Weight of Evidence measures the \u201cstrength\u201d\n    of a grouping for separating good and bad risk and is calculated\n    by taking the log of the ratio of distributions for a binary\n    response column. []\n    This only works with a binary target variable. The likelihood\n    needs to be created within a stratified k-fold if a fit_transform\n    method is used. More information can be found here:\n    http://ucanalytics.com/blogs/information-value-and-weight-of-evidencebanking-case/. Text Transformers (String)\n- BERT Transformer\n    The Bidirectional Encoder Representations from Transformers (BERT)\n    Transformer creates new features for each text column based on the\n    pre-trained model embeddings and is ideally suited for datasets\n    that contain additional important non-text features.", + "prompt_type": "plain" + }, + { + "output": "The GRU prediction is used as a new\n    feature. Cross Validation is used when training the GRU model to\n    prevent overfitting. - TextCharCNN Transformer\n    The Text Character CNN Transformer trains a CNN TensorFlow model\n    on character embeddings created from a text feature to predict the\n    response column. The CNN prediction is used as a new feature. Cross Validation is used when training the CNN model to prevent\n    overfitting. - TextCNN Transformer\n    The Text CNN Transformer trains a CNN TensorFlow model on word\n    embeddings created from a text feature to predict the response\n    column. The CNN prediction is used as a new feature. Cross\n    Validation is used when training the CNN model to prevent\n    overfitting. - TextLinModel Transformer\n    The Text Linear Model Transformer trains a linear model on a\n    TF-IDF matrix created from a text feature to predict the response\n    column. 
The linear model prediction is used as a new feature.", + "prompt_type": "plain" + }, + { + "output": "- Text Transformer\n The Text Transformer tokenizes a text column and creates a TFIDF\n matrix (term frequency-inverse document frequency) or count (count\n of the word) matrix. When the number of TF-IDF features exceeds\n the config TOML value in the list text_gene_dim_reduction_choices,\n dimensionality reduction is performed using truncated SVD. Selected components of the TF-IDF/Count matrix are used as new\n features. - TextOriginal Transformer\n The TextOriginal Transformer performs no feature engineering on\n the text column. Note that this transformer is only available for\n models that have text feature support. Models that have text\n feature support are ImageAutoModel, FTRL, BERT, and unsupervised\n models, in addition to custom model recipes where _can_handle_text\n is set to True. Time Transformers (Date, Time)\n- Dates Transformer\n The Dates Transformer retrieves any date values, including:\n - Year\n - Quarter\n - Month\n - Day\n - Day of year\n - Week\n - Week day\n - Hour\n - Minute\n - Second\n- IsHoliday Transformer\n The Is Holiday Transformer determines if a date column is a\n holiday.", + "prompt_type": "plain" + }, + { + "output": "Creates a separate feature for holidays in\n the United States, United Kingdom, Germany, Mexico, and the\n European Central Bank. Other countries available in the python\n Holiday package can be added via the configuration file. Image Transformers\n- ImageOriginal Transformer\n The Image Original Transformer passes image paths to the model\n without performing any feature engineering. - ImageVectorizer Transformer\n The Image Vectorizer Transformer uses pre-trained ImageNet models\n to convert a column with an image path or URI to an embeddings\n (vector) representation that is derived from the last global\n average pooling layer of the model. 
Note: Fine-tuning of the pre-trained image models can be enabled\n with the image-model-fine-tune expert setting. Autoviz Recommendation Transformer\nThe Autoviz recommendation transformer applies the recommended\ntransformations obtained by\nvisualizing the dataset in Driverless AI .", + "prompt_type": "plain" + }, + { + "output": "The\nautoviz_recommended_transformation \nin the expert experiment settings list/control the transformation\napplied. The syntax is a dict of transformations from Autoviz\n{column_name: transformation} like\n{\"DIS\":\"log\",\"INDUS\":\"log\",\"RAD\":\"inverse\",\"ZN\":\"square_root\"}. The\nAutoviz recommendation transformer itself can be enabled or disabled\nfrom the expert panel by included_transformers \nconfig setting. This transformer is supported in\npython scoring pipelines and\nmojo scoring pipelines with Java Runtime (no C++ support\nat the moment). Example Transformations\nIn this section, we will describe some of the available transformations\nusing the example of predicting house prices on the example dataset. 
-------------------------------------------------------------------\n Date Built Square Footage Num Beds Num Baths State Price\n ------------ --------------- ---------- ----------- ------- -------\n 01/01/1920 1700 3 2 NY $700K\n -------------------------------------------------------------------\nFrequent Transformer\n- the count of each categorical value in the dataset\n- the count can be either the raw count or the normalized count\n -------------------------------------------------------------------\n Date Square Num Beds Num Baths S tate Price Fr\n Built Footage eq_State\n --------- ------------ -------- --------- ------ ------- ----------\n 01/ 1700 3 2 NY 70 4,500\n 01/1920 0,000 \n -------------------------------------------------------------------\nThere are 4,500 properties in this dataset with state = NY.", + "prompt_type": "plain" + }, + { + "output": "Truncated SVD Numeric Transformer\n- truncated SVD trained on selected numeric columns of the data\n- the components of the truncated SVD will be new features\n ---------------------------------------------------------------------\n Date Square Num Num St P rice TruncSVD_Price\n Built Footage Beds Baths ate _NumBeds_NumBaths_1\n -------- ---------- ------ ------- ----- ------ ---------------------\n 01/0 1700 3 2 NY 700 0.632\n 1/1920 ,000 \n ---------------------------------------------------------------------\nThe first component of the truncated SVD of the columns Price, Number of\nBeds, Number of Baths. 
Dates Transformer\n- get year, get quarter, get month, get day, get day of year, get\n week, get week day, get hour, get minute, get second\n --------------------------------------------------------------------\n Date Square Num Beds Num St Price Date\n Built Footage Baths ate Built_Month\n --------- ------------ -------- -------- ----- ------- -------------\n 01/ 1700 3 2 NY 70 1\n 01/1920 0,000 \n --------------------------------------------------------------------\nThe home was built in the month January.", + "prompt_type": "plain" + }, + { + "output": "*In order to prevent overfitting, Driverless AI calculates this average\non out-of-fold data using cross validation. Numeric to Categorical Target Encoding Transformer\n- numeric column converted to categorical by binning\n- cross validation target encoding done on the binned numeric column\n -------------------------------------------------------------------\n Date Square Num Num St P rice CV_TE\n Built Footage Beds Baths ate _SquareFootage\n -------- ----------- ------- -------- ----- ------ ----------------\n 01/0 1700 3 2 NY 700 345,000\n 1/1920 ,000 \n -------------------------------------------------------------------\nThe column Square Footage has been bucketed into 10 equally populated\nbins. This property lies in the Square Footage bucket 1,572 to 1,749. The average price of properties with this range of square footage is\n$345,000*. *In order to prevent overfitting, Driverless AI calculates this average\non out-of-fold data using cross validation.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI release blogs\nLooking for the latest news on H2O Driverless AI releases? Find it here\nin a single convenient location. Driverless AI 1.10.4\nVersion 1.10.4 brings several new features that make it simpler for you\nto take advantage of the predictive modeling capabilities of DAI. For a\nfull list of changes and accompanying documentation, see version_1104. 
Read more: What's new in version 1.10.4\nDriverless AI GUI-based wizards\nSeveral new GUI-based wizards have been added to DAI as part of this\nrelease. - Experiment wizard: This wizard guides you step-by-step through the\n  process of setting up and starting an experiment. For users who\n  aren't already familiar with using DAI, the experiment wizard is a\n  great way to start running experiments without having to worry about\n  whether you've set up your experiment correctly. If you're an experienced user of DAI, you can still take advantage\n  of this wizard to ensure that every aspect of your experiment has\n  been configured correctly, especially in cases where you're\n  attempting to set up more complex experiments.", + "prompt_type": "plain" + }, + { + "output": "To access the experiment wizard, go to the Experiments page and\n  click New Experiment -> Wizard Setup. - Dataset join wizard: The process of joining two datasets together\n  can sometimes be difficult, depending on the size and complexity of\n  the datasets. This wizard guides you through this process so that\n  you can be sure that the datasets are joined correctly. To access the Dataset Join Wizard, go to the Datasets page and\n  click on the name of the dataset, then click Join Wizard from the\n  list of options. - Leaderboard wizard: This wizard helps you set up and perform a\n  business value analysis of all models in a project. To access the\n  Leaderboard wizard, go to a project and click the Analyze Results\n  button. []\nExpert Settings redesign\nThe Expert Settings window has been redesigned to make it simpler to\nnavigate and locate specific settings that are relevant to your\nexperiment. By clicking the Filter by Tags button, you can now also\nfilter the list of available settings by specific tags.", + "prompt_type": "plain" + }, + { + "output": "LDAP Authentication Example\nThis section describes how to enable Lightweight Directory Access\nProtocol in Driverless AI. 
The available parameters can be specified as\nenvironment variables when starting the Driverless AI Docker image, or\nthey can be set via the config.toml file for native installs. Upon\ncompletion, all the users in the configured LDAP should be able to log\nin to Driverless AI and run experiments, visualize datasets, interpret\nmodels, etc. Note: Driverless AI does not support LDAP client auth. If you have LDAP\nclient auth enabled, then the Driverless AI LDAP connector will not\nwork. Description of Configuration Attributes\nThe following options can be specified when enabling LDAP\nauthentication. - ldap_server: The LDAP server domain or IP. - ldap_port: The LDAP server port. - ldap_bind_dn: The complete distinguished name (DN) of the LDAP bind\n  user. - ldap_bind_password: The password for the LDAP bind. - ldap_tls_file: The Transport Layer Security (TLS) certificate file\n  location.", + "prompt_type": "plain" + }, + { + "output": "- ldap_search_base: The location in the Directory Information Tree\n  (DIT) where the search will start. - ldap_search_filter: A string that describes what you are searching\n  for. You can use Python substitution to have this constructed\n  dynamically. (Only {{DAI_USERNAME}} is supported. For example,\n  \"(&(objectClass=person)(cn:dn:={{DAI_USERNAME}}))\".) - ldap_search_attributes: LDAP attributes to return from search. - ldap_user_name_attribute=\"uid\": Specify the key to find user name. LDAP without SSL\nThe following examples describe how to enable LDAP without SSL when\nrunning Driverless AI in the Docker image or through native installs. If\nthe configuration and authentication are successful, the\nuser can access Driverless AI and run experiments, visualize datasets,\ninterpret models, etc. Docker Image Installs\nThe following example shows how to configure LDAP without SSL when\nstarting the Driverless AI Docker image. 
nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -p 12345:12345 \\\n -u `id -u`:`id -g` \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,s3,hdfs\" \\\n -e DRIVERLESS_AI_AUTHENTICATION_METHOD=\"ldap\" \\\n -e DRIVERLESS_AI_LDAP_USE_SSL=\"false\" \\\n -e DRIVERLESS_AI_LDAP_SERVER=\"ldap.forumsys.com\" \\\n -e DRIVERLESS_AI_LDAP_PORT=\"389\" \\\n -e DRIVERLESS_AI_LDAP_SEARCH_BASE=\"dc=example,dc=com\" \\\n -e DRIVERLESS_AI_LDAP_BIND_DN=\"cn=read-only-admin,dc=example,dc=com\" \\ \n -e DRIVERLESS_AI_LDAP_BIND_PASSWORD=password \\ \n -e DRIVERLESS_AI_LDAP_SEARCH_FILTER=\"(&(objectClass=person)(cn:dn:={{DAI_USERNAME}}))\" \\\n -e DRIVERLESS_AI_LDAP_USER_NAME_ATTRIBUTE=\"uid\" \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\nNative Installs\nThe following example shows how to configure LDAP without SSL when\nstarting Driverless AI from a native install.", + "prompt_type": "plain" + }, + { + "output": "1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example:\n2. Enable LDAP authentication without SSL. 3. Start (or restart) Driverless AI. Note that the command used to\n start Driverless AI varies depending on your install type. If authentication is successful, the user can access Driverless AI and\nrun experiments, visualize datasets, interpret models, etc. LDAP with SSL\nThese examples show how to enable LDAP authentication with SSL and\nadditional parameters that can be specified as environment variables\nwhen starting the Driverless AI Docker image, or they can be set via the\nconfig.toml file for native installs. Upon completion, all the users in\nthe configured LDAP should be able to log in to Driverless AI and run\nexperiments, visualize datasets, interpret models, etc. Docker Image Installs\nSpecify the following LDAP environment variables when starting the\nDriverless AI Docker image. 
This example enables LDAP authentication and\nshows how to specify additional options enabling SSL.", + "prompt_type": "plain" + }, + { + "output": "Leaderboards\nDriverless AI provides a feature to automatically create leaderboards. The Create Leaderboard feature runs multiple diverse experiments that\nprovide an overview of the dataset. This feature also provides you with\nrelevant information for deciding on complexity, accuracy, size, and\ntime tradeoffs when putting models into production. Refer to the\nexpert-settings topic for information on expert settings that can be\nused to control this feature. For more information on the default models\nbuilt for a leaderboard, see leaderboard_models. The built models are placed under the projects page and can be\nsimultaneously scored on the test dataset and compared. Creating a Leaderboard\nCreating a Leaderboard is similar to running a\nnew experiment . Refer to the experiment_settings,\nexpert-settings, and scorers topics for more information about options\nyou can set when running an experiment. 1. On the Datasets page, select the dataset that you want to use for\n the experiment, then click Predict\n or\n On the Experiments page, click New Experiment, then select the\n dataset that you want to use.", + "prompt_type": "plain" + }, + { + "output": "Specify whether to include dropped columns, a validation dataset,\n and a testing dataset. 3. Specify the Target column and optionally a fold column, weight\n column, and time column. 4. Optionally specify expert-settings. 5. Optionally adjust the Accuracy/Time/Interpretability knobs. 6. Optionally override the default scorer. 7. Optionally override the Classification/Regression setting. 8. Optionally specify to make the experiments reproducible and/or\n whether to enable GPUs. 9. Click the Create Leaderboard button. []\nDriverless AI creates a new, randomly named project and begins\nautomatically training models using the queuing mechanism. 
The new\nproject is given the description \"Automatic Leader Board\". After all\nmodels have been built, you can\nscore each experiment and\ncompare experiments, as described in the\nprojects topic. []\nLeaderboard Models\nWhen creating a leaderboard, the models that are built will vary based\non whether you are running a regular experiment or a time-series\nexperiment.", + "prompt_type": "plain" + }, + { + "output": "You can omit models from\nbeing built by disabling those models in the expert-settings. ---------------------------------------------------------------------------\n Model | Accuracy | Time | Interpretability | Config Overrides\n ------------------ -------- -------- ---------- ---------------------------\n Few Features Decision Tree | 1 | 1 | 10 | max_orig_cols_selected=5, nfeatures_max=10\n Simple LightGBM | 1 | 1 | 10 | (none)\n Constant Baseline | 1 | 1 | 10 | max_orig_cols_selected=1\n Single Decision Tree | Specified in experiment | Specified in experiment | Specified in experiment | fixed_ensemble_level=0\n Single GLM | Specified in experiment | Specified in experiment | Specified in experiment | fixed_ensemble_level=0\n Complex LightGBM Ensemble | 7 | Specified in experiment | Specified in experiment | (none)\n Few Features Single LightGBM | Specified in experiment | Specified in experiment | Specified in experiment | max_orig_cols_selected=5, nfeatures_max=10, fixed_ensemble_level=0\n Default Single LightGBM | Specified in experiment | Specified in experiment | Specified in experiment | fixed_ensemble_level=0\n Default XGBoost/LightGBM Ensemble | Specified in experiment | Specified in experiment | Specified in experiment | (none)\n Single FTRL | Specified in experiment | Specified in experiment | Specified in experiment | fixed_ensemble_level=0\n Single TensorFlow | Specified in experiment | Specified in experiment | Specified in experiment | fixed_ensemble_level=0\n 
---------------------------------------------------------------------------\nTime Series Experiments\nDriverless AI will build one time-series experiment using the default\nDriverless AI settings.", + "prompt_type": "plain" + }, + { + "output": "Experiments\n\nexperiment-settings expert-settings scorers experiment-new\nexperiment-sharing experiment-completed experiment-insights\nexperiment-scores experiment-graphs experiment-summary\nexperiment-performance\n\ndiagnosing view-experiments leaderboard projects", + "prompt_type": "plain" + }, + { + "output": "Imputation in Driverless AI\n\nThe impute feature lets you fill in missing values with substituted\nvalues. Missing values can be imputed based on the column's mean,\nmedian, minimum, maximum, or mode value. You can also impute based on a\nspecific percentile or by a constant value.\n\nThe imputation is precomputed on all data or inside the pipeline (based\non what's in the train split).\n\nThe following guidelines should be followed when performing imputation:\n\n- For constant imputation on numeric columns, constant must be\n numeric.\n- For constant imputation on string columns, constant must be a\n string.\n- For percentile imputation, the percentage value must be between 0\n and 100.\n\nNotes:\n\n- This feature is experimental.\n- Time columns cannot be imputed.\n\nEnabling Imputation\n\nImputation is disabled by default. It can be enabled by setting", + "prompt_type": "plain" + }, + { + "output": "enable_imputation=true in the config.toml (for native installs) or via the DRIVERLESS_AI_ENABLE_IMPUTATION=true\nenvironment variable (Docker image installs). This enables imputation\nfunctionality in transformers.\n\nRunning an Experiment with Imputation\n\nOnce imputation is enabled, you will have the option when running an\nexperiment to add imputation columns.\n\n1. Click on Columns Imputation in the Experiment Setup page.\n\n2. Click on Add Imputation in the upper-right corner.\n3. 
Select the column that contains missing values you want to impute.\n4. Select the imputation type. Available options are:\n\n5. Optionally allow Driverless AI to compute the imputation value\n during validation instead of using the inputted imputed value.\n6. Click Save when you are done.\n\n7. At this point, you can add additional imputations, delete the\n imputation you just created, or close this form and return to the\n experiment. Note that each column can have only a single imputation.", + "prompt_type": "plain" + }, + { + "output": "FAQ\nH2O Driverless AI is an artificial intelligence (AI) platform for\nautomatic machine learning. Driverless AI automates some of the most\ndifficult data science and machine learning workflows such as feature\nengineering, model validation, model tuning, model selection and model\ndeployment. It aims to achieve highest predictive accuracy, comparable\nto expert data scientists, but in much shorter time thanks to end-to-end\nautomation. Driverless AI also offers automatic visualizations and\nmachine learning interpretability (MLI). Especially in regulated\nindustries, model transparency and explanation are just as important as\npredictive performance. Modeling pipelines (feature engineering and\nmodels) are exported (in full fidelity, without approximations) both as\nPython modules and as Java standalone scoring artifacts. This section provides answers to frequently asked questions. If you have\nadditional questions about using Driverless AI, post them on Stack\nOverflow using the driverless-ai tag at\nhttp://stackoverflow.com/questions/tagged/driverless-ai.", + "prompt_type": "plain" + }, + { + "output": "If you have not signed up for the H2O.ai\nCommunity Slack workspace, you can do so here:\nhttps://www.h2o.ai/community/. General\n- How is Driverless AI different than any other black box ML\n algorithm? - How often do new versions come out? Installation/Upgrade/Authentication\n- How can I change my username and password? 
- Can Driverless AI run on CPU-only machines? - How can I upgrade to a newer version of Driverless AI? - What kind of authentication is supported in Driverless AI? - How can I automatically turn on persistence each time the GPU system\n reboots? - How can I start Driverless AI on a different port than 12345? - Can I set up TLS/SSL on Driverless AI? - Can I set up TLS/SSL on Driverless AI in AWS? - Why do I receive a \"package dai-.x86_64 does not verify: no\n digest\" error during the installation? <#no-digest>__\n- I received a \"Must have exactly one OpenCL platform 'NVIDIA CUDA'\"\n error. How can I fix that? - Is it possible for multiple users to share a single Driverless AI\n instance?", + "prompt_type": "plain" + }, + { + "output": "- How can I retrieve a list of Driverless AI users? - Start of Driverless AI fails on the message \"Segmentation fault\n (core dumped)\" on Ubuntu 18/RHEL 7.6. How can I fix this? - Which Linux systems does Driverless AI support? Data\n- Is there a file size limit for datasets? - How can I import CSV files that use UTF-8 encoding into Excel? - Can a byte order mark be used when writing CSV files with datatable? - Which version of Longhorn is supported by Driverless AI? - Is it possible to download a transformed test dataset in Driverless\n AI? Connectors\n- Why can't I import a folder as a file when using a data connector on\n Windows? - I get a ClassNotFoundException error when I try to select a JDBC\n connection. How can I fix that? - I get a org.datanucleus.exceptions.NucleusUserException: Please\n check your CLASSPATH and plugin specification error when attempting\n to connect to hive. How can I fix that? - I get a \"Permission Denied\" error during Hive import.", + "prompt_type": "plain" + }, + { + "output": "Recipes\n- Where can I retrieve H2O's custom recipes? - How can I create my own custom recipe? - Are MOJOs supported for experiments that use custom recipes? - How can I use BYOR in my airgapped installation? 
- When enabling recipes in Driverless AI, can I install Python\n packages from my organization's internal Python package index? Experiments\n- How much memory does Driverless AI require in order to run\n experiments? - How many columns can Driverless AI handle? - How should I use Driverless AI if I have large data? - How does Driverless AI detect the ID column? - Can Driverless AI handle data with missing values/nulls? - How does Driverless AI deal with categorical variables? What if an\n integer column should really be treated as categorical? - How are outliers handled? - If I drop several columns from the Train dataset, will Driverless AI\n understand that it needs to drop the same columns from the Test\n dataset? - Does Driverless AI treat numeric variables as categorical variables?", + "prompt_type": "plain" + }, + { + "output": "- Why do my selected algorithms not show up in the Experiment Preview? - How can we turn on TensorFlow Neural Networks so they are evaluated? - Does Driverless AI standardize the data? - What objective function is used in XGBoost? - Does Driverless AI perform internal or external validation? - How does Driverless AI prevent overfitting? - How does Driverless AI avoid the multiple hypothesis (MH) problem? - How does Driverless AI suggest the experiment settings? - What happens when I set Interpretability and Accuracy to the same\n number? - Can I specify the number of GPUs to use when running Driverless AI? - How can I create the simplest model in Driverless AI? - Why is my experiment suddenly slow? - When I run multiple experiments with different seeds, why do I see\n different scores, runtimes, and sizes on disk in the Experiments\n listing page? - Why does the final model performance appear to be worse than\n previous iterations? 
- How can I find features that may be causing data leakages in my\n Driverless AI model?", + "prompt_type": "plain" + }, + { + "output": "- How can I see all the performance metrics possible for my\n experiment? - What if my training/validation and testing data sets come from\n different distributions? - Does Driverless AI handle weighted data? - How does Driverless AI handle fold assignments for weighted data? - Why do I see that adding new features to a dataset deteriorates the\n performance of the model? - How does Driverless AI handle imbalanced data for binary\n classification experiments? - How is feature importance calculated in Driverless AI? - I want to have only one LightGBM model in the final pipeline. How\n can I achieve this? - I want to have only one LightGBM model and no FE. How can I do this? - What is fast approximation in Driverless AI? - When should fast approximation be turned off? - Why does the confusion matrix sometimes show decimals instead of\n whole numbers? - Is data sampling for multiclass use cases supported? Feature Transformations\n- Where can I get details of the various transformations performed in\n an experiment?", + "prompt_type": "plain" + }, + { + "output": "- Why are predicted probabilities not available when I run an\n experiment without ensembling? Deployment\n- What drives the size of a MOJO? - Are MOJOs thread safe? - Running the scoring pipeline for my MOJO is taking several hours. How can I get this to run faster? - Why have I encountered a \"Best Score is not finite\" error? Time Series\n- What if my data has a time dependency? - What is a lag, and why does it help? - Why can't I specify a validation data set for time-series problems? Why do you look at the test set for time-series problems\n- Why does the gap between train and test matter? Is it because of\n creating the lag features on the test set? 
- In regards to applying the target lags to different subsets of the\n time group columns, are you saying Driverless AI performs\n auto-correlation at \"levels\" of the time series? For example,\n consider the Walmart dataset where I have Store and Dept (and my\n target is Weekly Sales). Are you saying that Driverless AI checks\n for auto-correlation in Weekly Sales based on just Store, just Dept,\n and both Store and Dept?", + "prompt_type": "plain" + }, + { + "output": "- What is the logic behind the selectable numbers for forecast horizon\n length? - Assume that in my Walmart dataset, all stores provided data at the\n week level, but one store provided data at the day level. What would\n Driverless AI do? - Assume that in my Walmart dataset, all stores and departments\n provided data at the weekly level, but one department in a specific\n store provided weekly sales on a bi-weekly basis (every two weeks). What would Driverless AI do? - Why does the number of weeks that you want to start predicting\n matter? - Are the scoring components of time series sensitive to the order in\n which new pieces of data arrive? I.e., is each row independent at\n scoring time, or is there a real-time windowing effect in the\n scoring pieces? - What happens if the user, at predict time, gives a row with a time\n value that is too small or too large? - What's the minimum data size for a time series recipe? - How long must the training data be compared to the test data?", + "prompt_type": "plain" + }, + { + "output": "- Can the time information be distributed across multiple columns in\n the input data (such as [year, day, month])? - What type of modeling approach does Driverless AI use for time\n series? - What's the idea behind exponential weighting of moving averages? Logging\n- How can I reduce the size of the Audit Logger? General\nHow is Driverless AI different than any other black box ML algorithm? How often do new versions come out? 
Installation/Upgrade/Authentication\nHow can I change my username and password? Can Driverless AI run on CPU-only machines? How can I upgrade to a newer version of Driverless AI? What kind of authentication is supported in Driverless AI? How can I automatically turn on persistence each time the GPU system\nreboots? How can I start Driverless AI on a different port than 12345? Can I set up TLS/SSL on Driverless AI? Can I set up TLS/SSL on Driverless AI in AWS? I received a \"package dai-.x86_64 does not verify: no digest\"\nerror during the installation.", + "prompt_type": "plain" + }, + { + "output": "I received a \"Must have exactly one OpenCL platform 'NVIDIA CUDA'\"\nerror. How can I fix that? Is it possible for multiple users to share a single Driverless AI\ninstance? Can multiple Driverless AI users share a GPU server? How can I retrieve a list of Driverless AI users? Start of Driverless AI fails on the message ``Segmentation fault (core\ndumped)`` on Ubuntu 18/RHEL 7.6. How can I fix this? Which Linux systems does Driverless AI support? Data\nIs there a file size limit for datasets? How can I import CSV files that use UTF-8 encoding into Excel? Can a byte order mark be used when writing CSV files with datatable? Which version of Longhorn is supported by Driverless AI? Is it possible to download a transformed test dataset in Driverless AI? Connectors\nWhy can't I import a folder as a file when using a data connector on\nWindows? I get a ClassNotFoundException error when I try to select a JDBC\nconnection. How can I fix that? I get a org.datanucleus.exceptions.NucleusUserException: Please check\nyour CLASSPATH and plugin specification error when attempting to connect\nto Hive.", + "prompt_type": "plain" + }, + { + "output": "I get a \"Permission Denied\" error during Hive import. How do I fix this? Recipes\nWhere can I retrieve H2O's custom recipes? How can I create my own custom recipe? Are MOJOs supported for experiments that use custom recipes? 
How can I use BYOR in my airgapped installation? When enabling recipes in Driverless AI, can I install Python packages\nfrom my organization's internal Python package index? Yes\u2014you can use the pip_install_options\n TOML option to specify your organization's\n internal Python package index as follows:\n pip_install_options=\"['--extra-index-url', 'http://my-own-repo:port']\"\n For more information on the --extra-index-url pip install\n option, refer to the official pip documentation. Experiments\nHow much memory does Driverless AI require in order to run experiments? How many columns can Driverless AI handle? How should I use Driverless AI if I have large data? How does Driverless AI detect the ID column? Can Driverless AI handle data with missing values/nulls?", + "prompt_type": "plain" + }, + { + "output": "What if an\ninteger column should really be treated as categorical? How are outliers handled? If I drop several columns from the Train dataset, will Driverless AI\nunderstand that it needs to drop the same columns from the Test dataset? Does Driverless AI treat numeric variables as categorical variables? Which algorithms are used in Driverless AI? Why do my selected algorithms not show up in the Experiment Preview? When changing the algorithms used via Expert Settings > Model and Expert\nSettings > Recipes, you may notice in the Experiment Preview that those\nchanges are not applied. Driverless AI determines whether to include\nmodels and/or recipes based on a hierarchy of those expert settings as\nwell as data types (numeric, categorical, text, image, etc.) and system\nproperties (GPUs, multiple GPUs, etc.). 
[]\n- Setting an Algorithm to \"OFF\" in Expert Settings: If an algorithm is\n turned OFF in Expert Settings (for example, GLM Models) when\n running, then that algorithm will not be included in the experiment.", + "prompt_type": "plain" + }, + { + "output": "- Algorithms Not Specified as \"OFF\" and Included from Recipes: If a\n Driverless AI algorithm is specified as either \"AUTO\" or \"ON\" and\n additional models are selected for the experiment in the Include\n specific models option, then those algorithms may or may not be\n included in the experiment. Driverless AI will determine the\n algorithms to use based on the data and experiment type. - To show warnings in the preview for which models were not used, set\n show_inapplicable_models_preview = true in config.toml\nWhy do my selected transformers not show up in the Experiment Preview? When changing the transformers used via Expert Settings > Transformers\nand Expert Settings > Recipes, you may notice in the Experiment Preview\nthat those changes are not applied. Driverless AI determines whether to\ninclude transformers based upon data types (numeric,\ncategorical, text, image, etc.) and system properties (GPUs, multiple\nGPUs, etc.). - Transformers Not Included from Recipes (BYOR): If a transformer from\n a custom recipe is not selected for the experiment in the Include\n specific transformers option, then that transformer will not be\n included in the experiment.", + "prompt_type": "plain" + }, + { + "output": "Does Driverless AI standardize the data? What objective function is used in XGBoost? Does Driverless AI perform internal or external validation? How does Driverless AI prevent overfitting? How does Driverless AI avoid the multiple hypothesis (MH) problem? How does Driverless AI suggest the experiment settings? What happens when I set Interpretability and Accuracy to the same\nnumber? Can I specify the number of GPUs to use when running Driverless AI? 
How can I create the simplest model in Driverless AI? For information on why your experiment isn't performing as expected, see\nexperiment_performance. When I run multiple experiments with different seeds, why do I see\ndifferent scores, runtimes, and sizes on disk in the Experiments listing\npage? Why does the final model performance appear to be worse than previous\niterations? How can I find features that may be causing data leakages in my\nDriverless AI model? How can I see the performance metrics on the test data? How can I see all the performance metrics possible for my experiment?", + "prompt_type": "plain" + }, + { + "output": "Does Driverless AI handle weighted data? How does Driverless AI handle fold assignments for weighted data? Why do I see that adding new features to a dataset deteriorates the\nperformance of the model? How does Driverless AI handle imbalanced data for binary classification\nexperiments? How is feature importance calculated in Driverless AI? I want to have only one LightGBM model in the final pipeline. How can I\ndo this? I want to have only one LightGBM model and no FE. How can I do this? What is fast approximation in Driverless AI? When should fast approximation be turned off? Why does the confusion matrix sometimes show decimals instead of whole\nnumbers? Is data sampling for multiclass use cases supported? Feature Transformations\nWhere can I get details of the various transformations performed in an\nexperiment? Predictions\nHow can I download the predictions onto the machine where Driverless AI\nis running? Why are predicted probabilities not available when I run an experiment\nwithout ensembling?", + "prompt_type": "plain" + }, + { + "output": "Are MOJOs thread safe? Running the scoring pipeline for my MOJO is taking several hours. How\ncan I get this to run faster? Why have I encountered a \"Best Score is not finite\" error? Time Series\nWhat if my data has a time dependency? What is a lag, and why does it help? 
Why can't I specify a validation data set for time-series problems? Why\ndo you look at the test set for time-series problems?\nWhy does the gap between train and test matter? Is it because of\ncreating the lag features on the test set? In regards to applying the target lags to different subsets of the time\ngroup columns, are you saying Driverless AI performs auto-correlation at\n\"levels\" of the time series? For example, consider the Walmart dataset\nwhere I have Store and Dept (and my target is Weekly Sales). Are you\nsaying that Driverless AI checks for auto-correlation in Weekly Sales\nbased on just Store, just Dept, and both Store and Dept? How does Driverless AI detect the time period? What is the logic behind the selectable numbers for forecast horizon\nlength?", + "prompt_type": "plain" + }, + { + "output": "What would\nDriverless AI do? Assume that in my Walmart dataset, all stores and departments provided\ndata at the weekly level, but one department in a specific store\nprovided weekly sales on a bi-weekly basis (every two weeks). What would\nDriverless AI do? Why does the number of weeks that you want to start predicting matter? Are the scoring components of time series sensitive to the order in\nwhich new pieces of data arrive? I.e., is each row independent at\nscoring time, or is there a real-time windowing effect in the scoring\npieces? What happens if the user, at predict time, gives a row with a time value\nthat is too small or too large? What's the minimum data size for a time series recipe? How long must the training data be compared to the test data? How does the time series recipe deal with missing values? Can the time information be distributed across multiple columns in the\ninput data (such as [year, day, month])? 
What type of modeling approach does Driverless AI use for time series?", + "prompt_type": "plain" + }, + { + "output": "Dask Multinode Training (Alpha)\nDriverless AI can be configured to run in a multinode worker mode where\neach worker has a Dask CPU worker and (if the worker has GPUs) a Dask\nGPU worker. The main node in this setup has a Dask scheduler. This\ndocument describes the Dask training process and how to configure it. Before setting up Dask multinode training, you must configure\nRedis Multinode training in Driverless AI . Note: For Dask multinode examples, see\nDask Multinode examples . Understanding Dask Multinode Training\nDask multinode training in Driverless AI can be used to run a single\nexperiment that trains across the multinode cluster. It is effective in\nsituations where you need to run and complete a single experiment with\nlarge amounts of data or a large hyper-parameter space search. The Dask\ndistributed machines can be CPU only or CPU + GPU, with Dask experiments\nusing resources accordingly. For more information on Dask multinode design concepts, see\nhttps://dask.org/.", + "prompt_type": "plain" + }, + { + "output": "If you are interested in using Dask multinode configurations,\n contact support@h2o.ai. - Dask multinode training requires the transfer of data between\n several different workers. For example, if an experiment uses the\n Dask cluster, it must distribute data among cluster workers to be\n trained by XGBoost or Optuna hyper-parameter search. - Dask tasks are scheduled on a first in, first out (FIFO) basis. - Users can enable Dask multinode training on a per-experiment basis\n from the expert settings. - If an experiment chooses to use the Dask cluster (default is true if\n applicable), then a single experiment runs on the entire multinode\n cluster. For this reason, using a large number of commodity-grade\n hardware is not useful in the context of Dask multinode. 
- By default, Dask models are not selected because they can be less\n efficient for small data than non-Dask models. Set\n show_warnings_preview = true in the config.toml to display warnings\n whenever a user does not select Dask models and the system is\n capable of using them.", + "prompt_type": "plain" + }, + { + "output": "lightgbm_listen_port. Edit the Driverless AI config.toml ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ After Driverless AI is installed, edit the following config option in the config.toml file. .. code:: bash # Dask settings -- set the IP address of the Dask server. Same as the IP of the main Driverless AI node, and usually same as the Redis/MinIO IP dask_server_ip = \"\" For the dask_server_ip parameter, Driverless AI automatically tries the Redis, MinIO, and local IP addresses to see if it can find the Dask scheduler. In such a case, the dask_server_ip\nparameter does not have to be set.\n\nOn EC2 systems, if the main server is", + "prompt_type": "plain" + }, + { + "output": "http://ec2-52-71-252-183.compute-1.amazonaws.com:12345/, it is\nrecommended to use the nslookup-resolved IP instead of the EC2 IP due to\nthe way Dask and XGBoost (with rabit) operate. For example,", + "prompt_type": "plain" + }, + { + "output": "nslookup ec2-52-71-252-183.compute-1.amazonaws.com gives", + "prompt_type": "plain" + }, + { + "output": "10.10.4.103. Redis, MinIO, and Dask subsequently use that as the IP in the config.toml file. If dask_server_ip is not specified, its value is automatically inferred from Redis or MinIO. Once the worker node starts, use the Driverless AI server IP and Dask dashboard port(s) to view the status of the Dask cluster. .. figure:: images/dask_dashboard.png :alt: Description of Configuration Attributes --------------------------------------- General Dask Settings ~~~~~~~~~~~~~~~~~~~~~ -enable_dask_cluster: Specifies whether to enable a Dask worker on each multinode worker. 
-dask_server_ip: IP address used by server for Dask and Dask CUDA communications. CPU Cluster Dask Settings ~~~~~~~~~~~~~~~~~~~~~~~~~ -dask_server_port: Port used by server for Dask communications. -dask_dashboard_port: Dask dashboard port for Dask diagnostics. -dask_cluster_kwargs: Set Dask CUDA/RAPIDS cluster settings for single node workers. -dask_scheduler_env: Set Dask scheduler env. -dask_scheduler_options: Set Dask scheduler command-line options.", + "prompt_type": "plain" + }, + { + "output": "-dask_worker_options: Set Dask worker command-line options. -dask_protocol: Protocol used for Dask communications. -dask_worker_nprocs: Number of processes per Dask worker. -dask_worker_nthreads: Number of threads per process for Dask. GPU CUDA Cluster Dask Settings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -dask_cuda_server_port: Port used by server for Dask CUDA communications. -dask_cuda_dashboard_port: Dask dashboard port for dask_cuda diagnostics. -dask_cuda_cluster_kwargs: Set Dask CUDA/RAPIDS cluster settings for single node workers. -dask_cuda_scheduler_env: Set Dask CUDA scheduler env. -dask_cuda_scheduler_options: Set Dask CUDA scheduler command-line options. -dask_cuda_worker_options: Set Dask CUDA worker options. -dask_cuda_worker_env: Set Dask CUDA worker environment variables. -dask_cuda_protocol: Protocol used for Dask CUDA communications. -dask_cuda_worker_nthreads: Number of threads per process for dask_cuda. Other Cluster Dask Settings ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -lightgbm_listen_port: LightGBM local listening port when using Dask with LightGBM.", + "prompt_type": "plain" + }, + { + "output": "**Notes**: - The same steps can be used for a local Dask cluster on a single node with multiple GPUs. - If you have a Dask cluster but only want to use the worker node's GPUs, set :ref:`use_dask_cluster ` to False. 
- If you have a Dask cluster or a single Dask node available as a single user, you can set :ref:`exclusive_mode ` to \"max\" in expert settings to maximize usage of workers in the cluster. User Experiment Dask Settings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -use_dask_cluster: Whether to use Dask cluster (True) or only local cluster for multi-GPU case (False). -enable_xgboost_rapids: :ref:`Enable RAPIDS-cudf extensions to XGBoost GBM/Dart. ` (1) -enable_xgboost_gbm_dask: :ref:`Enable dask_cudf (multi-GPU) XGBoost GBM. ` (2) -enable_lightgbm_dask: :ref:`Enable Dask (multi-node) LightGBM. ` (*Experimental*) (2) -enable_xgboost_dart_dask: :ref:`Enable dask_cudf (multi-GPU) XGBoost Dart.", + "prompt_type": "plain" + }, + { + "output": "H2O AI Feature Store Setup\nYou can use the H2O AI Feature Store to store, update, and share the\nfeatures data scientists, developers, and engineers need to build AI\nmodels. This page describes how to configure Driverless AI to work with\nthe H2O AI Feature Store. Note: For more information on the H2O AI Feature Store, refer to the\nofficial documentation. Description of relevant configuration attributes\nThe following are descriptions of the relevant configuration attributes\nwhen enabling the H2O AI Feature Store data connector:\n- enabled_file_systems: A list of file systems you want to enable. To\n enable the Feature Store data connector, feature_store must be added\n to this list of data sources. - feature_store_endpoint_url: A URL that points to the Feature Store\n server. - feature_store_enable_tls: To enable TLS communication between DAI\n and the Feature Store server, set this to true. - feature_store_access_token_scopes: A space-separated list of access\n token scopes used by the Feature Store connector for authentication.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI k-LIME MOJO Reason Code Pipeline - Java Runtime\nFor completed MLI experiments, users can download the k-LIME MOJO. 
The\nk-LIME MOJO Reason Code Pipeline is a reason code engine that can be\ndeployed in any Java environment to generate reason codes in real time. To obtain the Java runtime MOJO for k-LIME reason codes, download the k-LIME\nMOJO Reason Code Pipeline. To obtain the Python scoring pipeline for k-LIME\nreason codes and Shapley values, download the Scoring Pipeline. Note\nThe k-LIME MOJO Reason Code pipeline does not support multinomial,\nnatural language processing (NLP), and time series models. []\nPrerequisites\nThe following are required in order to run the k-LIME MOJO reason code\npipeline. - Java 7 runtime (JDK 1.7) or newer. Note: Using Java 11+ is\n recommended due to a bug in Java. For more information, see\n https://bugs.openjdk.java.net/browse/JDK-8186464. - Valid Driverless AI license. You can download the license.sig file\n from the machine hosting Driverless AI (usually in the license\n folder).", + "prompt_type": "plain" + }, + { + "output": "- mojo2-runtime.jar file. This is available from the top navigation\n menu in the Driverless AI UI and in the downloaded mojo-pipeline.zip\n file for an experiment. License Specification\nDriverless AI requires a license to be specified in order to run any\nDAI/MLI MOJO. The license can be specified with one of the following:\n- An environment variable:\n - DRIVERLESS_AI_LICENSE_FILE: Path to the Driverless AI license\n file, or\n - DRIVERLESS_AI_LICENSE_KEY: The Driverless AI license key\n (Base64 encoded string)\n- A system property of JVM (-D option):\n - ai.h2o.mojos.runtime.license.file: Path to the Driverless AI\n license file, or\n - ai.h2o.mojos.runtime.license.key: The Driverless AI license\n key (Base64 encoded string)\n- An application classpath:\n - The license is loaded from a resource called /license.sig. 
- The default resource name can be changed with the JVM system\n property ai.h2o.mojos.runtime.license.filename.", + "prompt_type": "plain" + }, + { + "output": "On the completed MLI page, click on the Download k-LIME MOJO Reason\n Code Pipeline button. 2. To run the Java application for reason code generation directly, use\n the following command:\n java -Dai.h2o.mojos.runtime.license.file=license.sig -cp mojo2-runtime.jar ai.h2o.mojos.ExecuteMojo klime_mojo.zip example.csv\nk-LIME MOJO Command Line Options\nExecuting the Java Runtime\nThe following are two general examples of how the Java runtime can be\nexecuted from the command-line. - With additional libraries:\n- Without additional libraries:\nSo, for example, the sys.ai.h2o.mojos.parser.csv.separator option can be\npassed with the following:\n java -Dsys.ai.h2o.mojos.parser.csv.separator='|' -Dai.h2o.mojos.runtime.license.file=../license.sig -jar mojo2-runtime.jar pipeline.mojo input.csv output.csv\nSimilarly, the sys.ai.h2o.mojos.exposedInputs option can be passed with:\n java -Xmx5g -Dsys.ai.h2o.mojos.exposedInputs=ALL -Dai.h2o.mojos.runtime.license.file= -cp mojo2-runtime.jar ai.h2o.mojos.ExecuteMojo pipeline.mojo example.csv\nNote: Data can be streamed from stdin to stdout by replacing both the\ninput and output CSV arguments with `-`.", + "prompt_type": "plain" + }, + { + "output": "This value\n defaults to True. - sys.ai.h2o.mojos.parser.csv.stripCrFromLastColumn (boolean)\n -Workaround for issues relating to the OpenCSV parser. This value\n defaults to True. - sys.ai.h2o.mojos.parser.csv.quotedHeaders (boolean) - Specify\n whether to quote header names in the output CSV file. This value\n defaults to False. - sys.ai.h2o.mojos.parser.csv.separator (char) - Specify the separator\n used between CSV fields. The special value `TAB` can be used for\n tab-separated values. This value defaults to `,`. - sys.ai.h2o.mojos.parser.csv.escapeChar (char) - Specify the escape\n character for parsing CSV fields. 
If this value is not specified,\n then no escaping is attempted. This value defaults to an empty\n string. - sys.ai.h2o.mojos.parser.csv.batch (int) - Specify the number of\n input records brought into memory for batch processing (determines\n consumed memory). This value defaults to 1000. - sys.ai.h2o.mojos.pipelineFormats (string) - When multiple formats\n are recognized, this option specifies the order in which they are\n tried.", + "prompt_type": "plain" + }, + { + "output": "- sys.ai.h2o.mojos.parser.csv.date.formats (string) - Specify a format\n for dates. This value defaults to an empty string. - sys.ai.h2o.mojos.exposedInputs (string) - Specify a comma separated\n list of input cols that are needed on output. The special value\n `ALL` takes all inputs. This defaults to a null value. - sys.ai.h2o.mojos.useWeakHash (boolean) - Specify whether to use\n WeakHashMap. This is set to False by default. Enabling this setting\n may improve MOJO loading times. JVM Options for Access Control\n- ai.h2o.mojos.runtime.license.key - Specify a license key. - ai.h2o.mojos.runtime.license.file - Specify the location of a\n license key. - ai.h2o.mojos.runtime.license.filename - Override the default license\n file name. - ai.h2o.mojos.runtime.signature.filename - Override the default\n signature file name. - ai.h2o.mojos.runtime.watermark.filename - Override the default\n watermark file name. JVM Options for Access Control\n- ai.h2o.mojos.runtime.license.key - Specify a license key.", + "prompt_type": "plain" + }, + { + "output": "Machine Learning Interpretability\n\ninterpreting interpret-the-mli-page.rst interpret-non-ts interpret-ts\ninterpret-recipes", + "prompt_type": "plain" + }, + { + "output": "OpenID Connect Authentication Examples\nThis section describes how to enable OpenID Connect authentication in\nDriverless AI. It provides two examples. The first describes how to\nenable OpenID connect and log in to the Driverless AI UI. 
The second\ndescribes additional token-based authentication settings, which allows\nyou to run the Driverless AI Python client. (Note that token-based\nauthentication is not yet supported on the Driverless AI R client.) This\nsection assumes that you have an understanding of OpenID Connect. The OpenID Connect Protocol\nOpenID Connect follows a distinct protocol during the authentication\nprocess:\n1. A request is sent from the client (RP) to the OpenID provider (OP). 2. The OP authenticates the end user and obtains authorization. 3. The OP responds with an ID Token. (An Access Token is usually\n provided as well.) 4. The Relying Party (RP) can send a request with the Access Token to\n the UserInfo Endpoint. 5. The UserInfo Endpoint returns Claims about the End User.", + "prompt_type": "plain" + }, + { + "output": "This information is subsequently used to\nconfigure further interactions with the provider. The well-known endpoint is typically configured as follows:\n https://yourOpenIDProviderHostname/.well-known/openid-configuration\nConfiguration Options\nOpenID Configuration Options\nThe following options in the config.toml file are used for enabling\nOpenID-based authentication. Setting these options lets you log in to\nthe Driverless AI UI using OpenID. # The OpenID server URL. (Ex: https://oidp.ourdomain.com) Do not end with a \"/\"\n auth_openid_provider_base_uri= \"https://yourOpenIDProviderHostname\"\n # The uri to pull OpenID config data from. (You can extract most of required OpenID config from this URL.) 
# Usually located at: /auth/realms/master/.well-known/openid-configuration\n # Quote method from urllib.parse used to encode payload dict in Authentication Request\n auth_openid_urlencode_quote_via=\"quote\"\n # These endpoints are made available by the well-known endpoint of the OpenID provider\n # All endpoints should start with a \"/\"\n auth_openid_auth_uri=\"\"\n auth_openid_token_uri=\"\"\n auth_openid_userinfo_uri=\"\"\n auth_openid_logout_uri=\"\"\n # In most cases, these values are usually 'code' and 'authorization_code' (as shown below)\n # Supported values for response_type and grant_type are listed in the response of well-known endpoint\n auth_openid_response_type=\"code\"\n auth_openid_grant_type=\"authorization_code\"\n # Scope values\u2014supported values are available in the response from the well-known endpoint\n # 'openid' is required\n # Additional scopes may be necessary if the response to the userinfo request\n # does not include enough information to use for authentication\n # Separate additional scopes with a blank space.", + "prompt_type": "plain" + }, + { + "output": "Token-based authentication allows\nclients to authenticate with the Driverless AI server by providing a\ntoken with each request. This is targeted for (but not limited to) the\nenvironments with OpenID Connect authentication. If these options are\nnot set, then clients are not able to authenticate with the server when\nOpenID Connect is configured as the authentication method. # Sets token introspection URL for OpenID Connect authentication. (needs to be an absolute URL)\n auth_openid_token_introspection_url = \"\"\n # Enables option to use Bearer token for authentication with the RPC endpoint. api_token_introspection_enabled = false\n # Sets the method that is used to introspect the bearer token. # OAUTH2_TOKEN_INTROSPECTION: Uses OAuth 2.0 Token Introspection (RFC 7662)\n # endpoint to introspect the bearer token.
# This is useful when 'openid' is used as the authentication method. # Uses 'auth_openid_client_id' and 'auth_openid_client_secret' to\n # authenticate with the authorization server and\n # `auth_openid_token_introspection_url` to perform the introspection.", + "prompt_type": "plain" + }, + { + "output": "Space separated.\n # This is passed to the introspection endpoint and also verified after response\n # for the servers that don't enforce scopes. # Keeping this empty turns the verification off. # \n api_token_oauth2_scopes = \"\"\n # Which field of the response returned by the token introspection endpoint should be used as a username. api_token_oauth2_username_field_name = \"username\"\n # Enables the option to initiate a PKCE flow from the UI in order to obtain tokens usable with Driverless clients\n oauth2_client_tokens_enabled = false\n # Sets up client id that will be used in the OAuth 2.0 Authorization Code Flow to obtain the tokens. Client needs to be public and be able to use PKCE with S256 code challenge. oauth2_client_tokens_client_id = \"\"\n # Sets up the absolute url to the authorize endpoint. oauth2_client_tokens_authorize_url = \"\"\n # Sets up the absolute url to the token endpoint. oauth2_client_tokens_token_url = \"\"\n # Sets up the absolute url to the token introspection endpoint. It's displayed in the UI so that clients can inspect the token expiration.", + "prompt_type": "plain" + }, + { + "output": "this /oauth2/client_token\n oauth2_client_tokens_redirect_url = \"\"\n # Sets up the scope for the requested tokens. Space separated list. oauth2_client_tokens_scope = \"openid profile ai.h2o.storage\"\nExample 1: Enabling OpenID Connect\nThis example describes how to start Driverless AI in the Docker image\nand with native installs after OpenID has been configured. Note that\nthis example does not enable tokens, so the Driverless AI Python client\nwill be incompatible with this installation. Docker Image Installs\n1.
Edit the OpenID configuration options in your config.toml file as\n described in the openid-config-options section. 2. Mount the edited config.toml file into the Docker container. The next step is to launch and log in to Driverless AI. Refer to\nlogging-in. Native Installs\n1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example:\n2. Edit the OpenID configuration properties in the config.toml file as\n described in the openid-config-options section.", + "prompt_type": "plain" + }, + { + "output": "Start (or restart) Driverless AI. The next step is to launch and log in to Driverless AI. Refer to\nlogging-in. Example 2: Enabling Token-based Authentication with OpenID Connect\nSimilar to Example 1, this example describes how to start Driverless AI\nin the Docker image and with native installs after OpenID has been\nconfigured. It also enables tokens for compatibility with the Driverless\nAI Python client. Docker Image Installs\n1. Edit the OpenID configuration options in your config.toml file as\n described in the openid-config-options section. Be sure to also\n enable the token-based authentication options described in the\n token_based_options options section. 2. Mount the edited config.toml file into the Docker container. The next step is to launch and log in to Driverless AI. Refer to\nlogging-in. Native Installs\n1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example:\n2. Edit the OpenID configuration properties in the config.toml file as\n described in the openid-config-options section.", + "prompt_type": "plain" + }, + { + "output": "3. Start (or restart) Driverless AI. The next step is to launch and log in to Driverless AI. Refer to\nlogging-in. 
Python Client\nThe following is an example of how to enable token-based authentication\nwith OpenID Connect for the Driverless AI Python Client:\n # setup a token provider with a refresh token from the Driverless AI web UI\n token_provider = driverlessai.token_providers.OAuth2TokenProvider(\n refresh_token=\"eyJhbGciOiJIUzI1N...\",\n client_id=\"python_client\",\n token_endpoint_url=\"https://keycloak-server/auth/realms/driverlessai/protocol/openid-connect/token\",\n token_introspection_url=\"https://keycloak-server/auth/realms/driverlessai/protocol/openid-connect/token/introspect\"\n )\n # use the token provider to get authorization to connect to the\n # Driverless AI server\n dai = driverlessai.Client(\n address=\"https://localhost:12345\",\n token_provider=token_provider.ensure_fresh_token\n )\nParameters:\n- refresh_token (str) \u2013 token from Driverless AI server web UI, used\n to obtain fresh access token when needed\n- client_id (str) \u2013 public ID for the Python client\n- token_endpoint_url (str) \u2013 Authorization server URL to get an access\n or refresh token\n- token_introspection_url (str) \u2013 Authorization server URL to get\n information about a token\n- access_token (Optional [str]) \u2013 token authorizing Python client\n access\n- client_secret (Optional [str]) \u2013 private secret for the Python\n client\nFor more information, see\nhttp://docs.h2o.ai/driverless-ai/pyclient/docs/html/utils.html#oauth-2-0-token-provider.", + "prompt_type": "plain" + }, + { + "output": "Image Processing in Driverless AI\nImage processing in Driverless AI is a powerful tool that can be used to\ngain insight from digital images. The following sections describe\nDriverless AI's image processing capabilities. - image-processing-supported-file-types\n- Uploading Image dataset to Driverless AI\n- Image Transformer : Use image transformers when a\n dataset contains both images and other feature types. 
- Image Model : Use an Image model when the only feature\n in the dataset is an image. - Deploying an Image Model to Production\nNote\n- Image models from Driverless AI version 1.9.x aren't supported in\n1.10.x. - Image and NLP use cases in Driverless AI benefit significantly\nfrom GPU usage. For more information, see GPU usage in DAI . Supported File Types for Image processing\nThe following is a list of supported file types for image processing in\nDriverless AI:\n- Windows bitmaps - .bmp\n- JPEG files - .jpeg, .jpg, .jpe\n- JPEG 2000 files - .jp2\n- Portable Network Graphics - .png\n- WebP - .webp\n- Portable image format - .pbm, .pgm, .ppm, .pnm\n- TIFF files - .tiff, .tif\n- OpenEXR Image files - .exr\n- Radiance HDR - .hdr\nDue to browser restrictions, images may not render for some formats\n(like .ppm, .tiff, .pnm and .exr) when viewing dataset rows from the\nGUI.", + "prompt_type": "plain" + }, + { + "output": "Ideally Driverless AI can support all OpenCV Image formats. Uploading Data for Image Processing\nDriverless AI supports multiple methods for uploading image datasets:\n- Archive with images in directories for each class. Labels for each\n class are automatically created based on directory hierarchy\n- Archive with images and a CSV file that contains at least one column\n with image names and a target column (best method for regression). Note that each image name must include the correct file extension. - CSV file with local paths to the images on the disk\n- CSV file with remote URLs to the images\nModeling Images\nDriverless AI features two different approaches to modeling images. 
Embeddings Transformer (Image Vectorizer)\nThe Image Vectorizer transformer utilizes TensorFlow\npre-trained ImageNet models to\nconvert a column with an image path or URI to an embeddings (vector)\nrepresentation that is derived from the last global average pooling\nlayer of the model.", + "prompt_type": "plain" + }, + { + "output": "There are several options in the Expert Settings panel that let you\nconfigure the Image Vectorizer transformer. This panel is available from\nwithin the experiment page above the Scorer knob. Refer to\nimage-settings for more information on these options. Notes:\n- This modeling approach supports classification and regression\n experiments. - This modeling approach supports the use of mixed data types (any\n number of image columns, text columns, numeric or categorical\n columns)\n- The Image Vectorizer transformer can also be enabled with the\n Pipeline Building Recipe expert setting,\n which is located in the Experiment tab. Automatic Image Model\nAutomatic Image Model is an AutoML model that accepts only an image and\na label as input features. This model automatically selects\nhyperparameters such as learning rate, optimizer, batch size, and image\ninput size. It also automates the training process by selecting the\nnumber of epochs, cropping strategy, augmentations, and learning rate\nscheduler.", + "prompt_type": "plain" + }, + { + "output": "The possible architectures list includes all\nthe well-known models: (SE)-ResNe(X)ts; DenseNets; EfficientNets; etc. Unique insights that provide information and sample images for the\ncurrent best individual model are available for Automatic Image Model. To view these insights, click on the Insights option while an experiment\nis running or after an experiment is complete. Refer to image-insights\nfor more information. Each individual model score (together with the neural network\narchitecture name) is available in the Iteration Data panel. 
The last\npoint in the Iteration Data is always called ENSEMBLE. This indicates\nthat the final model ensembles multiple individual models. Enabling Automatic Image Model\nTo enable Automatic Image Model, navigate to the\npipeline-building-recipe expert setting and select the image_model\noption:\nAfter confirming your selection, click Save. The experiment preview\nsection updates to include information about Automatic Image Model:\n[]\nNotes:\n- This modeling approach only supports a single image column as an\n input.", + "prompt_type": "plain" + }, + { + "output": "- This modeling approach supports classification and regression\n experiments. - This modeling approach does not support the use of mixed data types\n because of its limitation on input features. - This modeling approach does not use Genetic Algorithm (GA). - The use of one or more GPUs is strongly recommended for this\n modeling approach. - If an internet connection is available, ImageNet pretrained weights\n are downloaded automatically. If an internet connection is not\n available, weights must be downloaded from\n http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip\n and extracted into tensorflow_image_pretrained_models_dir\n (./pretrained/image/ by default). - If extensively running image models with Driverless AI\n Docker install , we recommend setting\n --shm-size=2g. Deploying an Image Model\nPython scoring and\nC++ MOJO scoring are both supported for the\nImage Vectorizer Transformer .", + "prompt_type": "plain" + }, + { + "output": "Data Recipe URL Setup\nDriverless AI lets you explore data recipe URL data sources from within\nthe Driverless AI application. This section provides instructions for\nconfiguring Driverless AI to work with data recipe URLs. When enabled\n(default), you will be able to modify datasets that have been added to\nDriverless AI. (Refer to modify_by_recipe for more information.) Notes:\n- This connector is enabled by default. 
These steps are provided in\n case this connector was previously disabled and you want to\n re-enable it. - Depending on your Docker install version, use either the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command when starting the Driverless AI Docker image. Use docker version to check which version of Docker you are using. Enable Data Recipe URL\nDocker Image Installs\nThis example enables the data recipe URL data connector. nvidia-docker run \\\n --shm-size=256m \\\n --add-host name.node:172.16.2.186 \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file, recipe_url\" \\\n -p 12345:12345 \\\n -it --rm \\\n -v /tmp/dtmp/:/tmp \\\n -v /tmp/dlog/:/log \\\n -v /tmp/dlicense/:/license \\\n -v /tmp/ddata/:/data \\\n -u $(id -u):$(id -g) \\\n h2oai/dai-ubi8-x86_64:|tag|\nDocker Image with the config.toml\nThis example shows how to enable the Data Recipe URL data connector in\nthe config.toml file, and then specify that file when starting\nDriverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "1. Configure the Driverless AI config.toml file. Set the following\n configuration options. - enabled_file_systems = \"file, upload, recipe_url\"\n2. Mount the config.toml file into the Docker container. nvidia-docker run \\\n --pid=host \\\n --rm \\\n --shm-size=256m \\\n --add-host name.node:172.16.2.186 \\\n -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\\n -p 12345:12345 \\\n -v /local/path/to/config.toml:/path/in/docker/config.toml \\\n -v /etc/passwd:/etc/passwd:ro \\\n -v /etc/group:/etc/group:ro \\\n -v /tmp/dtmp/:/tmp \\\n -v /tmp/dlog/:/log \\\n -v /tmp/dlicense/:/license \\\n -v /tmp/ddata/:/data \\\n -u $(id -u):$(id -g) \\\n h2oai/dai-ubi8-x86_64:|tag|\nNative Installs\nThis example enables the Data Recipe URL data connector. Note that\nrecipe_url is enabled by default. 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example:\n2. 
Specify the following configuration options in the config.toml file.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Workflow\n\nA typical Driverless AI workflow is to:\n\n1. Load data\n2. Visualize data\n3. Run an experiment\n4. Interpret the model\n5. Deploy the scoring pipeline\n\nIn addition, you can diagnose a model, transform another dataset, score\nthe model against another dataset, and manage your data in Projects.\n\nAlso see the dai_wizard, a question and answer workflow that helps\nautomatically set up use case specific experiment settings.\n\nThe image below describes a typical workflow.\n\n[]", + "prompt_type": "plain" + }, + { + "output": "Out of memory handling in Driverless AI\nThis page describes options for reducing memory usage to avoid out of\nmemory errors during the final model building stage. Reducing estimated memory usage and the number of cores used per\nexperiment\nTo avoid out of memory errors in situations where many different\ntransformers are used at the same time, set the following options as\nenvironment variables when starting DAI. Note that these configuration\noptions can also be set in the config.toml file . - final_munging_memory_reduction_factor: Specify a factor by which to\n reduce estimated memory usage during the final ensemble feature\n engineering stage. Larger values use less memory, with 1 using the\n highest amount of memory. - max_cores: Specify the number of cores to use per experiment. Note\n that if you specify 0, all available cores will be used. To reduce\n memory usage, lowering this value to \u00bd or \u00bc of the available\n physical cores is recommended.", + "prompt_type": "plain" + }, + { + "output": "max_workers_final_base_models = 1to automatically limit the number of models built at the same time to 1. This option is useful in situations where a specific transformer or model uses more memory than expected. 
**Limiting the total number of features** You can limit the total number of features with the :ref:`config_nfeatures_max` configuration option. For example, if you encounter an out of memory error due to having a large number of features, you can set this option and refit the best model to see if the error is resolved. **Limiting the maximum number of genes per model** You can specify the maximum number of genes (transformer instances) per model with the :ref:`config_ngenes_max` configuration option. **Additional options** - :ref:`config_munging_memory_overhead_factor`: Specify memory usage per transformer per input data size. In cases where final model data munging uses too much memory due to parallel operations, setting munging_memory_overhead_factor = 10 is recommended to reduce memory usage.", + "prompt_type": "plain" + }, + { + "output": "AWS Role-Based Authentication\n\nIn Driverless AI, it is possible to enable role-based authentication via\nthe IAM role. This is a two-step process that involves setting up AWS\nIAM and then starting Driverless AI by specifying the role in the\nconfig.toml file or by setting the AWS_USE_EC2_ROLE_CREDENTIALS\nenvironment variable to", + "prompt_type": "plain" + }, + { + "output": "True. AWS IAM Setup ------------- 1. Create an IAM role. This IAM role should have a Trust Relationship with Principal Trust Entity set to your Account ID. For example: trust relationship for Account ID 524466471676 would look like: .. .. code:: bash { \"Version\": \"2012-10-17\", \"Statement\": [ { \"Effect\": \"Allow\", \"Principal\": { \"AWS\": \"arn:aws:iam::524466471676:root\" }, \"Action\": \"sts:AssumeRole\" } ] } .. image:: ../images/aws_iam_role_create.png :alt: image :align: center 2. Create a new policy that lets users assume the role: .. .. image:: ../images/aws_iam_policy_create.png :alt: image 3. Assign the policy to the user. .. .. image:: ../images/aws_iam_policy_assign.png :alt: image 4.
Test role switching here: https://signin.aws.amazon.com/switchrole. (Refer to https://docs.aws.amazon.com/IAM/latest/UserGuide/troubleshoot_roles.html#troubleshoot_roles_cant-assume-role.)", + "prompt_type": "plain" + }, + { + "output": "AWS_USE_EC2_ROLE_CREDENTIALS`` environment variable.\n\nResources\n\n1. Granting a User Permissions to Switch Roles:\n https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use_permissions-to-switch.html\n2. Creating a Role to Delegate Permissions to an IAM User:\n https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user.html\n3. Assuming an IAM Role in the AWS CLI:\n https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-role.html", + "prompt_type": "plain" + }, + { + "output": "Driverless AI OpenID Connect Authentication\nThis page describes how to set up OpenID Connect (OIDC) authentication\nin Driverless AI (DAI). - oidc_setup\n- oidc_understanding\nSetting up OIDC authentication\nTo set up OIDC authentication locally (or in production), the following\nconfig.toml options must be specified:\n1. authentication_method = \"oidc\" - Specifies OIDC as the\n authentication method\n2. auth_oidc_issuer_url = \"https://login.microsoftonline.com//v2.0\"\n - Specifies the URL of the Identity Provider (IDP), which is also\n used for automatic provider discovery\n3. auth_oidc_identity_source = \"id_token\" - Specifies whether user\n identity is retrieved from ID Token or the UserInfo. The available\n options are [\"userinfo\", \"id_token\"]\n4. auth_oidc_username_claim = \"preferred_username\" - Specifies the\n claim that is used as the username in DAI\n5. auth_openid_client_id = \"\" - Specifies the Client ID\n (the application ID assigned to Driverless AI), which is provided\n by the IDP\n6. auth_openid_client_secret = \"\" - Specifies the Client\n secret created or given by the IDP\n7.
auth_openid_redirect_uri = \"http://localhost:12345/oidc/callback\"\n - Specifies a redirection URL so that the IDP can redirect users\n back to the application after successfully logging in\n8. auth_oidc_post_logout_url = \"http://localhost:12345/login\"\n -Specifies the URL the user is directed to after logging out\nThis basic setup should be sufficient to use an IDP such as Azure AD.", + "prompt_type": "plain" + }, + { + "output": "The following example contains several overrides in addition to the\nrequired config.toml options:\n # AUTH\n authentication_method = \"oidc\"\n auth_oidc_id_token_username_key = \"preferred_username\"\n auth_oidc_identity_source = \"id_token\"\n auth_oidc_issuer_url = \"https://login.microsoftonline.com//v2.0\"\n auth_openid_client_id = \"\"\n auth_openid_client_secret = \"\"\n auth_openid_scope = \"openid profile email User.Read\"\n auth_openid_default_scopes = \"User.Read\"\n auth_openid_redirect_uri = \"http://localhost:12345/oidc/callback\"\n auth_oidc_post_logout_url = \"http://localhost:12345/login\"\nIn the preceding example, notice the usage of the following OIDC scopes:\n1. auth_openid_scope - Specifies the list of scopes requested at the\n authorization request\n2. auth_openid_default_scopes - Specifies a set of scopes that are\n requested when making an access token request\nHow does OIDC authentication work? The following sections describe how OIDC authentication is implemented\nin DAI.", + "prompt_type": "plain" + }, + { + "output": "As stated on the OpenID\nwebsite, the Authorization Code Flow returns an Authorization Code to\nthe Client, which can then exchange it for an ID Token and an Access\nToken directly. Note\nDAI mainly supports the client_secret_basic authentication method. Identity sources\nThe DAI OIDC authentication mechanism allows two different methods of\nretrieving a user identity from IDP. 
Note\nFor both of the following methods, the user must specify the\nauth_oidc_username_claim config.toml option, which controls which claim\nis used as a username in DAI. - userinfo: Makes a UserInfo endpoint request, which in response\n returns a set of claims that should contain the preferred username,\n which will be used as the DAI username. - id_token: Uses an ID Token introspection, which is typically\n acquired during the token exchange, to retrieve the claim holding\n the preferred username. Identity Validation\nDriverless AI allows two different methods of evaluating whether user\n(identity) has required privileges to access the DAI application.", + "prompt_type": "plain" + }, + { + "output": "- If auth_openid_use_objectpath_match is enabled, then the user must\n specify auth_openid_use_objectpath_expression, which evaluates\n ObjectPath against identity (UserInfo response or ID Token)\n- If auth_openid_use_objectpath_match is disabled, then the user may\n specify auth_openid_userinfo_auth_key and\n auth_openid_userinfo_auth_value to compare value with given key in\n identity against the configured value. Logging in using OIDC\nThe following steps describe the procedure of logging in using OIDC:\n1. The OIDC Client is initialized at server startup and performs\n Provider Discovery, which discovers all the Identity Provider (IDP)\n endpoints. 2. When a user enters the login page, authorization code flow is\n initialized and the IDP is requested for an authorization code. 3. The user is redirected to an OIDC callback URL, which processes the\n authorization response and retrieves the authorization code. 4. 
The OIDC callback handler performs the token exchange using the\n Token Endpoint and acquires the Access and ID Tokens (and when\n possible, the Refresh Token).", + "prompt_type": "plain" + }, + { + "output": "auth_oidc_post_logout_url`` needs to be specified in the config.toml\nfile, which by design should point to the absolute DAI login URL.", + "prompt_type": "plain" + }, + { + "output": "Using the config.toml File\nThe config.toml file is a configuration file that uses the TOML v0.5.0\nfile format. Administrators can customize various aspects of a\nDriverless AI (DAI) environment by editing the config.toml file before\nstarting DAI. Note\nFor information on configuration security, see configuration-security. Configuration Override Chain\nThe configuration engine reads and overrides variables in the following\norder:\n1. Driverless AI defaults: These are stored in a Python config module. 2. config.toml - Place this file in a folder or mount it in a Docker\n container and specify the path in the \"DRIVERLESS_AI_CONFIG_FILE\"\n environment variable. 3. Keystore file - Set the keystore_file parameter in the config.toml\n file or the environment variable \"DRIVERLESS_AI_KEYSTORE_FILE\" to\n point to a valid DAI keystore file generated using the\n h2oai.keystore tool. If an environment variable is set, the value in\n the config.toml for keystore_file is overridden.", + "prompt_type": "plain" + }, + { + "output": "Environment variable - Configuration variables can also be provided\n as environment variables. They must have the prefix DRIVERLESS_AI_\n followed by the variable name in all caps. For example,\n \"authentication_method\" can be provided as\n \"DRIVERLESS_AI_AUTHENTICATION_METHOD\". Setting environment variables\n overrides values from the keystore file. Docker Image Users\n1. Copy the config.toml file from inside the Docker image to your local\n filesystem. 2. Edit the desired variables in the config.toml file. Save your\n changes when you are done. 3. 
Start DAI with the DRIVERLESS_AI_CONFIG_FILE environment variable. Ensure that this environment variable points to the location of the\n edited config.toml file so that the software can locate the\n configuration file. Native Install Users\nNative installs include DEBs, RPMs, and TAR SH installs. 1. Export the DAI config.toml file or add it to ~/.bashrc. For example:\n2. Edit the desired variables in the config.toml file.", + "prompt_type": "plain" + }, + { + "output": "Free up space on a DAI instance\nThe following sections describe how to free up disk space on an instance\nof Driverless AI. Python API guide\nThis section describes how to free up disk space on an instance of\nDriverless AI (DAI) with the Python API. Note\n- The method described in this section is only available for H2O AI\nCloud customers. The following code sample lets you perform the following tasks:\n1. Link any of your experiments to a Project. Once an experiment is\n linked to a Project, it is automatically pushed to an external\n remote storage. 2. Delete the experiment from the DAI instance. Doing so frees up disk\n space on your DAI instance, and you can always import any experiment\n back into the DAI instance as needed. # Make a project called: \"Test\"\n project = dai.projects.create(name=\"Test\")\n # Link experiment to project to save it to remote storage\n project.link_experiment(experiment)\n # Delete experiment from instance\n experiment.delete()\nNote that when using this approach, the deleted experiment appears\ngrayed out in the Project.", + "prompt_type": "plain" + }, + { + "output": "Data leakage and shift detection in Driverless AI\nThis page describes data leakage and shift detection in Driverless AI\n(DAI). Overview\n- Data leakage: To detect data leakage, DAI runs a model (when\n available, LightGBM) to get the variable importance table, which\n determines the predictive power of each feature on the target\n variable. 
A simple model is then built on each feature with\n significant variable importance. The models with a high AUC (for\n classification) or R2 (for regression) score are reported to the\n user as potential leak features. - Shift detection: To detect shift in distribution between the\n training, validation or testing datasets, Driverless AI trains a\n binomial model to predict which dataset a row belongs to. For\n example, if a model is built using only a specific feature as a\n predictor and is able to separate the training and testing data with\n high accuracy (for example, an AUC of 0.9), then this indicates that\n there is a drift in the distribution of that feature in the training\n and testing data.", + "prompt_type": "plain" + }, + { + "output": "Enabling leakage detection\nTo enable leakage detection, set the config_check_leakage configuration\noption to on (default). When this option is enabled, Driverless AI runs\na model to determine the predictive power of each feature on the target\nvariable. If leakage detection has been enabled, then the\nconfig_detect_features_leakage_threshold_auc configuration option is\nused for per-feature leakage detection if AUC (or R2 for regression) on\noriginal data (label-encoded) is greater-than or equal to the specified\nvalue. By default, this option is set to 0.95. Identifying features responsible for leakage\nFor significant features (determined by feature importance), a simple\nmodel is built on each feature. The models with a high AUC\n(classification) or R2 (regression) score are reported to the user as\npotential leaks. 
If leakage detection is enabled, then the\nconfig_detect_features_per_feature_leakage_threshold_auc configuration\noption is used to notify users about features for which AUC or R2 is\ngreater-than or equal to the specific value.", + "prompt_type": "plain" + }, + { + "output": "Automatically drop features suspected in leakage\nA feature is dropped when the single feature model performance exceeds\nthe threshold for dropping features. You can specify this threshold with\nthe config_drop_features_leakage_threshold_auc configuration option,\nwhich has a default value of 0.999. When the AUC (or R2 for regression),\nGINI, or Spearman correlation is above the specified value, the feature\nis dropped. Shift detection\nDriverless AI can detect data distribution shifts between\ntrain/valid/test datasets when they are provided. Shift is detected by training a model to distinguish between\ntrain/validation/test datasets by assigning a unique target label to\neach of the datasets. If the model turns out to have high accuracy, data\nshift is reported with a notification. Shifted features can either be\ndropped or used to create more meaningful aggregate features by using\nthem as labels or bins. The following is a list of configuration options for shift detection:\n- config_check_distribution_shift: Specify whether to enable\n train/valid and train/test distribution shift detection.", + "prompt_type": "plain" + }, + { + "output": "fitted_model.pickle.meta.json`` file in the experiment summary zip\narchive.", + "prompt_type": "plain" + }, + { + "output": "Time Series in Driverless AI\nTime series forecasting is one of the most common and important tasks in\nbusiness analytics. There are many real-world applications like sales,\nweather, stock market, and energy demand, just to name a few. At H2O, we\nbelieve that automation can help our users deliver business value in a\ntimely manner. 
Therefore, we combined advanced time series analysis and\nour Kaggle Grand Masters\u2019 time series recipes into Driverless AI. The key features/recipes that make automation possible are:\n- Automatic handling of time groups (e.g., different stores and\n departments)\n- Robust time series validation\n - Accounts for gaps and forecast horizon\n - Uses past information only (i.e., no data leakage)\n- Time series-specific feature engineering recipes\n - Date features like day of week, day of month, etc. - AutoRegressive features, like optimal lag and lag-features\n interaction\n - Different types of exponentially weighted moving averages\n - Aggregation of past information (different time groups and time\n intervals)\n - Target transformations and differentiation\n- Integration with existing feature engineering functions (recipes and\n optimization)\n- Rolling-window based predictions for time series experiments with\n test-time augmentation or re-fit\n- Automatic pipeline generation (See \"From Kaggle Grand Masters'\n Recipes to Production Ready in a Few Clicks\" blog post.)", + "prompt_type": "plain" + }, + { + "output": "Converting datetime to a locale-independent format prior to running\nexperiments is recommended. For information on how to convert datetime\nformats so that they are accepted in DAI, refer to the final note in the\nmodify_by_recipe section. Understanding Time Series\nThe following is an in depth description of time series in Driverless\nAI. For an overview of best practices when running time series\nexperiments, see ts_bestpractices. Modeling Approach\nDriverless AI uses GBMs, GLMs and neural networks with a focus on time\nseries-specific feature engineering. 
The feature engineering includes:\n- Autoregressive elements: creating lag variables\n- Aggregated features on lagged variables: moving averages,\n exponential smoothing descriptive statistics, correlations\n- Date-specific features: week number, day of week, month, year\n- Target transformations: Integration/Differentiation, univariate\n transforms (like logs, square roots)\nThis approach is combined with AutoDL features as part of the genetic\nalgorithm.", + "prompt_type": "plain" + }, + { + "output": "In other\nwords, the same transformations/genes apply; plus there are new\ntransformations that come from time series. Some transformations (like\ntarget encoding) are deactivated. When running a time series experiment, Driverless AI builds multiple\nmodels by rolling the validation window back in time (and potentially\nusing less and less training data). User-Configurable Options\nGap\nThe guiding principle for properly modeling a time series forecasting\nproblem is to use the historical data in the model training dataset such\nthat it mimics the data/information environment at scoring time (i.e. deployed predictions). Specifically, you want to partition the training\nset to account for: 1) the information available to the model when\nmaking predictions and 2) the number of units out that the model should\nbe optimized to predict. Given a training dataset, the gap and forecast horizon are parameters\nthat determine how to split the training dataset into training samples\nand validation samples.", + "prompt_type": "plain" + }, + { + "output": "For example:\n- Assume there are daily data with days 1/1/2020, 2/1/2020, 3/1/2020,\n 4/1/2020 in train. There are 4 days in total for training. - In addition, the test data will start from 6/1/2020. There is only 1\n day in the test data. - The previous day (5/1/2020) does not belong to the train data. 
It is\n a day that cannot be used for training (i.e because information from\n that day may not be available at scoring time). This day cannot be\n used to derive information (such as historical lags) for the test\n data either. - Here the time bin (or time unit) is 1 day. This is the time interval\n that separates the different samples/rows in the data. - In summary, there are 4 time bins/units for the train data and 1\n time bin/unit for the test data plus the Gap. - In order to estimate the Gap between the end of the train data and\n the beginning of the test data, the following formula is applied. - Gap = min(time bin test) - max(time bin train) - 1.", + "prompt_type": "plain" + }, + { + "output": "This is the\n earliest (and only) day in the test data. - max(time bin train) is 4 (or 4/1/2020). This is the latest (or the\n most recent) day in the train data. - Therefore the GAP is 1 time bin (or 1 day in this case), because Gap\n = 6 - 4 - 1 or Gap = 1\n[]\nForecast Horizon\nIt's often not possible to have the most recent data available when\napplying a model (or it's costly to update the data table too often);\ntherefore some models need to be built accounting for a \u201cfuture gap\u201d. For example, if it takes a week to update a specific data table, you\nideally want to predict 7 days ahead with the data as it is \u201ctoday\u201d;\ntherefore a gap of 6 days is recommended. Not specifying a gap and\npredicting 7 days ahead with the data as it is is unrealistic (and\ncannot happen, as the data is updated on a weekly basis in this\nexample). Similarly, gap can be used if you want to forecast further in\nadvance. For example, if you want to know what will happen 7 days in the\nfuture, then set the gap to 6 days.", + "prompt_type": "plain" + }, + { + "output": "In other words it is\nthe future period that the model can make predictions for (or the number\nof units out that the model should be optimized to predict). 
Forecast\nhorizon is used in feature selection and engineering and in model\nselection. Note that forecast horizon might not equal the number of\npredictions. The actual predictions are determined by the test dataset. []\nThe periodicity of updating the data may require model predictions to\naccount for significant time in the future. In an ideal world where data\ncan be updated very quickly, predictions can always be made having the\nmost recent data available. In this scenario there is no need for a\nmodel to be able to predict cases that are well into the future, but\nrather focus on maximizing its ability to predict short term. However\nthis is not always the case, and a model needs to be able to make\npredictions that span deep into the future because it may be too costly\nto make predictions every single day after the data gets updated.", + "prompt_type": "plain" + }, + { + "output": "For example,\npredicting tomorrow with today\u2019s data is easier than predicting 2 days\nahead with today\u2019s data. Hence specifying the forecast horizon can\nfacilitate building models that optimize prediction accuracy for these\nfuture time intervals. Prediction Intervals\nFor regression problems, enable the compute-intervals expert setting to\nhave Driverless AI provide two additional columns y.lower and y.upper in\nthe prediction frame. The true target value y for a predicted sample is\nexpected to lie within [y.lower, y.upper] with a certain probability. The default value for this confidence level can be specified with the\nconfidence-level expert setting, which has a default value of 0.9. Driverless AI uses holdout predictions to determine intervals\nempirically (Williams, W.H. and Goodman, M.L. \"A Simple Method for the\nConstruction of Empirical Confidence Limits for Economic Forecasts.\" Journal of the American Statistical Association, 66, 752-754. 1971). 
This method makes no assumption about the underlying model or the\ndistribution of error and has been shown to outperform many other\napproaches (Lee, Yun Shin and Scholtes, Stefan.", + "prompt_type": "plain" + }, + { + "output": "num_prediction_periods``) needs to be in periods, and the size is\nunknown. To overcome this, you can use the optional", + "prompt_type": "plain" + }, + { + "output": "time_period_in_seconds`` parameter when running", + "prompt_type": "plain" + }, + { + "output": "``start_experiment_sync`` (in Python) or ``train`` (in R). This is used to specify the forecast horizon in real time units (as well as for gap). If this parameter is not specified, then Driverless AI will automatically detect the period size in the experiment, and the forecast horizon value will respect this period. I.e., if you are sure that your data has a 1 week period, you can say ``num_prediction_periods=14``;\notherwise it is possible that the model will not work correctly. Groups\nGroups are categorical columns in the data that can significantly help\npredict the target variable in time series problems. For example, one\nmay need to predict sales given information about stores and products. Being able to identify that the combination of store and products can\nlead to very different sales is key for predicting the target variable,\nas a big store or a popular product will have higher sales than a small\nstore and/or with unpopular products. For example, if we don\u2019t know that the store is available in the data,\nand we try to see the distribution of sales along time (with all stores\nmixed together), it may look like that:\n[]\nThe same graph grouped by store gives a much clearer view of what the\nsales look like for different stores.", + "prompt_type": "plain" + }, + { + "output": "At a given sample with time stamp t, features at\nsome time difference T (lag) in the past are considered. 
For example, if\nthe sales today are 300, and sales of yesterday are 250, then the lag of\none day for sales is 250. Lags can be created on any feature as well as\non the target. []\nAs previously noted, the training dataset is appropriately split such\nthat the amount of validation data samples equals that of the testing\ndataset samples. If we want to determine valid lags, we must consider\nwhat happens when we will evaluate our model on the testing dataset. Essentially, the minimum lag size must be greater than the gap size. Aside from the minimum useable lag, Driverless AI attempts to discover\npredictive lag sizes based on auto-correlation. \"Lagging\" variables are important in time series because knowing what\nhappened in different time periods in the past can greatly facilitate\npredictions for the future. Consider the following example to see the\nlag of 1 and 2 days:\n+-----------+-------+------+------+\n| Date | Sales | Lag1 | Lag2 |\n+===========+=======+======+======+\n| 1/1/2020 | 100 | - | - |\n+-----------+-------+------+------+\n| 2/1/2020 | 150 | 100 | - |\n+-----------+-------+------+------+\n| 3/1/2020 | 160 | 150 | 100 |\n+-----------+-------+------+------+\n| 4/1/2020 | 200 | 160 | 150 |\n+-----------+-------+------+------+\n| 5/1/2020 | 210 | 200 | 160 |\n+-----------+-------+------+------+\n| 6/1/2020 | 150 | 210 | 200 |\n+-----------+-------+------+------+\n| 7/1/2020 | 160 | 150 | 210 |\n+-----------+-------+------+------+\n| 8/1/2020 | 120 | 160 | 150 |\n+-----------+-------+------+------+\n| 9/1/2020 | 80 | 120 | 160 |\n+-----------+-------+------+------+\n| 10/1/2020 | 70 | 80 | 120 |\n+-----------+-------+------+------+\nTime series target transformations\nThe following is a description of time series target transformations.", + "prompt_type": "plain" + }, + { + "output": "config.tomlfile. For more information, see :ref:`config_usage`. 
**Note:** Driverless AI does not attempt time series target transformations automatically; they must be set manually. :ref:`ts-target-transformation` (ts_lag_target_trafo): With this target transformation, you can select between the difference and ratio of the current and a lagged target. You can specify the corresponding lag size with the **Lag size used for time series target transformation** (ts_target_trafo_lag_size) setting. **Note:** This target transformation can be used together with the **Time series centering or detrending transformation** (ts_target_trafo) target transformation, but it is mutually exclusive with regular target transformations. :ref:`centering-detrending` (ts_target_trafo): With this target transformation, the free parameters of the trend model are fitted. The trend is removed from the target signal, and the pipeline is fitted on the residuals. Predictions are then made by adding back the trend.", + "prompt_type": "plain" + }, + { + "output": "- The ``centering (robust)`` and ``linear (robust)`` detrending variants use scikit-learn's implementation of random sample consensus (RANSAC) to achieve a higher tolerance with regard to outliers. As stated on scikit-learn's page on robust linear model estimation using RANSAC, \"The ordinary linear regressor is sensitive to outliers, and the fitted line can easily be skewed away from the true underlying relationship of data. 
The RANSAC regressor automatically splits the data into inliers and outliers, and the fitted line is determined only by the identified inliers.\" Settings Determined by Driverless AI ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Window/Moving Average ^^^^^^^^^^^^^^^^^^^^^ Using the above Lag table, a moving average of 2 would constitute the average of Lag1 and Lag2: +-----------+-------+------+------+------+ | Date | Sales | Lag1 | Lag2 | MA2 | +===========+=======+======+======+======+ | 1/1/2020 | 100 | - | - | - | +-----------+-------+------+------+------+ | 2/1/2020 | 150 | 100 | - | - | +-----------+-------+------+------+------+ | 3/1/2020 | 160 | 150 | 100 | 125 | +-----------+-------+------+------+------+ | 4/1/2020 | 200 | 160 | 150 | 155 | +-----------+-------+------+------+------+ | 5/1/2020 | 210 | 200 | 160 | 180 | +-----------+-------+------+------+------+ | 6/1/2020 | 150 | 210 | 200 | 205 | +-----------+-------+------+------+------+ | 7/1/2020 | 160 | 150 | 210 | 180 | +-----------+-------+------+------+------+ | 8/1/2020 | 120 | 160 | 150 | 155 | +-----------+-------+------+------+------+ | 9/1/2020 | 80 | 120 | 160 | 140 | +-----------+-------+------+------+------+ | 10/1/2020 | 70 | 80 | 120 | 100 | +-----------+-------+------+------+------+ Aggregating multiple lags together (instead of just one) can facilitate stability for defining the target variable.", + "prompt_type": "plain" + }, + { + "output": "Exponential Weighting ^^^^^^^^^^^^^^^^^^^^^ Exponential weighting is a form of weighted moving average where more recent values have higher weight than less recent values. That weight is exponentially decreased over time based on an **alpha** (a) hyperparameter in (0,1), which is normally within the range of [0.9 - 0.99]. For example: - Exponential Weight = a**(time) - If sales 1 day ago = 3.0 and 2 days ago = 4.5 and a = 0.95: - Exp. smooth = (3.0*(0.95**1) + 4.5*(0.95**2)) / ((0.95**1) + (0.95**2)) = 3.73 approx. 
Rolling-Window-Based Predictions -------------------------------- Driverless AI supports rolling-window-based predictions for time series experiments with two options: `Test Time Augmentation `__ (TTA) or re-fit. Both options are useful to assess the performance of the pipeline for predicting not just a single forecast horizon, but many in succession.", + "prompt_type": "plain" + }, + { + "output": "Re-fit simulates the process of re-fitting the entire pipeline (including the model) once new data is available. This process is automated when the test set spans for a longer period than the forecast horizon and if the target values of the test set are known. If the user scores a test set that meets these conditions after the experiment is finished, rolling predictions with TTA will be applied. Re-fit, on the other hand, is only applicable for test sets provided during an experiment. TTA is the default option and can be changed with the `Method to Create Rolling Test Set Predictions `__ expert setting. .. figure:: images/time_series_rolling_window_tta.png :alt: .. figure:: images/time_series_rolling_window_refit.png :alt: Time Series Constraints ----------------------- Dataset Size ~~~~~~~~~~~~ Usually, the forecast horizon (prediction length) :math:`H` equals the number of time periods in the testing data :math:`N_{TEST}` (i.e.", + "prompt_type": "plain" + }, + { + "output": "You want to have enough training data time periods :math:`N_{TRAIN}` to score well on the testing dataset. At a minimum, the training dataset should contain at least three times as many time periods as the testing dataset (i.e. :math:`N_{TRAIN} >= 3 \u00d7 N_{TEST}`). This allows for the training dataset to be split into a validation set with the same amount of time periods as the testing dataset while maintaining enough historical data for feature engineering. .. 
_time-series-use-case: Time Series Use Case: Sales Forecasting --------------------------------------- Below is a typical example of sales forecasting based on the `Walmart competition on Kaggle `__. In order to frame it as a machine learning problem, we formulate the historical sales data and additional attributes as shown below: **Raw data** .. figure:: images/time_series_raw_data.png :alt: **Data formulated for machine learning** .. figure:: images/time_series_ml_data.png :alt: The additional attributes are attributes that we will know at time of scoring.", + "prompt_type": "plain" + }, + { + "output": "In this case, you can assume that you will know whether or not a Store and Department will be running a promotional markdown. Features like the temperature of the Week are not used because that information is not available at the time of scoring. Once you have your data prepared in tabular format (see raw data above), Driverless AI can formulate it for machine learning and sort out the rest. If this is your very first session, the Driverless AI assistant will guide you through the journey. .. figure:: images/first_time_user.png :alt: Similar to previous Driverless AI examples, you need to select the dataset for training/test and define the target. For time series, you need to define the time column (by choosing AUTO or selecting the date column manually). If weighted scoring is required (like the Walmart Kaggle competition), you can select the column with specific weights for different samples. .. figure:: images/time_series_experiment_settings.png :alt: If you prefer to use automatic handling of time groups, you can leave the setting for time groups columns as AUTO, or you can define specific time groups.", + "prompt_type": "plain" + }, + { + "output": "Once the experiment is finished, you can make new predictions and download the scoring pipeline just like any other Driverless AI experiments. .. 
_ucapt: More About Unavailable Columns at Time of Prediction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The **Unavailable Columns at Prediction Time** (UCAPT) option is a way to mark features that will not be available in the test dataset or at the time of prediction but might still be predictive when looking at historical values. These features will only be used in historical feature engineering recipes, such as Lagging or Exponential Weighted Moving Average. For example, if we were predicting the sales amount each day, we might have the number of customers each day as a feature in our training dataset. In the future, we won't know how many customers will be coming into the store, so this would be a leaky feature to use. However, the average number of customers last week might be predictive and is something that we could calculate ahead of time.", + "prompt_type": "plain" + }, + { + "output": "The default value for this setting is often--, meaning that all features can be used as they are. If you include a test dataset before selecting a time column, and that test dataset is missing any columns, then you will see a number as the default for **Unavailable Columns at Prediction Time**, which will be the number of columns that are in the training dataset but not the testing dataset. All of these features will only be looked at historically, and you can see a list of them by clicking on this setting. Using a Driverless AI Time Series Model to Forecast --------------------------------------------------- When you set the experiment's forecast horizon, you are telling the Driverless AI experiment the dates this model will be asked to forecast for. In the Walmart Sales example, we set the Driverless AI forecast horizon to 1 (1 week in the future). This means that Driverless AI expects this model to be used to forecast 1 week after training ends. 
Because the training data ends on 2020-10-26, this model should be used to score for the week of 2020-11-02.", + "prompt_type": "plain" + }, + { + "output": "There are two options: - Option 1: Trigger a Driverless AI experiment to be trained once the forecast horizon ends. A Driverless AI experiment will need to be re-trained every week. - Option 2: Use **Test Time Augmentation** (TTA) to update historical features so that we can use the same model to forecast outside of the forecast horizon. **Test Time Augmentation** (TTA) refers to the process where the model stays the same but the features are refreshed using the latest data. In our Walmart Sales Forecasting example, a feature that may be very important is the Weekly Sales from the previous week. Once we move outside of the forecast horizon, our model no longer knows the Weekly Sales from the previous week. By performing TTA, Driverless AI will automatically generate these historical features if new data is provided. In Option 1, we would launch a new Driverless AI experiment every week with the latest data and use the resulting model to forecast the next week. In Option 2, we would continue using the same Driverless AI experiment outside of the forecast horizon by using TTA.", + "prompt_type": "plain" + }, + { + "output": "By retraining an experiment with the latest data, Driverless AI has the ability to possibly improve the model by changing the features used, choosing a different algorithm, and/or selecting different parameters. As the data changes over time, for example, Driverless AI may find that the best algorithm for this use case has changed. There may be clear advantages for retraining an experiment after each forecast horizon or for using TTA. Refer to `this example `__ to see how to use the scoring pipeline to predict future data instead of using the prediction endpoint on the Driverless AI server. 
Using TTA to continue using the same experiment over a longer period of time means there is no longer any need to continually repeat a model review process. However, it is possible for the model to become out of date. The following is a table that lists several scoring methods and whether they support TTA: +-------------------------+--------------------------------+ | Scoring Method | Test Time Augmentation Support | +=========================+================================+ | Driverless AI Scorer | Supported | +-------------------------+--------------------------------+ | Python Scoring Pipeline | Supported | +-------------------------+--------------------------------+ | MOJO Scoring Pipeline | Not Supported | +-------------------------+--------------------------------+ For different use cases, there may be clear advantages for retraining an experiment after each forecast horizon or for using TTA.", + "prompt_type": "plain" + }, + { + "output": "**Notes**: - Scorers cannot refit or retrain a model. - To specify a method for creating rolling test set predictions, use :ref:`this expert setting `. Note that refitting performed with this expert setting is only applied to the test set that is provided by the user during an experiment. The final scoring pipeline always uses TTA. Triggering Test Time Augmentation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To perform Test Time Augmentation, create your forecast data to include any data that occurred after the training data ended up to the dates you want a forecast for. The dates that you want Driverless AI to forecast should have missing values (NAs) where the target column is. Target values for the remaining dates must be filled in. 
The following is an example of forecasting for 2020-11-23 and 2020-11-30 with the remaining dates being used for TTA: +----------+--------+----------+-----------+-----------+------------+ | Date | Store | Dept | Mark Down | Mark Down | We | | | | | 1 | 2 | ekly_Sales | +==========+========+==========+===========+===========+============+ | 20 | 1 | 1 | -1 | -1 | $35,000 | | 20-11-02 | | | | | | +----------+--------+----------+-----------+-----------+------------+ | 20 | 1 | 1 | -1 | -1 | $40,000 | | 20-11-09 | | | | | | +----------+--------+----------+-----------+-----------+------------+ | 20 | 1 | 1 | -1 | -1 | $45,000 | | 20-11-16 | | | | | | +----------+--------+----------+-----------+-----------+------------+ | 20 | 1 | 1 | -1 | -1 | NA | | 20-11-23 | | | | | | +----------+--------+----------+-----------+-----------+------------+ | 20 | 1 | 1 | -1 | -1 | NA | | 20-11-30 | | | | | | +----------+--------+----------+-----------+-----------+------------+ **Notes**: - Although TTA can span any length of time into the future, the dates that are being predicted cannot exceed the horizon.", + "prompt_type": "plain" + }, + { + "output": "Forecasting Future Dates ~~~~~~~~~~~~~~~~~~~~~~~~ To forecast or predict future dates, upload a dataset that contains the future dates of interest and provide additional information such as group IDs or features known in the future. The dataset can then be used to run and score your predictions. 
The following is an example of a model that was trained up to 2020-05-31: +------------+----------+-----------------+-----------------+ | Date | Group_ID | Known_Feature_1 | Known_Feature_2 | +============+==========+=================+=================+ | 2020-06-01 | A | 3 | 1 | +------------+----------+-----------------+-----------------+ | 2020-06-02 | A | 2 | 2 | +------------+----------+-----------------+-----------------+ | 2020-06-03 | A | 4 | 1 | +------------+----------+-----------------+-----------------+ | 2020-06-01 | B | 3 | 0 | +------------+----------+-----------------+-----------------+ | 2020-06-02 | B | 2 | 1 | +------------+----------+-----------------+-----------------+ | 2020-06-03 | B | 4 | 0 | +------------+----------+-----------------+-----------------+ Time Series Expert Settings --------------------------- The user may further configure the time series experiments with a dedicated set of options available through the **Expert Settings** panel.", + "prompt_type": "plain" + }, + { + "output": "Install on RHEL\nThis section describes how to install the Driverless AI Docker image on\nRHEL. The installation steps vary depending on whether your system has\nGPUs or if it is CPU only. Environment\n -------------------------------------------\n Operating System GPUs? Min Mem\n ------------------------- ------- ---------\n RHEL with GPUs Yes 64 GB\n RHEL with CPUs No 64 GB\n -------------------------------------------\nInstall on RHEL with GPUs\nNote: Refer to the following links for more information about using RHEL\nwith GPUs. These links describe how to disable automatic updates and\nspecific package updates. This is necessary in order to prevent a\nmismatch between the NVIDIA driver and the kernel, which can lead to the\nGPUs failures. - https://access.redhat.com/solutions/2372971\n - https://www.rootusers.com/how-to-disable-specific-package-updates-in-rhel-centos/\nWatch the installation video here. 
Note that some of the images in this\nvideo may change between releases, but the installation steps remain the\nsame.", + "prompt_type": "plain" + }, + { + "output": "Open a Terminal and ssh to the machine that will run Driverless AI. Once\nyou are logged in, perform the following steps. 1. Retrieve the Driverless AI Docker image from\n https://www.h2o.ai/download/. 2. Install and start Docker EE on RHEL (if not already installed). Follow the instructions on\n https://docs.docker.com/engine/installation/linux/docker-ee/rhel/. 3. Install nvidia-docker2 (if not already installed). More information\n is available at\n https://github.com/NVIDIA/nvidia-docker/blob/master/README.md. 4. Verify that the NVIDIA driver is up and running. If the driver is\n not up and running, log on to\n http://www.nvidia.com/Download/index.aspx?lang=en-us to get the\n latest NVIDIA Tesla V/P/K series driver. 5. Set up a directory for the version of Driverless AI on the host\n machine:\n6. Change directories to the new folder, then load the Driverless AI\n Docker image inside the new directory:\n7. Enable persistence of the GPU. Note that this needs to be run once\n every reboot.", + "prompt_type": "plain" + }, + { + "output": "8. Set up the data, log, and license directories on the host machine\n (within the new directory):\n9. At this point, you can copy data into the data directory on the host\n machine. The data will be visible inside the Docker container. 10. Run docker images to find the image tag. 11. Start the Driverless AI Docker image and replace TAG below with the\n image tag. Depending on your install version, use the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command. Note that from version 1.10 DAI docker image\n runs with internal tini that is equivalent to using --init from\n docker, if both are enabled in the launch command, tini will print a\n (harmless) warning message. 
For GPU users, as GPU needs --pid=host\n for nvml, which makes tini not use pid=1, so it will show the\n warning message (still harmless). 12. Connect to Driverless AI with your browser at\n http://Your-Driverless-AI-Host-Machine:12345. Install on RHEL with CPUs\nThis section describes how to install and start the Driverless AI Docker\nimage on RHEL.", + "prompt_type": "plain" + }, + { + "output": "Watch the installation video here. Note that some of the images in this\nvideo may change between releases, but the installation steps remain the\nsame. Note\nAs of this writing, Driverless AI has been tested on RHEL versions 7.4,\n8.3, and 8.4. Open a Terminal and ssh to the machine that will run Driverless AI. Once\nyou are logged in, perform the following steps. 1. Install and start Docker EE on RHEL (if not already installed). Follow the instructions on\n https://docs.docker.com/engine/installation/linux/docker-ee/rhel/. 2. On the machine that is running Docker EE, retrieve the Driverless AI\n Docker image from https://www.h2o.ai/download/. 3. Set up a directory for the version of Driverless AI on the host\n machine:\n4. Load the Driverless AI Docker image inside the new directory:\n5. Set up the data, log, license, and tmp directories (within the new\n directory):\n6. Copy data into the data directory on the host. The data will be\n visible inside the Docker container at //data.", + "prompt_type": "plain" + }, + { + "output": "Run docker images to find the image tag. 8. Start the Driverless AI Docker image. Note that GPU support will not\n be available. Note that from version 1.10 DAI docker image runs with\n internal tini that is equivalent to using --init from docker, if\n both are enabled in the launch command, tini will print a (harmless)\n warning message. 9. Connect to Driverless AI with your browser at\n http://Your-Driverless-AI-Host-Machine:12345. 
Stopping the Docker Image\nTo stop the Driverless AI Docker image, type Ctrl + C in the Terminal\n(Mac OS X) or PowerShell (Windows 10) window that is running the\nDriverless AI Docker image. Upgrading the Docker Image\nThis section provides instructions for upgrading Driverless AI versions\nthat were installed in a Docker container. These steps ensure that\nexisting experiments are saved. WARNING: Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\ndirectory and are not automatically upgraded when Driverless AI is\nupgraded. - Build MLI models before upgrading.", + "prompt_type": "plain" + }, + { + "output": "- Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. If you did not build MLI on a model before upgrading Driverless AI,\n then you will not be able to view MLI on that model after upgrading. Before upgrading, be sure to run MLI jobs on models that you want to\n continue to interpret in future releases. If that MLI job appears in\n the list of Interpreted Models in your current version, then it will\n be retained after upgrading. If you did not build a MOJO pipeline on a model before upgrading\n Driverless AI, then you will not be able to build a MOJO pipeline on\n that model after upgrading. Before upgrading, be sure to build MOJO\n pipelines on all desired models and then back up your Driverless AI\n tmp directory. Note: Stop Driverless AI if it is still running. Requirements\nWe recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere. Driverless AI ships with CUDA 11.2.2 for GPUs, but the driver\nmust exist in the host environment.", + "prompt_type": "plain" + }, + { + "output": "Overview\nH2O Driverless AI is an artificial intelligence (AI) platform for\nautomatic machine learning. 
Driverless AI automates some of the most\ndifficult data science and machine learning workflows, such as feature\nengineering, model validation, model tuning, model selection, and model\ndeployment. It aims to achieve the highest predictive accuracy,\ncomparable to expert data scientists, but in a much shorter time thanks\nto end-to-end automation. Driverless AI also offers automatic\nvisualization and machine learning interpretability (MLI). Especially in\nregulated industries, model transparency and explanation are just as\nimportant as predictive performance. Modeling pipelines (feature\nengineering and models) are exported (in full fidelity, without\napproximations) both as Python modules and as Java standalone scoring\nartifacts. Apart from the standard experiment workflow for\nmodel building, DAI offers an experiment setup wizard that\nmakes it simple for you to set up a Driverless AI experiment and ensure\nthat the experiment's settings are optimally configured for your\nspecific use case.", + "prompt_type": "plain" + }, + { + "output": "Unsupervised Algorithms in Driverless AI (Experimental)\nStarting with version 1.10, Driverless AI exposes unsupervised\ntransformers that you can use for unsupervised model building. The\nfollowing sections describe several unsupervised transformers and\ncontain information on support for custom recipes and expert control of\nunsupervised experiments. 1. Isolation Forest Anomaly detection \n2. K-Means Clustering \n3. Truncated SVD (Dimensionality Reduction) \n4. Full support for custom recipes \n5. Expert control over Unsupervised Experiments \nConceptually, the overall pipeline of an unsupervised experiment is\nsimilar to the pipeline of a regular supervised experiment. However,\nthere are a few notable differences:\n1. Only one unsupervised algorithm (model, pipeline) can be chosen\n (that is, either clustering or anomaly detection, but not both). 
In\n other words, all individuals in the genetic algorithm are of the\n same model type, but they can have different parameters (e.g., number of\n clusters, columns used for clustering).", + "prompt_type": "plain" + }, + { + "output": "Each such unsupervised modeling pipeline consists of exactly one\n pretransformer, one transformer and one model. No labels (y) are\n required. 3. The unsupervised model has only one function: To list the included\n pretransformer, the included transformer and any applicable scorers. The model itself is a pure pass-through function, the\n models.predict() method returns the output of the transformer\n pipeline (any features the transformers make). This also means that\n the variable importance of the model is ill-defined, and uniformly\n spread across features. For clustering, there will be only 1 feature\n (the assigned cluster label), and it will have variable importance\n of 1.0. 4. Automatic Machine Learning is only possible if there's a metric\n (scorer) that assesses the quality of the transformation via\n score(X, actual=None, predicted=transformed_X). For example, the\n quality of the labels created by a K-Means clustering algorithm can\n be evaluated for a given dataset, given labels, and a metric.", + "prompt_type": "plain" + }, + { + "output": "This value can be ignored, and signals Driverless AI\n that the experiment is converged after the first iteration. 5. No MLI support in 1.10.0, but is planned for future releases. 6. No ensembles and cross-validation for final models for unsupervised\n experiments (fixed_ensemble_level=0 is enforced). As a consequence,\n creation of training holdout predictions is not possible (all data\n is used for the final model). If predictions like cluster\n assignments are desired for the training data, please make\n predictions on the training data, with the usual caveats of\n overfitting (due to heavy tuning during AutoML) since fit() and\n predict() are performed with the same data.
Isolation Forest Anomaly detection\nIsolation forest isolates or identifies the anomalous entries by\nrandomly splitting the decision trees. The idea is that an outlier will\nlie farther away from the regular observations in the feature space and\nhence will require fewer random splits to isolate to the terminal node\nof a tree.", + "prompt_type": "plain" + }, + { + "output": "The lower the score, the more likely it is that the row is an\nanomaly. Internally, Driverless AI runs sklearn's Isolation Forest\nimplementation. When building a model, the Accuracy and Time knobs of Driverless AI can\nbe toggled to adjust the effort spent on model tuning but presently as\nthere is no scorer being used for isolation forest, when doing\ngenetic algorithm , the model will converge immediately and use one\nof the models from the tuning phase as the final model. The\nInterpretability knob is ignored in the default set up. The number of\ntrees or n_estimators for the isolation forest model can be adjusted\nwith the isolation_forest_nestimators expert setting parameter. After building the model, the scores can be obtained by predicting on\nthe same dataset. Note that if you pass a test dataset, then you can\ndownload predictions immediately without predicting on the same dataset. If you don't pass a test dataset, then you must go to Model actions >\nPredict. The lower the scores of a row, the more likely it is an outlier\nor anomaly by the model.", + "prompt_type": "plain" + }, + { + "output": "To create labels from these scores, quantile value can be used as a\nthreshold. For example, if you know that 5% of the rows are anomalous in\nyour dataset, then this can be used to calculate the 95th quantile of\nthe scores. This quantile can act as a threshold to classify each row as\nbeing an anomaly or not. The Python scoring pipeline can be used to deploy the\nIsolation Forest model to production (currently no MOJO support). 
Use case idea: Given an anomaly detection experiment, you can create\npredictions on the training dataset, including all original columns, and\nre-upload into Driverless AI to run a supervised experiment. For a given\nsimilar dataset (in production), you now have an unsupervised scorer\nthat tells you the anomaly score for each row, and supervised scorer\nwhich makes Shapley per-feature contribution reason codes to explain why\neach row is an anomaly or not. Note: The following are some additional details on the transformers and\npretransformers that are relevant to IF.", + "prompt_type": "plain" + }, + { + "output": "- OrigFreqPreTransformer (pretransformer): Categoricals are frequency\n encoded with this pretransformer. Note that isolation forest itself\n only accepts numericals. KMeans Clustering\nClustering algorithms partition observations into clusters. Driverless\nAI uses sklearn KMeans clustering algorithm to partition the\nobservations so that they belong to the cluster with the nearest mean\n(centroid of the cluster). Driverless AI exposes the following unsupervised models that run on\nnumeric and categorical columns to build a K-Means clustering model. You\ncan either pick a model type based on the characteristics of your\ndataset, or run all of them (one by one) to decide which one works best\nfor your dataset. 
- KMeans : This does K-Means clustering only on numeric columns\n - KMeansFreq : This does K-Means clustering on numeric and\n frequency transformed categorical (integer\n columns are treated only as numeric)\n - KMeansOHE : This does K-Means clustering on numeric and\n one-hot-encoding transformed categorical columns\nDriverless AI provides the following scorers to enable automatic\nunsupervised clustering:\n - CALINSKI HARABASZ : The Calinski-Harabasz index also known as the\n Variance Ratio Criterion, is the ratio of the sum of\n between-clusters dispersion and of within-cluster dispersion for\n all clusters.", + "prompt_type": "plain" + }, + { + "output": "- DAVIES BOULDIN : The Davies-Bouldin Index signifies the average\n 'similarity' between clusters, where similarity is a measure that\n compares distance between clusters with the size of the clusters\n themselves. A lower Davies-Bouldin index relates to a model with\n better separation between the clusters. - SILHOUETTE : The Silhouette Coefficient is defined for each sample\n and is composed of two scores. The mean distance between a sample\n and all other points in the same class. This score measures the\n closeness of points in the same cluster. And the mean distance\n between a sample and all other points in the next nearest cluster. This score measures the distance of points of different clusters. A\n higher Silhouette Coefficient score relates to a model with better\n defined clusters. This scorer can be slow for larger datasets. Ref\nWhile building a clustering model, Accuracy and Time knobs can be\ntoggled to adjust the effort spent on model tuning and validation.", + "prompt_type": "plain" + }, + { + "output": "unsupervised_clustering_max_clusters`` parameters can be used in the\nexpert panel to set the upper and lower bound on the number of clusters\nto build.\n\nDuring model building, Driverless AI creates KMeans Clustering model on\na subset of features (between 2 to 5).
The feature subset size, columns\nto be used for clustering and the parameter tuning is decided during the\ngenetic algorithm process. User can set the feature subset size\n(dimensionality of space to cluster) by", + "prompt_type": "plain" + }, + { + "output": "fixed_interaction_depthparameter of the expert settings. The value should lie between 2 to 5. Say,fixed_interaction_depth=4, then clustering will be performed in 4D. If say, more than 4 features are present in the dataset (or after accounting for the pre-transformations like one-hot-encoding), then when doing genetic algorithm, DAI will select input features and model parameters (based on internal train/valid split(s)) to decide the best possible subset of 4 features and their parameter set to build the model that optimizes the scores. The **scorer** takes the *full dataset* (pre transformed with all features) and *labels* for the rows as created by the (subset of features) clustering model to give the scores. It compares the output of the unsupervised transformer to its input. The **Insights** tab of the experiment gives a peek into the working of clustering transformer on the subset of features to build the best model. It lists the cluster sizes and centroids for the features in the cluster.", + "prompt_type": "plain" + }, + { + "output": "Aggregator algorithm is used to reduce the datasize for the plot. This is a preview of the custom visualization capability (using Vega) that is coming soon to DAI. After building the model, the :ref:`Visualize Scoring Pipeline option ` can be used to inspect the **pre transformations** applied to the features, before building model (on subset of features) and scoring (on full set). It can also be used to inspect the features used to build the clustering model. The cluster **labels** can be created by predicting on the dataset. 
To get cluster label assignments for the training (or any) dataset, then the fitted model can be used to make predictions, just like any supervised model. Note that overfitting can occur anytime when fit and predict are performed on the same dataset. The clustering model produces :ref:`MOJOs ` and :ref:`Python scoring pipelines ` to deploy to :ref:`production `. .. figure:: images/clust_pipeline.png :alt: You can also write custom clustering recipes by defining your own pretransformer (i.e what columns with what encodings are fed in for clustering), clustering transformer, and scorer.", + "prompt_type": "plain" + }, + { + "output": "(For best results, use the release branch that corresponds with your version of Driverless AI.) .. _svd: Truncated SVD (Dimensionality Reduction) ---------------------------------------- `Truncated SVD `__ is a dimensionality reduction method and can be applied to a dataset to reduce the number of features before running say a supervised algorithm. It factorizes data matrix where the number of columns is equal to the specified truncation. It is useful in use cases where *sparse* data gets generated like recommender systems or in text processing like tfidf. Internally Driverless AI runs `sklearn Truncated SVD `__ implementation. .. raw:: html \"svd\" Driverless AI exposes the TRUNCSVD transformer to reduce the number of features. Presently, none of the parameters can be toggled by the user.", + "prompt_type": "plain" + }, + { + "output": "(Note that these are considered random mutations.) After building the model, :ref:`Visualizing scoring pipeline ` can be used to inspect the number of components created. Additionally, the dimensionality reduced dataset can be obtained by predicting on the dataset. Presently as there is no scorer being used for SVD experiment, when doing :ref:`genetic algorithm `, the model will converge immediately and use one of the models from the :ref:`tuning phase ` as the final model. 
The Dimensionality Reduction model produces :ref:`MOJOs ` and :ref:`Python ` scoring pipelines to deploy to :ref:`production `. .. _unsup_custom_recipes: Unsupervised Custom Recipes --------------------------- Driverless AI supports **custom Python recipes for unsupervised learning**. You can write custom unsupervised recipes by defining your own pretransformer, transformer, and scorer. To view examples, see the `official Driverless AI recipes repository `__.", + "prompt_type": "plain" + }, + { + "output": ".. _unsup_expert_control: Expert control over Unsupervised Experiments -------------------------------------------- You can control unsupervised experiments by selecting specific **pretransformers** and **transformers**. Pretransformers are equivalent to the first layer of a pipeline, and transformers are equivalent to the second layer of a pipeline. To specify pretransformers and transformers, use the Expert Settings window of an experiment. For more information, see :ref:`understanding-configs`. The following steps describe how to control unsupervised experiments with the Expert Settings window. 1. On the **Experiment Setup** page, select **Unsupervised**. 2. Click **Unsupervised learning model** and select **Unsupervised** from the list of options. The preview updates to display the transformers that are used by default. 3. On the Experiment Setup page, click **Expert Settings**. The Expert Settings window is displayed. a. **To select specific pretransformers:** In the **Training -> Feature Engineering** tab, click the **Select values** button for the **Include specific preprocessing transformers** (included_pretransformers) setting.", + "prompt_type": "plain" + }, + { + "output": "b. **To select specific transformers:** In the **Training -> Feature Engineering** tab, click the **Select values** button for the **Include specific transformers** (included_transformers). To confirm your selection, click **Done**. 
**Note:** Selecting pretransformers isn't required. If no pretransformers are selected, then the first layer is ignored. .. figure:: images/unsupervised-expert.png :alt: 4. To confirm your overall selection and exit out of the Expert Settings window, click the **Save** button. 5. In the **Training Settings** category on the Experiment Setup page, specify the **Unsupervised** scorer. Alternatively, select a custom scorer. .. figure:: images/unsup_expert.png :alt: Expert control example 1 ~~~~~~~~~~~~~~~~~~~~~~~~ The following list contains examples of how you can use expert control to configure unsupervised experiments. - Input text through **term frequency\u2013inverse document frequency (TFIDF)** by setting TextTransformer as a pretransformer, and then through K-Means clustering by setting ClusterIdAllNumTransformer as a transformer.", + "prompt_type": "plain" + }, + { + "output": "- Set one_hot_encoding_cardinality_threshold and one_hot_encoding_cardinality_threshold_default_use to a large value like 10,000,000 to allow all possible categorical levels to be included. Expert control example 2 ~~~~~~~~~~~~~~~~~~~~~~~~ The following example describes how you can use expert control to configure unsupervised experiments using a custom recipe for text handling. - Upload https://github.com/h2oai/driverlessai-recipes/blob/master/transformers/nlp/text_topic_modeling_transformer.py (Or choose the version for your DAI release by selecting the correct branch version.) - Upload https://github.com/h2oai/driverlessai-recipes/blob/master/models/unsupervised/TextKMeansIsolationForest.py (Or choose the version for your DAI release by selecting the correct branch version.) - Upload a dataset. On the Experiment Setup page, select **Unsupervised**, and then select KMeansFreqTextModel for the unsupervised model.
You can select a variety of other models in the TextKMeansIsolationForest recipe.", + "prompt_type": "plain" + }, + { + "output": "- Upload https://github.com/h2oai/driverlessai-recipes/blob/master/transformers/nlp/text_topic_modeling_transformer.py (or choose the version for your DAI release) - Upload a dataset. On the Experiment Setup page, select **Unsupervised**, and then select **UnsupervisedModel** for the unsupervised model. - Click **Expert Settings**. The Expert Settings window is displayed. - In the **Training -> Feature Engineering** tab, select **Specific transformers to include** (TOMLincluded_transformers) and select only ClusterIdAllNumTransformer. - In the **Training -> Feature Engineering** tab, select **Specific pretransformers to include** (TOMLincluded_pretransformers) and select only TextLDATopicTransformer. - On the **Experiment Setup** page, click **Scorer** and select either UnsupervisedScorer (for one-shot model) or CalinskiHarabasz (for optimal clusters). Expert control example 4 ~~~~~~~~~~~~~~~~~~~~~~~~ In many cases, you may only want a single output from an unsupervised model.", + "prompt_type": "plain" + }, + { + "output": "UNSUPERVISEDscorer to just do single model. Another way to achieve a similar result in Driverless AI version 1.10.5 and beyond is to make the recipe match the following: .. code:: python from h2oaicore.models_custom import CustomModel # don't use CustomUnsupervisedModel from h2oaicore.models_unsupervised import UnsupervisedModel class MyUnsupervisedModel(UnsupervisedModel, CustomModel): _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] but then set expert optioncustom_unsupervised_expert_mode=true. This forces the experiment to use this custom unsupervised model as if it were likeUnsupervisedModelin terms of requiring you to go to the expert panel and select which scorers, transformers, and pretransformers to be used (like supervised experiments). 
However, by forcing this model to only havengenes_max=1, it ensures only a single instance of the transformer is produced. Note that in this case, onlyUnsupervisedScoreris available as an option. A slight deviation from the preceding example is to use a recipe like the following: .. code:: python from h2oaicore.models_custom import CustomModel # don't use CustomUnsupervisedModel from h2oaicore.models_unsupervised import UnsupervisedModel class MyUnsupervisedModel(UnsupervisedModel, CustomModel): _ngenes_max = 1 _ngenes_max_by_layer = [1000, 1] _included_scorers = ['UnsupervisedScorer', 'SilhouetteScorer', 'CalinskiHarabaszScorer', 'DaviesBouldinScorer'] and set expert optioncustom_unsupervised_expert_mode=true, which behaves like the prior example, but lets you select other scorers and still give single feature from the model.", + "prompt_type": "plain" + }, + { + "output": "Using License Manager (beta)\nThe following sections describe how to use License Manager with\nDriverless AI. Presently it is in beta state and is optional. Please\ncontact support@h2o.ai to get License manager artifacts. - understanding-lm\n- configure-lm\nUnderstanding License Manager\nLicense Manager is a software that is used to assist in the monitoring\nof license usage for H2O.ai products. It allows for the application of a\nsingle global license that can optionally implement specific\nrestrictions (for example, a restriction on the maximum number of\nconcurrent Driverless AI users can be specified). The license is applied\nto the License Management server, not to individual products. Configuring Driverless AI to Use License Manager\nAlthough Driverless AI can technically be started without the license\nmanager server running, you would not be able to log in and use the\nsoftware if Driverless AI is unable to communicate with a running\nlicense management server. 
Therefore, it is recommended that the License\nManager server be started before starting any Driverless AI instances.", + "prompt_type": "plain" + }, + { + "output": "Obtain a license manager install artifact from H2O.ai. Choose from\n the following:\n - DEB\n - RPM\n - Docker\n - Linux binary\n2. Install the artifact:\n - DEB - dpkg -i /path/to/lms.deb\n - RPM - rpm -ivh /path/to/lms.rpm\n - Docker - docker load < /path/to/lms.tar.gz\n - Linux binary - No install necessary. Only a Linux-based\n machine is required\n3. Start the License Manager server. This process may vary depending on\n the install type. systemd-based artifacts may require some changes\n to startup scripts if custom startup is needed. Custom startup can\n be performed with the application.properties file or environment\n variables. By default, the license manager UI is available at\n http://license-manager-ip-address:9999. License Manager Server Setup\n1. To acquire a license, contact support@h2o.ai. 2. Create a new project or use the default project with a\n useful/explicit name. 3. Enable the new project.", + "prompt_type": "plain" + }, + { + "output": "Navigate to the Licenses panel in License Manager UI and load the\n license to the License Manager server. Links to the Licenses panel\n are located in the left-hand side bar of the interface. []\nStarting Driverless AI with License Manager\nTo configure Driverless AI to use License Manager on startup, use the\nconfig.toml file. The following TOML options can also be\nset with environment variables. Note: The Driverless AI instance must have the ability to communicate\nwith the License Manager server over a network. 
Sample config.toml :\n # License Management\n enable_license_manager = true\n license_manager_address = \"http://127.0.0.1:9999\"\n license_manager_project_name = \"license-manager-test\"\n license_manager_lease_duration = 3600000\n license_manager_ssl_certs = \"/home/npng\"\n license_manager_worker_startup_timeout = 60000\nThe following are descriptions of the relevant settings:\n- enable_license_manager - In order for Driverless AI to use the\n license manager, this must be set to true\n- license_manager_address - The IP address and port of the license\n manager so that Driverless AI knows where to access the license\n manager\n- license_manager_project_name - Name of the newly created project\n with license loaded to it from above\n- license_manager_lease_duration (Optional) - How long (in\n milliseconds) the lease issued by the license manager remains active\n before requiring a renewal.", + "prompt_type": "plain" + }, + { + "output": "S3 Setup\n\nDriverless AI lets you explore S3 data sources from within the\nDriverless AI application. This section provides instructions for\nconfiguring Driverless AI to work with S3.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run\n--runtime=nvidia(>= Docker 19.03) ornvidia-docker(< Docker 19.03) command when starting the Driverless AI Docker image. Usedocker\nversionto check which version of Docker you are using. Description of Configuration Attributes --------------------------------------- -aws_access_key_id: The S3 access key ID -aws_secret_access_key: The S3 access key -aws_role_arn: The Amazon Resource Name -aws_default_region: The region to use when the aws_s3_endpoint_url option is not set. This is ignored when aws_s3_endpoint_url is set. -aws_s3_endpoint_url: The endpoint URL that will be used to access S3. 
-aws_use_ec2_role_credentials: If set to true, the S3 Connector will try to obtain credentials associated with the role attached to the EC2 instance. -s3_init_path: The starting S3 path that will be displayed in the UI S3 browser. -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. Example 1: Enable S3 with No Authentication ------------------------------------------- .. container:: tabs .. group-tab:: Docker Image Installs This example enables the S3 data connector and disables authentication.", + "prompt_type": "plain" + }, + { + "output": "This allows users to reference data stored in S3 directly using the name node address, for example: s3://name.node/datasets/iris.csv. .. code:: bash nvidia-docker run \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,s3\" \\ -p 12345:12345 \\ --init -it --rm \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure S3 options in the config.toml file, and then specify that file when starting Driverless AI in Docker. Note that this example enables S3 with no authentication. 1. Configure the Driverless AI config.toml file. Set the following configuration options. .. -enabled_file_systems\n= \"file, upload,\ns3\"2. Mount the config.toml file into the Docker container.", + "prompt_type": "plain" + }, + { + "output": "It does not pass any S3 access key or secret. 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. ..
:: # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, s3\" 3. Save the changes when you are done, then stop/restart Driverless AI. Example 2: Enable S3 with Authentication ---------------------------------------- .. container:: tabs .. group-tab:: Docker Image Installs This example enables the S3 data connector with authentication by passing an S3 access key ID and an access key. It also configures Docker DNS by passing the name and IP of the S3 name node. This allows users to reference data stored in S3 directly using the name node address, for example: s3://name.node/datasets/iris.csv. .. 
code:: bash nvidia-docker run \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,s3\" \\ -e DRIVERLESS_AI_AWS_ACCESS_KEY_ID=\"\" \\ -e DRIVERLESS_AI_AWS_SECRET_ACCESS_KEY=\"\" \\ -p 12345:12345 \\ --init -it --rm \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure S3 options with authentication in the config.toml file, and then specify that file when starting Driverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "Upgrading the Driverless AI Community Image\nWARNINGS:\n- This release deprecates experiments and MLI models from 1.7.0 and\n earlier. - Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded. We recommend you take the following steps before\n upgrading. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. Upgrading from Version 1.2.2 or Earlier\nThe following example shows how to upgrade from 1.2.2 or earlier to the\ncurrent version. Upgrading from these earlier versions requires an edit\nto the start and h2oai scripts. 1. SSH into the IP address of the image instance and copy the existing\n experiments to a backup location:\n2. 
wget the newer image.", + "prompt_type": "plain" + }, + { + "output": "Understanding the Model Interpretation Page\nThis document describes the various interpretations available from the\nMachine Learning Interpretability (MLI) explanations page for\nnon-time-series experiments. The explanations page is organized into four tabs:\n - Summary Tab \n - Interpretations Using Driverless AI Model - DAI Model Tab \n - Interpretations Using Surrogate Model - Surrogate Model Tab \n - Interpretations Using NLP Dataset - NLP Tab (Only\n visible for NLP problems)\nThe mli-dashboard button reveals a dashboard with an overview of the\ninterpretations built using surrogate models. The\nActions button on the MLI page can be used to download\nreason codes, scoring pipelines for productionization, and MLI logs. The task bar lists the status and logs of MLI\nexplainers . Summary Tab\nThe Summary tab provides an overview of the interpretation, including\nthe dataset and Driverless AI experiment name (if available) that were\nused for the interpretation along with the feature space (original or\ntransformed), target column, problem type, and k-Lime information.", + "prompt_type": "plain" + }, + { + "output": "[]\nInterpretations Using Driverless AI Model (DAI Model Tab)\nThe DAI Model tab is organized into tiles for each interpretation\nmethod. To view a specific plot, click the tile for the plot that you\nwant to view. For binary classification and regression experiments, this tab includes\nFeature Importance and Shapley (not supported for RuleFit and TensorFlow\nmodels) plots for original and transformed features as well as Partial\nDependence/ICE, Disparate Impact Analysis (DIA), Sensitivity Analysis,\nNLP Tokens and NLP LOCO (for text experiments), and Permutation Feature\nImportance (if the autodoc_include_permutation_feature_importance\nconfiguration option is enabled) plots. 
For multiclass classification\nexperiments, this tab includes Feature Importance and Shapley plots for\noriginal and transformed features. The following is a list of the interpretation plots available from the\nDriverless AI Model tab:\n - Feature Importance (Original and Transformed Features) \n - Shapley (Original and Transformed Features) \n - Shapley Summary Plot (Original Features) \n - Partial Dependence (PDP) and Individual Conditional Expectation (ICE) \n - Disparate Impact Analysis \n - Time Series Explainer \n - Sensitivity Analysis \n - NLP LOCO \n - Permutation Feature Importance \n[]\nNotes:\n - Shapley plots are not supported for RuleFit, FTRL, and TensorFlow\n models.", + "prompt_type": "plain" + }, + { + "output": "To enable the\n calculations using Kernel Explainer method, enable Original Kernel\n SHAP explainer in recipes . - Shapley plots are only supported for those BYOR (custom) models\n that implement the has_pred_contribs method (and return True) and\n implement proper handling of the argument pred_contribs=True in\n the predict method. - The Permutation-based feature importance plot is only available\n when the autodoc_include_permutation_feature_importance\n configuration option is enabled when starting Driverless AI or\n when starting the MLI experiment (enable AutoDoc from the recipe\n tab and include_permutation_feature_importance from MLI AutoDoc\n expert settings when launching the MLI job). - On the Feature Importance and Shapley plots, the transformed\n feature names are encoded as follows:\n _::<...>:.\n So in 32_NumToCatTE:BILL_AMT1:EDUCATION:MARRIAGE:SEX.0, for\n example:\n - 32_ is the transformation index for specific transformation\n parameters.", + "prompt_type": "plain" + }, + { + "output": "- BILL_AMT1:EDUCATION:MARRIAGE:SEX represent original features\n used. 
- 0 represents the likelihood encoding for target[0] after\n grouping by features (shown here as BILL_AMT1, EDUCATION,\n MARRIAGE and SEX) and making out-of-fold estimates. For\n multiclass experiments, this value is > 0. For binary\n experiments, this value is always 0. Interpretations Using Surrogate Model (Surrogate Model Tab)\nA surrogate model is a data mining and engineering technique in which a\ngenerally simpler model is used to explain another, usually more\ncomplex, model or phenomenon. For example, the decision tree surrogate\nmodel is trained to predict the predictions of the more complex\nDriverless AI model using the original model inputs. The trained\nsurrogate model enables a heuristic understanding (i.e., not a\nmathematically precise understanding) of the mechanisms of the highly\ncomplex and nonlinear Driverless AI model. The Surrogate Model tab is organized into tiles for each interpretation\nmethod.", + "prompt_type": "plain" + }, + { + "output": "For binary classification and regression experiments, this\ntab includes K-LIME/LIME-SUP and Decision Tree plots as well as Feature\nImportance, Partial Dependence, and LOCO plots for the Random Forest\nsurrogate model. For more information on these plots, see\nsurrogate-model-plots. The following is a list of the interpretation plots from Surrogate\nModels:\n - K-LIME and LIME-SUP \n - Random Forest Feature Importance \n - Random Forest Partial Dependence and Individual Conditional Expectation \n - Random Forest LOCO \n - Decision Tree \n - NLP Surrogate \n[]\nNote: For multiclass classification experiments, only the Decision Tree\nand Random Forest Feature Importance plots are available in this tab. Interpretations Using NLP Dataset (NLP Tab)\nThe NLP tab is only visible for natural language processing (NLP)\nproblems and is organized into tiles for each interpretation method. 
To\nview a specific plot, click the tile for the plot that you want to view.\nThe following is a list of the interpretation plots available from the\nNLP tab:\n - dai-nlp-loco\n - mli-nlp-pdp\n - mli-nlp-tokens\n - mli-nlp-vlm\n[]\nSurrogate Models Dashboard\nTo view a dashboard with an overview of the interpretations built using\nsurrogate models, click the Surrogate Models Dashboard button.", + "prompt_type": "plain" + }, + { + "output": "[]\nFor binary classification and regression experiments, the Surrogate\nModels Dashboard page provides a single page with the following\nsurrogate plots. Note that the PDP and Feature Importance plots on this\npage are based on the Random Forest surrogate model. - Global Interpretable Model Explanations\n - Feature Importance\n - Decision Tree\n - Partial Dependence\nYou can also view explanations from this page by clicking the\nExplanations button located in the upper-right corner. Refer to the\nmli-explanations section for more information. Note: The Surrogate Models Dashboard is only available for binary\nclassification and regression experiments. []\nActions Button\nThe Actions button can be used to download reason codes, scoring\npipelines for productionization, and logs. Click this button to view the\nfollowing options:\n - MLI Docs: View the Machine Learning Interpretability section of\n the Driverless AI documentation. - Display MLI Java Logs: View MLI Java logs for the interpretation.", + "prompt_type": "plain" + }, + { + "output": "- Experiment: View the experiment that was used to generate the\n interpretation. - Download MLI Logs: Download a ZIP file of the logs that were\n generated during the interpretation. - Python Scoring Pipeline: For binomial and regression experiments,\n download the Python scoring pipeline for the interpretation. This\n option is not available for multiclass experiments. - Download k-LIME MOJO Reason Code Pipeline: Download the k-LIME\n MOJO Reason Code Pipeline. 
For more info, see klime-mojo. - Download Formatted Transformed Shapley Reason Codes: For\n regression, binary, and multiclass experiments, download a CSV\n file of formatted Shapley reason codes on transformed data. - Download Formatted LIME Reason Codes: For binomial experiments,\n download a CSV file of formatted LIME reason codes. - Download LIME Reason Codes: For binomial experiments, download a\n CSV file of LIME reason codes. - Download Formatted Original Shapley Reason Codes (Naive Shapley):\n For regression, binary, and multiclass experiments, download a CSV\n file of formatted Shapley reason codes for original data.", + "prompt_type": "plain" + }, + { + "output": "Feature Importance (Original and Transformed Features)\nThis plot is available for all models for binary classification,\nmulticlass classification, and regression experiments. This plot shows the Driverless AI feature importance. Driverless AI\nfeature importance is a measure of the contribution of an input variable\nto the overall predictions of the Driverless AI model. []\nShapley (Original and Transformed Features)\nThis plot is not available for RuleFit or TensorFlow models. For all\nother models, this plot is available for binary classification,\nmulticlass classification, and regression experiments. Shapley explanations are a technique with credible theoretical support\nthat presents consistent global and local variable contributions. Local\nnumeric Shapley values are calculated by tracing single rows of data\nthrough a trained tree ensemble and aggregating the contribution of each\ninput variable as the row of data moves through the trained ensemble. For regression tasks, Shapley values sum to the prediction of the\nDriverless AI model.", + "prompt_type": "plain" + }, + { + "output": "incomewill be 2.5 each. For ensembles, Shapley values (in the link space) are blended as per the model weights in the ensemble. 
Driverless AI :ref:`MOJO ` for productionization supports Naive Shapley (even split) approach for original features. Shapley values for original features can also be calculated with the **Kernel Explainer** method, which uses a special weighted linear regression to compute the importance of each feature. This can be enabled by using the :ref:`recipe ` Original Kernel SHAP explainer. More information about Kernel SHAP is available at http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf. .. figure:: images/shapley_original_features.png :alt: *Naive Shapley Original Feature Importance* *Naive Shapley Original Feature Importance* .. figure:: images/shapley_transformed.png :alt: *Transformed Shapley* *Transformed Shapley* The **Showing** :math:`n` **Features** dropdown for Feature Importance and Shapley plots lets you select between original and transformed features.", + "prompt_type": "plain" + }, + { + "output": "**Note**: The provided original values are approximations derived from the accompanying transformed values. For example, if the transformed feature :math:`feature1\\_feature2` has a value of 0.5, then the value of the original features (:math:`feature1` and :math:`feature2`) will be 0.25. .. _dai-shapley-summary: Shapley Summary Plot (Original Features) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The Shapley Summary Plot shows original features versus their local Shapley values on a sample of the dataset. Feature values are binned by Shapley values, and the average normalized feature value for each bin is plotted. To see the Shapley value, number of rows, and average normalized feature value for a particular feature bin, hold the pointer over the bin. The legend corresponds to numeric features and maps to their normalized value. Yellow is the lowest value, and deep orange is the highest. 
You can click on numeric features to see a scatter plot of the actual feature values versus their corresponding Shapley values.", + "prompt_type": "plain" + }, + { + "output": ".. raw:: html \"Shapley **Notes**: - The Shapley Summary Plot only shows original features that are used in the Driverless AI model. - The dataset sample size and the number of bins can be updated in the Interpretation Expert Settings. - For a list of Shapley Summary Plot explainer expert settings, see :ref:`interpretation-expert-settings-shapley`. .. _pdp-ice: Partial Dependence (PDP) and Individual Conditional Expectation (ICE) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A Partial Dependence and ICE plot is available for both Driverless AI and surrogate models. The Partial Dependence Technique ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Partial dependence is a measure of the average model prediction with respect to an input variable. Partial dependence plots display how machine-learned response functions change based on the values of an input variable of interest while taking nonlinearity into consideration and averaging out the effects of all other input variables.", + "prompt_type": "plain" + }, + { + "output": "Partial dependence plots enable increased transparency in Driverless AI models and the ability to validate and debug Driverless AI models by comparing a variable's average predictions across its domain to known standards, domain knowledge, and reasonable expectations. The ICE Technique ^^^^^^^^^^^^^^^^^ This plot is available for binary classification and regression models. A newer adaptation of partial dependence plots called Individual conditional expectation (ICE) plots can be used to create more localized explanations for a single individual by using the same basic ideas as partial dependence plots. ICE Plots were described by Goldstein et al (2015). 
ICE values are disaggregated partial dependence, but ICE is also a type of nonlinear sensitivity analysis in which the model predictions for a single row are measured while a variable of interest is varied over its domain. ICE plots enable a user to determine whether the model's treatment of an individual row of data is outside one standard deviation from the average model behavior, whether the treatment of a specific row is valid in comparison to average model behavior, known standards, domain knowledge, and reasonable expectations, and how a model will behave in hypothetical situations where one variable in a selected row is varied across its domain.", + "prompt_type": "plain" + }, + { + "output": "Large differences in partial dependence and ICE are an indication that strong variable interactions may be present. In this case partial dependence plots may be misleading because average model behavior may not accurately reflect local behavior. .. _partial-dependence-plot: Partial Dependence Plot (PDP) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This plot is available for binary classification and regression models. Overlaying ICE plots onto partial dependence plots allow the comparison of the Driverless AI model's treatment of certain examples or individuals to the model's average predictions over the domain of an input variable of interest. This plot shows the partial dependence when a variable is selected and the ICE values when a specific row is selected. Users may select a point on the graph to see the specific value at that point. You can also focus the PDP plot on a specific subset of data by using the slider in the middle of the screen. 
Partial dependence (yellow) portrays the average prediction behavior of the Driverless AI model across the domain of an input variable along with +/- 1 standard deviation bands.", + "prompt_type": "plain" + }, + { + "output": "Currently, partial dependence and ICE plots are only available for the top ten most important original input variables. Categorical variables with 20 or more unique values are never included in these plots. .. figure:: images/mli-pdp.png :alt: **Notes**: - To use dynamic switching between PDP numeric and categorical binning and UI chart selection in cases where features were used both as numeric and categorical by the experiment, enable the mli_pd_numcat_num_chart :ref:`config.toml ` setting. (This setting is enabled by default.) When this setting is enabled, you can specify the threshold for PDP binning and chart selection with the mli_pd_numcat_threshold setting, which defaults to 11. - The number of out of range / unseen PD or ICE bins can be specified through the PDP explainer :ref:`oor_grid_resolution` expert setting: .. .. raw:: html \"PDP - For a list of PDP explainer expert settings, see :ref:`interpretation-expert-settings-pdp`.", + "prompt_type": "plain" + }, + { + "output": "With this method, PD/ICE is calculated by an ad hoc explainer, then run and merged to the original DAI PD/ICE representation. To use the PD on-demand option, click the interpretation you want to use, then click **DAI Partial Dependence Plot** from the **DAI Model** tab. On the PD plot page, click the **Add Feature** button and select the feature(s) you want to calculate PD for. Click **Done** to confirm your selection. A notification appears at the bottom of the screen once Driverless AI has finished the on-demand computation. To view the computed PD values for a particular feature, click **Feature** on the PD plot page, then select the feature you want to view PD values for. .. raw:: html \"PDP .. 
_dai-dia: Disparate Impact Analysis (DIA) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This plot is available for binary classification and regression models. DIA is a technique that is used to evaluate fairness.", + "prompt_type": "plain" + }, + { + "output": "DIA typically works by comparing aggregate measurements of unprivileged groups to a privileged group. For instance, the proportion of the unprivileged group that receives the potentially harmful outcome is divided by the proportion of the privileged group that receives the same outcome\u2014the resulting proportion is then used to determine whether the model is biased. Refer to the **Summary** section to determine if a categorical level (for example, Fairness Female) is fair in comparison to the specified reference level and user-defined thresholds. **Fairness All** is a true or false value that is only true if every category is fair in comparison to the reference level. Disparate impact testing is best suited for use with constrained models in Driverless AI, such as linear models, monotonic GBMs, or RuleFit. The average group metrics reported in most cases by DIA may miss cases of local discrimination, especially with complex, unconstrained models that can treat individuals very differently based on small changes in their data attributes.", + "prompt_type": "plain" + }, + { + "output": "Several tables are provided as part of the analysis: - **Group metrics**: The aggregated metrics calculated per group. For example, true positive rates per group. - **Group disparity**: This is calculated by dividing the metric_for_group by the reference_group_metric. Disparity is observed if this value falls outside of the user-defined thresholds. - **Group parity**: This builds on Group disparity by converting the above calculation to a true or false value by applying the user-defined thresholds to the disparity values. In accordance with the established four-fifths rule, user-defined thresholds are set to 0.8 and 1.25 by default. 
These thresholds will generally detect if the model is (on average) treating the non-reference group 20% more or less favorably than the reference group. Users are encouraged to set the user-defined thresholds to align with their organization's guidance on fairness thresholds. Run DIA on external datasets ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ You can run DIA on a dataset that has predictions from an external source instead of getting predictions within Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "1. In the main navigation, click **MLI**. The **Interpreted Models** page is displayed. 2. Click the **New Interpretation** button, and then click **New Interpretation** from the list of available options. 3. In the **Interpretation Settings** section, click **Select dataset**, and then specify a dataset that has predictions from an external source. 4. In the **Interpretation Settings** section, click **Recipes**. Click the **Uncheck all** button, and then select only **Disparate Impact Analysis**. To confirm your selection, click **Done**. .. figure:: images/dia-external-select-recipe.png :alt: 5. In the **Interpretation Target** section, click **Select target column**, and then specify the target column. 6. In the **Interpretation Target** section, click **Select prediction column**, and then specify the prediction column. 7. Click the **Launch MLI** button. .. figure:: images/dia-external-launch.png :alt: Metrics - Binary Classification ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The following are formulas for error metrics and parity checks utilized by binary DIA.", + "prompt_type": "plain" + }, + { + "output": "- **ME** is the difference between the percent of the control group members receiving a favorable outcome and the percent of the protected class members receiving a favorable outcome: .. math:: \\text{ME} \\equiv 100 \\cdot (\\text{Pr}(\\hat{y} = 1 \\vert X_c = 1) - \\text{Pr}(\\hat{y} = 1 \\vert X_p = 1)) .. Where: - :math:`\\hat{y}` is the model decisions. 
- :math:`X_c` and :math:`X_p` are binary markers created from some demographic attribute. - :math:`c` is the control group. - :math:`p` is the protected group. - :math:`Pr(\\cdot)` is the operator for conditional probability. - **AIR** is equal to the ratio of the proportion of the protected class that receives a favorable outcome and the proportion of the control class that receives a favorable outcome: .. math:: \\text{AIR} \\equiv \\frac{Pr(\\hat{y} \\; = 1 \\vert X_p = 1)}{Pr(\\hat{y} \\; = 1 \\vert X_c = 1)} .. Where: - :math:`\\hat{y}` is the model decisions. - :math:`X_p` and :math:`X_c` are binary markers created from some demographic attribute.", + "prompt_type": "plain" + }, + { + "output": "- :math:`p` is the protected group. - :math:`Pr(\\cdot)` is the operator for conditional probability. - **SMD** is used to assess disparities in continuous features such as income differences in employment analyses or interest rate differences in lending: .. math:: \\text{SMD} \\equiv \\frac{\\bar{\\hat y_p} - \\bar{\\hat y_c}}{\\sigma_{\\hat y}} .. Where: - :math:`\\bar{\\hat y_p}` is the average protected class outcome. - :math:`\\bar{\\hat y_c}` is the average control class outcome. - :math:`\\sigma_{\\hat y}` is a measure of the standard deviation of the population. .. note:: - For more information on how DIA is implemented in Driverless AI, see https://www.frontiersin.org/articles/10.3389/frai.2021.695301/full. - Although the process of DIA is the same for both classification and regression experiments, the returned information is dependent on the type of experiment being interpreted. An analysis of a regression experiment returns an actual vs. predicted plot, while an analysis of a binary classification experiment returns confusion matrices.", + "prompt_type": "plain" + }, + { + "output": "In addition to its established use as a fairness tool, users may want to consider disparate impact for broader model debugging purposes. 
For example, users can analyze the supplied confusion matrices and group metrics for important, non-demographic features in the Driverless AI model. - For a list of DIA Summary Plot explainer expert settings, see :ref:`interpretation-expert-settings-dia`. - The mean prediction disparity is the average prediction for the group being considered divided by the average prediction for the reference group. - For more information on group disparity and parity, refer to https://h2oai.github.io/tutorials/disparate-impact-analysis/#5. .. figure:: images/disparate_impact_analysis.png :alt: *Classification Experiment* *Classification Experiment* .. figure:: images/dia_regression.png :alt: *Regression Experiment* *Regression Experiment* .. _dai-time-series: Time Series Explainer ~~~~~~~~~~~~~~~~~~~~~ For time series experiments, the following graphs are provided: - **Metric graph:** View a time series graph that uses the metric that your DAI experiment was optimized for.", + "prompt_type": "plain" + }, + { + "output": "Note that you can use the accompanying slider to view a specific range of dates. .. raw:: html \"Using - **Actual vs. Predicted:** View a graph that contrasts actual and predicted values. Note that this graph also features an accompanying slider that you can use to view a specific range of dates. In addition to the preceding graphs, the following additional information is provided: - **Group metrics:** Grouped metrics are based on an aggregation by group. For example, aggregate by store and department and get counts per group. You can also get the metric of interest, for example aggregate RMSE, etc. You can download all or specific group metrics by clicking the download button. - **Shapley values:** Based on the selected date, Shapley values for each feature are provided in this section. 
To view Value + Bias for each feature and definitions of the transformed feature, click the **Details** button.", + "prompt_type": "plain" + }, + { + "output": "Note that you can select a specific group and / or date by clicking **Group** or **Date**. .. figure:: images/interpret-time-series.png :alt: .. _dai-sa: Sensitivity Analysis (SA) ~~~~~~~~~~~~~~~~~~~~~~~~~ Overview ^^^^^^^^ **Note**: Sensitivity Analysis (SA) is only available for binary classification and regression experiments. Sensitivity Analysis (or \"What if?\") is a simple and powerful model debugging, explanation, fairness, and security tool. The idea behind SA is both direct and simple: Score your trained model on a single row, on multiple rows, or on an entire dataset of potentially interesting simulated values and compare the model\u2019s new outcome to the predicted outcome on the original data. Beyond traditional assessment practices, sensitivity analysis of machine learning model predictions is perhaps the most important validation technique for machine learning models. Sensitivity analysis investigates whether model behavior and outputs remain stable when data is intentionally perturbed or other changes are simulated in the data.", + "prompt_type": "plain" + }, + { + "output": "For example, when looking at predictions that determine financial decisions, SA can be used to help you understand the impact of changing the most important input variables and the impact of changing socially sensitive variables (such as Sex, Age, Race, etc.) in the model. If the model changes in reasonable and expected ways when important variable values are changed, this can enhance trust in the model. Similarly, if the model changes to sensitive variables have minimal impact on the model, then this is an indication of fairness in the model predictions. This page utilizes the `What If Tool `__ for displaying the SA information. 
The top portion of this page includes: - A summary of the experiment - Predictions for a specified column. Change the column on the Y axis to view predictions for that column. - The current working score set. This updates each time you rescore. The bottom portion of this page includes: - A filter tool for filtering the analysis.", + "prompt_type": "plain" + }, + { + "output": "Set the filter type (<,>, etc.). Choose to filter by False Positive, False Negative, True Positive, or True Negative. - Scoring chart. Click the **Rescore** button after applying a filter to update the scoring chart. This chart also lets you add or remove variables, toggle the main chart aggregation, reset the data, and delete the global history while resetting the data. - The current history of actions taken on this page. You can delete individual actions by selecting the action and then clicking the Delete button that appears. .. figure:: images/sensitivity_analysis.png :alt: Column actions ^^^^^^^^^^^^^^ When clicking a column in SA, the following actions are available: - **Absolute:** Change a column to a specific value for all rows. For example, you can set a column to have the value 5 for all observations. This is also possible for categorical columns. For example, you can set a categorical column to have the value \"foobar\" for all observations.", + "prompt_type": "plain" + }, + { + "output": "For example, you can add 9 to all observations in a numerical column. You can also pass in a negative number, for example, -9. The input must be numeric. - **Percentage:** Change a numeric column by some percentage. For example, passing 9 to this field changes all values to be 9% of its original value. For example, if the value is 2 and you pass in 9 as the percentage, then the value changes to be 0.18. The input must be an integer. - **Set:** Run the selected action with the valid value in the textbox. 
- **Randomize:** Randomly change the values in a column, irrespective of what is in the textbox. The change itself is absolute and based on the domain of the column. .. figure:: images/sa-column-actions.png :alt: Understand residuals ^^^^^^^^^^^^^^^^^^^^ Residuals are differences between observed and predicted values. In Sensitivity Analysis, the method used to calculate residuals varies depending on the type of problem. For classification problems, logloss residuals are calculated for the class of interest.", + "prompt_type": "plain" + }, + { + "output": "Use cases ^^^^^^^^^ **Use Case 1: Using SA on a Single Row or on a Small Group of Rows** This section describes scenarios for using SA for explanation, debugging, security, or fairness when scoring a trained model on a single row or on a small group of rows. - **Explanation**: Change values for a variable, and then rescore the model. View the difference between the original prediction and the new model prediction. If the change is big, then the changed variable is locally important. - **Debugging**: Change values for a variable, and then rescore the model. View the difference between the original prediction and the new model prediction and determine whether the change to variable made the model more or less accurate. - **Security**: Change values for a variable, and then rescore the model. View the difference between the original prediction and the new model prediction. If the change is big, then the user can, for example, inform their IT department that this variable can be used in an adversarial attack or inform the model makers that this variable should be more regularized.", + "prompt_type": "plain" + }, + { + "output": "View the difference between the original prediction and the new model prediction. If change is big, then the user can consider using a different model, regularizing the model more, or applying post-hoc bias remediation techniques. 
- **Random**: Set variables to random values, and then rescore the model. This can help you look for things that you might not have thought of. **Use Case 2: Using SA on an Entire Dataset and Trained Model** This section describes scenarios for using SA for explanation, debugging, security, or fairness when scoring a trained model for an entire dataset and trained predictive model. - **Financial Stress Testing**: Assume the user wants to see how their loan default rates will change (according to their trained probability of default model) when they change an entire dataset to simulate that all their customers are under more financial stress (such as lower FICO scores, lower savings balances, higher unemployment, etc.). Change the values of the variables in their entire dataset, and look at the **Percentage Change** in the average model score (default probability) on the original and new data.", + "prompt_type": "plain" + }, + { + "output": "- **Random**: Set variables to random values, and then rescore the model. This lets users look for things they may not have otherwise considered. Additional Resources ^^^^^^^^^^^^^^^^^^^^ `Sensitivity Analysis on a Driverless AI Model `__: This ipynb uses the `UCI credit card default data `__ to perform sensitivity analysis and test model performance. .. _dai-permutation-feature-importance: Permutation Feature Importance ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. note:: - This plot is only available for binary classification and regression experiments. - When permutation importance is enabled for interpretations, it is run as part of the interpretation process, regardless of whether it was run for the original experiment or AutoDoc. Permutation-based feature importance shows how much a model's performance would change if a feature's values were permuted.", + "prompt_type": "plain" + }, + { + "output": "If a feature is highly predictive, however, shuffling its values should decrease the model's performance. 
The difference between the model's performance before and after permuting the feature provides the feature's absolute permutation importance. .. figure:: images/permutation_feature_importance.png :alt: Surrogate Model Plots --------------------- This section describes the plots that are available in the Surrogate Model Tab. .. _klime-limesup: K-LIME and LIME-SUP ~~~~~~~~~~~~~~~~~~~ The MLI screen includes a :ref:`K-LIME ` (K local interpretable model-agnostic explanations) or :ref:`LIME-SUP ` (Locally Interpretable Models and Effects based on Supervised Partitioning) graph. A K-LIME graph is available by default when you interpret a model from the experiment page. When you create a new interpretation, you can instead choose to use LIME-SUP as the LIME method. Note that these graphs are essentially the same, but the K-LIME/LIME-SUP distinction provides insight into the LIME method that was used during model interpretation.", + "prompt_type": "plain" + }, + { + "output": "**Summary** K-LIME creates one global surrogate GLM on the entire training data and also creates numerous local surrogate GLMs on samples formed from *k*-means clusters in the training data. The parameters of the global K-LIME model give an indication of overall linear feature importance and the overall average direction in which an input variable influences the Driverless AI model predictions. The in-cluster linear model parameters can be used to profile the local region, to give an average description of the important variables in the local region, and to understand the average direction in which an input variable affects the Driverless AI model predictions. **Additional details** K-LIME is a variant of the LIME technique proposed by Ribeiro et al. (2016).
K-LIME generates global and local explanations that increase the transparency of the Driverless AI model, and allow model behavior to be validated and debugged by analyzing the provided plots, and comparing global and local explanations to one-another, to known standards, to domain knowledge, and to reasonable expectations.", + "prompt_type": "plain" + }, + { + "output": "use_all_columns_klime_kmeans in the config.toml file to true. All penalized GLM surrogates are trained to model the predictions of the Driverless AI model. The number of clusters for local explanations is chosen by a grid search in which the :math:`R^2` between the Driverless AI model predictions and all of the local K-LIME model predictions is maximized. The global and local linear model's intercepts, coefficients, :math:`R^2` values, accuracy, and predictions can all be used to debug and develop explanations for the Driverless AI model's behavior. In addition to the usage described in the preceding section, the global model is also used to generate explanations for very small clusters (:math:`N < 20`) where fitting a local linear model is inappropriate. As described in the preceding section, the in-cluster linear model parameters can be used to profile the local region, to give an average description of the important variables in the local region, and to understand the average direction in which an input variable affects the Driverless AI model predictions.", + "prompt_type": "plain" + }, + { + "output": "By disaggregating the K-LIME predictions into individual coefficient and input variable value products, the local linear impact of the variable can be determined. This product is sometimes referred to as a reason code and is used to create explanations for the Driverless AI model's behavior. .. raw:: html \"Recipe **Reason codes in K-LIME** The K-LIME plot includes a **Reason codes** page that can be accessed by clicking the **Explanations** button.
From the **Reason codes** page, you can view information about both cluster-specific reason codes and global reason codes. In K-LIME, reason code values are calculated by determining each coefficient-feature product. Reason code values are also written into automatically generated reason codes, available in the local reason code section of the explanations dialog. In the following example, reason codes are created by evaluating and disaggregating a local linear model.", + "prompt_type": "plain" + }, + { + "output": "By taking into consideration the value of each contribution, reason codes for the Driverless AI decision can be derived. debt_to_income_ratio and credit_score would be the two largest negative reason codes, followed by savings_acct_balance. The local linear model intercept and the products of each coefficient and corresponding value sum to the K-LIME prediction. Moreover it can be seen that these linear explanations are reasonably representative of the nonlinear model's behavior for this individual because the K-LIME predictions are within 5.5% of the Driverless AI model prediction. This information is encoded into English language rules which can be viewed by clicking the **Explanations** button. Like all LIME explanations based on linear models, the local explanations are linear in nature and are offsets from the baseline prediction, or intercept, which represents the average of the penalized linear model residuals. Of course, linear approximations to complex non-linear response functions will not always create suitable explanations and users are urged to check the K-LIME plot, the local model :math:`R^2`, and the accuracy of the K-LIME prediction to understand the validity of the K-LIME local explanations.", + "prompt_type": "plain" + }, + { + "output": "In cases where K-LIME linear models are not fitting the Driverless AI model well, nonlinear LOCO feature importance values may be a better explanatory tool for local model behavior. 
As K-LIME local explanations rely on the creation of *k*-means clusters, extremely wide input data or strong correlation between input variables may also degrade the quality of K-LIME local explanations. .. _limesup_technique: The LIME-SUP Technique ^^^^^^^^^^^^^^^^^^^^^^ This plot is available for binary classification and regression models. LIME-SUP explains local regions of the trained Driverless AI model in terms of the original variables. Local regions are defined by each leaf node path of the decision tree surrogate model instead of simulated, perturbed observation samples - as in the original LIME. For each local region, a local GLM model is trained on the original inputs and the predictions of the Driverless AI model. Then the parameters of this local GLM can be used to generate approximate, local explanations of the Driverless AI model.", + "prompt_type": "plain" + }, + { + "output": "This graph is interactive. Hover over the **Model Prediction**, **LIME Model Prediction**, or **Actual Target** radio buttons to magnify the selected predictions. Or click those radio buttons to disable the view in the graph. You can also hover over any point in the graph to view LIME reason codes for that value. By default, this plot shows information for the global LIME model, but you can change the plot view to show local results from a specific cluster. The LIME plot also provides a visual indication of the linearity of the Driverless AI model and the trustworthiness of the LIME explanations. The closer the local linear model approximates the Driverless AI model predictions, the more linear the Driverless AI model and the more accurate the explanation generated by the LIME local linear models. .. figure:: images/global_interpretable.png :alt: .. 
_decision-tree: Surrogate Decision Tree ~~~~~~~~~~~~~~~~~~~~~~~ The decision tree surrogate model increases the transparency of the Driverless AI model by displaying an *approximate* flow-chart of the complex Driverless AI model's decision making process.", + "prompt_type": "plain" + }, + { + "output": "The decision tree surrogate model can be used for visualizing, validating, and debugging the Driverless AI model by comparing the displayed decision-process, important variables, and important interactions to known standards, domain knowledge, and reasonable expectations. It is known to date back at least to 1996 (Craven and Shavlik). A surrogate model is a data mining and engineering technique in which a generally simpler model is used to explain another usually more complex model or phenomenon. Given our learned function :math:`g` and set of predictions, :math:`g(X) = \\hat{Y}`, we can train a surrogate model :math:`h`: :math:`X,\\hat{Y} \\xrightarrow{\\mathcal{A}_{\\text{surrogate}}} h`, such that :math:`h(X)` is approximately equal to :math:`g(X)`. To preserve interpretability, the hypothesis set for :math:`h` is often restricted to linear models or decision trees. For the purposes of interpretation in Driverless AI, :math:`g` is considered to represent the entire pipeline, including both the feature transformations and model, and the surrogate model is a decision tree (:math:`h_{\\text{tree}}`).", + "prompt_type": "plain" + }, + { + "output": "The RMSE for :math:`h_{\\text{tree}}` is displayed for assessing the fit between :math:`h_{\\text{tree}}` and :math:`g`. :math:`h_{\\text{tree}}` is used to increase the transparency of :math:`g` by displaying an approximate flow chart of the decision making process of :math:`g` as displayed in the following image: .. figure:: images/dt_surrogate.png :alt: :math:`h_{\\text{tree}}` also shows the likely important features and the most important interactions in :math:`g`. 
:math:`h_{\\text{tree}}` can be used for visualizing, validating, and debugging :math:`g` by comparing the displayed decision-process, important features, and important interactions to known standards, domain knowledge, and reasonable expectations. The preceding image displays the decision tree surrogate, :math:`h_{\\text{tree}}`, for an example probability of default model, :math:`g`, created with Driverless AI using the UCI repository credit card default data (see https://www.kaggle.com/uciml/default-of-credit-card-clients-dataset).", + "prompt_type": "plain" + }, + { + "output": "First level interactions between PAY_0 and PAY_2 and between PAY_0 and PAY_5 are visible along with several second level interactions. Following the decision path to the lowest probability leaf node in :math:`h_{\\text{tree}}` (lower left in the preceding image) shows that customers who pay their first (PAY_0) and second (PAY_2) month bills on time are the least likely to default according to :math:`h_{\\text{tree}}`. The thickness of the edges in this path indicate that this is a very common decision path through :math:`h_{\\text{tree}}`. Following the decision path to the highest probability leaf node in :math:`h_{\\text{tree}}` (second from right in the preceding image) shows that customers who are late on their first (PAY_0) and fifth (PAY_5) month bills and who pay less than 16520 in their sixth payment (PAY_AMT6) are the most likely to default according to :math:`h_{\\text{tree}}`. The thinness of the edges in this path indicate that this is a relatively rare decision path through :math:`h_{\\text{tree}}`.", + "prompt_type": "plain" + }, + { + "output": "When a single observation, :math:`x^{(i)}`, is selected, its path through :math:`h_{\\text{tree}}` is highlighted. The path of :math:`x^{(i)}` through :math:`h_{\\text{tree}}` can be helpful when analyzing the logic or validity of :math:`g(x^{(i)})`.
MLI Taxonomy: Decision Tree Surrogate Models ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - **Scope of Interpretability**: - (1) Generally, decision tree surrogates provide global interpretability. - (2) The attributes of a decision tree are used to explain global attributes of a complex Driverless AI model such as important features, interactions, and decision processes. - **Appropriate Response Function Complexity**: Decision tree surrogate models can create explanations for models of nearly any complexity. - **Understanding and Trust**: - (1) Decision tree surrogate models foster understanding and transparency because they provide insight into the internal mechanisms of complex models.", + "prompt_type": "plain" + }, + { + "output": "- **Application Domain**: Decision tree surrogate models are model agnostic. Surrogate Decision Tree Plot ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This plot is available for binary and multiclass classification models as well as regression models. In the Decision Tree plot, the highlighted row shows the path to the highest probability leaf node and indicates the globally important variables and interactions that influence the Driverless AI model prediction for that row. You can view rules for a specific path by clicking the path's terminal node. **Note**: For a list of Surrogate Decision Tree explainer expert settings, see :ref:`interpretation-expert-settings-surrogate-dt`. .. raw:: html \"Surrogate For multiclass models, decision trees are created for each class. To view a decision tree for a specific class, click **Class** in the upper-left corner of the page and select the class you want to view a decision tree for.", + "prompt_type": "plain" + }, + { + "output": "**Global Feature Importance vs Local Feature Importance** Global feature importance (yellow) is a measure of the contribution of an input variable to the overall predictions of the Driverless AI model. 
Global feature importance is calculated by aggregating the improvement in splitting criterion caused by a single variable across all of the decision trees in the Random Forest surrogate model. Local feature importance (grey) is a measure of the contribution of an input variable to a single prediction of the Driverless AI model. Local feature importance values for regression and binomial cases are calculated by tracing single rows of data through the random forest surrogate model and returning the absolute LOCO values. For the multiclass case, local feature importance values are calculated by re-scoring the trained supervised model and measuring the impact of setting each variable to missing. The absolute value of differences across classes is then calculated for each dropped or replaced column.", + "prompt_type": "plain" + }, + { + "output": "**Note**: Engineered features are used for MLI when a time series experiment is built. This is because munged time series features are more useful features for MLI than raw time series features, as raw time series features are not IID (Independent and Identically Distributed). .. figure:: images/rf_feature_importance.png :alt: .. _rf-pdp-ice: Random Forest Partial Dependence and Individual Conditional Expectation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A Partial Dependence and ICE plot is available for both Driverless AI and surrogate models. Refer to the previous :ref:`pdp-ice` section for more information about this plot. .. _rf-loco: Random Forest LOCO ~~~~~~~~~~~~~~~~~~ This plot is available for binary and multiclass classification models as well as regression models. 
Local feature importance describes how the combination of the learned model rules or parameters and an individual row's attributes affect a model's prediction for that row while taking nonlinearity and interactions into effect.", + "prompt_type": "plain" + }, + { + "output": "The LOCO-variant method for binary and regression models is calculated by traversing the random forest surrogate model and removing the prediction contribution of any rule containing the variable of interest for every tree from the original prediction. Local LOCO values are calculated by tracing single rows of data through the random forest surrogate model. Global LOCO values are the average of the LOCO values over every row of a dataset. The LOCO-variant method for multiclass models differs slightly in that it calculates row-wise local feature importance values by re-scoring the trained supervised model and measuring the impact of setting each variable to missing. The sum of the absolute value of differences across classes is then calculated for each dropped or replaced column. 
Given the row of input data with its corresponding Driverless AI and K-LIME predictions: +-------------+-----+----------+-----------+-----------+-------------+ | debt_ | cr | saving | o | H2OAI_pr | K-LIME_ | | to_income\\_ | edi | s_acct\\_ | bserved\\_ | edicted\\_ | predicted\\_ | | ratio | t\\_ | balance | default | default | default | | | sc | | | | | | | ore | | | | | +=============+=====+==========+===========+===========+=============+ | 30 | 600 | 1000 | 1 | 0.85 | 0.9 | +-------------+-----+----------+-----------+-----------+-------------+ Taking the Driverless AI model as F(**X**), LOCO-variant feature importance values are calculated as follows.", + "prompt_type": "plain" + }, + { + "output": ":math:`\\text{Scaled}(\\text{LOCO}_{debt\\_to\\_income\\_ratio}) = \\text{Abs}(\\text{LOCO}_{~debt\\_to\\_income\\_ratio}/0.14) = 1` :math:`\\text{Scaled}(\\text{LOCO}_{credit\\_score}) = \\text{Abs}(\\text{LOCO}_{~credit\\_score}/0.14) = 0.86` :math:`\\text{Scaled}(\\text{LOCO}_{savings\\_acct\\_balance}) = \\text{Abs}(\\text{LOCO}_{~savings\\_acct\\_balance} / 0.14) = 0.21` One drawback to these LOCO-variant feature importance values is, unlike K-LIME, it is difficult to generate a mathematical error rate to indicate when LOCO values may be questionable. .. figure:: images/loco_plot.png :alt: .. _nlp-surrogate: NLP Surrogate Models ~~~~~~~~~~~~~~~~~~~~ These plots are available for natural language processing (NLP) models. For NLP surrogate models, Driverless AI creates a TF-IDF matrix by tokenizing all text features. The resulting frame is appended to numerical or categorical columns from the training dataset, and the original text columns are removed. 
This frame is then used for training surrogate models that have prediction columns consisting of tokens and the original numerical or categorical features.", + "prompt_type": "plain" + }, + { + "output": "- Each row in the TF-IDF matrix contains :math:`N` columns, where :math:`N` is the total number of tokens in the corpus with values that are appropriate for that row (0 if absent). - Driverless AI does not currently generate a K-LIME scoring pipeline for MLI NLP problems. .. _surrogate-models-on-residuals: Running Surrogate Models on Residuals ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In Driverless AI, residuals (differences between observed and predicted values) can be used as targets in MLI surrogate models for the purpose of debugging models. The method used to calculate residuals varies depending on the type of problem. For classification problems, logloss residuals are calculated for a specified class. For regression problems, residuals are determined by calculating the square of the difference between targeted and predicted values. To run MLI surrogate models on residuals, enable the **Debug Model Residuals** interpretation expert setting. For classification experiments, specify a class to use as an outcome of interest with the **Class for Debugging Classification Model Logloss Residuals** interpretation expert setting (not visible for regression problems).", + "prompt_type": "plain" + }, + { + "output": ".. figure:: images/mli_surrogate_residuals.png :alt: .. _mli-nlp-plots: NLP Plots --------- This section describes the plots that are available in the NLP tab. - :ref:`dai-nlp-loco` - :ref:`mli-nlp-pdp` - :ref:`mli-nlp-tokens` - :ref:`mli-nlp-vlm` .. note:: - The following plots are only available for natural language processing (NLP) models. .. _dai-nlp-loco: NLP Leave-One-Covariate-Out (LOCO) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This plot is available for binomial, multiclass, and regression natural language processing (NLP) models. 
It is located in the **NLP** tab on the Model Interpretation page, which is only visible for NLP models. .. raw:: html \"NLP This plot applies a leave-one-covariate-out (LOCO) styled approach to NLP models by removing a specific token, which is obtained by TF-IDF, from only a single column where the token is occurring. For example, if there is a token foo in both column1 and column2, LOCO is computed for both columns separately, even though the token is the same.", + "prompt_type": "plain" + }, + { + "output": "In addition, if a token does **not** exist in a row, then it is appended before calculating LOCO to ensure the token was evaluated across all rows. The difference between the resulting score and the original score (token included) is useful when trying to determine how specific changes to text features alter the predictions made by the model. Driverless AI fits a separate TF-IDF vectorizer for each individual column and concatenates the results. The terms (tokens) in the resulting importance frames are then wrapped with column names: .. table:: Column Names Example +-----------------------+-----------------------+-----------------------+ | column1('and') | column1('apple') | column2('and') | +=======================+=======================+=======================+ | 0.1 | 0.0005 | 0.412512 | +-----------------------+-----------------------+-----------------------+ The NLP LOCO plot lets you view text for a specific row by specifying a row number.", + "prompt_type": "plain" + }, + { + "output": "You can switch between different text features and view their respective importances globally and locally. .. note:: - Due to computational complexity, the global importance value is only calculated for :math:`N` (20 by default) tokens. This value can be changed with the mli_nlp_top_n configuration option.
- A specific token selection method can be used by specifying one of the following options for the mli_nlp_min_token_mode configuration option: - linspace: Selects :math:`N` evenly spaced tokens according to their TF-IDF score (Default) - top: Selects top :math:`N` tokens by TF-IDF score - bottom: Selects bottom :math:`N` tokens by TF-IDF score - Local values for NLP LOCO can take a significant amount of time to calculate depending on the specifications of your hardware. - Driverless AI does not currently generate a K-LIME scoring pipeline for MLI NLP problems. .. _mli-nlp-pdp: NLP Partial Dependence Plot ~~~~~~~~~~~~~~~~~~~~~~~~~~~ This plot is available for binomial, multiclass, and regression natural language processing (NLP) models.", + "prompt_type": "plain" + }, + { + "output": "NLP partial dependence (yellow) portrays the average prediction behavior of the Driverless AI model when an input text token is left in its respective text and not included in its respective text along with +/- 1 standard deviation bands. ICE (grey) displays the prediction behavior for an individual row of data when an input text token is left in its respective text and not included in its respective text. The text tokens are generated from TF-IDF. .. raw:: html \"NLP .. _mli-nlp-tokens: NLP Tokenizer ~~~~~~~~~~~~~ This plot is available for natural language processing (NLP) models. It is located in the **NLP** tab on the Model Interpretation page, which is only visible for NLP models. .. raw:: html \"NLP This plot shows both the global and local importance values of each token in a corpus (a large and structured set of texts).", + "prompt_type": "plain" + }, + { + "output": "Local importance values are calculated by using the term frequency\u2013inverse document frequency (TF-IDF) as a weighting factor for each token in each row.
The TF-IDF increases proportionally to the number of times a token appears in a given document and is offset by the number of documents in the corpus that contain the token. Specify the row that you want to view, then click the **Search** button to see the local importance of each token in that row. Global importance values are calculated by using the inverse document frequency (IDF), which measures how common or rare a given token is across all documents. (Default View) You can download an archive of files relating to the NLP Tokenizer plot by clicking \"NLP Tokenizer ZIP Archive\" in the NLP tab. .. note:: - MLI for NLP does not currently feature the option to remove stop words. - By default, up to 10,000 tokens are created during the tokenization process. This value can be changed in the configuration. - By default, Driverless AI uses up to 10,000 documents to extract tokens from.", + "prompt_type": "plain" + }, + { + "output": "Downsampling is used for datasets that are larger than the default sample limit. - Driverless AI does not currently generate a K-LIME scoring pipeline for MLI NLP problems. - With the LOCO method, a specific token is removed from only a single column where the token is occurring. For example, if there is a tokenfooin bothcolumn1andcolumn2``, LOCO is\n computed for both columns separately, even though the token is the\n same. The TF-IDF for the token differs in both columns. NLP Vectorizer + Linear Model (VLM) Text Feature Importance\nThis plot is available for binomial and regression natural language\nprocessing (NLP) models. It is located in the NLP tab on the Model\nInterpretation page, which is only visible for NLP models. 
NLP Vectorizer + Linear Model (VLM) text feature importance uses TF-IDF\nof individual words as features from a text column of interest and\nbuilds a linear model (currently GLM) using those features and fits it\nto either the predicted class (binary classification) or the continuous\nprediction (regression) of the Driverless AI model.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Installation and Upgrade\n\nThe following sections describe how to install and upgrade Driverless\nAI.\n\nNote: Driverless AI is available as part of the H2O AI Cloud (HAIC)\nplatform or as a standalone offering. For information on HAIC, see the\nofficial documentation.\n\nsupported-environments installing-before-you-begin docker native cloud", + "prompt_type": "plain" + }, + { + "output": "Splitting Datasets\nDriverless AI lets you split a dataset into two subsets that can be used\nas training and validation/test datasets during modeling. When splitting\ndatasets for modeling, each split should have a similar distribution to\navoid over fitting on the training set. Depending on the use case, you\ncan either split the dataset randomly, perform a stratified sampling\nbased on the target column, perform a fold column-based split to keep\nrows belonging to the same group together, or perform a time\ncolumn-based split to train on past data and validate/test on future\ndata. Perform the following steps to split a dataset:\n1. Click the dataset or select the [Click for Actions] button next to\n the dataset that you want to split and select Split from the submenu\n that appears. 2. The Dataset Splitter form displays. Specify an Output Name 1 and an\n Output Name 2 for each segment of the split. (For example, you can\n name one segment test and the other validation.) 3. 
Optionally specify a Target column (for stratified sampling), a Fold\n column (to keep rows belonging to the same group together), a Time\n column, and/or a Random Seed (defaults to 1234).", + "prompt_type": "plain" + }, + { + "output": "MLI Custom Recipes\nThe techniques and methodologies used by Driverless AI for model\ninterpretation can be extended with recipes (Python code snippets). You\ncan use your own recipes in combination with or in place of DAI's\nbuilt-in recipes. This lets you extend the capabilities of MLI\nexplainers and out of the box interpretation techniques. The following\nsteps describe how to upload and enable custom recipes in the Machine\nLearning Interpretability (MLI) view. Note\nFor more information on MLI custom recipes including best practices,\ntutorials, explainer templates, and explainer examples, see the official\nRecipes for Machine Learning Interpretability in Driverless AI repository . To upload a custom recipe:\n 1. Navigate to the MLI page and click the New Interpretation button. Select Upload MLI Recipe from the drop-down menu. You can also\n select MLI Recipe URL to load a recipe from a raw file, a GitHub\n repository / tree, or a local directory.", + "prompt_type": "plain" + }, + { + "output": "Multinode Training (Alpha)\n\nDriverless AI can be configured to run in a multinode worker mode. This\ndocument describes the multinode training process and how to configure\nit.\n\nNotes: For more information on queuing in Driverless AI, see\ndai-queuing.\n\nredis_multinode dask_multinode multinode_example health_api", + "prompt_type": "plain" + }, + { + "output": "Using Driverless AI configuration options\nThis page describes how to use Driverless AI (DAI) configuration\noptions. 
- understanding-configs\n- understanding-expert-settings\n- toml_editor_using\n- expert-settings-use-case\nUnderstanding DAI configuration options\nDriverless AI features many different kinds of configuration options\nthat you can use to configure various aspects of your DAI environment,\nincluding authentication, data connectors, UI, experiments, and MLI. The\nfollowing methods can be used to control the available DAI configuration\noptions:\n- Administrators can edit the config.toml file, which is a\n configuration file that uses the TOML v0.5.0 file format. The\n config.toml file lets you control all of the configuration options\n documented in the dai_config page. For more information, see\n config_file. - Using the Expert Settings window, which is accessible from the\n Experiment Setup page by clicking Expert Settings. - Using the built-in TOML config editor, which is accessible from the\n Expert Settings window.", + "prompt_type": "plain" + }, + { + "output": "Note\nSome configuration options, such as those related to authentication and\ndata connectors, are applied when starting the DAI server and cannot be\nchanged without restarting the DAI server. Understanding Expert Settings\nWhen creating an experiment, you can specify basic\nsettings for the experiment such as whether to\ndrop specific columns or whether to include a validation dataset. However, you may want to customize the experiment in a manner that is\nbeyond the scope of these basic settings\u2014in this case, Expert Settings\ncan be used to further fine-tune the experiment. For example, you can\nuse Expert Settings to include specific models or transformers as part\nof the experiment. To open the Expert Settings window, click Expert\nSettings on the Experiment Setup page. []\nNotes:\n- For supervised experiments, the Expert Settings window cannot be\n accessed until a target column has been selected. 
- Some of the settings listed in the dai_config page are not exposed\n in the Expert Settings window.", + "prompt_type": "plain" + }, + { + "output": "Navigating the Expert Settings window\nThe following sections describe how to navigate the Expert Settings\nwindow. Tabbed view\nWhen the Tabbed view is selected, the available Expert Settings are\norganized into the following tabs and sub-tabs. For each sub-tab in the\nfollowing list, the available settings are organized into Common and\nAdvanced settings. - Training: Configure settings related to the model training process. - General\n - Data\n - Feature Engineering\n - Models\n - Genetic Algorithm\n - Validation\n - Deployment\n- Documentation: Configure settings related to AutoDoc, model\n performance, and model interpretation. - General\n - Data\n - Models\n - Model Performance\n - Interpretation\n- System: Configure system-related settings. (This tab has only one\n sub-tab that is also called System.) []\nTabbed view: sub-tabs\nThe following is a list of sub-tab level categories:\n- Common\n- Advanced\n- Image\n- NLP\n- Time Series\n- Unsupervised\nFlat view\nYou can also select the Flat view to view all of the available settings\nin a single searchable window.", + "prompt_type": "plain" + }, + { + "output": "Searching for specific settings\nTo locate a specific Expert Setting, click the search box and type the\nconfiguration name of the Expert Setting you want to locate. For some\nExpert Settings, additional results for related Expert Settings are also\ndisplayed. Filtering settings by tags\nTo filter the list of available settings by specific tags, click the\nFilter by Tags button and select the checkbox next to the tag(s) that\nyou want to filter the list of available settings by. Note that both\nglobal and sub-tab level filtering are supported. []\nAdding custom recipes\nYou can add custom recipes from the Expert Settings window by clicking\nthe Add Custom Recipes button. 
Select one of the following options:\n- From computer: Add a custom recipe as a Python or ZIP file from your\n local file system. - From URL: Add one or more custom recipes from a URL that points to\n one of the following locations:\n - A GitHub repository. For example, you can enter\n https://github.com/h2oai/driverlessai-recipes/ to add all the\n custom recipes contained in the official Recipes for\n Driverless AI repository.", + "prompt_type": "plain" + }, + { + "output": "For example, you can enter\n https://github.com/h2oai/driverlessai-recipes/tree/master/models\n to add only the custom model recipes contained in the official\n Recipes for Driverless AI repository, or enter\n https://github.com/h2oai/driverlessai-recipes/tree/master/models/algorithms\n to add only the custom algorithm recipes contained in the\n repository. - A file system path. This option is equivalent to the File\n System option when adding datasets. - From Bitbucket: Add a custom recipe from a Bitbucket repository. To\n use this option, your Bitbucket username and password must be\n provided along with the custom recipe Bitbucket URL. - With Editor: Add a custom recipe with a built-in code editor. []\nNote that you can also view the official Recipes for Driverless AI\nrepository from the Expert Settings window by clicking the Official\nRecipes button. Using the built-in TOML config editor\nThe TOML configuration editor lets you manually add, remove, or edit\nExpert Setting parameters.", + "prompt_type": "plain" + }, + { + "output": "To open the built-in TOML configuration\neditor, click Edit TOML in the Expert Settings window. Opening the\nbuilt-in TOML editor is currently the best way to review changed\nconfiguration items in a single location. []\nThe built-in TOML editor is synchronized with the Expert Settings\nwindow. This means that if you change the default value of an expert\nsetting from the Expert Settings window, that change is displayed in the\nTOML configuration editor. 
For example, if you set the Make MOJO scoring\npipeline setting in the Experiment tab to Off, then the line\nmake_mojo_scoring_pipeline = \"off\" is displayed in the TOML editor. Conversely, if you make changes using the TOML editor, those changes are\nalso visible from the Expert Settings window. You can confirm that your\nchanges have been correctly entered into the editor by checking whether\nthe relevant settings have also changed in the Expert Settings window. To confirm your changes, click Save. The experiment preview updates to\nreflect your specified configuration changes.", + "prompt_type": "plain" + }, + { + "output": "This section provides Driverless AI with\ninformation about which custom recipes can be used by the experiment. This is important for keeping experiments comparable when performing\nretrain / refit operations. Note\n- The settings listed in the dai_config page cannot be edited from the\nbuilt-in TOML editor unless they are exposed in the Expert Settings\nwindow. - For information on TOML, see TOML v0.5.0. Order of settings in the TOML editor\nWhen using the built-in TOML editor, ensure that settings are added in\nthe following order:\n1. Booleans, integers, strings, and lists\n2. Unprocessed dictionaries, which are automatically processed after\n clicking the Save button\n3. Processed dictionaries\nChecking TOML validity\nThe TOML Python library can be used to check the validity of your TOML\nto avoid errors when using the built-in TOML editor. To install the TOML\nPython library, run the following command:\n pip install toml\nThe following examples demonstrate how the TOML Python library can be\nused to check whether your TOML is valid.", + "prompt_type": "plain" + }, + { + "output": "The toml.loads() function is then used to\n convert the string into a dictionary. 
- Entering an invalid string: In the following example, an error is\n returned after attempting to convert the entered TOML string into a\n dictionary, which means that the entered string is not valid. Sample use case: Hyperparameter tuning\nThe following steps describe how to perform hyperparameter tuning by\nusing the params_tune_lightgbm Expert Setting. 1. On the Experiments page, click the New Experiment button and select\n a training dataset to use for the experiment. 2. Select a target column and specify a test dataset to use for the\n experiment. 3. Click Expert Settings to open the Expert Settings window. 4. Go to the Recipes tab. For the Include specific models setting,\n click Uncheck All and select LightGBM from the list of available\n models. Click Done to confirm your selection. Completing this step\n lets you view how only LightGBM mutates. 5. In the Expert Settings window, enter params_tune into the search box\n to view all of the available params_tune TOMLs.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Security\nObjective\nThis document describes different aspects of Driverless AI security and\nprovides guidelines to secure the system by reducing its surface of\nvulnerability. This section covers the following areas of the product:\n - security_user_access\n - security_auth (Also see dai_auth)\n - Authorization\n - security_data\n - security_data_import\n - security_data_export\n - security_logs\n - security_data_isolation\n - security_client_server\n - security_response_headers\n - security_recommended_headers\n - security_other_headers\n - security_web_ui\n - security_custom_recipe\n - security_config (Also see\n in depth documentation on configuration\n security in DAI)\nImportant things to know\nWarning\nWARNING Security in a default installation of Driverless AI is DISABLED! 
By default, a Driverless AI installation targets ease-of-use and does\nnot enable all security features listed in this document.", + "prompt_type": "plain" + }, + { + "output": "------------------------------------------------------------------------\nUser Access\nAuthentication\nDriverless AI supports Client Certificate, LDAP, Local, mTLS, OpenID,\nPAM, none, and unvalidated (default) authentication. These can be\nconfigured by specifying the environment variables when starting the\nDriverless AI Docker image or by specifying the appropriate\nconfiguration options in the config.toml file. For more info, see\ndai_auth. --------------------------------------------------------------------------------------------------------------\n Option D efa ult Va lue Recommended Value Description\n ----------------------------------------- ------------------- ------------------------------- ----------------\n a uthenticati on_method \"un val ida ted \" Any supported authentication Define user\n (e.g., LDAP, PAM) method except authentication\n \"unvalidated\" and \"none\".", + "prompt_type": "plain" + }, + { + "output": "authe ntication_d efault_time out_hours 7 2 Consult your security Number of hours\n requirements. after which a\n user has to\n relogin. --------------------------------------------------------------------------------------------------------------\nmTLS Authentication\nDriverless AI supports Mutual TLS authentication (mTLS) by setting a\nspecific verification mode along with a certificate authority file, an\nSSL private key, and an SSL certificate file. For more information, see\nthe mtls_auth. Authorization Methods\nDriverless AI does not currently perform any authorization. 
------------------------------------------------------------------------\nData Security\nData Import\n ----------------------------------------------------------------------------------------------------------------\n Op tion D efault Value Recommended Value Description\n --------------------------- ------------------------------ ----------------------------- -----------------------\n en able d_fi le_s yste ms \"u pload, file, hdfs, s3\" Configure only needed data Control list of\n sources.", + "prompt_type": "plain" + }, + { + "output": "ma x_fi le_u ploa d_si ze 104 857600 000B Configure based on expected Limit maximum size of\n file size and size of uploaded file. Driverless AI deployment. su ppor ted_ file _typ es see confi g.toml It is recommended to limit Supported file formats\n file types to extension used listed in filesystem\n in the target environment browsers. (e.g., parquet). sh ow_a ll_f iles yste ms true false Show all available data\n sources in WebUI (even\n though there are not\n configured).", + "prompt_type": "plain" + }, + { + "output": "----------------------------------------------------------------------------------------------------------------\nData Export\n ---------------------------------------------------------------------------------------------------------\n Option Def ault V alue Recommended Description\n Value \n ----------------------------------- ---------------- ---------------- -----------------------------------\n enab le_dataset_d ownloading tr ue false (disable Control ability to download any\n download of datasets (uploaded, predictions,\n datasets) MLI). Note: if dataset download is\n disabled, we strongly suggest to\n disable custom recipes as well to\n remove another way how data could\n be exported from the application.", + "prompt_type": "plain" + }, + { + "output": "(See notes below.) artif acts_store f ile_ syst em `file_system` Stores a MOJO on a file system\n directory denoted by\n artifac ts_file_system_directory. 
(See notes below.) artifacts _file_system _directory t mp tmp File system location where\n artifacts will be copied in case\n artifacts_store is set to\n file_system. (See notes below.) ---------------------------------------------------------------------------------------------------------\nNotes about Artifacts:\n- Currently, file_system is the only option that can be specified for\n artifacts_store. Additional options will be available in future\n releases.", + "prompt_type": "plain" + }, + { + "output": "- When these artifacts are enabled/configured, the menu options on the\n completed_experiment page change. Specifically, all \"Download\"\n options (with the exception of AutoDoc) change to \"Export.\" Refer to\n export_artifacts for more information. Logs\nThe Driverless AI produces several logs:\n - audit logs\n - server logs\n - experiment logs\nThe administrator of Driverless AI application (i.e., person who is\nresponsible for configuration and setup of the application) has control\nover content which is written to the logs. -------------------------------------------------------------------------------------------------------\n Option D ef au Reco Description\n lt V al mmended \n ue Value \n ------------------------------------------- ------- --------- -----------------------------------------\n audit_lo g_retentio n_period `5 ` (d 0 ( Number of days to keep audit logs.", + "prompt_type": "plain" + }, + { + "output": "audit log \n ro \n tation) \n do_not_ log_list s ee c --- Contain list of configuration options\n on fi which are not recorded in logs. g. to \n ml \n l og_level `1 ` see conf Define verbosity of logging\n ig.toml \n collect_se rver_logs_ in_experim ent_logs `f al false Dump server logs with experiment. se ` Dangerous because server logs can contain\n information about experiments of other\n users using Driverless AI. 
h2o _recipes_l og_level No ne --- Log level for OSS H2O instances used by\n custom recipes.", + "prompt_type": "plain" + }, + { + "output": "se ` \n write_ recipes_to _experimen t_logger `f al false Dump a custom recipe source code into\n se ` logs. -------------------------------------------------------------------------------------------------------\nUser Data Isolation\n+---------+---+----------------------+----------------------------------+\n| Option | D | Recommended Value | Description |\n| | e | | |\n| | f | | |\n| | a | | |\n| | u | | |\n| | l | | |\n| | t | | |\n| | V | | |\n| | a | | |\n| | l | | |\n| | u | | |\n| | e | | |\n+=========+===+======================+==================================+\n| da | | Specify proper name | Directory where Driverless AI |\n| ta_dir | \" | and location of | stores all computed experiments |\n| e ctory | | directory.", + "prompt_type": "plain" + }, + { + "output": "| | |\n| | | | |\n| | / | | |\n| | | | |\n| | t | | |\n| | | | |\n| | m | | |\n| | | | |\n| | p | | |\n| | | | |\n| | \" | | |\n| | | | |\n+---------+---+----------------------+----------------------------------+\n| file_ | | true | Hide data_directory in |\n| hide_da | t | | file-system browser.", + "prompt_type": "plain" + }, + { + "output": "|\n| | u | | |\n| | | | |\n| | e | | |\n| | | | |\n+---------+---+----------------------+----------------------------------+\n| f i | | true | Enable path filter for |\n| le_pat | f | | file-system browser (file data |\n| h_filte | | | source). By default the filter |\n| ring_e | a | | is disabled which means users |\n| n abled | | | can browse the entire |\n| | l | | application-local filesystem. 
|\n| | | | |\n| | s | | |\n| | | | |\n| | e | | |\n| | | | |\n+---------+---+----------------------+----------------------------------+\n| file_ | | Include a list of | List of absolute path prefixes |\n| path_fi | [ | folder paths or | to restrict access to in |\n| lter_i | | {{DAI_USERNAME}} for | file-browser.", + "prompt_type": "plain" + }, + { + "output": "For | |\n| | | example, | |\n| | | \"['/h | |\n| | | ome/{{DAI_USERNAME} | |\n| | | } /','/data/prod']\". | |\n+---------+---+----------------------+----------------------------------+\n| a ut | | \"\" | Directory where Driverless AI |\n| odoc_ a | \" | | searches for the updated AutoDoc |\n| dditio | | | templates. Providing empty value |\n| nal_tem | \" | | \"\" disables this functionality. |\n| plate_ | | | |\n| f older | | | |\n+---------+---+----------------------+----------------------------------+\n------------------------------------------------------------------------\nClient-Server Communication Security\n -----------------------------------------------------------------------------------------------\n Option Default Value Recommended Value Description\n ------------------ ------------------------------ ---------------------- ----------------------\n en able_h ttps false true Enable HTTPS\n ss l_key_ file \"/et c/dai/privat e_key.pem\" Correct private key.", + "prompt_type": "plain" + }, + { + "output": "ss l_crt_ file \"/etc/dai /cert.pem\" Correct public Public certificate to\n certifikate. setup HTTPS/SSL. ss l_no_s slv2 true true Prevents an SSLv2\n connection. ss l_no_s slv3 true true Prevents an SSLv3\n connection. ss l_no_t lsv1 true true Prevents an TLSv1\n connectiona. ssl_ no_tls v1_1 true true Prevents an TLSv1.1\n connection. 
ssl_ no_tls v1_2 false false (disable TLSv1.2 Prevents a TLSv1.2\n only if TLSv1.3 is connection.", + "prompt_type": "plain" + }, + { + "output": "-----------------------------------------------------------------------------------------------\nHTTP Cookie Attributes\nBy default, HTTP cookies used by Driverless AI are issued with the\nfollowing attributes:\n - HTTPOnly: True\n - SameSite: Lax\nIf either of these needs to be overridden, or if more custom attributes\nneed to be set, you can use the config http_cookie_attributes to specify\nkey-value pairs of so-called cookie morsels. For a list of supported\nkeys, see the official Python documentation. Response Headers\nThe response headers which are passed between Driverless AI server and\nclient (browser, Python/R clients) are controlled via the following\noption:\n ---------------------------------------------------------------------------\n Option Default Re Description\n Value commended \n Value \n --------------------- --------- ----------- -------------------------------\n extra_ht tp_headers \"{}\"`` See below Configure HTTP header returned\n in server response.", + "prompt_type": "plain" + }, + { + "output": "The | | |\n| | max-age | | |\n| | specifies | | |\n| | time, in | | |\n| | seconds, | | |\n| | that the | | |\n| | browser | | |\n| | should | | |\n| | remember | | |\n| | that a | | |\n| | site is | | |\n| | only to | | |\n| | be | | |\n| | accessed | | |\n| | using | | |\n| | HTTPS.", + "prompt_type": "plain" + }, + { + "output": "| c.mozilla.org/gu |\n| | certain | | idelines/web_sec |\n| | types of | | urity#Examples_5 |\n| | attacks, | | |\n| | including | | |\n| | Cross | | |\n| | Site | | |\n| | Scripting | | |\n| | and data | | |\n| | injection | | |\n| | attacks. 
| | |\n| | Controls | | |\n| | from | | |\n| | where the | | |\n| | page can | | |\n| | download | | |\n| | source.", + "prompt_type": "plain" + }, + { + "output": "| | |\n| | The value | | |\n| | here | | |\n| | overrides | | |\n| | the | | |\n| | default, | | |\n| | which is | | |\n| | SAM | | |\n| | E ORIGIN. | | |\n+------+-----------+--------------------------------+------------------+\n| X-C | Prevents | nosniff | https://develope |\n| o nt | the | | r.mozilla.org/en |\n| en t | browser | | -US/docs/Web/HTT |\n| -Ty | from | | P/Headers/X-Cont |\n| pe-O | trying to | | ent-Type-Options |\n| pti | determine | | |\n| o ns | the con | | |\n| | tent-type | | |\n| | of a | | |\n| | resource | | |\n| | that is | | |\n| | different | | |\n| | than the | | |\n| | declared | | |\n| | cont | | |\n| | ent-type.", + "prompt_type": "plain" + }, + { + "output": "|\n| Prot | rotection | | org/en-US/docs/W |\n| ect | response | | eb/HTTP/Headers/ |\n| i on | header is | | X-XSS-Protection |\n| | a feature | | |\n| | of | | |\n| | Internet | | |\n| | Explorer, | | |\n| | Chrome | | |\n| | and | | |\n| | Safari | | |\n| | that | | |\n| | stops | | |\n| | pages | | |\n| | from | | |\n| | loading | | |\n| | when they | | |\n| | detect | | |\n| | reflected | | |\n| | c | | |\n| | ross-site | | |\n| | scripting | | |\n| | (XSS) | | |\n| | attacks.", + "prompt_type": "plain" + }, + { + "output": "| | |\n+------+-----------+--------------------------------+------------------+\nOther Headers to Consider\n ------------------------------------------------------------------------\n Header Documentation\n ------------------ -----------------------------------------------------\n Pub lic-Key-Pins https://developer\n CORS-related .mozilla.org/en-US/docs/Web/HTTP/Public_Key_Pinning\n headers htt\n ps://developer.mozilla.org/en-US/docs/Web/HTTP/CORS\n 
------------------------------------------------------------------------\n------------------------------------------------------------------------\nWeb UI Security\nNote\nThe Driverless AI UI is designed to be user-friendly, and by default all\nfeatures like auto-complete are enabled. Disabling the user-friendly\nfeatures increases security of the application, but impacts\nuser-friendliness and usability of the application. -------------------------------------------------------------------------------------\n Option Def Recom Description\n ault V mended \n alue Value \n ----------------------------- ------- -------- --------------------------------------\n all ow_form_aut ocomplete tr ue f alse Control auto-completion in Web UI\n elements (e.g., login inputs).", + "prompt_type": "plain" + }, + { + "output": "show_all_fi lesystems tr ue f alse Show all available data sources in\n WebUI (even though they are not\n configured). It is recommended to show\n only configured data sources. verify_s ession_ip `fal true Verifies each request IP against IP\n se` which initialized the session. allow _concurrent _sessions tr ue f alse Disable concurrent sessions (logins). en able_xsrf_p rotection tr ue true Enable XSRF (cross-site request\n forgery) protection. e nable_secur e_cookies `fal true Enable SECURE cookie flag. Note that\n se` HTTPS must be enabled. -------------------------------------------------------------------------------------\n------------------------------------------------------------------------\nCustom Recipe Security\nNote\nBy default Driverless AI enables custom recipes as a main route for the\nway data-science teams can extend the application capabilities.", + "prompt_type": "plain" + }, + { + "output": "and bundle only a pre-defined\nand approved set of custom Driverless AI extensions. 
--------------------------------------------------------------------------------------------\n Option De fault Reco Description\n Value mmended \n Value \n ------------------------------------------- -------- --------- -----------------------------\n ena ble_custom_recipes t rue false Enable custom Python recipes. enable_cus tom_recipes_upload t rue false Enable uploading of custom\n recipes. enable_custo m_recipes_from_url t rue false Enable downloading of custom\n recipes from external URL. include_custom_ recipes_by_default fa lse false Include custom recipes in\n default inclusion lists.", + "prompt_type": "plain" + }, + { + "output": "Launching H2O Flow\n\nIf you opened port 12348 when starting Driverless AI, then you can\nlaunch H2O Flow from within Driverless AI. Click the H2O-3 link in the\ntop menu.\n\n[]\n\nThis launches Flow on port 12348.\n\n[]", + "prompt_type": "plain" + }, + { + "output": "mTLS Authentication Example\nDriverless AI supports Mutual TLS authentication (mTLS) by setting a\nspecific verification mode along with a certificate authority file, an\nSSL private key, and an SSL certificate file. The diagram below is a\nvisual representation of the mTLS authentication process. []\nDescription of Configuration Attributes\nUse the following configuration options to configure mTLS. - ssl_client_verify_mode: Sets the client verification mode. Choose\n from the following verification modes:\n- ssl_ca_file: Specifies the path to the certification authority (CA)\n certificate file, provided by your organization. This certificate\n will be used to verify the client certificate when client\n authentication is enabled. If this is not specified, clients are\n verified using the default system certificates. - ssl_key_file: Specifies your web server private key file. This is\n normally created by your organization's sys admin. 
- ssl_crt_file: Specifies your web server public certificate file.", + "prompt_type": "plain" + }, + { + "output": "- ssl_client_key_file: Required if\n ssl_client_verify_mode = \"CERT_REQUIRED\". Specifies the private key\n file that Driverless AI uses to authenticate itself. This is\n normally created by your organization's sys admin. - ssl_client_crt_file: Required if\n ssl_client_verify_mode = \"CERT_REQUIRED\". Specifies the private\n client certificate file that Driverless AI will use to authenticate\n itself. This is normally created by your organization's sys admin. - auth_tls_crl_file: Specifies the path to the certificate revocation\n list file that will be used to verify the client certificate. This\n file contains a list of revoked user IDs. Configuration Scenarios\nThe table below describes user certificate behavior for mTLS\nauthentication based on combinations of the configuration options\ndescribed above. +--------------------+--------------+------------------+--------------+\n| config.toml | User does | User has a | User has a |\n| settings | not have a | correct and | revoked |\n| | certificate | valid | certificate |\n| | | certificate | |\n+====================+==============+==================+==============+\n| ssl_client_verify | User certs | User certs are | User revoked |\n| _ mode='CERT_NONE' | are ignored | ignored | certs are |\n| | | | ignored |\n+--------------------+--------------+------------------+--------------+\n| ssl_ | User certs | User certs are | User revoked |\n| client_verify_mod | are ignored | set to | certs are |\n| e ='CERT_OPTIONAL' | | Driverless AI | not |\n| | | but are not used | validated |\n| | | for validating | |\n| | | the certs | |\n+--------------------+--------------+------------------+--------------+\n| ssl_ | Not allowed | User provides a | User revoke |\n| client_verify_mod | | valid | lists are |\n| e ='CERT_REQUIRED' | | certificate used | not |\n| | | by Driverless AI | validated |\n| | | but does not 
| |\n| | | authenticate the | |\n| | | user | |\n+--------------------+--------------+------------------+--------------+\n| sl_ | Not allowed | User provides a | User revoked |\n| client_verify_mod | | valid | certs are |\n| e ='CERT_REQUIRED' | | certificate.", + "prompt_type": "plain" + }, + { + "output": "| |\n+--------------------+--------------+------------------+--------------+\nEnabling mTLS Authentication\nDocker Image Installs\nTo enable mTLS authentication in Docker images, specify the\nauthentication environment variable that you want to use. Each variable\nmust be prepended with DRIVERLESS_AI. nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -p 12345:12345 \\\n -u `id -u`:`id -g` \\\n -e DRIVERLESS_AI_ENABLE_HTTPS=true \\\n -e DRIVERLESS_AI_SSL_KEY_FILE=/etc/dai/private_key.pem \\\n -e DRIVERLESS_AI_SSL_CRT_FILE=/etc/dai/cert.pem \\\n -e DRIVERLESS_AI_AUTHENTICATION_METHOD=tls_certificate \\\n -e DRIVERLESS_AI_SSL_CLIENT_VERIFY_MODE=CERT_REQUIRED \\\n -e DRIVERLESS_AI_SSL_CA_FILE=/etc/dai/rootCA.pem \\\n -e DRIVERLESS_AI_SSL_CLIENT_KEY_FILE=/etc/dai/client_config_key.key \\\n -e DRIVERLESS_AI_SSL_CLIENT_CRT_FILE=/etc/dai/client_config_cert.pem \\\n -v /user/log:/log \\\n -v /user/tmp:/tmp \\\n -v /user/certificates/server_config_key.pem:/etc/dai/private_key.pem \\\n -v /user/certificates/server_config_cert.pem:/etc/dai/cert.pem \\\n -v /user/certificates/client_config_cert.pem:/etc/dai/client_config_cert.pem \\\n -v /user/certificates/client_config_key.key:/etc/dai/client_config_key.key \\\n -v /user/certificates/rootCA.pem:/etc/dai/rootCA.pem \\\n h2oai/dai-ubi8-x86_64:|tag|\nNote: When certificate verification is required, use the Docker\nparameter --hostname to ensure that the certificate hostname is\nresolvable from within the Docker container to the container's IP\naddress.", + "prompt_type": "plain" + }, + { + "output": "Checkpointing, Rerunning, and Retraining Experiments\nThe upper-right corner of the Driverless 
AI UI includes an Experiments\nlink. []\nClick this link to open the Experiments page. From this page, you can\nrename an experiment, view previous experiments, begin a new experiment,\nrerun an experiment, and delete an experiment. []\nCheckpointing, Rerunning, and Retraining\nIn Driverless AI, you can retry an experiment from the last checkpoint,\nyou can run a new experiment using an existing experiment's settings,\nand you can retrain an experiment's final pipeline. []\nCheckpointing Experiments\nIn real-world scenarios, data can change. For example, you may have a\nmodel currently in production that was built using 1 million records. At\na later date, you may receive several hundred thousand more records. Rather than building a new model from scratch, Driverless AI includes\nH2O.ai Brain, which enables caching and smart re-use of prior models to\ngenerate features for new models. You can configure one of the following Brain levels in the experiment's\nexpert-settings.", + "prompt_type": "plain" + }, + { + "output": "(default)\n- 3: Smart checkpoint like level #1, but for the entire population. Tune only if the brain population is of insufficient size. - 4: Smart checkpoint like level #2, but for the entire population. Tune only if the brain population is of insufficient size. - 5: Smart checkpoint like level #4, but will scan over the entire\n brain cache of populations (starting from resumed experiment if\n chosen) in order to get the best scored individuals. If you choose Level 2 (default), then Level 1 is also done when\nappropriate. To make use of smart checkpointing, be sure that the new data has:\n- The same data column names as the old experiment\n- The same data types for each column as the old experiment. (This\n won't match if, e.g., a column was all int and then had one string\n row.) 
- The same target as the old experiment\n- The same target classes (if classification) as the old experiment\n- For time series, all choices for intervals and gaps must be the same\nWhen the above conditions are met, then you can:\n- Start the same kind of experiment, just rerun for longer.", + "prompt_type": "plain" + }, + { + "output": "fewer or more rows). - Effectively do a final ensemble re-fit by varying the data rows and\n starting an experiment with a new accuracy, time=1, and\n interpretability. Check the experiment preview for what the ensemble\n will be. - Restart/Resume a cancelled, aborted, or completed experiment\nTo run smart checkpointing on an existing experiment, click the right\nside of the experiment that you want to retry, then select New /\nContinue -> From Last Checkpoint. The experiment settings page opens. Specify the new dataset. If desired, you can also change experiment\nsettings, though the target column must be the same. Click Launch\nExperiment to resume the experiment from the last checkpoint and build a\nnew experiment. The smart checkpointing continues by adding a prior model as another\nmodel used during tuning. If that prior model is better (which is likely\nif it was run for more iterations), then that smart checkpoint model\nwill be used during feature evolution iterations and final ensemble.", + "prompt_type": "plain" + }, + { + "output": "- The directory where the H2O.ai Brain meta model files are stored is\n tmp/H2O.ai_brain. In addition, the default maximum brain size is\n 20GB. Both the directory and the maximum size can be changed in the\n config.toml file. Rerunning Experiments\nTo run a new experiment using an existing experiment's settings, click\nthe right side of the experiment that you want to use as the basis for\nthe new experiment, then select New Experiment with Same Settings. This\nopens the experiment settings page. 
From this page, you can rerun the\nexperiment using the original settings, or you can specify to use new\ndata and/or specify different experiment settings. Click Launch\nExperiment to create a new experiment with the same options. Retrain / Refit\nTo retrain an experiment's final pipeline, click on the group of square\nicons next to the experiment that you want to use as the basis for the\nnew experiment and click Retrain / Refit, then select From Final\nCheckpoint. This opens the experiment settings page with the same\nsettings as the original experiment except that Time is set to 0.", + "prompt_type": "plain" + }, + { + "output": "This may include the addition of\nnew features, the exclusion of previously used features, a change in the\nhyperparameter search space, or finding new parameters for the existing\nmodel architecture. To retrain the final pipeline without adding new features, select the\nFrom Best Models option, which overrides the following config.toml\noptions:\n refit_same_best_individual=True\n brain_add_features_for_new_columns=False\n feature_brain_reset_score=\"off\"\n force_model_restart_to_defaults=False\nFor more information, refer to the feature_brain_level setting in the\nconfig.toml file. Note\nFor information on the equivalent Python client calls\nfor Retrain / Refit options, refer to the following list. - New / Continue - With Same Settings:\n retrain(...)\n- New / Continue - From Last Checkpoint:\n retrain(..., use_smart_checkpoint=True)\n- Retrain / Refit - From Final Checkpoint\n retrain(..., final_pipeline_only=True)\n- Retrain / Refit - From Best Models (1.10.1 client)\n retrain(..., final_models_only=True)\n\"Pausing\" an Experiment\nA trick for \"pausing\" an experiment is to:\n1.", + "prompt_type": "plain" + }, + { + "output": "Simple Configurations\nBelow is a list of some simple configurations that can be run with\ncopy/paste config.toml settings in Driverless AI GUI. 
Get a quick Final Model: no Genetic Algorithm no Ensembling\nThese settings can be copy pasted in the Toml editor in the Expert\nSettings. The experiment preview can be checked to make sure the changes\nhave taken effect. The Toml editor of a completed experiment will also\nlist them at the end of the experiment. Toml editor\n enable_genetic_algorithm = \"off\"\n fixed_ensemble_level = 0\nUse Original Features With Genetic Algorithm\nThis example does no transformations on numeric features and only a\nsingle simple encoding on categorical features, i.e. no interactions,\ntarget-encoding, dates, text, etc. It only does model selection and\ntuning via GA. The examples can be copy pasted in the Toml editor in the Expert\nSettings. The experiment preview gets modified and can be inspected to\nconfirm the changes have taken effect. 1) The example applies only identity or\n original transformation on numeric columns and\n Frequent Transformer on integer and categorical\n columns, i.e it does not do feature engineering or feature\n interactions (consider mutation_mode = \"full\" if set interaction\n depth >1).", + "prompt_type": "plain" + }, + { + "output": "Toml editor\n included_transformers = [\"OriginalTransformer\",\"OneHotEncodingTransformer\"]\n max_feature_interaction_depth = 1\n no_drop_features = true\nBuild models with your choice of algorithm and parameters\nThese settings can be copy pasted in the\nAdd to config.toml via toml string under the Expert Experiment settings\nof an experiment. Always check the Driverless preview to make sure the\nchanges have taken effect before launching the experiment. The Scores\ntab can be used to inspect the built model. 1) This example builds a single GBM model with 2 folds cross\n validation and user provided parameters with no genetic algorithm. 
Add to config.toml via toml string\n \"\" included_models = ['XGBOOSTGBM']\\n\n params_xgboost = \"{'max_depth': 2, 'max_leaves': 4, 'n_estimators': 50, 'learning_rate': 0.03}\"\\n\n fixed_num_folds = 2 \\n\n feature_brain_level = 0 \\n \n enable_genetic_algorithm = \"off\" \\n\n \"\"\n 2) This example builds a single TensorFlow model on original numeric\n features with user defined parameters.", + "prompt_type": "plain" + }, + { + "output": "The model\n is evaluated with a 4 fold cross validation scheme. Mojo creation,\n pipeline visualization and genetic algorithm are turned off. Experiment logs can be viewed to verify the parameters used by the\n TensorFlow model. Add to config.toml via toml string\n \"\" included_models = [\"TensorFlowModel\"] \\n\n included_transformers = [\"OriginalTransformer\"] \\n\n fixed_ensemble_level = 1 \\n\n fixed_num_folds = 4 \\n\n params_tensorflow = \"{'batch_size': 4096, 'epochs': 100, 'hidden': [1000, 1000]}\" \\n\n target_transformer = \"identity_noclip\" \\n\n make_mojo_scoring_pipeline = \"off\" \\n\n make_pipeline_visualization = \"off\" \\n\n enable_genetic_algorithm = \"off\" \\n\n \"\"\n 3) This example builds LightGBM models. During genetic algorithm, it\n does feature engineering and will do model tuning by toggling\n other params not set by the user. The Scores tab can be used to\n inspect the built models.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Standalone Python Scoring Pipeline\nA standalone Python scoring pipeline is available after successfully\ncompleting an experiment. This package contains an exported model and\nPython 3.8 source code examples for productionizing models built using\nH2O Driverless AI. The files in this package let you transform and score on new data in\nseveral different ways:\n- From Python 3.8, you can import a scoring module and use it to\n transform and score on new data. 
- From other languages and platforms, you can use the TCP/HTTP scoring\n service bundled with this package to call into the scoring pipeline\n module through remote procedure calls (RPC). For more information on the Python Scoring Pipeline, refer to the\nfollowing sections:\n- python-scoring-before\n- python-scoring-files\n- python-scoring-quick-start\n- python-scoring-module\n- python-scoring-service\n- python-scoring-shapley\n- python-scoring-faq\n- python-scoring-troubleshooting\nBefore You Begin\nRefer to the following notes for important information regarding the\nPython Scoring Pipeline.", + "prompt_type": "plain" + }, + { + "output": "For more information, see\ncuda-opencl-cudnn. Note\nThe downloaded scorer zip file contains a shell script called\nrun_example.sh, which is used to set up a virtual environment and run an\nexample Python script. If you use the pip-virtualenv mode for the\nrun_example.sh shell script, refer to the following examples to install\nprerequisites for Python scoring:\nDocker\nTo install the necessary prerequisites and activate a virtual\nenvironment using the run_example.sh shell script with Docker, refer to\nthe following examples:\nUbuntu 18.04 or later\n # replace with your license key\ndocker run -ti --entrypoint=bash --runtime nvidia -e\nDRIVERLESS_AI_LICENSE_KEY= -v /home/$USER/scorers:/scorers\ndocker.io/nvidia/cuda:11.2.2-base-ubuntu18.04 apt-get update apt-get\ninstall python3.8 virtualenv unzip git -y apt-get install libgomp1\nlibopenblas-base ocl-icd-libopencl1 -y # required at runtime apt install\nbuild-essential libssl-dev libffi-dev python3-dev python3.8-dev -y # to\ncompile some packages apt install language-pack-en -y # for proper\nencoding support apt-get install libopenblas-dev -y # for runtime mkdir\n-p /etc/OpenCL/vendors && echo \"libnvidia-opencl.so.1\" >\n/etc/OpenCL/vendors/nvidia.icd export LANG=\"en_US.UTF-8\" export\nLC_ALL=\"en_US.UTF-8\" unzip /scorers/scorer.zip cd scoring-pipeline # if\ndon't need h2o-3 
recipe server, then add dai_enable_h2o_recipes=0 before\nbash below bash run_example.sh\nRed Hat Enterprise Linux (Red Hat Universal Base Image 8 without GPUs)\n docker run -ti --entrypoint=bash -v /home/$USER/scorers:/scorers registry.access.redhat.com/ubi8/ubi:8.4\n dnf -y install python38 unzip virtualenv openblas libgomp\n unzip /scorers/scorer.zip\n cd scoring-pipeline\n bash run_example.sh\nCentOS 8\n docker run -ti --entrypoint=bash -v /home/$USER/Downloads/scorers:/scorers centos:8\n dnf -y install python38 unzip virtualenv openblas libgomp procps\n unzip /scorers/scorer.zip\n cd scoring-pipeline\n bash run_example.sh\nUbuntu 16.04\nTo install the necessary prerequisites and activate a virtual\nenvironment using the run_example.sh shell script on Ubuntu 16.04, run\nthe following commands:\n sudo apt-get update\n sudo apt-get install software-properties-common # Ubuntu 16.04 only\n sudo add-apt-repository ppa:deadsnakes/ppa # Ubuntu 16.04 only\n sudo apt-get update\n sudo apt-get install python3.8 virtualenv unzip -y\n sudo apt-get install libgomp1 libopenblas-base ocl-icd-libopencl1 -y # required at runtime\n unzip scorer.zip\n cd scoring-pipeline\n bash run_example.sh\nIf you need to be able to compile, also run the following command:\n sudo apt install build-essential libssl-dev libffi-dev python3-dev -y\nTo run a scoring job using the example.py file after the virtual\nenvironment has been activated, run the following command:\n export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\"\n python example.py\nUbuntu 18.04 or later\nTo install the necessary prerequisites and activate a virtual\nenvironment using the run_example.sh shell script on Ubuntu 18.04 or\nlater, run the following commands:\n sudo apt-get update\n sudo apt-get install python3.8 virtualenv unzip -y\n sudo apt-get install libgomp1 libopenblas-base ocl-icd-libopencl1 -y # required at runtime\n unzip scorer.zip\n cd scoring-pipeline\n bash run_example.sh\nIf you need to be able to 
compile, also run the following command:\n sudo apt install build-essential libssl-dev libffi-dev python3-dev -y\nTo run a scoring job using the example.py file after the virtual\nenvironment has been activated, run the following command:\n export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\"\n python example.py\nRHEL 8\nTo install the necessary prerequisites and activate a virtual\nenvironment using the run_example.sh shell script on Red Hat Enterprise\nLinux 8, run the following command:\n dnf -y install python38 unzip virtualenv openblas libgomp\n unzip /rpms/scorer.zip\n cd scoring-pipeline\n bash run_example.sh\nCentOS 8\nTo install the necessary prerequisites and activate a virtual\nenvironment using the run_example.sh shell script on CentOS 8, run the\nfollowing command:\n dnf -y install python38 unzip virtualenv openblas libgomp procps\n unzip /rpms/scorer.zip\n cd scoring-pipeline\n bash run_example.sh\nNote\nCustom Recipes and the Python Scoring Pipeline\nBy default, if a custom recipe has been uploaded into Driverless AI and\nis subsequently not used in the experiment, the Python Scoring Pipeline\nstill contains the H2O recipe server.", + "prompt_type": "plain" + }, + { + "output": "In addition, Java has to be installed in the container,\nwhich further increases the runtime storage and memory requirements. A\nworkaround is to set the following environment variable before running\nthe Python Scoring Pipeline:\n export dai_enable_custom_recipes=0\nCUDA, OpenCL, and cuDNN Install Instructions\nRefer to the following sections for instructions on installing CUDA,\nOpenCL, and cuDNN when using the virtualenv or pip run methods of Python\nscoring. 
Installing CUDA with NVIDIA Drivers\nBefore installing CUDA, make sure you have already installed wget, gcc,\nmake, and elfutils-libelf-devel:\n sudo yum -y install wget\n sudo yum -y install gcc\n sudo yum -y install make\n sudo yum -y install elfutils-libelf-devel\nNext, visit\nhttps://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html\nfor instructions on installing CUDA. It is recommended that you use the\nrunfile method of installation. If prompted to select what tools you would like to install, select\nDrivers only.", + "prompt_type": "plain" + }, + { + "output": "sudo yum -y clean all\n sudo yum -y makecache\n sudo yum -y update\n wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/c/clinfo-2.1.17.02.09-1.el7.x86_64.rpm\n wget http://dl.fedoraproject.org/pub/epel/7/x86_64/Packages/o/ocl-icd-2.2.12-1.el7.x86_64.rpm\n sudo rpm -if ocl-icd-2.2.12-1.el7.x86_64.rpm\n sudo rpm -if clinfo-2.1.17.02.09-1.el7.x86_64.rpm\n clinfo\n mkdir -p /etc/OpenCL/vendors && \\\n echo \"libnvidia-opencl.so.1\" > /etc/OpenCL/vendors/nvidia.icd\nInstalling cuDNN\nFor information on installing cuDNN on Linux, refer to\nhttps://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html. Note\ncuDNN 8 or later is required. Python Scoring Pipeline Files\nThe scoring-pipeline folder includes the following notable files:\n- example.py: An example Python script demonstrating how to import and\n score new records. - run_example.sh: Runs example.py (also sets up a virtualenv with\n prerequisite libraries). For more information, refer to the second\n note in the python-scoring-before section.", + "prompt_type": "plain" + }, + { + "output": "- http_server.py: A standalone HTTP server for hosting scoring\n services. - run_tcp_server.sh: Runs TCP scoring service (runs tcp_server.py). - run_http_server.sh: Runs HTTP scoring service (runs http_server.py). - example_client.py: An example Python script demonstrating how to\n communicate with the scoring server. 
- run_tcp_client.sh: Demonstrates how to communicate with the scoring\n service via TCP (runs example_client.py). - run_http_client.sh: Demonstrates how to communicate with the scoring\n service via HTTP (using curl). Quick Start\nThere are two methods for starting the Python Scoring Pipeline. Quick Start - Recommended Method\nThis is the recommended method for running the Python Scoring Pipeline. Use this method if:\n- You have an air gapped environment with no access to the Internet. - You want to use a quick start approach. Prerequisites\n- A valid Driverless AI license key. - A completed Driverless AI experiment. - Downloaded Python Scoring Pipeline.", + "prompt_type": "plain" + }, + { + "output": "1. Download the TAR SH version of Driverless AI from\n https://www.h2o.ai/download/. 2. Use bash to execute the download. This creates a new\n dai-<VERSION> folder, where <VERSION> represents your\n version of Driverless AI (for example, 1.7.1-linux-x86_64). 3. Change directories into the new Driverless AI folder. (Replace\n <VERSION> below with the version that was created in Step\n 2.) 4. Run the following to change permissions:\n5. Run the following to install the Python Scoring Pipeline for your\n completed Driverless AI experiment:\n6. Run the following command from the scoring-pipeline directory:\nQuick Start - Alternative Method\nThis section describes an alternative method for running the Python\nScoring Pipeline. This version requires Internet access. Note\nIf you use a scorer from a version prior to 1.10.4.1, you need to add\nexport SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True prior to\ncreating the new scorer python environment, either in run_example.sh or\nin the same terminal where the shell scripts are executed.", + "prompt_type": "plain" + }, + { + "output": "Prerequisites\n- The scoring module and scoring service are supported only on Linux\n with Python 3.8 and OpenBLAS. 
- The scoring module and scoring service download additional packages\n at install time and require Internet access. Depending on your\n network environment, you might need to set up internet access via a\n proxy. - Valid Driverless AI license. Driverless AI requires a license to be\n specified in order to run the Python Scoring Pipeline. - Apache Thrift (to run the scoring service in TCP mode)\n- Linux environment\n- Python 3.8\n- libopenblas-dev (required for H2O4GPU)\n- OpenCL\nFor info on how to install these prerequisites, refer to the following\nexamples. Installing Python 3.8 and OpenBLAS on Ubuntu 16.10 or Later:\n sudo apt install python3.8 python3.8-dev python3-pip python3-dev \\\n python-virtualenv python3-virtualenv libopenblas-dev\nInstalling Python 3.8 and OpenBLAS on Ubuntu 16.04:\n sudo add-apt-repository ppa:deadsnakes/ppa\n sudo apt-get update\n sudo apt-get install python3.8 python3.8-dev python3-pip python3-dev \\\n python-virtualenv python3-virtualenv libopenblas-dev\nInstalling Conda 3.6:\n You can install Conda using either Anaconda or Miniconda.", + "prompt_type": "plain" + }, + { + "output": "DRIVERLESS_AI_LICENSE_KEYwould be similar. **Installing the Thrift Compiler** Thrift is required to run the scoring service in TCP mode, but it is not required to run the scoring module. The following steps are available on the Thrift documentation site at: https://thrift.apache.org/docs/BuildingFromSource. :: sudo apt-get install automake bison flex g++ git libevent-dev \\ libssl-dev libtool make pkg-config libboost-all-dev ant wget https://github.com/apache/thrift/archive/0.10.0.tar.gz tar -xvf 0.10.0.tar.gz cd thrift-0.10.0 ./bootstrap.sh ./configure make sudo make install Run the following to refresh the runtime shared after installing Thrift: :: sudo ldconfig /usr/local/lib Running the Python Scoring Pipeline - Alternative Method ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1. 
On the completed Experiment page, click on the **Download Python Scoring Pipeline** button to download the **scorer.zip** file for this experiment onto your local machine.", + "prompt_type": "plain" + }, + { + "output": "Extract the scoring pipeline. You can run the scoring module and the scoring service after downloading and extracting the pipeline. **Score from a Python Program** If you intend to score from a Python program, run the scoring module example. (Requires Linux and Python 3.8.) :: export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\" bash run_example.sh **Score Using a Web Service** If you intend to score using a web service, run the HTTP scoring server example. (Requires Linux x86_64 and Python 3.8.) :: export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\" bash run_http_server.sh bash run_http_client.sh **Score Using a Thrift Service** If you intend to score using a Thrift service, run the TCP scoring server example. (Requires Linux x86_64, Python 3.8 and Thrift.) :: export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\" bash run_tcp_server.sh bash run_tcp_client.sh **Note**: By default, the run*.sh scripts mentioned above create a virtual environment using virtualenv and pip, within which the Python code is executed.", + "prompt_type": "plain" + }, + { + "output": "The package manager to use is provided as an argument to the script. :: # to use conda package manager export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\" bash run_example.sh --pm conda # to use pip package manager export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\" bash run_example.sh --pm pip If you experience errors while running any of the above scripts, check to make sure your system has a properly installed and configured Python 3.8 installation. 
Refer to the `Troubleshooting Python Environment Issues <#troubleshooting-python-environment-issues>`__ section that follows to see how to set up and test the scoring module using a cleanroom Ubuntu 16.04 virtual machine. .. _python-scoring-module: The Python Scoring Module ------------------------- The scoring module is a Python module bundled into a standalone wheel file (named scoring_*.whl). All the prerequisites for the scoring module to work correctly are listed in the requirements.txt file.", + "prompt_type": "plain" + }, + { + "output": "from scoring_487931_20170921174120_b4066 import Scorer scorer = Scorer() # Create instance. score = scorer.score([ # Call score() 7.416, # sepal_len 3.562, # sepal_wid 1.049, # petal_len 2.388, # petal_wid ]) The scorer instance provides the following methods (and more): - score(list): Score one row (list of values). - score_batch(df): Score a Pandas dataframe. - fit_transform_batch(df): Transform a Pandas dataframe. - get_target_labels(): Get target column labels (for classification problems). The process of importing and using the scoring module is demonstrated by the bash script run_example.sh, which effectively performs the following steps: :: # See 'run_example.sh' for complete example. virtualenv -p python3.8 env source env/bin/activate pip install --use-deprecated=legacy-resolver -r requirements.txt export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\" python example.py .. _python-scoring-service: The Scoring Service ------------------- The scoring service hosts the scoring module as an HTTP or TCP service.", + "prompt_type": "plain" + }, + { + "output": "In effect, this mechanism lets you invoke scoring functions from languages other than Python on the same computer or from another computer on a shared network or on the Internet. 
The scoring service can be started in two ways: - In TCP mode, the scoring service provides high-performance RPC calls via Apache Thrift (https://thrift.apache.org/) using a binary wire protocol. - In HTTP mode, the scoring service provides JSON-RPC 2.0 calls served by Tornado (http://www.tornadoweb.org). Scoring operations can be performed on individual rows (row-by-row) or in batch mode (multiple rows at a time). Scoring Service - TCP Mode (Thrift) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The TCP mode lets you use the scoring service from any language supported by Thrift, including C, C++, C#, Cocoa, D, Dart, Delphi, Go, Haxe, Java, Node.js, Lua, perl, PHP, Python, Ruby and Smalltalk. To start the scoring service in TCP mode, you will need to generate the Thrift bindings once, then run the server: :: # See 'run_tcp_server.sh' for complete example.", + "prompt_type": "plain" + }, + { + "output": "It is not a run time dependency, i.e. once the scoring services are built and tested, you do not need to repeat this installation process on the machines where the scoring services are intended to be deployed. To call the scoring service, generate the Thrift bindings for your language of choice, then make RPC calls via TCP sockets using Thrift's buffered transport in conjunction with its binary protocol. :: # See 'run_tcp_client.sh' for complete example. thrift --gen py scoring.thrift # See 'example_client.py' for complete example. 
socket = TSocket.TSocket('localhost', 9090) transport = TTransport.TBufferedTransport(socket) protocol = TBinaryProtocol.TBinaryProtocol(transport) client = ScoringService.Client(protocol) transport.open() row = Row() row.sepalLen = 7.416 # sepal_len row.sepalWid = 3.562 # sepal_wid row.petalLen = 1.049 # petal_len row.petalWid = 2.388 # petal_wid scores = client.score(row) transport.close() You can reproduce the exact same result from other languages, e.g.", + "prompt_type": "plain" + }, + { + "output": "This is usually less performant compared to Thrift, but has the advantage of being usable from any HTTP client library in your language of choice, without any dependency on Thrift. For JSON-RPC documentation, see http://www.jsonrpc.org/specification. To start the scoring service in HTTP mode: :: # See 'run_http_server.sh' for complete example. export DRIVERLESS_AI_LICENSE_FILE=\"/path/to/license.sig\" python http_server.py --port=9090 To invoke scoring methods, compose a JSON-RPC message and make a HTTP POST request to `http://host:port/rpc `__ as follows: :: # See 'run_http_client.sh' for complete example. curl http://localhost:9090/rpc \\ --header \"Content-Type: application/json\" \\ --data @- <` for **transformed features** and **original features** are **available** for XGBoost (GBM, GLM, RF, DART), LightGBM, Zero-Inflated, Imbalanced and DecisionTree models (and their ensemble). For ensemble with ExtraTrees meta learner (ensemble_meta_learner='extra_trees') models we suggest to use the Python scoring packages.", + "prompt_type": "plain" + }, + { + "output": "- The :ref:`Shapley fast approximation ` uses only one model (from the first fold) with no more than the first 50 trees. For details seefast_approx_num_treesandfast_approx_do_one_fold_one_model:ref:`config.toml settings `. .. 
_python-scoring-faq: Frequently asked questions -------------------------- **I'm getting GCC compile errors on Red Hat / CentOS when not using tar and SCORING_PIPELINE_INSTALL_DEPENDENCIES = 0. How do I fix this?** To fix this issue, run the following command: :: sudo yum -y install gcc **Why am I getting a \"TensorFlow is disabled\" message when I run the Python Scoring Pipeline?** If you ran an experiment when TensorFlow was enabled and then attempt to run the Python Scoring Pipeline, you may receive a message similar to the following: :: TensorFlow is disabled. To enable, export DRIVERLESS_AI_ENABLE_TENSORFLOW=1 or set enable_tensorflow=true in config.toml. To successfully run the Python Scoring Pipeline, you must enable the DRIVERLESS_AI_ENABLE_TENSORFLOW\nflag.", + "prompt_type": "plain" + }, + { + "output": "Using a Custom Transformer\nDriverless AI supports a number of feature transformers as described in\ntransformations. This example shows how you can include a custom\ntransformer in your experiment. Specifically, this example will show how\nto add the ExpandingMean transformer. 1. Start an experiment in Driverless AI by selecting your training\n dataset along with (optionally) validation and testing datasets and\n then specifying a Target Column. Notice the list of transformers\n that will be used in the Feature engineering search space (where\n applicable) section of the experiment summary. Driverless AI\n determines this list based on the dataset and experiment. 2. Click on Expert Settings. 3. Specify the custom recipe using one of the following methods:\n4. Navigate to the Expert Settings > Recipes tab and click the Include\n Specific Transformers button. 
Notice that all transformers are\n selected by default, including the new ExpandingMean transformer\n (bottom of page).", + "prompt_type": "plain" + }, + { + "output": "Google Cloud Storage Setup\nDriverless AI lets you explore Google Cloud Storage data sources from\nwithin the Driverless AI application. This section provides instructions\nfor configuring Driverless AI to work with Google Cloud Storage. This\nsetup requires you to enable authentication. If you enable GCS or GBP\nconnectors, those file systems will be available in the UI, but you will\nnot be able to use those connectors without authentication. In order to enable the GCS data connector with authentication, you must:\n1. Obtain a JSON authentication file from GCP. 2. Mount the JSON file to the Docker instance. 3. Specify the path to the /json_auth_file.json in the\n gcs_path_to_service_account_json config option. Notes:\n- The account JSON includes authentications as provided by the system\n administrator. You can be provided a JSON file that contains both\n Google Cloud Storage and Google BigQuery authentications, just one\n or the other, or none at all. - Depending on your Docker install version, use either the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command when starting the Driverless AI Docker image.", + "prompt_type": "plain" + }, + { + "output": "Description of Configuration Attributes\n- gcs_path_to_service_account_json: Specifies the path to the\n /json_auth_file.json file. - gcs_init_path: Specifies the starting GCS path displayed in the UI\n of the GCS browser. Start GCS with Authentication\nDocker Image Installs\nThis example enables the GCS data connector with authentication by\npassing the JSON authentication file. This assumes that the JSON file\ncontains Google Cloud Storage authentications. 
nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,gcs\" \\\n -e DRIVERLESS_AI_GCS_PATH_TO_SERVICE_ACCOUNT_JSON=\"/service_account_json.json\" \\\n -u `id -u`:`id -g` \\\n -p 12345:12345 \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n -v `pwd`/service_account_json.json:/service_account_json.json \\\n h2oai/dai-ubi8-x86_64:|tag|\nDocker Image with the config.toml\nThis example shows how to configure the GCS data connector options in\nthe config.toml file, and then specify that file when starting\nDriverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "Monitoring Pending Jobs\nDriverless AI features a Pending Jobs panel that lets you monitor the\nprogress of various long-running jobs that can be started from the\ncompleted_experiment page. To view this panel, click the group of square\nicons located in the upper-right corner. The following jobs are monitored in this panel:\n- Create AutoDoc\n- Create MOJO Scoring Pipeline\n- Create Python Scoring Pipeline\n- Create Test Set Predictions\n- Create Training Predictions\n- Score Model\n- Transform Data\nThe circular icon next to the description of a pending job indicates its\nstatus:\n+---------+------------+\n| Icon | Status |\n+=========+============+\n| [logo] | Complete |\n+---------+------------+\n| [logo2] | Failed |\n+---------+------------+\n| | Running |\n+---------+------------+\nNavigate to a completed job by clicking the Open icon. You can also\nclear a completed job from the panel by clicking Remove or cancel an\nongoing job by clicking Abort. 
Note: Certain jobs cannot be cancelled.", + "prompt_type": "plain" + }, + { + "output": "BlueData DataTap Setup\n\nThis section provides instructions for configuring Driverless AI to work\nwith BlueData DataTap.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (< Docker 19.03) command when starting the Driverless AI Docker image. Use docker version to check which version of Docker you are using. Description of Configuration Attributes --------------------------------------- - dtap_auth_type: Selects DTAP authentication. Available values are: - noauth: No authentication needed - principal: Authenticate with DataTap with a principal user - keytab: Authenticate with a Key tab (recommended). If running Driverless AI as a service, then the Kerberos keytab needs to be owned by the Driverless AI user. - keytabimpersonation: Login with impersonation using a keytab - dtap_config_path: The location of the DTAP (HDFS) config folder path. This folder can contain multiple config files. **Note**: The DTAP config file core-site.xml needs to contain DTap FS configuration, for example: :: <property> <name>fs.dtap.impl</name> <value>com.bluedata.hadoop.bdfs.Bdfs</value> <description>The FileSystem for BlueData dtap: URIs.</description> </property> - dtap_key_tab_path: The path of the principal key tab file.", + "prompt_type": "plain" + }, + { + "output": "- dtap_app_principal_user: The Kerberos app principal user (recommended). - dtap_app_login_user: The user ID of the current user (for example, user@realm). - dtap_app_jvm_args: JVM args for DTap distributions. Separate each argument with spaces. - dtap_app_classpath: The DTap classpath. - dtap_init_path: Specifies the starting DTAP path displayed in the UI of the DTAP browser. - enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. 
Example 1: Enable DataTap with No Authentication ------------------------------------------------ .. container:: tabs .. group-tab:: Docker Image Installs This example enables the DataTap data connector and disables authentication. It does not pass any configuration file; however, it configures Docker DNS by passing the name and IP of the DTap name node. This lets users reference data stored in DTap directly using the name node address, for example: dtap://name.node/datasets/iris.csv or dtap://name.node/datasets/.", + "prompt_type": "plain" + }, + { + "output": ".. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,dtap\" \\ -e DRIVERLESS_AI_DTAP_AUTH_TYPE='noauth' \\ -p 12345:12345 \\ -v /etc/passwd:/etc/passwd \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure DataTap options in the config.toml file, and then specify that file when starting Driverless AI in Docker. Note that this example enables DataTap with no authentication. 1. Configure the Driverless AI config.toml file. Set the following configuration options: .. - enabled_file_systems = \"file, upload, dtap\" 2. Mount the config.toml file into the Docker container.", + "prompt_type": "plain" + }, + { + "output": "This allows users to reference data stored in DataTap directly using the name node address, for example: dtap://name.node/datasets/iris.csv or dtap://name.node/datasets/. (**Note**: The trailing slash is currently required for directories.) 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. 
Specify the following configuration options in the config.toml file. .. :: # File System Support # upload : standard upload feature # dtap : Blue Data Tap file system, remember to configure the DTap section below enabled_file_systems = \"file, dtap\" 3. Save the changes when you are done, then stop/restart Driverless AI. Example 2: Enable DataTap with Keytab-Based Authentication ---------------------------------------------------------- **Notes**: - If using Kerberos Authentication, the time on the Driverless AI server must be in sync with the Kerberos server.", + "prompt_type": "plain" + }, + { + "output": "- If running Driverless AI as a service, then the Kerberos keytab needs to be owned by the Driverless AI user; otherwise Driverless AI will not be able to read/access the Keytab and will result in a fallback to simple authentication and, hence, fail. .. container:: tabs .. group-tab:: Docker Image Installs This example: - Places keytabs in the /tmp/dtmp folder on your machine and provides the file path as described below. - Configures the environment variable DRIVERLESS_AI_DTAP_APP_PRINCIPAL_USER to reference a user for whom the keytab was created (usually in the form of user@realm). .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,dtap\" \\ -e DRIVERLESS_AI_DTAP_AUTH_TYPE='keytab' \\ -e DRIVERLESS_AI_DTAP_KEY_TAB_PATH='tmp/<>' \\ -e DRIVERLESS_AI_DTAP_APP_PRINCIPAL_USER='<>' \\ -p 12345:12345 \\ -v /etc/passwd:/etc/passwd \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example: - Places keytabs in the /tmp/dtmp folder on your machine and provides the file path as described below.", + "prompt_type": "plain" + }, + { + "output": "1. Configure the Driverless AI config.toml file. Set the following configuration options: .. 
-enabled_file_systems = \"file, upload, dtap\"-dtap_auth_type = \"keytab\"-dtap_key_tab_path = \"/tmp/\"-dtap_app_principal_user = \"\"2. Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example: - Places keytabs in the/tmp/dtmpfolder on your machine and provides the file path as described below.", + "prompt_type": "plain" + }, + { + "output": "1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. .. :: # File System Support # file : local file system/server file system # dtap : Blue Data Tap file system, remember to configure the DTap section below enabled_file_systems = \"file, dtap\" # Blue Data DTap connector settings are similar to HDFS connector settings. # # Specify DTap Auth Type, allowed options are: # noauth : No authentication needed # principal : Authenticate with DTab with a principal user # keytab : Authenticate with a Key tab (recommended). If running # DAI as a service, then the Kerberos keytab needs to # be owned by the DAI user.", + "prompt_type": "plain" + }, + { + "output": "Save the changes when you are done, then stop/restart Driverless AI. 
Example 3: Enable DataTap with Keytab-Based Impersonation --------------------------------------------------------- **Notes**: - If using Kerberos, be sure that the Driverless AI time is synched with the Kerberos server. - If running Driverless AI as a service, then the Kerberos keytab needs to be owned by the Driverless AI user. .. container:: tabs .. group-tab:: Docker Image Installs This example: - Places keytabs in the/tmp/dtmpfolder on your machine and provides the file path as described below. - Configures theDRIVERLESS_AI_DTAP_APP_PRINCIPAL_USERvariable, which references a user for whom the keytab was created (usually in the form of user@realm). - Configures theDRIVERLESS_AI_DTAP_APP_LOGIN_USERvariable, which references a user who is being impersonated (usually in the form of user@realm). .. code:: bash # Docker instructions nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,dtap\" \\ -e DRIVERLESS_AI_DTAP_AUTH_TYPE='keytabimpersonation' \\ -e DRIVERLESS_AI_DTAP_KEY_TAB_PATH='tmp/<>' \\ -e DRIVERLESS_AI_DTAP_APP_PRINCIPAL_USER='<>' \\ -e DRIVERLESS_AI_DTAP_APP_LOGIN_USER='<>' \\ -p 12345:12345 \\ -v /etc/passwd:/etc/passwd \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example: - Places keytabs in the/tmp/dtmpfolder on your machine and provides the file path as described below.", + "prompt_type": "plain" + }, + { + "output": "- Configures thedtap_app_login_uservariable, which references a user who is being impersonated (usually in the form of user@realm). 1. Configure the Driverless AI config.toml file. Set the following configuration options: .. -enabled_file_systems = \"file, upload, dtap\"-dtap_auth_type = \"keytabimpersonation\"-dtap_key_tab_path = \"/tmp/\"-dtap_app_principal_user = \"\"-dtap_app_login_user = \"\"2. 
Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example: - Places keytabs in the/tmp/dtmpfolder on your machine and provides the file path as described below.", + "prompt_type": "plain" + }, + { + "output": "- Configures thedtap_app_login_user`` variable, which references\n a user who is being impersonated (usually in the form of\n user@realm). 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example:\n # DEB and RPM\n export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\"\n # TAR SH\n export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" \n 2. Specify the following configuration options in the config.toml\n file. 
# File System Support\n # upload : standard upload feature\n # file : local file system/server file system\n # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below\n # dtap : Blue Data Tap file system, remember to configure the DTap section below\n # s3 : Amazon S3, optionally configure secret and access key below\n # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below\n # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below\n # minio : Minio Cloud Storage, remember to configure secret and access key below\n # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password)\n # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args)\n # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key)\n # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs)\n # recipe_url: load custom recipe from URL\n # recipe_file: load custom recipe from local file system\n enabled_file_systems = \"file, dtap\"\n # Blue Data DTap connector settings are similar to HDFS connector settings. #\n # Specify DTap Auth Type, allowed options are:\n # noauth : No authentication needed\n # principal : Authenticate with DTab with a principal user\n # keytab : Authenticate with a Key tab (recommended). If running\n # DAI as a service, then the Kerberos keytab needs to\n # be owned by the DAI user. 
# keytabimpersonation : Login with impersonation using a keytab\n dtap_auth_type = \"keytabimpersonation\"\n # Path of the principal key tab file\n dtap_key_tab_path = \"/tmp/\"\n # Kerberos app principal user (recommended)\n dtap_app_principal_user = \"\"\n # Specify the user id of the current user here as user@realm\n dtap_app_login_user = \"\"\n 3.", + "prompt_type": "plain" + }, + { + "output": "Feature Count Control\nThis page describes how to control feature counts during the feature\nselection process in H2O Driverless AI (DAI). - original_feature_control\n- transformed_feature_control\n- individuals_control\n- feature_count_use_case\nOriginal Feature Control\nTo control the count of original features when creating an experiment,\nuse one of the following methods:\n- On the Experiment Setup page, click Dropped Columns to manually\n select specific columns to drop. - Use the Features to Drop Expert Setting to enter\n a list of features to drop. The list of features must be formatted\n as follows:\n- If you are unsure about which original columns are best, you can let\n DAI select the best features by setting the following configuration\n options, which use DAI's feature selection (FS) by permutation\n importance to determine which original features are beneficial to\n keep, and which features to remove if they negatively impact the\n model.", + "prompt_type": "plain" + }, + { + "output": "- max_orig_numeric_cols_selected: This option has the same\n functionality as max_orig_cols_selected, but for numeric\n columns. - max_orig_nonnumeric_cols_selected: This option has the same\n functionality as max_orig_cols_selected, but for non-numeric\n columns. - To view a report about original features without any action, set\n orig_features_fs_report = true. 
- In general, FS can be controlled by setting the following\n parameters:\n- If strategy is FS (for high interpretability dial) we will use FS to\n get rid of poor features that hurt the model, and this can be\n fine-tuned with the following parameters:\nTransformed Feature Control\nFor transformed features, the Experiment Setup page and expert-settings\ncontrol the genetic algorithm (GA) that decides how many features\nshould be present. In some cases, however, too few or too many features\nare made. To control the number of transformed features that are made during an\nexperiment, use the nfeatures_max and ngenes_max settings.", + "prompt_type": "plain" + }, + { + "output": "These\nsettings can be used to control the number of allowed transformers and\ntransformed features by setting a limit beyond which transformed\nfeatures or transformers are removed. (The transformed features or\ntransformers with the lowest variable importance are removed first.) In some cases, specifying nfeatures_max and ngenes_max may be sufficient\nto get a restricted model. However, the best practice when using these\nsettings is to first run an experiment without specifying any\nrestrictions, and then retrain the final pipeline with the restrictions\nenabled. You can retrain the final pipeline from the\ncompleted experiment page by clicking Tune\nExperiment > Retrain / Refit > From Final Checkpoint. For more\ninformation on retraining the final pipeline, see retrain. To force DAI to add more transformations, use the ngenes_min parameter. This can be useful if you want DAI to search more actively through all\nof the potential permutations of transformers and input features.", + "prompt_type": "plain" + }, + { + "output": "enable_genetic_algorithm='off'.) .. _individuals_control: Individuals Control ------------------- You can control the number or type of individuals that are tuned or evolved by using the following config.toml parameters: .. 
code:: parameter_tuning_num_models fixed_num_individuals .. _feature_count_use_case: Sample Use Case --------------- The following is a sample use case for controlling feature counts. **Example**: You want to limit the number of features used for scoring to 14. **Solution A**: - For transformed features, set nfeatures_max = 14 in the Expert Settings window. - For original features, set the following parameters: .. .. code:: max_orig_cols_selected max_orig_numeric_cols_selected max_orig_nonnumeric_cols_selected **Solution B** Without changing any parameters, let DAI complete the experiment. After the experiment is complete, inspect the ensemble_features_orig\nfiles in the experiment summary to see which original features\nwere not important, then decide whether to drop even more of them by\nperforming a \"tune\" experiment and retraining the final pipeline (you can also\nchoose to refit from the best model for an even closer match to the original\nexperiment).", + "prompt_type": "plain" + }, + { + "output": "Experiment Queuing In Driverless AI\nDriverless AI supports automatic queuing of experiments to avoid system\noverload. You can launch multiple experiments simultaneously that are\nautomatically queued and run when the necessary resources become\navailable. The worker queue indicates the number of experiments that are waiting\nfor their turn on a CPU or GPU + CPU system. Significant jobs like\nrunning experiments and making predictions are distinguished from minor\ntasks. In the following image, 'GPU queue' indicates that there are two\nexperiments waiting in the worker queue on a GPU-enabled system, and not\nthat two workers are waiting for a GPU:\n[]\nNotes:\n- By default, each node runs two experiments at a time. This is\n controlled by the worker_remote_processors option in the\n config.toml file. 
Starting with version 1.10.4,\n Driverless AI automatically sets the maximum number of CPU cores to\n use per experiment and the maximum number of remote tasks to be\n processed at one time based on the number of CPU cores your system\n has.", + "prompt_type": "plain" + }, + { + "output": "enable_tensorflow_image``\n\nEnable Image Transformer for Processing of Image Data\n\nSpecify whether to use pretrained deep learning models for processing of\nimage data as part of the feature engineering pipeline. When this is\nenabled, a column of Uniform Resource Identifiers (URIs) to images is\nconverted to a numeric representation using ImageNet-pretrained deep\nlearning models. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_image_pretrained_models-------------------------------------- .. container:: dropdown **Supported ImageNet Pretrained Architectures for Image Transformer** Specify the supported `ImageNet `__ pretrained architectures for image transformer. Select from the following: - densenet121 - efficientnetb0 - efficientnetb2 - inception_v3 - mobilenetv2 - resnet34 - resnet50 - seresnet50 - seresnext50 - xception (Selected by default) **Notes**: - If an internet connection is available, non-default models are downloaded automatically. If an internet connection is not available, non-default models must be downloaded from http://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/pretrained/dai_image_models_1_10.zip and extracted intotensorflow_image_pretrained_models_dir``. - Multiple transformers can be activated at the same time to allow\n the selection of multiple options.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_image_vectorization_output_dimension``\n\nDimensionality of Feature Space Created by Image Transformer\n\nSpecify the dimensionality of the feature (embedding) space created by\nImage Transformer. 
Select from the following:\n\n- 10\n- 25\n- 50\n- 100 (Default)\n- 200\n- 300\n\nNote: Multiple transformers can be activated at the same time to allow\nthe selection of multiple options.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_image_fine_tune``\n\nEnable Fine-Tuning of the Pretrained Models Used for the Image\nTransformer\n\nSpecify whether to enable fine-tuning of the ImageNet pretrained models\nused for the Image Transformer. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_image_fine_tuning_num_epochs``\n\nNumber of Epochs for Fine-Tuning Used for the Image Transformer\n\nSpecify the number of epochs for fine-tuning ImageNet pretrained models\nused for the Image Transformer. This value defaults to 2.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_image_augmentations``\n\nList of Augmentations for Fine-Tuning Used for the Image Transformer\n\nSpecify the list of possible image augmentations to apply while\nfine-tuning the ImageNet pretrained models used for the Image\nTransformer. Select from the following:\n\n- Blur\n- CLAHE\n- Downscale\n- GaussNoise\n- GridDropout\n- HorizontalFlip (Default)\n- HueSaturationValue\n- ImageCompression\n- OpticalDistortion\n- RandomBrightnessContrast\n- RandomRotate90\n- ShiftScaleRotate\n- VerticalFlip\n\nNote: For more information on individual augmentations, see\nhttps://albumentations.ai/docs/.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_image_batch_size``\n\nBatch Size for the Image Transformer\n\nSpecify the batch size for the Image Transformer. By default, the batch\nsize is set to -1 (selected automatically).\n\nNote: Larger architectures and batch sizes use more memory.", + "prompt_type": "plain" + }, + { + "output": "image_download_timeout``\n\nImage Download Timeout in Seconds\n\nWhen providing images through URLs, specify the maximum number of\nseconds to wait for an image to download. 
This value defaults to 60 sec.", + "prompt_type": "plain" + }, + { + "output": "string_col_as_image_max_missing_fraction``\n\nMaximum Allowed Fraction of Missing Values for Image Column\n\nSpecify the maximum allowed fraction of missing elements in a string\ncolumn for it to be considered as a potential image path. This value\ndefaults to 0.1.", + "prompt_type": "plain" + }, + { + "output": "string_col_as_image_min_valid_types_fraction``\n\nMinimum Fraction of Images That Need to Be of Valid Types for Image\nColumn to Be Used\n\nSpecify the fraction of unique image URIs that need to have valid\nendings (as defined by string_col_as_image_valid_types) for a string\ncolumn to be considered as image data. This value defaults to 0.8.", + "prompt_type": "plain" + }, + { + "output": "tensorflow_image_use_gpu``\n\nEnable GPU(s) for Faster Transformations With the Image Transformer\n\nSpecify whether to use any available GPUs to transform images into\nembeddings with the Image Transformer. Enabling this setting can lead to\nsignificantly faster transformation speeds. This is enabled by default.\n\nNote: This setting only applies when scoring inside Driverless AI or\nwith Py Scoring.", + "prompt_type": "plain" + }, + { + "output": "This section provides instructions for upgrading Driverless AI versions\nthat were installed in a Docker container. These steps ensure that\nexisting experiments are saved. WARNING: Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\ndirectory and are not automatically upgraded when Driverless AI is\nupgraded. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. If you did not build MLI on a model before upgrading Driverless AI,\n then you will not be able to view MLI on that model after upgrading. 
Before upgrading, be sure to run MLI jobs on models that you want to\n continue to interpret in future releases. If that MLI job appears in\n the list of Interpreted Models in your current version, then it will\n be retained after upgrading. If you did not build a MOJO pipeline on a model before upgrading\n Driverless AI, then you will not be able to build a MOJO pipeline on\n that model after upgrading.", + "prompt_type": "plain" + }, + { + "output": "Note: Stop Driverless AI if it is still running. Requirements\nWe recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere. Driverless AI ships with CUDA 11.2.2 for GPUs, but the driver\nmust exist in the host environment. Go to NVIDIA download driver to get the latest NVIDIA Tesla A/T/V/P/K\nseries drivers. For reference on CUDA Toolkit and Minimum Required\nDriver Versions and CUDA Toolkit and Corresponding Driver Versions, see\nhere . Note\nIf you are using K80 GPUs, the minimum required NVIDIA driver version is\n450.80.02. Upgrade Steps\n1. SSH into the IP address of the machine that is running Driverless\n AI. 2. Set up a directory for the version of Driverless AI on the host\n machine:\n3. Retrieve the Driverless AI package from https://www.h2o.ai/download/\n and add it to the new directory. 4. Load the Driverless AI Docker image inside the new directory:\n5. Copy the data, log, license, and tmp directories from the previous\n Driverless AI directory to the new Driverless AI directory:\n6.", + "prompt_type": "plain" + }, + { + "output": "Using the Standalone Python Scoring Pipeline in a Different Docker Container\nThe Standalone Python Scoring Pipeline runs inside of the Driverless AI\nDocker container. This is the recommended method for running the Python\nScoring Pipeline. If necessary, though, this pipeline can also be run\ninside of a different Docker container. The following steps describe how\nto do this. 
This setup assumes that you have a valid Driverless AI\nlicense key, which will be required during setup. It also assumes that\nyou have completed a Driverless AI experiment and downloaded the Scoring\nPipeline. 1. On the machine where you want to run the Python Scoring Pipeline,\n create a new directory for Driverless AI (for example, dai-nnn.) 2. Download the TAR SH version of Driverless AI from\n https://www.h2o.ai/download/ (for either Linux or IBM Power). 3. Use bash to execute the download and unpack it into the new\n Driverless AI folder. 4. Change directories into the new Driverless AI folder. 5. Run the following to install the Python Scoring Pipeline for your\n completed Driverless AI experiment:\n6.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Health API\nThe following sections describe the Driverless AI Health API. - health-api-overview\n- retrieve-health-status\n- health-api-json-attributes\nOverview\nThe Driverless AI Health API is a publicly available API that exposes\nbasic system metrics and statistics. Its primary purpose is to provide\ninformation for resource monitoring and auto-scaling of\nDriverless AI multinode clusters. The API outputs a\nset of metrics in a JSON format so that they can be used by tools like\nKEDA or K8S Autoscaler. Notes:\n- The Health API is only available in multinode or singlenode mode. For more information, refer to the worker_mode\n config.toml option. - For security purposes, the Health API endpoint can be disabled by\n setting the enable_health_api config.toml option\n to false. This setting is enabled by default. 
- The Health API is designed with the intention to provide information\n that is needed by users to write their own autoscaling logic for\n Multinode Driverless AI .", + "prompt_type": "plain" + }, + { + "output": "Using the DAI Health API\nTo retrieve Driverless AI's health status, create a GET request:\n GET http://{driverless-ai-instance-address}/apis/health/v1\nThis returns the following JSON response:\n {\n \"api_version\": \"1.0\",\n \"server_version\": \"1.10\",\n \"application_id\": \"dai-12345\",\n \"timestamp\": \"ISO 8601 Datetime\",\n \"last_system_interaction\": \"ISO 8601 Datetime\",\n \"is_idle\": true,\n \"active_users\": 3,\n \"resources\": {\n \"cpu_cores\": 150,\n \"gpus\": 12,\n \"nodes\": 5,\n },\n \"tasks\": {\n \"running\": 45,\n \"scheduled\": 123,\n \"scheduled_on_gpu\": 10,\n \"scheduled_on_cpu\": 50,\n },\n \"utilization\": {\n \"cpu\": 0.12,\n \"gpu\": 0.45,\n \"memory\": 0.56,\n },\n \"workers\": [\n {\n \"name\": \"NODE:LOCAL1\",\n \"running_tasks\": 4,\n \"scheduled_tasks\": 0\n },\n {\n \"name\": \"NODE:REMOTE2\",\n \"running_tasks\": 4,\n \"scheduled_tasks\": 11\n }\n ]\n }\nAttribute Definitions\nThe following is a list of relevant JSON attribute definitions.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI with H2O-3 Algorithms\n\nDriverless AI already supports a variety of\nalgorithms . This example shows how you can use\nour h2o-3-models-py recipe to include H2O-3 supervised learning\nalgorithms in your experiment. The available H2O-3 algorithms in the\nrecipe include:\n\n- Naive Bayes\n- GBM\n- Random Forest\n- Deep Learning\n- GLM\n- AutoML\n\nCaution: Because AutoML is treated as a regular ML algorithm here, the\nruntime requirements can be large. We recommend that you adjust the", + "prompt_type": "plain" + }, + { + "output": "max_runtime_secs`` parameters as suggested here:\nhttps://github.com/h2oai/driverlessai-recipes/blob/rel-1.9.0/models/algorithms/h2o-3-models.py#L45\n1. 
Start an experiment in Driverless AI by selecting your training\n dataset along with (optionally) validation and testing datasets and\n then specifying a Target Column. Notice the list of algorithms that\n will be used in the Feature evolution section of the experiment\n summary. In the example below, the experiment will use LightGBM and\n XGBoostGBM. 2. Click on Expert Settings. 3. Specify the custom recipe using one of the following methods:\n4. In the Expert Settings page, specify any additional settings and\n then click Save. This returns you to the experiment summary. 5. To include each of the new models in your experiment, return to the\n Expert Settings option. Click the Recipes > Include Specific Models\n option. Select the algorithm(s) that you want to include. Click Done\n to return to the experiment summary.", + "prompt_type": "plain" + }, + { + "output": "Viewing Explanations\nNote: Not all explanatory functionality is available for multinomial\nclassification scenarios. Driverless AI provides explanations for completed models. You can view\nthese by clicking the Explanations button on the Model Interpretation >\nSurrogate Models Dashboard page for an interpreted model. The UI lets you view global, cluster-specific, and local reason codes. You can also export the explanations to CSV. - Global Reason Codes: To view global reason codes, click Cluster and\n select Global from the list of options. With Global selected, click\n the Explanations button located in the upper-right corner. - Cluster Reason Codes: To view reason codes for a specific cluster,\n click Cluster and select a specific cluster from the list of\n options. With a cluster selected, click the Explanations button. 
- Local Reason Codes by Row Number: To view local reason codes for a\n specific row, select a point on the graph or type a value in the Row\n Number or Feature Value field.", + "prompt_type": "plain" + }, + { + "output": "Configuration and Authentication\n\nconfig-usage config_docs/index\n\nconfig_toml setting-environment-variables user-settings connectors\nnotifications export-artifacts language multinode snowflake-integration\npip-install\n\nauthentication", + "prompt_type": "plain" + }, + { + "output": "Add Custom Recipes\nCustom recipes are Python code snippets that can be uploaded into\nDriverless AI at runtime like plugins. Restarting Driverless AI is not\nrequired. If you do not have a custom recipe, you can select from a\nnumber of recipes available in the Recipes for H2O Driverless AI\nrepository. For more information and examples, refer to custom-recipes. To add a custom recipe to Driverless AI, click Add Custom Recipe and\nselect one of the following options:\n- From computer: Add a custom recipe as a Python or ZIP file from your\n local file system. - From URL: Add a custom recipe from a URL. - From Bitbucket: Add a custom recipe from a Bitbucket repository. To\n use this option, your Bitbucket username and password must be\n provided along with the custom recipe Bitbucket URL. Official Recipes (Open Source)\nTo access H2O's official recipes repository, click Official Recipes\n(Open Source). Editing the TOML Configuration\nTo open the built-in TOML configuration editor, click TOML in the\nexpert-settings window.", + "prompt_type": "plain" + }, + { + "output": "make_mojo_scoring_pipeline = \"off\"is displayed in the TOML editor. The TOML configuration editor lets you manually add, remove, or edit expert setting parameters. To confirm your changes, click **Save**. The experiment preview updates to reflect your specified configuration changes. For a full list of available settings, see :ref:`expert-settings`. .. 
note:: Do not edit the section below the [recipe_activation] line. This\n section provides Driverless AI with information about which custom\n recipes can be used by the experiment. This is important for keeping\n experiments comparable when performing retrain / refit operations.\n\nNote\n\nFor information on TOML, see https://toml.io/en/v0.4.0.", + "prompt_type": "plain" + }, + { + "output": "Automated Model Documentation (AutoDoc)\n\nThis section describes Driverless AI's AutoDoc feature.\n\nautodoc-using autodoc-placeholders", + "prompt_type": "plain" + }, + { + "output": "MOJO Scoring Pipelines\n\nThe MOJO Scoring Pipeline provides a standalone scoring pipeline that\nconverts experiments to MOJOs, which can be scored in real time. The\nMOJO Scoring Pipeline is a scoring engine that can be deployed in any\nJava environment (Java Runtime) or in a Python or R environment (C++\nruntime) for scoring in real time or batch. For deployment options, see\nDeploying the MOJO Pipeline to production.\n\nscoring-mojo-scoring-pipeline scoring-pipeline-cpp mojo2_javadoc\nscoring-klime-mojo-scoring-pipeline", + "prompt_type": "plain" + }, + { + "output": "Scoring on Another Dataset\n\nAfter you generate a model, you can use that model to make predictions\non another dataset.\n\n1. Click the Experiments link in the top menu and select the experiment\n that you want to use.\n2. On the completed Experiment page, click Model Actions > Predict.\n3. Select the new dataset (test set) that you want to score on. Note\n that this new dataset must include the same columns as the dataset\n used in the selected experiment.\n4. Select the columns from the test set to include in the predictions\n frame.\n5. Click Done to start the scoring process.\n6. Click the Download Predictions button after scoring is complete.\n\nNote: This feature runs batch scoring on a new dataset. 
You may notice\nslow speeds if you attempt to perform single-row scoring.", + "prompt_type": "plain" + }, + { + "output": "Productionizing Your Model\n\nH2O.ai outputs the best model in an experiment. This model can then be\ndownloaded and then saved to a production environment.\n\nRun the following commands in Python 3.8 to save the displayed model as\na .csv. Note that Python 3.8 is the only supported Python version for\nuse with H2O.ai.\n\n ## final pipeline (logic, not state)\n pipe = population[best_id].get_pipe()\n\n ## final pipeline state, based on LARGE training data\n train_df_munged, y_munged = pipe.fit_transform(train_df, y)\n #train_df_munged.to_csv(\"munged_amazon_train.csv\", index=False)\n\n ## Load Kaggle test set without response, convert to munged state\n # test = \"../../../../h2oai-benchmarks/Data/Amazon/test.csv\"\n # test_df = dt.fread(test).topandas()\n test_df = train_df\n test_df_munged = pipe.transform(test_df)\n #test_df_munged.to_csv(\"munged_amazon_test.csv\", index=False)", + "prompt_type": "plain" + }, + { + "output": "Visualizing the Scoring Pipeline\n\nA visualization of the scoring pipeline is available for each completed\nexperiment.\n\nNotes:\n\n- This pipeline is best viewed in the latest version of Chrome.\n- A .png image of this pipeline is available in the AutoDoc \n and in the mojo.zip file ONLY with the Driverless AI Docker image.\n For tar, deb, and rpm installs, you must install Graphviz manually\n in order for the visualization pipeline to be included in the\n AutoDoc and mojo.zip.\n\nClick the Visualize Scoring Pipeline (Experimental) button on the\ncompleted experiment page to view the visualization.\n\n[]\n\nTo view a visual representation of a specific model, click on the oval\nthat corresponds with that model.\n\n[]\n\n[]\n\nTo change the orientation of the visualization, click the Transpose\nbutton in the bottom right corner of the screen.\n\n[]", + "prompt_type": "plain" + }, + { + "output": "Configuration 
Security\nDriverless AI provides the option to store sensitive or secure\nconfiguration information in an encrypted keystore as an alternative to\nkeeping security settings as clear text in the config.toml file. Updates to config override chain\nThe Configuration Override Chain has been updated to load the settings\nfrom the encrypted keystore after the settings are read from the plain\ntext config.toml file. The Environment Variable can still override the\nvalues from the keystore:\n 1. h2oai/config/config.toml\n [Internal, not visible to users]\n 2. config.toml\n [Place file in a folder/mount file in docker container and provide path\n in \"DRIVERLESS_AI_CONFIG_FILE\" environment variable]\n 3. Keystore file\n [Set keystore_file parameter in config.toml or environment variable\n \"DRIVERLESS_AI_KEYSTORE_FILE\" to point to a valid DAI keystore file \n generated using the h2oai.keystore tool. If env variable is set, the value\n in the config.toml for keystore_file path is overridden]\n 4.", + "prompt_type": "plain" + }, + { + "output": "They must have the prefix \"DRIVERLESS_AI_\" followed\n by the variable name in caps. For example, \"authentication_method\"\n can be provided as \"DRIVERLESS_AI_AUTHENTICATION_METHOD\"]\nKeystore setup workflow\nCreating the keystore\nAlthough the keystore file can contain any configuration parameter\nsupported by the config.toml, it is recommended to store only config\nparameters that contain secure/sensitive information in the keystore\nfile and use the regular config.toml file for other config parameters. Step 1: Create a cleartext config subset\nTo start, create a file config.clear that follows the TOML syntax of a\nregular config.toml file and contains the config parameters that you\nwant to store securely. 
For example:\n vagrant@ubuntu-bionic:~$ cat /home/vagrant/config.clear\n # ldap connection details\n ldap_bind_password = \"somepassword\"\n # Snowflake Connector credentials\n snowflake_url = \"https://sampleurl\"\n snowflake_user = \"sampleuser\"\n snowflake_password = \"samplepass\"\n snowflake_account = \"sampleaccount\"\n vagrant@ubuntu-bionic:~$\nStep 2: Using the h2oai.keystore tool to create keystore\nThe keystore should be placed so that it is accessible by root or the\nuser id with which the Driverless AI process is running.", + "prompt_type": "plain" + }, + { + "output": "h2oai.keystoretool: - The keystore tool needs to be run asrootand within the context of Driverless AI Python environment provided by thedai-env.shscript. - Theadd-keyscommand accepts the path to keystore as the first argument and the clear text config.toml subset as the second. - If the keystore does not exist, it is created. - All keys in theconfig.clearare either Inserted or Updated in the keystore. If a key already exists in the key store, it is updated. If the keystore contains any keys that are not inconfig.clear, they are not altered. - Once the keystore file is created, it is recommended to ensure the following: - Ownership is with root user with read and write permissions. - Change group ownership to the Driverless group (or the appropriate ID that matches the group ID with which the Driverless processes run in your system) with read only permissions. No other user or group should have read access to this file. 
- Theconfig.keystorefile is created along with the ownership permissions.", + "prompt_type": "plain" + }, + { + "output": "If root access shell is available; this step can be skipped (root) # /opt/h2oai/dai/dai-env.sh python -m h2oai.keystore add-keys /etc/dai/config.keystore /home/vagrant/config.clear ....some output here ====================================================================== Key: ldap_bind_password; Action: Inserted Key: snowflake_url; Action: Inserted Key: snowflake_user; Action: Inserted Key: snowflake_password; Action: Inserted Key: snowflake_account; Action: Inserted (root) # ls -l /etc/dai total 240 -rw-rw-r-- 1 root root 353 Jul 14 03:28 EnvironmentFile.conf -rw-r--r-- 1 root root 210 Jul 20 06:57 Group.conf -rw-r--r-- 1 root root 209 Jul 20 06:57 User.conf -rw-r----- 1 root dai 236 Jul 20 07:09 config.keystore -rw-r--r-- 1 root root 157135 Jul 20 07:17 config.toml -rw-rw-r-- 1 root root 347 Jul 14 03:28 jaas.conf -rw-r--r-- 1 root root 62206 Jul 20 06:57 redis.conf (root) # chown root:dai /etc/dai/config.keystore (root) # chmod 640 /etc/dai/config.keystore **Step 3: Using h2oai.keystore tool to manage keystore** Theh2oai.keystoretool provides three commands for keystore management: -add-keys: Adds or updates the Driverless AI secrets keystore with config.", + "prompt_type": "plain" + }, + { + "output": "Using AutoDoc\nThe following sections describe Driverless AI's AutoDoc feature. - understanding-autodoc\n- generate-autodoc\n- configure-autodoc\n- autodoc-custom\nUnderstanding AutoDoc\nThe AutoDoc feature is used to generate automated machine learning\ndocumentation for individual Driverless AI experiments. This editable\ndocument contains an overview of the experiment and includes other\nsignificant details like feature engineering and final model\nperformance. To download and view a sample experiment report in Word format,\nclick here . AutoDoc Support\nAutoDoc only supports resumed experiments for certain Driverless AI\nversions. 
See the following table to check the types of resumed\nexperiments that are supported for your version:\n ---------------------------------------------------------------------\n AutoDoc Support for Resumed Experiments Via 1.7.0 and older 1.7.1 1.9.0 and later\n ---------------------------------- ------------ ---- ----------------\n New experiment with same settings yes yes yes\n Restart from last checkpoint no yes yes\n Retrain final pipeline no no yes\n ---------------------------------------------------------------------\nNote\n- To ensure that AutoDoc pipeline visualizations are generated correctly\non native installations, installing fontconfig is recommended.", + "prompt_type": "plain" + }, + { + "output": "- Reports for unsupported resumed experiments\nwill still build, but they will only include the following text:\n\"AutoDoc not yet supported for resumed experiments.\" Custom AutoDocs\nAll Driverless AI experiments can generate either a standard or custom\nAutoDoc. A standard AutoDoc uses the default AutoDoc template that is\nincluded with Driverless AI, while a custom AutoDoc uses a\ncustomer-specific template that Driverless AI automatically populates. If you are interested in creating a custom AutoDoc, contact\nsupport@h2o.ai. If you have already purchased a custom AutoDoc template\nand want to learn how to generate custom AutoDocs from your experiments,\nsee autodoc-custom. Note\n- For a list of custom AutoDoc placeholders, see autodoc_placeholders. -\nCustom AutoDocs are Driverless AI version-specific. BYOR Recipes with AutoDoc\nThe experiment AutoDoc supports experiments that use custom scorers,\ntransformers, or models. Custom scorers and transformers are documented\nthe same as Driverless AI scorers and transformers.", + "prompt_type": "plain" + }, + { + "output": "(Note: custom-transformer descriptions are\ncurrently shown as \"None\" in this section.) 
For custom models, the\nstandard performance metrics and plots are included; however,\ninformation that Driverless AI cannot access is not included, or is\nshown as \"custom\", \"unavailable\", or \"auto.\" For example, in the Model\nTuning table, the booster is listed as \"custom\", and in the Alternative\nModels section, the model package documentation is listed as\n\"unavailable.\" Generating an AutoDoc\nThree different approaches can be used to generate an AutoDoc:\n- autodoc-experiment-ui\n- autodoc-mli-ui\n- autodoc-python-client\nNotes:\n- For more information on how to configure plots/tables and\n enable/disable specific sections in the AutoDoc, see\n configure-autodoc. - These approaches also apply to custom AutoDocs. For more\n information, see autodoc-custom. Experiment UI\nNavigate to the Experiments page and click on the completed experiment\nyou want to generate an AutoDoc for. If AutoDoc was not previously enabled for the experiment, click the\nBuild AutoDoc button.", + "prompt_type": "plain" + }, + { + "output": "[]\nMLI UI\nNavigate to the MLI page and click on the completed experiment you want\nto generate an AutoDoc for. Select AutoDoc from the MLI RECIPES's menu and optionally select\nexplainers that can be included in the AutoDoc (the standard AutoDoc\nsupports the k-LIME Explainer and DT Surrogate Explainer). []\nThe Standard AutoDoc with Explainers:\n[]\nPython Client\n- autodoc-generate-driverlessai\nAutoDoc Functions\n- create_and_download_autodoc()\n- make_autodoc_sync()\nFor local downloads:\n create_and_download_autodoc(\n model_key:str,\n template_path:str='',\n config_overrides:str='',\n dest_path:str='. 
',\n mli_key:str='',\n individual_rows:list=[], \n external_dataset_keys:list=[])\nTo save an AutoDoc to the DAI experiment directory (recommended if local\ndownloads are disabled):\n make_autodoc_sync(\n model_key:str,\n template_path:str='',\n config_overrides:str='',\n mli_key:str='',\n individual_rows:list=[], \n external_dataset_keys:list=[])\n- model_key: The experiment key string.", + "prompt_type": "plain" + }, + { + "output": "- config_overrides: The TOML string format with configurations\n overrides for the AutoDoc. - dest_path: The local path where the AutoDoc should be saved. - mli_key: The mli key string. - individual_rows: List of row indices for rows of interest in the\n training dataset, for which additional information can be shown\n (ICE, LOCO, KLIME). - external_dataset_keys: List of DAI dataset keys. driverlessai\nConnect to a running DAI instance:\n import driverlessai\n address = 'http://ip_where_driverless_is_running:12345'\n username = 'username'\n password = 'password'\n dai = driverlessai.Client(address=address, username=username, password=password)\nGenerate an AutoDoc and download it to your current working directory:\n report = dai._backend.create_and_download_autodoc(\n model_key=exp_key,\n dest_path:str='. 
',\n )\nConfiguring AutoDoc\nThe plots, tables, and sections of an AutoDoc can be configured through\nfour different workflows:\n- config-experiment-expert\n- config-mli-expert\n- config-python-client\n- config.toml file \nYou can also configure the font of an AutoDoc by setting\nthe H2O_AUTODOC_PLOTS_FONT_FAMILY environment variable.", + "prompt_type": "plain" + }, + { + "output": "The following are several commonly used\nconfiguration parameters:\n import toml\n # Set the document to limit features displayed to the top ten\n config_dict={\n \"autodoc_num_features\": 10\n }\n # Partial Dependence Plots (PDP) and ICE Plots\n config_dict[\"autodoc_pd_max_runtime\"] = 60\n config_dict[\"autodoc_num_rows\"] = 4\n # Prediction statistics\n config_dict[\"autodoc_prediction_stats\"] = True\n config_dict[\"autodoc_prediction_stats_n_quantiles\"] = 10\n # Population Stability Index (PSI)\n config_dict[\"autodoc_population_stability_index\"] = True\n config_dict[\"autodoc_population_stability_index_n_quantiles\"] = 10\n # Permutation feature importance\n config_dict[\"autodoc_include_permutation_feature_importance\"] = True\n config_dict[\"autodoc_feature_importance_scorer\"] = \"GINI\"\n config_dict[\"autodoc_feature_importance_num_perm\"] = 1\n # Response rates (only applicable to Binary classification)\n config_dict[\"autodoc_response_rate\"] = True\n config_dict[\"autodoc_response_rate_n_quantiles\"] = 10\n toml_string = toml.dumps(config_dict)\n print(toml_string)\nAfter setting these parameters, generate an AutoDoc and download it to\nyour current working directory:\ndriverlessai\n report = dai._backend.create_and_download_autodoc(\n model_key=exp_key,\n config_overrides=config_overrides,\n dest_path:str='.", + "prompt_type": "plain" + }, + { + "output": "Note: The following steps assume that DAI has been installed on an EC2\ninstance or an Ubuntu lab machine. 
These steps still apply if you are\nusing H2O Enterprise Puddle to run a DAI instance\u2014just log in to the EC2\ninstance where the DAI service is running using the provided SSH key. If the DAI service has not been started\n1. Create an EC2 instance with enough memory and storage to run DAI. 2. Install the font you want to use. In this example, the font\n TakaoPGothic is used. 3. Create and install the DAI debian file. 4. Set the font setting environment variable by adding the following\n line to the EnvironmentFile.conf file. 5. Start the DAI service. If the DAI service has already been started\n1. Ensure that the font is available on your system. In this example,\n the font TakaoPGothic is used. 2. Stop the DAI service. 3. Set the font setting environment variable by adding the following\n line to the EnvironmentFile.conf file. 4. Start the DAI service. Generating a Custom AutoDoc\nThis section describes how to generate an AutoDoc from a custom AutoDoc\ntemplate.", + "prompt_type": "plain" + }, + { + "output": "config.tomlsettings: -autodoc_template: Specify the path for the main template file. -autodoc_additional_template_folder: If you have additional custom sub-templates, use this setting to specify the location of additional AutoDoc templates. Note that if this field is left empty, only the default sub-templates folder is used. To generate custom AutoDocs, Driverless AI must have access to the custom template(s). To make sure that Driverless AI has access, update the path in the following example with your own path: .. code:: autodoc_template=\"/full/path/to/your/custom_autodoc_template.docx\" # Required if you have additional custom sub-templates. 
autodoc_additional_template_folder=\"/path/to/additional_templates_folder\" Custom AutoDoc for Individual Experiments ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can use the Python Client to generate standard or custom AutoDocs from an experiment by setting thetemplate_pathvariable to your custom AutoDoc's path: .. code:: template_path='/full/path/to/your/custom_autodoc_template.docx' **Python Client**:driverlessai``\n report = dai._backend.create_and_download_autodoc(\n model_key=exp_key,\n template_path=template_path,\n dest_path:str='.", + "prompt_type": "plain" + }, + { + "output": "Snowflake Setup\n\nDriverless AI allows you to explore Snowflake data sources from within\nthe Driverless AI application. This section provides instructions for\nconfiguring Driverless AI to work with Snowflake. This setup requires\nyou to enable authentication. If you enable Snowflake connectors, those\nfile systems will be available in the UI, but you will not be able to\nuse those connectors without authentication.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run --runtime=nvidia(>= Docker 19.03) ornvidia-docker(< Docker 19.03) command when starting the Driverless AI Docker image. Usedocker versionto check which version of Docker you are using. Description of Configuration Attributes --------------------------------------- -snowflake_account: The Snowflake account ID -snowflake_user: The username for accessing the Snowflake account -snowflake_password: The password for accessing the Snowflake account -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. Enable Snowflake with Authentication ------------------------------------ .. container:: tabs .. group-tab:: Docker Image Installs This example enables the Snowflake data connector with authentication by passing theaccount,user, andpasswordvariables. .. 
code:: bash nvidia-docker run \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,snow\" \\ -e DRIVERLESS_AI_SNOWFLAKE_ACCOUNT=\"\" \\ -e DRIVERLESS_AI_SNOWFLAKE_USER=\"\" \\ -e DRIVERLESS_AI_SNOWFLAKE_PASSWORD=\"\" \\ -u `id -u`:`id -g` \\ -p 12345:12345 \\ -v `pwd`/data:/data \\ -v `pwd`/log:/log \\ -v `pwd`/license:/license \\ -v `pwd`/tmp:/tmp \\ -v `pwd`/service_account_json.json:/service_account_json.json \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure Snowflake options in the config.toml file, and then specify that file when starting Driverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "Configure the Driverless AI config.toml file. Set the following configuration options. .. - enabled_file_systems = \"file, snow\" - snowflake_account = \"\" - snowflake_user = \"\" - snowflake_password = \"\" 2. Mount the config.toml file into the Docker container. .. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example enables the Snowflake data connector with authentication by passing the account, user, and password variables.", + "prompt_type": "plain" + }, + { + "output": "Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. 
.. :: # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, snow\" # Snowflake Connector credentials snowflake_account = \"\" snowflake_user = \"\" snowflake_password = \"\" 3. Save the changes when you are done, then stop/restart Driverless AI. Adding Datasets Using Snowflake ------------------------------- After the Snowflake connector is enabled, you can add datasets by selecting **Snowflake** from the **Add Dataset (or Drag and Drop)** drop-down menu. .. figure:: ../images/add_dataset_dropdown.png :alt: :width: 237px :height: 338px Specify the following information to add your dataset. 1. **Enter Database**: Specify the name of the Snowflake database that you are querying. 2. **Enter Warehouse**: Specify the name of the Snowflake warehouse that you are querying. 3. 
**Enter Schema**: Specify the schema of the dataset that you are querying.", + "prompt_type": "plain" + }, + { + "output": "**Enter Name for Dataset to Be Saved As**: Specify a name for the dataset to be saved as. Note that this can only be a CSV file (for example, **myfile.csv**). 5. **Enter Username**: (Optional) Specify the username associated with this Snowflake account. This can be left blank ifsnowflake_userwas specified in the config.toml when starting Driverless AI; otherwise, this field is required. 6. **Enter Password**: (Optional) Specify the password associated with this Snowflake account. This can be left blank ifsnowflake_passwordwas specified in the config.toml when starting Driverless AI; otherwise, this field is required. 7. **Enter Role**: (Optional) Specify your role as designated within Snowflake. See https://docs.snowflake.net/manuals/user-guide/security-access-control-overview.html for more information. 8. **Enter Region**: (Optional) Specify the region of the warehouse that you are querying. This can be found in the Snowflake-provided URL to access your database (as in **...snowflakecomputing.com**).", + "prompt_type": "plain" + }, + { + "output": "9. **Enter File Formatting Parameters**: (Optional) Specify any additional parameters for formatting your datasets. Available parameters are listed in https://docs.snowflake.com/en/sql-reference/sql/create-file-format.html#type-csv. (**Note**: Use only parameters forTYPE = CSV.) For example, if your dataset includes a text column that contains commas, you can specify a different delimiter usingFIELD_DELIMITER='character'. Multiple parameters must be separated with spaces: .. :: FIELD_DELIMITER=',' FIELD_OPTIONALLY_ENCLOSED_BY=\"\" SKIP_BLANK_LINES=TRUE **Note**: Be sure that the specified delimiter is not also used as a character within a cell; otherwise an error will occur. 
For example, you might specify the following to load the \"AMAZON_REVIEWS\" dataset: - Database: UTIL_DB - Warehouse: DAI_SNOWFLAKE_TEST - Schema: AMAZON_REVIEWS_SCHEMA - Query: SELECT \\* FROM AMAZON_REVIEWS - Enter File Formatting Parameters (Optional): FIELD_OPTIONALLY_ENCLOSED_BY = '\"' In the above example, if the FIELD_OPTIONALLY_ENCLOSED_BY option is not set, the following row will result in a failure to import the dataset (as the dataset's delimiter is ',' by default): :: positive, 2012-05-03,Wonderful\\, tasty taffy,0,0,3,5,2012,Thu,0 **Note**: Numeric columns from Snowflake that have NULL values are sometimes converted to strings (for example, N).", + "prompt_type": "plain" + }, + { + "output": "H2O Drive setup\nH2O Drive is an object-store for H2O AI Cloud. This page describes how\nto configure Driverless AI to work with H2O Drive. Note: For more information on the H2O Drive, refer to the official\ndocumentation. Description of relevant configuration attributes\nThe following are descriptions of the relevant configuration attributes\nwhen enabling the H2O Drive data connector:\n- enabled_file_systems: A list of file systems you want to enable. To\n enable the H2O Drive data connector, h2o_drive must be added to\n this list of data sources. - h2o_drive_endpoint_url: The H2O Drive server endpoint URL. - h2o_drive_access_token_scopes: A space-separated list of OpenID\n scopes for the access token that are used by the H2O Drive\n connector. - h2o_drive_session_duration: The maximum duration in seconds for a\n session with the H2O Drive. - authentication_method: The authentication method used by DAI. When\n enabling the H2O Drive data connector, this must be set to\n OpenID Connect (authentication_method=\"oidc\").", + "prompt_type": "plain" + }, + { + "output": "Data Recipe File Setup\nDriverless AI lets you explore data recipe file data sources from within\nthe Driverless AI application. 
This section provides instructions for\nconfiguring Driverless AI to work with local data recipe files. When\nenabled (default), you will be able to modify datasets that have been\nadded to Driverless AI. (Refer to modify_by_recipe for more\ninformation.) Notes:\n- This connector is enabled by default. These steps are provided in\n case this connector was previously disabled and you want to\n re-enable it. - Depending on your Docker install version, use either the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command when starting the Driverless AI Docker image. Use docker version to check which version of Docker you are using. Enable Data Recipe File\nDocker Image Installs\nThis example enables the data recipe file data connector. nvidia-docker run \\\n --shm-size=256m \\\n --add-host name.node:172.16.2.186 \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,upload,recipe_file\" \\\n -p 12345:12345 \\\n --init -it --rm \\\n -v /tmp/dtmp/:/tmp \\\n -v /tmp/dlog/:/log \\\n -v /tmp/dlicense/:/license \\\n -v /tmp/ddata/:/data \\\n -u $(id -u):$(id -g) \\\n h2oai/dai-ubi8-x86_64:|tag|\nDocker Image with the config.toml\nThis example shows how to enable the Upload Data Recipe connector in the\nconfig.toml file, and then specify that file when starting Driverless AI\nin Docker.", + "prompt_type": "plain" + }, + { + "output": "1. Configure the Driverless AI config.toml file. Set the following\n configuration options. - enabled_file_systems = \"file, upload, recipe_file\"\n2. Mount the config.toml file into the Docker container. 
nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n --add-host name.node:172.16.2.186 \\\n -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\\n -p 12345:12345 \\\n -v /local/path/to/config.toml:/path/in/docker/config.toml \\\n -v /etc/passwd:/etc/passwd:ro \\\n -v /etc/group:/etc/group:ro \\\n -v /tmp/dtmp/:/tmp \\\n -v /tmp/dlog/:/log \\\n -v /tmp/dlicense/:/license \\\n -v /tmp/ddata/:/data \\\n -u $(id -u):$(id -g) \\\n h2oai/dai-ubi8-x86_64:|tag|\nNative Installs\nThis example enables the Upload Data Recipe data connector. Note that\nrecipe_file is enabled by default. 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example:\n2. Specify the following configuration options in the config.toml file.", + "prompt_type": "plain" + }, + { + "output": "Interpreting a Model\nModel interpretations can be run on a Driverless AI experiment or on the\npredictions created by an external model (that is, a model not created\nby Driverless AI). Use the Interpret This Model button on a completed experiment page to\ninterpret a Driverless AI model on original and transformed features. You can also click the MLI link from the top navigation menu to\ninterpret either a Driverless AI model or an external model. - Interpreting a Driverless AI Model \n- Interpreting Predictions From an External Model \nInterpreting a Driverless AI Model\nA completed Driverless AI model can be interpreted from either the\nInterpreted Models page or the completed_experiment. - from-mli-page\n- from-exp-page\nNote\n- This release deprecates experiments run in 1.8.9 and earlier. MLI\nmigration is not supported for experiments from versions <= 1.8.9. 
This\nmeans that you can't directly run interpretations on a Driverless AI\nmodel built using versions 1.8.9 and earlier, but you can still view\ninterpretations built using those versions.", + "prompt_type": "plain" + }, + { + "output": "- MLI is not supported for Image or\nmulticlass Time Series experiments. - MLI does not require an Internet\nconnection to run on current models. - To specify a port of a specific\nH2O instance for use by MLI, use the h2o_port\nconfig.toml setting. You can also specify an IP\naddress for use by MLI with the h2o_ip setting. Run Interpretations From Interpreted Models Page\nThe following steps describe how to run an interpretation from the\nInterpreted Models page. 1. Click the MLI link in the upper-right corner of the UI to view a\n list of interpreted models. 2. Click the New Interpretation button. The Interpretation Settings\n page is displayed. 3. Select a dataset to use for the interpretation. The selected\n dataset must contain the same columns as the training dataset used\n for the experiment. 4. Specify the Driverless AI model that you want to use for the\n interpretation. After you select a model, the Target Column used\n for the model is automatically selected.", + "prompt_type": "plain" + }, + { + "output": "Optionally specify which MLI recipes (or\n Explainers) to run. You can also change\n Explainer (recipe) specific settings when\n selecting which recipes to use for the interpretation. 6. Optionally specify any additional\n Interpretation Expert Settings to use when\n running this interpretation. 7. Optionally specify a weight column. 8. Optionally specify one or more dropped columns. Columns that were\n dropped when the model was created are automatically dropped for\n the interpretation. 9. Click the Launch MLI button. Run Interpretation From Completed Experiment Page\nThe following steps describe how to run an interpretation from the\ncompleted_experiment. 1. 
On the Completed Experiment page, click the Interpret This Model\n button. 2. Select a dataset to use for the interpretation. The selected dataset\n must contain the same columns as the training dataset used for the\n experiment.", + "prompt_type": "plain" + }, + { + "output": "Select one of the following options:\n - With Default Settings: Run an interpretation using the default\n settings. - With Custom Settings: Run an interpretation using custom\n settings. Selecting this option opens the Interpretation\n Settings page, where you can specify which\n MLI recipes (explainers) to use for the\n interpretation and change\n explainer-specific settings and\n interpretation expert settings . To run\n an interpretation with your specified custom settings, click\n the Launch MLI button. The interpretation includes a summary of the interpretation,\ninterpretations using the built Driverless AI model, and interpretations\nusing surrogate models that are built on the predictions from the\nDriverless AI model. For information on the available plots, see\ninterpret-regular-understand-model. The plots are interactive, and the logs / artifacts can be downloaded by\nclicking on the Actions button.", + "prompt_type": "plain" + }, + { + "output": "More information about this page is available in the\nUnderstanding the Model Interpretation Page \nsection later in this chapter. []\nInterpreting Predictions From an External Model\nModel Interpretation does not need to be run on a Driverless AI\nexperiment. You can train an external model and run Model\nInterpretability on the predictions from the model. This can be done\nfrom the MLI page. 1. Click the MLI link in the upper-right corner of the UI to view a\n list of interpreted models. 2. Click the New Interpretation button. 3. Leave the Select Model option to none\n 4. Select the dataset that you want to use for the model\n interpretation. This must include a prediction column that was\n generated by the external model. 
If the dataset does not have\n predictions, then you can join the external predictions. An\n example showing how to do this in Python is available in the Run\n Model Interpretation on External Model Predictions section of the\n Credit Card Demo.", + "prompt_type": "plain" + }, + { + "output": "Specify a Target Column (actuals) and the Prediction Column\n (scores from the external model). 6. Optionally specify any additional MLI\n Expert Settings to use when running this\n interpretation. 7. Optionally specify a weight column. 8. Optionally specify one or more dropped columns. Columns that were\n dropped when the model was created are automatically dropped for\n the interpretation. 9. Click the Launch MLI button. Note: When running interpretations on an external model, leave the\n Select Model option empty. That option is for selecting a Driverless\n AI model. The generated interpretation includes the plots and explanations created\nusing the surrogate models and a summary. For more information, see\ninterpret-regular-understand-model. Explainer Recipes\nDriverless AI Machine Learning Interpretability comes with a number of\nout-of-the-box explainer recipes for model interpretation that can be\nenabled when\nrunning a new interpretation from the MLI page .", + "prompt_type": "plain" + }, + { + "output": "And a list of explainer\nspecific expert settings can be found here . 
The following is a list of available recipes:\n- Absolute Permutation Feature Importance\n- AutoDoc\n- Disparate Impact Analysis\n- Interpretability Data ZIP (Surrogate and Shapley Techniques)\n- NLP Leave-one-covariate-out (LOCO)\n- NLP Partial Dependence Plot\n- NLP Tokenizer\n- NLP Vectorizer + Linear Model (VLM) Text Feature Importance\n- Original Feature Importance\n- Partial Dependence Plot\n- Relative Permutation Feature Importance\n- Sensitivity Analysis\n- Shapley Summary Plot for Original Features (Naive Shapley Method)\n- Shapley Values for Original Features (Kernel SHAP Method)\n- Shapley Values for Original Features (Naive Method)\n- Shapley Values for Transformed Features\n- Surrogate Decision Tree\n- Surrogate Random Forest Importance\n- Surrogate Random Forest Leave-one-covariate-out (LOCO)\n- Surrogate Random Forest Partial Dependence Plot\n- Transformed Feature Importance\n- k-LIME / LIME-SUP\n []\nThis recipe list is extensible, and users can create their own custom\nrecipes.", + "prompt_type": "plain" + }, + { + "output": "[]\nInterpretation Expert Settings\nWhen interpreting from the MLI page , a variety of\nconfiguration options are available in the Interpretation Expert\nSettings panel that let you customize interpretations. Recipe-specific\nsettings are also available for some recipes. Use the search bar to\nrefine the list of settings or locate a specific setting. For more information on each of these settings, see\ninterpretation-expert-settings. Also see for\nexplainer (recipe) specific expert settings. Notes:\n - The selection of available expert settings is determined by the\n type of model you want to interpret and the specified LIME method. - Expert settings are not available for time-series models. 
Expert Settings from Recipes (Explainers)\nFor some recipes like\nDriverless AI Partial dependence ,\nDisparate Impact Analysis (DIA) explainer and\nDT (Decision Tree) Surrogate explainer , some of the\nsettings can be toggled from the recipe page.", + "prompt_type": "plain" + }, + { + "output": "Before You Begin\nDriverless AI can run on machines with only CPUs or machines with CPUs\nand GPUs. For the best (and intended-as-designed) experience, install\nDriverless AI on modern data center hardware with GPUs and CUDA support. Feature engineering and model building are primarily performed on CPU\nand GPU respectively. For this reason, Driverless AI benefits from\nmulti-core CPUs with sufficient system memory and GPUs with sufficient\nRAM. For best results, we recommend GPUs that use the Pascal or Volta\narchitectures. The older K80 and M60 GPUs available in EC2 are supported\nand very convenient, but not as fast. Ampere-based NVIDIA GPUs are also\nsupported on x86, as Driverless AI ships with NVIDIA CUDA 11.2.2\ntoolkit. Image processing and NLP use cases in particular, benefit\nsignificantly from GPU usage. For details, see gpu_in_dai. Driverless AI supports local, LDAP, and PAM authentication. Authentication can be configured by setting environment variables or via\na config.toml file.", + "prompt_type": "plain" + }, + { + "output": "Note that the default authentication method is \"unvalidated.\" Driverless AI also supports HDFS, S3, Google Cloud Storage, Google Big\nQuery, KDB, MinIO, and Snowflake access. Support for these data sources\ncan be configured by setting environment variables for the data\nconnectors or via a config.toml file. Refer to the Data Connectors\nsection for more information. Sizing Requirements\nSizing Requirements for Native Installs\nDriverless AI requires a minimum of 5 GB of system memory in order to\nstart experiments and a minimum of 5 GB of disk space in order to run a\nsmall experiment. 
Note that these limits can be changed in the config.toml\nfile. We recommend that you have sufficient system CPU memory (64 GB or\nmore) and 1 TB of free disk space available. Sizing Requirements for Docker Installs\nFor Docker installs, we recommend 1 TB of free disk space. Driverless AI\nuses approximately 38 GB. In addition, the unpacking/temp files require\nspace on the same Linux mount /var during installation.", + "prompt_type": "plain" + }, + { + "output": "GPU Sizing Requirements\nIf you are running Driverless AI with GPUs, ensure that your GPU has\ncompute capability >=3.5 and at least 4GB of RAM. If these requirements\nare not met, then Driverless AI switches to CPU-only mode. Sizing Requirements for Storing Experiments\nWe recommend that your Driverless tmp directory has at least 500 GB to 1\nTB of space. The (Driverless) tmp directory holds all experiments and\nall datasets. We also recommend that you use SSDs (preferably NVMe). Virtual Memory Settings in Linux\nIf you are running Driverless AI on a Linux machine, we recommend\nsetting the overcommit memory to 0. The setting can be changed with the\nfollowing command:\n sudo sh -c \"/bin/echo 0 > /proc/sys/vm/overcommit_memory\"\nThis is the default value that indicates that the Linux kernel is free\nto overcommit memory. If this value is set to 2, then the Linux kernel\ndoes not overcommit memory. In the latter case, the memory requirements\nof Driverless AI may surpass the memory allocation limit and prevent the\nexperiment from completing.", + "prompt_type": "plain" + }, + { + "output": "--shm-size=2g``\n\nWithout this option, those packages will fail. Triton inference server\nalso requires this option be set, and if under heavy load, may require\neven larger values than 2g.\n\nDocker resource limits\n\nDAI controls various resources and needs more resources than what\nsystems typically set by default. 
You can use the following option to\nensure that DAI is given enough resources:", + "prompt_type": "plain" + }, + { + "output": "--ulimit nofile=131071:131071 --ulimit nproc=16384:16384``\n\nWithout this option, DAI crashes under load.\n\nDocker NICE\n\nAs stated in the official Docker documentation, the", + "prompt_type": "plain" + }, + { + "output": "--cap-add=SYS_NICE option grants the container the CAP_SYS_NICE capability, which lets the container raise process nice values, set real-time scheduling policies, set CPU affinity, and other operations. If this flag isn't passed when starting the container, DAI isn't able to control resources and can end up with all processes only using a single core. This is also required to use the built-in NVIDIA Triton Inference Server and its use of non-uniform memory access (NUMA) control. Memory Requirements per Experiment ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ As a rule of thumb, the memory requirement per experiment is approximately 5 to 10 times the size of the dataset. Dataset size can be estimated as the number of rows x columns x 4 bytes; if text is present in the data, then more bytes per element are needed. Backup Strategy --------------- The **Driverless AI tmp** directory is used to store all experiment artifacts such as deployment artifacts and MLIs. It also stores the master.db database that tracks users to Driverless artifacts.", + "prompt_type": "plain" + }, + { + "output": "We recommend periodically stopping Driverless AI and backing up the **Driverless AI** **tmp** directory to ensure that a copy of the Driverless AI state is available for instances where you may need to revert to a prior state. Upgrade Strategy ---------------- When upgrading Driverless AI, note that: - Image models from version 1.9.x aren't supported in 1.10.x. All other models from 1.9.x are supported in 1.10.x. - (**MLI**) Interpretations made in version 1.9.0 are supported in 1.9.x and later. 
- (**MLI**) Interpretations made in version 1.8.x aren't supported in 1.9.x and later. However, interpretations made in 1.8.x can still be viewed and rerun. - We recommend following these steps before upgrading: - *Build MLI models*: Before upgrading, run MLI jobs on models that you want to continue to interpret in future Driverless AI releases. If an MLI job appears in the list of Interpreted Models in your current version, then it is retained after upgrading.", + "prompt_type": "plain" + }, + { + "output": "- Stop Driverless AI and make a backup (copy) of the **Driverless AI** **tmp** directory. The upgrade process inherits the service user and group from /etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually specify the DAI_USER or DAI_GROUP environment variables during an upgrade. **Note**: Driverless AI does not support data migration from a newer version to an older version. If you roll back to an older version of Driverless AI after upgrading, newer versions of the **master.db** file will not work with the older Driverless AI version. For this reason, we recommend saving a copy of the older 'tmp' directory to fully restore the older Driverless AI version's state. Other Notes ----------- Supported Browsers ~~~~~~~~~~~~~~~~~~ Driverless AI is tested most extensively on Chrome and Firefox. For the best user experience, we recommend using the latest version of Chrome. You may encounter issues if you use other browsers or earlier versions of Chrome and/or Firefox.", + "prompt_type": "plain" + }, + { + "output": "ulimit options by using the --ulimit argument to docker\nrun. The following is an example of how to configure these options: :: --ulimit nproc=65535:65535 \\ --ulimit nofile=4096:8192 \\ Refer to https://docs.docker.com/engine/reference/commandline/run/#set-ulimits-in-container---ulimit for more information on these options. 
Note about nvidia-docker 1.0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If you have nvidia-docker 1.0 installed, you need to remove it and all existing GPU containers. Refer to https://github.com/NVIDIA/nvidia-docker/blob/master/README.md for more information. Deprecation of nvidia-smi ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ``nvidia-smi``\ncommand has been deprecated by NVIDIA. Refer to\nhttps://github.com/nvidia/nvidia-docker#upgrading-with-nvidia-docker2-deprecated\nfor more information. The installation steps have been updated for\nenabling persistence mode for GPUs. Note About CUDA Versions\nDriverless AI ships with CUDA 11.2.2 for GPUs, but the driver must exist\nin the host environment.", + "prompt_type": "plain" + }, + { + "output": "NVIDIA driver >=\n471.68 installed in your environment, for a seamless experience on all NVIDIA architectures, including Ampere. Go to `NVIDIA download driver `__ to get the latest NVIDIA Tesla A/T/V/P/K series driver. For reference on CUDA Toolkit and Minimum Required Driver Versions and CUDA Toolkit and Corresponding Driver Versions, see `here `__ . .. note:: If you are using K80 GPUs, the minimum required NVIDIA driver version is 450.80.02. Note About Authentication ~~~~~~~~~~~~~~~~~~~~~~~~~ The default authentication setting in Driverless AI is \"unvalidated.\" In this case, Driverless AI will accept any login and password combination, it will not validate whether the password is correct for the specified login ID, and it will connect to the system as the user specified in the login ID. This is true for all instances, including Cloud, Docker, and native instances.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI provides a number of authentication options, including LDAP, PAM, Local, and None. Refer to :ref:`dai_auth` for information on how to enable a different authentication method. **Note**: Driverless AI is also integrated with IBM Spectrum Conductor and supports authentication from Conductor. 
Contact sales@h2o.ai for more information about using IBM Spectrum Conductor authentication. Note About Shared File Systems ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If your environment uses a shared file system, then you must set the following configuration option: :: datatable_strategy='write' The above can be specified in the `config.toml file `__ (for native installs) or specified as an `environment variable `__ (Docker image installs). This configuration is required because, in some cases, Driverless AI can fail to read files during an experiment. The ``write``\noption lets Driverless AI properly read and write data from shared file\nsystems to disk.", + "prompt_type": "plain" + }, + { + "output": "Using the MOJO Scoring Pipeline with Spark/Sparkling Water\nNote: The Driverless AI 1.5 release will be the last release with\nTOML-based MOJO2. Releases after 1.5 will include protobuf-based MOJO2. MOJO scoring pipeline artifacts can be used in Spark to deploy\npredictions in parallel using the Sparkling Water API. This section\nshows how to load and run predictions on the MOJO scoring pipeline in\nSpark using Scala and the Python API. In the event that you upgrade H2O Driverless AI, we have good news! Sparkling Water is backwards compatible with MOJO versions produced by\nolder Driverless AI versions. Requirements\n- You must have a Spark cluster with the Sparkling Water JAR file\n passed to Spark. - To run with PySparkling, you must have the PySparkling zip file. The H2OContext does not have to be created if you only want to run\npredictions on MOJOs using Spark. This is because the scoring is\nindependent of the H2O run-time. Preparing Your Environment\nIn order to use the MOJO scoring pipeline, the Driverless AI license has to be\npassed to Spark.", + "prompt_type": "plain" + }, + { + "output": "Note: In Local Spark mode, use --driver-class-path to specify the path to\nthe license file. PySparkling\nFirst, start PySpark with PySparkling Python package and Driverless AI\nlicense. 
./bin/pyspark --jars license.sig --py-files pysparkling.zip\nor, you can download official Sparkling Water distribution from H2O\nDownload page. Follow the steps on the Sparkling Water download page. Once you are in the Sparkling Water directory, you can call:\n ./bin/pysparkling --jars license.sig\nAt this point, you should have available a PySpark interactive terminal\nwhere you can try out predictions. If you would like to productionalize\nthe scoring process, you can use the same configuration, except instead\nof using ./bin/pyspark, you would use ./bin/spark-submit to submit your\njob to a cluster. # First, specify the dependencies\n from pysparkling.ml import H2OMOJOPipelineModel, H2OMOJOSettings\n # The 'namedMojoOutputColumns' option ensures that the output columns are named properly. # If you want to use old behavior when all output columns were stored inside an array,\n # set it to False.", + "prompt_type": "plain" + }, + { + "output": "settings = H2OMOJOSettings(namedMojoOutputColumns = True)\n # Load the pipeline. 'settings' is an optional argument. If it's not specified, the default values are used. mojo = H2OMOJOPipelineModel.createFromMojo(\"file:///path/to/the/pipeline.mojo\", settings)\n # Load the data as Spark's Data Frame\n dataFrame = spark.read.csv(\"file:///path/to/the/data.csv\", header=True)\n # Run the predictions. The predictions contain all the original columns plus the predictions\n # added as new columns\n predictions = mojo.transform(dataFrame)\n # You can easily get the predictions for a desired column using the helper function as\n predictions.select(mojo.selectPredictionUDF(\"AGE\")).collect()\nSparkling Water\nFirst, start Spark with Sparkling Water Scala assembly and Driverless AI\nlicense. ./bin/spark-shell --jars license.sig,sparkling-water-assembly.jar\nor, you can download official Sparkling Water distribution from H2O\nDownload page. 
Follow the steps on the Sparkling Water download page.", + "prompt_type": "plain" + }, + { + "output": "Install on NVIDIA GPU Cloud/NGC Registry\nDriverless AI is supported on the following NVIDIA DGX products, and the\ninstallation steps for each platform are the same. - NVIDIA GPU Cloud\n- NVIDIA DGX-1\n- NVIDIA DGX-2\n- NVIDIA DGX Station\nEnvironment\n ---------------------------------------------------------------\n Provider GPUs Min Memory Suitable for\n ---------------------------- ------ ------------ --------------\n NVIDIA GPU Cloud Yes Serious use\n NVIDIA DGX-1/DGX-2 Yes 128 GB Serious use\n NVIDIA DGX Station Yes 64 GB Serious Use\n ---------------------------------------------------------------\nInstalling the NVIDIA NGC Registry\nNote: These installation instructions assume that you are running on an\nNVIDIA DGX machine. Driverless AI is only available in the NGC registry\nfor DGX machines. 1. Log in to your NVIDIA GPU Cloud account at\n https://ngc.nvidia.com/registry. (Note that NVIDIA Compute is no\n longer supported by NVIDIA.)", + "prompt_type": "plain" + }, + { + "output": "In the Registry > Partners menu, select h2oai-driverless. 3. At the bottom of the screen, select one of the H2O Driverless AI\n tags to retrieve the pull command. 4. On your NVIDIA DGX machine, open a command prompt and use the\n specified pull command to retrieve the Driverless AI image. For\n example:\n5. Set up a directory for the version of Driverless AI on the host\n machine:\n6. Set up the data, log, license, and tmp directories on the host\n machine:\n7. At this point, you can copy data into the data directory on the host\n machine. The data will be visible inside the Docker container. 8. Enable persistence of the GPU. Note that this only needs to be run\n once. Refer to the following for more information:\n http://docs.nvidia.com/deploy/driver-persistence/index.html. 9. Run docker images to find the new image tag. 10. 
Start the Driverless AI Docker image and replace TAG below with the\n image tag. Depending on your install version, use the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command.", + "prompt_type": "plain" + }, + { + "output": "11. Connect to Driverless AI with your browser:\nStopping Driverless AI\nUse Ctrl+C to stop Driverless AI. Upgrading Driverless AI\nThe steps for upgrading Driverless AI on an NVIDIA DGX system are\nsimilar to the installation steps. WARNINGS:\n- This release deprecates experiments and MLI models from 1.7.0 and\n earlier. - Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded. We recommend you take the following steps before\n upgrading. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. Note: Use Ctrl+C to stop Driverless AI if it is still running.", + "prompt_type": "plain" + }, + { + "output": "R Client\n\nThis section describes how to install the Driverless AI R client.\nSeveral end-to-end examples that demonstrate how to use the client are\nalso provided. For more information on the R client, see the Driverless\nAI R client documentation.\n\nr_install_client r_client_tutorial", + "prompt_type": "plain" + }, + { + "output": "Experiment Graphs\nThis section describes the dashboard graphs that display for running and\ncompleted experiments. These graphs are interactive. Hover over a point\non the graph for more details about the point. 
Binary Classification Experiments\nFor Binary Classification experiments, Driverless AI shows a ROC Curve,\na Precision-Recall graph, a Lift chart, a Kolmogorov-Smirnov chart, and\na Gains chart. []\n- ROC: This shows Receiver-Operator Characteristics curve stats on\n validation data along with the best Accuracy, MCC, and F1 values. An\n ROC curve is a useful tool because it only focuses on how well the\n model was able to distinguish between classes. Keep in mind, though,\n that for models where one of the classes happens rarely, a high AUC\n could provide a false sense that the model is correctly predicting\n the results. This is where the notion of precision and recall become\n important. - Precision-Recall: This shows the Precision-Recall curve on\n validation data along with the best Accuracy, MCC, and F1 values.", + "prompt_type": "plain" + }, + { + "output": "Prec-Recall is a\n complementary tool to ROC curves, especially when the dataset has a\n significant skew. The Prec-Recall curve plots the precision or\n positive predictive value (y-axis) versus sensitivity or true\n positive rate (x-axis) for every possible classification threshold. At a high level, you can think of precision as a measure of\n exactness or quality of the results and recall as a measure of\n completeness or quantity of the results obtained by the model. Prec-Recall measures the relevance of the results obtained by the\n model. - Lift: This chart shows lift stats on validation data. For example,\n \"How many times more observations of the positive target class are\n in the top predicted 1%, 2%, 10%, etc. (cumulative) compared to\n selecting observations randomly?\" By definition, the Lift at 100% is\n 1.0. Lift can help answer the question of how much better you can\n expect to do with the predictive model compared to a random model\n (or no model).", + "prompt_type": "plain" + }, + { + "output": "In other\n words, the ratio of gain % to the random expectation % at a given\n quantile. 
The random expectation of the xth quantile is x%. - Kolmogorov-Smirnov: This chart measures the degree of separation\n between positives and negatives for validation or test data. - Gains: This shows Gains stats on validation data. For example, \"What\n fraction of all observations of the positive target class are in the\n top predicted 1%, 2%, 10%, etc. (cumulative)?\" By definition, the\n Gains at 100% are 1.0. Multiclass Classification Experiments\nFor multiclass classification experiments, a Confusion Matrix is\navailable in addition to the ROC Curve, Precision-Recall graph, Lift\nchart, Kolmogorov-Smirnov chart, and Gains chart. Driverless AI\ngenerates these graphs by considering the multiclass problem as multiple\none-vs-all problems. These graphs and charts (Confusion Matrix excepted)\nare based on a method known as micro-averaging (reference:\nhttp://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html#multiclass-settings).", + "prompt_type": "plain" + }, + { + "output": "The\npredictions would look something like this:\n+--------------------+-----------------------+-----------------------+\n| class.Iris-setosa | class.Iris-versicolor | class.Iris-virginica |\n+--------------------+-----------------------+-----------------------+\n| 0.9628 | 0.021 | 0.0158 |\n+--------------------+-----------------------+-----------------------+\n| 0.0182 | 0.3172 | 0.6646 |\n+--------------------+-----------------------+-----------------------+\n| 0.0191 | 0.9534 | 0.0276 |\n+--------------------+-----------------------+-----------------------+\nTo create these charts, Driverless AI converts the results to 3\none-vs-all problems:\n+--------+--------+---+----------+-----------+---+---------+----------+\n| prob | actual | | prob-v | actual-v | | prob-v | actual-v |\n| - | - | | e | ersicolor | | i | irginica |\n| setosa | setosa | | rsicolor | | | rginica | |\n+--------+--------+---+----------+-----------+---+---------+----------+\n| 0.9628 | 1 | | 0.021 | 0 
| | 0.0158 | 0 |\n+--------+--------+---+----------+-----------+---+---------+----------+\n| 0.0182 | 0 | | 0.3172 | 1 | | 0.6646 | 0 |\n+--------+--------+---+----------+-----------+---+---------+----------+\n| 0.0191 | 0 | | 0.9534 | 1 | | 0.0276 | 0 |\n+--------+--------+---+----------+-----------+---+---------+----------+\nThe result is 3 vectors of predicted and actual values for binomial\nproblems.", + "prompt_type": "plain" + }, + { + "output": "predicted = [0.9628, 0.0182, 0.0191, 0.021, 0.3172, 0.9534, 0.0158, 0.6646, 0.0276]\n actual = [1, 0, 0, 0, 1, 1, 0, 0, 0]\nMulticlass Confusion Matrix\nA confusion matrix shows experiment performance in terms of false\npositives, false negatives, true positives, and true negatives. For each\nthreshold, the confusion matrix represents the balance between TPR and\nFPR (ROC) or Precision and Recall (Prec-Recall). In general, most useful\noperating points are in the top left corner. In this graph, the actual results display in the columns and the\npredictions display in the rows; correct predictions are highlighted. In\nthe example below, Iris-setosa was predicted correctly 30 times, while\nIris-virginica was predicted correctly 32 times, and Iris-versicolor was\npredicted as Iris-virginica 2 times (against the validation set). Note that while the experiment is running, the CM results are displayed\nonly for the first fold/validation split. A CM for all rows can't be\ndisplayed since, in general, DAI isn't performing k-fold CV but could be\nperforming 2 repeats of 1/3 validation splits with overlaps.", + "prompt_type": "plain" + }, + { + "output": "Install the Driverless AI AWS Community AMI\nWatch the installation video here. Note that some of the images in this\nvideo may change between releases, but the installation steps remain the\nsame. 
Environment\n+---------------------------+--------------+---------+----------------+\n| Provider | Instance | Num | Suitable for |\n| | Type | GPUs | |\n+===========================+==============+=========+================+\n| AWS | p2.xlarge | 1 | E |\n| | | | |\n| - | ---- | ---- | xperimentation |\n| - | -----------+ | ------+ | |\n| - | | | ---- |\n| - | p2.8xlarge | 8 | -------------+ |\n| - | | | |\n| - | ---- | ---- | Serious |\n| - | -----------+ | ------+ | use |\n| - | | | |\n| | | 16 | ---- |\n| | p2.16xlarge | | -------------+ |\n| | | ---- | |\n| | ---- | ------+ | Serious |\n| | -----------+ | | use |\n| | | 1 | |\n| | p3.2xlarge | | ---- |\n| | | ---- | -------------+ |\n| | ---- | ------+ | |\n| | -----------+ | | E |\n| | | 4 | |\n| | p3.8xlarge | | xperimentation |\n| | | ---- | |\n| | ---- | ------+ | ---- |\n| | -----------+ | | -------------+ |\n| | | 8 | |\n| | | | Serious |\n| | p3.16xlarge | ---- | use |\n| | | ------+ | |\n| | ---- | | ---- |\n| | -----------+ | 1 | -------------+ |\n| | | | |\n| | g3.4xlarge | ---- | Serious |\n| | | ------+ | use |\n| | ---- | | |\n| | -----------+ | 2 | ---- |\n| | | | -------------+ |\n| | g3.8xlarge | ---- | |\n| | | ------+ | E |\n| | ---- | | |\n| | -----------+ | 4 | xperimentation |\n| | | | |\n| | | | ---- |\n| | g3.16xlarge | | -------------+ |\n| | | | |\n| | | | E |\n| | | | |\n| | | | xperimentation |\n| | | | |\n| | | | ---- |\n| | | | -------------+ |\n| | | | |\n| | | | Serious |\n| | | | use |\n+---------------------------+--------------+---------+----------------+\nInstalling the EC2 Instance\n1.", + "prompt_type": "plain" + }, + { + "output": "2. In the upper right corner of the Amazon Web Services page, set the\n location drop-down. (Note: We recommend selecting the US East region\n because H2O's resources are stored there. It also offers more\n instance types than other regions.) 3. Select the EC2 option under the Compute section to open the EC2\n Dashboard. 4. 
Click the Launch Instance button under the Create Instance section. 5. Under Community AMIs, search for h2oai, and then select the version\n that you want to launch. 6. On the Choose an Instance Type page, select GPU compute in the\n Filter by dropdown. This will ensure that your Driverless AI\n instance will run on GPUs. Select a GPU compute instance from the\n available options. (We recommend at least 32 vCPUs.) Click the Next:\n Configure Instance Details button. 7. Specify the Instance Details that you want to configure. Create a\n VPC or use an existing one, and ensure that \"Auto-Assign Public IP\"\n is enabled and associated to your subnet.", + "prompt_type": "plain" + }, + { + "output": "8. Specify the Storage Device settings. Note again that Driverless AI\n requires 10 GB to run and will stop working of less than 10 GB is\n available. The machine should have a minimum of 30 GB of disk space. Click Next: Add Tags. 9. If desired, add unique Tag name to identify your instance. Click\n Next: Configure Security Group. 10. Add the following security rules to enable SSH access to Driverless\n AI, then click Review and Launch. --------------------------------------------------------------------\n Type Pro Port Range Source Description\n tocol \n ------------ ------- ---------- -------------- ---------------------\n SSH TCP 22 Anywhere \n 0.0.0.0/0 \n Custom TCP TCP 12345 Anywhere Launch DAI\n Rule 0.0.0.0/0 \n --------------------------------------------------------------------\n11. Review the configuration, and then click Launch.", + "prompt_type": "plain" + }, + { + "output": "A popup will appear prompting you to select a key pair. This is\n required in order to SSH into the instance. You can select your\n existing key pair or create a new one. Be sure to accept the\n acknowledgement, then click Launch Instances to start the new\n instance. 13. Upon successful completion, a message will display informing you\n that your instance is launching. 
Click the View Instances button to\n see information about the instance including the IP address. The\n Connect button on this page provides information on how to SSH into\n your instance. 14. Open a Terminal window and SSH into the IP address of the AWS\n instance. Replace the DNS name below with your instance DNS. 15. If you selected a GPU-compute instance, then you must enable\n persistence and optimizations of the GPU. The commands vary\n depending on the instance type. Note also that these commands need\n to be run once every reboot. Refer to the following for more\n information:\n16.", + "prompt_type": "plain" + }, + { + "output": "For example:\n17. Connect to Driverless AI with your browser. Sign in to Driverless AI\n with the username h2oai and use the AWS InstanceID as the password. You will be prompted to enter your Driverless AI license key when\n you log in for the first time. Stopping the EC2 Instance\nThe EC2 instance will continue to run even when you close the\naws.amazon.com portal. To stop the instance:\n1. On the EC2 Dashboard, click the Running Instances link under the\n Resources section. 2. Select the instance that you want to stop. 3. In the Actions drop down menu, select Instance State > Stop. 4. A confirmation page will display. Click Yes, Stop to stop the\n instance. Upgrading the Driverless AI Community Image\nWARNINGS:\n- This release deprecates experiments and MLI models from 1.7.0 and\n earlier. - Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded. We recommend you take the following steps before\n upgrading.", + "prompt_type": "plain" + }, + { + "output": "- Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. 
You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. Upgrading from Version 1.2.2 or Earlier\nThe following example shows how to upgrade from 1.2.2 or earlier to the\ncurrent version. Upgrading from these earlier versions requires an edit\nto the start and h2oai scripts. 1. SSH into the IP address of the image instance and copy the existing\n experiments to a backup location:\n2. wget the newer image. The command below retrieves version 1.2.2:\n3. In the /home/ubuntu/scripts/ folder, edit both the start.sh and\n h2oai.sh scripts to use the newer image. 4. Use the docker load command to load the image:\n5. Optionally run docker images to ensure that the new image is in the\n registry.", + "prompt_type": "plain" + }, + { + "output": "Internal Validation Technique\nThis section describes the technique behind internal validation in\nDriverless AI. For the experiment, Driverless AI will either:\n(1) split the data into a training set and internal validation set\n(2) use cross validation to split the data into n folds\nDriverless AI chooses the method based on the size of the data and the\nAccuracy setting. For method 1, part of the data is removed to be used\nfor internal validation. (Note: This train and internal validation split\nmay be repeated if the data is small so that more data can be used for\ntraining.) For method 2, however, no data is wasted for internal validation. With\ncross validation, the whole dataset is utilized, and each model is\ntrained on a different subset of the training data. The following\nvisualization shows an example of cross validation with 5 folds. []\nDriverless AI randomly splits the data into the specified number of\nfolds for cross validation. 
With cross validation, the whole dataset is\nutilized, and each model is trained on a different subset of the\ntraining data.", + "prompt_type": "plain" + }, + { + "output": "Linux x86_64 Installs\n\nThis section provides installation steps for RPM, deb, and tar installs\nin Linux x86_64 environments.\n\nlinux-rpm linux-deb linux-tarsh", + "prompt_type": "plain" + }, + { + "output": "General Considerations\nMachine Learning and Approximate Explanations\nFor years, common sense has deemed the complex, intricate formulas\ncreated by training machine learning algorithms to be uninterpretable. While great advances have been made in recent years to make these often\nnonlinear, non-monotonic, and non-continuous machine-learned response\nfunctions more understandable (Hall et al, 2017), it is likely that such\nfunctions will never be as directly or universally interpretable as more\ntraditional linear models. Why consider machine learning approaches for inferential purposes? In\ngeneral, linear models focus on understanding and predicting average\nbehavior, whereas machine-learned response functions can often make\naccurate, but more difficult to explain, predictions for subtler aspects\nof modeled phenomenon. In a sense, linear models create very exact\ninterpretations for approximate models. The approach here seeks to make\napproximate explanations for very exact models. It is quite possible\nthat an approximate explanation of an exact model may have as much, or\nmore, value and meaning than the exact interpretations of an approximate\nmodel.", + "prompt_type": "plain" + }, + { + "output": "The Multiplicity of Good Models in Machine Learning\nIt is well understood that for the same set of input variables and\nprediction targets, complex machine learning algorithms can produce\nmultiple accurate models with very similar, but not exactly the same,\ninternal architectures (Breiman, 2001). 
This alone is an obstacle to\ninterpretation, but when using these types of algorithms as\ninterpretation tools or with interpretation tools it is important to\nremember that details of explanations will change across multiple\naccurate models. Expectations for Consistency Between Explanatory Techniques\n- The decision tree surrogate is a global, nonlinear description of\n the Driverless AI model behavior. Variables that appear in the tree\n should have a direct relationship with variables that appear in the\n global feature importance plot. For certain, more linear Driverless\n AI models, variables that appear in the decision tree surrogate\n model may also have large coefficients in the global K-LIME model.", + "prompt_type": "plain" + }, + { + "output": "LOCO\n importance values are nonlinear, do consider interactions, and do\n not explicitly consider a linear intercept or offset. LIME\n explanations and LOCO importance values are not expected to have a\n direct relationship but can align roughly as both are measures of a\n variable's local impact on a model's predictions, especially in more\n linear regions of the Driverless AI model's learned response\n function. - ICE is a type of nonlinear sensitivity analysis which has a complex\n relationship to LOCO feature importance values. Comparing ICE to\n LOCO can only be done at the value of the selected variable that\n actually appears in the selected row of the training data. When\n comparing ICE to LOCO the total value of the prediction for the row,\n the value of the variable in the selected row, and the distance of\n the ICE value from the average prediction for the selected variable\n at the value in the selected row must all be considered. 
- ICE curves that are outside the standard deviation of partial\n dependence would be expected to fall into less populated decision\n paths of the decision tree surrogate; ICE curves that lie within the\n standard deviation of partial dependence would be expected to belong\n to more common decision paths.", + "prompt_type": "plain" + }, + { + "output": "Upgrading the Driverless AI Image\nWARNINGS:\n- This release deprecates experiments and MLI models from 1.7.0 and\n earlier. - Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded. We recommend you take the following steps before\n upgrading. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. Upgrading from Version 1.2.2 or Earlier\nIt is not possible to upgrade from version 1.2.2 or earlier to the\nlatest version. You have to manually remove the 1.2.2 container and then\nreinstall the latest Driverless AI version. Be sure to backup your data\nbefore doing this.", + "prompt_type": "plain" + }, + { + "output": "SSH into the IP address of the image instance and copy the existing\n experiments to a backup location:\n2. wget the newer image. Replace VERSION and BUILD below with the\n Driverless AI version. 3. Use the docker load command to load the image:\n4. Run docker images to find the new image tag. 5. Start the Driverless AI Docker image and replace TAG below with the\n image tag. Depending on your install version, use the\n docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (<\n Docker 19.03) command. 
Upgrading from version 1.5.2 or Later\nUpgrading to versions 1.5.2 and later is no longer done via Docker. Instead, perform the following steps if you are upgrading to version\n1.5.2 or later. Replace dai_NEWVERSION.deb below with the new Driverless\nAI version (for example, dai_1.8.4.1_amd64.deb). Note that this upgrade\nprocess inherits the service user and group from /etc/dai/User.conf and\n/etc/dai/Group.conf. You do not need to manually specify the DAI_USER or\nDAI_GROUP environment variables during an upgrade.", + "prompt_type": "plain" + }, + { + "output": "Hive Setup\n\nDriverless AI lets you explore Hive data sources from within the\nDriverless AI application. This section provides instructions for\nconfiguring Driverless AI to work with Hive.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (< Docker 19.03) command when starting the Driverless AI Docker image. Use docker version to check which version of Docker you are using. Description of Configuration Attributes --------------------------------------- - enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. - hive_app_configs: Configuration for Hive Connector. Inputs are similar to configuring the HDFS connector. Important keys include: - hive_conf_path: The path to Hive configuration. This can have multiple files (e.g. hive-site.xml, hdfs-site.xml, etc.) 
- auth_type: Specify one of noauth, keytab, or keytabimpersonation for Kerberos authentication - keytab_path: Specify the path to Kerberos keytab to use for authentication (this can be \"\" if using auth_type=\"noauth\") - principal_user: Specify the Kerberos app principal user (required when using auth_type=\"keytab\" or auth_type=\"keytabimpersonation\") **Notes:** - With Hive connectors, it is assumed that DAI is running on the edge node.", + "prompt_type": "plain" + }, + { + "output": "missing classes, dependencies, authorization errors). - Ensure the core-site.xml file (from e.g. Hadoop conf) is also present in the Hive conf with the rest of the files (hive-site.xml, hdfs-site.xml, etc.). The core-site.xml file should have proxyuser configured (e.g. hadoop.proxyuser.hive.hosts & hadoop.proxyuser.hive.groups). - If you have tez as the Hive execution engine, make sure that the required tez dependencies (classpaths, jars, etc.) are available on the DAI node. Alternatively, you can use internal engines that come with DAI by changing your hive.execution.engine value in the hive-site.xml file to mr or spark. The configuration should be JSON/Dictionary String with multiple keys. For example: :: \"\"\"{ \"hive_connection_1\": { \"hive_conf_path\": \"/path/to/hive/conf\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"hive/node1.example.com@EXAMPLE.COM\", }, \"hive_connection_2\": { \"hive_conf_path\": \"/path/to/hive/conf_2\", \"auth_type\": \"one of ['noauth', 'keytab', 'keytabimpersonation']\", \"keytab_path\": \"/path/to/.keytab\", \"principal_user\": \"hive/node2.example.com@EXAMPLE.COM\", } }\"\"\" **Note**: The expected input of hive_app_configs is a JSON string.", + "prompt_type": "plain" + }, + { + "output": "Depending on how the configuration value is applied, different forms of outer quotations may be required. The following examples show two unique methods for applying outer quotations. 
- Configuration value applied with the config.toml file: :: hive_app_configs = \"\"\"{\"my_json_string\": \"value\", \"json_key_2\": \"value2\"}\"\"\" - Configuration value applied with an environment variable: :: DRIVERLESS_AI_HIVE_APP_CONFIGS='{\"my_json_string\": \"value\", \"json_key_2\": \"value2\"}' -hive_app_jvm_args: Optionally specify additional Java Virtual Machine (JVM) args for the Hive connector. Each arg must be separated by a space. .. **Notes**: - If a custom `JAAS configuration file `__ is needed for your Kerberos setup, usehive_app_jvm_argsto specify the appropriate file: .. :: hive_app_jvm_args = \"-Xmx20g -Djava.security.auth.login.config=/etc/dai/jaas.conf\" Samplejaas.conffile: : :: com.sun.security.jgss.initiate { com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true useTicketCache=false principal=\"hive/localhost@EXAMPLE.COM\" [Replace this line] doNotPrompt=true keyTab=\"/path/to/hive.keytab\" [Replace this line] debug=true; }; -hive_app_classpath``: Optionally specify an alternative classpath\n for the Hive connector.", + "prompt_type": "plain" + }, + { + "output": "nvidia-docker run`` command or by editing the configuration options in\nthe config.toml file and then specifying that file in the", + "prompt_type": "plain" + }, + { + "output": "Introduction to Driverless AI\n\nintroduction_to_dai key-features supported-algorithms workflow", + "prompt_type": "plain" + }, + { + "output": "MLI for Time-Series Experiments\nThis section describes how to run MLI for time-series experiments. Refer\nto interpret-regular for MLI information with regular experiments. There are two methods you can use for interpreting time-series models:\n- Using the MLI link in the top main menu on the upper right corner of\n the UI to interpret either a Driverless AI model or an external\n model. This process is described in the\n Interpreting a Driverless AI Model and\n Interpreting Predictions from an External Model \n sections. 
- Using the Interpret this Model button on a completed experiment page\n to interpret a Driverless AI model on original and transformed\n features. Run Interpretation from Completed Experiment page\n (See below.) - interpret-ts-multi\n- interpret-ts-single\n- Run IID or regular explainers on a Time series experiment \nLimitations\n- This release deprecates experiments run in 1.8.9 and earlier.", + "prompt_type": "plain" + }, + { + "output": "- MLI is not available for multiclass Time Series. - When the test set contains actuals, you will see the time series\n metric plot and the group metrics table. If there are no actuals,\n MLI will run, but you will see only the prediction value time series\n and a Shapley table. - MLI does not require an Internet connection to run on current\n models. Multi-Group Time Series MLI\nThis section describes how to run MLI on time series data for multiple\ngroups. 1. Click the Interpret this Model button on a completed time series\n experiment to launch Model Interpretation for that experiment. This\n page includes the following:\n2. Scroll to the bottom of the panel and select a grouping in the Group\n Search field to view a graph of Actual vs. Predicted values for the\n group. The outputted graph can be downloaded to your local machine. 3. Click on a prediction point in the plot (white line) to view Shapley\n values for that prediction point. The Shapley values plot can also\n be downloaded to your local machine.", + "prompt_type": "plain" + }, + { + "output": "Click Add Panel to add a new MLI Time Series panel. This lets you\n compare different groups in the same model and also provides the\n flexibility to do a \"side-by-side\" comparison between different\n models. Single Time Series MLI\nTime Series MLI can also be run when only one group is available. 1. Click the Interpret this Model button on a completed time series\n experiment to launch Model Interpretation for that experiment. This\n page includes the following:\n2. 
Scroll to the bottom of the panel and select an option in the Group\n Search field to view a graph of Actual vs. Predicted values for the\n group. (Note that for Single Time Series MLI, there will only be one\n option in this field.) The outputted graph can be downloaded to your\n local machine. 3. Click on a prediction point in the plot (white line) to view Shapley\n values for that prediction point. The Shapley values plot can also\n be downloaded to your local machine. 4. Click Add Panel to add a new MLI Time Series panel.", + "prompt_type": "plain" + }, + { + "output": "Environment Variables and Configuration Options\nDriverless AI provides a number of environment variables that can be\npassed when starting Driverless AI or specified in a config.toml file. The complete list of variables is in the config_file section. The steps\nfor specifying variables vary depending on whether you installed a\nDriverless AI RPM, DEB, or TAR SH or whether you are running a Docker\nimage. Setting Environment Variables and Configuration Options\nDocker Image Installs\nEach property must be prepended with DRIVERLESS_AI. The example below\nstarts Driverless AI with environment variables that enable S3 and HDFS\naccess (without authentication). nvidia-docker run \\\n --pid=host \\\n --rm \\\n -u `id -u`:`id -g` \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,s3,hdfs\" \\\n -e DRIVERLESS_AI_AUTHENTICATION_METHOD=\"local\" \\\n -e DRIVERLESS_AI_LOCAL_HTPASSWD_FILE=\"\" \\\n -v /etc/passwd:/etc/passwd:ro \\\n -v /etc/group:/etc/group:ro \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\nNative Installs\nThe config.toml file is available in the etc/dai folder after the RPM,\nDEB, or TAR SH is installed.", + "prompt_type": "plain" + }, + { + "output": "Google BigQuery Setup\nDriverless AI lets you explore Google BigQuery (GBQ) data sources from\nwithin the Driverless AI application. 
This page provides instructions\nfor configuring Driverless AI to work with GBQ. Note\nThe setup described on this page requires you to enable authentication. Enabling the GCS and/or GBQ connectors causes those file systems to be\ndisplayed in the UI, but the GCS and GBQ connectors cannot be used\nwithout first enabling authentication. Before enabling the GBQ data connector with authentication, the\nfollowing steps must be performed:\n1. In the Google Cloud Platform (GCP), create a private key for your\n service account. To create a private key, click Service Accounts >\n Keys, and then click the Add Key button. When the Create private key\n dialog appears, select JSON as the key type. To finish creating the\n JSON private key and download it to your local file system, click\n Create. 2. Mount the downloaded JSON file to the Docker instance. 3.", + "prompt_type": "plain" + }, + { + "output": "Note\nDepending on your Docker install version, use either the\ndocker run --runtime=nvidia (>= Docker 19.03) or nvidia-docker (< Docker\n19.03) command when starting the Driverless AI Docker image. Use\ndocker version to check which version of Docker you are using. The following sections describe how to enable the GBQ data connector:\n- gbq-config-toml\n- gbq-environment-variable\n- gbq-workload-identity\nEnabling GBQ with the config.toml file\nDocker Image Installs\nThis example enables the GBQ data connector with authentication by\npassing the JSON authentication file. This assumes that the JSON file\ncontains Google BigQuery authentications. 
nvidia-docker run \\\n --pid=host \\\n --rm \\\n --shm-size=256m \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,gbq\" \\\n -e DRIVERLESS_AI_GCS_PATH_TO_SERVICE_ACCOUNT_JSON=\"/service_account_json.json\" \\\n -u `id -u`:`id -g` \\\n -p 12345:12345 \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n -v `pwd`/service_account_json.json:/service_account_json.json \\\n h2oai/dai-ubi8-x86_64:|tag|\nDocker Image with the config.toml\nThis example shows how to configure the GBQ data connector options in\nthe config.toml file, and then specify that file when starting\nDriverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "GOOGLE_APPLICATION_CREDENTIALSenvironment variable as follows: :: export GOOGLE_APPLICATION_CREDENTIALS=\"SERVICE_ACCOUNT_KEY_PATH\" In the preceding example, replaceSERVICE_ACCOUNT_KEY_PATHwith the path of the JSON file that contains your service account key. The following is an example of how this might look: :: export GOOGLE_APPLICATION_CREDENTIALS=\"/etc/dai/service-account.json\" To see how to set this environment variable with Docker, refer to the following example: .. code:: bash nvidia-docker run \\ --pid=host \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,gbq\" \\ -e GOOGLE_APPLICATION_CREDENTIALS=\"/service_account.json\" \\ -u `id -u`:`id -g` \\ -p 12345:12345 \\ -v `pwd`/data:/data \\ -v `pwd`/log:/log \\ -v `pwd`/license:/license \\ -v `pwd`/tmp:/tmp \\ -v `pwd`/service_account_json.json:/service_account_json.json \\ h2oai/dai-ubi8-x86_64:|tag| For more information on setting theGOOGLE_APPLICATION_CREDENTIALSenvironment variable, refer to the `official documentation on setting the environment variable `__.", + "prompt_type": "plain" + }, + { + "output": "For information on how to enable Workload Identity, refer to the `official documentation on enabling Workload Identity on a GKE cluster `__. .. 
note:: If Workload Identity is enabled, then theGOOGLE_APPLICATION_CREDENTIALSenvironment variable does not need to be set. Adding Datasets Using GBQ ------------------------- After Google BigQuery is enabled, you can add datasets by selecting **Google Big Query** from the **Add Dataset (or Drag and Drop)** drop-down menu. .. note:: To run a BigQuery query with Driverless AI, the associated service account must have the following Identity and Access Management (IAM) permissions: :: bigquery.jobs.create bigquery.tables.create bigquery.tables.delete bigquery.tables.export bigquery.tables.get bigquery.tables.getData bigquery.tables.list bigquery.tables.update bigquery.tables.updateData storage.buckets.get storage.objects.create storage.objects.delete storage.objects.list storage.objects.update For a list of all Identity and Access Management permissions, refer to the `IAM permissions reference `__ from the official Google Cloud documentation.", + "prompt_type": "plain" + }, + { + "output": "**Enter BQ Dataset ID with write access to create temporary table**: Enter a dataset ID in Google BigQuery that this user has read/write access to. BigQuery uses this dataset as the location for the new table generated by the query. .. **Note**: Driverless AI's connection to GBQ will inherit the top-level directory from the service JSON file. So if a dataset named \"my-dataset\" is in a top-level directory named \"dai-gbq\", then the value for the dataset ID input field would be \"my-dataset\" and not \"dai-gbq:my-dataset\". 2. **Enter Google Storage destination bucket**: Specify the name of Google Cloud Storage destination bucket. Note that the user must have write access to this bucket. 3. **Enter Name for Dataset to be saved as**: Specify a name for the dataset, for example,my_file. 4. **Enter BigQuery Query (Use StandardSQL)**: Enter a StandardSQL query that you want BigQuery to execute. For example:SELECT * FROM .. 5. 
(Optional) Specify a project to use with the GBQ connector.", + "prompt_type": "plain" + }, + { + "output": "Driverless AI Experiment Setup Wizard\nThe Driverless AI Experiment Setup Wizard makes it simple for you to set\nup a Driverless AI experiment and ensure that the experiment's settings\nare optimally configured for your specific use case. The Experiment\nSetup Wizard helps you learn about your data and lets you provide\ninformation about your use case that is used to determine the\nexperiment's settings. This Wizard covers topics such as data leakage,\nNLP handling, validation method, model reproducibility, and model\ndeployment. Notes:\n- This feature is currently in an experimental state. - A Dataset Join Wizard that makes it simple for you to join two\n datasets together is also available in Driverless AI. For more\n information, see join_dataset_wizard. The following sections describe how to access and use the Driverless AI\nWizard. - wizard-accessing\n- wizard-using\nAccessing the Driverless AI Wizard\nChoose one of the following methods to access the Driverless AI Wizard:\n- On the Datasets page, click the name of the dataset you want to use\n for the experiment and select Predict Wizard from the list of\n options.", + "prompt_type": "plain" + }, + { + "output": "If this method is used, then the Driverless AI Wizard\n prompts you to select a dataset to use for the experiment. []\nDriverless AI Wizard sample walkthrough\nThe following example walks through the Driverless AI Wizard. Note that\nthis walkthrough does not contain every possible step that the wizard\noffers. 1. Select the option that best describes your role and specify how many\n years of experience you have with machine learning and data science. In this example, the options Data Scientist and <1 year are\n selected. Click Continue to proceed. 2. Select a dataset. Select a tabular dataset with training data. 
Each\n row in the dataset must contain predictor variables (features) that\n can be used to predict the target column. In this example, the Rain\n in Australia dataset is selected. 3. Select a problem type and target column. Specify a problem type and\n a target column for that problem type. Note that you can select a\n target column for only one of the available problem types.", + "prompt_type": "plain" + }, + { + "output": "Click Continue to proceed. 4. Target column analysis. The Driverless AI Wizard provides\n information about the selected target column and prompts you to\n confirm that the target column looks as expected. Click Yes to\n proceed, or click No to return to the previous page and select a\n different column. 5. Exclude columns. The Driverless AI Wizard prompts you to check for\n columns to drop from the experiment. Dropped columns are not used as\n predictors for the target column. If you already know which\n column(s) you want to drop, then you can click the Yes, I want to\n have a look button to select the column(s) you want to drop. If you\n want to proceed without dropping any columns, click the No,\n don't drop any columns button. 6. Model deployment. The Driverless AI Wizard prompts you to specify\n how you plan to use the model. In this example, the I'm not ready\n for production option is selected. 7. Importance of time order. If your dataset contains at least one date\n or datetime column that doesn't contain missing values, the\n Driverless AI Wizard prompts you to specify how important time order\n is to the experiment.", + "prompt_type": "plain" + }, + { + "output": "8. Provide a test set. Specify a test set to use for the experiment. You can select an existing test set, create a test set from the\n training data, or skip this step entirely. To refresh the list of\n available datasets, click the Refresh dataset list button. In this\n example, the Create test set from training data option is selected. 9. Split the training data. 
Use the slider to specify what fraction of\n the training dataset you want to use for testing. The Driverless AI\n Wizard automatically suggests a percentage based on the size of your\n training dataset. In this example, 15 percent of the training\n dataset is used for testing. Click Split my training data to\n proceed. 10. Confirm the train / test split. The Driverless AI Wizard lists the\n following information for both the training and testing data based\n on the percentage specified in the preceding step:\n - The size of each dataset. - The number of rows and columns in each dataset. - Whether either dataset has any temporal order.", + "prompt_type": "plain" + }, + { + "output": "Select a model type. Specify a model type based on settings for\n Accuracy, Time, and Interpretability, as well as training time and\n deployment size. You can also optionally specify whether you have\n strict runtime limits or if you want to limit the complexity of the\n model. In this example, the Keep it simple option is selected. Click\n Continue to proceed. 12. Select a scorer. Specify a scorer to optimize. In this example, Area\n under ROC Curve (AUC) is selected. Click Continue to proceed. 13. Experiment parameters. The Driverless AI Wizard lists all of the\n experiment parameters that have been configured up until this point. From this page, you can specify a name for the experiment and begin\n training, show additional details about the experiment (Python code\n and Expert Settings), or cancel the experiment and restart from the\n beginning of the wizard. In this example, Start Training is\n selected. 14. The experiment now appears on the Experiments page in Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Dataset Join Wizard\nThe Driverless AI Dataset Join Wizard makes it simple for you to join\ntwo datasets together. This wizard performs a left (outer) join. Note\nthat the join key column name(s) must match between both datasets. 
To\nrename columns, or to prepare datasets more generally, go to Dataset\nDetails and select Modify by Recipe -> Live Code, or use data recipes. If a model is trained on the resulting dataset, make sure to also\nperform the same join on testing or production data. To access the Dataset Join Wizard, navigate to the Datasets page and\nclick on the name of the dataset you want to join with another dataset. A list of dataset-specific options is displayed. Select Join Wizard to\nopen the wizard. []\nWhen using the Join Datasets wizard, you can either specify a dataset to\njoin, or first specify the join key column(s) to use. Notes:\n- This feature is currently in an experimental state. - An Experiment Setup Wizard that makes it simple for you to set up an\n experiment is also available in Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Client Certificate Authentication Example\nThis section describes how to configure client certificate\nauthentication in Driverless AI. Client Certificate and SSL Configuration Options\nThe following options can be specified when configuring client\ncertificate authentication. SSL Configuration Options\nMutual TLS authentication (mTLS) must be enabled in order to enable\nClient Certificate Authentication. Use the following configuration\noptions to configure mTLS. Refer to the mTLS Authentication topic for\nmore information on how to enable mTLS. - ssl_client_verify_mode: Sets the client verification mode. Choose\n from the following verification modes:\n- ssl_ca_file: Specifies the path to the certification authority (CA)\n certificate file. This certificate will be used to verify the client\n certificate when client authentication is enabled. If this is not\n specified, clients are verified using the default system\n certificates. 
- ssl_client_key_file: Required if\n ssl_client_verify_mode = \"CERT_REQUIRED\".", + "prompt_type": "plain" + }, + { + "output": "- ssl_client_crt_file: Required if\n ssl_client_verify_mode = \"CERT_REQUIRED\". Specifies the HTTPS\n settings path to the client certificate that Driverless AI will use\n to authenticate itself. Client Certificate Options\n- auth_tls_crl_file: The path to the certificate revocation list (CRL)\n file that is used to verify the client certificate. - auth_tls_user_lookup: Specifies how a user's identity is obtained. Choose from the following:\n - REGEXP_ONLY: Uses auth_tls_subject_field and\n auth_tls_field_parse_regexp to extract the username from the\n client certificate. - LDAP_LOOKUP: Uses the LDAP server to obtain the username. (Refer to the ldap_authentication section for information\n about additional LDAP Authentication configuration options.) Used with LDAP_LOOKUP:\n- auth_tls_ldap_server: Specifies the LDAP server hostname or IP\n address. - auth_tls_ldap_port: Specifies the LDAP server port number. This is\n 389 by default.", + "prompt_type": "plain" + }, + { + "output": "- auth_tls_ldap_tls_file: Specifies the path to the SSL certificate. - auth_tls_ldap_bind_dn: Specifies the complete DN of the LDAP bind\n user. - auth_tls_ldap_bind_password: Specifies the password for the LDAP\n bind. - auth_tls_subject_field: The subject field that is used as a source\n for a username or other values that provide further validation. - auth_tls_field_parse_regexp: The regular expression that is used to\n parse the subject field in order to obtain the username or other\n values that provide further validation. - auth_tls_ldap_search_base: Specifies the location in the Directory\n Information Tree (DIT) where the search will start. - auth_tls_ldap_search_filter: Specifies an LDAP search filter that is\n used to find a specific user with LDAP_LOOKUP when using the\n tls_certificate authentication method. 
This can be dynamically built\n by using the named capturing groups from auth_tls_field_parse_regexp\n for substitution:\n auth_tls_field_parse_regexp = \"\\w+ (?P<id>\\d+)\"\n auth_tls_ldap_search_filter = \"(&(objectClass=person)(id={{id}}))\"\n- auth_tls_ldap_username_attribute: Specifies the LDAP record\n attribute that is used as a username.", + "prompt_type": "plain" + }, + { + "output": "auth_tls_ldap_authorization_lookup_filter option to determine whether individual users are members of the chemists group in an LDAP schema where group (organizational unit) membership is defined within group entries. :: # Specify to use email as username auth_tls_ldap_username_attribute = \"mail\" # Specify search string auth_tls_ldap_search_filter = \"(&(objectClass=inetOrgPerson)(uid={{username}}))\" # Specify the base DN to start the search from auth_tls_ldap_authorization_search_base=\"dc=example,dc=com\" # Filter the results of the search to determine which users are members of a specific group auth_tls_ldap_authorization_lookup_filter = \"(&(objectClass=groupOfUniqueNames)(uniqueMember=uid={{uid}},dc=example,dc=com)(ou=chemists))\" Enabling Client Certificate Authentication ------------------------------------------ .. container:: tabs .. group-tab:: Docker Image Installs To enable Client Certificate authentication in Docker images, specify the authentication environment variable that you want to use.", + "prompt_type": "plain" + }, + { + "output": "The following example enables Client Certificate authentication and uses LDAP_LOOKUP for the TLS user lookup method. .. 
code:: bash nvidia-docker run \\ --pid=host \\ --rm \\ --shm-size=256m \\ -p 12345:12345 \\ -u `id -u`:`id -g` \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,s3,hdfs\" \\ -e DRIVERLESS_AI_ENABLE_HTTPS=\"true\" \\ -e DRIVERLESS_AI_SSL_KEY_FILE=\"/etc/pki/dai-server.key\" \\ -e DRIVERLESS_AI_SSL_CRT_FILE=\"/etc/pki/dai-server.crt\" \\ -e DRIVERLESS_AI_SSL_CA_FILE=\"/etc/pki/ca.crt\" \\ -e DRIVERLESS_AI_SSL_CLIENT_VERIFY_MODE=\"CERT_REQUIRED\" \\ -e DRIVERLESS_AI_SSL_CLIENT_KEY_FILE=\"/etc/pki/dai-self.key\" \\ -e DRIVERLESS_AI_SSL_CLIENT_CRT_FILE=\"/etc/pki/dai-self.cert\" \\ -e DRIVERLESS_AI_AUTHENTICATION_METHOD=\"tls_certificate\" \\ -e DRIVERLESS_AI_AUTH_TLS_SUBJECT_FIELD=\"CN\" \\ -e DRIVERLESS_AI_AUTH_TLS_CRL_FILE=\"/etc/pki/crl.pem\" \\ -e DRIVERLESS_AI_AUTH_TLS_FIELD_PARSE_REGEXP=\"(?P.", + "prompt_type": "plain" + }, + { + "output": "Using a Custom Scorer\nDriverless AI supports a number of scorers, including:\n- Regression: GINI, MAE, MAPE, MER, MSE, R2, RMSE (default), RMSLE,\n RMSPE, SMAPE, TOPDECILE\n- Classification: ACCURACY, AUC (default), AUCPR, F05, F1, F2, GINI,\n LOGLOSS, MACROAUC, MCC\nThis example shows how you can include a custom scorer in your\nexperiment. This example will use the Explained Variance scorer, which\nis used for regression experiments. 1. Start an experiment in Driverless AI by selecting your training\n dataset along with (optionally) validation and testing datasets and\n then specifying a (regression) Target Column. 2. The scorer defaults to RMSE. Click on Expert Settings. 3. Specify the custom scorer recipe using one of the following methods:\n4. In the Experiment Summary page, select the new Explained Variance\n (EXPVAR) scorer. 
(Note: If you do not see the EXPVAR option, return\n to the Expert Settings, select Recipes > Include Specific Scorers,\n then click the Enable Custom button in the top right corner.", + "prompt_type": "plain" + }, + { + "output": "Linux RPMs\nFor Linux machines that will not use the Docker image or DEB, an RPM\ninstallation is available for the following environments:\n- x86_64 RHEL 7 / RHEL 8\n- CentOS 7 / CentOS 8\nThe installation steps assume that you have a license key for Driverless\nAI. For information on how to obtain a license key for Driverless AI,\nvisit https://www.h2o.ai/products/h2o-driverless-ai/. Once obtained, you\nwill be prompted to paste the license key into the Driverless AI UI when\nyou first log in, or you can save it as a .sig file and place it in the\nlicense folder that you will create during the installation process. Note\n- To ensure that AutoDoc pipeline visualizations are generated\ncorrectly on native installations, installing fontconfig is recommended. - When using systemd, remove the dai-minio, dai-h2o, dai-redis,\n dai-procsy, and dai-vis-server services. 
When upgrading, you can use\n the following commands to deactivate these services:\n systemctl stop dai-minio\n systemctl disable dai-minio\n systemctl stop dai-h2o\n systemctl disable dai-h2o\n systemctl stop dai-redis\n systemctl disable dai-redis\n systemctl stop dai-procsy\n systemctl disable dai-procsy\n systemctl stop dai-vis-server\n systemctl disable dai-vis-server\nEnvironment\n -----------------------------------\n Operating System Min Mem\n ------------------------- ---------\n RHEL with GPUs 64 GB\n RHEL with CPUs 64 GB\n CentOS with GPUS 64 GB\n CentOS with CPUs 64 GB\n -----------------------------------\nRequirements\n- RedHat 7/RedHat 8/CentOS 7/CentOS 8\n- NVIDIA drivers >= recommended (GPU only).", + "prompt_type": "plain" + }, + { + "output": "About the Install\n- The 'dai' service user is created locally (in /etc/passwd) if it is\n not found by 'getent passwd'. You can override the user by providing\n the DAI_USER environment variable during rpm or dpkg installation. - The 'dai' service group is created locally (in /etc/group) if it is\n not found by 'getent group'. You can override the group by providing\n the DAI_GROUP environment variable during rpm or dpkg installation. - Configuration files are placed in /etc/dai and owned by the 'root'\n user:\n - /etc/dai/config.toml: Driverless AI config file (See config_file\n section for details). - /etc/dai/User.conf: systemd config file specifying the service\n user. - /etc/dai/Group.conf: systemd config file specifying the service\n group. - /etc/dai/EnvironmentFile.conf: systemd config file specifying\n (optional) environment variable overrides. 
- Software files are placed in /opt/h2oai/dai and owned by the 'root'\n user\n- The following directories are owned by the service user so that they\n can be updated by the running software:\n - /opt/h2oai/dai/home: The application's home directory (license\n key files are stored here).", + "prompt_type": "plain" + }, + { + "output": "- /opt/h2oai/dai/log: Log files go here if you are not using\n systemd (if you are using systemd, then use the standard\n journalctl tool). - By default, for Docker or DEB/RPM installs, Driverless AI looks for\n a license key in /opt/h2oai/dai/home/.driverlessai/license.sig. If\n you are installing Driverless AI programmatically, you can copy a\n license key file to that location. For TAR SH installs, the\n equivalent location is /home/.driverlessai, and after\n the license is imported, it is copied under ~/.driverlessai. If no\n license key is found, the application guides you through the process\n of adding one through the UI. - systemd unit files are placed in /usr/lib/systemd/system. - Symbolic links to the configuration files in /etc/dai files are\n placed in /etc/systemd/system. If your environment is running an operational systemd, that is the\npreferred way to manage Driverless AI. The package installs the\nfollowing systemd services and a wrapper service:\n- dai: Wrapper service that starts/stops the other three services.", + "prompt_type": "plain" + }, + { + "output": "- dai-h2o: H2O-3 helper process used by Driverless AI. - dai-procsy: Procsy helper process used by Driverless AI. - dai-vis-server: Visualization server helper process used by\n Driverless AI. If you don't have systemd, refer to linux-tarsh for install\ninstructions. Installing OpenCL\nOpenCL is required for full LightGBM support on GPU-powered systems. 
To\ninstall OpenCL, run the following as root:\n mkdir -p /etc/OpenCL/vendors && echo \"libnvidia-opencl.so.1\" > /etc/OpenCL/vendors/nvidia.icd && chmod a+r /etc/OpenCL/vendors/nvidia.icd && chmod a+x /etc/OpenCL/vendors/ && chmod a+x /etc/OpenCL\nNote\nIf OpenCL is not installed, then CUDA LightGBM is automatically used. CUDA LightGBM is only supported on Pascal-powered (and later) systems,\nand can be enabled manually with the enable_lightgbm_cuda_support\nconfig.toml setting. Installing Driverless AI\nRun the following commands to install the Driverless AI RPM. # Install Driverless AI. sudo rpm -i |VERSION-rpm-lin|\nNote: For RHEL 7.5, it is necessary to upgrade library glib2:\n sudo yum upgrade glib2\nBy default, the Driverless AI processes are owned by the 'dai' user and\n'dai' group.", + "prompt_type": "plain" + }, + { + "output": "Replace myuser and mygroup as appropriate. # Temporarily specify service user and group when installing Driverless AI. # rpm saves these for systemd in the /etc/dai/User.conf and /etc/dai/Group.conf files. sudo DAI_USER=myuser DAI_GROUP=mygroup rpm -i |VERSION-rpm-lin|\nYou may now optionally make changes to /etc/dai/config.toml. Starting Driverless AI\nIf you have systemd (preferred):\n # Start Driverless AI. sudo systemctl start dai\nIf you do not have systemd:\n # Start Driverless AI. sudo -H -u dai /opt/h2oai/dai/run-dai.sh\nStarting NVIDIA Persistence Mode\nIf you have NVIDIA GPUs, you must run the following NVIDIA command. This\ncommand needs to be run every reboot. For more information:\nhttp://docs.nvidia.com/deploy/driver-persistence/index.html. 
sudo nvidia-smi -pm 1\nLooking at Driverless AI log files\nIf you have systemd (preferred):\n sudo systemctl status dai-dai\n sudo journalctl -u dai-dai\nIf you do not have systemd:\n sudo less /opt/h2oai/dai/log/dai.log\n sudo less /opt/h2oai/dai/log/h2o.log\n sudo less /opt/h2oai/dai/log/procsy.log\n sudo less /opt/h2oai/dai/log/vis-server.log\nStopping Driverless AI\nIf you have systemd (preferred):\n # Stop Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Verify. sudo ps -u dai\nIf you do not have systemd:\n # Stop Driverless AI. sudo pkill -U dai\n # The processes should now be stopped. Verify. sudo ps -u dai\nUpgrading Driverless AI\nWARNINGS:\n- This release deprecates experiments and MLI models from 1.7.0 and\n earlier. - Experiments, MLIs, and MOJOs reside in the Driverless AI tmp\n directory and are not automatically upgraded when Driverless AI is\n upgraded. We recommend you take the following steps before\n upgrading. - Build MLI models before upgrading. - Build MOJO pipelines before upgrading. - Stop Driverless AI and make a backup of your Driverless AI tmp\n directory before upgrading. The upgrade process inherits the service user and group from\n/etc/dai/User.conf and /etc/dai/Group.conf. You do not need to manually\nspecify the DAI_USER or DAI_GROUP environment variables during an\nupgrade. Requirements\nWe recommend to have NVIDIA driver >= installed (GPU only) in your host\nenvironment for a seamless experience on all architectures, including\nAmpere.", + "prompt_type": "plain" + }, + { + "output": "Go to NVIDIA download driver to get the latest NVIDIA Tesla A/T/V/P/K\nseries drivers. For reference on CUDA Toolkit and Minimum Required\nDriver Versions and CUDA Toolkit and Corresponding Driver Versions, see\nhere . Note\nIf you are using K80 GPUs, the minimum required NVIDIA driver version is\n450.80.02. Upgrade Steps\nIf you have systemd (preferred):\n # Stop Driverless AI. 
sudo systemctl stop dai\n # The processes should now be stopped. Verify. sudo ps -u dai\n # Make a backup of /opt/h2oai/dai/tmp directory at this time. # Upgrade and restart. sudo rpm -U |VERSION-rpm-lin|\n sudo systemctl daemon-reload\n sudo systemctl start dai\nIf you do not have systemd:\n # Stop Driverless AI. sudo pkill -U dai\n # The processes should now be stopped. Verify. sudo ps -u dai\n # Make a backup of /opt/h2oai/dai/tmp directory at this time. # Upgrade and restart. sudo rpm -U |VERSION-rpm-lin|\n sudo -H -u dai /opt/h2oai/dai/run-dai.sh\nUninstalling Driverless AI\nIf you have systemd (preferred):\n # Stop Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Imbalanced modeling in Driverless AI\nThis page describes Driverless AI's imbalanced modeling capabilities. - imbalanced_modeling_overview\n- imbalanced_algorithms_enabling\nOverview\nDriverless AI offers imbalanced algorithms for use cases where there is\na binary, imbalanced target. These algorithms are enabled by default if\nthe target column is considered imbalanced. While they are enabled,\nDriverless AI may decide to not use them in the final model to avoid\npoor performance. Note\nWhile Driverless AI does try imbalanced algorithms by default, they have\nnot generally been found to improve model performance. Note that using\nimbalanced algorithms also results in a significantly larger final\nmodel, because multiple models are combined with different balancing\nratios. Imbalanced algorithms\nDriverless AI provides two types of imbalanced algorithms:\nImbalancedXGBoost and ImbalancedLightGBM. These imbalanced algorithms\ntrain an XGBoost or LightGBM model multiple times on different samples\nof data and then combine the predictions of these models together.", + "prompt_type": "plain" + }, + { + "output": "(By trying multiple ratios, DAI is more likely to come up with a\nrobust model.) 
Note\n- When your experiment is complete, you can find more details about what\nbagging was performed in the experiment AutoDoc . For a sample\nAutoDoc, view the blog post on this topic. - For more information on imbalanced modeling sampling methods, see\n imbalanced-sampling. Enabling imbalanced algorithms\nThe following steps describe how to enable only imbalanced algorithms:\n1. On the Experiment Setup page, click Expert Settings. 2. In the Expert Settings window, click on the Training -> Models\n subtab. 3. For the Include specific models setting, click the Select Values\n button. 4. On the Selected Included Models page, click Uncheck All, and then\n select only the imbalanced algorithms: ImbalancedXGBoost and\n ImbalancedLightGBM. Click Done to confirm your selection. 5. In the Expert Settings window, click the Save button. Additional tips\nThis section describes additional tips you can make use of when enabling\nimbalanced algorithms.", + "prompt_type": "plain" + }, + { + "output": "Custom Individual Recipe\nThe following sections describe Driverless AI's Individual Recipe\nfeature. - individual-recipe-understanding\n- individual-recipe-getting\n- individual-recipe-using\n- individual-recipe-including\n- individual-recipe-example\nUnderstanding the Individual Recipe\nIn Driverless AI, every completed experiment automatically generates\nPython code for the experiment that corresponds to the individual(s)\nused to build the final model. You can edit this auto-generated Python\ncode offline and upload it as a recipe, or edit and save it using the\nbuilt-in custom recipe management editor . This feature\ngives you code-first access to a significant portion of DAI's internal\ntransformer and model generation process. The Individual Recipe contains information about model type, model\nhyperparameters, data science types for input features, transformers\nused, and transformer parameters. 
It is an object that is evolved by\nmutation within the context of DAI's genetic algorithm.", + "prompt_type": "plain" + }, + { + "output": "This feature is supported for experiments made using DAI 1.7.2 and\nlater. Using custom individuals\nA custom individual can be run as is, evolved alongside other models or\nindividuals, or frozen to be included as is during the final evolution\nstage alongside other models from the experiment. - As is: To ensemble the custom individuals as they are, set\n enable_genetic_algorithm to off. Note\n that to get reproducible results, set reproducibility to on and make\n sure that the same accuracy knob settings are selected (as accuracy\n settings affect the internal cross validation fold data\n assignment). - Evolve alongside other models or individuals: This is the default\n behavior where a custom individual behaves like a standard internal\n DAI individual, which has its features and model hyperparameters\n mutated during the genetic algorithm process as per the\n experiment settings. - Frozen individuals: By default, a custom individual behaves like a\n standard internal DAI individual, which has its features and model\n hyperparameters mutated during evolution.", + "prompt_type": "plain" + }, + { + "output": "You can specify the number of such\n individuals to be included in an ensemble along with any other, by\n modifying the\n Ensemble Level for Final Modeling Pipeline \n expert setting. Getting the Individual Recipe from experiments\nIn Driverless AI, every experiment automatically generates editable\npython code for the best individuals (or models). The following sections\ndescribe how to get the Individual Recipe code for a completed\nexperiment. - From a completed experiment: From a completed experiment page, click\n Tune Experiment > Create Individual Recipe, then select Upload as\n Custom Recipe. 
When this option is selected, the Individual Recipe\n becomes available on the Recipes page and in the Expert Settings\n under the Include specific individuals setting. You can also select\n Download to download the Individual Recipe Python file directly to\n your local file system. You can then add the downloaded Individual\n Recipe to DAI by clicking Recipes in the main navigation, then\n clicking Add Custom Recipes > From Computer.", + "prompt_type": "plain" + }, + { + "output": "- From the Downloaded Summary: The Individual Recipe Python file is\n included as part of the summary file for every completed experiment. To download the summary file, click the Download Summary & Logs\n button of any completed experiment. The individual recipe filename\n is final_indiv0.py. Using the Individual Recipe\nThis section describes how you can use the Individual Recipe to view\ndetailed information about how the final model was built and make\nfine-tuned adjustments to the model by editing the auto-generated Python\ncode and using the edited Individual Recipe in a new experiment. - individual-recipe-transparency\n- individual-recipe-model-control\n- individual-recipe-feature-control\nModel Transparency\nThe following functions in the Individual Recipe provide significant\ntransparency for the final model:\n- The set_model function lets you view various details about the final\n model such as model type and the model's parameters. - The set_genes function lets you view each feature that is in the\n model and information about how each feature was transformed.", + "prompt_type": "plain" + }, + { + "output": "You can make minor modifications to these\nparameters by editing the self.model_params dictionary. This can be\nhelpful if you want to see whether minor changes to the parameters\nresult in more robust or accurate models or if you are required to\nchange the model parameters for business or regulatory purposes. 
Feature Control\nEach feature used in the model is listed in the set_genes function,\nbeginning with features that were not engineered and followed by\nengineered features. The following examples show original and\ntransformed features as they appear in the auto-generated Python code. Original features\nThe following example provides details on an original feature called\nHumidity3pm. Note\nOriginal features are labeled with the value OriginalTransformer in the\nadd_transformer() field. # Gene Normalized Importance: 1\n # Transformed Feature Names and Importances: {'3_Humidity3pm': 1.0}\n # Valid parameters: ['num_cols', 'random_state', 'output_features_to_drop', 'labels']\n params = {'num_cols': ['Humidity3pm'], 'random_state': 997149340}\n self.add_transformer('OriginalTransformer', col_type='numeric', gene_index=3, forced=False, mono=False, **params)\nEngineered features\nIn the following example, the Cross Validation Target Encoding\ntransformer was applied to the WindDir3pm column.", + "prompt_type": "plain" + }, + { + "output": "The following sections describe how to perform these actions\nusing the Individual Recipe. Adding features\nDuring the experiment, Driverless AI uses a Genetic Algorithm to\ndetermine which features to drop from the model. However, your use case\nmay require you to force a column to be used by the model. The following\nsteps describe how to force in a numeric column that was dropped by\nDriverless AI:\n1. Copy an OriginalTransformer feature that is already in the code and\n paste it below. 2. Specify the column you want to force in with the num_cols field. In\n the example below, Driverless AI dropped YearsSinceLastPromotion, so\n an OriginalTransformer example that was already present was copied\n and the value for num_cols was edited. 3. To ensure that the model uses the feature, set forced=True. 4. Change the gene_index to a value that is not used . 
The following is an example of how the final code appears:\n params = {'num_cols': ['YearsSinceLastPromotion'], 'random_state': 730763716}\n self.add_transformer('OriginalTransformer', col_type='numeric', gene_index=100, forced=True, mono=False, **params)\nDeleting features\nThe Experiment Setup page contains a dropped_columns setting that lets\nyou drop columns from an experiment so that they are not used by any\nmodel.", + "prompt_type": "plain" + }, + { + "output": "In this scenario, you can delete the unwanted feature from the\nIndividual Recipe code. Modifying features\nDriverless AI automatically creates engineered features that have a list\nof editable parameters that are specific to the transformer. Because\nthese are internal parameters, contacting support@h2o.ai is recommended\nwhen modifying these parameters. The following are two common use cases for modifying specific features\nin the Individual Recipe code:\n- Forcing features into a model: To force in a specific feature and\n ensure that it is not pruned, set forced=True. - Enforcing monotonicity: To enforce monotonicity for a specific\n feature, set mono=True. Using the edited Individual Recipe in a new experiment\nThe following steps describe how to use an edited Individual Recipe in a\nnew experiment from the built-in\ncustom recipe management editor . 1. On the Custom Recipes page, click the Individual Recipe you want to\n edit. 2. Use the built-in recipe editor to make changes to the Individual\n Recipe.", + "prompt_type": "plain" + }, + { + "output": "Click Save as New Recipe and Activate. 4. Click More Actions > Use in New Experiment. Including specific individuals in an experiment\nThe downloaded individual recipe (zip or Python file) can be directly\nuploaded from the computer via the expert settings when creating a new\nexperiment. 
You can also perform the following steps to include an Individual Recipe\nthat has already been uploaded by using the\nInclude specific individuals expert setting. 1. On the Experiment Setup page, click Expert Settings. The Expert\n Settings window is displayed. 2. Click the Recipes tab, then click Select Values for the Include\n specific individuals expert setting. 3. Select the custom individuals you want to include in the experiment,\n then click Done. 4. In the Expert Settings window, click Save. The experiment preview\n updates to reflect the inclusion of the selected custom individuals. Individual Recipe Example\nThis section contains a list of minimum required parameters for a custom\nIndividual Recipe, as well as an example of a custom Individual Recipe\nusing the Credit Card dataset.", + "prompt_type": "plain" + }, + { + "output": "Minimum required parameters\nThe following is a list of the minimum required parameters for a custom\nIndividual Recipe:\n- Model type: Specify the model type. For example:\n- Model parameters: Specify the parameters of the model. For example:\n- Genome: Specify all valid parameters for genes. For example:\nSample Individual Recipe\nThe following is an example of a custom Individual Recipe using the\nCredit Card dataset. Note\nThe following example does not contain all available parameters for\ncustom Individual Recipes. For an example Individual Recipe that\nfeatures all available parameters, see creditcard.py from the official\nDriverless AI recipes GitHub repository. from h2oaicore.ga import CustomIndividual\n # Custom wrapper class used to construct the DAI Individual. # Contains information related to model type, model parameters,\n # feature types, and feature parameters. 
class IndivCCsimple(CustomIndividual):\n # Function to set the model type and its parameters.", + "prompt_type": "plain" + }, + { + "output": "Security\n\nsecurity config-security", + "prompt_type": "plain" + }, + { + "output": "PAM Authentication Example\nThe following sections describe how to enable Pluggable Authentication\nModules (PAM) in Driverless AI. You can do this by specifying\nenvironment variables in the Docker image or by updating the config.toml\nfile. Note: This assumes that the user has an understanding of how to grant\npermissions in their own environment in order for PAM to work. Specifically for Driverless AI, be sure that the Driverless AI processes\nowner has access to /etc/shadow (without root); otherwise authentication\nwill fail. Docker Image Installs\nNote: The following instructions are only applicable with a CentOS 7\nhost. In this example, the host Linux system has PAM enabled for\nauthentication and Docker running on that Linux system. The goal is to\nenable PAM for Driverless AI authentication while the Linux system hosts\nthe user information. 1. Verify that the username (\"eric\" in this case) is defined in the\n Linux system. 2. Start Docker on the Linux Server and enable PAM in Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Obtain the Driverless AI container ID. This ID is required for the\n next step and will be different every time Driverless AI is started. 4. From the Linux Server, verify that the Docker Driverless AI instance\n can see the shadow file. The example below references 8e333475ffd8,\n which is the container ID obtained in the previous step. 5. Open a Web browser and navigate to port 12345 on the Linux system\n that is running the Driverless AI Docker Image. Log in with\n credentials known to the Linux system. The login information will\n now be validated using PAM. Native Installs\nIn this example, the host Linux system has PAM enabled for\nauthentication. 
The goal is to enable PAM for Driverless AI\nauthentication while the Linux system hosts the user information. This example shows how to edit the config.toml file to enable PAM. The\nconfig.toml file is available in the etc/dai folder after the RPM or DEB\nis installed. Edit the authentication_method variable in this file to\nenable PAM authentication, and then restart Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "time_series_recipe``\nTime-Series Lag-Based Recipe\nThis recipe specifies whether to include Time Series lag features when\ntraining a model with a provided (or autodetected) time column. This is\nenabled by default. Lag features are the primary automatically generated\ntime series features and represent a variable's past values. At a given\nsample with time stamp t, features at some time difference T (lag) in\nthe past are considered. For example, if the sales today are 300, and\nsales of yesterday are 250, then the lag of one day for sales is 250. Lags can be created on any feature as well as on the target. Lagging\nvariables are important in time series because knowing what happened in\ndifferent time periods in the past can greatly facilitate predictions\nfor the future. Note: Ensembling is disabled when the lag-based recipe\nwith time columns is activated because it only supports a single final\nmodel. Ensembling is also disabled if a time column is selected or if\ntime column is set to [Auto] on the experiment setup screen.", + "prompt_type": "plain" + }, + { + "output": "time_series_leaderboard_mode-------------------------------- .. container:: dropdown **Control the automatic time-series leaderboard mode** Select from the following options: - 'diverse': explore a diverse set of models built using various expert settings. Note that it's possible to rerun another such diverse leaderboard on top of the best-performing model(s), which will effectively help you compose these expert settings. 
- 'sliding_window': If the forecast horizon is N periods, create a separate model for each of the (gap, horizon) pairs of (0,n), (n,n), (2*n,n), ..., (2*N-1, n) in units of time periods. The number of periods to predict per model n is controlled by the expert setting ``time_series_leaderboard_periods_per_model``, which defaults\n\n to 1. This can help to improve short-term forecasting quality.", + "prompt_type": "plain" + }, + { + "output": "time_series_leaderboard_periods_per_model--------------------------------------------- .. container:: dropdown **Number of periods per model if time_series_leaderboard_mode is 'sliding_window'** Specify the number of periods per model if ``time_series_leaderboard_mode`` is set to ``sliding_window``. Larger\n\n values lead to fewer models.", + "prompt_type": "plain" + }, + { + "output": "time_series_merge_splits``\n\nLarger Validation Splits for Lag-Based Recipe\n\nSpecify whether to create larger validation splits that are not bound to\nthe length of the forecast horizon. This can help to prevent overfitting\non small data or short forecast horizons. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "merge_splits_max_valid_ratio``\n\nMaximum Ratio of Training Data Samples Used for Validation\n\nSpecify the maximum ratio of training data samples used for validation\nacross splits when larger validation splits are created (see\ntime_series_merge_splits setting). The default value (-1) will set the\nratio automatically depending on the total amount of validation splits.", + "prompt_type": "plain" + }, + { + "output": "fixed_size_splits``\n\nFixed-Size Train Timespan Across Splits\n\nSpecify whether to keep a fixed-size train timespan across time-based\nsplits during internal validation. That leads to roughly the same amount\nof train samples in every split. 
This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "time_series_validation_fold_split_datetime_boundaries``\n\nCustom Validation Splits for Time-Series Experiments\n\nSpecify date or datetime timestamps (in the same format as the time\ncolumn) to use for custom training and validation splits.", + "prompt_type": "plain" + }, + { + "output": "timeseries_split_suggestion_timeout``\n\nTimeout in Seconds for Time-Series Properties Detection in UI\n\nSpecify the timeout in seconds for time-series properties detection in\nDriverless AI's user interface. This value defaults to 30.", + "prompt_type": "plain" + }, + { + "output": "holiday_features``\n\nGenerate Holiday Features\n\nFor time-series experiments, specify whether to generate holiday\nfeatures for the experiment. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "holiday_countries``\n\nCountry code(s) for holiday features\n\nSpecify country codes in the form of a list that is used to look up\nholidays.\n\nNote: This setting is for migration purposes only.", + "prompt_type": "plain" + }, + { + "output": "override_lag_sizes``\n\nTime-Series Lags Override\n\nSpecify the override lags to be used. The lag values provided here are\nthe only set of lags to be explored in the experiment. 
The following\nexamples show the variety of different methods that can be used to\nspecify override lags:\n\n- \"[0]\" disable lags\n- \"[7, 14, 21]\" specifies this exact list\n- \"21\" specifies every value from 1 to 21\n- \"21:3\" specifies every value from 1 to 21 in steps of 3\n- \"5-21\" specifies every value from 5 to 21\n- \"5-21:3\" specifies every value from 5 to 21 in steps of 3", + "prompt_type": "plain" + }, + { + "output": "override_ufapt_lag_sizes``\n\nLags Override for Features That are not Known Ahead of Time\n\nSpecify lags override for non-target features that are not known ahead\nof time.\n\n- \"[0]\" disable lags\n- \"[7, 14, 21]\" specifies this exact list\n- \"21\" specifies every value from 1 to 21\n- \"21:3\" specifies every value from 1 to 21 in steps of 3\n- \"5-21\" specifies every value from 5 to 21\n- \"5-21:3\" specifies every value from 5 to 21 in steps of 3", + "prompt_type": "plain" + }, + { + "output": "override_non_ufapt_lag_sizes``\n\nLags Override for Features That are Known Ahead of Time\n\nSpecify lags override for non-target features that are known ahead of\ntime.\n\n- \"[0]\" disable lags\n- \"[7, 14, 21]\" specifies this exact list\n- \"21\" specifies every value from 1 to 21\n- \"21:3\" specifies every value from 1 to 21 in steps of 3\n- \"5-21\" specifies every value from 5 to 21\n- \"5-21:3\" specifies every value from 5 to 21 in steps of 3", + "prompt_type": "plain" + }, + { + "output": "min_lag_size``\n\nSmallest Considered Lag Size\n\nSpecify a minimum considered lag size. This value defaults to -1.", + "prompt_type": "plain" + }, + { + "output": "allow_time_column_as_feature``\n\nEnable Feature Engineering from Time Column\n\nSpecify whether to enable feature engineering based on the selected time\ncolumn, e.g. Date~weekday. 
This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "allow_time_column_as_numeric_feature``\n\nAllow Integer Time Column as Numeric Feature\n\nSpecify whether to enable feature engineering from an integer time\ncolumn. Note that if you are using a time series recipe, using a time\ncolumn (numeric time stamps) as an input feature can lead to a model\nthat memorizes the actual timestamps instead of features that generalize\nto the future. This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "datetime_funcs------------------ .. container:: dropdown **Allowed Date and Date-Time Transformations** Specify the date or date-time transformations to allow Driverless AI to use. Choose from the following transformers: - year - quarter - month - week - weekday - day - dayofyear - num (direct numeric value representing the floating point value of time, disabled by default) - hour - minute - second Features in Driverless AI will appear as ``get_`` followed by the name of the transformation. Note that ``get_num`` can lead to\n\n overfitting if used on IID problems and is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "filter_datetime_funcs``\n\nAuto Filtering of Date and Date-Time Transformations\n\nWhether to automatically filter out date and date-time transformations\nthat would lead to unseen values in the future. This is enabled by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "allow_tgc_as_features``\n\nConsider Time Groups Columns as Standalone Features\n\nSpecify whether to consider time groups columns as standalone features.\nThis is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "allowed_coltypes_for_tgc_as_features``\n\nWhich TGC Feature Types to Consider as Standalone Features\n\nSpecify whether to consider time groups columns (TGC) as standalone\nfeatures. 
If \"Consider time groups columns as standalone features\" is\nenabled, then specify which TGC feature types to consider as standalone\nfeatures. Available types are numeric, categorical, ohe_categorical,\ndatetime, date, and text. All types are selected by default. Note that\n\"time_column\" is treated separately via the \"Enable Feature Engineering\nfrom Time Column\" option. Also note that if \"Time Series Lag-Based\nRecipe\" is disabled, then all time group columns are allowed features.", + "prompt_type": "plain" + }, + { + "output": "enable_time_unaware_transformers``\n\nEnable Time Unaware Transformers\n\nSpecify whether various transformers (clustering, truncated SVD) are\nenabled, which otherwise would be disabled for time series experiments\ndue to the potential to overfit by leaking across time within the fit of\neach fold. This is set to Auto by default.", + "prompt_type": "plain" + }, + { + "output": "tgc_only_use_all_groups``\n\nAlways Group by All Time Groups Columns for Creating Lag Features\n\nSpecify whether to group by all time groups columns for creating lag\nfeatures, instead of sampling from them. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "tgc_allow_target_encoding----------------------------- .. container:: dropdown **Allow Target Encoding of Time Groups Columns** Specify whether it is allowed to target encode the time groups columns. This is disabled by default. **Notes**: - This setting is not affected by ``allow_tgc_as_features``. - Subgroups can be encoded by disabling ``tgc_only_use_all_groups``.", + "prompt_type": "plain" + }, + { + "output": "time_series_holdout_preds``\n\nGenerate Time-Series Holdout Predictions\n\nSpecify whether to create diagnostic holdout predictions on training\ndata using moving windows. This is enabled by default. This can be\nuseful for MLI, but it will slow down the experiment considerably when\nenabled. 
Note that the model itself remains unchanged when this setting\nis enabled.", + "prompt_type": "plain" + }, + { + "output": "time_series_validation_splits``\n\nNumber of Time-Based Splits for Internal Model Validation\n\nSpecify a fixed number of time-based splits for internal model\nvalidation. Note that the actual number of allowed splits can be less\nthan the specified value, and that the number of allowed splits is\ndetermined at the time an experiment is run. This value defaults to -1\n(auto).", + "prompt_type": "plain" + }, + { + "output": "time_series_splits_max_overlap``\n\nMaximum Overlap Between Two Time-Based Splits\n\nSpecify the maximum overlap between two time-based splits. The amount of\npossible splits increases with higher values. This value defaults to\n0.5.", + "prompt_type": "plain" + }, + { + "output": "time_series_max_holdout_splits---------------------------------- .. container:: dropdown **Maximum Number of Splits Used for Creating Final Time-Series Model's Holdout Predictions** Specify the maximum number of splits used for creating the final time-series model's holdout predictions. The default value (-1) will use the same number of splits that are used during model validation. Use ``time_series_validation_splits`` to control the amount of time-based\n\n splits used for model validation.", + "prompt_type": "plain" + }, + { + "output": "mli_ts_fast_approx``\n\nWhether to Speed up Calculation of Time-Series Holdout Predictions\n\nSpecify whether to speed up time-series holdout predictions for\nback-testing on training data. This setting is used for MLI and\ncalculating metrics. Note that predictions can be slightly less accurate\nwhen this setting is enabled. 
This is disabled by default.", + "prompt_type": "plain" + }, + { + "output": "mli_ts_fast_approx_contribs``\n\nWhether to Speed up Calculation of Shapley Values for Time-Series\nHoldout Predictions\n\nSpecify whether to speed up Shapley values for time-series holdout\npredictions for back-testing on training data. This setting is used for\nMLI. Note that predictions can be slightly less accurate when this\nsetting is enabled. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "mli_ts_holdout_contribs``\n\nGenerate Shapley Values for Time-Series Holdout Predictions at the Time\nof Experiment\n\nSpecify whether to enable the creation of Shapley values for holdout\npredictions on training data using moving windows at the time of the\nexperiment. This can be useful for MLI, but it can slow down the\nexperiment when enabled. If this setting is disabled, MLI will generate\nShapley values on demand. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "time_series_min_interpretability``\n\nLower Limit on Interpretability Setting for Time-Series Experiments\n(Implicitly Enforced)\n\nSpecify the lower limit on interpretability setting for time-series\nexperiments. Values of 5 (default) or more can improve generalization by\nmore aggressively dropping the least important features. To disable this\nsetting, set this value to 1.", + "prompt_type": "plain" + }, + { + "output": "lags_dropout``\n\nDropout Mode for Lag Features\n\nSpecify the dropout mode for lag features in order to achieve an equal\nn.a. ratio between train and validation/tests. Independent mode performs\na simple feature-wise dropout. Dependent mode takes the lag-size\ndependencies per sample/row into account. Dependent is enabled by\ndefault.", + "prompt_type": "plain" + }, + { + "output": "prob_lag_non_targets``\n\nProbability to Create Non-Target Lag Features\n\nLags can be created on any feature as well as on the target. 
Specify a\nprobability value for creating non-target lag features. This value\ndefaults to 0.1.", + "prompt_type": "plain" + }, + { + "output": "rolling_test_method``\n\nMethod to Create Rolling Test Set Predictions\n\nSpecify the method used to create rolling test set predictions. Choose\nbetween test time augmentation (TTA) and a successive refitting of the\nfinal pipeline (Refit). TTA is enabled by default.\n\nNotes:\n\n- This setting only applies to the test set that is provided by the\n user during an experiment.\n- This setting only has an effect if the provided test set spans more\n periods than the forecast horizon and if the target values of the\n test set are known.", + "prompt_type": "plain" + }, + { + "output": "fast_tta_internal``\n\nFast TTA for Internal Validation\n\nSpecify whether the genetic algorithm applies Test Time Augmentation\n(TTA) in one pass instead of using rolling windows for validation splits\nlonger than the forecast horizon. This is enabled by default.", + "prompt_type": "plain" + }, + { + "output": "prob_default_lags``\n\nProbability for New Time-Series Transformers to Use Default Lags\n\nSpecify the probability for new lags or the EWMA gene to use default\nlags. This is determined independently of the data by frequency, gap,\nand horizon. This value defaults to 0.2.", + "prompt_type": "plain" + }, + { + "output": "prob_lagsinteraction``\n\nProbability of Exploring Interaction-Based Lag Transformers\n\nSpecify the unnormalized probability of choosing other lag time-series\ntransformers based on interactions. This value defaults to 0.2.", + "prompt_type": "plain" + }, + { + "output": "prob_lagsaggregates``\n\nProbability of Exploring Aggregation-Based Lag Transformers\n\nSpecify the unnormalized probability of choosing other lag time-series\ntransformers based on aggregations. 
This value defaults to 0.2.", + "prompt_type": "plain" + }, + { + "output": "ts_target_trafo``\nTime Series Centering or Detrending Transformation\nSpecify whether to use centering or detrending transformation for time\nseries experiments. Select from the following:\n- None (Default)\n- Centering (Fast)\n- Centering (Robust)\n- Linear (Fast)\n- Linear (Robust)\n- Logistic\n- Epidemic (Uses the SEIRD model)\nThe fitted signal is removed from the target signal per individual time\nseries once the free parameters of the selected model are fitted. Linear\nor Logistic will remove the fitted linear or logistic trend, Centering\nwill only remove the mean of the target signal and Epidemic will remove\nthe signal specified by a Susceptible-Exposed-Infected-Recovered-Dead\n(SEIRD) epidemic model. Predictions are made by adding the previously\nremoved signal once the pipeline is fitted on the residuals. Notes:\n- MOJO support is currently disabled when this setting is enabled. - The Fast centering and linear detrending options use least squares\n fitting. - The Robust centering and linear detrending options use random sample\n consensus (RANSAC) to achieve higher tolerance w.r.t.", + "prompt_type": "plain" + }, + { + "output": "ts_target_trafo_epidemic_params_dict---------------------------------------- .. container:: dropdown **Custom Bounds for SEIRD Epidemic Model Parameters** Specify the custom bounds for controlling Susceptible-Exposed-Infected-Recovered-Dead (SEIRD) epidemic model parameters for detrending of the target for each time series group. The target column must correspond to *I(t)*, which represents infection cases as a function of time. For each training split and time series group, the SEIRD model is fit to the target signal by optimizing a set of free parameters for each time series group. The model's value is then subtracted from the training response, and the residuals are passed to the feature engineering and modeling pipeline. 
For predictions, the SEIRD model's value is added to the residual predictions from the pipeline for each time series group. The following is a list of free parameters: - **N**: Total population, *N = S+E+I+R+D* - **beta**: Rate of exposure (*S* -> *E*) - **gamma**: Rate of recovering (*I* -> *R*) - **delta**: Incubation period - **alpha**: Fatality rate - **rho**: Rate at which individuals expire - **lockdown**: Day of lockdown (-1 => no lockdown) - **beta_decay**: Beta decay due to lockdown - **beta_decay_rate**: Speed of beta decay Provide upper or lower bounds for each parameter you want to control.", + "prompt_type": "plain" + }, + { + "output": "ts_target_trafo_epidemic_target``\n\nWhich SEIRD Model Component the Target Column Corresponds To\n\nSpecify a SEIRD model component for the target column to correspond to.\nSelect from the following:\n\n- I (Default): Infected\n- R: Recovered\n- D: Deceased", + "prompt_type": "plain" + }, + { + "output": "ts_lag_target_trafo----------------------- .. container:: dropdown **Time Series Lag-Based Target Transformation** Specify whether to use either the difference between or ratio of the current target and a lagged target. Select from **None** (default), **Difference**, and **Ratio**. **Notes**: - MOJO support is currently disabled when this setting is enabled. - The corresponding lag size is specified with the ``ts_target_trafo_lag_size``\nexpert setting.", + "prompt_type": "plain" + }, + { + "output": "ts_target_trafo_lag_size---------------------------- .. container:: dropdown **Lag Size Used for Time Series Target Transformation** Specify the lag size used for time series target transformation. 
Specify this setting when using the ``ts_lag_target_trafo`` setting.\n\n This value defaults to -1.\n\n Note: The lag size should not be smaller than the sum of forecast\n horizon and gap.", + "prompt_type": "plain" + }, + { + "output": "UI Language\nThe Driverless AI UI is available in English (default), Japanese,\nChinese (Simplified), and Korean. This section describes how you can use\nthe app_language config setting/environment variable to change the\nlanguage of the UI before starting Driverless AI. When using app_language, the following options can be specified:\n- en: English (default)\n- ja: Japanese\n- cn: Chinese (Simplified)\n- ko: Korean\nExamples\nThe following examples show how to change the app language from English\nto Japanese. Docker Image Installs\nTo change the application language in Docker images, specify the\nAPP_LANGUAGE environment variable. Note that this variable must be\nprepended with DRIVERLESS_AI_. Replace nvidia-docker with docker in the\nexample below if necessary. nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -p 12345:12345 \\\n -u `id -u`:`id -g` \\\n -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,s3,hdfs\" \\\n -e DRIVERLESS_AI_APP_LANGUAGE=\"ja\" \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\nDocker Image with the config.toml\nThis example shows how to configure the app_language option in the config.toml\nfile, and then specify that file when starting Driverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "Configure the Driverless AI config.toml file. Set the following\n configuration option. - app_language=\"ja\"\n2. Mount the config.toml file into the Docker container. Replace\n nvidia-docker with docker if necessary. 
nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n --add-host name.node:172.16.2.186 \\\n -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\\n -p 12345:12345 \\\n -v /local/path/to/config.toml:/path/in/docker/config.toml \\\n -v /etc/passwd:/etc/passwd:ro \\\n -v /etc/group:/etc/group:ro \\\n -v /tmp/dtmp/:/tmp \\\n -v /tmp/dlog/:/log \\\n -v /tmp/dlicense/:/license \\\n -v /tmp/ddata/:/data \\\n -u $(id -u):$(id -g) \\\n h2oai/dai-ubi8-x86_64:|tag|\nNative Installs\nNative installs include DEBs, RPMs, and TAR SH installs. The example\nbelow shows how to use the app_language configuration option in the\nconfig.toml file to change the language to Japanese. 1. Export the Driverless AI config.toml file or add it to ~/.bashrc.", + "prompt_type": "plain" + }, + { + "output": "R Client Tutorial\nThis tutorial describes how to use the Driverless AI R client package to\nuse and control the Driverless AI platform. It covers the main\npredictive data-science workflow, including:\n1. Data load\n2. Automated feature engineering and model tuning\n3. Model inspection\n4. Predicting on new data\n5. Managing the datasets and models\nNote: These steps assume that you have entered your license key in the\nDriverless AI UI. 
Loading the Data\nBefore we can start working with the Driverless.ai platform (DAI), we\nhave to import the package and initialize the connection:\n library(dai)\n dai.connect(uri = 'http://localhost:12345', username = 'h2oai', password = 'h2oai')\n creditcard <- dai.create_dataset('/data/smalldata/kaggle/CreditCard/creditcard_train_cat.csv')\n #> \n | \n | | 0%\n | \n |================ | 24%\n | \n |=================================================================| 100%\nThe function dai.create_dataset() loads the data located at the machine\nthat hosts DAI.", + "prompt_type": "plain" + }, + { + "output": "dai.upload_dataset()`` instead.\n\nIf you already have the data loaded into R data.frame, you can convert\nit into a DAIFrame. For example:\n\n iris.dai <- as.DAIFrame(iris)\n #> \n | \n | | 0%\n | \n |=================================================================| 100%\n\n print(iris.dai)\n #> DAI frame '7c38cb84-5baa-11e9-a50b-b938de969cdb': 150 obs. of 5 variables\n #> File path: ./tmp/7c38cb84-5baa-11e9-a50b-b938de969cdb/iris9e1f15d2df00.csv.1554912339.9424415.bin\n\nYou can switch off the progress bar whenever it is displayed by setting", + "prompt_type": "plain" + }, + { + "output": "head, andformat. .. 
code:: r dim(creditcard) #> [1] 23999 25 head(creditcard, 10) #> ID LIMIT_BAL SEX EDUCATION MARRIAGE AGE PAY_1 PAY_2 PAY_3 PAY_4 #> 1 1 20000 female university married 24 2 2 -1 -1 #> 2 2 120000 female university single 26 -1 2 0 0 #> 3 3 90000 female university single 34 0 0 0 0 #> 4 4 50000 female university married 37 0 0 0 0 #> 5 5 50000 male university married 57 -1 0 -1 0 #> 6 6 50000 male graduate single 37 0 0 0 0 #> 7 7 500000 male graduate single 29 0 0 0 0 #> 8 8 100000 female university single 23 0 -1 -1 0 #> 9 9 140000 female highschool married 28 0 0 2 0 #> 10 10 20000 male highschool single 35 -2 -2 -2 -2 #> PAY_5 PAY_6 BILL_AMT1 BILL_AMT2 BILL_AMT3 BILL_AMT4 BILL_AMT5 BILL_AMT6 #> 1 -2 -2 3913 3102 689 0 0 0 #> 2 0 2 2682 1725 2682 3272 3455 3261 #> 3 0 0 29239 14027 13559 14331 14948 15549 #> 4 0 0 46990 48233 49291 28314 28959 29547 #> 5 0 0 8617 5670 35835 20940 19146 19131 #> 6 0 0 64400 57069 57608 19394 19619 20024 #> 7 0 0 367965 412023 445007 542653 483003 473944 #> 8 0 -1 11876 380 601 221 -159 567 #> 9 0 0 11285 14096 12108 12211 11793 3719 #> 10 -1 -1 0 0 0 0 13007 13912 #> PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4 PAY_AMT5 PAY_AMT6 #> 1 0 689 0 0 0 0 #> 2 0 1000 1000 1000 0 2000 #> 3 1518 1500 1000 1000 1000 5000 #> 4 2000 2019 1200 1100 1069 1000 #> 5 2000 36681 10000 9000 689 679 #> 6 2500 1815 657 1000 1000 800 #> 7 55000 40000 38000 20239 13750 13770 #> 8 380 601 0 581 1687 1542 #> 9 3329 0 432 1000 1000 1000 #> 10 0 0 0 13007 1122 0 #> DEFAULT_PAYMENT_NEXT_MONTH #> 1 TRUE #> 2 TRUE #> 3 FALSE #> 4 FALSE #> 5 FALSE #> 6 FALSE #> 7 FALSE #> 8 FALSE #> 9 FALSE #> 10 FALSE You cannot, however, useDAIFrameto access all its data, nor can you use it to modify the data.", + "prompt_type": "plain" + }, + { + "output": "The head function gives access only to example data: .. 
code:: r creditcard$example_data[1:10, ] #> ID LIMIT_BAL SEX EDUCATION MARRIAGE AGE PAY_1 PAY_2 PAY_3 PAY_4 #> 1 1 20000 female university married 24 2 2 -1 -1 #> 2 2 120000 female university single 26 -1 2 0 0 #> 3 3 90000 female university single 34 0 0 0 0 #> 4 4 50000 female university married 37 0 0 0 0 #> 5 5 50000 male university married 57 -1 0 -1 0 #> 6 6 50000 male graduate single 37 0 0 0 0 #> 7 7 500000 male graduate single 29 0 0 0 0 #> 8 8 100000 female university single 23 0 -1 -1 0 #> 9 9 140000 female highschool married 28 0 0 2 0 #> 10 10 20000 male highschool single 35 -2 -2 -2 -2 #> PAY_5 PAY_6 BILL_AMT1 BILL_AMT2 BILL_AMT3 BILL_AMT4 BILL_AMT5 BILL_AMT6 #> 1 -2 -2 3913 3102 689 0 0 0 #> 2 0 2 2682 1725 2682 3272 3455 3261 #> 3 0 0 29239 14027 13559 14331 14948 15549 #> 4 0 0 46990 48233 49291 28314 28959 29547 #> 5 0 0 8617 5670 35835 20940 19146 19131 #> 6 0 0 64400 57069 57608 19394 19619 20024 #> 7 0 0 367965 412023 445007 542653 483003 473944 #> 8 0 -1 11876 380 601 221 -159 567 #> 9 0 0 11285 14096 12108 12211 11793 3719 #> 10 -1 -1 0 0 0 0 13007 13912 #> PAY_AMT1 PAY_AMT2 PAY_AMT3 PAY_AMT4 PAY_AMT5 PAY_AMT6 #> 1 0 689 0 0 0 0 #> 2 0 1000 1000 1000 0 2000 #> 3 1518 1500 1000 1000 1000 5000 #> 4 2000 2019 1200 1100 1069 1000 #> 5 2000 36681 10000 9000 689 679 #> 6 2500 1815 657 1000 1000 800 #> 7 55000 40000 38000 20239 13750 13770 #> 8 380 601 0 581 1687 1542 #> 9 3329 0 432 1000 1000 1000 #> 10 0 0 0 13007 1122 0 #> DEFAULT_PAYMENT_NEXT_MONTH #> 1 TRUE #> 2 TRUE #> 3 FALSE #> 4 FALSE #> 5 FALSE #> 6 FALSE #> 7 FALSE #> 8 FALSE #> 9 FALSE #> 10 FALSE A dataset can be split into e.g.", + "prompt_type": "plain" + }, + { + "output": ".. code:: r creditcard.splits$train #> DAI frame '7cf3024c-5baa-11e9-a50b-b938de969cdb': 19199 obs. of 25 variables #> File path: ./tmp/7cf3024c-5baa-11e9-a50b-b938de969cdb/train.1554912341.0864356.bin creditcard.splits$test #> DAI frame '7cf613a6-5baa-11e9-a50b-b938de969cdb': 4800 obs. 
of 25 variables #> File path: ./tmp/7cf613a6-5baa-11e9-a50b-b938de969cdb/test.1554912341.0966916.bin By default it yields a random sample, but you can do stratified or time-based splits as well. See the function\u2019s documentation for more details. Automated Feature Engineering and Model Tuning ---------------------------------------------- One of the main strengths of Driverless AI is the fully automated feature engineering along with hyperparameter tuning, model selection and ensembling. The functiondai.train()executes the experiment that results in a DAIModel instance that represents the model. .. code:: r model <- dai.train(training_frame = creditcard.splits$train, testing_frame = creditcard.splits$test, target_col = 'DEFAULT_PAYMENT_NEXT_MONTH', is_classification = T, is_timeseries = F, accuracy = 1, time = 1, interpretability = 10, seed = 25) #> | | | 0% | |========================== | 40% | |=============================================== | 73% | |=========================================================== | 91% | |=================================================================| 100% If you do not specify the accuracy, time, or interpretability, they will be suggested by the DAI platform.", + "prompt_type": "plain" + }, + { + "output": "summary, andpredictwork with DAIModel: .. 
code:: r print(model) #> Status: Complete #> Experiment: 7e2b70ae-5baa-11e9-a50b-b938de969cdb, 2019-04-10 18:06, 1.7.0+local_0c7d019-dirty #> Settings: 1/1/10, seed=25, GPUs enabled #> Train data: train (19199, 25) #> Validation data: N/A #> Test data: test (4800, 24) #> Target column: DEFAULT_PAYMENT_NEXT_MONTH (binary, 22.366% target class) #> System specs: Linux, 126 GB, 40 CPU cores, 2/2 GPUs #> Max memory usage: 0.406 GB, 0.167 GB GPU #> Recipe: AutoDL (2 iterations, 2 individuals) #> Validation scheme: stratified, 1 internal holdout #> Feature engineering: 33 features scored (18 selected) #> Timing: #> Data preparation: 4.94 secs #> Model and feature tuning: 10.13 secs (3 models trained) #> Feature evolution: 5.54 secs (1 of 3 model trained) #> Final pipeline training: 7.85 secs (1 model trained) #> Python / MOJO scorer building: 42.05 secs / 0.00 secs #> Validation score: AUC = 0.77802 +/- 0.0077539 (baseline) #> Validation score: AUC = 0.77802 +/- 0.0077539 (final pipeline) #> Test score: AUC = 0.7861 +/- 0.0064711 (final pipeline) summary(model)$score #> [1] 0.7780229 Predicting on New Data ---------------------- New data can be scored in two different ways: - Callpredict()directly on the model in R session.", + "prompt_type": "plain" + }, + { + "output": "Predicting in R ~~~~~~~~~~~~~~~ Genericpredict()either directly returns an R data.frame with the results (by default) or it returns a URL pointing to a CSV file with the results (return_df=FALSE). The latter option may be useful when you predict on a large dataset. .. 
code:: r predictions <- predict(model, newdata = creditcard.splits$test) #> | | | 0% | |=================================================================| 100% #> Loading required package: bitops head(predictions) #> DEFAULT_PAYMENT_NEXT_MONTH.0 DEFAULT_PAYMENT_NEXT_MONTH.1 #> 1 0.8879988 0.11200116 #> 2 0.9289870 0.07101299 #> 3 0.9550328 0.04496716 #> 4 0.3513577 0.64864230 #> 5 0.9183724 0.08162758 #> 6 0.9154425 0.08455751 predict(model, newdata = creditcard.splits$test, return_df = FALSE) #> | | | 0% | |=================================================================| 100% #> [1] \"h2oai_experiment_7e2b70ae-5baa-11e9-a50b-b938de969cdb/7e2b70ae-5baa-11e9-a50b-b938de969cdb_preds_f854b49f.csv\" Downloading Python or MOJO Scoring Pipelines ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For productizing your model in a Python or Java, you can download full Python or MOJO pipelines, respectively.", + "prompt_type": "plain" + }, + { + "output": ".. code:: r dai.download_mojo(model, path = tempdir(), force = TRUE) #> | | | 0% | |=================================================================| 100% #> Downloading the pipeline: #> [1] \"/tmp/RtmppsLTZ9/mojo-7e2b70ae-5baa-11e9-a50b-b938de969cdb.zip\" dai.download_python_pipeline(model, path = tempdir(), force = TRUE) #> | | | 0% | |=================================================================| 100% #> Downloading the pipeline: #> [1] \"/tmp/RtmppsLTZ9/python-pipeline-7e2b70ae-5baa-11e9-a50b-b938de969cdb.zip\" Managing the Datasets and Models -------------------------------- After some time, you may have multiple datasets and models on your DAI server.", + "prompt_type": "plain" + }, + { + "output": "If you already have the dataset loaded into DAI, you can get the DAIFrame object by eitherdai.get_frame(if you know the frame\u2019s key) ordai.find_dataset(if you know the original path or at least a part of it): .. code:: r dai.get_frame(creditcard$key) #> DAI frame '7abe28b2-5baa-11e9-a50b-b938de969cdb': 23999 obs. 
of 25 variables #> File path: tests/smalldata/kaggle/CreditCard/creditcard_train_cat.csv dai.find_dataset('creditcard') #> DAI frame '7abe28b2-5baa-11e9-a50b-b938de969cdb': 23999 obs. of 25 variables #> File path: tests/smalldata/kaggle/CreditCard/creditcard_train_cat.csv The latter directly returns you the frame if there\u2019s only one match. Otherwise it let you select which frame to return from all the matching candidates. Furthermore, you can get a list of datasets or models: .. code:: r datasets <- dai.list_datasets() head(datasets) #> key name #> 1 7cf613a6-5baa-11e9-a50b-b938de969cdb test #> 2 7cf3024c-5baa-11e9-a50b-b938de969cdb train #> 3 7c38cb84-5baa-11e9-a50b-b938de969cdb iris9e1f15d2df00.csv #> 4 7abe28b2-5baa-11e9-a50b-b938de969cdb creditcard_train_cat.csv #> file_path #> 1 ./tmp/7cf613a6-5baa-11e9-a50b-b938de969cdb/test.1554912341.0966916.bin #> 2 ./tmp/7cf3024c-5baa-11e9-a50b-b938de969cdb/train.1554912341.0864356.bin #> 3 ./tmp/7c38cb84-5baa-11e9-a50b-b938de969cdb/iris9e1f15d2df00.csv.1554912339.9424415.bin #> 4 tests/smalldata/kaggle/CreditCard/creditcard_train_cat.csv #> file_size data_source row_count column_count import_status import_error #> 1 567584 upload 4800 25 0 #> 2 2265952 upload 19199 25 0 #> 3 7064 upload 150 5 0 #> 4 2832040 file 23999 25 0 #> aggregation_status aggregation_error aggregated_frame mapping_frame #> 1 -1 #> 2 -1 #> 3 -1 #> 4 -1 #> uploaded #> 1 TRUE #> 2 TRUE #> 3 TRUE #> 4 FALSE models <- dai.list_models() head(models) #> key description #> 1 7e2b70ae-5baa-11e9-a50b-b938de969cdb mupulori #> dataset_name parameters.dataset_key #> 1 train.1554912341.0864356.bin 7cf3024c-5baa-11e9-a50b-b938de969cdb #> parameters.resumed_model_key parameters.target_col #> 1 DEFAULT_PAYMENT_NEXT_MONTH #> parameters.weight_col parameters.fold_col parameters.orig_time_col #> 1 #> parameters.time_col parameters.is_classification parameters.cols_to_drop #> 1 [OFF] TRUE NULL #> parameters.validset_key parameters.testset_key #> 1 
7cf613a6-5baa-11e9-a50b-b938de969cdb #> parameters.enable_gpus parameters.seed parameters.accuracy #> 1 TRUE 25 1 #> parameters.time parameters.interpretability parameters.scorer #> 1 1 10 AUC #> parameters.time_groups_columns parameters.time_period_in_seconds #> 1 NULL NA #> parameters.num_prediction_periods parameters.num_gap_periods #> 1 NA NA #> parameters.is_timeseries parameters.config_overrides #> 1 FALSE NA #> log_file_path #> 1 h2oai_experiment_7e2b70ae-5baa-11e9-a50b-b938de969cdb/h2oai_experiment_logs_7e2b70ae-5baa-11e9-a50b-b938de969cdb.zip #> pickle_path #> 1 h2oai_experiment_7e2b70ae-5baa-11e9-a50b-b938de969cdb/best_individual.pickle #> summary_path #> 1 h2oai_experiment_7e2b70ae-5baa-11e9-a50b-b938de969cdb/h2oai_experiment_summary_7e2b70ae-5baa-11e9-a50b-b938de969cdb.zip #> train_predictions_path valid_predictions_path #> 1 #> test_predictions_path #> 1 h2oai_experiment_7e2b70ae-5baa-11e9-a50b-b938de969cdb/test_preds.csv #> progress status training_duration scorer score test_score deprecated #> 1 1 0 71.43582 AUC 0.7780229 0.7861 FALSE #> model_file_size diagnostic_keys #> 1 695996094 NULL If you know the key of the dataset or model, you can obtain the instance of DAIFrame or DAIModel bydai.get_modelanddai.get_frame: .. 
code:: r dai.get_model(models$key[1]) #> Status: Complete #> Experiment: 7e2b70ae-5baa-11e9-a50b-b938de969cdb, 2019-04-10 18:06, 1.7.0+local_0c7d019-dirty #> Settings: 1/1/10, seed=25, GPUs enabled #> Train data: train (19199, 25) #> Validation data: N/A #> Test data: test (4800, 24) #> Target column: DEFAULT_PAYMENT_NEXT_MONTH (binary, 22.366% target class) #> System specs: Linux, 126 GB, 40 CPU cores, 2/2 GPUs #> Max memory usage: 0.406 GB, 0.167 GB GPU #> Recipe: AutoDL (2 iterations, 2 individuals) #> Validation scheme: stratified, 1 internal holdout #> Feature engineering: 33 features scored (18 selected) #> Timing: #> Data preparation: 4.94 secs #> Model and feature tuning: 10.13 secs (3 models trained) #> Feature evolution: 5.54 secs (1 of 3 model trained) #> Final pipeline training: 7.85 secs (1 model trained) #> Python / MOJO scorer building: 42.05 secs / 0.00 secs #> Validation score: AUC = 0.77802 +/- 0.0077539 (baseline) #> Validation score: AUC = 0.77802 +/- 0.0077539 (final pipeline) #> Test score: AUC = 0.7861 +/- 0.0064711 (final pipeline) dai.get_frame(datasets$key[1]) #> DAI frame '7cf613a6-5baa-11e9-a50b-b938de969cdb': 4800 obs.", + "prompt_type": "plain" + }, + { + "output": "creditcard.splits$trainandcreditcard.splits$testobjects will not be removed from R session because they are actually function calls (recall that$``\nis a function).", + "prompt_type": "plain" + }, + { + "output": "Model Scores\nYou can view detailed information about model scores after an experiment\nis complete by clicking on the Scores option. []\nThe Model Scores page that opens includes the following tables:\n- Model and feature tuning leaderboard: This leaderboard shows scoring\n information based on the scorer that was selected in the experiment. This information is also available in the tuning_leaderboard.json\n file of the experiment_summary. You can download that file directly\n from the bottom of this table. 
- Final pipeline scores across cross-validation folds and models: This\n table shows the final pipeline scores across cross-validation folds\n and models. Note that if Constant Model was enabled (default), then\n that model is added in this table as a baseline (reference) only and\n will be dropped in most cases. This information is also included in\n the ensemble_base_learner_fold_scores.json file of the\n experiment_summary. You can download that file directly from a link\n at the bottom of this table.", + "prompt_type": "plain" + }, + { + "output": "Scoring Pipelines\n\npython-mojo-pipelines scoring_pipeline_visualize\nscoring-pipeline-which-to-use scoring-standalone-python\nscoring-mli-standalone-python scoring-mojo-pipelines", + "prompt_type": "plain" + }, + { + "output": "Driverless AI user settings\n\nYou can configure several user-specific settings from the UI by clicking\nUser -> User Settings. A window is displayed that lets you set user\nsettings for various connectors. You can also use the search box to\nlocate specific user settings. Click the Save button to confirm your\nchanges.\n\nAWS\n\nSpecify the following AWS-related user settings:\n\n- AWS Access Key ID\n- AWS Secret Access Key\n- AWS S3 Bucket name for artifact export\n\nAzure\n\nSpecify the following Azure-related user settings:\n\n- Azure Blob Store account name\n- Azure Blob Store account key\n- Azure Blob Store Connection String\n\nMinIO\n\nSpecify the following MinIO-related user settings:\n\n- MinIO Access Key ID\n- MinIO Secret Access Key", + "prompt_type": "plain" + }, + { + "output": "Driverless AI MOJO Scoring Pipeline - Java Runtime (With Shapley contribution)\nFor completed experiments, Driverless AI automatically converts models\nto MOJOs (Model Objects, Optimized). The MOJO Scoring Pipeline is a\nscoring engine that can be deployed in any Java environment for scoring\nin real time. 
(For information on the C++ scoring runtime with Python\nand R wrappers, see\nH2O MOJO C++ scoring pipeline .) For info on the\navailable deployment options, see H2O MOJO Deployment . MOJOs are tied to experiments. Experiments and MOJOs are not\nautomatically upgraded when Driverless AI is upgraded. Notes:\n- This scoring pipeline is not currently available for TensorFlow,\n BERT, RuleFit or Image models. TensorFlow/Bert are\n supported by C++ Runtime. - To disable the automatic creation of this scoring pipeline, set the\n Make MOJO Scoring Pipeline expert setting to Off while building an\n experiment. - You can have Driverless AI attempt to reduce the size of the MOJO\n scoring pipeline when the experiment is being built by enabling the\n Reduce MOJO Size expert setting also\n see .", + "prompt_type": "plain" + }, + { + "output": "- Shapley contributions for transformed features and\n original features are currently available for XGBoost (GBM, GLM, RF,\n DART), LightGBM, Zero-Inflated, Imbalanced and DecisionTree models\n (and their ensemble). For ensemble with ExtraTrees meta learner\n (ensemble_meta_learner='extra_trees') models, we suggest to use the\n MLI Python scoring package. Download\nBecause the Java MOJO runtime is backward compatible, we recommend using\nthe latest available version. You can download the latest Java MOJO\nruntime from https://mvnrepository.com/artifact/ai.h2o/mojo2-runtime. A Quick run\nTo get a quick output from the downloaded MOJO scoring pipeline in the\nconsole on the example test set:\n- Make sure Java7 or later is installed. - copy Driverless AI license file (say license.file) to the downloaded\n mojo-pipeline folder\n- cd into the mojo-pipeline folder\n- Score the rows of the example.csv file using the pipeline.mojo file(\n with the mojo2-runtime) created from the experiment to get the\n predictions.", + "prompt_type": "plain" + }, + { + "output": "Bigger test files/MOJOs may require\nmore memory (Xmx) to score. 
Notes:\n - Presently, Shapley contributions for transformed\n features and original features are available for XGBoost (GBM,\n GLM, RF, DART), LightGBM, Zero-Inflated, Imbalanced and\n DecisionTree models (and their ensemble). For ensemble with\n ExtraTrees meta learner (ensemble_meta_learner='extra_trees')\n models we suggest to use the MLI Python scoring package. - In MOJOs, Shapley values for original features are approximated\n from the accompanying Shapley values for transformed features with\n the Naive Shapley (even split ) method. - The Shapley fast approximation uses only\n one model (from the first fold) with no more than the first 50\n trees. For details see fast_approx_num_trees and\n fast_approx_do_one_fold_one_model\n config.toml settings . Prerequisites\nThe following are required in order to run the MOJO scoring pipeline.", + "prompt_type": "plain" + }, + { + "output": "NOTE: We recommend using Java 11+\n due to a bug in Java. (See\n https://bugs.openjdk.java.net/browse/JDK-8186464.) - Valid Driverless AI license. You can download the license.sig file\n from the machine hosting Driverless AI (usually in the license\n folder). Copy the license file into the downloaded mojo-pipeline\n folder. - mojo2-runtime.jar file. This is available from the top navigation\n menu in the Driverless AI UI and in the downloaded mojo-pipeline.zip\n file for an experiment. License Specification\nDriverless AI requires a license to be specified in order to run the\nMOJO Scoring Pipeline. 
The license can be specified in one of the\nfollowing ways:\n- Via an environment variable:\n - DRIVERLESS_AI_LICENSE_FILE: Path to the Driverless AI license\n file, or\n - DRIVERLESS_AI_LICENSE_KEY: The Driverless AI license key\n (Base64 encoded string)\n- Via a system property of JVM (-D option):\n - ai.h2o.mojos.runtime.license.file: Path to the Driverless AI\n license file, or\n - ai.h2o.mojos.runtime.license.key: The Driverless AI license\n key (Base64 encoded string)\n- Via an application classpath:\n - The license is loaded from a resource called /license.sig.", + "prompt_type": "plain" + }, + { + "output": "For example:\n # Specify the license via a temporary environment variable\n export DRIVERLESS_AI_LICENSE_FILE=\"path/to/license.sig\"\nMOJO Scoring Pipeline Files\nThe mojo-pipeline folder includes the following files:\n- run_example.sh: A bash script to score a sample test set. - pipeline.mojo: Standalone scoring pipeline in MOJO format. - mojo2-runtime.jar: MOJO Java runtime. - example.csv: Sample test set (synthetic, of the correct format). - DOT files: Text files that can be rendered as graphs that provide a\n visual representation of the MOJO scoring pipeline (can be edited to\n change the appearance and structure of a rendered graph). - PNG files: Image files that provide a visual representation of the\n MOJO scoring pipeline. Quickstart\nBefore running the quickstart examples, be sure that the MOJO scoring\npipeline is already downloaded and unzipped:\n1. On the completed Experiment page, click on the Download MOJO Scoring\n Pipeline button. 2. 
In the pop-up menu that appears, click on the Download MOJO Scoring\n Pipeline button once again to download the scorer.zip file for this\n experiment onto your local machine.", + "prompt_type": "plain" + }, + { + "output": "Run the following to score all rows in the sample test set with the\n file paths to the test set (example.csv), MOJO pipeline\n (pipeline.mojo) and license (license.sig) stored in environment\n variables TEST_SET_FILE, MOJO_PIPELINE_FILE,\n DRIVERLESS_AI_LICENSE_KEY:\n4. Run the following to score a specific test set (example.csv) with\n MOJO pipeline (pipeline.mojo) and the license file (license.sig):\n5. To run the Java application for data transformation directly:\nMOJO Scoring Command-Line Options\nExecuting the Java Runtime\nThe following are two general examples of how the Java runtime can be\nexecuted from the command-line. - With additional libraries:\n- Without additional libraries:\nSo, for example, the sys.ai.h2o.mojos.parser.csv.separator option can be\npassed with the following:\n java -Dsys.ai.h2o.mojos.parser.csv.separator='|' -Dai.h2o.mojos.runtime.license.file=../license.sig -jar mojo2-runtime.jar pipeline.mojo input.csv output.csv\nSimilarly, the sys.ai.h2o.mojos.exposedInputs option can be passed with:\n java -Xmx5g -Dsys.ai.h2o.mojos.exposedInputs=ALL -Dai.h2o.mojos.runtime.license.file= -cp mojo2-runtime.jar ai.h2o.mojos.ExecuteMojo pipeline.mojo example.csv\nNote: Data can be streamed from stdin to stdout by replacing both the\ninput and output CSV arguments with `-`.", + "prompt_type": "plain" + }, + { + "output": "This value\n defaults to True. - sys.ai.h2o.mojos.parser.csv.stripCrFromLastColumn (boolean)\n -Workaround for issues relating to the OpenCSV parser. This value\n defaults to True. - sys.ai.h2o.mojos.parser.csv.quotedHeaders (boolean) - Specify\n whether to quote header names in the output CSV file. This value\n defaults to False. 
- sys.ai.h2o.mojos.parser.csv.separator (char) - Specify the separator\n used between CSV fields. The special value `TAB` can be used for\n tab-separated values. This value defaults to `,`. - sys.ai.h2o.mojos.parser.csv.escapeChar (char) - Specify the escape\n character for parsing CSV fields. If this value is not specified,\n then no escaping is attempted. This value defaults to an empty\n string. - sys.ai.h2o.mojos.parser.csv.batch (int) - Specify the number of\n input records brought into memory for batch processing (determines\n consumed memory). This value defaults to 1000. - sys.ai.h2o.mojos.pipelineFormats (string) - When multiple formats\n are recognized, this option specifies the order in which they are\n tried.", + "prompt_type": "plain" + }, + { + "output": "- sys.ai.h2o.mojos.parser.csv.date.formats (string) - Specify a format\n for dates. This value defaults to an empty string. - sys.ai.h2o.mojos.exposedInputs (string) - Specify a comma separated\n list of input cols that are needed on output. The special value\n `ALL` takes all inputs. This defaults to a null value. - sys.ai.h2o.mojos.useWeakHash (boolean) - Specify whether to use\n WeakHashMap. This is set to False by default. Enabling this setting\n may improve MOJO loading times. JVM Options for Access Control\n- ai.h2o.mojos.runtime.license.key - Specify a license key. - ai.h2o.mojos.runtime.license.file - Specify the location of a\n license key. - ai.h2o.mojos.runtime.license.filename - Override the default license\n file name. - ai.h2o.mojos.runtime.signature.filename - Override the default\n signature file name. - ai.h2o.mojos.runtime.watermark.filename - Override the default\n watermark file name. Execute the MOJO from Java\n1. Open a new terminal window, create an experiment folder, and change\n directories to that new folder:\n2.", + "prompt_type": "plain" + }, + { + "output": "Include the following contents. 3. 
Compile the source code with the files of the MOJO runtime\n (mojo2-runtime.jar) and MOJO pipeline (pipeline.mojo) copied into\n the experiment:\n4. Run the MOJO example with the license (license.sig) copied into the\n experiment:\n5. The following output is displayed:\nUsing the MOJO Scoring Pipeline with Spark/Sparkling Water\nNote: The Driverless AI 1.5 release will be the last release with\nTOML-based MOJO2. Releases after 1.5 will include protobuf-based MOJO2. MOJO scoring pipeline artifacts can be used in Spark to deploy\npredictions in parallel using the Sparkling Water API. This section\nshows how to load and run predictions on the MOJO scoring pipeline in\nSpark using Scala and the Python API. In the event that you upgrade H2O Driverless AI, we have good news! Sparkling Water is backwards compatible with MOJO versions produced by\nolder Driverless AI versions. Requirements\n- You must have a Spark cluster with the Sparkling Water JAR file\n passed to Spark.", + "prompt_type": "plain" + }, + { + "output": "The H2OContext does not have to be created if you only want to run\npredictions on MOJOs using Spark. This is because the scoring is\nindependent of the H2O run-time. Preparing Your Environment\nIn order to use the MOJO scoring pipeline, the Driverless AI license has to be\npassed to Spark. This can be achieved via --jars argument of the Spark\nlauncher scripts. Note: In Local Spark mode, use --driver-class-path to specify path to\nthe license file. PySparkling\nFirst, start PySpark with PySparkling Python package and Driverless AI\nlicense. ./bin/pyspark --jars license.sig --py-files pysparkling.zip\nor, you can download official Sparkling Water distribution from H2O\nDownload page. Follow the steps on the Sparkling Water download page. Once you are in the Sparkling Water directory, you can call:\n ./bin/pysparkling --jars license.sig\nAt this point, you should have available a PySpark interactive terminal\nwhere you can try out predictions. 
If you would like to productionalize\nthe scoring process, you can use the same configuration, except instead\nof using ./bin/pyspark, you would use ./bin/spark-submit to submit your\njob to a cluster.", + "prompt_type": "plain" + }, + { + "output": "# If you want to use old behavior when all output columns were stored inside an array,\n # set it to False. However we strongly encourage users to use True which is defined as a default value. settings = H2OMOJOSettings(namedMojoOutputColumns = True)\n # Load the pipeline. 'settings' is an optional argument. If it's not specified, the default values are used. mojo = H2OMOJOPipelineModel.createFromMojo(\"file:///path/to/the/pipeline.mojo\", settings)\n # Load the data as Spark's Data Frame\n dataFrame = spark.read.csv(\"file:///path/to/the/data.csv\", header=True)\n # Run the predictions. The predictions contain all the original columns plus the predictions\n # added as new columns\n predictions = mojo.transform(dataFrame)\n # You can easily get the predictions for a desired column using the helper function as\n predictions.select(mojo.selectPredictionUDF(\"AGE\")).collect()\nSparkling Water\nFirst, start Spark with Sparkling Water Scala assembly and Driverless AI\nlicense.", + "prompt_type": "plain" + }, + { + "output": "kdb+ Setup\n\nDriverless AI lets you explore kdb+ data sources from within the\nDriverless AI application. This section provides instructions for\nconfiguring Driverless AI to work with kdb+.\n\nNote: Depending on your Docker install version, use either the", + "prompt_type": "plain" + }, + { + "output": "docker run\n--runtime=nvidia(>= Docker 19.03) ornvidia-docker(< Docker 19.03) command when starting the Driverless AI Docker image. Usedocker\nversionto check which version of Docker you are using. 
Description of Configuration Attributes --------------------------------------- -kdb_user: (Optional) User name -kdb_password: (Optional) User's password -kdb_hostname: IP address or host of the KDB server -kdb_port: Port on which the kdb+ server is listening -kdb_app_jvm_args: (Optional) JVM args for kdb+ distributions (for example,-Dlog4j.configuration). Separate each argument with spaces. -kdb_app_classpath: (Optional) The kdb+ classpath (or other if the jar file is stored elsewhere). -enabled_file_systems: The file systems you want to enable. This must be configured in order for data connectors to function properly. Example 1: Enable kdb+ with No Authentication --------------------------------------------- .. container:: tabs .. group-tab:: Docker Image Installs This example enables the kdb+ connector without authentication.", + "prompt_type": "plain" + }, + { + "output": ".. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,kdb\" \\ -e DRIVERLESS_AI_KDB_HOSTNAME=\"\" \\ -e DRIVERLESS_AI_KDB_PORT=\"\" \\ -p 12345:12345 \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure kdb+ options in the config.toml file, and then specify that file when starting Driverless AI in Docker. Note that this example enables kdb+ with no authentication. 1. Configure the Driverless AI config.toml file. Set the following configuration options. .. -enabled_file_systems\n= \"file, upload, kdb\"-kdb_hostname =\n\"-kdb_port =\n\"\"2.", + "prompt_type": "plain" + }, + { + "output": ".. .. 
code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example enables the kdb+ connector without authentication. The only required flags are the hostname and the port. 1. Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2.", + "prompt_type": "plain" + }, + { + "output": ".. :: # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, remember to configure JDBC below.", + 
"prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, kdb\" # KDB Connector credentials kdb_hostname = \" kdb_port = \"\" 3. Save the changes when you are done, then stop/restart Driverless AI. Example 2: Enable kdb+ with Authentication ------------------------------------------ .. container:: tabs .. group-tab:: Docker Image Installs This example provides users credentials for accessing a kdb+ server from Driverless AI. .. code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ -e DRIVERLESS_AI_ENABLED_FILE_SYSTEMS=\"file,kdb\" \\ -e DRIVERLESS_AI_KDB_HOSTNAME=\"\" \\ -e DRIVERLESS_AI_KDB_PORT=\"\" \\ -e DRIVERLESS_AI_KDB_USER=\"\" \\ -e DRIVERLESS_AI_KDB_PASSWORD=\"\" \\ -p 12345:12345 \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Docker Image with the config.toml This example shows how to configure kdb+ options in the config.toml file, and then specify that file when starting Driverless AI in Docker.", + "prompt_type": "plain" + }, + { + "output": "1. Configure the Driverless AI config.toml file. Set the following configuration options. .. -enabled_file_systems\n= \"file, upload, kdb\"-kdb_user = \"\"-kdb_password =\n\"\"-kdb_hostname = \"-kdb_port =\n\"\"-kdb_app_classpath = \"\"-kdb_app_jvm_args =\n\"\"2. Mount the config.toml file into the Docker container. .. .. 
code:: bash nvidia-docker run \\ --pid=host \\ --init \\ --rm \\ --shm-size=256m \\ --add-host name.node:172.16.2.186 \\ -e DRIVERLESS_AI_CONFIG_FILE=/path/in/docker/config.toml \\ -p 12345:12345 \\ -v /local/path/to/config.toml:/path/in/docker/config.toml \\ -v /etc/passwd:/etc/passwd:ro \\ -v /etc/group:/etc/group:ro \\ -v /tmp/dtmp/:/tmp \\ -v /tmp/dlog/:/log \\ -v /tmp/dlicense/:/license \\ -v /tmp/ddata/:/data \\ -u $(id -u):$(id -g) \\ h2oai/dai-ubi8-x86_64:|tag| .. container:: group-tab Native Installs This example provides users credentials for accessing a kdb+ server from Driverless AI.", + "prompt_type": "plain" + }, + { + "output": "Export the Driverless AI config.toml file or add it to ~/.bashrc. For example: .. :: # DEB and RPM export DRIVERLESS_AI_CONFIG_FILE=\"/etc/dai/config.toml\" # TAR SH export DRIVERLESS_AI_CONFIG_FILE=\"/path/to/your/unpacked/dai/directory/config.toml\" 2. Specify the following configuration options in the config.toml file. .. :: # File System Support # upload : standard upload feature # file : local file system/server file system # hdfs : Hadoop file system, remember to configure the HDFS config folder path and keytab below # dtap : Blue Data Tap file system, remember to configure the DTap section below # s3 : Amazon S3, optionally configure secret and access key below # gcs : Google Cloud Storage, remember to configure gcs_path_to_service_account_json below # gbq : Google Big Query, remember to configure gcs_path_to_service_account_json below # minio : Minio Cloud Storage, remember to configure secret and access key below # snow : Snowflake Data Warehouse, remember to configure Snowflake credentials below (account name, username, password) # kdb : KDB+ Time Series Database, remember to configure KDB credentials below (hostname and port, optionally: username, password, classpath, and jvm_args) # azrbs : Azure Blob Storage, remember to configure Azure credentials below (account name, account key) # jdbc: JDBC Connector, 
remember to configure JDBC below.", + "prompt_type": "plain" + }, + { + "output": "(hive_app_configs) # recipe_url: load custom recipe from URL # recipe_file: load custom recipe from local file system enabled_file_systems = \"file, kdb\" # kdb+ Connector credentials kdb_user = \"\" kdb_password = \"\" kdb_hostname = \" kdb_port = \"\" kdb_app_classpath = \"\" kdb_app_jvm_args = \"\" 3. Save the changes when you are done, then stop/restart Driverless AI. Adding Datasets Using kdb+ -------------------------- After the kdb+ connector is enabled, you can add datasets by selecting **kdb+** from the **Add Dataset (or Drag and Drop)** drop-down menu. .. figure:: ../images/add_dataset_dropdown.png :alt: :width: 237px :height: 338px Specify the following information to add your dataset. 1. **Enter filepath to save query**. Enter the local file path for storing your dataset. For example, **/home//myfile.csv**.", + "prompt_type": "plain" + }, + { + "output": "Deploying Driverless AI Models to Production\nBy default, each completed Driverless AI experiment (unless explicitly\ndisabled or not available due to modified expert settings) creates at\nleast one scoring pipeline for scoring in Python,\nC++, Java and R.\nThe H2O MLOps service provides a way to manage, collaborate, deploy and\nmonitor your experiments and models. This can be done in the cloud or as\na standalone service. In addition to the H2O MLOps service, here we list several other\ndeployment options and examples for deploying Driverless AI MOJO (Java\nand C++ with Python/R wrappers) and Python Scoring pipelines for\nproduction purposes. The deployment template documentation can be\naccessed from here. For more customized requirements, contact\nsupport@h2o.ai. 
- Deployment via H2O AI MLOps \n - MOJO with Java runtime \n - MOJO with C++ Runtime \n - Standalone Python Scoring Pipeline \n - Deployment options from within Driverless AI GUI \nDeployment With H2O MLOps\nH2O MLOps is a platform for model deployment, management, governance,\nmonitoring, and collaboration.", + "prompt_type": "plain" + }, + { + "output": "It can be deployed as a\nstandalone service or as an H2O AI Cloud Application. For details, see\nthe H2O MLOps Documentation. MOJO With Java Runtime Deployment Options\nThe following are several options for deploying Driverless AI MOJO with\nJava Runtime. The links in the diagram lead to code examples and\ntemplates. digraph \"example java\" {\n layout=\"circo\"; node [fontname=\"Verdana\",\n fontsize=\"30\",shape=plaintext]; edge [color=\"black\"]; b\n [label=\"Driverless AI MOJO Java Runtime\",\n href=\"https://docs.h2o.ai/driverless-ai/latest-stable/docs/userguide/scoring-mojo-scoring-pipeline.html\",target=\"_top\",fontcolor=\"black\"];\n af [label=\"As a library\",fontcolor=\"green\"]; aa [label=\"As REST\n Server\",\n href=\"https://h2oai.github.io/dai-deployment-templates/local-rest-scorer/\",target=\"_top\",fontcolor=\"green\"];\n ad [label=\"As AzureML\",fontcolor=\"green\"]; ab [label=\"As AWS\n Lambda\",\n href=\"https://h2oai.github.io/dai-deployment-templates/aws_lambda_scorer/\",target=\"_top\",fontcolor=\"green\"];\n ac [label=\"As Google Cloud Run\",\n href=\"https://h2oai.github.io/dai-deployment-templates/gcp/\",target=\"_top\",fontcolor=\"green\"];\n ae [label=\"As Apache Nifi\",\n href=\"https://github.com/h2oai/dai-deployment-examples/tree/master/mojo-nifi\",target=\"_top\",fontcolor=\"green\"];\n ag [label=\"As Snowflake Function\",\n href=\"https://docs.h2o.ai/driverless-ai/latest-stable/docs/userguide/snowflake-integration.html\",target=\"_top\",fontcolor=\"green\"];\n ah [label=\"As Apache Flink\",\n 
href=\"https://github.com/h2oai/dai-deployment-examples/tree/master/mojo-flink\",target=\"_top\",fontcolor=\"green\",fontcolor=\"green\"];\n ai [label=\"As Sagemaker\",fontcolor=\"red\"]; aj [label=\"As Hive\n UDF\",\n href=\"https://github.com/h2oai/dai-deployment-templates/tree/master/hive-mojo-scorer\",target=\"_top\",fontcolor=\"red\"];\n ak [label=\"As DB scorer\",\n href=\"https://h2oai.github.io/dai-deployment-templates/sql-jdbc-scorer/\",target=\"_top\",fontcolor=\"red\"];\n al [label=\"As Apache Spark Batch/Stream\",\n href=\"http://docs.h2o.ai/sparkling-water/3.0/latest-stable/doc/deployment/load_mojo_pipeline.html#loading-and-score-the-mojo\",target=\"_top\",fontcolor=\"red\"];\n am [label=\"As Apache Kafka Topic\",\n href=\"https://github.com/h2oai/dai-deployment-examples/blob/master/mojo-flink/daimojo-flink-kafka.md\",target=\"_top\",fontcolor=\"blue\"];\n an [label=\"As Active MQ\",fontcolor=\"blue\"]; ao [label=\"As Task\n Queue \",fontcolor=\"blue\"]; ap [label=\"KNIME\",fontcolor=\"blue\"];\n b -> aa; b -> ab; b -> ac; b -> ad; b -> ae; b -> af; b -> ag; b\n -> ah; b -> ai; b -> aj; b -> ak; b -> al; b -> am; b -> an; b ->\n ao; b -> ap;\n }\nThe Java MOJO scoring pipelines can also be deployed from within the\nDriverless AI GUI.", + "prompt_type": "plain" + }, + { + "output": "MOJO With C++ Runtime Deployment Options\nHere we list some example scenarios and platforms for deploying\nDriverless AI MOJO with C++ Runtime. MOJO C++ runtime can also be run\ndirectly from R/Python terminals. For more information, see\ncpp_scoring_pipeline. 
digraph \"example c++\" {\n layout=\"circo\"; node [fontname=\"Verdana\",\n fontsize=\"16\",shape=plaintext]; edge [color=\"black\"]; b\n [label=\"Driverless AI MOJO C++ Runtime\",\n href=\"https://docs.h2o.ai/driverless-ai/latest-stable/docs/userguide/scoring-pipeline-cpp.html\",target=\"_top\"];\n ab [label=\"As REST Server\",\n href=\"\",target=\"_top\",fontcolor=\"green\"]; ac [label=\"As AWS\n Lambda\", href=\"\",target=\"_top\",fontcolor=\"green\"]; ad [label=\"As\n AzureML\",fontcolor=\"green\"]; aa [label=\"As a\n library\",fontcolor=\"green\"]; ae [label=\"As Apache Nifi\",\n href=\"\",target=\"_top\",fontcolor=\"green\"]; ag [label=\"As Apache\n Spark Batch\", href=\"\",target=\"_top\",fontcolor=\"red\"]; af\n [label=\"As Sagemaker\",fontcolor=\"red\"];\n b -> aa; b -> ab; b -> ac; b -> ad; b -> ae; b -> af; b -> ag;\n }\nStandalone Python Scoring Pipeline Deployment Options\ndigraph \"example py\" {\n layout=\"circo\"; node [fontname=\"Verdana\",\n fontsize=\"20\",shape=plaintext]; edge [color=\"black\"]; b\n [label=\"Driverless AI Python Scoring Pipeline\",\n href=\"https://docs.h2o.ai/driverless-ai/latest-stable/docs/userguide/scoring-standalone-python.html\",target=\"_top\"];\n aa [label=\"As REST Server\",\n href=\"https://github.com/h2oai/driverlessai-tutorials/tree/master/scoring-pipeline-deployment/python/ubuntu/docker\",target=\"_top\",fontcolor=\"green\"];\n ac [label=\"As AWS Lambda\",fontcolor=\"green\"]; ad [label=\"As\n AzureML\",fontcolor=\"green\"]; ae [label=\"As Apache\n Nifi\",fontcolor=\"green\"]; ah [label=\"As a\n library\",fontcolor=\"green\"]; ab [label=\"As Docker Image\",\n href=\"https://github.com/h2oai/driverlessai-tutorials/tree/master/scoring-pipeline-deployment/python/ubuntu/docker\",\n target=\"_top\",fontcolor=\"red\"] af [label=\"As\n Sagemaker\",fontcolor=\"red\"]; ag [label=\"As Apache Spark Batch\",\n 
href=\"https://github.com/h2oai/driverlessai-tutorials/tree/master/scoring-pipeline-deployment/python/pyspark\",target=\"_top\",fontcolor=\"red\"];\n b -> aa; b -> ab; b -> ac; b -> ad; b -> ae; b -> af; b -> ag; b\n -> ah;\n }\nAvailable Deployments from within Driverless AI GUI\nThe following deployments are available in Driverless AI GUI.", + "prompt_type": "plain" + }, + { + "output": "- amazon-lambda\n- rest-server\nAll of the existing MOJO scoring pipeline deployments are available in\nthe Deployments Overview page, which is available from the top menu. This page lists all active deployments and the information needed to\naccess the respective endpoints. In addition, it lets you stop any\ndeployments that are no longer needed. []\nAmazon Lambda Deployment\nDriverless AI can deploy the trained MOJO scoring pipeline as an AWS\nLambda Function, i.e., a server-less scorer running in Amazon Cloud and\ncharged by the actual usage. Additional Resources\nRefer to the aws-lambda-scorer folder in the dai-deployment-templates\nrepository to see different deployment templates for AWS Lambda scorer. Driverless AI Prerequisites\n- Driverless AI MOJO Scoring Pipeline: To deploy a MOJO scoring\n pipeline as an AWS Lambda function, the MOJO pipeline archive has to\n be created first by choosing the Build MOJO Scoring Pipeline option\n on the completed experiment page. Refer to the\n mojo_scoring_pipelines section for information on how to build a\n MOJO scoring pipeline.", + "prompt_type": "plain" + }, + { + "output": "The Driverless AI deployment pipeline\n to AWS Lambdas explicitly sets the license key as an environment\n variable. You will not be able to use MOJOs if your Driverless AI\n license is expired. If you have an expired license, you can update\n this manually for each MOJO in AWS, or you can update all MOJOs for\n a deployment region using a script. Refer to\n update_license_in_production for more information. 
AWS Prerequisites\nUsage Plans\nUsage plans must be enabled in the target AWS region in order for API\nkeys to work when accessing the AWS Lambda via its REST API. Refer to\nhttps://aws.amazon.com/blogs/aws/new-usage-plans-for-amazon-api-gateway/\nfor more information. Access Permissions\nThe following AWS access permissions need to be provided to the role in\norder for Driverless AI Lambda deployment to succeed. - AWSLambdaFullAccess\n- IAMFullAccess\n- AmazonAPIGatewayAdministrator\n[]\nThe policy can be further stripped down to restrict Lambda and S3 rights\nusing the JSON policy definition as follows:\n {\n \"Version\": \"2012-10-17\",\n \"Statement\": [\n {\n \"Sid\": \"VisualEditor0\",\n \"Effect\": \"Allow\",\n \"Action\": [\n \"iam:GetPolicyVersion\",\n \"iam:DeletePolicy\",\n \"iam:CreateRole\",\n \"iam:AttachRolePolicy\",\n \"iam:ListInstanceProfilesForRole\",\n \"iam:PassRole\",\n \"iam:DetachRolePolicy\",\n \"iam:ListAttachedRolePolicies\",\n \"iam:GetRole\",\n \"iam:GetPolicy\",\n \"iam:DeleteRole\",\n \"iam:CreatePolicy\",\n \"iam:ListPolicyVersions\"\n ],\n \"Resource\": [\n \"arn:aws:iam::*:role/h2oai*\",\n \"arn:aws:iam::*:policy/h2oai*\"\n ]\n },\n {\n \"Sid\": \"VisualEditor1\",\n \"Effect\": \"Allow\",\n \"Action\": \"apigateway:*\",\n \"Resource\": \"*\"\n },\n {\n \"Sid\": \"VisualEditor2\",\n \"Effect\": \"Allow\",\n \"Action\": [\n \"lambda:CreateFunction\",\n \"lambda:ListFunctions\",\n \"lambda:InvokeFunction\",\n \"lambda:GetFunction\",\n \"lambda:UpdateFunctionConfiguration\",\n \"lambda:DeleteFunctionConcurrency\",\n \"lambda:RemovePermission\",\n \"lambda:UpdateFunctionCode\",\n \"lambda:AddPermission\",\n \"lambda:ListVersionsByFunction\",\n \"lambda:GetFunctionConfiguration\",\n \"lambda:DeleteFunction\",\n \"lambda:PutFunctionConcurrency\",\n \"lambda:GetPolicy\"\n ],\n \"Resource\": \"arn:aws:lambda:*:*:function:h2oai*\"\n },\n {\n \"Sid\": \"VisualEditor3\",\n \"Effect\": \"Allow\",\n \"Action\": \"s3:*\",\n \"Resource\": [\n 
\"arn:aws:s3:::h2oai*/*\",\n \"arn:aws:s3:::h2oai*\"\n ]\n }\n ]\n }\nDeploying on Amazon Lambda\nOnce the MOJO pipeline archive is ready, Driverless AI provides a Deploy\n(Local & Cloud) option on the completed experiment page.", + "prompt_type": "plain" + }, + { + "output": "[]\nThis option opens a new dialog for setting the AWS account credentials\n(or use those supplied in the Driverless AI configuration file or\nenvironment variables), AWS region, and the desired deployment name\n(which must be unique per Driverless AI user and AWS account used). []\nAmazon Lambda deployment parameters:\n - Deployment Name: A unique name of the deployment. By default,\n Driverless AI offers a name based on the name of the experiment\n and the deployment type. This has to be unique both for Driverless\n AI user and the AWS account used. - Region: The AWS region to deploy the MOJO scoring pipeline to. It\n makes sense to choose a region geographically close to any client\n code calling the endpoint in order to minimize request latency. (See also AWS Regions and Availability Zones.) - Use AWS environment variables: If enabled, the AWS credentials are\n taken from the Driverless AI configuration file (see records\n deployment_aws_access_key_id and deployment_aws_secret_access_key)\n or environment variables\n (DRIVERLESS_AI_DEPLOYMENT_AWS_ACCESS_KEY_ID and\n DRIVERLESS_AI_DEPLOYMENT_AWS_SECRET_ACCESS_KEY).", + "prompt_type": "plain" + }, + { + "output": "- AWS Access Key ID and AWS Secret Access Key: Credentials to access\n the AWS account. This pair of secrets identifies the AWS user and\n the account and can be obtained from the AWS account console. Testing the Lambda Deployment\nOn a successful deployment, all the information needed to access the new\nendpoint (URL and an API Key) is printed, and the same information is\navailable in the Deployments Overview Page after clicking on the\ndeployment row. []\nNote that the actual scoring endpoint is located at the path /score. 
In\naddition, to prevent DDoS and other malicious activities, the resulting\nAWS lambda is protected by an API Key, i.e., a secret that has to be\npassed in as a part of the request using the x-api-key HTTP header. The request is a JSON object containing attributes:\n - fields: A list of input column names that should correspond to the\n training data columns. - rows: A list of rows that are in turn lists of cell values to\n predict the target values for.", + "prompt_type": "plain" + }, + { + "output": "An example request providing 2 columns on the input and asking to get\none column copied to the output looks as follows:\n {\n \"fields\": [\n \"age\", \"salary\"\n ],\n \"includeFieldsInOutput\": [\n \"salary\"\n ],\n \"rows\": [\n [\n \"48.0\", \"15000.0\"\n ],\n [\n \"35.0\", \"35000.0\"\n ],\n [\n \"18.0\", \"22000.0\"\n ]\n ]\n }\nAssuming the request is stored locally in a file named test.json, the\nrequest to the endpoint can be sent, e.g., using the curl utility, as\nfollows:\n URL={place the endpoint URL here}\n API_KEY={place the endpoint API key here}\n curl \\\n -d @test.json \\\n -X POST \\\n -H \"x-api-key: ${API_KEY}\" \\\n ${URL}/score\nThe response is a JSON object with a single attribute score, which\ncontains the list of rows with the optional copied input values and the\npredictions. For the example above with a two class target field, the result is\nlikely to look something like the following snippet.", + "prompt_type": "plain" + }, + { + "output": "The bucket names\nhave to be unique throughout AWS S3, and one user can create a maximum\nof 100 buckets. Therefore, we recommend setting the bucket name used for\ndeployment with the deployment_aws_bucket_name config option. REST Server Deployment\nThis section describes how to deploy the trained MOJO scoring pipeline\nas a local Representational State Transfer (REST) Server. Note: For information on REST server deployment limitations, see\nrest_limitations. 
Additional Resources\nThe REST server deployment supports API endpoints such as model\nmetadata, file/CSV scoring, etc. It uses SpringFox for both programmatic\nand manual inspection of the API. Refer to the local-rest-scorer folder\nin the dai-deployment-templates repository to see different deployment\ntemplates for Local REST scorers. Prerequisites\n- Driverless AI MOJO Scoring Pipeline: To deploy a MOJO scoring\n pipeline as a Local REST Scorer, the MOJO pipeline archive has to be\n created first by choosing the Build MOJO Scoring Pipeline option on\n the completed experiment page.", + "prompt_type": "plain" + }, + { + "output": "- When using a firewall or a virtual private cloud (VPC), the ports\n that are used by the REST server must be exposed. - Ensure that you have enough memory and CPUs to run the REST scorer. Typically, a good estimation for the amount of required memory is 12\n times the size of the pipeline.mojo file. For example, a 100MB\n pipeline.mojo file will require approximately 1200MB of RAM. (Note:\n To conveniently view in-depth information about your system in\n Driverless AI, click on Resources at the top of the screen, then\n click System Info.) - When running Driverless AI in a Docker container, you must expose\n ports on Docker for the REST service deployment within the\n Driverless AI Docker container. For example, the following exposes\n the Driverless AI Docker container to listen to port 8094 for\n requests arriving at the host port at 18094. Deploying on REST Server\nOnce the MOJO pipeline archive is ready, Driverless AI provides a Deploy\n(Local & Cloud) option on the completed experiment page.", + "prompt_type": "plain" + }, + { + "output": "- This button is not available on PPC64LE environments. []\nThis option opens a new dialog for setting the REST Server deployment\nname, port number, and maximum heap size (optional). []\n1. Specify a name for the REST scorer in order to help track the\n deployed REST scorers. 2. 
Provide a port number on which the REST scorer will run. For\n example, if port number 8081 is selected, the scorer will be\n available at http://my-ip-address:8081/models\n3. Optionally specify the maximum heap size for the Java Virtual\n Machine (JVM) running the REST scorer. This can help constrain the\n REST scorer from overconsuming memory of the machine. Because the\n REST scorer is running on the same machine as Driverless AI, it may\n be helpful to limit the amount of memory that is allocated to the\n REST scorer. This option will limit the amount of memory the REST\n scorer can use, but it will also produce an error if the memory\n allocated is not enough to run the scorer. (The amount of memory\n required is mostly dependent on the size of MOJO.", + "prompt_type": "plain" + }, + { + "output": "Testing the REST Server Deployment\n[]\nNote that the actual scoring endpoint is located at the path /score. The request is a JSON object containing attributes:\n - fields: A list of input column names that should correspond to the\n training data columns. - rows: A list of rows that are in turn lists of cell values to\n predict the target values for. - optional includeFieldsInOutput: A list of input columns that\n should be included in the output. 
An example request providing 2 columns on the input and asking to get\none column copied to the output looks as follows:\n {\n \"fields\": [\n \"age\", \"salary\"\n ],\n \"includeFieldsInOutput\": [\n \"salary\"\n ],\n \"rows\": [\n [\n \"48.0\", \"15000.0\"\n ],\n [\n \"35.0\", \"35000.0\"\n ],\n [\n \"18.0\", \"22000.0\"\n ]\n ]\n }\nAssuming the request is stored locally in a file named test.json, the\nrequest to the endpoint can be sent, e.g., using the curl utility, as\nfollows:\n URL={place the endpoint URL here}\n curl \\\n -X POST \\\n -d @test.json \\\n -H \"Content-Type: application/json\" \\\n ${URL}/score\nThe response is a JSON object with a single attribute score, which\ncontains the list of rows with the optional copied input values and the\npredictions.", + "prompt_type": "plain" + }, + { + "output": "The particular\nvalues would of course depend on the scoring pipeline:\n {\n \"score\": [\n [\n \"48.0\",\n \"0.6240277982943945\",\n \"0.045458571508101536\"\n ],\n [\n \"35.0\",\n \"0.7209441819603676\",\n \"0.06299909138586585\"\n ],\n [\n \"18.0\",\n \"0.7209441819603676\",\n \"0.06299909138586585\"\n ]\n ]\n }\nREST Server Deployment Limitations\n- Local REST server deployments are useful for determining the\n behavioral characteristics of a MOJO that is intended for\n deployment. However, using the REST Server deployment as a\n production level scoring service is not recommended. The REST Server\n deployment runs on the same machine as the core of Driverless AI,\n and therefore has to share system resources with all other\n Driverless AI processes. 
This can lead to unexpected scenarios in\n which competition for compute resources causes the REST Server to\n fail.", + "prompt_type": "plain" + }, + { + "output": "Install on AWS\n\nDriverless AI can be installed on Amazon AWS using the AWS Marketplace\nAMI or the AWS Community AMI.\n\nchoose-AWS aws-marketplace-ami aws-community-ami\n\nWhen installing via AWS, you can also enable role-based authentication.\n\naws-role-based-authentication", + "prompt_type": "plain" + }, + { + "instruction": "How is Driverless AI different than any other black box ML algorithm?", + "output": "Driverless AI uses many techniques (some older and some cutting-edge) for interpreting black box models including creating reason codes for every prediction the system makes. We have also created numerous open source code examples and free publications that explain these techniques. See the list below for links to these resources and for references for the interpretability techniques.Open source interpretability examples:https://github.com/jphall663/interpretable_machine_learning_with_pythonhttps://content.oreilly.com/oriole/Interpretable-machine-learning-with-Python-XGBoost-and-H2Ohttps://github.com/h2oai/mli-resourcesFree Machine Learning Interpretability publications:http://www.oreilly.com/data/free/an-introduction-to-machine-learning-interpretability.csphttp://docs.h2o.ai/driverless-ai/latest-stable/docs/booklets/MLIBooklet.pdfMachine Learning Techniques already in Driverless AI:Tree-based Variable Importance: https://web.stanford.edu/~hastie/ElemStatLearn/printings/ESLII_print12.pdfPartial Dependence: https://web.stanford.edu/~hastie/ElemStatLearn/printings/ESLII_print12.pdfLIME: http://www.kdd.org/kdd2016/papers/files/rfp0573-ribeiroA.pdfLOCO: http://www.stat.cmu.edu/~ryantibs/papers/conformal.pdfICE: https://arxiv.org/pdf/1309.6392.pdfSurrogate 
Models:https://papers.nips.cc/paper/1152-extracting-tree-structured-representations-of-trained-networks.pdfhttps://arxiv.org/pdf/1705.08504.pdfShapley Explanations: http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions", + "prompt_type": "human_bot" + }, + { + "instruction": "How often do new versions come out?", + "output": "The frequency of major new Driverless AI releases has historically been about every two months.Installation/Upgrade/Authentication", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I change my username and password?", + "output": "The username and password are tied to the experiments you have created. For example, if I log in with the username/password: megan/megan and start an experiment, then I would need to log back in with the same username and password to see those experiments. The username and password, however, does not limit your access to Driverless AI. If you want to use a new user name and password, you can log in again with a new username and password, but keep in mind that you won't see your old experiments.", + "prompt_type": "human_bot" + }, + { + "instruction": "Can Driverless AI run on CPU-only machines?", + "output": "Yes, Driverless AI can run on machines with CPUs only, though GPUs are recommended. Installation instructions are available for GPU and CPU systems. Refer to :ref:`before_you_begin` for more information.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I upgrade to a newer version of Driverless AI?", + "output": "Upgrade instructions vary depending on your environment. Refer to the installation section for your environment. 
Upgrade instructions are included there.", + "prompt_type": "human_bot" + }, + { + "instruction": "What kind of authentication is supported in Driverless AI?", + "output": "Driverless AI supports Client Certificate, LDAP, Local, mTLS, OpenID, none, and unvalidated (default) authentication. These can be configured by setting the appropriate environment variables in the config.toml file or by specifying the environment variables when starting Driverless AI. Refer to :ref:`dai_auth` for more information.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I automatically turn on persistence each time the GPU system reboots?", + "output": "For GPU machines, the sudo nvidia-persistenced --user dai command can be run after each reboot to enable persistence. For systems that have systemd, it is possible to automatically enable persistence after each reboot by removing the --no-persistence-mode flag from nvidia-persistenced.service. Before running the steps below, be sure to review the following for more information:https://docs.nvidia.com/deploy/driver-persistence/index.html#persistence-daemonhttps://docs.nvidia.com/deploy/driver-persistence/index.html#installationRun the following to stop the nvidia-persistenced.service:Cannot analyze code. Pygments package not found... code:: bash\n\n sudo systemctl stop nvidia-persistenced.service\nOpen the file /lib/systemd/system/nvidia-persistenced.service. This file includes a line \"ExecStart=/usr/bin/nvidia-persistenced --user nvidia-persistenced --no-persistence-mode --verbose\".Remove the flag --no-persistence-mode from that line so that it reads:Enumerated list start value not ordinal-1: \"2\" (ordinal 2)Cannot analyze code. Pygments package not found... 
code:: bash\n\n ExecStart=/usr/bin/nvidia-persistenced --user nvidia-persistenced --verbose\nRun the following command to start the nvidia-persistenced.service:Enumerated list start value not ordinal-1: \"4\" (ordinal 4)Cannot analyze code. Pygments package not found... code:: bash\n\n sudo systemctl start nvidia-persistenced.service\n", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I start Driverless AI on a different port than 12345?", + "output": "No directive entry for \"tabs\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"tabs\" as canonical directive name.Unknown directive type \"tabs\"... tabs::\n .. group-tab:: Docker Image Installs\n\n When starting Driverless AI in Docker, the ``-p`` option specifies the port on which Driverless AI will run. Change this option in the start script if you need to run on a port other than 12345. The following example shows how to run on port 22345. (Change ``nvidia-docker run`` to ``docker-run`` if needed.) Keep in mind that `priviliged ports will require root access `__.\n\n .. code-block:: bash\n :substitutions:\n\n nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -u `id -u`:`id -g` \\\n -p 22345:12345 \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\n\n .. group-tab:: Native Installs\n\n To run on a port other than 12345, update the port value in the **config.toml** file. The following example shows how to run Driverless AI on port 22345. 
Keep in mind that `privileged ports will require root access `__.\n\n ::\n\n # Export the Driverless AI config.toml file (or add it to ~/.bashrc)\n export DRIVERLESS_AI_CONFIG_FILE=\"/config/config.toml\"\n\n # IP address and port for Driverless AI HTTP server.\n ip = \"127.0.0.1\"\n port = 22345\n\n Point to this updated config file when restarting Driverless AI.\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Can I set up TLS/SSL on Driverless AI?", + "output": "Yes, Driverless AI provides configuration options that let you set up HTTPS/TLS/SSL. You will need to have your own SSL certificate, or you can create a self-signed certificate for yourself. To enable HTTPS/TLS/SSL on the Driverless AI server, add the following to the config.toml file:\n\n.. code:: bash\n\n enable_https = true\n ssl_key_file = \"/etc/dai/private_key.pem\"\n ssl_crt_file = \"/etc/dai/cert.pem\"\nYou can make a self-signed certificate for testing with the following commands:\n\n.. code:: bash\n\n umask 077\n openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 20 -nodes -subj '/O=Driverless AI'\n sudo chown dai:dai cert.pem private_key.pem\n sudo mv cert.pem private_key.pem /etc/dai\nTo configure specific versions of TLS/SSL, enable or disable the following settings in the config.toml file:\n\n.. code:: bash\n\n ssl_no_sslv2 = true\n ssl_no_sslv3 = true\n ssl_no_tlsv1 = true\n ssl_no_tlsv1_1 = true\n ssl_no_tlsv1_2 = false\n ssl_no_tlsv1_3 = false\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Can I set up TLS/SSL on Driverless AI in AWS?", + "output": "Yes, you can set up HTTPS/TLS/SSL on Driverless AI running in an AWS environment. HTTPS/TLS/SSL needs to be configured on the host machine, and the necessary ports will need to be opened on the AWS side. 
You will need to have your own TLS/SSL cert or you can create a self-signed cert for yourself. The following is a very simple example showing how to configure HTTPS with a proxy pass to the port on the container 12345 with the keys placed in /etc/nginx/. Replace YOUR_SERVER_NAME with your server name.\n\n.. code:: bash\n\n server {\n listen 80;\n return 301 https://$host$request_uri;\n }\n\n server {\n listen 443;\n\n # Specify your server name here\n server_name YOUR_SERVER_NAME;\n\n ssl_certificate /etc/nginx/cert.crt;\n ssl_certificate_key /etc/nginx/cert.key;\n ssl on;\n ssl_session_cache builtin:1000 shared:SSL:10m;\n ssl_protocols TLSv1 TLSv1.1 TLSv1.2;\n ssl_ciphers HIGH:!aNULL:!eNULL:!EXPORT:!CAMELLIA:!DES:!MD5:!PSK:!RC4;\n ssl_prefer_server_ciphers on;\n\n access_log /var/log/nginx/dai.access.log;\n\n location / {\n proxy_set_header Host $host;\n proxy_set_header X-Real-IP $remote_addr;\n proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n proxy_set_header X-Forwarded-Proto $scheme;\n\n # Fix the \"It appears that your reverse proxy set up is broken\" error.\n proxy_pass http://localhost:12345;\n proxy_read_timeout 90;\n\n # Specify your server name for the redirect\n proxy_redirect http://localhost:12345 https://YOUR_SERVER_NAME;\n }\n }\nMore information about SSL for Nginx in Ubuntu 16.04 can be found here: https://www.digitalocean.com/community/tutorials/how-to-create-a-self-signed-ssl-certificate-for-nginx-in-ubuntu-16-04.", + "prompt_type": "human_bot" + }, + { + "instruction": "I received a \"package dai-VERSION.x86_64 does not verify: no digest\" error during the installation. How can I fix this?", + "output": "You will receive a \"package dai-VERSION.x86_64 does not verify: no digest\" error when installing the rpm using an RPM version newer than 4.11.3. You can run the following as a workaround, replacing VERSION with your DAI version:\n\n
.. code:: bash\n\n rpm --nodigest -i dai-VERSION.x86_64.rpm\n", + "prompt_type": "human_bot" + }, + { + "instruction": "I received a \"Must have exactly one OpenCL platform 'NVIDIA CUDA'\" error. How can I fix that?", + "output": "If you encounter problems with OpenCL errors at server time, you may see the following message:\n\n.. code:: bash\n\n 2018-11-08 14:26:15,341 C: D:452.2GB M:246.0GB 21603 ERROR : Must have exactly one OpenCL platform 'NVIDIA CUDA', but got:\n Platform #0: Clover\n Platform #1: NVIDIA CUDA\n +-- Device #0: GeForce GTX 1080 Ti\n +-- Device #1: GeForce GTX 1080 Ti\n +-- Device #2: GeForce GTX 1080 Ti\n\n Uninstall all but 'NVIDIA CUDA' platform.\nFor Ubuntu, the solution is to run the following:\n\n.. code:: bash\n\n sudo apt-get remove mesa-opencl-icd\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Is it possible for multiple users to share a single Driverless AI instance?", + "output": "Driverless AI supports multiple users, and Driverless AI is licensed per single named user. Therefore, in order to have different users run experiments simultaneously, they would each need a license. Driverless AI manages the GPU(s) that it is given and ensures that different experiments from different users can run safely simultaneously and don\u2019t interfere with each other. So when two licensed users log in with different credentials, then neither of them will see the other\u2019s experiment. Similarly, if a licensed user logs in using a different set of credentials, then that user will not see any previously run experiments.", + "prompt_type": "human_bot" + }, + { + "instruction": "Can multiple Driverless AI users share a GPU server?", + "output": "Yes, you can allocate multiple users in a single GPU box. For example, a single box with four GPUs can allocate that User1 has two GPUs and User2 has the other two GPUs. 
This is accomplished by having two separate Driverless AI instances running on the same server. There are two ways to assign specific GPUs to Driverless AI. And in the scenario with four GPUs (two GPUs allocated to two users), both of these options allow each Docker container only to see two GPUs.\n\n1. Use the CUDA_VISIBLE_DEVICES environment variable. In the case of Docker deployment, this translates into passing -e CUDA_VISIBLE_DEVICES=\"0,1\" to the nvidia-docker run command.\n2. Pass the NV_GPU option at the beginning of the nvidia-docker run command. (See example below.)\n\n.. code-block:: bash\n :substitutions:\n\n #Team 1\n NV_GPU='0,1' nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -u `id -u`:`id -g` \\\n -p port-to-team:12345 \\\n -e DRIVERLESS_AI_CONFIG_FILE=\"/config/config.toml\" \\\n -v /data:/data \\\n -v /log:/log \\\n -v /license:/license \\\n -v /tmp:/tmp \\\n -v /config:/config \\\n h2oai/dai-ubi8-x86_64:|tag|\n\n\n #Team 2\n NV_GPU='2,3' nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -u `id -u`:`id -g` \\\n -p port-to-team:12345 \\\n -e DRIVERLESS_AI_CONFIG_FILE=\"/config/config.toml\" \\\n -v /data:/data \\\n -v /log:/log \\\n -v /license:/license \\\n -v /tmp:/tmp \\\n -v /config:/config \\\n h2oai/dai-ubi8-x86_64:|tag|\nNote, however, that a Driverless AI instance expects to fully utilize and not share the GPUs that are assigned to it. Sharing a GPU with other Driverless AI instances or other running programs can result in out-of-memory issues.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I retrieve a list of Driverless AI users?", + "output": "A list of users can be retrieved using the Python client.Cannot analyze code. Pygments package not found... 
code:: bash\n\n h2o = Client(address='http://:12345', username='', password='')\n h2o.get_users()\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Start of Driverless AI fails on the message ``Segmentation fault (core dumped)`` on Ubuntu 18/RHEL 7.6. How can I fix this?", + "output": "This problem is caused by the font NotoColorEmoji.ttf, which cannot be processed by the Python matplotlib library. A workaround is to disable the font by renaming it. (Do not use fontconfig because it is ignored by matplotlib.) The following will print out the command that should be executed.Cannot analyze code. Pygments package not found... code:: bash\n\n sudo find / -name \"NotoColorEmoji.ttf\" 2>/dev/null | xargs -I{} echo sudo mv {} {}.backup\n\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Which Linux systems does Driverless AI support?", + "output": "Supported Linux systems include x86_64 RHEL 7, RHEL 8, CentOS 7, and CentOS 8.Data", + "prompt_type": "human_bot" + }, + { + "instruction": "Is there a file size limit for datasets?", + "output": "For GBMs, the file size for datasets is limited by the collective CPU or GPU memory on the system, but we continue to make optimizations for getting more data into an experiment, such as using TensorFlow streaming to stream to arbitrarily large datasets.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I import CSV files that use UTF-8 encoding into Excel?", + "output": "Excel requires a byte order mark (BOM) to correctly identify CSV files that use UTF-8 encoding. 
Refer to the following FAQ entry for more information on how to use a BOM when writing CSV files with datatable.", + "prompt_type": "human_bot" + }, + { + "instruction": "Can a byte order mark be used when writing CSV files with datatable?", + "output": "Yes, a byte order mark (BOM) can be used when writing CSV files with datatable by enabling datatable_bom_csv in the config.toml file when starting Driverless AI.Note: Support for UTF-8 encoding in Excel requires the use of a BOM.", + "prompt_type": "human_bot" + }, + { + "instruction": "Which version of Longhorn is supported by Driverless AI?", + "output": "Driverless AI supports Longhorn v1.1.0 or later.", + "prompt_type": "human_bot" + }, + { + "instruction": "Is it possible to download a transformed test dataset in Driverless AI?", + "output": "Yes, a transformed test dataset can be downloaded in Driverless AI. To do this, click Model Actions > Transform Dataset on the completed experiment page, then specify both a train and a test dataset to use for the transformation. The transformed test dataset is made available for download once this process is completed.Connectors", + "prompt_type": "human_bot" + }, + { + "instruction": "Why can't I import a folder as a file when using a data connector on Windows?", + "output": "If you try to use the Import Folder as File option via a data connector on Windows, the import will fail if the folder contains files that do not have file extensions. For example, if a folder contains the files file1.csv, file2.csv, file3.csv, and _SUCCESS, the function will fail due to the presence of the _SUCCESS file.Note that this only occurs if the data is sourced from a volume that is mounted from the Windows filesystem onto the Docker container via -v /path/to/windows/filesystem:/path/in/docker/container flags. 
This error occurs because of the difference in how files without file extensions are treated in Windows and in the Docker container (CentOS Linux).", + "prompt_type": "human_bot" + }, + { + "instruction": "I get a ClassNotFoundException error when I try to select a JDBC connection. How can I fix that?", + "output": "The folder storing the JDBC jar file must be visible/readable by the dai process user.If you downloaded the JDBC jar file from Oracle, they may provide you with a tar.gz file that you can unpackage with the following command:Cannot analyze code. Pygments package not found... code:: bash\n\n tar --no-same-permissions --no-same-owner -xzvf .gz\nAlternatively you can ensure that the permissions on the file are correct in general by running the following:Cannot analyze code. Pygments package not found... code:: bash\n\n chmod -R o+rx /path/to/folder_containing_jar_file\nFinally, if you just want to check the permissions use the command ls -altr and check the final 3 values in the permissions output.", + "prompt_type": "human_bot" + }, + { + "instruction": "I get a org.datanucleus.exceptions.NucleusUserException: Please check your CLASSPATH and plugin specification error when attempting to connect to Hive. How can I fix that?", + "output": "Make sure hive-site.xml is configured in /etc/hive/conf and not in /etc/hadoop/conf.", + "prompt_type": "human_bot" + }, + { + "instruction": "I get a \"Permission Denied\" error during Hive import. How do I fix this?", + "output": "If you see the following error, your Driverless AI instance may not be able to create a temporary Hive folder due to file system permissions restrictions.Cannot analyze code. Pygments package not found... 
code:: bash\n\n ERROR HiveAgent: Error during execution of query: java.lang.RuntimeException: java.lang.RuntimeException: java.io.IOException: Permission denied;\n org.apache.spark.sql.AnalysisException: java.lang.RuntimeException: java.lang.RuntimeException: java.io.IOException: Permission denied;\nTo fix this error, add the following name-value pair to your hive-site.xml file to specify the location that is accessible to Driverless AI (that is, your Driverless AI /tmp directory).Cannot analyze code. Pygments package not found... code:: bash\n\n \n hive.exec.local.scratchdir\n /path/to/dai/tmp\n \nRecipes", + "prompt_type": "human_bot" + }, + { + "instruction": "Where can I retrieve H2O's custom recipes?", + "output": "H2O's custom recipes can be obtained from the official :recipes-repo:`Recipes for Driverless AI repository `.No role entry for \"recipes-repo\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"recipes-repo\" as canonical role name.Unknown interpreted text role \"recipes-repo\".", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I create my own custom recipe?", + "output": "Refer to the :recipes-writing:`How to Write a Recipe ` guide for details on how to create your own custom recipe.No role entry for \"recipes-writing\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"recipes-writing\" as canonical role name.Unknown interpreted text role \"recipes-writing\".", + "prompt_type": "human_bot" + }, + { + "instruction": "Are MOJOs supported for experiments that use custom recipes?", + "output": "In most cases, MOJOs will not be available for custom recipes. Unless the recipe is simple, creating the MOJO is only possible with additional MOJO runtime support. Contact support@h2o.ai for more information about creating MOJOs for custom recipes. 
(Note: The Python Scoring Pipeline features full support for custom recipes.)", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I use BYOR in my airgapped installation?", + "output": "If your Driverless AI environment cannot access the Internet and, thus, cannot access Driverless AI's \"Bring Your Own Recipes\" from GitHub, please contact H2O support. We can work with you directly to help you access recipes.", + "prompt_type": "human_bot" + }, + { + "instruction": "When enabling recipes in Driverless AI, can I install Python packages from my organization's internal Python package index?", + "output": "Yes\u2014you can use the pip_install_options TOML option to specify your organization's internal Python package index as follows:\n\npip_install_options=\"['--extra-index-url', 'http://my-own-repo:port']\"\n\nFor more information on the --extra-index-url pip install option, refer to the official pip documentation.Experiments", + "prompt_type": "human_bot" + }, + { + "instruction": "How much memory does Driverless AI require in order to run experiments?", + "output": "Right now, Driverless AI requires approximately 10x the size of the data in system memory.", + "prompt_type": "human_bot" + }, + { + "instruction": "How many columns can Driverless AI handle?", + "output": "Driverless AI has been tested on datasets with 10k columns. When running experiments on wide data, Driverless AI automatically checks if it is running out of memory, and if it is, it reduces the number of features until it can fit in memory. This may lead to a worse model, but Driverless AI shouldn't crash because the data is wide.", + "prompt_type": "human_bot" + }, + { + "instruction": "How should I use Driverless AI if I have large data?", + "output": "Driverless AI can handle large datasets out of the box. 
For very large datasets (more than 10 billion rows x columns), we recommend sampling your data for Driverless AI. Keep in mind that the goal of driverless AI is to go through many features and models to find the best modeling pipeline, and not to just train a few models on the raw data (H2O-3 is ideally suited for that case).For large datasets, the recommended steps are:Run with the recommended accuracy/time/interpretability settings first, especially accuracy <= 7Gradually increase accuracy settings to 7 and choose accuracy 9 or 10 only after observing runs with <= 7.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI detect the ID column?", + "output": "The ID column logic is one of the following:The column is named 'id', 'Id', 'ID' or 'iD' exactlyThe column contains a significant number of unique values (above max_relative_cardinality in the config.toml file or Max. allowed fraction of uniques for integer and categorical cols in Expert settings)", + "prompt_type": "human_bot" + }, + { + "instruction": "Can Driverless AI handle data with missing values/nulls?", + "output": "Yes, data that is imported into Driverless AI can include missing values. Feature engineering is fully aware of missing values, and missing values are treated as information - either as a special categorical level or as a special number. So for target encoding, for example, rows with a certain missing feature will belong to the same group. For Categorical Encoding where aggregations of a numeric columns are calculated for a grouped categorical column, missing values are kept. The formula for calculating the mean is the sum of non-missing values divided by the count of all non-missing values. For clustering, we impute missing values. 
And for frequency encoding, we count the number of rows that have a certain missing feature.The imputation strategy is as follows:XGBoost/LightGBM do not need missing value imputation and may, in fact, perform worse with any specific other strategy unless the user has a strong understanding of the data.Driverless AI automatically imputes missing values using the mean for GLM.Driverless AI provides an imputation setting for TensorFlow in the config.toml file: tf_nan_impute_value post-normalization. If you set this option to 0, then missing values will be imputed. Setting it to (for example) +5 will specify 5 standard deviations outside the distribution. The default for TensorFlow is -5, which specifies that TensorFlow will treat NAs like a missing value. We recommend that you specify 0 if the mean is better.More information is available in the Missing and Unseen Values Handling section.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI deal with categorical variables? What if an integer column should really be treated as categorical?", + "output": "If a column has string values, then Driverless AI will treat it as a categorical feature. There are multiple methods for how Driverless AI converts the categorical variables to numeric. These include:One Hot Encoding: creating dummy variables for each valueFrequency Encoding: replace category with how frequently it is seen in the dataTarget Encoding: replace category with the average target value (additional steps included to prevent overfitting)Weight of Evidence: calculate weight of evidence for each category (http://ucanalytics.com/blogs/information-value-and-weight-of-evidencebanking-case/)Driverless AI will try multiple methods for representing the column and determine which representation(s) are best.If the column has integers, Driverless AI will try treating the column as a categorical column and numeric column. 
It will treat any integer column as both categorical and numeric if the number of unique values is less than 50.This is configurable in the config.toml file:Cannot analyze code. Pygments package not found... code:: bash\n\n # Whether to treat some numerical features as categorical\n # For instance, sometimes an integer column may not represent a numerical feature but\n # represents different numerical codes instead.\n num_as_cat = true\n\n # Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only)\n max_int_as_cat_uniques = 50\n(Note: Driverless AI will also check if the distribution of any numeric column differs significantly from the distribution of typical numerical data using Benford's Law. If the column distribution does not obey Benford's Law, we will also try to treat it as categorical even if there are more than 50 unique values.)", + "prompt_type": "human_bot" + }, + { + "instruction": "How are outliers handled?", + "output": "Outliers are not removed from the data. Instead Driverless AI finds the best way to represent data with outliers. For example, Driverless AI may find that binning a variable with outliers improves performance.For target columns, Driverless AI first determines the best representation of the column. It may find that for a target column with outliers, it is best to predict the log of the column.", + "prompt_type": "human_bot" + }, + { + "instruction": "If I drop several columns from the Train dataset, will Driverless AI understand that it needs to drop the same columns from the Test dataset?", + "output": "If you drop columns from the training dataset, Driverless AI will do the same for the validation and test datasets (if the columns are present). 
There is no need for these columns because no features will be created from them.", + "prompt_type": "human_bot" + }, + { + "instruction": "Does Driverless AI treat numeric variables as categorical variables?", + "output": "In certain cases, yes. You can prevent this behavior by setting the num_as_cat variable in your installation's config.toml file to false. You can have finer grain control over this behavior by excluding the Numeric to Categorical Target Encoding Transformer and the Numeric To Categorical Weight of Evidence Transformer and their corresponding genes in your installation's config.toml file. To learn more about the config.toml file, see the :ref:`config_file` section.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".", + "prompt_type": "human_bot" + }, + { + "instruction": "Which algorithms are used in Driverless AI?", + "output": "Features are engineered with a proprietary stack of Kaggle-winning statistical approaches including some of the most sophisticated target encoding and likelihood estimates based on groupings, aggregations and joins, but we also employ linear models, neural nets, clustering and dimensionality reduction models and many traditional approaches such as one-hot encoding etc.On top of the engineered features, sophisticated models are fitted, including, but not limited to: XGBoost (both original XGBoost and 'lossguide' (LightGBM) mode), Decision Trees, GLM, TensorFlow (including a TensorFlow NLP recipe based on CNN Deeplearning models), RuleFit, FTRL (Follow the Regularized Leader), Isolation Forest, and Constant Models. (Refer to :ref:`supported_algorithms` for more information.) 
And additional algorithms can be added via :ref:`Recipes `.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".In general, GBMs are the best single-shot algorithms. Since 2006, boosting methods have proven to be the most accurate for noisy predictive modeling tasks outside of pattern recognition in images and sound (https://www.cs.cornell.edu/~caruana/ctp/ct.papers/caruana.icml06.pdf). The advent of XGBoost and Kaggle only cemented this position.", + "prompt_type": "human_bot" + }, + { + "instruction": "Why do my selected algorithms not show up in the Experiment Preview?", + "output": "When changing the algorithms used via Expert Settings > Model and Expert Settings > Recipes, you may notice in the Experiment Preview that those changes are not applied. Driverless AI determines whether to include models and/or recipes based on a hierarchy of those expert settings as well as data types (numeric, categorical, text, image, etc.) 
and system properties (GPUs, multiple GPUs, etc.). Setting an Algorithm to \"OFF\" in Expert Settings: If an algorithm is turned OFF in Expert Settings (for example, GLM Models) when running, then that algorithm will not be included in the experiment. Algorithms Not Included from Recipes (BYOR): If an algorithm from a custom recipe is not selected for the experiment in the Include specific models option, then that algorithm will not be included in the experiment, regardless of whether that same algorithm is set to AUTO or ON on the Expert Settings > Model page. Algorithms Not Specified as \"OFF\" and Included from Recipes: If a Driverless AI algorithm is specified as either \"AUTO\" or \"ON\" and additional models are selected for the experiment in the Include specific models option, then those algorithms may or may not be included in the experiment. Driverless AI will determine the algorithms to use based on the data and experiment type. To show warnings in the preview for which models were not used, set show_inapplicable_models_preview = true in config.toml", + "prompt_type": "human_bot" + }, + { + "instruction": "Why do my selected transformers not show up in the Experiment Preview?", + "output": "When changing the transformers used via Expert Settings > Transformers and Expert Settings > Recipes, you may notice in the Experiment Preview that those changes are not applied. Driverless AI determines whether transformers can be used based upon data types (numeric, categorical, text, image, etc.) 
and system properties (GPUs, multiple GPUs, etc.). Transformers Not Included from Recipes (BYOR): If a transformer from a custom recipe is not selected for the experiment in the Include specific transformers option, then that transformer will not be included in the experiment. To show warnings in the preview for which transformers were not used, set show_inapplicable_transformers_preview = true in config.toml", + "prompt_type": "human_bot" + }, + { + "instruction": "How can we turn on TensorFlow Neural Networks so they are evaluated?", + "output": "Neural networks are considered by Driverless AI, although they may not be evaluated by default. To ensure that neural networks are tried, you can turn on TensorFlow in the Expert Settings. Once you have set TensorFlow to ON, you should see the Experiment Preview on the left hand side change and mention that it will evaluate TensorFlow models. We recommend using TensorFlow neural networks if you have a multinomial use case with more than 5 unique values.", + "prompt_type": "human_bot" + }, + { + "instruction": "Does Driverless AI standardize the data?", + "output": "Driverless AI will automatically do variable standardization for certain algorithms. For example, with Linear Models and Neural Networks, the data is automatically standardized. For decision tree algorithms, however, we do not perform standardization because these algorithms do not benefit from standardization.", + "prompt_type": "human_bot" + }, + { + "instruction": "What objective function is used in XGBoost?", + "output": "The objective function used in XGBoost is: reg:squarederror and a custom absolute error objective function for regression; binary:logistic or multi:softprob for classification. The objective function does not change depending on the scorer chosen. The scorer influences parameter tuning only. 
For regression, Tweedie, Gamma, and Poisson regression objectives are supported.More information on the XGBoost instantiations can be found in the logs and in the model summary, both of which can be downloaded from the GUI or found in the /tmp/h2oai_experiment_/ folder on the server.", + "prompt_type": "human_bot" + }, + { + "instruction": "Does Driverless AI perform internal or external validation?", + "output": "Driverless AI does internal validation when only training data is provided. It does external validation when training and validation data are provided. In either scenario, the validation data is used for all parameter tuning (models and features), not just for feature selection. Parameter tuning includes target transformation, model selection, feature engineering, feature selection, stacking, etc.Specifically:Internal validation (only training data given):Ideal when data is either close to i.i.d., or for time-series problemsInternal holdouts are used for parameter tuning, with temporal causality for time-series problemsWill do the full spectrum from single holdout split to 5-fold CV, depending on accuracy settingsNo need to split training data manuallyFinal models are trained using CV on the training dataExternal validation (training + validation data given):Ideal when there\u2019s some amount of drift in the data, and the validation set mimics the test set data better than the training dataNo training data wasted during training because training data not used for parameter tuningValidation data is used only for parameter tuning, and is not part of training dataNo CV possible because we explicitly do not want to overfit on the training dataNot allowed for time-series problems (see Time Series FAQ section that follows)Tip: If you want both training and validation data to be used for parameter tuning (the training process), just concatenate the datasets together and turn them both into training data for the \u201cinternal validation\u201d method.", + 
"prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI prevent overfitting?", + "output": "Driverless AI performs a number of checks to prevent overfitting. For example, during certain transformations, Driverless AI calculates the average on out-of-fold data using cross validation. Driverless AI also performs early stopping for every model built, ensuring that the model build will stop when it ceases to improve on holdout data. Additional steps to prevent overfitting include checking for i.i.d. and avoiding leakage during feature engineering. A blog post describing Driverless AI overfitting protection in greater detail is available here: https://www.h2o.ai/blog/driverless-ai-prevents-overfitting-leakage/. More aggressive overfit protection can be enabled by setting lock_ga_to_final_trees=true or using recipe='more_overfit_protection' and fixed_only_first_fold_model='true' and for time-series experiments allow_stabilize_varimp_for_ts=true.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI avoid the multiple hypothesis (MH) problem?", + "output": "Driverless AI uses a variant of the reusable holdout technique to address the multiple hypothesis problem. Refer to https://pdfs.semanticscholar.org/25fe/96591144f4af3d8f8f79c95b37f415e5bb75.pdf for more information.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI suggest the experiment settings?", + "output": "When you run an experiment on a dataset, the experiment settings (Accuracy, Time, and Interpretability) are automatically suggested by Driverless AI. For example, Driverless AI may suggest the parameters Accuracy = 7, Time = 3, Interpretability = 6, based on your data. Driverless AI will automatically suggest experiment settings based on the number of columns and number of rows in your dataset. The settings are suggested to ensure best handling when the data is small. 
If the data is small, Driverless AI will suggest the settings that prevent overfitting and ensure the full dataset is utilized.If the number of rows and number of columns are each below a certain threshold, then:Accuracy will be increased up to 8.The accuracy is increased so that cross validation is done. (We don't want to \"throw away\" any data for internal validation purposes.)Interpretability will be increased up to 8.The higher the interpretability setting, the smaller the number of features in the final model.More complex features are not allowed.This prevents overfitting.Time will be decreased down to 2.There will be fewer feature engineering iterations to prevent overfitting.", + "prompt_type": "human_bot" + }, + { + "instruction": "What happens when I set Interpretability and Accuracy to the same number?", + "output": "The answer is currently that interpretability controls which features are created and what features are kept. (Also above interpretability = 6, monotonicity constraints are used in XGBoost GBM, XGBoost Dart, LightGBM, and Decision Tree models.) The accuracy refers to how hard Driverless AI then tries to make those features into the most accurate model", + "prompt_type": "human_bot" + }, + { + "instruction": "Can I specify the number of GPUs to use when running Driverless AI?", + "output": "When running an experiment, the Expert Settings let you specify the starting GPU ID for Driverless AI to use. You can also specify the maximum number of GPUs to use per model and per experiment. Refer to the :ref:`expert-settings` section for more information.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I create the simplest model in Driverless AI?", + "output": "To create the simplest model in Driverless AI, set the following Experiment Settings:Set Accuracy to 1. 
Note that this can hurt performance as a sample will be used. If necessary, adjust the knob until the preview shows no sampling. Set Time to 1. Set Interpretability to 10. Next, configure the following Expert Settings: Turn OFF all algorithms except GLM. Set GLM models to ON. Set Ensemble level to 0. Set Select target transformation of the target for regression problems to Identity. Disable Data distribution shift detection. Disable Target Encoding. Alternatively, you can set Pipeline Building Recipe to Compliant. Compliant automatically configures the following experiment and expert settings: interpretability=10 (To avoid complexity. This overrides GUI or Python client settings for Interpretability.) enable_glm='on' (Remaining algos are 'off', to avoid complexity and be compatible with algorithms supported by MLI.) num_as_cat=true: Treat some numerical features as categorical. For instance, sometimes an integer column may not represent a numerical feature but represent different numerical codes instead. fixed_ensemble_level=0: Don't use any ensemble (to avoid complexity). feature_brain_level=0: No feature brain used (to ensure every restart is identical). max_feature_interaction_depth=1: Interaction depth is set to 1 (no multi-feature interactions to avoid complexity). target_transformer=\"identity\": For regression (to avoid complexity). check_distribution_shift=\"off\": Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning). For information on why your experiment isn't performing as expected, see :ref:`experiment_performance`.", + "prompt_type": "human_bot" + }, + { + "instruction": "When I run multiple experiments with different seeds, why do I see different scores, runtimes, and sizes on disk in the Experiments listing page?", + "output": "When running multiple experiments with 
all of the same settings except the seed, understand that a feature brain level > 0 can lead to variations in models, features, timing, and sizes on disk. (The default value is 2.) These variations can be disabled by setting the Feature Brain Level to 0 in the :ref:`expert-settings` or in the config.toml file.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".In addition, if you use a different seed for each experiment, then each experiment can be different due to the randomness in the genetic algorithm that searches for the best features and model parameters. Only if Reproducible is set with the same seed and with a feature brain level of 0 should users expect the same outcome. Once a different seed is set, the models, features, timing, and sizes on disk can all vary within the constraints set by the choices made for the experiment. (I.e., accuracy, time, interpretability, expert settings, etc., all constrain the outcome, and then a different seed can change things within those constraints.)", + "prompt_type": "human_bot" + }, + { + "instruction": "Why does the final model performance appear to be worse than previous iterations?", + "output": "There are a few things to remember:Driverless AI creates a best effort estimate of the generalization performance of the best modeling pipeline found so far.The performance estimation is always based on holdout data (data unseen by the model).If no validation dataset is provided, the training data is split internally to create internal validation holdout data (once or multiple times or cross-validation, depending on the accuracy settings).If no validation dataset is provided, for accuracy <= 7, a single holdout split is used, and a \"lucky\" or \"unlucky\" split can bias estimates for small datasets or datasets with high variance.If a validation dataset is provided, then all performance estimates are solely based on the 
entire validation dataset (independent of accuracy settings).All scores reported are based on bootstrapped-based statistical methods and come with error bars that represent a range of estimate uncertainty.After the final iteration, a best final model is trained on a final set of engineered features. Depending on accuracy settings, a more accurate estimation of generalization performance may be done using cross-validation. Also, the final model may be a stacked ensemble consisting of multiple base models, which generally leads to better performance. Consequently, in rare cases, the difference in performance estimation method can lead to the final model's estimated performance seeming poorer than those from previous iterations. (i.e., The final model's estimated score is significantly worse than the last iteration score and error bars don't overlap.) In that case, it is very likely that the final model performance estimation is more accurate, and the prior estimates were biased due to a \"lucky\" split. To confirm this, you can re-run the experiment multiple times (without setting the reproducible flag).If you would like to minimize the likelihood of the final model performance appearing worse than previous iterations, here are some recommendations:Increase accuracy settingsProvide a validation datasetProvide more data", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I find features that may be causing data leakages in my Driverless AI model?", + "output": "To find original features that are causing leakage, have a look at features_orig.txt in the experiment summary download. Features causing leakage will have high importance there. To get a hint at derived features that might be causing leakage, create a new experiment with dials set to 2/2/8, and run the new experiment on your data with all your features and response. Then analyze the top 1-2 features in the model variable importance. 
They are likely the main contributors to data leakage if it is occurring.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I see the performance metrics on the test data?", + "output": "As long as you provide a target column in the test set, Driverless AI will show the best estimate of the final model's performance on the test set at the end of the experiment. The test set is never used to tune parameters (unlike to what Kagglers often do), so this is purely a convenience. Of course, you can still make test set predictions and compute your own metrics using a method of your choice.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I see all the performance metrics possible for my experiment?", + "output": "At the end of the experiment, the model's estimated performance on all provided datasets with a target column is printed in the experiment logs. For example, for the test set:Cannot analyze code. Pygments package not found... code:: bash\n\n Final scores on test (external holdout) +/- stddev:\n GINI = 0.87794 +/- 0.035305 (more is better)\n MCC = 0.71124 +/- 0.043232 (more is better)\n F05 = 0.79175 +/- 0.04209 (more is better)\n F1 = 0.75823 +/- 0.038675 (more is better)\n F2 = 0.82752 +/- 0.03604 (more is better)\n ACCURACY = 0.91513 +/- 0.011975 (more is better)\n LOGLOSS = 0.28429 +/- 0.016682 (less is better)\n AUCPR = 0.79074 +/- 0.046223 (more is better)\n optimized: AUC = 0.93386 +/- 0.018856 (more is better)\n", + "prompt_type": "human_bot" + }, + { + "instruction": "What if my training/validation and testing data sets come from different distributions?", + "output": "In general, Driverless AI uses training data to engineer features and train models and validation data to tune all parameters. If no external validation data is given, the training data is used to create internal holdouts. The way holdouts are created internally depends on whether there is a strong time dependence, see the point below. 
If the data has no obvious time dependency (e.g., if there is no time column, either implicit or explicit), or if the data can be sorted arbitrarily and it won't affect the outcome (e.g., Iris data, predicting flower species from measurements), and if the test dataset is different (e.g., new flowers or only large flowers), then the model performance on validation (either internal or external) as measured during training won't be achieved during final testing due to the obvious inability of the model to generalize.", + "prompt_type": "human_bot" + }, + { + "instruction": "Does Driverless AI handle weighted data?", + "output": "Yes. You can optionally provide an extra weight column in your training (and validation) data with non-negative observation weights. This can be useful to implement domain-specific effects such as exponential weighting in time or class weights. All of our algorithms and metrics in Driverless AI support observation weights, but note that estimated likelihoods can be skewed as a consequence.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI handle fold assignments for weighted data?", + "output": "Currently, Driverless AI does not take the weights into account during fold creation, but you can provide a fold column to enforce your own grouping, i.e., to keep rows that belong to the same group together (either in train or valid). The fold column has to be a categorical column (integers ok) that assigns a group ID to each row. (It needs to have at least 5 groups because we do up to 5-fold CV.)", + "prompt_type": "human_bot" + }, + { + "instruction": "Why do I see that adding new features to a dataset deteriorates the performance of the model?", + "output": "You may notice that after adding one or more new features to a dataset, it deteriorates the performance of the Driverless AI model. 
In Driverless AI, the feature engineering sequence is fairly random and may end up not doing the same things with the original features if you restart entirely fresh with new columns. Beginning in Driverless AI v1.4.0, you now have the option to Restart from Last Checkpoint. This lets you pull in a new dataset with more columns, and Driverless AI will more iteratively take advantage of the new columns.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI handle imbalanced data for binary classification experiments?", + "output": "If you have data that is imbalanced, a binary imbalanced model can help to improve scoring with a variety of imbalanced sampling methods. An imbalanced model is able to take advantage of most (or even all) of the imbalanced dataset's positive values during sampling, while a regular model significantly limits the population of positive values. Imbalanced models, however, take more time to make predictions, and they are not always more accurate than regular models. We still recommend that you try using an imbalanced model if your data is imbalanced to see if scoring is improved over a regular model. Note that this information only applies to binary models.", + "prompt_type": "human_bot" + }, + { + "instruction": "How is feature importance calculated in Driverless AI?", + "output": "For most models, such as XGBoost or LightGBM models, Driverless AI uses normalized information gain to calculate feature importance. Other estimates of importance are sometimes used for certain models.", + "prompt_type": "human_bot" + }, + { + "instruction": "I want to have only one LightGBM model in the final pipeline. How can I do this?", + "output": "You can do this by using :ref:`ensemble-levels`. To change the ensemble level, use the Ensemble Level for Final Modeling Pipeline expert setting (fixed_ensemble_level in the config.toml), which is located in the Model tab. If you want a single model, use level 0. 
If you are okay with using the same model with hyperparameters but trained with multiple cross validation folds, then use level 1.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".To use only one model type, use the Include Specific Models expert setting, which is located in the Recipes tab.For more information, see :ref:`ensemble-learning-in-dai`.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".Setting fixed_ensemble_level = 0 returns a single model trained on one hundred percent of the data, not just a single model type with CV.When the Cross-validate Single Final Model expert setting is enabled (default), the single model with fixed_ensemble_level = 0 has the optimal number of trees because it is tuned with CV. Disabling this setting is not recommended when fixed_ensemble_level = 0.\"Ensemble", + "prompt_type": "human_bot" + }, + { + "instruction": "I want to have only one LightGBM model and no FE. How can I do this?", + "output": "You can do this by additionally limiting the set of allowed transformations to just the OriginalTransformer, which leaves numeric features in their original form and drops all non-numeric features. To include or exclude specific transformers in your Driverless AI environment, use the Include Specific Transformers expert setting (included_transformers in the config.toml), which is located in the Recipes tab. 
You can also set the Feature Engineering Effort expert setting (feature_engineering_effort in the config.toml) to 0 to achieve the same effect.For more information, see :ref:`Transformations`.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".\"Include", + "prompt_type": "human_bot" + }, + { + "instruction": "What is fast approximation in Driverless AI?", + "output": "Fast approximation is available for both regular and Shapley predictions. It is enabled by default for MLI / AutoDoc and turned off by default for other clients. The extent of approximation can be fully configured or turned off with the fast approximation expert settings. Enabling fast approximation can result in a significant speedup for large prediction tasks like the creation of partial dependence plots and other MLI-related tasks.The following is a list of expert settings that can be used to configure fast approximation.Regular predictions::ref:`fast-approx-trees`No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".:ref:`fast-approx-one-fold`No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".:ref:`fast-approx-one-model`No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".Shapley predictions::ref:`fast-approx-trees-shap`No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".:ref:`fast-approx-one-fold-shap`No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".:ref:`fast-approx-one-model-shap`No role entry for 
\"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".MLI::ref:`mli_fast_approx `No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".", + "prompt_type": "human_bot" + }, + { + "instruction": "When should fast approximation be turned off?", + "output": "In situations where a more detailed partial dependence plot or interpretation is required, you may want to disable fast approximation.", + "prompt_type": "human_bot" + }, + { + "instruction": "Why does the confusion matrix sometimes show decimals instead of whole numbers?", + "output": "Fractional confusion matrix values most commonly arise as a consequence of the averaging of confusion matrices across cross-validation fold splits or across repeated fold splits, but the same can also happen for non-integer observation weights.", + "prompt_type": "human_bot" + }, + { + "instruction": "Is data sampling for multiclass use cases supported?", + "output": "Data sampling for multiclass use cases is not currently supported. However, it is possible to approximate the data sampling approach by adding more weight in order to penalize rare classes. You can add weight to an individual observation by using a :ref:`weight column ` when setting up your experiment. You can also enable LightGBM multiclass balancing by setting the enable_lightgbm_multiclass_balancing configuration setting to on, which enables automatic class weighting for imbalanced multiclass problems.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".Feature Transformations", + "prompt_type": "human_bot" + }, + { + "instruction": "Where can I get details of the various transformations performed in an experiment?", + "output": "Download the experiment's log .zip file from the GUI. 
This zip file includes summary information, log information, and a gene_summary.txt file with details of the transformations used in the experiment. Specifically, there is a details folder with all subprocess logs.On the server, the experiment specific files are inside the /tmp/h2oai_experiment_/ folder after the experiment completes, particularly h2oai_experiment_logs_.zip and h2oai_experiment_summary_.zip.Predictions", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I download the predictions onto the machine where Driverless AI is running?", + "output": "When you select Score on Another Dataset, the predictions will automatically be stored on the machine where Driverless AI is running. They will be saved in the following locations (and can be opened again by Driverless AI, both for .csv and .bin):Training Data Predictions: tmp/h2oai_experiment_/train_preds.csv (also saved as .bin)Testing Data Predictions: tmp/h2oai_experiment_/test_preds.csv (also saved as .bin)New Data Predictions: tmp/h2oai_experiment_/automatically_generated_name.csv. Note that the automatically generated name will match the name of the file downloaded to your local computer.", + "prompt_type": "human_bot" + }, + { + "instruction": "Why are predicted probabilities not available when I run an experiment without ensembling?", + "output": "When Driverless AI provides pre-computed predictions after completing an experiment, it uses only those parts of the modeling pipeline that were not trained on the particular rows for which the predictions are made. This means that Driverless AI needs holdout data in order to create predictions, such as validation or test sets, where the model is trained on training data only. 
In the case of ensembles, Driverless AI uses cross-validation to generate holdout folds on the training data, so we are able to provide out-of-fold estimates for every row in the training data and, hence, can also provide training holdout predictions (that will provide a good estimate of generalization performance). In the case of a single model, though, that is trained on 100% of the training data. There is no way to create unbiased estimates for any row in the training data. While DAI uses an internal validation dataset, this is a re-usable holdout, and therefore will not contain holdout predictions for the full training dataset. You need cross-validation in order to get out-of-fold estimates, and then that's not a single model anymore. If you want to still get predictions for the training data for a single model, then you have to use the scoring API to create predictions on the training set. From the GUI, this can be done using the Score on Another Dataset button for a completed experiment. Note, though, that the results will likely be overly optimistic, too good to be true, and virtually useless.Deployment", + "prompt_type": "human_bot" + }, + { + "instruction": "What drives the size of a MOJO?", + "output": "The size of the MOJO is based on the complexity of the final modeling pipeline (i.e., feature engineering and models). One of the biggest factors is the amount of higher-order interactions between features, especially target encoding and related features, which have to store lookup tables for all possible combinations observed in the training data. You can reduce the amount of these transformations by reducing the value of Max. feature interaction depth and/or Feature engineering effort under Expert Settings, or by increasing the interpretability settings for the experiment. Ensembles also contribute to the final modeling pipeline's complexity as each model has its own pipeline. 
Lowering the accuracy settings or setting :ref:`ensemble level ` to a lower number. The number of features Max. pipeline features also affects the MOJO size. Text transformers are pretty bulky as well and can add to the MOJO size.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".To toggle to a smaller mojo during model building with a single click, see - :ref:`Reduce mojo size ` under experiment settings of an experiment.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".", + "prompt_type": "human_bot" + }, + { + "instruction": "Are MOJOs thread safe?", + "output": "Yes, all Driverless AI MOJOs are thread safe.", + "prompt_type": "human_bot" + }, + { + "instruction": "Running the scoring pipeline for my MOJO is taking several hours. How can I get this to run faster?", + "output": "When running example.sh, Driverless AI implements a memory setting, which is suitable for most use cases. For very large models, however, it may be necessary to increase the memory limit when running the Java application for data transformation. This can be done using the -Xmx25g parameter. For example:Cannot analyze code. Pygments package not found... code:: bash\n\n java -Xmx25g -Dai.h2o.mojos.runtime.license.file=license.sig -cp mojo2-runtime.jar ai.h2o.mojos.ExecuteMojo pipeline.mojo example.csv\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Why have I encountered a \"Best Score is not finite\" error?", + "output": "Driverless AI uses 32-bit floats by default. You may encounter this error if your data value exceeds 1E38 or if you are resolving more than 1 part in 10 million. 
You can resolve this error using one of the following methods: enable the Force 64-bit Precision option in the experiment's Expert Settings, or set data_precision=\"float64\" and transformer_precision=\"float64\" in config.toml.", + "prompt_type": "human_bot" + }, + { + "instruction": "What if my data has a time dependency?", + "output": "If you know that your data has a strong time dependency, select a time column before starting the experiment. The time column must be in a Datetime format that can be parsed by pandas, such as \"2017-11-06 14:32:21\", \"Monday, June 18, 2012\" or \"Jun 18 2018 14:34:00\" etc., or contain only integers. If you are unsure about the strength of the time dependency, run two experiments: One with time column set to \"[OFF]\" and one with time column set to \"[AUTO]\" (or pick a time column yourself).", + "prompt_type": "human_bot" + }, + { + "instruction": "What is a lag, and why does it help?", + "output": "A lag is a feature value from a previous point in time. Lags are useful to take advantage of the fact that the current (unknown) target value is often correlated with previous (known) target values. Hence, they can better capture target patterns along the time axis. Why can't I specify a validation data set for time-series problems? Why do you look at the test set for time-series problems? The problem with validation vs test in the time series setting is that there is only one valid way to define the split. If a test set is given, its length in time defines the validation split and the validation data has to be part of train. Otherwise the time-series validation won't be useful. For instance: Let's assume we have train = [1,2,3,4,5,6,7,8,9,10] and test = [12,13], where integers define time periods (e.g., weeks). For this example, the most natural train/valid split that mimics the test scenario would be: train = [1,2,3,4,5,6,7] and valid = [9,10], and period 8 is not included in the training set to allow for a gap. 
Note that we will look at the start time and the duration of the test set only (if provided), and not at the contents of the test data (neither features nor target). If the user provides validation = [8,9,10] instead of test data, then this could lead to inferior validation strategy and worse generalization. Hence, we use the user-given test set only to create the optimal internal train/validation splits. If no test set is provided, the user can provide the length of the test set (in periods), the length of the train/test gap (in periods) and the length of the period itself (in seconds).", + "prompt_type": "human_bot" + }, + { + "instruction": "Why does the gap between train and test matter? Is it because of creating the lag features on the test set?", + "output": "Taking the gap into account is necessary in order to avoid too optimistic estimates of the true error and to avoid creating history-based features like lags for the training and validation data (which cannot be created for the test data due to the missing information).", + "prompt_type": "human_bot" + }, + { + "instruction": "In regards to applying the target lags to different subsets of the time group columns, are you saying Driverless AI perform auto-correlation at \"levels\" of the time series? For example, consider the Walmart dataset where I have Store and Dept (and my target is Weekly Sales). Are you saying that Driverless AI checks for auto-correlation in Weekly Sales based on just Store, just Dept, and both Store and Dept?", + "output": "Currently, auto-correlation is only applied on the detected superkey (entire TGC) of the training dataset relation at the very beginning. 
It's used to rank potential lag-sizes, with the goal of pruning the search space for the GA optimization process, which is responsible for selecting the lag features.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI detect the time period?", + "output": "Driverless AI treats each time series as a function with some frequency 1/ns. The actual value is estimated by the median of time deltas across maximal length TGC subgroups. The chosen SI unit minimizes the distance to all available SI units.", + "prompt_type": "human_bot" + }, + { + "instruction": "What is the logic behind the selectable numbers for forecast horizon length?", + "output": "The shown forecast horizon options are based on quantiles of valid splits. This is necessary because Driverless AI cannot display all possible options in general.", + "prompt_type": "human_bot" + }, + { + "instruction": "Assume that in my Walmart dataset, all stores provided data at the week level, but one store provided data at the day level. What would Driverless AI do?", + "output": "Driverless AI would still assume \"weekly data\" in this case because the majority of stores are yielding this property. The \"daily\" store would be resampled to the detected overall frequency.", + "prompt_type": "human_bot" + }, + { + "instruction": "Assume that in my Walmart dataset, all stores and departments provided data at the weekly level, but one department in a specific store provided weekly sales on a bi-weekly basis (every two weeks). What would Driverless AI do?", + "output": "That's similar to having missing data. Due to proper resampling, Driverless AI can handle this without any issues.", + "prompt_type": "human_bot" + }, + { + "instruction": "Why does the number of weeks that you want to start predicting matter?", + "output": "That's an option to provide a train-test gap if no test data is available. 
That is to say, \"I don't have my test data yet, but I know it will have a gap to train of x.\"", + "prompt_type": "human_bot" + }, + { + "instruction": "Are the scoring components of time series sensitive to the order in which new pieces of data arrive? I.e., is each row independent at scoring time, or is there a real-time windowing effect in the scoring pieces?", + "output": "Each row is independent at scoring time.", + "prompt_type": "human_bot" + }, + { + "instruction": "What happens if the user, at predict time, gives a row with a time value that is too small or too large?", + "output": "Internally, \"out-of-bounds\" time values are encoded with special values. The samples will still be scored, but the predictions won't be trustworthy.", + "prompt_type": "human_bot" + }, + { + "instruction": "What's the minimum data size for a time series recipe?", + "output": "We recommend that you have around 10,000 validation samples in order to get a reliable estimate of the true error. The time series recipe can still be applied for smaller data, but the validation error might be inaccurate.", + "prompt_type": "human_bot" + }, + { + "instruction": "How long must the training data be compared to the test data?", + "output": "At a minimum, the training data has to be at least twice as long as the test data along the time axis. However, we recommend that the training data be at least three times as long as the test data.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does the time series recipe deal with missing values?", + "output": "Missing values will be converted to a special value, which is different from any non-missing feature value. 
Explicit imputation techniques won't be applied.", + "prompt_type": "human_bot" + }, + { + "instruction": "Can the time information be distributed across multiple columns in the input data (such as [year, day, month])?", + "output": "Currently Driverless AI requires the data to have the time stamps given in a single column. Driverless AI will create additional time features like [year, day, month] on its own, if they turn out to be useful.", + "prompt_type": "human_bot" + }, + { + "instruction": "What type of modeling approach does Driverless AI use for time series?", + "output": "Driverless AI combines the creation of history-based features like lags, moving averages etc. with the modeling techniques that are also applied to i.i.d. data. The primary model of choice is XGBoost.", + "prompt_type": "human_bot" + }, + { + "instruction": "What's the idea behind exponential weighting of moving averages?", + "output": "Exponential weighting accounts for the possibility that more recent observations are better suited to explain the present than older observations.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I reduce the size of the Audit Logger?", + "output": "An Audit Logger file is created every day that Driverless AI is in use. The audit_log_retention_period config variable lets you specify the number of days, after which the audit.log will be overwritten. This option defaults to 5 days, which means that Driverless AI will maintain Audit Logger files for the last 5 days, and audit.log files older than 5 days are removed and replaced with newer log files. When this option is set to 0, the audit.log file will not be overwritten.", + "prompt_type": "human_bot" + }, + { + "instruction": "How is Driverless AI different than any other black box ML algorithm?", + "output": "Driverless AI uses many techniques (some older and some cutting-edge) for interpreting black box models including creating reason codes for every prediction the system makes. 
We have also created numerous open source code examples and free publications that explain these techniques. See the list below for links to these resources and for references for the interpretability techniques.Open source interpretability examples:https://github.com/jphall663/interpretable_machine_learning_with_pythonhttps://content.oreilly.com/oriole/Interpretable-machine-learning-with-Python-XGBoost-and-H2Ohttps://github.com/h2oai/mli-resourcesFree Machine Learning Interpretability publications:http://www.oreilly.com/data/free/an-introduction-to-machine-learning-interpretability.csphttp://docs.h2o.ai/driverless-ai/latest-stable/docs/booklets/MLIBooklet.pdfMachine Learning Techniques already in Driverless AI:Tree-based Variable Importance: https://web.stanford.edu/~hastie/ElemStatLearn/printings/ESLII_print12.pdfPartial Dependence: https://web.stanford.edu/~hastie/ElemStatLearn/printings/ESLII_print12.pdfLIME: http://www.kdd.org/kdd2016/papers/files/rfp0573-ribeiroA.pdfLOCO: http://www.stat.cmu.edu/~ryantibs/papers/conformal.pdfICE: https://arxiv.org/pdf/1309.6392.pdfSurrogate Models:https://papers.nips.cc/paper/1152-extracting-tree-structured-representations-of-trained-networks.pdfhttps://arxiv.org/pdf/1705.08504.pdfShapley Explanations: http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions", + "prompt_type": "human_bot" + }, + { + "instruction": "How often do new versions come out?", + "output": "The frequency of major new Driverless AI releases has historically been about every two months.Installation/Upgrade/Authentication", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I change my username and password?", + "output": "The username and password are tied to the experiments you have created. For example, if I log in with the username/password: megan/megan and start an experiment, then I would need to log back in with the same username and password to see those experiments. 
The username and password, however, do not limit your access to Driverless AI. If you want to use a new user name and password, you can log in again with a new username and password, but keep in mind that you won't see your old experiments.", + "prompt_type": "human_bot" + }, + { + "instruction": "Can Driverless AI run on CPU-only machines?", + "output": "Yes, Driverless AI can run on machines with CPUs only, though GPUs are recommended. Installation instructions are available for GPU and CPU systems. Refer to the \"before_you_begin\" section of the documentation for more information.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I upgrade to a newer version of Driverless AI?", + "output": "Upgrade instructions vary depending on your environment. Refer to the installation section for your environment. Upgrade instructions are included there.", + "prompt_type": "human_bot" + }, + { + "instruction": "What kind of authentication is supported in Driverless AI?", + "output": "Driverless AI supports Client Certificate, LDAP, Local, mTLS, OpenID, none, and unvalidated (default) authentication. These can be configured by setting the appropriate environment variables in the config.toml file or by specifying the environment variables when starting Driverless AI. Refer to the \"dai_auth\" section of the documentation for more information.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I automatically turn on persistence each time the GPU system reboots?", + "output": "For GPU machines, the sudo nvidia-persistenced --user dai command can be run after each reboot to enable persistence. 
For systems that have systemd, it is possible to automatically enable persistence after each reboot by removing the --no-persistence-mode flag from nvidia-persistenced.service. Before running the steps below, be sure to review the following for more information:https://docs.nvidia.com/deploy/driver-persistence/index.html#persistence-daemonhttps://docs.nvidia.com/deploy/driver-persistence/index.html#installationRun the following to stop the nvidia-persistenced.service:Cannot analyze code. Pygments package not found... code:: bash\n\n sudo systemctl stop nvidia-persistenced.service\nOpen the file /lib/systemd/system/nvidia-persistenced.service. This file includes a line \"ExecStart=/usr/bin/nvidia-persistenced --user nvidia-persistenced --no-persistence-mode --verbose\".Remove the flag --no-persistence-mode from that line so that it reads:Enumerated list start value not ordinal-1: \"2\" (ordinal 2)Cannot analyze code. Pygments package not found... code:: bash\n\n ExecStart=/usr/bin/nvidia-persistenced --user nvidia-persistenced --verbose\nRun the following command to start the nvidia-persistenced.service:Enumerated list start value not ordinal-1: \"4\" (ordinal 4)Cannot analyze code. Pygments package not found... code:: bash\n\n sudo systemctl start nvidia-persistenced.service\n", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I start Driverless AI on a different port than 12345?", + "output": "No directive entry for \"tabs\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"tabs\" as canonical directive name.Unknown directive type \"tabs\"... tabs::\n .. group-tab:: Docker Image Installs\n\n When starting Driverless AI in Docker, the ``-p`` option specifies the port on which Driverless AI will run. Change this option in the start script if you need to run on a port other than 12345. The following example shows how to run on port 22345. (Change ``nvidia-docker run`` to ``docker-run`` if needed.) 
Keep in mind that `priviliged ports will require root access `__.\n\n .. code-block:: bash\n :substitutions:\n\n nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -u `id -u`:`id -g` \\\n -p 22345:12345 \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\n\n .. group-tab:: Native Installs\n\n To run on a port other than 12345, update the port value in the **config.toml** file. The following example shows how to run Driverless AI on port 22345. Keep in mind that `priviliged ports will require root access `__.\n\n ::\n\n # Export the Driverless AI config.toml file (or add it to ~/.bashrc)\n export DRIVERLESS_AI_CONFIG_FILE=\u201c/config/config.toml\u201d\n\n # IP address and port for Driverless AI HTTP server.\n ip = \"127.0.0.1\"\n port = 22345\n\n Point to this updated config file when restarting Driverless AI.\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Can I set up TLS/SSL on Driverless AI?", + "output": "Yes, Driverless AI provides configuration options that let you set up HTTPS/TLS/SSL. You will need to have your own SSL certificate, or you can create a self-signed certificate for yourself.To enable HTTPS/TLS/SSL on the Driverless AI server, add the following to the config.toml file:Cannot analyze code. Pygments package not found... code:: bash\n\n enable_https = true\n ssl_key_file = \"/etc/dai/private_key.pem\"\n ssl_crt_file = \"/etc/dai/cert.pem\"\nYou can make a self-signed certificate for testing with the following commands:Cannot analyze code. Pygments package not found... 
code:: bash\n\n umask 077\n openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 20 -nodes -subj '/O=Driverless AI'\n sudo chown dai:dai cert.pem private_key.pem\n sudo mv cert.pem private_key.pem /etc/dai\nTo configure specific versions of TLS/SSL, enable or disable the following settings in the config.toml file:Cannot analyze code. Pygments package not found... code:: bash\n\n ssl_no_sslv2 = true\n ssl_no_sslv3 = true\n ssl_no_tlsv1 = true\n ssl_no_tlsv1_1 = true\n ssl_no_tlsv1_2 = false\n ssl_no_tlsv1_3 = false\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Can I set up TLS/SSL on Driverless AI in AWS?", + "output": "Yes, you can set up HTTPS/TLS/SSL on Driverless AI running in an AWS environment. HTTPS/TLS/SSL needs to be configured on the host machine, and the necessary ports will need to be opened on the AWS side. You will need to have your own TLS/SSL cert or you can create a self signed cert for yourself.The following is a very simple example showing how to configure HTTPS with a proxy pass to the port on the container 12345 with the keys placed in /etc/nginx/. Replace with your server name.Cannot analyze code. Pygments package not found... 
code:: bash\n\n server {\n listen 80;\n return 301 https://$host$request_uri;\n }\n\n server {\n listen 443;\n\n # Specify your server name here\n server_name ;\n\n ssl_certificate /etc/nginx/cert.crt;\n ssl_certificate_key /etc/nginx/cert.key;\n ssl on;\n ssl_session_cache builtin:1000 shared:SSL:10m;\n ssl_protocols TLSv1 TLSv1.1 TLSv1.2;\n ssl_ciphers HIGH:!aNULL:!eNULL:!EXPORT:!CAMELLIA:!DES:!MD5:!PSK:!RC4;\n ssl_prefer_server_ciphers on;\n\n access_log /var/log/nginx/dai.access.log;\n\n location / {\n proxy_set_header Host $host;\n proxy_set_header X-Real-IP $remote_addr;\n proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n proxy_set_header X-Forwarded-Proto $scheme;\n\n # Fix the \u201cIt appears that your reverse proxy set up is broken\" error.\n proxy_pass http://localhost:12345;\n proxy_read_timeout 90;\n\n # Specify your server name for the redirect\n proxy_redirect http://localhost:12345 https://;\n }\n }\nMore information about SSL for Nginx in Ubuntu 16.04 can be found here: https://www.digitalocean.com/community/tutorials/how-to-create-a-self-signed-ssl-certificate-for-nginx-in-ubuntu-16-04.", + "prompt_type": "human_bot" + }, + { + "instruction": "I received a \"package dai-.x86_64 does not verify: no digest\" error during the installation. How can I fix this?", + "output": "You will recieve a \"package dai-.x86_64 does not verify: no digest\" error when installing the rpm using an RPM version newer than 4.11.3. You can run the following as a workaround, replacing with your DAI version:Cannot analyze code. Pygments package not found... code:: bash\n\n rpm --nodigest -i dai-.x86_64.rpm\n", + "prompt_type": "human_bot" + }, + { + "instruction": "I received a \"Must have exactly one OpenCL platform 'NVIDIA CUDA'\" error. How can I fix that?", + "output": "If you encounter problems with opencl errors at server time, you may see the following message:Cannot analyze code. Pygments package not found... 
code:: bash\n\n 2018-11-08 14:26:15,341 C: D:452.2GB M:246.0GB 21603 ERROR : Must have exactly one OpenCL platform 'NVIDIA CUDA', but got:\n Platform #0: Clover\n Platform #1: NVIDIA CUDA\n +-- Device #0: GeForce GTX 1080 Ti\n +-- Device #1: GeForce GTX 1080 Ti\n +-- Device #2: GeForce GTX 1080 Ti\n\n Uninstall all but 'NVIDIA CUDA' platform.\nFor Ubuntu, the solution is to run the following:Cannot analyze code. Pygments package not found... code:: bash\n\n sudo apt-get remove mesa-opencl-icd\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Is it possible for multiple users to share a single Driverless AI instance?", + "output": "Driverless AI supports multiple users, and Driverless AI is licensed per a single named user. Therefore, in order, to have different users run experiments simultaneously, they would each need a license. Driverless AI manages the GPU(s) that it is given and ensures that different experiments from different users can run safely simultaneously and don\u2019t interfere with each other. So when two licensed users log in with different credentials, then neither of them will see the other\u2019s experiment. Similarly, if a licensed user logs in using a different set of credentials, then that user will not see any previously run experiments.", + "prompt_type": "human_bot" + }, + { + "instruction": "Can multiple Driverless AI users share a GPU server?", + "output": "Yes, you can allocate multiple users in a single GPU box. For example, a single box with four GPUs can allocate that User1 has two GPUs and User2 has the other two GPUs. This is accomplished by having two separated Driverless AI instances running on the same server.There are two ways to assign specific GPUs to Driverless AI. And in the scenario with four GPUs (two GPUs allocated to two users), both of these options allow each Docker container only to see two GPUs.Use the CUDA_VISIBLE_DEVICES environment variable. 
In the case of Docker deployment, this will translate in passing the -e CUDA_VISIBLE_DEVICES=\"0,1\" to the nvidia-docker run command.Passing the NV_GPU option at the beginning of the nvidia-docker run command. (See example below.)Error in \"code-block\" directive:\nunknown option: \"substitutions\"... code-block:: bash\n :substitutions:\n\n #Team 1\n NV_GPU='0,1' nvidia-docker run\n --pid=host\n --init\n --rm\n --shm-size=256m\n -u id -u:id -g\n -p port-to-team:12345\n -e DRIVERLESS_AI_CONFIG_FILE=\"/config/config.toml\"\n -v /data:/data\n -v /log:/log\n -v /license:/license\n -v /tmp:/tmp\n -v /config:/config\n h2oai/dai-ubi8-x86_64:|tag|\n\n\n #Team 2\n NV_GPU='0,1' nvidia-docker run\n --pid=host\n --init\n --rm\n --shm-size=256m\n -u id -u:id -g\n -p port-to-team:12345\n -e DRIVERLESS_AI_CONFIG_FILE=\"/config/config.toml\"\n -v /data:/data\n -v /log:/log\n -v /license:/license\n -v /tmp:/tmp\n -v /config:/config\n h2oai/dai-ubi8-x86_64:|tag|\nNote, however, that a Driverless AI instance expects to fully utilize and not share the GPUs that are assigned to it. Sharing a GPU with other Driverless AI instances or other running programs can result in out-of-memory issues.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I retrieve a list of Driverless AI users?", + "output": "A list of users can be retrieved using the Python client.Cannot analyze code. Pygments package not found... code:: bash\n\n h2o = Client(address='http://:12345', username='', password='')\n h2o.get_users()\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Start of Driverless AI fails on the message ``Segmentation fault (core dumped)`` on Ubuntu 18/RHEL 7.6. How can I fix this?", + "output": "This problem is caused by the font NotoColorEmoji.ttf, which cannot be processed by the Python matplotlib library. A workaround is to disable the font by renaming it. (Do not use fontconfig because it is ignored by matplotlib.) 
The following will print out the command that should be executed.Cannot analyze code. Pygments package not found... code:: bash\n\n sudo find / -name \"NotoColorEmoji.ttf\" 2>/dev/null | xargs -I{} echo sudo mv {} {}.backup\n\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Which Linux systems does Driverless AI support?", + "output": "Supported Linux systems include x86_64 RHEL 7, RHEL 8, CentOS 7, and CentOS 8.Data", + "prompt_type": "human_bot" + }, + { + "instruction": "Is there a file size limit for datasets?", + "output": "For GBMs, the file size for datasets is limited by the collective CPU or GPU memory on the system, but we continue to make optimizations for getting more data into an experiment, such as using TensorFlow streaming to stream to arbitrarily large datasets.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I import CSV files that use UTF-8 encoding into Excel?", + "output": "Excel requires a byte order mark (BOM) to correctly identify CSV files that use UTF-8 encoding. Refer to the following FAQ entry for more information on how to use a BOM when writing CSV files with datatable.", + "prompt_type": "human_bot" + }, + { + "instruction": "Can a byte order mark be used when writing CSV files with datatable?", + "output": "Yes, a byte order mark (BOM) can be used when writing CSV files with datatable by enabling datatable_bom_csv in the config.toml file when starting Driverless AI.Note: Support for UTF-8 encoding in Excel requires the use of a BOM.", + "prompt_type": "human_bot" + }, + { + "instruction": "Which version of Longhorn is supported by Driverless AI?", + "output": "Driverless AI supports Longhorn v1.1.0 or later.", + "prompt_type": "human_bot" + }, + { + "instruction": "Is it possible to download a transformed test dataset in Driverless AI?", + "output": "Yes, a transformed test dataset can be downloaded in Driverless AI. 
To do this, click Model Actions > Transform Dataset on the completed experiment page, then specify both a train and a test dataset to use for the transformation. The transformed test dataset is made available for download once this process is completed.Connectors", + "prompt_type": "human_bot" + }, + { + "instruction": "Why can't I import a folder as a file when using a data connector on Windows?", + "output": "If you try to use the Import Folder as File option via a data connector on Windows, the import will fail if the folder contains files that do not have file extensions. For example, if a folder contains the files file1.csv, file2.csv, file3.csv, and _SUCCESS, the function will fail due to the presence of the _SUCCESS file.Note that this only occurs if the data is sourced from a volume that is mounted from the Windows filesystem onto the Docker container via -v /path/to/windows/filesystem:/path/in/docker/container flags. This error occurs because of the difference in how files without file extensions are treated in Windows and in the Docker container (CentOS Linux).", + "prompt_type": "human_bot" + }, + { + "instruction": "I get a ClassNotFoundException error when I try to select a JDBC connection. How can I fix that?", + "output": "The folder storing the JDBC jar file must be visible/readable by the dai process user.If you downloaded the JDBC jar file from Oracle, they may provide you with a tar.gz file that you can unpackage with the following command:Cannot analyze code. Pygments package not found... code:: bash\n\n tar --no-same-permissions --no-same-owner -xzvf .gz\nAlternatively you can ensure that the permissions on the file are correct in general by running the following:Cannot analyze code. Pygments package not found... 
code:: bash\n\n chmod -R o+rx /path/to/folder_containing_jar_file\nFinally, if you just want to check the permissions use the command ls -altr and check the final 3 values in the permissions output.", + "prompt_type": "human_bot" + }, + { + "instruction": "I get a org.datanucleus.exceptions.NucleusUserException: Please check your CLASSPATH and plugin specification error when attempting to connect to Hive. How can I fix that?", + "output": "Make sure hive-site.xml is configured in /etc/hive/conf and not in /etc/hadoop/conf.", + "prompt_type": "human_bot" + }, + { + "instruction": "I get a \"Permission Denied\" error during Hive import. How do I fix this?", + "output": "If you see the following error, your Driverless AI instance may not be able to create a temporary Hive folder due to file system permissions restrictions.Cannot analyze code. Pygments package not found... code:: bash\n\n ERROR HiveAgent: Error during execution of query: java.lang.RuntimeException: java.lang.RuntimeException: java.io.IOException: Permission denied;\n org.apache.spark.sql.AnalysisException: java.lang.RuntimeException: java.lang.RuntimeException: java.io.IOException: Permission denied;\nTo fix this error, add the following name-value pair to your hive-site.xml file to specify the location that is accessible to Driverless AI (that is, your Driverless AI /tmp directory).Cannot analyze code. Pygments package not found... 
code:: bash\n\n \n hive.exec.local.scratchdir\n /path/to/dai/tmp\n \nRecipes", + "prompt_type": "human_bot" + }, + { + "instruction": "Where can I retrieve H2O's custom recipes?", + "output": "H2O's custom recipes can be obtained from the official :recipes-repo:`Recipes for Driverless AI repository `.No role entry for \"recipes-repo\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"recipes-repo\" as canonical role name.Unknown interpreted text role \"recipes-repo\".", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I create my own custom recipe?", + "output": "Refer to the :recipes-writing:`How to Write a Recipe ` guide for details on how to create your own custom recipe.No role entry for \"recipes-writing\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"recipes-writing\" as canonical role name.Unknown interpreted text role \"recipes-writing\".", + "prompt_type": "human_bot" + }, + { + "instruction": "Are MOJOs supported for experiments that use custom recipes?", + "output": "In most cases, MOJOs will not be available for custom recipes. Unless the recipe is simple, creating the MOJO is only possible with additional MOJO runtime support. Contact support@h2o.ai for more information about creating MOJOs for custom recipes. (Note: The Python Scoring Pipeline features full support for custom recipes.)", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I use BYOR in my airgapped installation?", + "output": "If your Driverless AI environment cannot access Internet and, thus, cannot access Driverless AI's \"Bring Your Own Recipes\" from GitHub, please contact H2O support. 
We can work with you directly to help you access recipes.", + "prompt_type": "human_bot" + }, + { + "instruction": "When enabling recipes in Driverless AI, can I install Python packages from my organization's internal Python package index?", + "output": "Yes\u2014you can use the pip_install_options :ref:`TOML option ` to specify your organization's internal Python package index as follows:No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".pip_install_options=\"['--extra-index-url', 'http://my-own-repo:port']\"For more information on the --extra-index-url pip install option, refer to the official pip documentation.Experiments", + "prompt_type": "human_bot" + }, + { + "instruction": "How much memory does Driverless AI require in order to run experiments?", + "output": "Right now, Driverless AI requires approximately 10x the size of the data in system memory.", + "prompt_type": "human_bot" + }, + { + "instruction": "How many columns can Driverless AI handle?", + "output": "Driverless AI has been tested on datasets with 10k columns. When running experiments on wide data, Driverless AI automatically checks if it is running out of memory, and if it is, it reduces the number of features until it can fit in memory. This may lead to a worse model, but Driverless AI shouldn't crash because the data is wide.", + "prompt_type": "human_bot" + }, + { + "instruction": "How should I use Driverless AI if I have large data?", + "output": "Driverless AI can handle large datasets out of the box. For very large datasets (more than 10 billion rows x columns), we recommend sampling your data for Driverless AI. 
Keep in mind that the goal of Driverless AI is to go through many features and models to find the best modeling pipeline, and not to just train a few models on the raw data (H2O-3 is ideally suited for that case).For large datasets, the recommended steps are:Run with the recommended accuracy/time/interpretability settings first, especially accuracy <= 7Gradually increase accuracy settings to 7 and choose accuracy 9 or 10 only after observing runs with <= 7.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI detect the ID column?", + "output": "The ID column logic is one of the following:The column is named 'id', 'Id', 'ID' or 'iD' exactlyThe column contains a significant number of unique values (above max_relative_cardinality in the config.toml file or Max. allowed fraction of uniques for integer and categorical cols in Expert settings)", + "prompt_type": "human_bot" + }, + { + "instruction": "Can Driverless AI handle data with missing values/nulls?", + "output": "Yes, data that is imported into Driverless AI can include missing values. Feature engineering is fully aware of missing values, and missing values are treated as information - either as a special categorical level or as a special number. So for target encoding, for example, rows with a certain missing feature will belong to the same group. For Categorical Encoding where aggregations of a numeric column are calculated for a grouped categorical column, missing values are kept. The formula for calculating the mean is the sum of non-missing values divided by the count of all non-missing values. For clustering, we impute missing values.
And for frequency encoding, we count the number of rows that have a certain missing feature.The imputation strategy is as follows:XGBoost/LightGBM do not need missing value imputation and may, in fact, perform worse with any specific other strategy unless the user has a strong understanding of the data.Driverless AI automatically imputes missing values using the mean for GLM.Driverless AI provides an imputation setting for TensorFlow in the config.toml file: tf_nan_impute_value post-normalization. If you set this option to 0, then missing values will be imputed. Setting it to (for example) +5 will specify 5 standard deviations outside the distribution. The default for TensorFlow is -5, which specifies that TensorFlow will treat NAs like a missing value. We recommend that you specify 0 if the mean is better.More information is available in the Missing and Unseen Values Handling section.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI deal with categorical variables? What if an integer column should really be treated as categorical?", + "output": "If a column has string values, then Driverless AI will treat it as a categorical feature. There are multiple methods for how Driverless AI converts the categorical variables to numeric. These include:One Hot Encoding: creating dummy variables for each valueFrequency Encoding: replace category with how frequently it is seen in the dataTarget Encoding: replace category with the average target value (additional steps included to prevent overfitting)Weight of Evidence: calculate weight of evidence for each category (http://ucanalytics.com/blogs/information-value-and-weight-of-evidencebanking-case/)Driverless AI will try multiple methods for representing the column and determine which representation(s) are best.If the column has integers, Driverless AI will try treating the column as a categorical column and numeric column. 
It will treat any integer column as both categorical and numeric if the number of unique values is less than 50.This is configurable in the config.toml file:Cannot analyze code. Pygments package not found... code:: bash\n\n # Whether to treat some numerical features as categorical\n # For instance, sometimes an integer column may not represent a numerical feature but\n # represents different numerical codes instead.\n num_as_cat = true\n\n # Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only)\n max_int_as_cat_uniques = 50\n(Note: Driverless AI will also check if the distribution of any numeric column differs significantly from the distribution of typical numerical data using Benford's Law. If the column distribution does not obey Benford's Law, we will also try to treat it as categorical even if there are more than 50 unique values.)", + "prompt_type": "human_bot" + }, + { + "instruction": "How are outliers handled?", + "output": "Outliers are not removed from the data. Instead Driverless AI finds the best way to represent data with outliers. For example, Driverless AI may find that binning a variable with outliers improves performance.For target columns, Driverless AI first determines the best representation of the column. It may find that for a target column with outliers, it is best to predict the log of the column.", + "prompt_type": "human_bot" + }, + { + "instruction": "If I drop several columns from the Train dataset, will Driverless AI understand that it needs to drop the same columns from the Test dataset?", + "output": "If you drop columns from the training dataset, Driverless AI will do the same for the validation and test datasets (if the columns are present). 
There is no need for these columns because no features will be created from them.", + "prompt_type": "human_bot" + }, + { + "instruction": "Does Driverless AI treat numeric variables as categorical variables?", + "output": "In certain cases, yes. You can prevent this behavior by setting the num_as_cat variable in your installation's config.toml file to false. You can have finer grain control over this behavior by excluding the Numeric to Categorical Target Encoding Transformer and the Numeric To Categorical Weight of Evidence Transformer and their corresponding genes in your installation's config.toml file. To learn more about the config.toml file, see the :ref:`config_file` section.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".", + "prompt_type": "human_bot" + }, + { + "instruction": "Which algorithms are used in Driverless AI?", + "output": "Features are engineered with a proprietary stack of Kaggle-winning statistical approaches including some of the most sophisticated target encoding and likelihood estimates based on groupings, aggregations and joins, but we also employ linear models, neural nets, clustering and dimensionality reduction models and many traditional approaches such as one-hot encoding etc.On top of the engineered features, sophisticated models are fitted, including, but not limited to: XGBoost (both original XGBoost and 'lossguide' (LightGBM) mode), Decision Trees, GLM, TensorFlow (including a TensorFlow NLP recipe based on CNN Deeplearning models), RuleFit, FTRL (Follow the Regularized Leader), Isolation Forest, and Constant Models. (Refer to :ref:`supported_algorithms` for more information.) 
And additional algorithms can be added via :ref:`Recipes `.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".In general, GBMs are the best single-shot algorithms. Since 2006, boosting methods have proven to be the most accurate for noisy predictive modeling tasks outside of pattern recognition in images and sound (https://www.cs.cornell.edu/~caruana/ctp/ct.papers/caruana.icml06.pdf). The advent of XGBoost and Kaggle only cemented this position.", + "prompt_type": "human_bot" + }, + { + "instruction": "Why do my selected algorithms not show up in the Experiment Preview?", + "output": "When changing the algorithms used via Expert Settings > Model and Expert Settings > Recipes, you may notice in the Experiment Preview that those changes are not applied. Driverless AI determines whether to include models and/or recipes based on a hierarchy of those expert settings as well as data types (numeric, categorical, text, image, etc.) 
and system properties (GPUs, multiple GPUs, etc.).Setting an Algorithm to \"OFF\" in Expert Settings: If an algorithm is turned OFF in Expert Settings (for example, GLM Models) when running, then that algorithm will not be included in the experiment.Algorithms Not Included from Recipes (BYOR): If an algorithm from a custom recipe is not selected for the experiment in the Include specific models option, then that algorithm will not be included in the experiment, regardless of whether that same algorithm is set to AUTO or ON on the Expert Settings > Model page.Algorithms Not Specified as \"OFF\" and Included from Recipes: If a Driverless AI algorithm is specified as either \"AUTO\" or \"ON\" and additional models are selected for the experiment in the Include specific models option, then those algorithms may or may not be included in the experiment. Driverless AI will determine the algorithms to use based on the data and experiment type.To show warnings in the preview for which models were not used, set show_inapplicable_models_preview = true in config.toml", + "prompt_type": "human_bot" + }, + { + "instruction": "Why do my selected transformers not show up in the Experiment Preview?", + "output": "When changing the transformers used via Expert Settings > Transformers and Expert Settings > Recipes, you may notice in the Experiment Preview that those changes are not applied. Driverless AI determines whether to include transformers based upon data types (numeric, categorical, text, image, etc.)
and system properties (GPUs, multiple GPUs, etc.).Transformers Not Included from Recipes (BYOR): If a transformer from a custom recipe is not selected for the experiment in the Include specific transformers option, then that transformer will not be included in the experiment.To show warnings in the preview for which transformers were not used, set show_inapplicable_transformers_preview = true in config.toml", + "prompt_type": "human_bot" + }, + { + "instruction": "How can we turn on TensorFlow Neural Networks so they are evaluated?", + "output": "Neural networks are considered by Driverless AI, although they may not be evaluated by default. To ensure that neural networks are tried, you can turn on TensorFlow in the Expert Settings:Once you have set TensorFlow to ON, you should see the Experiment Preview on the left hand side change and mention that it will evaluate TensorFlow models:We recommend using TensorFlow neural networks if you have a multinomial use case with more than 5 unique values.", + "prompt_type": "human_bot" + }, + { + "instruction": "Does Driverless AI standardize the data?", + "output": "Driverless AI will automatically do variable standardization for certain algorithms. For example, with Linear Models and Neural Networks, the data is automatically standardized. For decision tree algorithms, however, we do not perform standardization because these algorithms do not benefit from standardization.", + "prompt_type": "human_bot" + }, + { + "instruction": "What objective function is used in XGBoost?", + "output": "The objective function used in XGBoost is:reg:squarederror and a custom absolute error objective function for regressionbinary:logistic or multi:softprob for classificationThe objective function does not change depending on the scorer chosen. The scorer influences parameter tuning only.
For regression, Tweedie, Gamma, and Poisson regression objectives are supported.More information on the XGBoost instantiations can be found in the logs and in the model summary, both of which can be downloaded from the GUI or found in the /tmp/h2oai_experiment_/ folder on the server.", + "prompt_type": "human_bot" + }, + { + "instruction": "Does Driverless AI perform internal or external validation?", + "output": "Driverless AI does internal validation when only training data is provided. It does external validation when training and validation data are provided. In either scenario, the validation data is used for all parameter tuning (models and features), not just for feature selection. Parameter tuning includes target transformation, model selection, feature engineering, feature selection, stacking, etc.Specifically:Internal validation (only training data given):Ideal when data is either close to i.i.d., or for time-series problemsInternal holdouts are used for parameter tuning, with temporal causality for time-series problemsWill do the full spectrum from single holdout split to 5-fold CV, depending on accuracy settingsNo need to split training data manuallyFinal models are trained using CV on the training dataExternal validation (training + validation data given):Ideal when there\u2019s some amount of drift in the data, and the validation set mimics the test set data better than the training dataNo training data wasted during training because training data not used for parameter tuningValidation data is used only for parameter tuning, and is not part of training dataNo CV possible because we explicitly do not want to overfit on the training dataNot allowed for time-series problems (see Time Series FAQ section that follows)Tip: If you want both training and validation data to be used for parameter tuning (the training process), just concatenate the datasets together and turn them both into training data for the \u201cinternal validation\u201d method.", + 
"prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI prevent overfitting?", + "output": "Driverless AI performs a number of checks to prevent overfitting. For example, during certain transformations, Driverless AI calculates the average on out-of-fold data using cross validation. Driverless AI also performs early stopping for every model built, ensuring that the model build will stop when it ceases to improve on holdout data. And additional steps to prevent overfitting include checking for i.i.d. and avoiding leakage during feature engineering.A blog post describing Driverless AI overfitting protection in greater detail is available here: https://www.h2o.ai/blog/driverless-ai-prevents-overfitting-leakage/.More aggressive overfit protection can be enabled by setting lock_ga_to_final_trees=true to true or using recipe='more_overfit_protection' and fixed_only_first_fold_model='true' and for time-series experiments allow_stabilize_varimp_for_ts=true.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI avoid the multiple hypothesis (MH) problem?", + "output": "Driverless AI uses a variant of the reusable holdout technique to address the multiple hypothesis problem. Refer to https://pdfs.semanticscholar.org/25fe/96591144f4af3d8f8f79c95b37f415e5bb75.pdf for more information.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI suggest the experiment settings?", + "output": "When you run an experiment on a dataset, the experiment settings (Accuracy, Time, and Interpretability) are automatically suggested by Driverless AI. For example, Driverless AI may suggest the parameters Accuracy = 7, Time = 3, Interpretability = 6, based on your data.Driverless AI will automatically suggest experiment settings based on the number of columns and number of rows in your dataset. The settings are suggested to ensure best handling when the data is small. 
If the data is small, Driverless AI will suggest the settings that prevent overfitting and ensure the full dataset is utilized.If the number of rows and number of columns are each below a certain threshold, then:Accuracy will be increased up to 8.The accuracy is increased so that cross validation is done. (We don't want to \"throw away\" any data for internal validation purposes.)Interpretability will be increased up to 8.The higher the interpretability setting, the smaller the number of features in the final model.More complex features are not allowed.This prevents overfitting.Time will be decreased down to 2.There will be fewer feature engineering iterations to prevent overfitting.", + "prompt_type": "human_bot" + }, + { + "instruction": "What happens when I set Interpretability and Accuracy to the same number?", + "output": "The answer is currently that interpretability controls which features are created and what features are kept. (Also above interpretability = 6, monotonicity constraints are used in XGBoost GBM, XGBoost Dart, LightGBM, and Decision Tree models.) The accuracy refers to how hard Driverless AI then tries to make those features into the most accurate model", + "prompt_type": "human_bot" + }, + { + "instruction": "Can I specify the number of GPUs to use when running Driverless AI?", + "output": "When running an experiment, the Expert Settings let you specify the starting GPU ID for Driverless AI to use. You can also specify the maximum number of GPUs to use per model and per experiment. Refer to the :ref:`expert-settings` section for more information.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I create the simplest model in Driverless AI?", + "output": "To create the simplest model in Driverless AI, set the following Experiment Settings:Set Accuracy to 1. 
Note that this can hurt performance as a sample will be used. If necessary, adjust the knob until the preview shows no sampling.Set Time to 1.Set Interpretability to 10.Next, configure the following Expert Settings:Turn OFF all algorithms except GLM.Set GLM models to ON.Set Ensemble level to 0.Set Select target transformation of the target for regression problems to Identity.Disable Data distribution shift detection.Disable Target Encoding.Alternatively, you can set Pipeline Building Recipe to Compliant. Compliant automatically configures the following experiment and expert settings:interpretability=10 (To avoid complexity. This overrides GUI or Python client settings for Interpretability.)enable_glm='on' (Remaining algos are 'off', to avoid complexity and be compatible with algorithms supported by MLI.)num_as_cat=true: Treat some numerical features as categorical. For instance, sometimes an integer column may not represent a numerical feature but represent different numerical codes instead.fixed_ensemble_level=0: Don't use any ensemble (to avoid complexity).feature_brain_level=0: No feature brain used (to ensure every restart is identical).max_feature_interaction_depth=1: Interaction depth is set to 1 (no multi-feature interactions to avoid complexity).target_transformer=\"identity\": For regression (to avoid complexity).check_distribution_shift=\"off\": Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning).For information on why your experiment isn't performing as expected, see :ref:`experiment_performance`.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".", + "prompt_type": "human_bot" + }, + { + "instruction": "When I run multiple experiments with different seeds, why do I see different scores, runtimes, and sizes on disk in the Experiments listing page?", + "output": "When running multiple experiments with
all of the same settings except the seed, understand that a feature brain level > 0 can lead to variations in models, features, timing, and sizes on disk. (The default value is 2.) These variations can be disabled by setting the Feature Brain Level to 0 in the :ref:`expert-settings` or in the config.toml file.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".In addition, if you use a different seed for each experiment, then each experiment can be different due to the randomness in the genetic algorithm that searches for the best features and model parameters. Only if Reproducible is set with the same seed and with a feature brain level of 0 should users expect the same outcome. Once a different seed is set, the models, features, timing, and sizes on disk can all vary within the constraints set by the choices made for the experiment. (I.e., accuracy, time, interpretability, expert settings, etc., all constrain the outcome, and then a different seed can change things within those constraints.)", + "prompt_type": "human_bot" + }, + { + "instruction": "Why does the final model performance appear to be worse than previous iterations?", + "output": "There are a few things to remember:Driverless AI creates a best effort estimate of the generalization performance of the best modeling pipeline found so far.The performance estimation is always based on holdout data (data unseen by the model).If no validation dataset is provided, the training data is split internally to create internal validation holdout data (once or multiple times or cross-validation, depending on the accuracy settings).If no validation dataset is provided, for accuracy <= 7, a single holdout split is used, and a \"lucky\" or \"unlucky\" split can bias estimates for small datasets or datasets with high variance.If a validation dataset is provided, then all performance estimates are solely based on the 
entire validation dataset (independent of accuracy settings).All scores reported are based on bootstrapped-based statistical methods and come with error bars that represent a range of estimate uncertainty.After the final iteration, a best final model is trained on a final set of engineered features. Depending on accuracy settings, a more accurate estimation of generalization performance may be done using cross-validation. Also, the final model may be a stacked ensemble consisting of multiple base models, which generally leads to better performance. Consequently, in rare cases, the difference in performance estimation method can lead to the final model's estimated performance seeming poorer than those from previous iterations. (i.e., The final model's estimated score is significantly worse than the last iteration score and error bars don't overlap.) In that case, it is very likely that the final model performance estimation is more accurate, and the prior estimates were biased due to a \"lucky\" split. To confirm this, you can re-run the experiment multiple times (without setting the reproducible flag).If you would like to minimize the likelihood of the final model performance appearing worse than previous iterations, here are some recommendations:Increase accuracy settingsProvide a validation datasetProvide more data", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I find features that may be causing data leakages in my Driverless AI model?", + "output": "To find original features that are causing leakage, have a look at features_orig.txt in the experiment summary download. Features causing leakage will have high importance there. To get a hint at derived features that might be causing leakage, create a new experiment with dials set to 2/2/8, and run the new experiment on your data with all your features and response. Then analyze the top 1-2 features in the model variable importance. 
They are likely the main contributors to data leakage if it is occurring.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I see the performance metrics on the test data?", + "output": "As long as you provide a target column in the test set, Driverless AI will show the best estimate of the final model's performance on the test set at the end of the experiment. The test set is never used to tune parameters (unlike what Kagglers often do), so this is purely a convenience. Of course, you can still make test set predictions and compute your own metrics using a method of your choice.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I see all the performance metrics possible for my experiment?", + "output": "At the end of the experiment, the model's estimated performance on all provided datasets with a target column is printed in the experiment logs. For example, for the test set:Cannot analyze code. Pygments package not found... code:: bash\n\n Final scores on test (external holdout) +/- stddev:\n GINI = 0.87794 +/- 0.035305 (more is better)\n MCC = 0.71124 +/- 0.043232 (more is better)\n F05 = 0.79175 +/- 0.04209 (more is better)\n F1 = 0.75823 +/- 0.038675 (more is better)\n F2 = 0.82752 +/- 0.03604 (more is better)\n ACCURACY = 0.91513 +/- 0.011975 (more is better)\n LOGLOSS = 0.28429 +/- 0.016682 (less is better)\n AUCPR = 0.79074 +/- 0.046223 (more is better)\n optimized: AUC = 0.93386 +/- 0.018856 (more is better)\n", + "prompt_type": "human_bot" + }, + { + "instruction": "What if my training/validation and testing data sets come from different distributions?", + "output": "In general, Driverless AI uses training data to engineer features and train models and validation data to tune all parameters. If no external validation data is given, the training data is used to create internal holdouts. The way holdouts are created internally depends on whether there is a strong time dependence, see the point below.
If the data has no obvious time dependency (e.g., if there is no time column, either implicit or explicit), or if the data can be sorted arbitrarily and it won't affect the outcome (e.g., Iris data, predicting flower species from measurements), and if the test dataset is different (e.g., new flowers or only large flowers), then the model performance on validation (either internal or external) as measured during training won't be achieved during final testing due to the obvious inability of the model to generalize.", + "prompt_type": "human_bot" + }, + { + "instruction": "Does Driverless AI handle weighted data?", + "output": "Yes. You can optionally provide an extra weight column in your training (and validation) data with non-negative observation weights. This can be useful to implement domain-specific effects such as exponential weighting in time or class weights. All of our algorithms and metrics in Driverless AI support observation weights, but note that estimated likelihoods can be skewed as a consequence.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI handle fold assignments for weighted data?", + "output": "Currently, Driverless AI does not take the weights into account during fold creation, but you can provide a fold column to enforce your own grouping, i.e., to keep rows that belong to the same group together (either in train or valid). The fold column has to be a categorical column (integers ok) that assigns a group ID to each row. (It needs to have at least 5 groups because we do up to 5-fold CV.)", + "prompt_type": "human_bot" + }, + { + "instruction": "Why do I see that adding new features to a dataset deteriorates the performance of the model?", + "output": "You may notice that after adding one or more new features to a dataset, it deteriorates the performance of the Driverless AI model.
In Driverless AI, the feature engineering sequence is fairly random and may end up not doing the same things with original features if you restart entirely fresh with new columns.Beginning in Driverless AI v1.4.0, you now have the option to Restart from Last Checkpoint. This lets you pull in a new dataset with more columns, and Driverless AI will more iteratively take advantage of the new columns.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI handle imbalanced data for binary classification experiments?", + "output": "If you have data that is imbalanced, a binary imbalanced model can help to improve scoring with a variety of imbalanced sampling methods. An imbalanced model is able to take advantage of most (or even all) of the imbalanced dataset's positive values during sampling, while a regular model significantly limits the population of positive values. Imbalanced models, however, take more time to make predictions, and they are not always more accurate than regular models. We still recommend that you try using an imbalanced model if your data is imbalanced to see if scoring is improved over a regular model. Note that this information only applies to binary models.", + "prompt_type": "human_bot" + }, + { + "instruction": "How is feature importance calculated in Driverless AI?", + "output": "For most models, such as XGBoost or LightGBM models, Driverless AI uses normalized information gain to calculate feature importance. Other estimates of importance are sometimes used for certain models.", + "prompt_type": "human_bot" + }, + { + "instruction": "I want to have only one LightGBM model in the final pipeline. How can I do this?", + "output": "You can do this by using :ref:`ensemble-levels`. To change the ensemble level, use the Ensemble Level for Final Modeling Pipeline expert setting (fixed_ensemble_level in the config.toml), which is located in the Model tab. If you want a single model, use level 0.
If you are okay with using the same model with hyperparameters but trained with multiple cross validation folds, then use level 1.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".To use only one model type, use the Include Specific Models expert setting, which is located in the Recipes tab.For more information, see :ref:`ensemble-learning-in-dai`.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".Setting fixed_ensemble_level = 0 returns a single model trained on one hundred percent of the data, not just a single model type with CV.When the Cross-validate Single Final Model expert setting is enabled (default), the single model with fixed_ensemble_level = 0 has the optimal number of trees because it is tuned with CV. Disabling this setting is not recommended when fixed_ensemble_level = 0.\"Ensemble", + "prompt_type": "human_bot" + }, + { + "instruction": "I want to have only one LightGBM model and no FE. How can I do this?", + "output": "You can do this by additionally limiting the set of allowed transformations to just the OriginalTransformer, which leaves numeric features in their original form and drops all non-numeric features. To include or exclude specific transformers in your Driverless AI environment, use the Include Specific Transformers expert setting (included_transformers in the config.toml), which is located in the Recipes tab. 
You can also set the Feature Engineering Effort expert setting (feature_engineering_effort in the config.toml) to 0 to achieve the same effect. For more information, see :ref:`Transformations`.\"Include", + "prompt_type": "human_bot" + }, + { + "instruction": "What is fast approximation in Driverless AI?", + "output": "Fast approximation is available for both regular and Shapley predictions. It is enabled by default for MLI / AutoDoc and turned off by default for other clients. The extent of approximation can be fully configured or turned off with the fast approximation expert settings. Enabling fast approximation can result in a significant speedup for large prediction tasks like the creation of partial dependence plots and other MLI-related tasks. The following is a list of expert settings that can be used to configure fast approximation. Regular predictions: :ref:`fast-approx-trees`, :ref:`fast-approx-one-fold`, :ref:`fast-approx-one-model`. Shapley predictions: :ref:`fast-approx-trees-shap`, :ref:`fast-approx-one-fold-shap`, :ref:`fast-approx-one-model-shap`. 
MLI: :ref:`mli_fast_approx`.", + "prompt_type": "human_bot" + }, + { + "instruction": "When should fast approximation be turned off?", + "output": "In situations where a more detailed partial dependence plot or interpretation is required, you may want to disable fast approximation.", + "prompt_type": "human_bot" + }, + { + "instruction": "Why does the confusion matrix sometimes show decimals instead of whole numbers?", + "output": "Fractional confusion matrix values most commonly arise as a consequence of the averaging of confusion matrices across cross-validation fold splits or across repeated fold splits, but the same can also happen for non-integer observation weights.", + "prompt_type": "human_bot" + }, + { + "instruction": "Is data sampling for multiclass use cases supported?", + "output": "Data sampling for multiclass use cases is not currently supported. However, it is possible to approximate the data sampling approach by adding more weight in order to penalize rare classes. You can add weight to an individual observation by using a :ref:`weight column ` when setting up your experiment. You can also enable LightGBM multiclass balancing by setting the enable_lightgbm_multiclass_balancing configuration setting to on, which enables automatic class weighting for imbalanced multiclass problems.Feature Transformations", + "prompt_type": "human_bot" + }, + { + "instruction": "Where can I get details of the various transformations performed in an experiment?", + "output": "Download the experiment's log .zip file from the GUI. 
This zip file includes summary information, log information, and a gene_summary.txt file with details of the transformations used in the experiment. Specifically, there is a details folder with all subprocess logs.On the server, the experiment specific files are inside the /tmp/h2oai_experiment_/ folder after the experiment completes, particularly h2oai_experiment_logs_.zip and h2oai_experiment_summary_.zip.Predictions", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I download the predictions onto the machine where Driverless AI is running?", + "output": "When you select Score on Another Dataset, the predictions will automatically be stored on the machine where Driverless AI is running. They will be saved in the following locations (and can be opened again by Driverless AI, both for .csv and .bin):Training Data Predictions: tmp/h2oai_experiment_/train_preds.csv (also saved as .bin)Testing Data Predictions: tmp/h2oai_experiment_/test_preds.csv (also saved as .bin)New Data Predictions: tmp/h2oai_experiment_/automatically_generated_name.csv. Note that the automatically generated name will match the name of the file downloaded to your local computer.", + "prompt_type": "human_bot" + }, + { + "instruction": "Why are predicted probabilities not available when I run an experiment without ensembling?", + "output": "When Driverless AI provides pre-computed predictions after completing an experiment, it uses only those parts of the modeling pipeline that were not trained on the particular rows for which the predictions are made. This means that Driverless AI needs holdout data in order to create predictions, such as validation or test sets, where the model is trained on training data only. 
In the case of ensembles, Driverless AI uses cross-validation to generate holdout folds on the training data, so we are able to provide out-of-fold estimates for every row in the training data and, hence, can also provide training holdout predictions (that will provide a good estimate of generalization performance). In the case of a single model that is trained on 100% of the training data, though, there is no way to create unbiased estimates for any row in the training data. While DAI uses an internal validation dataset, this is a re-usable holdout, and therefore will not contain holdout predictions for the full training dataset. You need cross-validation in order to get out-of-fold estimates, and then that's not a single model anymore. If you want to still get predictions for the training data for a single model, then you have to use the scoring API to create predictions on the training set. From the GUI, this can be done using the Score on Another Dataset button for a completed experiment. Note, though, that the results will likely be overly optimistic, too good to be true, and virtually useless.Deployment", + "prompt_type": "human_bot" + }, + { + "instruction": "What drives the size of a MOJO?", + "output": "The size of the MOJO is based on the complexity of the final modeling pipeline (i.e., feature engineering and models). One of the biggest factors is the amount of higher-order interactions between features, especially target encoding and related features, which have to store lookup tables for all possible combinations observed in the training data. You can reduce the amount of these transformations by reducing the value of Max. feature interaction depth and/or Feature engineering effort under Expert Settings, or by increasing the interpretability settings for the experiment. Ensembles also contribute to the final modeling pipeline's complexity as each model has its own pipeline. 
Lowering the accuracy settings or setting :ref:`ensemble level ` to a lower number. The number of features Max. pipeline features also affects the MOJO size. Text transformers are pretty bulky as well and can add to the MOJO size.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".To toggle to a smaller mojo during model building with a single click, see - :ref:`Reduce mojo size ` under experiment settings of an experiment.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".", + "prompt_type": "human_bot" + }, + { + "instruction": "Are MOJOs thread safe?", + "output": "Yes, all Driverless AI MOJOs are thread safe.", + "prompt_type": "human_bot" + }, + { + "instruction": "Running the scoring pipeline for my MOJO is taking several hours. How can I get this to run faster?", + "output": "When running example.sh, Driverless AI implements a memory setting, which is suitable for most use cases. For very large models, however, it may be necessary to increase the memory limit when running the Java application for data transformation. This can be done using the -Xmx25g parameter. For example:Cannot analyze code. Pygments package not found... code:: bash\n\n java -Xmx25g -Dai.h2o.mojos.runtime.license.file=license.sig -cp mojo2-runtime.jar ai.h2o.mojos.ExecuteMojo pipeline.mojo example.csv\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Why have I encountered a \"Best Score is not finite\" error?", + "output": "Driverless AI uses 32-bit floats by default. You may encounter this error if your data value exceeds 1E38 or if you are resolving more than 1 part in 10 million. 
You can resolve this error using one of the following methods: Enable the Force 64-bit Precision option in the experiment's Expert Settings, or set data_precision=\"float64\" and transformer_precision=\"float64\" in config.toml.Time Series", + "prompt_type": "human_bot" + }, + { + "instruction": "What if my data has a time dependency?", + "output": "If you know that your data has a strong time dependency, select a time column before starting the experiment. The time column must be in a Datetime format that can be parsed by pandas, such as \"2017-11-06 14:32:21\", \"Monday, June 18, 2012\" or \"Jun 18 2018 14:34:00\" etc., or contain only integers. If you are unsure about the strength of the time dependency, run two experiments: One with time column set to \"[OFF]\" and one with time column set to \"[AUTO]\" (or pick a time column yourself).", + "prompt_type": "human_bot" + }, + { + "instruction": "What is a lag, and why does it help?", + "output": "A lag is a feature value from a previous point in time. Lags are useful to take advantage of the fact that the current (unknown) target value is often correlated with previous (known) target values. Hence, they can better capture target patterns along the time axis. Why can't I specify a validation data set for time-series problems? Why do you look at the test set for time-series problems? The problem with validation vs test in the time series setting is that there is only one valid way to define the split. If a test set is given, its length in time defines the validation split and the validation data has to be part of train. Otherwise the time-series validation won't be useful. For instance: Let's assume we have train = [1,2,3,4,5,6,7,8,9,10] and test = [12,13], where integers define time periods (e.g., weeks). For this example, the most natural train/valid split that mimics the test scenario would be: train = [1,2,3,4,5,6,7] and valid = [9,10], and period 8 is not included in the training set to allow for a gap. 
Note that we will look at the start time and the duration of the test set only (if provided), and not at the contents of the test data (neither features nor target). If the user provides validation = [8,9,10] instead of test data, then this could lead to inferior validation strategy and worse generalization. Hence, we use the user-given test set only to create the optimal internal train/validation splits. If no test set is provided, the user can provide the length of the test set (in periods), the length of the train/test gap (in periods) and the length of the period itself (in seconds).", + "prompt_type": "human_bot" + }, + { + "instruction": "Why does the gap between train and test matter? Is it because of creating the lag features on the test set?", + "output": "Taking the gap into account is necessary in order to avoid too optimistic estimates of the true error and to avoid creating history-based features like lags for the training and validation data (which cannot be created for the test data due to the missing information).", + "prompt_type": "human_bot" + }, + { + "instruction": "In regards to applying the target lags to different subsets of the time group columns, are you saying Driverless AI perform auto-correlation at \"levels\" of the time series? For example, consider the Walmart dataset where I have Store and Dept (and my target is Weekly Sales). Are you saying that Driverless AI checks for auto-correlation in Weekly Sales based on just Store, just Dept, and both Store and Dept?", + "output": "Currently, auto-correlation is only applied on the detected superkey (entire TGC) of the training dataset relation at the very beginning. 
It's used to rank potential lag-sizes, with the goal to prune the search space for the GA optimization process, which is responsible for selecting the lag features.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI detect the time period?", + "output": "Driverless AI treats each time series as a function with some frequency 1/ns. The actual value is estimated by the median of time deltas across maximal length TGC subgroups. The chosen SI unit minimizes the distance to all available SI units.", + "prompt_type": "human_bot" + }, + { + "instruction": "What is the logic behind the selectable numbers for forecast horizon length?", + "output": "The shown forecast horizon options are based on quantiles of valid splits. This is necessary because Driverless AI cannot display all possible options in general.", + "prompt_type": "human_bot" + }, + { + "instruction": "Assume that in my Walmart dataset, all stores provided data at the week level, but one store provided data at the day level. What would Driverless AI do?", + "output": "Driverless AI would still assume \"weekly data\" in this case because the majority of stores are yielding this property. The \"daily\" store would be resampled to the detected overall frequency.", + "prompt_type": "human_bot" + }, + { + "instruction": "Assume that in my Walmart dataset, all stores and departments provided data at the weekly level, but one department in a specific store provided weekly sales on a bi-weekly basis (every two weeks). What would Driverless AI do?", + "output": "That's similar to having missing data. Due to proper resampling, Driverless AI can handle this without any issues.", + "prompt_type": "human_bot" + }, + { + "instruction": "Why does the number of weeks that you want to start predicting matter?", + "output": "That's an option to provide a train-test gap if no test data is available. 
That is to say, \"I don't have my test data yet, but I know it will have a gap to train of x.\"", + "prompt_type": "human_bot" + }, + { + "instruction": "Are the scoring components of time series sensitive to the order in which new pieces of data arrive? I.e., is each row independent at scoring time, or is there a real-time windowing effect in the scoring pieces?", + "output": "Each row is independent at scoring time.", + "prompt_type": "human_bot" + }, + { + "instruction": "What happens if the user, at predict time, gives a row with a time value that is too small or too large?", + "output": "Internally, \"out-of-bounds\" time values are encoded with special values. The samples will still be scored, but the predictions won't be trustworthy.", + "prompt_type": "human_bot" + }, + { + "instruction": "What's the minimum data size for a time series recipe?", + "output": "We recommend that you have around 10,000 validation samples in order to get a reliable estimate of the true error. The time series recipe can still be applied for smaller data, but the validation error might be inaccurate.", + "prompt_type": "human_bot" + }, + { + "instruction": "How long must the training data be compared to the test data?", + "output": "At a minimum, the training data has to be at least twice as long as the test data along the time axis. However, we recommend that the training data be at least three times as long as the test data.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does the time series recipe deal with missing values?", + "output": "Missing values will be converted to a special value, which is different from any non-missing feature value. 
Explicit imputation techniques won't be applied.", + "prompt_type": "human_bot" + }, + { + "instruction": "Can the time information be distributed across multiple columns in the input data (such as [year, day, month])?", + "output": "Currently Driverless AI requires the data to have the time stamps given in a single column. Driverless AI will create additional time features like [year, day, month] on its own, if they turn out to be useful.", + "prompt_type": "human_bot" + }, + { + "instruction": "What type of modeling approach does Driverless AI use for time series?", + "output": "Driverless AI combines the creation of history-based features like lags, moving averages etc. with the modeling techniques, which are also applied for i.i.d. data. The primary model of choice is XGBoost.", + "prompt_type": "human_bot" + }, + { + "instruction": "What's the idea behind exponential weighting of moving averages?", + "output": "Exponential weighting accounts for the possibility that more recent observations are better suited to explain the present than older observations.Logging", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I reduce the size of the Audit Logger?", + "output": "An Audit Logger file is created every day that Driverless AI is in use. The audit_log_retention_period config variable lets you specify the number of days, after which the audit.log will be overwritten. This option defaults to 5 days, which means that Driverless AI will maintain Audit Logger files for the last 5 days, and audit.log files older than 5 days are removed and replaced with newer log files. When this option is set to 0, the audit.log file will not be overwritten.", + "prompt_type": "human_bot" + }, + { + "instruction": "How is Driverless AI different than any other black box ML algorithm?", + "output": "Driverless AI uses many techniques (some older and some cutting-edge) for interpreting black box models including creating reason codes for every prediction the system makes. 
We have also created numerous open source code examples and free publications that explain these techniques. See the list below for links to these resources and for references for the interpretability techniques.Open source interpretability examples:https://github.com/jphall663/interpretable_machine_learning_with_pythonhttps://content.oreilly.com/oriole/Interpretable-machine-learning-with-Python-XGBoost-and-H2Ohttps://github.com/h2oai/mli-resourcesFree Machine Learning Interpretability publications:http://www.oreilly.com/data/free/an-introduction-to-machine-learning-interpretability.csphttp://docs.h2o.ai/driverless-ai/latest-stable/docs/booklets/MLIBooklet.pdfMachine Learning Techniques already in Driverless AI:Tree-based Variable Importance: https://web.stanford.edu/~hastie/ElemStatLearn/printings/ESLII_print12.pdfPartial Dependence: https://web.stanford.edu/~hastie/ElemStatLearn/printings/ESLII_print12.pdfLIME: http://www.kdd.org/kdd2016/papers/files/rfp0573-ribeiroA.pdfLOCO: http://www.stat.cmu.edu/~ryantibs/papers/conformal.pdfICE: https://arxiv.org/pdf/1309.6392.pdfSurrogate Models:https://papers.nips.cc/paper/1152-extracting-tree-structured-representations-of-trained-networks.pdfhttps://arxiv.org/pdf/1705.08504.pdfShapley Explanations: http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions", + "prompt_type": "human_bot" + }, + { + "instruction": "How often do new versions come out?", + "output": "The frequency of major new Driverless AI releases has historically been about every two months.Installation/Upgrade/Authentication", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I change my username and password?", + "output": "The username and password are tied to the experiments you have created. For example, if I log in with the username/password: megan/megan and start an experiment, then I would need to log back in with the same username and password to see those experiments. 
The username and password, however, do not limit your access to Driverless AI. If you want to use a new user name and password, you can log in again with a new username and password, but keep in mind that you won't see your old experiments.", + "prompt_type": "human_bot" + }, + { + "instruction": "Can Driverless AI run on CPU-only machines?", + "output": "Yes, Driverless AI can run on machines with CPUs only, though GPUs are recommended. Installation instructions are available for GPU and CPU systems. Refer to :ref:`before_you_begin` for more information.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I upgrade to a newer version of Driverless AI?", + "output": "Upgrade instructions vary depending on your environment. Refer to the installation section for your environment. Upgrade instructions are included there.", + "prompt_type": "human_bot" + }, + { + "instruction": "What kind of authentication is supported in Driverless AI?", + "output": "Driverless AI supports Client Certificate, LDAP, Local, mTLS, OpenID, none, and unvalidated (default) authentication. These can be configured by setting the appropriate environment variables in the config.toml file or by specifying the environment variables when starting Driverless AI. Refer to :ref:`dai_auth` for more information.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I automatically turn on persistence each time the GPU system reboots?", + "output": "For GPU machines, the sudo nvidia-persistenced --user dai command can be run after each reboot to enable persistence. 
For systems that have systemd, it is possible to automatically enable persistence after each reboot by removing the --no-persistence-mode flag from nvidia-persistenced.service. Before running the steps below, be sure to review the following for more information:https://docs.nvidia.com/deploy/driver-persistence/index.html#persistence-daemonhttps://docs.nvidia.com/deploy/driver-persistence/index.html#installationRun the following to stop the nvidia-persistenced.service:Cannot analyze code. Pygments package not found... code:: bash\n\n sudo systemctl stop nvidia-persistenced.service\nOpen the file /lib/systemd/system/nvidia-persistenced.service. This file includes a line \"ExecStart=/usr/bin/nvidia-persistenced --user nvidia-persistenced --no-persistence-mode --verbose\".Remove the flag --no-persistence-mode from that line so that it reads:Enumerated list start value not ordinal-1: \"2\" (ordinal 2)Cannot analyze code. Pygments package not found... code:: bash\n\n ExecStart=/usr/bin/nvidia-persistenced --user nvidia-persistenced --verbose\nRun the following command to start the nvidia-persistenced.service:Enumerated list start value not ordinal-1: \"4\" (ordinal 4)Cannot analyze code. Pygments package not found... code:: bash\n\n sudo systemctl start nvidia-persistenced.service\n", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I start Driverless AI on a different port than 12345?", + "output": "No directive entry for \"tabs\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"tabs\" as canonical directive name.Unknown directive type \"tabs\"... tabs::\n .. group-tab:: Docker Image Installs\n\n When starting Driverless AI in Docker, the ``-p`` option specifies the port on which Driverless AI will run. Change this option in the start script if you need to run on a port other than 12345. The following example shows how to run on port 22345. (Change ``nvidia-docker run`` to ``docker-run`` if needed.) 
Keep in mind that `priviliged ports will require root access `__.\n\n .. code-block:: bash\n :substitutions:\n\n nvidia-docker run \\\n --pid=host \\\n --init \\\n --rm \\\n --shm-size=256m \\\n -u `id -u`:`id -g` \\\n -p 22345:12345 \\\n -v `pwd`/data:/data \\\n -v `pwd`/log:/log \\\n -v `pwd`/license:/license \\\n -v `pwd`/tmp:/tmp \\\n h2oai/dai-ubi8-x86_64:|tag|\n\n .. group-tab:: Native Installs\n\n To run on a port other than 12345, update the port value in the **config.toml** file. The following example shows how to run Driverless AI on port 22345. Keep in mind that `priviliged ports will require root access `__.\n\n ::\n\n # Export the Driverless AI config.toml file (or add it to ~/.bashrc)\n export DRIVERLESS_AI_CONFIG_FILE=\u201c/config/config.toml\u201d\n\n # IP address and port for Driverless AI HTTP server.\n ip = \"127.0.0.1\"\n port = 22345\n\n Point to this updated config file when restarting Driverless AI.\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Can I set up TLS/SSL on Driverless AI?", + "output": "Yes, Driverless AI provides configuration options that let you set up HTTPS/TLS/SSL. You will need to have your own SSL certificate, or you can create a self-signed certificate for yourself.To enable HTTPS/TLS/SSL on the Driverless AI server, add the following to the config.toml file:Cannot analyze code. Pygments package not found... code:: bash\n\n enable_https = true\n ssl_key_file = \"/etc/dai/private_key.pem\"\n ssl_crt_file = \"/etc/dai/cert.pem\"\nYou can make a self-signed certificate for testing with the following commands:Cannot analyze code. Pygments package not found... 
code:: bash\n\n umask 077\n openssl req -x509 -newkey rsa:4096 -keyout private_key.pem -out cert.pem -days 20 -nodes -subj '/O=Driverless AI'\n sudo chown dai:dai cert.pem private_key.pem\n sudo mv cert.pem private_key.pem /etc/dai\nTo configure specific versions of TLS/SSL, enable or disable the following settings in the config.toml file:Cannot analyze code. Pygments package not found... code:: bash\n\n ssl_no_sslv2 = true\n ssl_no_sslv3 = true\n ssl_no_tlsv1 = true\n ssl_no_tlsv1_1 = true\n ssl_no_tlsv1_2 = false\n ssl_no_tlsv1_3 = false\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Can I set up TLS/SSL on Driverless AI in AWS?", + "output": "Yes, you can set up HTTPS/TLS/SSL on Driverless AI running in an AWS environment. HTTPS/TLS/SSL needs to be configured on the host machine, and the necessary ports will need to be opened on the AWS side. You will need to have your own TLS/SSL cert or you can create a self signed cert for yourself.The following is a very simple example showing how to configure HTTPS with a proxy pass to the port on the container 12345 with the keys placed in /etc/nginx/. Replace with your server name.Cannot analyze code. Pygments package not found... 
code:: bash\n\n server {\n listen 80;\n return 301 https://$host$request_uri;\n }\n\n server {\n listen 443;\n\n # Specify your server name here\n server_name ;\n\n ssl_certificate /etc/nginx/cert.crt;\n ssl_certificate_key /etc/nginx/cert.key;\n ssl on;\n ssl_session_cache builtin:1000 shared:SSL:10m;\n ssl_protocols TLSv1 TLSv1.1 TLSv1.2;\n ssl_ciphers HIGH:!aNULL:!eNULL:!EXPORT:!CAMELLIA:!DES:!MD5:!PSK:!RC4;\n ssl_prefer_server_ciphers on;\n\n access_log /var/log/nginx/dai.access.log;\n\n location / {\n proxy_set_header Host $host;\n proxy_set_header X-Real-IP $remote_addr;\n proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n proxy_set_header X-Forwarded-Proto $scheme;\n\n # Fix the \u201cIt appears that your reverse proxy set up is broken\" error.\n proxy_pass http://localhost:12345;\n proxy_read_timeout 90;\n\n # Specify your server name for the redirect\n proxy_redirect http://localhost:12345 https://;\n }\n }\nMore information about SSL for Nginx in Ubuntu 16.04 can be found here: https://www.digitalocean.com/community/tutorials/how-to-create-a-self-signed-ssl-certificate-for-nginx-in-ubuntu-16-04.", + "prompt_type": "human_bot" + }, + { + "instruction": "I received a \"package dai-.x86_64 does not verify: no digest\" error during the installation. How can I fix this?", + "output": "You will recieve a \"package dai-.x86_64 does not verify: no digest\" error when installing the rpm using an RPM version newer than 4.11.3. You can run the following as a workaround, replacing with your DAI version:Cannot analyze code. Pygments package not found... code:: bash\n\n rpm --nodigest -i dai-.x86_64.rpm\n", + "prompt_type": "human_bot" + }, + { + "instruction": "I received a \"Must have exactly one OpenCL platform 'NVIDIA CUDA'\" error. How can I fix that?", + "output": "If you encounter problems with opencl errors at server time, you may see the following message:Cannot analyze code. Pygments package not found... 
code:: bash\n\n 2018-11-08 14:26:15,341 C: D:452.2GB M:246.0GB 21603 ERROR : Must have exactly one OpenCL platform 'NVIDIA CUDA', but got:\n Platform #0: Clover\n Platform #1: NVIDIA CUDA\n +-- Device #0: GeForce GTX 1080 Ti\n +-- Device #1: GeForce GTX 1080 Ti\n +-- Device #2: GeForce GTX 1080 Ti\n\n Uninstall all but 'NVIDIA CUDA' platform.\nFor Ubuntu, the solution is to run the following:Cannot analyze code. Pygments package not found... code:: bash\n\n sudo apt-get remove mesa-opencl-icd\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Is it possible for multiple users to share a single Driverless AI instance?", + "output": "Driverless AI supports multiple users, and Driverless AI is licensed per a single named user. Therefore, in order, to have different users run experiments simultaneously, they would each need a license. Driverless AI manages the GPU(s) that it is given and ensures that different experiments from different users can run safely simultaneously and don\u2019t interfere with each other. So when two licensed users log in with different credentials, then neither of them will see the other\u2019s experiment. Similarly, if a licensed user logs in using a different set of credentials, then that user will not see any previously run experiments.", + "prompt_type": "human_bot" + }, + { + "instruction": "Can multiple Driverless AI users share a GPU server?", + "output": "Yes, you can allocate multiple users in a single GPU box. For example, a single box with four GPUs can allocate that User1 has two GPUs and User2 has the other two GPUs. This is accomplished by having two separated Driverless AI instances running on the same server.There are two ways to assign specific GPUs to Driverless AI. And in the scenario with four GPUs (two GPUs allocated to two users), both of these options allow each Docker container only to see two GPUs.Use the CUDA_VISIBLE_DEVICES environment variable. 
In the case of Docker deployment, this will translate in passing the -e CUDA_VISIBLE_DEVICES=\"0,1\" to the nvidia-docker run command.Passing the NV_GPU option at the beginning of the nvidia-docker run command. (See example below.)Error in \"code-block\" directive:\nunknown option: \"substitutions\"... code-block:: bash\n :substitutions:\n\n #Team 1\n NV_GPU='0,1' nvidia-docker run\n --pid=host\n --init\n --rm\n --shm-size=256m\n -u id -u:id -g\n -p port-to-team:12345\n -e DRIVERLESS_AI_CONFIG_FILE=\"/config/config.toml\"\n -v /data:/data\n -v /log:/log\n -v /license:/license\n -v /tmp:/tmp\n -v /config:/config\n h2oai/dai-ubi8-x86_64:|tag|\n\n\n #Team 2\n NV_GPU='0,1' nvidia-docker run\n --pid=host\n --init\n --rm\n --shm-size=256m\n -u id -u:id -g\n -p port-to-team:12345\n -e DRIVERLESS_AI_CONFIG_FILE=\"/config/config.toml\"\n -v /data:/data\n -v /log:/log\n -v /license:/license\n -v /tmp:/tmp\n -v /config:/config\n h2oai/dai-ubi8-x86_64:|tag|\nNote, however, that a Driverless AI instance expects to fully utilize and not share the GPUs that are assigned to it. Sharing a GPU with other Driverless AI instances or other running programs can result in out-of-memory issues.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I retrieve a list of Driverless AI users?", + "output": "A list of users can be retrieved using the Python client.Cannot analyze code. Pygments package not found... code:: bash\n\n h2o = Client(address='http://:12345', username='', password='')\n h2o.get_users()\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Start of Driverless AI fails on the message ``Segmentation fault (core dumped)`` on Ubuntu 18/RHEL 7.6. How can I fix this?", + "output": "This problem is caused by the font NotoColorEmoji.ttf, which cannot be processed by the Python matplotlib library. A workaround is to disable the font by renaming it. (Do not use fontconfig because it is ignored by matplotlib.) 
The following will print out the command that should be executed.Cannot analyze code. Pygments package not found... code:: bash\n\n sudo find / -name \"NotoColorEmoji.ttf\" 2>/dev/null | xargs -I{} echo sudo mv {} {}.backup\n\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Which Linux systems does Driverless AI support?", + "output": "Supported Linux systems include x86_64 RHEL 7, RHEL 8, CentOS 7, and CentOS 8.Data", + "prompt_type": "human_bot" + }, + { + "instruction": "Is there a file size limit for datasets?", + "output": "For GBMs, the file size for datasets is limited by the collective CPU or GPU memory on the system, but we continue to make optimizations for getting more data into an experiment, such as using TensorFlow streaming to stream to arbitrarily large datasets.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I import CSV files that use UTF-8 encoding into Excel?", + "output": "Excel requires a byte order mark (BOM) to correctly identify CSV files that use UTF-8 encoding. Refer to the following FAQ entry for more information on how to use a BOM when writing CSV files with datatable.", + "prompt_type": "human_bot" + }, + { + "instruction": "Can a byte order mark be used when writing CSV files with datatable?", + "output": "Yes, a byte order mark (BOM) can be used when writing CSV files with datatable by enabling datatable_bom_csv in the config.toml file when starting Driverless AI.Note: Support for UTF-8 encoding in Excel requires the use of a BOM.", + "prompt_type": "human_bot" + }, + { + "instruction": "Which version of Longhorn is supported by Driverless AI?", + "output": "Driverless AI supports Longhorn v1.1.0 or later.", + "prompt_type": "human_bot" + }, + { + "instruction": "Is it possible to download a transformed test dataset in Driverless AI?", + "output": "Yes, a transformed test dataset can be downloaded in Driverless AI. 
To do this, click Model Actions > Transform Dataset on the completed experiment page, then specify both a train and a test dataset to use for the transformation. The transformed test dataset is made available for download once this process is completed.Connectors", + "prompt_type": "human_bot" + }, + { + "instruction": "Why can't I import a folder as a file when using a data connector on Windows?", + "output": "If you try to use the Import Folder as File option via a data connector on Windows, the import will fail if the folder contains files that do not have file extensions. For example, if a folder contains the files file1.csv, file2.csv, file3.csv, and _SUCCESS, the function will fail due to the presence of the _SUCCESS file.Note that this only occurs if the data is sourced from a volume that is mounted from the Windows filesystem onto the Docker container via -v /path/to/windows/filesystem:/path/in/docker/container flags. This error occurs because of the difference in how files without file extensions are treated in Windows and in the Docker container (CentOS Linux).", + "prompt_type": "human_bot" + }, + { + "instruction": "I get a ClassNotFoundException error when I try to select a JDBC connection. How can I fix that?", + "output": "The folder storing the JDBC jar file must be visible/readable by the dai process user.If you downloaded the JDBC jar file from Oracle, they may provide you with a tar.gz file that you can unpackage with the following command:Cannot analyze code. Pygments package not found... code:: bash\n\n tar --no-same-permissions --no-same-owner -xzvf .gz\nAlternatively you can ensure that the permissions on the file are correct in general by running the following:Cannot analyze code. Pygments package not found... 
code:: bash\n\n chmod -R o+rx /path/to/folder_containing_jar_file\nFinally, if you just want to check the permissions use the command ls -altr and check the final 3 values in the permissions output.", + "prompt_type": "human_bot" + }, + { + "instruction": "I get a org.datanucleus.exceptions.NucleusUserException: Please check your CLASSPATH and plugin specification error when attempting to connect to Hive. How can I fix that?", + "output": "Make sure hive-site.xml is configured in /etc/hive/conf and not in /etc/hadoop/conf.", + "prompt_type": "human_bot" + }, + { + "instruction": "I get a \"Permission Denied\" error during Hive import. How do I fix this?", + "output": "If you see the following error, your Driverless AI instance may not be able to create a temporary Hive folder due to file system permissions restrictions.Cannot analyze code. Pygments package not found... code:: bash\n\n ERROR HiveAgent: Error during execution of query: java.lang.RuntimeException: java.lang.RuntimeException: java.io.IOException: Permission denied;\n org.apache.spark.sql.AnalysisException: java.lang.RuntimeException: java.lang.RuntimeException: java.io.IOException: Permission denied;\nTo fix this error, add the following name-value pair to your hive-site.xml file to specify the location that is accessible to Driverless AI (that is, your Driverless AI /tmp directory).Cannot analyze code. Pygments package not found... 
code:: bash\n\n \n hive.exec.local.scratchdir\n /path/to/dai/tmp\n \nRecipes", + "prompt_type": "human_bot" + }, + { + "instruction": "Where can I retrieve H2O's custom recipes?", + "output": "H2O's custom recipes can be obtained from the official :recipes-repo:`Recipes for Driverless AI repository `.No role entry for \"recipes-repo\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"recipes-repo\" as canonical role name.Unknown interpreted text role \"recipes-repo\".", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I create my own custom recipe?", + "output": "Refer to the :recipes-writing:`How to Write a Recipe ` guide for details on how to create your own custom recipe.No role entry for \"recipes-writing\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"recipes-writing\" as canonical role name.Unknown interpreted text role \"recipes-writing\".", + "prompt_type": "human_bot" + }, + { + "instruction": "Are MOJOs supported for experiments that use custom recipes?", + "output": "In most cases, MOJOs will not be available for custom recipes. Unless the recipe is simple, creating the MOJO is only possible with additional MOJO runtime support. Contact support@h2o.ai for more information about creating MOJOs for custom recipes. (Note: The Python Scoring Pipeline features full support for custom recipes.)", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I use BYOR in my airgapped installation?", + "output": "If your Driverless AI environment cannot access Internet and, thus, cannot access Driverless AI's \"Bring Your Own Recipes\" from GitHub, please contact H2O support. 
We can work with you directly to help you access recipes.", + "prompt_type": "human_bot" + }, + { + "instruction": "When enabling recipes in Driverless AI, can I install Python packages from my organization's internal Python package index?", + "output": "Yes\u2014you can use the pip_install_options :ref:`TOML option ` to specify your organization's internal Python package index as follows:No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".pip_install_options=\"['--extra-index-url', 'http://my-own-repo:port']\"For more information on the --extra-index-url pip install option, refer to the official pip documentation.Experiments", + "prompt_type": "human_bot" + }, + { + "instruction": "How much memory does Driverless AI require in order to run experiments?", + "output": "Right now, Driverless AI requires approximately 10x the size of the data in system memory.", + "prompt_type": "human_bot" + }, + { + "instruction": "How many columns can Driverless AI handle?", + "output": "Driverless AI has been tested on datasets with 10k columns. When running experiments on wide data, Driverless AI automatically checks if it is running out of memory, and if it is, it reduces the number of features until it can fit in memory. This may lead to a worse model, but Driverless AI shouldn't crash because the data is wide.", + "prompt_type": "human_bot" + }, + { + "instruction": "How should I use Driverless AI if I have large data?", + "output": "Driverless AI can handle large datasets out of the box. For very large datasets (more than 10 billion rows x columns), we recommend sampling your data for Driverless AI. 
Keep in mind that the goal of driverless AI is to go through many features and models to find the best modeling pipeline, and not to just train a few models on the raw data (H2O-3 is ideally suited for that case).For large datasets, the recommended steps are:Run with the recommended accuracy/time/interpretability settings first, especially accuracy <= 7Gradually increase accuracy settings to 7 and choose accuracy 9 or 10 only after observing runs with <= 7.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI detect the ID column?", + "output": "The ID column logic is one of the following:The column is named 'id', 'Id', 'ID' or 'iD' exactlyThe column contains a significant number of unique values (above max_relative_cardinality in the config.toml file or Max. allowed fraction of uniques for integer and categorical cols in Expert settings)", + "prompt_type": "human_bot" + }, + { + "instruction": "Can Driverless AI handle data with missing values/nulls?", + "output": "Yes, data that is imported into Driverless AI can include missing values. Feature engineering is fully aware of missing values, and missing values are treated as information - either as a special categorical level or as a special number. So for target encoding, for example, rows with a certain missing feature will belong to the same group. For Categorical Encoding where aggregations of a numeric columns are calculated for a grouped categorical column, missing values are kept. The formula for calculating the mean is the sum of non-missing values divided by the count of all non-missing values. For clustering, we impute missing values. 
And for frequency encoding, we count the number of rows that have a certain missing feature.The imputation strategy is as follows:XGBoost/LightGBM do not need missing value imputation and may, in fact, perform worse with any specific other strategy unless the user has a strong understanding of the data.Driverless AI automatically imputes missing values using the mean for GLM.Driverless AI provides an imputation setting for TensorFlow in the config.toml file: tf_nan_impute_value post-normalization. If you set this option to 0, then missing values will be imputed. Setting it to (for example) +5 will specify 5 standard deviations outside the distribution. The default for TensorFlow is -5, which specifies that TensorFlow will treat NAs like a missing value. We recommend that you specify 0 if the mean is better.More information is available in the Missing and Unseen Values Handling section.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI deal with categorical variables? What if an integer column should really be treated as categorical?", + "output": "If a column has string values, then Driverless AI will treat it as a categorical feature. There are multiple methods for how Driverless AI converts the categorical variables to numeric. These include:One Hot Encoding: creating dummy variables for each valueFrequency Encoding: replace category with how frequently it is seen in the dataTarget Encoding: replace category with the average target value (additional steps included to prevent overfitting)Weight of Evidence: calculate weight of evidence for each category (http://ucanalytics.com/blogs/information-value-and-weight-of-evidencebanking-case/)Driverless AI will try multiple methods for representing the column and determine which representation(s) are best.If the column has integers, Driverless AI will try treating the column as a categorical column and numeric column. 
It will treat any integer column as both categorical and numeric if the number of unique values is less than 50.This is configurable in the config.toml file:Cannot analyze code. Pygments package not found... code:: bash\n\n # Whether to treat some numerical features as categorical\n # For instance, sometimes an integer column may not represent a numerical feature but\n # represents different numerical codes instead.\n num_as_cat = true\n\n # Max number of unique values for integer/real columns to be treated as categoricals (test applies to first statistical_threshold_data_size_small rows only)\n max_int_as_cat_uniques = 50\n(Note: Driverless AI will also check if the distribution of any numeric column differs significantly from the distribution of typical numerical data using Benford's Law. If the column distribution does not obey Benford's Law, we will also try to treat it as categorical even if there are more than 50 unique values.)", + "prompt_type": "human_bot" + }, + { + "instruction": "How are outliers handled?", + "output": "Outliers are not removed from the data. Instead Driverless AI finds the best way to represent data with outliers. For example, Driverless AI may find that binning a variable with outliers improves performance.For target columns, Driverless AI first determines the best representation of the column. It may find that for a target column with outliers, it is best to predict the log of the column.", + "prompt_type": "human_bot" + }, + { + "instruction": "If I drop several columns from the Train dataset, will Driverless AI understand that it needs to drop the same columns from the Test dataset?", + "output": "If you drop columns from the training dataset, Driverless AI will do the same for the validation and test datasets (if the columns are present). 
There is no need for these columns because no features will be created from them.", + "prompt_type": "human_bot" + }, + { + "instruction": "Does Driverless AI treat numeric variables as categorical variables?", + "output": "In certain cases, yes. You can prevent this behavior by setting the num_as_cat variable in your installation's config.toml file to false. You can have finer grain control over this behavior by excluding the Numeric to Categorical Target Encoding Transformer and the Numeric To Categorical Weight of Evidence Transformer and their corresponding genes in your installation's config.toml file. To learn more about the config.toml file, see the :ref:`config_file` section.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".", + "prompt_type": "human_bot" + }, + { + "instruction": "Which algorithms are used in Driverless AI?", + "output": "Features are engineered with a proprietary stack of Kaggle-winning statistical approaches including some of the most sophisticated target encoding and likelihood estimates based on groupings, aggregations and joins, but we also employ linear models, neural nets, clustering and dimensionality reduction models and many traditional approaches such as one-hot encoding etc.On top of the engineered features, sophisticated models are fitted, including, but not limited to: XGBoost (both original XGBoost and 'lossguide' (LightGBM) mode), Decision Trees, GLM, TensorFlow (including a TensorFlow NLP recipe based on CNN Deeplearning models), RuleFit, FTRL (Follow the Regularized Leader), Isolation Forest, and Constant Models. (Refer to :ref:`supported_algorithms` for more information.) 
And additional algorithms can be added via :ref:`Recipes `.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".In general, GBMs are the best single-shot algorithms. Since 2006, boosting methods have proven to be the most accurate for noisy predictive modeling tasks outside of pattern recognition in images and sound (https://www.cs.cornell.edu/~caruana/ctp/ct.papers/caruana.icml06.pdf). The advent of XGBoost and Kaggle only cemented this position.", + "prompt_type": "human_bot" + }, + { + "instruction": "Why do my selected algorithms not show up in the Experiment Preview?", + "output": "When changing the algorithms used via Expert Settings > Model and Expert Settings > Recipes, you may notice in the Experiment Preview that those changes are not applied. Driverless AI determines whether to include models and/or recipes based on a hierarchy of those expert settings as well as data types (numeric, categorical, text, image, etc.) 
and system properties (GPUs, multiple GPUs, etc.).Setting an Algorithm to \"OFF\" in Expert Settings: If an algorithm is turned OFF in Expert Settings (for example, GLM Models) when running, then that algorithm will not be included in the experiment.Algorithms Not Included from Recipes (BYOR): If an algorithm from a custom recipe is not selected for the experiment in the Include specific models option, then that algorithm will not be included in the experiment, regardless of whether that same algorithm is set to AUTO or ON on the Expert Settings > Model page.Algorithms Not Specified as \"OFF\" and Included from Recipes: If a Driverless AI algorithm is specified as either \"AUTO\" or \"ON\" and additional models are selected for the experiment in the Include specific models option, then those algorithms may or may not be included in the experiment. Driverless AI will determine the algorithms to use based on the data and experiment type.To show warnings in the preview for which models were not used, set show_inapplicable_models_preview = true in config.toml.", + "prompt_type": "human_bot" + }, + { + "instruction": "Why do my selected transformers not show up in the Experiment Preview?", + "output": "When changing the transformers used via Expert Settings > Transformers and Expert Settings > Recipes, you may notice in the Experiment Preview that those changes are not applied. Driverless AI determines whether to include transformers based upon data types (numeric, categorical, text, image, etc.)
and system properties (GPUs, multiple GPUs, etc.).Transformers Not Included from Recipes (BYOR): If a transformer from a custom recipe is not selected for the experiment in the Include specific transformers option, then that transformer will not be included in the experiment.To show warnings in the preview for which transformers were not used, set show_inapplicable_transformers_preview = true in config.toml.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can we turn on TensorFlow Neural Networks so they are evaluated?", + "output": "Neural networks are considered by Driverless AI, although they may not be evaluated by default. To ensure that neural networks are tried, you can turn on TensorFlow in the Expert Settings. Once you have set TensorFlow to ON, you should see the Experiment Preview on the left hand side change and mention that it will evaluate TensorFlow models. We recommend using TensorFlow neural networks if you have a multinomial use case with more than 5 unique values.", + "prompt_type": "human_bot" + }, + { + "instruction": "Does Driverless AI standardize the data?", + "output": "Driverless AI will automatically do variable standardization for certain algorithms. For example, with Linear Models and Neural Networks, the data is automatically standardized. For decision tree algorithms, however, we do not perform standardization because these algorithms do not benefit from standardization.", + "prompt_type": "human_bot" + }, + { + "instruction": "What objective function is used in XGBoost?", + "output": "The objective function used in XGBoost is: reg:squarederror and a custom absolute error objective function for regression; binary:logistic or multi:softprob for classification. The objective function does not change depending on the scorer chosen. The scorer influences parameter tuning only.
For regression, Tweedie, Gamma, and Poisson regression objectives are supported.More information on the XGBoost instantiations can be found in the logs and in the model summary, both of which can be downloaded from the GUI or found in the /tmp/h2oai_experiment_/ folder on the server.", + "prompt_type": "human_bot" + }, + { + "instruction": "Does Driverless AI perform internal or external validation?", + "output": "Driverless AI does internal validation when only training data is provided. It does external validation when training and validation data are provided. In either scenario, the validation data is used for all parameter tuning (models and features), not just for feature selection. Parameter tuning includes target transformation, model selection, feature engineering, feature selection, stacking, etc.Specifically:Internal validation (only training data given):Ideal when data is either close to i.i.d., or for time-series problemsInternal holdouts are used for parameter tuning, with temporal causality for time-series problemsWill do the full spectrum from single holdout split to 5-fold CV, depending on accuracy settingsNo need to split training data manuallyFinal models are trained using CV on the training dataExternal validation (training + validation data given):Ideal when there\u2019s some amount of drift in the data, and the validation set mimics the test set data better than the training dataNo training data wasted during training because training data not used for parameter tuningValidation data is used only for parameter tuning, and is not part of training dataNo CV possible because we explicitly do not want to overfit on the training dataNot allowed for time-series problems (see Time Series FAQ section that follows)Tip: If you want both training and validation data to be used for parameter tuning (the training process), just concatenate the datasets together and turn them both into training data for the \u201cinternal validation\u201d method.", +
"prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI prevent overfitting?", + "output": "Driverless AI performs a number of checks to prevent overfitting. For example, during certain transformations, Driverless AI calculates the average on out-of-fold data using cross validation. Driverless AI also performs early stopping for every model built, ensuring that the model build will stop when it ceases to improve on holdout data. And additional steps to prevent overfitting include checking for i.i.d. and avoiding leakage during feature engineering.A blog post describing Driverless AI overfitting protection in greater detail is available here: https://www.h2o.ai/blog/driverless-ai-prevents-overfitting-leakage/.More aggressive overfit protection can be enabled by setting lock_ga_to_final_trees=true or using recipe='more_overfit_protection' and fixed_only_first_fold_model='true' and for time-series experiments allow_stabilize_varimp_for_ts=true.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI avoid the multiple hypothesis (MH) problem?", + "output": "Driverless AI uses a variant of the reusable holdout technique to address the multiple hypothesis problem. Refer to https://pdfs.semanticscholar.org/25fe/96591144f4af3d8f8f79c95b37f415e5bb75.pdf for more information.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI suggest the experiment settings?", + "output": "When you run an experiment on a dataset, the experiment settings (Accuracy, Time, and Interpretability) are automatically suggested by Driverless AI. For example, Driverless AI may suggest the parameters Accuracy = 7, Time = 3, Interpretability = 6, based on your data.Driverless AI will automatically suggest experiment settings based on the number of columns and number of rows in your dataset. The settings are suggested to ensure best handling when the data is small.
If the data is small, Driverless AI will suggest the settings that prevent overfitting and ensure the full dataset is utilized.If the number of rows and number of columns are each below a certain threshold, then:Accuracy will be increased up to 8.The accuracy is increased so that cross validation is done. (We don't want to \"throw away\" any data for internal validation purposes.)Interpretability will be increased up to 8.The higher the interpretability setting, the smaller the number of features in the final model.More complex features are not allowed.This prevents overfitting.Time will be decreased down to 2.There will be fewer feature engineering iterations to prevent overfitting.", + "prompt_type": "human_bot" + }, + { + "instruction": "What happens when I set Interpretability and Accuracy to the same number?", + "output": "The answer is currently that interpretability controls which features are created and what features are kept. (Also above interpretability = 6, monotonicity constraints are used in XGBoost GBM, XGBoost Dart, LightGBM, and Decision Tree models.) The accuracy refers to how hard Driverless AI then tries to make those features into the most accurate model", + "prompt_type": "human_bot" + }, + { + "instruction": "Can I specify the number of GPUs to use when running Driverless AI?", + "output": "When running an experiment, the Expert Settings let you specify the starting GPU ID for Driverless AI to use. You can also specify the maximum number of GPUs to use per model and per experiment. Refer to the :ref:`expert-settings` section for more information.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I create the simplest model in Driverless AI?", + "output": "To create the simplest model in Driverless AI, set the following Experiment Settings:Set Accuracy to 1. 
Note that this can hurt performance as a sample will be used. If necessary, adjust the knob until the preview shows no sampling.Set Time to 1.Set Interpretability to 10.Next, configure the following Expert Settings:Turn OFF all algorithms except GLM.Set GLM models to ON.Set Ensemble level to 0.Set Select target transformation of the target for regression problems to Identity.Disable Data distribution shift detection.Disable Target Encoding.Alternatively, you can set Pipeline Building Recipe to Compliant. Compliant automatically configures the following experiment and expert settings:interpretability=10 (To avoid complexity. This overrides GUI or Python client settings for Interpretability.)enable_glm='on' (Remaining algos are 'off', to avoid complexity and be compatible with algorithms supported by MLI.)num_as_cat=true: Treat some numerical features as categorical. For instance, sometimes an integer column may not represent a numerical feature but represent different numerical codes instead.fixed_ensemble_level=0: Don't use any ensemble (to avoid complexity).feature_brain_level=0: No feature brain used (to ensure every restart is identical).max_feature_interaction_depth=1: Interaction depth is set to 1 (no multi-feature interactions to avoid complexity).target_transformer=\"identity\": For regression (to avoid complexity).check_distribution_shift=\"off\": Don't use distribution shift between train, valid, and test to drop features (bit risky without fine-tuning).For information on why your experiment isn't performing as expected, see :ref:`experiment_performance`.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".", + "prompt_type": "human_bot" + }, + { + "instruction": "When I run multiple experiments with different seeds, why do I see different scores, runtimes, and sizes on disk in the Experiments listing page?", + "output": "When running multiple experiments with
all of the same settings except the seed, understand that a feature brain level > 0 can lead to variations in models, features, timing, and sizes on disk. (The default value is 2.) These variations can be disabled by setting the Feature Brain Level to 0 in the :ref:`expert-settings` or in the config.toml file.No role entry for \"ref\" in module \"docutils.parsers.rst.languages.en\".\nTrying \"ref\" as canonical role name.Unknown interpreted text role \"ref\".In addition, if you use a different seed for each experiment, then each experiment can be different due to the randomness in the genetic algorithm that searches for the best features and model parameters. Only if Reproducible is set with the same seed and with a feature brain level of 0 should users expect the same outcome. Once a different seed is set, the models, features, timing, and sizes on disk can all vary within the constraints set by the choices made for the experiment. (I.e., accuracy, time, interpretability, expert settings, etc., all constrain the outcome, and then a different seed can change things within those constraints.)", + "prompt_type": "human_bot" + }, + { + "instruction": "Why does the final model performance appear to be worse than previous iterations?", + "output": "There are a few things to remember:Driverless AI creates a best effort estimate of the generalization performance of the best modeling pipeline found so far.The performance estimation is always based on holdout data (data unseen by the model).If no validation dataset is provided, the training data is split internally to create internal validation holdout data (once or multiple times or cross-validation, depending on the accuracy settings).If no validation dataset is provided, for accuracy <= 7, a single holdout split is used, and a \"lucky\" or \"unlucky\" split can bias estimates for small datasets or datasets with high variance.If a validation dataset is provided, then all performance estimates are solely based on the 
entire validation dataset (independent of accuracy settings).All scores reported are based on bootstrapped-based statistical methods and come with error bars that represent a range of estimate uncertainty.After the final iteration, a best final model is trained on a final set of engineered features. Depending on accuracy settings, a more accurate estimation of generalization performance may be done using cross-validation. Also, the final model may be a stacked ensemble consisting of multiple base models, which generally leads to better performance. Consequently, in rare cases, the difference in performance estimation method can lead to the final model's estimated performance seeming poorer than those from previous iterations. (i.e., The final model's estimated score is significantly worse than the last iteration score and error bars don't overlap.) In that case, it is very likely that the final model performance estimation is more accurate, and the prior estimates were biased due to a \"lucky\" split. To confirm this, you can re-run the experiment multiple times (without setting the reproducible flag).If you would like to minimize the likelihood of the final model performance appearing worse than previous iterations, here are some recommendations:Increase accuracy settingsProvide a validation datasetProvide more data", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I find features that may be causing data leakages in my Driverless AI model?", + "output": "To find original features that are causing leakage, have a look at features_orig.txt in the experiment summary download. Features causing leakage will have high importance there. To get a hint at derived features that might be causing leakage, create a new experiment with dials set to 2/2/8, and run the new experiment on your data with all your features and response. Then analyze the top 1-2 features in the model variable importance. 
They are likely the main contributors to data leakage if it is occurring.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I see the performance metrics on the test data?", + "output": "As long as you provide a target column in the test set, Driverless AI will show the best estimate of the final model's performance on the test set at the end of the experiment. The test set is never used to tune parameters (unlike what Kagglers often do), so this is purely a convenience. Of course, you can still make test set predictions and compute your own metrics using a method of your choice.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I see all the performance metrics possible for my experiment?", + "output": "At the end of the experiment, the model's estimated performance on all provided datasets with a target column is printed in the experiment logs. For example, for the test set:Cannot analyze code. Pygments package not found... code:: bash\n\n Final scores on test (external holdout) +/- stddev:\n GINI = 0.87794 +/- 0.035305 (more is better)\n MCC = 0.71124 +/- 0.043232 (more is better)\n F05 = 0.79175 +/- 0.04209 (more is better)\n F1 = 0.75823 +/- 0.038675 (more is better)\n F2 = 0.82752 +/- 0.03604 (more is better)\n ACCURACY = 0.91513 +/- 0.011975 (more is better)\n LOGLOSS = 0.28429 +/- 0.016682 (less is better)\n AUCPR = 0.79074 +/- 0.046223 (more is better)\n optimized: AUC = 0.93386 +/- 0.018856 (more is better)\n", + "prompt_type": "human_bot" + }, + { + "instruction": "What if my training/validation and testing data sets come from different distributions?", + "output": "In general, Driverless AI uses training data to engineer features and train models and validation data to tune all parameters. If no external validation data is given, the training data is used to create internal holdouts. The way holdouts are created internally depends on whether there is a strong time dependence, see the point below.
If the data has no obvious time dependency (e.g., if there is no time column, either implicit or explicit), or if the data can be sorted arbitrarily and it won't affect the outcome (e.g., Iris data, predicting flower species from measurements), and if the test dataset is different (e.g., new flowers or only large flowers), then the model performance on validation (either internal or external) as measured during training won't be achieved during final testing due to the obvious inability of the model to generalize.", + "prompt_type": "human_bot" + }, + { + "instruction": "Does Driverless AI handle weighted data?", + "output": "Yes. You can optionally provide an extra weight column in your training (and validation) data with non-negative observation weights. This can be useful to implement domain-specific effects such as exponential weighting in time or class weights. All of our algorithms and metrics in Driverless AI support observation weights, but note that estimated likelihoods can be skewed as a consequence.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI handle fold assignments for weighted data?", + "output": "Currently, Driverless AI does not take the weights into account during fold creation, but you can provide a fold column to enforce your own grouping, i.e., to keep rows that belong to the same group together (either in train or valid). The fold column has to be a categorical column (integers ok) that assigns a group ID to each row. (It needs to have at least 5 groups because we do up to 5-fold CV.)", + "prompt_type": "human_bot" + }, + { + "instruction": "Why do I see that adding new features to a dataset deteriorates the performance of the model?", + "output": "You may notice that after adding one or more new features to a dataset, it deteriorates the performance of the Driverless AI model. 
In Driverless AI, the feature engineering sequence is fairly random and may end up not doing the same things with original features if you restart entirely fresh with new columns. Beginning in Driverless AI v1.4.0, you now have the option to Restart from Last Checkpoint. This lets you pull in a new dataset with more columns, and Driverless AI will more iteratively take advantage of the new columns.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI handle imbalanced data for binary classification experiments?", + "output": "If you have data that is imbalanced, a binary imbalanced model can help to improve scoring with a variety of imbalanced sampling methods. An imbalanced model is able to take advantage of most (or even all) of the imbalanced dataset's positive values during sampling, while a regular model significantly limits the population of positive values. Imbalanced models, however, take more time to make predictions, and they are not always more accurate than regular models. We still recommend that you try using an imbalanced model if your data is imbalanced to see if scoring is improved over a regular model. Note that this information only applies to binary models.", + "prompt_type": "human_bot" + }, + { + "instruction": "How is feature importance calculated in Driverless AI?", + "output": "For most models, such as XGBoost or LightGBM models, Driverless AI uses normalized information gain to calculate feature importance. Other estimates of importance are sometimes used for certain models.", + "prompt_type": "human_bot" + }, + { + "instruction": "I want to have only one LightGBM model in the final pipeline. How can I do this?", + "output": "You can do this by using ensemble levels. To change the ensemble level, use the Ensemble Level for Final Modeling Pipeline expert setting (fixed_ensemble_level in the config.toml), which is located in the Model tab. If you want a single model, use level 0. 
If you are okay with using the same model with hyperparameters but trained with multiple cross validation folds, then use level 1. To use only one model type, use the Include Specific Models expert setting, which is located in the Recipes tab. For more information, see the documentation on ensemble learning in Driverless AI. Setting fixed_ensemble_level = 0 returns a single model trained on one hundred percent of the data, not just a single model type with CV. When the Cross-validate Single Final Model expert setting is enabled (default), the single model with fixed_ensemble_level = 0 has the optimal number of trees because it is tuned with CV. Disabling this setting is not recommended when fixed_ensemble_level = 0.", + "prompt_type": "human_bot" + }, + { + "instruction": "I want to have only one LightGBM model and no FE. How can I do this?", + "output": "You can do this by additionally limiting the set of allowed transformations to just the OriginalTransformer, which leaves numeric features in their original form and drops all non-numeric features. To include or exclude specific transformers in your Driverless AI environment, use the Include Specific Transformers expert setting (included_transformers in the config.toml), which is located in the Recipes tab. 
You can also set the Feature Engineering Effort expert setting (feature_engineering_effort in the config.toml) to 0 to achieve the same effect. For more information, see the Transformations documentation.", + "prompt_type": "human_bot" + }, + { + "instruction": "What is fast approximation in Driverless AI?", + "output": "Fast approximation is available for both regular and Shapley predictions. It is enabled by default for MLI / AutoDoc and turned off by default for other clients. The extent of approximation can be fully configured or turned off with the fast approximation expert settings. Enabling fast approximation can result in a significant speedup for large prediction tasks like the creation of partial dependence plots and other MLI-related tasks. The following is a list of expert settings that can be used to configure fast approximation. Regular predictions: fast-approx-trees, fast-approx-one-fold, fast-approx-one-model. Shapley predictions: fast-approx-trees-shap, fast-approx-one-fold-shap, fast-approx-one-model-shap. 
MLI: mli_fast_approx.", + "prompt_type": "human_bot" + }, + { + "instruction": "When should fast approximation be turned off?", + "output": "In situations where a more detailed partial dependence plot or interpretation is required, you may want to disable fast approximation.", + "prompt_type": "human_bot" + }, + { + "instruction": "Why does the confusion matrix sometimes show decimals instead of whole numbers?", + "output": "Fractional confusion matrix values most commonly arise as a consequence of the averaging of confusion matrices across cross-validation fold splits or across repeated fold splits, but the same can also happen for non-integer observation weights.", + "prompt_type": "human_bot" + }, + { + "instruction": "Is data sampling for multiclass use cases supported?", + "output": "Data sampling for multiclass use cases is not currently supported. However, it is possible to approximate the data sampling approach by adding more weight in order to penalize rare classes. You can add weight to an individual observation by using a weight column when setting up your experiment. You can also enable LightGBM multiclass balancing by setting the enable_lightgbm_multiclass_balancing configuration setting to on, which enables automatic class weighting for imbalanced multiclass problems.", + "prompt_type": "human_bot" + }, + { + "instruction": "Where can I get details of the various transformations performed in an experiment?", + "output": "Download the experiment's log .zip file from the GUI. 
This zip file includes summary information, log information, and a gene_summary.txt file with details of the transformations used in the experiment. Specifically, there is a details folder with all subprocess logs. On the server, the experiment-specific files are inside the /tmp/h2oai_experiment_/ folder after the experiment completes, particularly h2oai_experiment_logs_.zip and h2oai_experiment_summary_.zip.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I download the predictions onto the machine where Driverless AI is running?", + "output": "When you select Score on Another Dataset, the predictions will automatically be stored on the machine where Driverless AI is running. They will be saved in the following locations (and can be opened again by Driverless AI, both for .csv and .bin): Training Data Predictions: tmp/h2oai_experiment_/train_preds.csv (also saved as .bin); Testing Data Predictions: tmp/h2oai_experiment_/test_preds.csv (also saved as .bin); New Data Predictions: tmp/h2oai_experiment_/automatically_generated_name.csv. Note that the automatically generated name will match the name of the file downloaded to your local computer.", + "prompt_type": "human_bot" + }, + { + "instruction": "Why are predicted probabilities not available when I run an experiment without ensembling?", + "output": "When Driverless AI provides pre-computed predictions after completing an experiment, it uses only those parts of the modeling pipeline that were not trained on the particular rows for which the predictions are made. This means that Driverless AI needs holdout data in order to create predictions, such as validation or test sets, where the model is trained on training data only. 
In the case of ensembles, Driverless AI uses cross-validation to generate holdout folds on the training data, so we are able to provide out-of-fold estimates for every row in the training data and, hence, can also provide training holdout predictions (that will provide a good estimate of generalization performance). In the case of a single model, though, that is trained on 100% of the training data, there is no way to create unbiased estimates for any row in the training data. While DAI uses an internal validation dataset, this is a re-usable holdout, and therefore will not contain holdout predictions for the full training dataset. You need cross-validation in order to get out-of-fold estimates, and then that's not a single model anymore. If you want to still get predictions for the training data for a single model, then you have to use the scoring API to create predictions on the training set. From the GUI, this can be done using the Score on Another Dataset button for a completed experiment. Note, though, that the results will likely be overly optimistic, too good to be true, and virtually useless.", + "prompt_type": "human_bot" + }, + { + "instruction": "What drives the size of a MOJO?", + "output": "The size of the MOJO is based on the complexity of the final modeling pipeline (i.e., feature engineering and models). One of the biggest factors is the amount of higher-order interactions between features, especially target encoding and related features, which have to store lookup tables for all possible combinations observed in the training data. You can reduce the amount of these transformations by reducing the value of Max. feature interaction depth and/or Feature engineering effort under Expert Settings, or by increasing the interpretability settings for the experiment. Ensembles also contribute to the final modeling pipeline's complexity as each model has its own pipeline. 
Lowering the accuracy settings or setting the ensemble level to a lower number can reduce this complexity. The number of features (Max. pipeline features) also affects the MOJO size. Text transformers are pretty bulky as well and can add to the MOJO size. To toggle to a smaller MOJO during model building with a single click, see the Reduce mojo size option under experiment settings of an experiment.", + "prompt_type": "human_bot" + }, + { + "instruction": "Are MOJOs thread safe?", + "output": "Yes, all Driverless AI MOJOs are thread safe.", + "prompt_type": "human_bot" + }, + { + "instruction": "Running the scoring pipeline for my MOJO is taking several hours. How can I get this to run faster?", + "output": "When running example.sh, Driverless AI implements a memory setting, which is suitable for most use cases. For very large models, however, it may be necessary to increase the memory limit when running the Java application for data transformation. This can be done using the -Xmx25g parameter. For example:\n\n java -Xmx25g -Dai.h2o.mojos.runtime.license.file=license.sig -cp mojo2-runtime.jar ai.h2o.mojos.ExecuteMojo pipeline.mojo example.csv\n", + "prompt_type": "human_bot" + }, + { + "instruction": "Why have I encountered a \"Best Score is not finite\" error?", + "output": "Driverless AI uses 32-bit floats by default. You may encounter this error if your data value exceeds 1E38 or if you are resolving more than 1 part in 10 million. 
You can resolve this error using one of the following methods: enable the Force 64-bit Precision option in the experiment's Expert Settings, or set data_precision=\"float64\" and transformer_precision=\"float64\" in config.toml.", + "prompt_type": "human_bot" + }, + { + "instruction": "What if my data has a time dependency?", + "output": "If you know that your data has a strong time dependency, select a time column before starting the experiment. The time column must be in a Datetime format that can be parsed by pandas, such as \"2017-11-06 14:32:21\", \"Monday, June 18, 2012\" or \"Jun 18 2018 14:34:00\" etc., or contain only integers. If you are unsure about the strength of the time dependency, run two experiments: One with time column set to \"[OFF]\" and one with time column set to \"[AUTO]\" (or pick a time column yourself).", + "prompt_type": "human_bot" + }, + { + "instruction": "What is a lag, and why does it help?", + "output": "A lag is a feature value from a previous point in time. Lags are useful to take advantage of the fact that the current (unknown) target value is often correlated with previous (known) target values. Hence, they can better capture target patterns along the time axis. Why can't I specify a validation data set for time-series problems? Why do you look at the test set for time-series problems? The problem with validation vs test in the time series setting is that there is only one valid way to define the split. If a test set is given, its length in time defines the validation split and the validation data has to be part of train. Otherwise the time-series validation won't be useful. For instance: Let's assume we have train = [1,2,3,4,5,6,7,8,9,10] and test = [12,13], where integers define time periods (e.g., weeks). For this example, the most natural train/valid split that mimics the test scenario would be: train = [1,2,3,4,5,6,7] and valid = [9,10], and period 8 is not included in the training set to allow for a gap. 
Note that we will look at the start time and the duration of the test set only (if provided), and not at the contents of the test data (neither features nor target). If the user provides validation = [8,9,10] instead of test data, then this could lead to inferior validation strategy and worse generalization. Hence, we use the user-given test set only to create the optimal internal train/validation splits. If no test set is provided, the user can provide the length of the test set (in periods), the length of the train/test gap (in periods) and the length of the period itself (in seconds).", + "prompt_type": "human_bot" + }, + { + "instruction": "Why does the gap between train and test matter? Is it because of creating the lag features on the test set?", + "output": "Taking the gap into account is necessary in order to avoid too optimistic estimates of the true error and to avoid creating history-based features like lags for the training and validation data (which cannot be created for the test data due to the missing information).", + "prompt_type": "human_bot" + }, + { + "instruction": "In regards to applying the target lags to different subsets of the time group columns, are you saying Driverless AI perform auto-correlation at \"levels\" of the time series? For example, consider the Walmart dataset where I have Store and Dept (and my target is Weekly Sales). Are you saying that Driverless AI checks for auto-correlation in Weekly Sales based on just Store, just Dept, and both Store and Dept?", + "output": "Currently, auto-correlation is only applied on the detected superkey (entire TGC) of the training dataset relation at the very beginning. 
It's used to rank potential lag-sizes, with the goal to prune the search space for the GA optimization process, which is responsible for selecting the lag features.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does Driverless AI detect the time period?", + "output": "Driverless AI treats each time series as a function with some frequency 1/ns. The actual value is estimated by the median of time deltas across maximal length TGC subgroups. The chosen SI unit minimizes the distance to all available SI units.", + "prompt_type": "human_bot" + }, + { + "instruction": "What is the logic behind the selectable numbers for forecast horizon length?", + "output": "The shown forecast horizon options are based on quantiles of valid splits. This is necessary because Driverless AI cannot display all possible options in general.", + "prompt_type": "human_bot" + }, + { + "instruction": "Assume that in my Walmart dataset, all stores provided data at the week level, but one store provided data at the day level. What would Driverless AI do?", + "output": "Driverless AI would still assume \"weekly data\" in this case because the majority of stores are yielding this property. The \"daily\" store would be resampled to the detected overall frequency.", + "prompt_type": "human_bot" + }, + { + "instruction": "Assume that in my Walmart dataset, all stores and departments provided data at the weekly level, but one department in a specific store provided weekly sales on a bi-weekly basis (every two weeks). What would Driverless AI do?", + "output": "That's similar to having missing data. Due to proper resampling, Driverless AI can handle this without any issues.", + "prompt_type": "human_bot" + }, + { + "instruction": "Why does the number of weeks that you want to start predicting matter?", + "output": "That's an option to provide a train-test gap if no test data is available. 
That is to say, \"I don't have my test data yet, but I know it will have a gap to train of x.\"", + "prompt_type": "human_bot" + }, + { + "instruction": "Are the scoring components of time series sensitive to the order in which new pieces of data arrive? I.e., is each row independent at scoring time, or is there a real-time windowing effect in the scoring pieces?", + "output": "Each row is independent at scoring time.", + "prompt_type": "human_bot" + }, + { + "instruction": "What happens if the user, at predict time, gives a row with a time value that is too small or too large?", + "output": "Internally, \"out-of-bounds\" time values are encoded with special values. The samples will still be scored, but the predictions won't be trustworthy.", + "prompt_type": "human_bot" + }, + { + "instruction": "What's the minimum data size for a time series recipe?", + "output": "We recommend that you have around 10,000 validation samples in order to get a reliable estimate of the true error. The time series recipe can still be applied for smaller data, but the validation error might be inaccurate.", + "prompt_type": "human_bot" + }, + { + "instruction": "How long must the training data be compared to the test data?", + "output": "At a minimum, the training data has to be at least twice as long as the test data along the time axis. However, we recommend that the training data be at least three times as long as the test data.", + "prompt_type": "human_bot" + }, + { + "instruction": "How does the time series recipe deal with missing values?", + "output": "Missing values will be converted to a special value, which is different from any non-missing feature value. 
Explicit imputation techniques won't be applied.", + "prompt_type": "human_bot" + }, + { + "instruction": "Can the time information be distributed across multiple columns in the input data (such as [year, day, month])?", + "output": "Currently Driverless AI requires the data to have the time stamps given in a single column. Driverless AI will create additional time features like [year, day, month] on its own, if they turn out to be useful.", + "prompt_type": "human_bot" + }, + { + "instruction": "What type of modeling approach does Driverless AI use for time series?", + "output": "Driverless AI combines the creation of history-based features like lags, moving averages etc. with the modeling techniques, which are also applied for i.i.d. data. The primary model of choice is XGBoost.", + "prompt_type": "human_bot" + }, + { + "instruction": "What's the idea behind exponential weighting of moving averages?", + "output": "Exponential weighting accounts for the possibility that more recent observations are better suited to explain the present than older observations.", + "prompt_type": "human_bot" + }, + { + "instruction": "How can I reduce the size of the Audit Logger?", + "output": "An Audit Logger file is created every day that Driverless AI is in use. The audit_log_retention_period config variable lets you specify the number of days, after which the audit.log will be overwritten. This option defaults to 5 days, which means that Driverless AI will maintain Audit Logger files for the last 5 days, and audit.log files older than 5 days are removed and replaced with newer log files. When this option is set to 0, the audit.log file will not be overwritten.", + "prompt_type": "human_bot" + } +] \ No newline at end of file