File size: 2,895 Bytes
b6a7e2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import argparse

from giskard_cicd.loaders import GithubLoader, HuggingFaceLoader
from giskard_cicd.pipeline.runner import PipelineRunner

from automation import create_discussion

def _build_parser():
    """Build the command-line parser for the scanner script."""
    parser = argparse.ArgumentParser(
        prog="Giskard Scanner", description="Scans a model for vulnerabilities and produces a report."
    )
    parser.add_argument(
        "--loader",
        help="Which loader to use to set up the model. Currently only `github` and `huggingface` are supported.",
        # Must stay in sync with the loaders registered in `main`; rejecting
        # unknown values here gives a clear usage error before any work starts.
        choices=("github", "huggingface"),
        required=True,
    )
    parser.add_argument("--model", help="The model to scan.", required=True)
    parser.add_argument("--dataset", help="The validation or test dataset that will be used.")
    parser.add_argument(
        "--dataset_split", help="The split of the dataset to use. If not provided, the best split will be selected."
    )
    parser.add_argument("--dataset_config", help="The name of the dataset config subset to use.")
    parser.add_argument("--scan_config", help="Path to YAML file containing the configuration of the scan.")
    parser.add_argument("--output", help="Optional name of the output file.")
    parser.add_argument("--output_format", help="Format of the report (either HTML or markdown). Default is HTML.")
    parser.add_argument(
        "--output_portal",
        help="The output portal of the report (either huggingface or local directory). Default is local.",
    )
    parser.add_argument("--discussion_repo", help="The repo to push the report to.")
    parser.add_argument("--hf_token", help="The token to push the report to the repo.")
    return parser


def _default_report_path(model, output_format):
    """Return the fallback report filename for `model`.

    The name is built from the last `/`-separated component of `model`, and
    the extension follows the rendered format (`.md` for markdown, `.html`
    for everything else) so the file content matches its suffix.
    """
    extension = "md" if output_format == "markdown" else "html"
    model_name = model.split("/")[-1]
    return f"{model_name}_report.{extension}"


def main(argv=None):
    """Run a scan from command-line arguments and write out the report.

    Args:
        argv: Argument list to parse. When None, argparse reads `sys.argv[1:]`.

    Raises:
        SystemExit: On invalid arguments (raised by argparse).
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    publish_to_hub = args.output_portal == "huggingface"
    # Fail before the scan runs rather than after it when the discussion
    # target is missing.
    # NOTE(review): `--hf_token` is deliberately not required here; it is
    # passed through as-is and `create_discussion` may accept None — confirm.
    if publish_to_hub and not args.discussion_repo:
        parser.error("--discussion_repo is required when --output_portal is `huggingface`.")

    supported_loaders = {
        "huggingface": HuggingFaceLoader(),
        "github": GithubLoader(),
    }
    runner = PipelineRunner(loaders=supported_loaders)

    runner_kwargs = {
        "loader_id": args.loader,
        "model": args.model,
        "dataset": args.dataset,
        "scan_config": args.scan_config,
    }
    # The split/config options are only forwarded for the huggingface loader.
    if args.loader == "huggingface":
        runner_kwargs.update(
            {
                "dataset_split": args.dataset_split,
                "dataset_config": args.dataset_config,
            }
        )

    report = runner.run(**runner_kwargs)

    # Anything other than "markdown" (including no value) renders as HTML.
    if args.output_format == "markdown":
        rendered_report = report.to_markdown(template="github")
    else:
        rendered_report = report.to_html()

    if publish_to_hub:
        create_discussion(args.discussion_repo, args.model, args.hf_token, rendered_report)

    # A local copy is always written, even when the report was also pushed
    # to a discussion.
    output_path = args.output or _default_report_path(args.model, args.output_format)
    # Explicit UTF-8 so the write does not depend on the platform's default encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(rendered_report)


if __name__ == "__main__":
    main()