anton-l HF staff committed on
Commit
cac139e
•
1 Parent(s): e9b1718

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -33,6 +33,8 @@ As part of the BigCode project, we released and maintain [The Stack V2](https://
33
  """ + """\
34
 
35
  This tool lets you check if a repository under a given username is part of The Stack dataset. Would you like to have your data removed from future versions of The Stack? You can opt-out following the instructions [here](https://www.bigcode-project.org/docs/about/the-stack/#how-can-i-request-that-my-data-be-removed-from-the-stack). Note that previous opt-outs might still be displayed in the release candidate (denoted with "-rc"), which will be removed for the release.
 
 
36
  """
37
 
38
  opt_out_text_template = """\
@@ -68,9 +70,15 @@ def check_username(username, version):
68
  if username in usernames[version] and len(usernames[version][username])>0:
69
  repos = usernames[version][username]
70
  repo_word = "repository" if len(repos)==1 else "repositories"
71
- output_md += f"**Yes**, there is code from **{len(repos)} {repo_word}** in The Stack:\n\n"
 
 
 
72
  for repo in repos:
73
- output_md += f"_{repo}_\n\n"
 
 
 
74
 
75
  return output_md.strip(), issue_url(username, repos)
76
  else:
 
33
  """ + """\
34
 
35
  This tool lets you check if a repository under a given username is part of The Stack dataset. Would you like to have your data removed from future versions of The Stack? You can opt-out following the instructions [here](https://www.bigcode-project.org/docs/about/the-stack/#how-can-i-request-that-my-data-be-removed-from-the-stack). Note that previous opt-outs might still be displayed in the release candidate (denoted with "-rc"), which will be removed for the release.
36
+
37
+ **Note**: The Stack v2.0 is built from the data provided by the [Software Heritage Archive](https://archive.softwareheritage.org/), so it may include repositories that are no longer present on GitHub.
38
  """
39
 
40
  opt_out_text_template = """\
 
70
  if username in usernames[version] and len(usernames[version][username])>0:
71
  repos = usernames[version][username]
72
  repo_word = "repository" if len(repos)==1 else "repositories"
73
+ if version[:2] == "v2":
74
+ output_md += f"**Yes**, there is code from **{len(repos)} {repo_word}** in The Stack. Check the links to see when it was archived by Software Heritage:\n\n"
75
+ else:
76
+ output_md += f"**Yes**, there is code from **{len(repos)} {repo_word}** in The Stack:\n\n"
77
  for repo in repos:
78
+ if version[:2] == "v2":
79
+ output_md += f"[{repo}](https://archive.softwareheritage.org/browse/origin/visits/?origin_url=https://github.com/{repo})\n\n"
80
+ else:
81
+ output_md += f"_{repo}_\n\n"
82
 
83
  return output_md.strip(), issue_url(username, repos)
84
  else: