arpy8 committed on
Commit
6dce24d
·
1 Parent(s): 5f2207b

fresh start

Browse files
Files changed (6) hide show
  1. .gitattributes +35 -0
  2. .gitignore +3 -0
  3. Dockerfile +2 -9
  4. README.md +8 -14
  5. main.py → app.py +35 -31
  6. requirements.txt +3 -4
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .venv/
2
+
3
+ *.pyc*
Dockerfile CHANGED
@@ -6,17 +6,10 @@ USER root
6
 
7
  COPY ./requirements.txt /code/requirements.txt
8
 
9
- ADD --chmod=755 https://astral.sh/uv/install.sh /install.sh
10
-
11
- RUN /install.sh && rm /install.sh
12
-
13
- RUN /root/.cargo/bin/uv pip install --system --no-cache -r requirements.txt
14
 
15
  COPY . /code
16
 
17
  USER user
18
 
19
- # CMD ["python", "main.py"]
20
- # CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:get_repos()"]
21
- # CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
22
- CMD ["flask", "run", "--host", "0.0.0.0", "--port", "7860"]
 
6
 
7
  COPY ./requirements.txt /code/requirements.txt
8
 
9
+ RUN pip install -r requirements.txt
 
 
 
 
10
 
11
  COPY . /code
12
 
13
  USER user
14
 
15
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
README.md CHANGED
@@ -1,14 +1,8 @@
1
- This is a Flask-based API that fetches GitHub repositories tagged with “Hacktoberfest” across different programming languages.
2
-
3
- ### Features
4
- • Retrieve repositories for popular languages including Python, JavaScript, HTML, TypeScript, Java, C++, PHP, Go, CSS, and C.
5
- • Get all repositories for these languages at once or fetch repositories specific to a single language.
6
- • Simple error handling and CORS enabled for cross-origin access.
7
-
8
- ### Tech Stack
9
- • Python
10
- • Flask
11
- • BeautifulSoup4
12
-
13
-
14
- Deployed Link: https://arpy8-github-repo-scraper.hf.space/
 
1
+ ---
2
+ title: HackProjects Server
3
+ emoji: 😜
4
+ colorFrom: red
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ ---
 
 
 
 
 
 
main.py → app.py RENAMED
@@ -1,25 +1,34 @@
 
1
  import bs4
2
- import json
3
  import requests
4
- from flask import Flask
5
- from flask_cors import CORS
6
 
7
- app = Flask(__name__)
8
- CORS(app)
 
 
 
 
 
 
 
9
 
10
  LANGUAGES = ["python", "javascript", "html", "typescript", "java", "cpp", "php", "go", "css", "c"]
11
 
12
- def fetch_github_page(**kwargs):
13
  try:
14
  params = '&'.join([f'{k}={v}' for k, v in kwargs.items()])
15
  url = 'https://github.com/topics/hacktoberfest?{}'.format(params)
16
  response = requests.get(url)
17
  response.raise_for_status()
18
- return response.content
 
 
19
  except requests.RequestException as e:
20
- return f"Error fetching data: {e}"
21
 
22
- def parse_repositories(page_content):
23
  soup = bs4.BeautifulSoup(page_content, 'html.parser')
24
  articles = soup.find_all("article")
25
  repositories = []
@@ -30,44 +39,39 @@ def parse_repositories(page_content):
30
  if len(repo_info) >= 3 and "Star" in repo_info[2]:
31
  owner = repo_info[0]
32
  repo_name = repo_info[1]
33
- stars = repo_info[2]
34
- repo_link = f"https://github.com/{owner}/{repo_name}"
35
 
36
  repositories.append({
37
  "repo": repo_name,
38
  "owner": owner,
39
  "stars": stars,
40
- "link": repo_link
41
  })
42
 
43
  return repositories
44
 
45
- @app.route('/all')
 
 
 
 
46
  def get_all_repos():
47
- payload = {}
48
-
49
  for lang in LANGUAGES:
50
  page_content = fetch_github_page(l=lang)
51
-
52
- if isinstance(page_content, str) and page_content.startswith("Error"):
53
- return page_content
54
-
55
  repositories = parse_repositories(page_content)
56
  payload[lang] = repositories
57
-
58
- print(payload)
59
-
60
- return json.dumps(payload, indent=4)
61
 
62
- @app.route('/data/<string:lang>')
63
- def get_repos(lang):
64
- page_content = fetch_github_page(l=lang)
65
- if isinstance(page_content, str) and page_content.startswith("Error"):
66
- return page_content
67
 
 
 
 
68
  repositories = parse_repositories(page_content)
69
- return json.dumps(repositories, indent=4)
 
70
 
71
 
72
- if __name__ == '__main__':
73
- app.run(port=8000, debug=True)
 
 
1
+ from typing import Dict, List, Any
2
  import bs4
 
3
  import requests
4
+ from fastapi import FastAPI, HTTPException
5
+ from fastapi.middleware.cors import CORSMiddleware
6
 
7
+ app = FastAPI()
8
+
9
+ app.add_middleware(
10
+ CORSMiddleware,
11
+ allow_origins=["*"],
12
+ allow_credentials=True,
13
+ allow_methods=["*"],
14
+ allow_headers=["*"],
15
+ )
16
 
17
  LANGUAGES = ["python", "javascript", "html", "typescript", "java", "cpp", "php", "go", "css", "c"]
18
 
19
+ def fetch_github_page(**kwargs) -> str:
20
  try:
21
  params = '&'.join([f'{k}={v}' for k, v in kwargs.items()])
22
  url = 'https://github.com/topics/hacktoberfest?{}'.format(params)
23
  response = requests.get(url)
24
  response.raise_for_status()
25
+
26
+ return response.text
27
+
28
  except requests.RequestException as e:
29
+ raise HTTPException(status_code=502, detail=f"Error fetching data: {e}")
30
 
31
+ def parse_repositories(page_content: str) -> List[Dict[str, Any]]:
32
  soup = bs4.BeautifulSoup(page_content, 'html.parser')
33
  articles = soup.find_all("article")
34
  repositories = []
 
39
  if len(repo_info) >= 3 and "Star" in repo_info[2]:
40
  owner = repo_info[0]
41
  repo_name = repo_info[1]
42
+ stars = repo_info[2][6:-1] if repo_info[2].startswith("Star\n") else repo_info[2]
 
43
 
44
  repositories.append({
45
  "repo": repo_name,
46
  "owner": owner,
47
  "stars": stars,
 
48
  })
49
 
50
  return repositories
51
 
52
+ @app.get("/")
53
+ def index():
54
+ return {"name": "github repo scraper", "author": "arpy8"}
55
+
56
+ @app.get("/all")
57
  def get_all_repos():
58
+ payload: Dict[str, Any] = {}
59
+
60
  for lang in LANGUAGES:
61
  page_content = fetch_github_page(l=lang)
 
 
 
 
62
  repositories = parse_repositories(page_content)
63
  payload[lang] = repositories
 
 
 
 
64
 
65
+ return payload
 
 
 
 
66
 
67
+ @app.get("/data/{lang}")
68
+ def get_repos(lang: str):
69
+ page_content = fetch_github_page(l=lang)
70
  repositories = parse_repositories(page_content)
71
+
72
+ return repositories
73
 
74
 
75
+ if __name__=="__main__":
76
+ import uvicorn
77
+ uvicorn.run(app)
requirements.txt CHANGED
@@ -1,6 +1,5 @@
1
- Flask==3.0.3
 
2
  beautifulsoup4==4.12.3
3
  requests==2.32.3
4
- html5lib==1.1
5
- flask-cors==5.0.0
6
- gunicorn
 
1
+ fastapi==0.95.2
2
+ uvicorn[standard]==0.22.0
3
  beautifulsoup4==4.12.3
4
  requests==2.32.3
5
+ html5lib==1.1