buzzCraft committed
Commit: be5af2d
Parent(s): 681cecd

Created setup.py and updated readme
Files changed:
- .gitignore +1 -0
- README.md +13 -0
- setup.py +40 -0
- src/database.py +6 -2
- src/extractor.py +1 -1
.gitignore CHANGED
@@ -11,3 +11,4 @@
 .idea/vcs.xml
 extractor.log
 data/games.db
+/data/dataset/SoccerNet
README.md CHANGED
@@ -12,6 +12,19 @@ Rename .env_demo to .env and fill in the required fields.
 
 ## Setting up the database
 
+By running
+````bash
+python setup.py
+````
+from the project root, all files will be downloaded and the database will be set up.
+Before running the setup, make sure to fill in the required fields in the .env file, and run
+````bash
+pip install soccernet
+````
+as this package is not in the requirements.txt file.
+Expected setup time is around 10 minutes.
+
+If you want to download the data and set up the database manually, you can do so by following the instructions below.
 ### Required data
 The data required to run the code is not included in this repository.
 The data can be downloaded from the [Soccernet](https://www.soccer-net.org/data).
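The setup depends on the .env file mentioned above; judging from the src/database.py change further down, DATABASE_PATH is the field the database code reads. A minimal pre-flight check is sketched below; it is not part of the commit, and the error message and script name are illustrative only.

```python
# Hypothetical pre-flight check -- not part of this commit.
# Assumes DATABASE_PATH is the required .env field, as read in src/database.py.
import os

import dotenv

dotenv.load_dotenv()

db_path = os.getenv("DATABASE_PATH")
if not db_path:
    raise SystemExit("DATABASE_PATH is missing from .env; fill it in before running setup.py")
print(f"Database will be created at: {db_path}")
```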
setup.py ADDED
@@ -0,0 +1,40 @@
+# Try to import the SoccerNet package; if that fails, ask the user to install it
+try:
+    from SoccerNet.Downloader import SoccerNetDownloader
+except ImportError:
+    print("SoccerNet package not found. Please install it by running 'pip install soccernet'")
+    exit(1)
+
+from src.database import process_json_files, fill_Augmented_Team, fill_Augmented_League
+import threading
+
+mySoccerNetDownloader = SoccerNetDownloader(LocalDirectory="data/dataset/SoccerNet")
+
+# Download function
+def download_labels(file_name):
+    try:
+        mySoccerNetDownloader.downloadGames(files=[file_name], split=["train", "valid", "test"])
+    except Exception as e:
+        print(f"Error downloading {file_name}: {e}")
+
+
+# Create threads for downloading different sets of labels
+thread_v2 = threading.Thread(target=download_labels, args=("Labels-v2.json",))
+thread_caption = threading.Thread(target=download_labels, args=("Labels-caption.json",))
+
+# Start the threads
+thread_v2.start()
+thread_caption.start()
+
+# Wait for both threads to complete
+thread_v2.join()
+thread_caption.join()
+
+print("All files downloaded successfully!")
+print("Creating database..")
+
+
+
+process_json_files("data/dataset/SoccerNet/")
+fill_Augmented_Team("data/Dataset/augmented_teams.csv")
+fill_Augmented_League("data/Dataset/augmented_leagues.csv")
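The two label downloads are independent, which is why setup.py can run them on separate threads. For reference, an equivalent sketch using concurrent.futures instead of raw threads is shown below; it reuses the download_labels function defined in setup.py and is an alternative, not what the commit ships.

```python
# Alternative sketch using concurrent.futures instead of raw threads -- not part of this commit.
from concurrent.futures import ThreadPoolExecutor

files = ["Labels-v2.json", "Labels-caption.json"]

with ThreadPoolExecutor(max_workers=len(files)) as pool:
    # download_labels is the function defined in setup.py above; it already
    # catches and prints per-file download errors.
    pool.map(download_labels, files)
```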
src/database.py CHANGED
@@ -3,8 +3,12 @@ from sqlalchemy.orm import declarative_base, sessionmaker
 import pandas as pd
 import os
 import json
+import dotenv
+dotenv.load_dotenv()
 
-
+db_uri = os.getenv('DATABASE_PATH')
+db_uri = f"sqlite:///{db_uri}"
+engine = create_engine(db_uri, echo=False)
 Base = declarative_base()
 
 
@@ -445,7 +449,7 @@ def fill_Augmented_League(file_path):
 
 if __name__ == "__main__":
     # Example directory path
-    process_json_files('../data/Dataset/
+    process_json_files('../data/Dataset/SoccerNet/')
     fill_Augmented_Team('../data/Dataset/augmented_teams.csv')
     fill_Augmented_League('../data/Dataset/augmented_leagues.csv')
     # Rename the event/annotation table to something more descriptive. Events are fucking everything else over
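With the engine now created at module level and sessionmaker already imported (see the hunk header), other code can open sessions against the configured SQLite database. The sketch below illustrates that usage; it is not code from the commit, and it assumes engine and Base remain module-level names in src/database.py.

```python
# Sketch of using the module-level engine from src/database.py -- not part of this commit.
from sqlalchemy.orm import sessionmaker

from src.database import Base, engine

Session = sessionmaker(bind=engine)

# Create any tables that do not exist yet, then open a short-lived session.
Base.metadata.create_all(engine)
with Session() as session:
    # e.g. session.query(...) / session.add(...) against the SoccerNet tables
    pass
```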
src/extractor.py CHANGED
@@ -414,7 +414,7 @@ def update_prompt(prompt, properties, pk, properties_original, retrievers):
     if orig_value != updated_value and pk_value:
         update_statement = f"\n- {orig_value} (now referred to as {updated_value}) has a primary key: {pk_value}."
     elif orig_value != updated_value:
-        update_statement = f"\n- {orig_value} (now referred to as {updated_value}."
+        update_statement = f"\n- {orig_value} (now referred to as {updated_value}.)"
     elif pk_value:
         update_statement = f"\n- {orig_value} has a primary key: {pk_value}."
     elif orig_value == updated_value and pk_value: