buzzCraft committed on
Commit
be5af2d
1 Parent(s): 681cecd

Created setup.py and updated readme

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. README.md +13 -0
  3. setup.py +40 -0
  4. src/database.py +6 -2
  5. src/extractor.py +1 -1
.gitignore CHANGED
@@ -11,3 +11,4 @@
11
  .idea/vcs.xml
12
  extractor.log
13
  data/games.db
 
 
11
  .idea/vcs.xml
12
  extractor.log
13
  data/games.db
14
+ /data/dataset/SoccerNet
README.md CHANGED
@@ -12,6 +12,19 @@ Rename .env_demo to .env and fill in the required fields.
12
 
13
  ## Setting up the database
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  ### Required data
16
  The data required to run the code is not included in this repository.
17
  The data can be downloaded from the [Soccernet](https://www.soccer-net.org/data).
 
12
 
13
  ## Setting up the database
14
 
15
+ By running
16
+ ````bash
17
+ python setup.py
18
+ ````
19
+ from the project root, all files will be downloaded and the database will be set up.
20
+ Before running the setup, make sure to fill in the required fields in the .env file, and run
21
+ ````bash
22
+ pip install soccernet
23
+ ````
24
+ as this package is not in the requirements.txt file.
25
+ Expected setup time is around 10 minutes.
26
+
27
+ If you want to download the data and set up the database manually, you can do so by following the instructions below.
28
  ### Required data
29
  The data required to run the code is not included in this repository.
30
  The data can be downloaded from the [Soccernet](https://www.soccer-net.org/data).
setup.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""One-shot project setup: download the SoccerNet label files and build the database.

Run from the project root with ``python setup.py``. The script downloads the
label files for every split, then loads them and the augmented team/league
CSV files into the database via the helpers in ``src.database``.
"""
import sys
import threading

# SoccerNet is an extra dependency of this script; fail with an actionable
# message instead of a bare ImportError traceback when it is missing.
try:
    from SoccerNet.Downloader import SoccerNetDownloader
except ImportError:
    print("SoccerNet package not found. Please install it by running 'pip install soccernet'")
    sys.exit(1)

from src.database import process_json_files, fill_Augmented_Team, fill_Augmented_League

mySoccerNetDownloader = SoccerNetDownloader(LocalDirectory="data/dataset/SoccerNet")

# Label files to fetch; one download thread is started per entry.
LABEL_FILES = ("Labels-v2.json", "Labels-caption.json")


def download_labels(file_name):
    """Download one label file for the train, valid and test splits.

    Errors are reported on stdout instead of being raised, so a failure in
    one download thread does not hide the outcome of the other.

    Args:
        file_name: Name of the label file to request from the downloader.

    Returns:
        True if the download call finished without raising, False otherwise.
    """
    try:
        mySoccerNetDownloader.downloadGames(files=[file_name], split=["train", "valid", "test"])
    except Exception as e:
        print(f"Error downloading {file_name}: {e}")
        return False
    return True


def main():
    """Download all label files in parallel, then populate the database.

    Exits with status 1 without touching the database if any download
    failed, so a partial dataset is never reported as a successful setup.
    """
    failed = []

    def worker(file_name):
        # Thread targets cannot return a value, so record failures here.
        if not download_labels(file_name):
            failed.append(file_name)

    threads = [threading.Thread(target=worker, args=(name,)) for name in LABEL_FILES]
    for thread in threads:
        thread.start()
    # Wait for every download to finish before judging the overall result.
    for thread in threads:
        thread.join()

    if failed:
        print(f"Download failed for: {', '.join(failed)}. The database was not created.")
        sys.exit(1)

    print("All files downloaded successfully!")
    print("Creating database..")

    # NOTE(review): the download directory is spelled "data/dataset" while the
    # CSV files are read from "data/Dataset"; these are different directories
    # on a case-sensitive filesystem - confirm both spellings are intended.
    process_json_files("data/dataset/SoccerNet/")
    fill_Augmented_Team("data/Dataset/augmented_teams.csv")
    fill_Augmented_League("data/Dataset/augmented_leagues.csv")


if __name__ == "__main__":
    main()
src/database.py CHANGED
@@ -3,8 +3,12 @@ from sqlalchemy.orm import declarative_base, sessionmaker
3
  import pandas as pd
4
  import os
5
  import json
 
 
6
 
7
- engine = create_engine('sqlite:///../data/games.db', echo=False)
 
 
8
  Base = declarative_base()
9
 
10
 
@@ -445,7 +449,7 @@ def fill_Augmented_League(file_path):
445
 
446
  if __name__ == "__main__":
447
  # Example directory path
448
- process_json_files('../data/Dataset/SN-ASR_captions_and_actions/')
449
  fill_Augmented_Team('../data/Dataset/augmented_teams.csv')
450
  fill_Augmented_League('../data/Dataset/augmented_leagues.csv')
451
  # Rename the event/annotation table to something more descriptive. Events are fucking everything else over
 
3
  import pandas as pd
4
  import os
5
  import json
6
+ import dotenv
7
+ dotenv.load_dotenv()
8
 
# DATABASE_PATH is read from the environment. Without this check a missing
# variable would be formatted into the URI as the text "None", and SQLite
# would be pointed at a file with that name instead of reporting the
# misconfiguration.
db_path = os.getenv('DATABASE_PATH')
if not db_path:
    raise RuntimeError(
        "DATABASE_PATH is not set. Add it to the .env file before using the database."
    )
db_uri = f"sqlite:///{db_path}"
engine = create_engine(db_uri, echo=False)
12
  Base = declarative_base()
13
 
14
 
 
449
 
450
  if __name__ == "__main__":
451
  # Example directory path
452
+ process_json_files('../data/Dataset/SoccerNet/')
453
  fill_Augmented_Team('../data/Dataset/augmented_teams.csv')
454
  fill_Augmented_League('../data/Dataset/augmented_leagues.csv')
455
  # Rename the event/annotation table to something more descriptive. Events are fucking everything else over
src/extractor.py CHANGED
@@ -414,7 +414,7 @@ def update_prompt(prompt, properties, pk, properties_original, retrievers):
414
  if orig_value != updated_value and pk_value:
415
  update_statement = f"\n- {orig_value} (now referred to as {updated_value}) has a primary key: {pk_value}."
416
  elif orig_value != updated_value:
417
- update_statement = f"\n- {orig_value} (now referred to as {updated_value}."
418
  elif pk_value:
419
  update_statement = f"\n- {orig_value} has a primary key: {pk_value}."
420
  elif orig_value == updated_value and pk_value:
 
414
  if orig_value != updated_value and pk_value:
415
  update_statement = f"\n- {orig_value} (now referred to as {updated_value}) has a primary key: {pk_value}."
416
  elif orig_value != updated_value:
417
+ update_statement = f"\n- {orig_value} (now referred to as {updated_value}.)"
418
  elif pk_value:
419
  update_statement = f"\n- {orig_value} has a primary key: {pk_value}."
420
  elif orig_value == updated_value and pk_value: