buzzCraft committed
Commit: be5af2d
Parent(s): 681cecd

Created setup.py and updated readme
Files changed:
- .gitignore +1 -0
- README.md +13 -0
- setup.py +40 -0
- src/database.py +6 -2
- src/extractor.py +1 -1
.gitignore CHANGED
@@ -11,3 +11,4 @@
 .idea/vcs.xml
 extractor.log
 data/games.db
+/data/dataset/SoccerNet
README.md CHANGED
@@ -12,6 +12,19 @@ Rename .env_demo to .env and fill in the required fields.
 
 ## Setting up the database
 
+By running
+````bash
+python setup.py
+````
+from the project root, all files will be downloaded and the database will be set up.
+Before running the setup, make sure to fill in the required fields in the .env file, and run
+````bash
+pip install soccernet
+````
+as this package is not in the requirements.txt file.
+Expected setup time is around 10 minutes.
+
+If you want to download the data and set up the database manually, you can do so by following the instructions below.
 ### Required data
 The data required to run the code is not included in this repository.
 The data can be downloaded from the [Soccernet](https://www.soccer-net.org/data).
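The setup depends on the .env file mentioned above; judging from the src/database.py change further down, DATABASE_PATH is the field the database code reads. A minimal pre-flight check is sketched below; it is not part of the commit, and the error message and script name are illustrative only.

```python
# Hypothetical pre-flight check -- not part of this commit.
# Assumes DATABASE_PATH is the required .env field, as read in src/database.py.
import os

import dotenv

dotenv.load_dotenv()

db_path = os.getenv("DATABASE_PATH")
if not db_path:
    raise SystemExit("DATABASE_PATH is missing from .env; fill it in before running setup.py")
print(f"Database will be created at: {db_path}")
```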
setup.py ADDED
@@ -0,0 +1,40 @@
+# Try to import the SoccerNet package; if that fails, ask the user to install it
+try:
+    from SoccerNet.Downloader import SoccerNetDownloader
+except ImportError:
+    print("SoccerNet package not found. Please install it by running 'pip install soccernet'")
+    exit(1)
+
+from src.database import process_json_files, fill_Augmented_Team, fill_Augmented_League
+import threading
+
+mySoccerNetDownloader = SoccerNetDownloader(LocalDirectory="data/dataset/SoccerNet")
+
+# Download function
+def download_labels(file_name):
+    try:
+        mySoccerNetDownloader.downloadGames(files=[file_name], split=["train", "valid", "test"])
+    except Exception as e:
+        print(f"Error downloading {file_name}: {e}")
+
+
+# Create threads for downloading different sets of labels
+thread_v2 = threading.Thread(target=download_labels, args=("Labels-v2.json",))
+thread_caption = threading.Thread(target=download_labels, args=("Labels-caption.json",))
+
+# Start the threads
+thread_v2.start()
+thread_caption.start()
+
+# Wait for both threads to complete
+thread_v2.join()
+thread_caption.join()
+
+print("All files downloaded successfully!")
+print("Creating database..")
+
+
+
+process_json_files("data/dataset/SoccerNet/")
+fill_Augmented_Team("data/Dataset/augmented_teams.csv")
+fill_Augmented_League("data/Dataset/augmented_leagues.csv")
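The two label downloads are independent, which is why setup.py can run them on separate threads. For reference, an equivalent sketch using concurrent.futures instead of raw threads is shown below; it reuses the download_labels function defined in setup.py and is an alternative, not what the commit ships.

```python
# Alternative sketch using concurrent.futures instead of raw threads -- not part of this commit.
from concurrent.futures import ThreadPoolExecutor

files = ["Labels-v2.json", "Labels-caption.json"]

with ThreadPoolExecutor(max_workers=len(files)) as pool:
    # download_labels is the function defined in setup.py above; it already
    # catches and prints per-file download errors.
    pool.map(download_labels, files)
```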
src/database.py CHANGED
@@ -3,8 +3,12 @@ from sqlalchemy.orm import declarative_base, sessionmaker
 import pandas as pd
 import os
 import json
+import dotenv
+dotenv.load_dotenv()
 
-
+db_uri = os.getenv('DATABASE_PATH')
+db_uri = f"sqlite:///{db_uri}"
+engine = create_engine(db_uri, echo=False)
 Base = declarative_base()
 
 
@@ -445,7 +449,7 @@ def fill_Augmented_League(file_path):
 
 if __name__ == "__main__":
     # Example directory path
-    process_json_files('../data/Dataset/
+    process_json_files('../data/Dataset/SoccerNet/')
     fill_Augmented_Team('../data/Dataset/augmented_teams.csv')
     fill_Augmented_League('../data/Dataset/augmented_leagues.csv')
     # Rename the event/annotation table to something more descriptive. Events are fucking everything else over
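With the engine now created at module level and sessionmaker already imported (see the hunk header), other code can open sessions against the configured SQLite database. The sketch below illustrates that usage; it is not code from the commit, and it assumes engine and Base remain module-level names in src/database.py.

```python
# Sketch of using the module-level engine from src/database.py -- not part of this commit.
from sqlalchemy.orm import sessionmaker

from src.database import Base, engine

Session = sessionmaker(bind=engine)

# Create any tables that do not exist yet, then open a short-lived session.
Base.metadata.create_all(engine)
with Session() as session:
    # e.g. session.query(...) / session.add(...) against the SoccerNet tables
    pass
```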
src/extractor.py CHANGED
@@ -414,7 +414,7 @@ def update_prompt(prompt, properties, pk, properties_original, retrievers):
     if orig_value != updated_value and pk_value:
         update_statement = f"\n- {orig_value} (now referred to as {updated_value}) has a primary key: {pk_value}."
     elif orig_value != updated_value:
-        update_statement = f"\n- {orig_value} (now referred to as {updated_value}."
+        update_statement = f"\n- {orig_value} (now referred to as {updated_value}.)"
     elif pk_value:
         update_statement = f"\n- {orig_value} has a primary key: {pk_value}."
     elif orig_value == updated_value and pk_value: