Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,71 +25,6 @@ from sklearn.svm import SVC
|
|
| 25 |
# Ignore FutureWarning messages
|
| 26 |
warnings.simplefilter(action='ignore', category=FutureWarning)
|
| 27 |
|
| 28 |
-
import os
|
| 29 |
-
import sys
|
| 30 |
-
from tempfile import NamedTemporaryFile
|
| 31 |
-
from urllib.request import urlopen
|
| 32 |
-
from urllib.parse import unquote, urlparse
|
| 33 |
-
from urllib.error import HTTPError
|
| 34 |
-
from zipfile import ZipFile
|
| 35 |
-
import tarfile
|
| 36 |
-
import shutil
|
| 37 |
-
|
| 38 |
-
CHUNK_SIZE = 40960
|
| 39 |
-
DATA_SOURCE_MAPPING = 'sentiment-analysis-dataset:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F989445%2F1808590%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240418%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240418T100202Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D37697dd0d9910676a3f12986b24306fc3726be4de82536c784ffb79deff0ba33d8973d6d612a53bcf9ed39bd7ad8a1d69bb34c42a34c7d6cffee6dd3048a9ef68f047745664f48ea6f3773a1f263129a6f78d48923235cc363b4081daadea014b0958575bf8376d565858404a8b1be7e5f317bdd9f5823ce4777f0b7052445c648bcda039294c804978828087705abe4416a6f9a0e0743388667017128a5ab2ef5ab2dade0d40d1659f4313296501907b4baec3161131e151e6f5b982eee9a6f7eb1b022da9c874f216d7fac981dc1351e9001ee56d03d1da8b2e0d4c97320f18d7e9b00ec63f4ba7444d81595cc8edff2b05f13aef4b204dd2710d0fddf0ef9'
|
| 40 |
-
|
| 41 |
-
KAGGLE_INPUT_PATH='/kaggle/input'
|
| 42 |
-
KAGGLE_WORKING_PATH='/kaggle/working'
|
| 43 |
-
KAGGLE_SYMLINK='kaggle'
|
| 44 |
-
|
| 45 |
-
import subprocess
|
| 46 |
-
subprocess.run(["umount", "/kaggle/input/"], stderr=subprocess.DEVNULL)
|
| 47 |
-
shutil.rmtree('/kaggle/input', ignore_errors=True)
|
| 48 |
-
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
|
| 49 |
-
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)
|
| 50 |
-
|
| 51 |
-
try:
|
| 52 |
-
os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
|
| 53 |
-
except FileExistsError:
|
| 54 |
-
pass
|
| 55 |
-
try:
|
| 56 |
-
os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
|
| 57 |
-
except FileExistsError:
|
| 58 |
-
pass
|
| 59 |
-
|
| 60 |
-
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
|
| 61 |
-
directory, download_url_encoded = data_source_mapping.split(':')
|
| 62 |
-
download_url = unquote(download_url_encoded)
|
| 63 |
-
filename = urlparse(download_url).path
|
| 64 |
-
destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
|
| 65 |
-
try:
|
| 66 |
-
with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
|
| 67 |
-
total_length = fileres.headers['content-length']
|
| 68 |
-
print(f'Downloading {directory}, {total_length} bytes compressed')
|
| 69 |
-
dl = 0
|
| 70 |
-
data = fileres.read(CHUNK_SIZE)
|
| 71 |
-
while len(data) > 0:
|
| 72 |
-
dl += len(data)
|
| 73 |
-
tfile.write(data)
|
| 74 |
-
done = int(50 * dl / int(total_length))
|
| 75 |
-
sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
|
| 76 |
-
sys.stdout.flush()
|
| 77 |
-
data = fileres.read(CHUNK_SIZE)
|
| 78 |
-
if filename.endswith('.zip'):
|
| 79 |
-
with ZipFile(tfile) as zfile:
|
| 80 |
-
zfile.extractall(destination_path)
|
| 81 |
-
else:
|
| 82 |
-
with tarfile.open(tfile.name) as tarfile:
|
| 83 |
-
tarfile.extractall(destination_path)
|
| 84 |
-
print(f'\nDownloaded and uncompressed: {directory}')
|
| 85 |
-
except HTTPError as e:
|
| 86 |
-
print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
|
| 87 |
-
continue
|
| 88 |
-
except OSError as e:
|
| 89 |
-
print(f'Failed to load {download_url} to path {destination_path}')
|
| 90 |
-
continue
|
| 91 |
-
|
| 92 |
-
print('Data source import complete.')
|
| 93 |
|
| 94 |
import numpy as np # linear algebra
|
| 95 |
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
|
|
|
|
| 25 |
# Ignore FutureWarning messages
|
| 26 |
warnings.simplefilter(action='ignore', category=FutureWarning)
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
import numpy as np # linear algebra
|
| 30 |
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
|