File size: 2,317 Bytes
c680313
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import requests
import re
import os
import dotenv
from huggingface_hub import HfApi

# Module-level configuration, executed once at import time.
# load_dotenv() is python-dotenv's loader; presumably it populates os.environ
# from a local .env file so the lookups below can find the values.
dotenv.load_dotenv()
# os.getenv returns None when the variable is not set, so both values below
# may be None if the environment is not configured.
hf_token = os.getenv("HF_API_TOKEN")
access_key = os.getenv("ACCESS_KEY")
# Shared Hugging Face Hub client used by download_datafile / upload_datafile.
api = HfApi(token=hf_token)

def fetch_page(url, timeout=30):
    """Fetch *url* with an HTTP GET and return the raw response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server (connect and read) before
            giving up. ``requests`` waits indefinitely when no timeout is
            given, so a stalled server would otherwise hang the caller.

    Returns:
        The response body as ``bytes`` (``Response.content``). The HTTP
        status code is not checked, so error pages are returned as-is.

    Raises:
        requests.exceptions.RequestException: On connection failures or
            when the timeout elapses.
    """
    # The request identifies itself with a Googlebot-style User-Agent string.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/131.0.6778.135 Safari/537.36'
    }
    page_content = requests.get(url, headers=HEADERS, timeout=timeout).content
    return page_content

def check_data_in_file(data, file):
    """Report whether *data* appears as a complete line in *file*.

    The whole file is read and split into lines; the check is an exact
    match against one of those lines, not a substring search.

    Args:
        data: The line content to look for (without a trailing newline).
        file: Path of the text file to inspect.

    Returns:
        True if some line of the file equals *data*, otherwise False.
    """
    with open(file, 'r') as handle:
        known_lines = handle.read().splitlines()
    return data in known_lines
    
def write_data_to_file(data, file):
    """Append *data* to *file* as a new line.

    Args:
        data: Text to append; a newline character is added after it.
        file: Path of the file to append to (opened in append mode).

    Returns:
        Always True once the write has completed.
    """
    with open(file, 'a') as sink:
        sink.write(data + '\n')
    return True

def verify_simple_title(title):
    """Check that *title* consists only of an allowed set of characters.

    Allowed characters are ASCII letters, digits, whitespace and the
    punctuation/symbol characters listed in the pattern below. An empty
    title is rejected because at least one character is required.

    Args:
        title: The title string to validate.

    Returns:
        True if every character of *title* is in the allowed set,
        otherwise False.
    """
    allowed_chars = re.compile(r'^[a-zA-Z0-9\s\.\-\+\*/=\(\)\[\]\{\},:;"\'?\>\<\@\#\%\^\*\|\_\~\`]+$')
    return allowed_chars.match(title) is not None
    
def download_datafile(filename):
    """Download *filename* from the ReXploreData dataset repo into the CWD.

    The download is always forced, and both the local directory and the
    cache directory are set to the current directory.

    Args:
        filename: Name of the file inside the dataset repository.

    Returns:
        True if the download call completed, False if it raised (the
        error message is printed).
    """
    try:
        api.hf_hub_download(
            repo_id="raannakasturi/ReXploreData",
            filename=filename,
            repo_type="dataset",
            local_dir='.',
            cache_dir='.',
            force_download=True,
        )
    except Exception as err:
        print(str(err))
        return False
    return True

def upload_datafile(filename):
    """Upload the local *filename* to the ReXploreData dataset repo.

    After the upload call returns, the local copy is deleted. The file is
    stored in the repository under the same name.

    Args:
        filename: Path of the local file; also used as the path in the repo.

    Returns:
        True if both the upload and the local removal completed, False if
        either raised (the error message is printed).
    """
    try:
        api.upload_file(
            path_or_fileobj=filename,
            path_in_repo=filename,
            repo_id="raannakasturi/ReXploreData",
            repo_type="dataset",
        )
        # Removal stays inside the try so a failure here is also reported
        # as False rather than propagating.
        os.remove(filename)
    except Exception as err:
        print(str(err))
        return False
    return True
    
def reset_datafiles(user_access_key):
    """Empty the tracked data files in the dataset repo after an access check.

    For each of ``arxiv.txt`` and ``pmc.txt`` the file is downloaded,
    truncated to zero length locally, and uploaded again.

    Args:
        user_access_key: Key supplied by the caller; it must equal the
            module-level ``access_key`` (read from the ``ACCESS_KEY``
            environment variable).

    Returns:
        The string ``"Invalid access key"`` when authorization fails.
        Otherwise True if every file was emptied and uploaded, or False if
        any of them failed.
    """
    import hmac  # local import keeps this block self-contained

    # Reject when no key is configured: with ACCESS_KEY unset, access_key is
    # None and a plain equality test would accept a caller passing None.
    # compare_digest performs the comparison in constant time; both sides are
    # encoded because it only accepts ASCII str or bytes-like arguments.
    authorized = (
        bool(access_key)
        and isinstance(user_access_key, str)
        and hmac.compare_digest(
            user_access_key.encode("utf-8"), access_key.encode("utf-8")
        )
    )
    if not authorized:
        return "Invalid access key"

    all_reset = True
    for filename in ('arxiv.txt', 'pmc.txt'):
        try:
            # The download result is deliberately not checked: the file is
            # overwritten with empty content regardless of what was fetched.
            download_datafile(filename)
            with open(filename, 'w') as f:
                f.write('')
            # upload_datafile signals failure by returning False rather than
            # raising, so its result must be inspected explicitly.
            if not upload_datafile(filename):
                all_reset = False
        except Exception as e:
            # Keep going with the remaining files, but report the failure.
            print(str(e))
            all_reset = False
    return all_reset