Spaces:
Sleeping
Sleeping
pablo-sampaio
committed on
Commit
•
4bb2add
1
Parent(s):
4808718
Small updates
Browse files
- match_info_crawler.py +9 -13
match_info_crawler.py
CHANGED
@@ -13,21 +13,17 @@ import re
|
|
13 |
import google.generativeai as genai
|
14 |
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
with open('KEY_GOOGLE_AI', 'r') as file:
|
20 |
-
GOOGLE_API_KEY = file.read().replace('\n', '')
|
21 |
|
22 |
-
genai.configure(
|
23 |
|
24 |
|
25 |
USE_BEST_EXTRACTOR = True
|
26 |
|
27 |
sao_paulo_timezone = pytz.timezone('America/Sao_Paulo')
|
28 |
|
29 |
-
#CURRENT_DATE = datetime.now(sao_paulo_timezone)
|
30 |
-
|
31 |
|
32 |
SYSTEM_PROMPT = '''You will extract soccer match information from the text extracted from a html page,
|
33 |
and you have to output the matches in this format:
|
@@ -35,19 +31,20 @@ and you have to output the matches in this format:
|
|
35 |
|
36 |
Regarding [START TIME / MATCH TIME]:
|
37 |
- if the match has already started, report the elapsed time in the match
|
38 |
-
- if it hasn't started write 'not started'
|
39 |
- if the match has finished, report 'finished'
|
40 |
|
41 |
-
|
42 |
- ignore matches for youth (under-20) and women leagues
|
43 |
-
-
|
|
|
|
|
44 |
```
|
45 |
PSG 0 x 1 Borussia Dortmund - Champions League - finished
|
46 |
Palmeiras 0 x 2 Atletico Paranaense - Campeonato Brasileiro - Série A - finished
|
47 |
```
|
48 |
'''
|
49 |
|
50 |
-
|
51 |
if USE_BEST_EXTRACTOR:
|
52 |
EXTRACTOR_MODEL = genai.GenerativeModel('gemini-1.5-pro-latest',
|
53 |
system_instruction=SYSTEM_PROMPT) # TODO: setar uma temperatura bem baixa!
|
@@ -151,4 +148,3 @@ def get_matches_info(date_str: str):
|
|
151 |
if __name__ == '__main__':
|
152 |
matches_info = get_matches_info('yesterday')
|
153 |
print(matches_info)
|
154 |
-
|
|
|
13 |
import google.generativeai as genai
|
14 |
|
15 |
|
16 |
+
from dotenv import load_dotenv, find_dotenv
|
17 |
+
|
18 |
+
load_dotenv(find_dotenv()) # should load the GOOGLE_API_KEY
|
|
|
|
|
19 |
|
20 |
+
genai.configure()
|
21 |
|
22 |
|
23 |
USE_BEST_EXTRACTOR = True
|
24 |
|
25 |
sao_paulo_timezone = pytz.timezone('America/Sao_Paulo')
|
26 |
|
|
|
|
|
27 |
|
28 |
SYSTEM_PROMPT = '''You will extract soccer match information from the text extracted from a html page,
|
29 |
and you have to output the matches in this format:
|
|
|
31 |
|
32 |
Regarding [START TIME / MATCH TIME]:
|
33 |
- if the match has already started, report the elapsed time in the match
|
34 |
+
- if it hasn't started, write 'not started'
|
35 |
- if the match has finished, report 'finished'
|
36 |
|
37 |
+
Additional instructions that you must follow:
|
38 |
- ignore matches for youth (under-20) and women leagues
|
39 |
+
- but report friendly matches, specially between national teams
|
40 |
+
|
41 |
+
Example output:
|
42 |
```
|
43 |
PSG 0 x 1 Borussia Dortmund - Champions League - finished
|
44 |
Palmeiras 0 x 2 Atletico Paranaense - Campeonato Brasileiro - Série A - finished
|
45 |
```
|
46 |
'''
|
47 |
|
|
|
48 |
if USE_BEST_EXTRACTOR:
|
49 |
EXTRACTOR_MODEL = genai.GenerativeModel('gemini-1.5-pro-latest',
|
50 |
system_instruction=SYSTEM_PROMPT) # TODO: setar uma temperatura bem baixa!
|
|
|
148 |
if __name__ == '__main__':
|
149 |
matches_info = get_matches_info('yesterday')
|
150 |
print(matches_info)
|
|