grapplerulrich committed 59d5e33
Parent: a69d4fe

Create directory for cache if not exists

Files changed:
- app.py +4 -2
- beautiful_soup/beautiful_soup.py +4 -2
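The pattern this commit applies is the same in both files: derive the cache file path, then create its parent directory with makedirs(..., exist_ok=True) before checking for a cached copy. A minimal sketch of that guard, assuming the path always includes a directory component as the repo's paths do (the function name read_cached is illustrative, not from the repo):

from os import makedirs
from os.path import dirname, exists

def read_cached( file_path ):
    # Create the cache directory on first use; exist_ok=True makes the
    # call a no-op when the directory already exists. Assumes file_path
    # has a directory component, as all paths in this repo do.
    makedirs( dirname( file_path ), exist_ok=True )
    if exists( file_path ):
        with open( file_path, 'r' ) as cached:
            return cached.read()
    return None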
app.py
CHANGED
@@ -1,5 +1,5 @@
-from os import remove
-from os.path import exists
+from os import makedirs, remove
+from os.path import exists, dirname
 from functools import cache
 import json
 import streamlit as st
@@ -34,6 +34,7 @@ def search_results( query ):
     file_path = 'search-results/' + slugify( query ) + '.json'
 
     results = []
+    makedirs(dirname(file_path), exist_ok=True)
     if exists( file_path ):
         with open( file_path, 'r' ) as results_file:
             results = json.load( results_file )
@@ -51,6 +52,7 @@ def search_results( query ):
 
 def content_summary( url_id, content ):
     file_path = 'summaries/' + url_id + '.json'
+    makedirs(dirname(file_path), exist_ok=True)
     if exists( file_path ):
         with open( file_path, 'r' ) as file:
             summary = json.load( file )
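Note that makedirs(dirname(file_path), exist_ok=True) runs unconditionally before the exists() check, so the first request creates search-results/ and summaries/ up front instead of failing later when the function writes the file back to the cache (the write path sits outside the shown hunks).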
beautiful_soup/beautiful_soup.py
CHANGED
@@ -1,5 +1,6 @@
 import uuid
-from os
+from os import makedirs, remove
+from os.path import exists, dirname
 from bs4 import BeautifulSoup
 import requests
 
@@ -14,6 +15,7 @@ import requests
 
 def get_url_content( url ):
     file_path = 'page-content/' + uuid.uuid5( uuid.NAMESPACE_URL, url ).hex + '.txt'
+    makedirs(dirname(file_path), exist_ok=True)
     if exists( file_path ):
         with open( file_path, 'r' ) as file_content:
             content = file_content.read()
@@ -49,7 +51,7 @@ def extract_content( url ):
 # Make request and get html content.
 def get_soup( url ):
     file_path = 'web-pages/' + uuid.uuid5( uuid.NAMESPACE_URL, url ).hex + '.html'
-
+    makedirs(dirname(file_path), exist_ok=True)
     if exists( file_path ):
         with open( file_path, 'r' ) as web_page:
             html = web_page.read()
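The cache file names in this module come from uuid.uuid5, which hashes the URL deterministically, so the same URL always resolves to the same cache file. A short sketch of the path derivation together with the directory guard added in this commit (the example URL is illustrative):

import uuid
from os import makedirs
from os.path import dirname

url = 'https://example.com/'  # illustrative URL, not from the repo
# uuid5 is deterministic for a given namespace and name, so repeated
# runs map the same URL to the same cache file.
file_path = 'web-pages/' + uuid.uuid5( uuid.NAMESPACE_URL, url ).hex + '.html'
makedirs( dirname( file_path ), exist_ok=True )  # creates 'web-pages/' once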