# raccoon / main.py
# grapplerulrich — "Add caching and save search results url and HTML" (commit 151c2dd)
import json
from functools import cache
from os import getenv
from os.path import exists
from pathlib import Path

import streamlit as st
from dotenv import load_dotenv
from googleapiclient.discovery import build
from slugify import slugify

from beautiful_soup.app import extract_content
@cache
def google_search(query: str, cx: str = '05048cc2df6134a06') -> dict:
    """Run a Google Custom Search for *query* and return the raw API response dict.

    Args:
        query: Search terms to submit to the Custom Search API.
        cx: Custom Search Engine id. Defaults to the project's engine id;
            parameterized so other engines can be queried without editing code.
            TODO(review): consider reading this from GOOGLE_SEARCH_ENGINE_ID.

    Returns:
        The decoded JSON response from ``cse().list().execute()``.

    Note:
        Responses are memoized per (query, cx) via functools.cache, so repeated
        identical searches within one process hit the API only once.
    """
    api_key = getenv('GOOGLE_SEARCH_API_KEY')
    # cache_discovery=False: discovery file caching is unnecessary here and
    # avoids the oauth2client cache warning.
    service = build(
        "customsearch",
        "v1",
        developerKey=api_key,
        cache_discovery=False,
    )
    return service.cse().list(
        q=query,
        cx=cx,
    ).execute()
def main():
    """Streamlit page: search Google, cache result lists on disk, render each hit.

    Results for a query are cached as JSON under ``search-urls/<slug>.json``;
    a cache hit skips the API call entirely.
    """
    load_dotenv()
    st.title('Google Search')
    query = st.text_input('Search query')
    if not query:
        return  # guard clause: nothing to do until the user types a query

    # Bug fix: the original open(..., 'w') crashed when the cache directory
    # did not exist yet.
    cache_dir = Path('search-urls')
    cache_dir.mkdir(parents=True, exist_ok=True)
    file_path = cache_dir / (slugify(query) + '.json')

    if file_path.exists():
        with open(file_path, 'r', encoding='utf-8') as results_file:
            results = json.load(results_file)
    else:
        search_result = google_search(query)
        results = []
        # Only cache non-empty result sets, matching the original behavior of
        # not writing a file for queries with zero hits.
        if int(search_result['searchInformation']['totalResults']) > 0:
            results = search_result['items']
            with open(file_path, 'w', encoding='utf-8') as results_file:
                json.dump(results, results_file)

    if not results:
        st.write('No results found.')

    for item in results:
        # Bug fix: try/except moved inside the loop so one failing URL no
        # longer aborts rendering of every remaining result.
        try:
            st.write(item['link'])
            st.write(extract_content(item['link']))
        except Exception as e:
            st.exception(e)


if __name__ == '__main__':
    main()