from os import getenv
from os.path import exists
from functools import cache
import json

import streamlit as st
from dotenv import load_dotenv
from googleapiclient.discovery import build
from slugify import slugify
from transformers import pipeline

# Local helper module from this project that fetches and extracts page content.
from beautiful_soup.app import get_url_content


@cache
def google_search_api_request( query ):
    """Query the Google Custom Search API, memoizing the response per query."""
    load_dotenv()

    api_key = getenv('GOOGLE_SEARCH_API_KEY')
    # cx = getenv('GOOGLE_SEARCH_ENGINE_ID')

    service = build(
        "customsearch",
        "v1",
        developerKey=api_key,
        cache_discovery=False
    )

    return service.cse().list(
        q=query,
        cx='05048cc2df6134a06',
        num=5,
    ).execute()


def search_results( query ):
    """Return search results for a query, using a local JSON file as a cache."""
    # The 'search-results' directory must already exist for caching to work.
    file_path = 'search-results/' + slugify( query ) + '.json'
    results = []

    if exists( file_path ):
        with open( file_path, 'r' ) as results_file:
            results = json.load( results_file )
    else:
        search_result = google_search_api_request( query )

        if int( search_result['searchInformation']['totalResults'] ) > 0:
            results = search_result['items']

            with open( file_path, 'w' ) as results_file:
                json.dump( results, results_file )

    if len( results ) == 0:
        raise Exception('No results found.')

    return results


def main():
    st.title('Google Search')
    query = st.text_input('Search query')

    if query:
        try:
            results = search_results( query )
        except Exception as exception:
            st.exception(exception)
            return  # Stop here: results is undefined if the search failed.

        # Load the summarization model once, rather than once per result.
        summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

        for result in results:
            st.write(result['link'])

            try:
                content = get_url_content( result['link'] )
                summary = summarizer(content, max_length=130, min_length=30, do_sample=False, truncation=True)

                for sentence in summary:
                    st.write(sentence['summary_text'])
            except Exception as exception:
                st.exception(exception)


if __name__ == '__main__':
    main()