# Spaces: Techymom / fetch_api_to_csv (Sleeping) - File size: 6,645 Bytes

# !pip install --upgrade gradio

import tempfile
import os
import gradio as gr
import requests
import xml.etree.ElementTree as ET
import pandas as pd
import csv
from io import StringIO
from datetime import datetime
import os
import tempfile

# Correctly defined functions for fetching articles and converting them to CSV format
def fetch_articles(keyword, max_results=10, timeout=30):
    """Search PubMed for *keyword* and return metadata for the matching articles.

    Two NCBI E-utilities calls are made: ESearch to obtain the matching
    PubMed IDs, then EFetch to download the records for those IDs as XML.

    Args:
        keyword: PubMed search term (may contain boolean operators).
        max_results: Maximum number of articles to request.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of dicts with the keys ``Title``, ``Authors``, ``Year``,
        ``DOI``, ``Abstract`` and ``Journal``. The list is empty when the
        search term is blank, ``max_results`` is not positive, or the search
        matches nothing.

    Raises:
        requests.RequestException: On network failure, timeout or a non-2xx
            HTTP status.
        xml.etree.ElementTree.ParseError: If a response is not valid XML.
    """
    # Nothing to search for: avoid a pointless (and error-prone) API call.
    if not keyword or not str(keyword).strip() or max_results <= 0:
        return []

    search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    search_params = {"db": "pubmed", "term": keyword, "retmax": max_results}
    search_response = requests.get(search_url, params=search_params, timeout=timeout)
    search_response.raise_for_status()
    search_tree = ET.fromstring(search_response.content)
    id_list = [id_.text for id_ in search_tree.findall('.//Id') if id_.text]

    # EFetch with an empty id list returns an error document, so stop here.
    if not id_list:
        return []

    fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    fetch_params = {"db": "pubmed", "id": ",".join(id_list), "retmode": "xml"}
    fetch_response = requests.get(fetch_url, params=fetch_params, timeout=timeout)
    fetch_response.raise_for_status()
    fetch_tree = ET.fromstring(fetch_response.content)

    return [
        _parse_pubmed_article(article)
        for article in fetch_tree.findall('.//PubmedArticle')
    ]


def _node_text(node, default=""):
    """Return all text inside *node*, including nested inline markup, or *default*."""
    if node is None:
        return default
    # itertext() also collects text that follows inline tags such as <i>/<sub>,
    # which a plain ``.text`` would silently drop.
    text = "".join(node.itertext()).strip()
    return text or default


def _parse_pubmed_article(article):
    """Convert one ``<PubmedArticle>`` element into the dict used by the CSV writers."""
    title = _node_text(article.find('.//ArticleTitle'), 'No Title')

    authors_list = []
    for author in article.findall('.//Author'):
        last_name = _node_text(author.find('.//LastName'))
        fore_name = _node_text(author.find('.//ForeName'))
        # Collective/corporate authors lack one or both name parts; skip them.
        if last_name and fore_name:
            authors_list.append(f"{last_name} {fore_name}")
    if len(authors_list) <= 2:
        authors_formatted = ", ".join(authors_list)
    else:
        authors_formatted = authors_list[0] + " et al."

    year = "Unknown"
    pub_date_element = article.find('.//PubDate')
    if pub_date_element is not None:
        year_text = _node_text(pub_date_element.find('Year'))
        if not year_text:
            # Some records carry only a free-form <MedlineDate> (e.g. "2019 Jan-Feb").
            medline_date = _node_text(pub_date_element.find('MedlineDate'))
            if medline_date[:4].isdigit():
                year_text = medline_date[:4]
        if year_text:
            year = year_text

    journal = _node_text(article.find('.//Journal/Title'), 'No Journal')

    # Structured abstracts are split into several <AbstractText> sections.
    abstract_sections = [
        _node_text(section)
        for section in article.findall('.//Abstract/AbstractText')
    ]
    abstract = " ".join(part for part in abstract_sections if part) or 'No Abstract'

    # Restrict the lookup to the article's own id list; a recursive search
    # could return a DOI that belongs to one of the article's references.
    doi = _node_text(article.find("./PubmedData/ArticleIdList/ArticleId[@IdType='doi']"))
    if not doi:
        doi = _node_text(article.find(".//Article/ELocationID[@EIdType='doi']"))
    doi_link = f"https://doi.org/{doi}" if doi else ""

    return {
        "Title": title,
        "Authors": authors_formatted,
        "Year": year,
        "DOI": doi_link,
        "Abstract": abstract,
        "Journal": journal,
    }

def write_articles_to_csv(articles, filename="articles.csv"):
    """Write *articles* to *filename* as UTF-8 CSV and print a summary line.

    The first column has an empty header and holds the literal ``[]`` on
    every data row; the remaining columns are Authors, Year, Title, DOI,
    Abstract and Journal, read from each article dict by key.
    """
    header = ['', 'Authors', 'Year', 'Title', 'DOI', 'Abstract', 'Journal']
    field_names = header[1:]

    with open(filename, mode='w', newline='', encoding='utf-8') as handle:
        csv_out = csv.writer(handle)
        csv_out.writerow(header)
        # Rows are produced lazily, one article at a time.
        csv_out.writerows(
            ['[]'] + [entry[name] for name in field_names]
            for entry in articles
        )

    print(f"Articles written to {filename}: {len(articles)}")

def articles_to_csv_string(articles):
    """Serialise *articles* to CSV text with the columns Year, Title, Abstract, DOI.

    Each article is a dict; only the four listed keys are read. The returned
    string starts with the header row and uses the csv module's default
    dialect.
    """
    columns = ('Year', 'Title', 'Abstract', 'DOI')
    buffer = StringIO()
    csv_out = csv.writer(buffer)
    csv_out.writerow(columns)
    for entry in articles:
        csv_out.writerow([entry[column] for column in columns])
    return buffer.getvalue()

def generate_filename(keyword1, keyword2, keyword3):
    """Build a filesystem-safe, timestamped CSV filename from three keywords.

    The result has the form ``articles_<kw1>_<kw2>_<kw3>_<timestamp>.csv``.
    Every character that is not a letter, digit, underscore, dot or hyphen
    is replaced with ``_``, so user-supplied text cannot introduce path
    separators or characters that are invalid on common filesystems.

    If the name would exceed 255 bytes, the keyword part is shortened; the
    timestamp and the ``.csv`` extension are always kept intact.
    """
    import re  # local import: the module does not import ``re`` at the top

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    suffix = f"_{timestamp}.csv"

    stem = f"articles_{keyword1}_{keyword2}_{keyword3}"
    stem = re.sub(r"[^\w.-]", "_", stem)

    # Filesystem limits are expressed in bytes, so truncate the encoded form
    # and drop any multi-byte character that was cut in half.
    max_stem_bytes = 255 - len(suffix.encode("utf-8"))
    stem = stem.encode("utf-8")[:max_stem_bytes].decode("utf-8", errors="ignore")

    return stem + suffix

def _save_results(df, keyword1, keyword2, keyword3):
    """Write *df* to a timestamped CSV in the temp directory and return ``(df, path)``."""
    filename = generate_filename(keyword1, keyword2, keyword3)
    temp_file_path = os.path.join(tempfile.gettempdir(), filename)
    df.to_csv(temp_file_path, index=False)
    return df, temp_file_path


def process_inputs(keyword1, keyword2, keyword3):
    """Search for articles matching the keywords and return a table plus a CSV file.

    Blank keywords are ignored, so a field the user left empty does not
    produce a malformed query with a dangling ``AND``. The remaining keywords
    are combined with ``AND`` and passed to ``fetch_articles``.

    Args:
        keyword1: First search keyword (may be blank).
        keyword2: Second search keyword (may be blank).
        keyword3: Third search keyword (may be blank).

    Returns:
        A ``(DataFrame, file_path)`` tuple. On success the DataFrame has the
        columns Year, Title, Abstract and DOI. Otherwise it has a single
        ``Error`` column describing what went wrong. In both cases the same
        DataFrame is also written to ``file_path`` as CSV.
    """
    try:
        terms = [
            kw.strip()
            for kw in (keyword1, keyword2, keyword3)
            if kw and kw.strip()
        ]
        if not terms:
            df_empty = pd.DataFrame({"Error": ["Please enter at least one keyword."]})
            return _save_results(df_empty, keyword1, keyword2, keyword3)

        keywords = " AND ".join(terms)
        articles = fetch_articles(keywords, max_results=10)
        if not articles:  # If no articles were found
            df_empty = pd.DataFrame({"Error": ["No articles found or an error occurred."]})
            return _save_results(df_empty, keyword1, keyword2, keyword3)

        # The CSV round-trip is kept so the table has the same column
        # selection and pandas type inference as before.
        csv_string = articles_to_csv_string(articles)
        df = pd.read_csv(StringIO(csv_string))
        return _save_results(df, keyword1, keyword2, keyword3)
    except Exception as e:
        # Top-level boundary for the UI callback: report the failure in the
        # table instead of letting the interface show a raw traceback.
        print(f"An error occurred: {e}")
        df_empty = pd.DataFrame({"Error": ["An error occurred during processing."]})
        return _save_results(df_empty, keyword1, keyword2, keyword3)

# Gradio UI: three free-text keyword fields feed process_inputs, which returns
# a results table and a downloadable CSV file.
iface = gr.Interface(
    fn=process_inputs,
    inputs=[
        gr.Textbox(label="Describe your overall concern in life, such as a specific diagnosis?", placeholder="ADHD, CPTSD, depression..."),
        gr.Textbox(label="If you had a magic genie, what life situation would you ask to fix?", placeholder="Pressures of University..."),
        gr.Textbox(label="Is there a specific therapeutic approach you are interested in? Leave this blank if there isn't one.", placeholder="Art therapy, dbt (dialectic behavior therapy), narrative..."),
    ],
    outputs=[
        gr.Dataframe(label="Here are some Research Articles that may interest you today"),
        gr.File(label="Download Articles Here. This file will open in a spreadsheet format with links to the recommended articles"),
    ],
    title="MindMender",
    description="This tool helps you find research articles related to your mental health concerns. Enter your parameters as keywords.",
)

# Launched at import time, as before, so running this file starts the app.
iface.launch(debug=True)