File size: 5,626 Bytes
102dc72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import requests
import streamlit as st
import xml.etree.ElementTree as ET


class BillRetriever:
    """
    Retrieve bills using the ProPublica Congress API and the United States Congress API.

    ProPublica is used to search for bills matching a free-text query; the Congress API
    is then used to locate the XML rendition of a matching bill, from which the plain
    text is extracted.
    """
    PROPUBLICA_URL = "https://api.propublica.org/congress/v1/bills/search.json"
    CONGRESS_URL_BASE = "https://api.congress.gov/v3/bill/{congress}/{billType}/{billNumber}/text"
    # Seconds to wait on any single HTTP request. Without a timeout `requests`
    # waits indefinitely on a stalled server.
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key=None):
        """
        Initialize the BillRetriever with API keys read from Streamlit secrets.

        Parameters:
            api_key (str, optional): Accepted for backward compatibility only; it is not
                used. Both keys are always read from ``st.secrets``
                (``PRO_PUBLICA_API_KEY`` and ``CONGRESS_API_KEY``).
        """
        self.pro_publica_api_key = st.secrets["PRO_PUBLICA_API_KEY"]
        self.congress_api_key = st.secrets["CONGRESS_API_KEY"]

    def make_api_call(self, api_url, api_key, params=None):
        """
        Make a GET request to the specified URL and decode the JSON response.

        Parameters:
            api_url (str): The URL of the API endpoint.
            api_key (str): The API key, sent in the ``X-API-Key`` header when truthy.
            params (dict, optional): Query parameters to pass with the call. Default is None.

        Returns:
            dict: JSON response data if the request is successful, None otherwise.
        """
        headers = {"X-API-Key": api_key} if api_key else {}

        try:
            response = requests.get(
                api_url,
                params=params,
                headers=headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()  # Raise an exception for non-2xx status codes
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Error occurred: {e}")
            return None
        except ValueError as e:
            # The body was not valid JSON.
            print(f"Invalid response received: {e}")
            return None

    def search_bill_propublica(self, query):
        """
        Search for a bill using the ProPublica Congress API.

        Results are requested sorted by date, newest first.

        Parameters:
            query (str): The query string to search for.

        Returns:
            dict: JSON response data if the request is successful, None otherwise.
        """
        params = {"query": query, "sort": "date", "dir": "desc"}
        return self.make_api_call(self.PROPUBLICA_URL, params=params, api_key=self.pro_publica_api_key)

    def get_bill_text_congress(self, congress, bill_type, bill_number):
        """
        Retrieve the text-version listing of a bill using the Congress API.

        Parameters:
            congress (str): The number of the congress.
            bill_type (str): The type of the bill.
            bill_number (str): The number of the bill.

        Returns:
            dict: JSON response data if the request is successful, None otherwise.
        """
        url = self.CONGRESS_URL_BASE.format(congress=congress, billType=bill_type, billNumber=bill_number)
        return self.make_api_call(url, api_key=self.congress_api_key)

    @staticmethod
    def _find_xml_url(congress_data):
        """
        Return the URL of the first XML rendition listed in a Congress API response.

        A format counts as XML when its ``type`` mentions "xml" or its URL ends in
        ``.xml``; this avoids relying on the position of the entry in the list.

        Parameters:
            congress_data (dict): Response of ``get_bill_text_congress`` (may be None).

        Returns:
            str: The XML URL, or None when no text version offers one.
        """
        if not isinstance(congress_data, dict):
            return None
        for version in congress_data.get('textVersions') or []:
            for fmt in version.get('formats') or []:
                url = fmt.get('url')
                if not url:
                    continue
                format_type = (fmt.get('type') or '').lower()
                if 'xml' in format_type or url.lower().endswith('.xml'):
                    return url
        return None

    def get_bill_by_query(self, query):
        """
        Search for a bill by query and retrieve its text.

        The bills returned by the search are tried in order; the first one whose
        text can be located and extracted is returned.

        Parameters:
            query (str): The query string to search for.

        Returns:
            str: The text of the bill if the request is successful, None otherwise.
        """
        # First search for the bill using the ProPublica API
        propublica_data = self.search_bill_propublica(query)
        if not isinstance(propublica_data, dict):
            return None
        results = propublica_data.get('results') or []
        if not results:
            # An empty result list would otherwise raise IndexError below.
            return None

        # Iterate over the list of bills, till we find the bill which has text available on Congress Website
        for bill_data in results[0].get('bills') or []:
            try:
                congress = bill_data['bill_id'].split('-')[1]
                bill_type = bill_data['bill_type']
                bill_number = bill_data['number'].split('.')[-1]
            except (KeyError, IndexError, AttributeError):
                # Entry lacks the fields needed to address the bill; try the next one.
                continue

            # Then get the text of the bill using the Congress API
            congress_data = self.get_bill_text_congress(congress, bill_type, bill_number)
            xml_url = self._find_xml_url(congress_data)
            if not xml_url:
                continue

            text = self.extract_bill_text(xml_url)
            if text is not None:
                return text
            # Download or parsing failed for this bill; fall through to the next one.
        return None

    def extract_bill_text(self, url):
        """
        Download a bill's XML data and extract its text content.

        Parameters:
            url (str): The URL of the bill's XML data.

        Returns:
            str: The text content of the bill, or None if the download or the
                XML parsing fails.
        """
        # Get the XML data from the URL
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()  # Do not try to parse an HTTP error page as XML
        except requests.exceptions.RequestException as e:
            print(f"Error occurred: {e}")
            return None

        # Parse the raw bytes so the parser honours the encoding declared in the
        # document instead of assuming UTF-8.
        try:
            root = ET.fromstring(response.content)
        except ET.ParseError as e:
            print(f"Error parsing XML: {e}")
            return None

        return self.get_all_text(root)

    @staticmethod
    def get_all_text(element):
        """
        Extract the text from an XML element and all of its descendants.

        Parameters:
            element (xml.etree.ElementTree.Element): An XML element.

        Returns:
            str: The concatenated text of the element and its descendants, in
                document order, including text that follows nested elements.
                The tail text after ``element`` itself is not included.
        """
        # itertext() walks the subtree in document order yielding every text and
        # tail fragment; joining once avoids repeated string concatenation.
        return "".join(element.itertext())