hf-legisqa / utils_mod.py
gabrielaltay's picture
more files
76cbdff
import re
# Maps the short legislation type code used in legis ids (e.g. the "hr" in
# "118-hr-1234") to the slug used for that type in congress.gov bill URLs.
CONGRESS_GOV_TYPE_MAP = {
    # House
    "hconres": "house-concurrent-resolution",
    "hjres": "house-joint-resolution",
    "hr": "house-bill",
    "hres": "house-resolution",
    # Senate
    "s": "senate-bill",
    "sconres": "senate-concurrent-resolution",
    "sjres": "senate-joint-resolution",
    "sres": "senate-resolution",
}
def escape_markdown(text: str) -> str:
    """Return *text* with every markdown special character backslash-escaped.

    Each occurrence of a character from the special set (backslash, backtick,
    ``*_{}[]()#+-.!$``) is prefixed with a single backslash; all other
    characters are passed through unchanged.
    """
    special_chars = r"\`*_{}[]()#+-.!$"
    # One translation pass: every special character maps to "\" + itself.
    escape_table = str.maketrans({ch: "\\" + ch for ch in special_chars})
    return text.translate(escape_table)
def get_sponsor_url(bioguide_id: str) -> str:
    """Return the bioguide.congress.gov biography URL for *bioguide_id*."""
    sponsor_url = f"https://bioguide.congress.gov/search/bio/{bioguide_id}"
    return sponsor_url
def get_congress_gov_url(congress_num: int, legis_type: str, legis_num: int) -> str:
    """Build the congress.gov URL for a piece of legislation.

    Args:
        congress_num: Congress number (e.g. 118). Any value accepted by
            ``int()`` works, including numeric strings.
        legis_type: Short type code; must be a key of
            ``CONGRESS_GOV_TYPE_MAP`` (e.g. ``"hr"``, ``"sjres"``).
        legis_num: Legislation number. Any value accepted by ``int()`` works.

    Returns:
        A URL of the form
        ``https://www.congress.gov/bill/118th-congress/house-bill/1234``.

    Raises:
        KeyError: If *legis_type* is not in ``CONGRESS_GOV_TYPE_MAP``.
        ValueError: If *congress_num* or *legis_num* cannot be converted
            with ``int()``.
    """
    lt = CONGRESS_GOV_TYPE_MAP[legis_type]
    congress = int(congress_num)
    # The URL uses an English ordinal ("101st", "102nd", "103rd", "118th"),
    # so the suffix cannot be hard-coded to "th". Numbers ending in 11-13
    # are the exception to the last-digit rule and always take "th".
    if 11 <= congress % 100 <= 13:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(congress % 10, "th")
    return f"https://www.congress.gov/bill/{congress}{suffix}-congress/{lt}/{int(legis_num)}"
def legis_id_to_link(legis_id: str) -> str:
    """Return the congress.gov URL for a legis id such as ``"118-hr-1234"``.

    The id is split on ``"-"`` into exactly three parts (congress number,
    type code, legislation number), which are passed on to
    ``get_congress_gov_url``. An id that does not split into three parts
    raises ``ValueError`` from the unpacking.
    """
    congress, type_code, number = legis_id.split("-")
    url = get_congress_gov_url(congress, type_code, number)
    return url
def legis_id_match_to_link(matchobj):
    """Convert a regex match on a legis id into a markdown link.

    Intended as the replacement callback for ``re.sub``: the matched text
    becomes the link label and its congress.gov URL becomes the target,
    i.e. ``[<matched text>](<url>)``.
    """
    # group(0) is the full matched substring.
    matched_id = matchobj.group(0)
    target = legis_id_to_link(matched_id)
    return f"[{matched_id}]({target})"
def replace_legis_ids_with_urls(text: str) -> str:
    """Replace legis ids in *text* with markdown links to congress.gov.

    A legis id has the form ``<congress>-<type>-<number>``, e.g.
    ``118-hr-1234``: a congress number from 113 to 118, a type code, and a
    number of 1 to 5 digits. Each id found is replaced by
    ``[<id>](<congress.gov url>)`` via ``legis_id_match_to_link``.

    Only type codes that are keys of ``CONGRESS_GOV_TYPE_MAP`` are matched.
    Text that merely looks like an id but carries an unknown type code
    (e.g. ``118-foo-1``) is left untouched instead of raising ``KeyError``
    during URL construction.

    Args:
        text: Input text, possibly containing legis ids.

    Returns:
        The text with every recognised legis id turned into a markdown link.
    """
    known_types = "|".join(re.escape(code) for code in CONGRESS_GOV_TYPE_MAP)
    # Raw strings keep "\d" a regex escape rather than an (invalid) Python
    # string escape.
    pattern = r"11[345678]-(?:" + known_types + r")-\d{1,5}"
    return re.sub(pattern, legis_id_match_to_link, text)