jpwahle's picture
Finalize PDF function and update on hf-hub
7574c0c
raw
history blame
407 Bytes
import scipdf
def parse_pdf_to_artcile_dict(pdf_path):
    """Parse the PDF identified by ``pdf_path`` into an article dictionary.

    Thin wrapper: ``pdf_path`` is forwarded unchanged to
    ``scipdf.parse_pdf_to_dict`` and whatever that call returns is handed
    back to the caller as-is.
    """
    parsed_article = scipdf.parse_pdf_to_dict(pdf_path)
    return parsed_article
if __name__ == "__main__":
    # Small manual demo: parse one PDF and print a few fields of the result.
    # Imported locally because only the script entry point needs it.
    import sys

    # Use the first command-line argument when given; otherwise fall back to
    # the original sample paper so running without arguments behaves as before.
    default_pdf_path = "/Users/jp/Documents/papers/demo-test/EMNLP23_Influence_NLP_Citation_Analysis.pdf"
    pdf_path = sys.argv[1] if len(sys.argv) > 1 else default_pdf_path

    # Go through this module's own wrapper so the demo exercises it.
    article_dict = parse_pdf_to_artcile_dict(pdf_path)  # return dictionary
    print(article_dict.keys())
    print(article_dict["title"])

    # Guard the [0] lookup: an empty reference list would raise IndexError.
    references = article_dict["references"]
    if references:
        print(references[0].keys())
    else:
        print("No references found.")