from packaging.version import Version, InvalidVersion
from bs4 import NavigableString,Tag
import requests
import xml.etree.ElementTree as ET

def Normalize_Section(section_number):
    """Return the canonical form of a section number, or "" if it is invalid.

    The value is parsed with ``packaging.version.Version`` and converted back
    with ``str()``, so the result is whatever canonical spelling that class
    produces for the input.

    Args:
        section_number: Section label to normalize; expected to be a string.

    Returns:
        The normalized string, or "" when ``section_number`` cannot be parsed
        as a version or is not a string at all (e.g. ``None``).
    """
    try:
        return str(Version(section_number))
    except (InvalidVersion, TypeError):
        # InvalidVersion: the text is not a parseable version.
        # TypeError: Version() applies a regex to its argument, which rejects
        # non-string input such as None; treat that like any other invalid
        # section number instead of crashing the caller.
        return ""


def _tag_text(tag):
    """Return ``tag.text``, or None when the lookup that produced it found nothing."""
    return tag.text if tag is not None else None


def Get_Bibliography(article):
    """Collect bibliographic details for the ``biblStruct`` entries of a document.

    Args:
        article: Parsed document exposing the BeautifulSoup ``find_all`` API.

    Returns:
        A dict keyed by each entry's ``xml:id`` attribute. Every value is a
        dict with the keys ``title``, ``authors``, ``journal``, ``volume``,
        ``issue``, ``pages``, ``year`` and ``doi``. ``authors`` is a list of
        strings; every other field is a string, or None when the
        corresponding element is absent from the entry.
    """
    bibliography = {}

    # The first biblStruct is skipped on purpose by the slice below.
    # NOTE(review): presumably it describes the paper itself rather than a
    # cited reference -- confirm against the TEI produced upstream.
    for entry in article.find_all('biblStruct')[1:]:
        xml_id = entry.get('xml:id')

        # Authors without a persName child are skipped; previously they
        # caused an AttributeError on None.
        authors = []
        for author in entry.find_all('author'):
            pers_name = author.find('persName')
            if pers_name is None:
                continue
            authors.append(" ".join(part.text for part in pers_name.find_all()))

        # monogr and imprint may be missing from an entry, so guard each
        # lookup instead of dereferencing None.
        monogr = entry.find('monogr')
        journal = _tag_text(monogr.find('title')) if monogr is not None else None

        imprint = entry.find('imprint')
        date = imprint.find('date') if imprint is not None else None
        year = date.get('when') if date is not None else None

        # Store the bibliographic details in the dictionary
        bibliography[xml_id] = {
            'title': _tag_text(entry.find('title')),
            'authors': authors,
            'journal': journal,
            'volume': _tag_text(entry.find('biblScope', {'unit': 'volume'})),
            'issue': _tag_text(entry.find('biblScope', {'unit': 'issue'})),
            'pages': _tag_text(entry.find('biblScope', {'unit': 'page'})),
            'year': year,
            'doi': _tag_text(entry.find('idno', {'type': 'DOI'})),
        }

    return bibliography

def GParse_Header(pdf):
    """Send a PDF to the GROBID ``processHeaderDocument`` endpoint.

    Args:
        pdf: PDF content uploaded as the multipart ``input`` field
            (presumably bytes or an open binary file -- verify against the
            caller).

    Returns:
        The response body as text. The HTTP status code is not checked.
    """
    upload = {"input": ("", pdf, "application/pdf", {"Expires": "0"})}
    form_fields = {
        'generateIDs': 1,
        'consolidateHeader': 0,
        'segmentSentences': 1,
        "teiCoordinates": ["head", "s", "p"],
    }

    response = requests.post(
        "https://kaiserml-grobid.hf.space/api/processHeaderDocument/",
        headers={"Accept": "application/xml, text/xml, */*; q=0.01"},
        files=upload,
        data=form_fields,
        timeout=60,
    )
    return response.text

def GParse_Paper(pdf):
    """Send a PDF to the GROBID ``processFulltextDocument`` endpoint.

    Args:
        pdf: PDF content uploaded as the multipart ``input`` field
            (presumably bytes or an open binary file -- verify against the
            caller).

    Returns:
        The response body as text. The HTTP status code is not checked.
    """
    upload = {"input": ("", pdf, "application/pdf", {"Expires": "0"})}
    # Only "head" coordinates are requested. Options currently left off:
    #   'segmentSentences': 1
    #   "teiCoordinates": ["head","s","p","figure","formula","note","title"]
    form_fields = {
        'generateIDs': 1,
        "teiCoordinates": ["head"],
    }

    response = requests.post(
        "https://Kaiserml-grobid.hf.space/api/processFulltextDocument/",
        headers={"Accept": "application/xml, text/xml"},
        files=upload,
        data=form_fields,
        timeout=60,
    )
    return response.text


def Resolve_GHeader(xml):
    """Look up the title of a TEI header in the OpenAlex works autocomplete API.

    Args:
        xml: TEI XML document as a string.

    Returns:
        The ``results`` list from the OpenAlex autocomplete response, or an
        empty list when the document has no non-blank ``tei:title`` text (no
        request is made in that case).

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml`` is not well-formed.
    """
    ns = {'tei': 'http://www.tei-c.org/ns/1.0'}
    root = ET.fromstring(xml)

    # First tei:title anywhere in the document; both the element and its
    # text may be missing.
    title_element = root.find('.//tei:title', namespaces=ns)
    title = ""
    if title_element is not None and title_element.text:
        title = title_element.text.strip()
    if not title:
        return []

    # Pass the title through ``params`` so characters such as '&', '#' and
    # '+' are percent-encoded instead of corrupting the query string.
    response = requests.get(
        "https://api.openalex.org/autocomplete/works",
        params={"q": title},
        timeout=60,  # same bound as the GROBID calls; avoids hanging forever
    )
    return response.json()['results']