Source code for phenopacket_mapper.utils.parsing.parse_ordinal

from typing import Tuple
import re



[docs]
def parse_ordinal(field_name_str: str) -> Tuple[str, str]:
    """
    Split a `DataField.name` string into its leading ordinal and the remaining name

    This method is meant as part of reading in a `DataModel` from a file, where data model fields might have an ordinal
    attached to them (e.g., "1.1. Pseudonym"), which this method can then neatly separate into ordinal="1.1" and
    name="Pseudonym".

    Recognised ordinals are dotted numbers ("1", "1.1"), a roman numeral followed by letters ("I.a"), and plain
    letters that are terminated by a dot ("ii.", "a."). The ordinal has to be set off from the name by a dot and/or
    whitespace. Matching is case-insensitive and the ordinal is returned without its trailing dot.

    If no ordinal can be recognised, nothing is raised: the ordinal is returned as an empty string and the input is
    returned unchanged as the name.

    >>> parse_ordinal("1.1. Pseudonym")
    ('1.1', 'Pseudonym')

    >>> parse_ordinal("1. Pseudonym")
    ('1', 'Pseudonym')

    >>> parse_ordinal("I.a. Pseudonym")
    ('I.a', 'Pseudonym')

    >>> parse_ordinal("ii. Pseudonym")
    ('ii', 'Pseudonym')

    >>> parse_ordinal("Pseudonym")
    ('', 'Pseudonym')

    >>> parse_ordinal("Date of birth")
    ('', 'Date of birth')

    :param field_name_str: name of the field, possibly prefixed with an ordinal, to parse
    :returns: a tuple containing the ordinal (empty string if there is none) and the name
    """
    # A letters-only ordinal must be followed by a dot; otherwise the first word of an un-numbered name
    # (e.g. "Date of birth") would be mistaken for an ordinal. The name has to start with a non-whitespace
    # character so that the separator cannot be re-interpreted as part of the name.
    ordinal_pattern = r"""
        \s*
        (
            [0-9]+(?:\.[0-9]+)*     # dotted numbers: 1, 1.1, 2.10.3
          | [ivxlc]+\.[a-z]+        # roman numeral plus letter(s): I.a
          | [a-z]+(?=\.)            # letters only, and only when a dot follows: ii.
        )
        (?:\.\s*|\s+)               # separator between ordinal and name
        (\S.*)
    """
    match = re.match(ordinal_pattern, field_name_str, re.IGNORECASE | re.VERBOSE)

    if match is None:
        # since this is more for optics, do not raise error and just do "nothing"
        return '', field_name_str

    # The separator is matched outside of the ordinal group, so the ordinal never carries a trailing dot.
    ordinal, field_name = match.groups()
    return ordinal, field_name.strip()