Source code for phenopacket_mapper.utils.parsing.parse_coding

from typing import Literal, List
import re

from phenopacket_mapper.data_standards import Coding, CodeSystem
from phenopacket_mapper.utils.parsing import get_codesystem_by_namespace_prefx
from phenopacket_mapper.data_standards import code_system as code_system_module


[docs] def parse_coding( coding_str: str, resources: List[CodeSystem], compliance: Literal['lenient', 'strict'] = 'lenient' ) -> Coding: """Parsed a string representing a coding to a Coding object Expected format: <namespace_prefix>:<code> E.g.: >>> parse_coding("SNOMED:404684003", [code_system_module.SNOMED_CT]) Coding(system=CodeSystem(name=SNOMED CT, name space prefix=SNOMED, version=0.0.0), code='404684003', display='', text='') Intended to be called with a list of all resources used. Can only recognize the name space prefixes that belong to code systems provided in the resources list. If a name space is not found in the resources, it will return a Coding object with the system as the name space prefix and the code as the code. E.g.: >>> parse_coding("SNOMED:404684003", []) Warning: Code system with namespace prefix 'SNOMED' not found in resources. Warning: Returning Coding object with system as namespace prefix and code as '404684003' Coding(system='SNOMED', code='404684003', display='', text='') :param coding_str: a string representing a coding :param resources: a list of all resources used :param compliance: whether to throw a ValueError or just a warning if a name space prefix is not found in the resources :return: a Coding object as specified in the coding string """ coding_str = coding_str.replace(" ", "") if ':' not in coding_str: raise ValueError("Invalid coding string, does not contain separator between code and name space prefix: " f"{coding_str}") pattern = r'^(.*?):(.*)$' match = re.match(pattern, coding_str) if match: namespace_prefix = match.group(1) # Part before the colon code = match.group(2) # Part after the colon code_system = get_codesystem_by_namespace_prefx(namespace_prefix, resources) if code_system: return Coding(system=code_system, code=code) else: if compliance == 'strict': raise ValueError(f"Code system with namespace prefix '{namespace_prefix}' not found in resources.") else: print(f"Warning: Code system with namespace prefix '{namespace_prefix}' not found in resources.") coding = Coding(system=namespace_prefix, code=code) print(f"Warning: Returning Coding object with system as namespace prefix and code as '{code}'") return coding else: raise ValueError(f"Invalid coding string: {coding_str}")