Skip to content

CodelistPhenotype

Bases: Phenotype

CodelistPhenotype extracts patients from a CodeTable based on a specified codelist and other optional filters such as date range, relative time range and categorical filters.

Parameters:

Name Type Description Default
domain str

The domain of the phenotype.

required
codelist Codelist

The codelist used for filtering.

required
name Optional[str]

The name of the phenotype. Optional. If not passed, name will be derived from the name of the codelist.

None
date_range DateRangeFilter

A date range filter to apply.

None
relative_time_range Union[RelativeTimeRangeFilter, List[RelativeTimeRangeFilter]]

A relative time range filter or a list of filters to apply.

None
return_date

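Specifies which event date to return: 'first', 'last', 'nearest', or 'all'. Default is 'first'. Note that this class's own date selection implements only 'first', 'last' and 'all' (which returns every matching event without aggregation); 'nearest' is accepted at construction but raises a ValueError when the phenotype is executed.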

'first'
categorical_filter Optional[CategoricalFilter]

Additional categorical filters to apply.

None

Attributes:

Name Type Description
table PhenotypeTable

The resulting phenotype table after filtering (None until execute is called)

Examples:

Inpatient Atrial Fibrillation (OMOP)
# Example: build and execute a CodelistPhenotype restricted by a calendar date
# range and a categorical filter on a second domain.
from phenex.phenotypes import CodelistPhenotype
from phenex.codelists import Codelist
from phenex.mappers import OMOPDomains
from phenex.filters import DateRangeFilter, CategoricalFilter, Value
from phenex.ibis_connect import SnowflakeConnector

con = SnowflakeConnector() # requires some configuration
# `mapped_tables` is the mapping later passed to execute(); the phenotype looks
# up its source table in it by `domain`.
mapped_tables = OMOPDomains.get_mapped_tables(con)

af_codelist = Codelist([313217]) # list of concept ids
date_range = DateRangeFilter(
    min_date="2020-01-01",
    max_date="2020-12-31")

# The filter names a different domain than the phenotype below; the phenotype
# applies it through the filter's autojoin_filter(code_table, tables).
inpatient = CategoricalFilter(
    column_name='VISIT_DETAIL_CONCEPT_ID',
    allowed_values=[9201],
    domain='VISIT_DETAIL'
)

af_phenotype = CodelistPhenotype(
    name="af",
    domain='CONDITION_OCCURRENCE',
    codelist=af_codelist,
    date_range=date_range,
    return_date='first',
    categorical_filter=inpatient
)

# execute() stores the result on af_phenotype.table and also returns it.
af = af_phenotype.execute(mapped_tables)
af.head()
Myocardial Infarction One Year Pre-index (OMOP)
# Example: restrict a phenotype to a time window relative to another phenotype.
# Codelist, CodelistPhenotype and mapped_tables are reused from the previous example.
from phenex.filters import RelativeTimeRangeFilter, Value

af_phenotype = (...) # take from above example

oneyear_preindex = RelativeTimeRangeFilter(
    min_days=Value('>', 0), # exclude index date
    max_days=Value('<', 365),
    anchor_phenotype=af_phenotype # use af phenotype above as reference date
    )

mi_codelist = Codelist([49601007]) # list of concept ids
# Because af_phenotype is the anchor of the relative time range filter,
# CodelistPhenotype registers it as a child; execute() runs any child whose
# table is still None before computing this phenotype.
mi_phenotype = CodelistPhenotype(
    name='mi',
    domain='CONDITION_OCCURRENCE',
    codelist=mi_codelist,
    return_date='first',
    relative_time_range=oneyear_preindex
)
mi = mi_phenotype.execute(mapped_tables)
mi.head()
Source code in phenex/phenotypes/codelist_phenotype.py
class CodelistPhenotype(Phenotype):
    """
    CodelistPhenotype extracts patients from a CodeTable based on a specified codelist and other optional filters such as date range, relative time range and categorical filters.

    Parameters:
        domain: The domain of the phenotype. Used as the key to look up the source table in the `tables` mapping at execution time.
        codelist: The codelist used for filtering.
        name: The name of the phenotype. Optional. If not passed, name will be derived from the name of the codelist.
        date_range: A date range filter to apply.
        relative_time_range: A relative time range filter or a list of filters to apply. Every non-None anchor phenotype of these filters is registered as a child of this phenotype.
        return_date: Which event date(s) to return. Accepted values are 'first', 'last', 'nearest' and 'all'. Default is 'first'. 'first' and 'last' are aggregated with `First` / `Last`; 'all' returns the filtered events without aggregation. 'nearest' is accepted by the constructor but is not implemented by this class's date selection, which raises a ValueError for it at execution time.
        categorical_filter: Additional categorical filters to apply.
        **kwargs: Accepted for signature compatibility; not used by this constructor.

    Attributes:
        table (PhenotypeTable): The resulting phenotype table after filtering (None until execute is called)

    Raises:
        AssertionError: If `return_date` is not one of the accepted values.

    Examples:

    Example: Inpatient Atrial Fibrillation (OMOP)
        ```python
        from phenex.phenotypes import CodelistPhenotype
        from phenex.codelists import Codelist
        from phenex.mappers import OMOPDomains
        from phenex.filters import DateRangeFilter, CategoricalFilter, Value
        from phenex.ibis_connect import SnowflakeConnector

        con = SnowflakeConnector() # requires some configuration
        mapped_tables = OMOPDomains.get_mapped_tables(con)

        af_codelist = Codelist([313217]) # list of concept ids
        date_range = DateRangeFilter(
            min_date="2020-01-01",
            max_date="2020-12-31")

        inpatient = CategoricalFilter(
            column_name='VISIT_DETAIL_CONCEPT_ID',
            allowed_values=[9201],
            domain='VISIT_DETAIL'
        )

        af_phenotype = CodelistPhenotype(
            name="af",
            domain='CONDITION_OCCURRENCE',
            codelist=af_codelist,
            date_range=date_range,
            return_date='first',
            categorical_filter=inpatient
        )

        af = af_phenotype.execute(mapped_tables)
        af.head()
        ```

    Example: Myocardial Infarction One Year Pre-index (OMOP)
        ```python
        from phenex.filters import RelativeTimeRangeFilter, Value

        af_phenotype = (...) # take from above example

        oneyear_preindex = RelativeTimeRangeFilter(
            min_days=Value('>', 0), # exclude index date
            max_days=Value('<', 365),
            anchor_phenotype=af_phenotype # use af phenotype above as reference date
            )

        mi_codelist = Codelist([49601007]) # list of concept ids
        mi_phenotype = CodelistPhenotype(
            name='mi',
            domain='CONDITION_OCCURRENCE',
            codelist=mi_codelist,
            return_date='first',
            relative_time_range=oneyear_preindex
        )
        mi = mi_phenotype.execute(mapped_tables)
        mi.head()
        ```
    """

    def __init__(
        self,
        domain: str,
        codelist: Codelist,
        name: Optional[str] = None,
        date_range: Optional[DateRangeFilter] = None,
        relative_time_range: Optional[
            Union[RelativeTimeRangeFilter, List[RelativeTimeRangeFilter]]
        ] = None,
        return_date: str = "first",
        categorical_filter: Optional["CategoricalFilter"] = None,
        **kwargs,
    ):
        super().__init__()

        self.codelist_filter = CodelistFilter(codelist)
        self.codelist = codelist
        self.categorical_filter = categorical_filter
        self.name = name or self.codelist.name
        self.date_range = date_range
        self.return_date = return_date
        # Raised explicitly instead of via `assert` so the check is not stripped
        # under `python -O`. AssertionError and the message are kept so existing
        # callers observe the same failure as before.
        # 'nearest' stays accepted here even though _perform_date_selection in
        # this class does not implement it (a subclass may override selection).
        if self.return_date not in ("first", "last", "nearest", "all"):
            raise AssertionError(f"Unknown return_date: {return_date}")
        self.table: Optional[PhenotypeTable] = None
        self.domain = domain

        # Normalise a single filter to a one-element list so the rest of the
        # class can always iterate.
        if isinstance(relative_time_range, RelativeTimeRangeFilter):
            relative_time_range = [relative_time_range]

        self.relative_time_range = relative_time_range
        if self.relative_time_range is not None:
            for rtr in self.relative_time_range:
                # Anchor phenotypes become children of this phenotype.
                if rtr.anchor_phenotype is not None:
                    self.children.append(rtr.anchor_phenotype)

    def _execute(self, tables) -> PhenotypeTable:
        """
        Run the filtering pipeline on `tables[self.domain]`.

        Steps, in order: codelist filter, categorical filter, time filters
        (date range, then relative time ranges), date selection, and finally
        `select_phenotype_columns`.
        """
        code_table = tables[self.domain]
        code_table = self._perform_codelist_filtering(code_table)
        code_table = self._perform_categorical_filtering(code_table, tables)
        code_table = self._perform_time_filtering(code_table)
        code_table = self._perform_date_selection(code_table)
        return select_phenotype_columns(code_table)

    def _perform_codelist_filtering(self, code_table):
        """Apply the codelist filter; `code_table` must pass `is_phenex_code_table`."""
        assert is_phenex_code_table(code_table)
        code_table = self.codelist_filter.filter(code_table)
        return code_table

    def _perform_categorical_filtering(self, code_table, tables):
        """
        Apply the categorical filter, if one was given.

        `tables` is passed through to the filter's `autojoin_filter`. Without a
        categorical filter the table is returned unchanged.
        """
        if self.categorical_filter is not None:
            assert is_phenex_code_table(code_table)
            code_table = self.categorical_filter.autojoin_filter(code_table, tables)
        return code_table

    def _perform_time_filtering(self, code_table):
        """Apply the date range filter (if any), then each relative time range filter in order."""
        if self.date_range is not None:
            code_table = self.date_range.filter(code_table)
        if self.relative_time_range is not None:
            for rtr in self.relative_time_range:
                code_table = rtr.filter(code_table)
        return code_table

    def _perform_date_selection(self, code_table, reduce=True):
        """
        Select event dates according to `self.return_date`.

        Parameters:
            code_table: The filtered table.
            reduce: Passed to the aggregator constructor (`First(reduce=...)` / `Last(reduce=...)`).

        Returns:
            The table unchanged when `return_date` is None or 'all', otherwise the
            result of the `First` / `Last` aggregator.

        Raises:
            ValueError: For any other `return_date`, including 'nearest'.
        """
        if self.return_date is None or self.return_date == "all":
            return code_table
        if self.return_date == "first":
            aggregator = First(reduce=reduce)
        elif self.return_date == "last":
            aggregator = Last(reduce=reduce)
        else:
            raise ValueError(f"Unknown return_date: {self.return_date}")
        return aggregator.aggregate(code_table)

    def get_codelists(self) -> List[Codelist]:
        """
        Get all codelists used in the phenotype definition, including all children / dependent phenotypes.

        Returns:
            codelists: This phenotype's codelist followed by the codelists reported by each child, in child order.
        """
        codelists = [self.codelist]
        for p in self.children:
            codelists.extend(p.get_codelists())
        return codelists

namespaced_table property

A PhenotypeTable has generic column names 'person_id', 'boolean', 'event_date', and 'value'. The namespaced_table appends the phenotype name to all of these columns. This is useful when joining multiple phenotype tables together.

Returns:

Name Type Description
table Table

The namespaced table for the current phenotype.

execute(tables)

Executes the phenotype computation for the current object and its children. This method recursively iterates over the children of the current object and calls their execute method if their table attribute is None.

Parameters:

Name Type Description Default
tables Dict[str, PhenexTable]

A dictionary mapping table names to PhenexTable objects. See phenex.mappers.DomainsDictionary.get_mapped_tables().

required

Returns:

Name Type Description
table PhenotypeTable

The resulting phenotype table containing the required columns. The PhenotypeTable will contain the columns: PERSON_ID, BOOLEAN, EVENT_DATE, VALUE. EVENT_DATE is determined by the return_date parameter. VALUE is different for each phenotype. For example, AgePhenotype will return the age in the VALUE column. A MeasurementPhenotype will return the observed value for the measurement. See the specific phenotype of interest to understand more.

Source code in phenex/phenotypes/phenotype.py
def execute(self, tables: Dict[str, Table]) -> PhenotypeTable:
    """
    Executes the phenotype computation for the current object and its children. This method iterates over the children of the current object and calls their execute method if their table attribute is None; children that already have a table are skipped.

    Args:
        tables (Dict[str, PhenexTable]): A dictionary mapping table names to PhenexTable objects. See phenex.mappers.DomainsDictionary.get_mapped_tables().

    Returns:
        table (PhenotypeTable): The resulting phenotype table, restricted to the columns in PHENOTYPE_TABLE_COLUMNS. BOOLEAN is set to True for every returned row. EVENT_DATE is determined by the return_date parameter. VALUE is different for each phenotype. For example, AgePhenotype will return the age in the VALUE column. A MeasurementPhenotype will return the observed value for the measurement. See the specific phenotype of interest to understand more. The same table is also stored on `self.table`.

    Raises:
        ValueError: If the table produced by `_execute` lacks any of the columns in PHENOTYPE_TABLE_COLUMNS.
    """
    logger.info(f"Phenotype '{self.name}': executing...")
    for child in self.children:
        if child.table is None:
            logger.debug(
                f"Phenotype {self.name}: executing child phenotype '{child.name}'..."
            )
            child.execute(tables)
        else:
            logger.debug(
                f"Phenotype {self.name}: skipping already computed child phenotype '{child.name}'."
            )

    # Every row returned by the phenotype-specific _execute is flagged BOOLEAN=True.
    table = self._execute(tables).mutate(BOOLEAN=True)

    if not set(PHENOTYPE_TABLE_COLUMNS) <= set(table.columns):
        raise ValueError(
            f"Phenotype {self.name} must return columns {PHENOTYPE_TABLE_COLUMNS}. Found {table.columns}."
        )

    self.table = table.select(PHENOTYPE_TABLE_COLUMNS)
    # for some reason, having NULL datatype screws up writing the table to disk; here we make explicit cast
    if isinstance(self.table.schema()["VALUE"], ibis.expr.datatypes.core.Null):
        self.table = self.table.cast({"VALUE": "float64"})

    assert is_phenex_phenotype_table(self.table)
    logger.info(f"Phenotype '{self.name}': execution completed.")
    return self.table

get_codelists()

Get all codelists used in the phenotype definition, including all children / dependent phenotypes.

Returns:

Name Type Description
codelists List[Codelist]

A list of codelists used in the cohort definition.

Source code in phenex/phenotypes/codelist_phenotype.py
def get_codelists(self) -> List[Codelist]:
    """
    Collect every codelist this phenotype depends on.

    The phenotype's own codelist comes first, followed by whatever each
    child / dependent phenotype reports from its own get_codelists(), in
    child order.

    Returns:
        codelists: A list of codelists used in the cohort definition.
    """
    return [
        self.codelist,
        *(
            inherited
            for child in self.children
            for inherited in child.get_codelists()
        ),
    ]