Skip to content

AgePhenotype

AgePhenotype

Bases: Phenotype

AgePhenotype is a class that represents an age-based phenotype. It calculates the age of individuals based on their date of birth and an optional anchor phenotype. The age is computed in years and can be filtered within a specified range.

Parameters:

Name Type Description Default
name str

Name of the phenotype, default is 'age'.

'age'
min_age Optional[Value]

Minimum age for filtering, in years.

None
max_age Optional[Value]

Maximum age for filtering, in years.

None
anchor_phenotype Optional[Phenotype]

An optional anchor phenotype to calculate relative age.

None
domain str

Domain of the phenotype, default is 'PERSON'.

'PERSON'

Attributes:

Name Type Description
table PhenotypeTable

The resulting phenotype table after filtering (None until execute is called)

Age at First Atrial Fibrillation Diagnosis
# Example: compute each person's age at their first atrial fibrillation diagnosis.
# NOTE(review): AgePhenotype, Value, `tables` and `display` are used below but are
# not imported/defined in this snippet -- they must already be in scope.
from phenex.phenotypes import CodelistPhenotype
from phenex.codelists import Codelist

# Codelist used to identify the diagnosis (per the example title, atrial fibrillation).
af_codelist = Codelist([313217])
# Anchor phenotype: return_date='first' selects the first matching event per person.
af_phenotype = CodelistPhenotype(
    name="af",
    domain='CONDITION_OCCURRENCE',
    codelist=af_codelist,
    return_date='first',
)

# Keep only people aged 18 to 65 (inclusive) at the anchor phenotype's event date.
age_phenotype = AgePhenotype(
    min_age=Value('>=', 18),
    max_age=Value('<=', 65),
    anchor_phenotype=af_phenotype
)

# execute() also runs the anchor phenotype first if it has not been computed yet.
# `tables` is presumably the dict of mapped domain tables -- confirm against caller.
result_table = age_phenotype.execute(tables)
display(result_table)
Source code in phenex/phenotypes/age_phenotype.py
class AgePhenotype(Phenotype):
    """
    AgePhenotype is a class that represents an age-based phenotype. It calculates the age of individuals
    based on their date of birth and an optional anchor phenotype. The age is computed in years and can
    be filtered within a specified range.

    If no anchor phenotype is given, the age is computed relative to the INDEX_DATE column of the
    person table. When only YEAR_OF_BIRTH is available (or DATE_OF_BIRTH is null), the date of birth
    is assumed to be June 1st of the year of birth.

    Parameters:
        name: Name of the phenotype, default is 'age'.
        min_age: Minimum age for filtering, in years. Operator must be '>' or '>='.
        max_age: Maximum age for filtering, in years. Operator must be '<' or '<='.
        anchor_phenotype: An optional anchor phenotype to calculate relative age.
        domain: Domain of the phenotype, default is 'PERSON'.

    Attributes:
        table (PhenotypeTable): The resulting phenotype table after filtering (None until execute is called)

    Example: Age at First Atrial Fibrillation Diagnosis
        ```python
        from phenex.phenotypes import CodelistPhenotype
        from phenex.codelists import Codelist

        af_codelist = Codelist([313217])
        af_phenotype = CodelistPhenotype(
            name="af",
            domain='CONDITION_OCCURRENCE',
            codelist=af_codelist,
            return_date='first',
        )

        age_phenotype = AgePhenotype(
            min_age=Value('>=', 18),
            max_age=Value('<=', 65),
            anchor_phenotype=af_phenotype
        )

        result_table = age_phenotype.execute(tables)
        display(result_table)
        ```
    """

    # NOTE: ages are approximated as floor(elapsed_days / 365), i.e. leap days are
    # ignored. Roughly one leap day accrues every four years, so the computed age
    # ticks over a few days *before* the true birthday (about 4-5 days early around
    # age 18, and more at older ages).
    # FIXME: use a calendar-aware year difference if exact age boundaries matter.
    DAYS_IN_YEAR = 365

    def __init__(
        self,
        name: str = "age",
        min_age: Optional[Value] = None,
        max_age: Optional[Value] = None,
        anchor_phenotype: Optional[Phenotype] = None,
        domain: str = "PERSON",
    ):
        self.name = name
        self.min_age = min_age
        self.max_age = max_age
        self.domain = domain
        self.anchor_phenotype = anchor_phenotype

        self.time_range_filter = RelativeTimeRangeFilter(
            anchor_phenotype=anchor_phenotype
        )

        # Set children to the dependent PHENOTYPES so that execute() computes the
        # anchor phenotype before this one.
        if anchor_phenotype is not None:
            self.children = [anchor_phenotype]
        else:
            self.children = []

        super().__init__()

    def _execute(self, tables: Dict[str, Table]) -> PhenotypeTable:
        """
        Compute the age in years for each person and apply the min/max age filters.

        Args:
            tables: Mapping of domain name to table; the table at ``self.domain``
                must be a person table.

        Returns:
            The person table (joined with the anchor phenotype table on PERSON_ID
            when an anchor is set) with EVENT_DATE set to the date of birth and
            YEARS_FROM_ANCHOR / VALUE set to the computed age in years.

        Raises:
            ValueError: If the anchor phenotype has not been executed yet, or if
                min_age / max_age use an unsupported operator.
        """
        person_table = tables[self.domain]
        assert is_phenex_person_table(person_table)

        # Derive a date of birth; fall back to June 1st of the birth year when
        # only the year is known.
        if "YEAR_OF_BIRTH" in person_table.columns:
            if "DATE_OF_BIRTH" in person_table.columns:
                logger.debug(
                    "Year of birth and date of birth is present, taking date of birth where possible otherwise setting date of birth to June 1st"
                )
                date_of_birth = ibis.coalesce(
                    ibis.date(person_table.DATE_OF_BIRTH),
                    ibis.date(person_table.YEAR_OF_BIRTH, 6, 1),
                )
            else:
                logger.debug(
                    "Only year of birth is present in person table, setting birth date to June 1st"
                )
                date_of_birth = ibis.date(person_table.YEAR_OF_BIRTH, 6, 1)
        else:
            logger.debug("Year of birth not present, taking date of birth")
            date_of_birth = ibis.date(person_table.DATE_OF_BIRTH)
        person_table = person_table.mutate(EVENT_DATE=date_of_birth)

        # Pick the reference date: the anchor phenotype's event date if an anchor
        # is set, otherwise the INDEX_DATE column of the person table.
        table = person_table
        if self.anchor_phenotype is not None:
            if self.anchor_phenotype.table is None:
                raise ValueError(
                    f"Dependent Phenotype {self.anchor_phenotype.name} must be executed before this node can run!"
                )
            anchor_table = self.anchor_phenotype.table
            reference_column = anchor_table.EVENT_DATE
            # Note that joins can change column names if the tables have name collisions!
            table = table.join(anchor_table, "PERSON_ID")
        else:
            assert (
                "INDEX_DATE" in table.columns
            ), f"INDEX_DATE column not found in table {table}"
            reference_column = table.INDEX_DATE

        # Whole years elapsed between date of birth and the reference date
        # (approximate, see DAYS_IN_YEAR).
        YEARS_FROM_ANCHOR = (
            reference_column.delta(table.EVENT_DATE, "day") / self.DAYS_IN_YEAR
        ).floor()
        table = table.mutate(YEARS_FROM_ANCHOR=YEARS_FROM_ANCHOR)

        conditions = []
        # TODO: this logic needs to be abstracted to a ValueFilter
        if self.min_age is not None:
            if self.min_age.operator == ">":
                conditions.append(table.YEARS_FROM_ANCHOR > self.min_age.value)
            elif self.min_age.operator == ">=":
                conditions.append(table.YEARS_FROM_ANCHOR >= self.min_age.value)
            else:
                raise ValueError("Operator for min_age must be > or >=")
        if self.max_age is not None:
            if self.max_age.operator == "<":
                conditions.append(table.YEARS_FROM_ANCHOR < self.max_age.value)
            elif self.max_age.operator == "<=":
                conditions.append(table.YEARS_FROM_ANCHOR <= self.max_age.value)
            else:
                raise ValueError("Operator for max_age must be < or <=")
        if conditions:
            table = table.filter(conditions)
        person_table = table

        # The phenotype's VALUE is the age in years.
        person_table = person_table.mutate(VALUE=person_table.YEARS_FROM_ANCHOR)

        return person_table

namespaced_table property

A PhenotypeTable has generic column names 'person_id', 'boolean', 'event_date', and 'value'. The namespaced_table appends the phenotype name to all of these columns. This is useful when joining multiple phenotype tables together.

Returns:

Name Type Description
table Table

The namespaced table for the current phenotype.

execute(tables)

Executes the phenotype computation for the current object and its children. This method recursively iterates over the children of the current object and calls their execute method if their table attribute is None.

Parameters:

Name Type Description Default
tables Dict[str, PhenexTable]

A dictionary mapping table names to PhenexTable objects. See phenex.mappers.DomainsDictionary.get_mapped_tables().

required

Returns:

Name Type Description
table PhenotypeTable

The resulting phenotype table containing the required columns. The PhenotypeTable will contain the columns: PERSON_ID, EVENT_DATE, VALUE. EVENT_DATE is determined by the return_date parameter. VALUE is different for each phenotype. For example, AgePhenotype will return the age in the VALUE column. A MeasurementPhenotype will return the observed value for the measurement. See the specific phenotype of interest to understand more.

Source code in phenex/phenotypes/phenotype.py
def execute(self, tables: Dict[str, Table]) -> PhenotypeTable:
    """
    Executes the phenotype computation for the current object and its children. This method recursively iterates over the children of the current object and calls their execute method if their table attribute is None.

    Args:
        tables (Dict[str, PhenexTable]): A dictionary mapping table names to PhenexTable objects. See phenex.mappers.DomainsDictionary.get_mapped_tables().

    Returns:
        table (PhenotypeTable): The resulting phenotype table containing the required columns. The PhenotypeTable will contain the columns: PERSON_ID, EVENT_DATE, VALUE. EVENT_DATE is determined by the return_date parameter. VALUE is different for each phenotype. For example, AgePhenotype will return the age in the VALUE column. A MeasurementPhenotype will return the observed value for the measurement. See the specific phenotype of interest to understand more.

    Raises:
        ValueError: If the table returned by ``_execute`` is missing any of the
            required phenotype table columns.
    """
    logger.info(f"Phenotype '{self.name}': executing...")
    # Children (dependencies) must be computed first; already computed ones are reused.
    for child in self.children:
        if child.table is None:
            logger.debug(
                f"Phenotype {self.name}: executing child phenotype '{child.name}'..."
            )
            child.execute(tables)
        else:
            logger.debug(
                f"Phenotype {self.name}: skipping already computed child phenotype '{child.name}'."
            )

    # Every row returned by _execute satisfies the phenotype, hence BOOLEAN=True.
    table = self._execute(tables).mutate(BOOLEAN=True)

    if not set(PHENOTYPE_TABLE_COLUMNS) <= set(table.columns):
        raise ValueError(
            f"Phenotype {self.name} must return columns {PHENOTYPE_TABLE_COLUMNS}. Found {table.columns}."
        )

    self.table = table.select(PHENOTYPE_TABLE_COLUMNS)
    # for some reason, having NULL datatype screws up writing the table to disk; here we make explicit cast
    if isinstance(self.table.schema()["VALUE"], ibis.expr.datatypes.core.Null):
        self.table = self.table.cast({"VALUE": "float64"})

    assert is_phenex_phenotype_table(self.table)
    logger.info(f"Phenotype '{self.name}': execution completed.")
    return self.table