Skip to content

MultipleOccurrencesPhenotype

Bases: Phenotype

MultipleOccurrencesPhenotype is a class that looks for N occurrences of an event (from an EventTable). In this Phenotype, the returned VALUE is equal to the number of occurrences of the event passing all filters.

Parameters:

Name Type Description Default
name str

The name of the phenotype.

required
phenotype Phenotype

The phenotype events to look for.

required
n_occurrences int

The minimum number of occurrences to look for.

2
date_range DateRangeFilter

A date range filter to apply.

None
relative_time_range RelativeTimeRangeFilter

A relative time range filter to apply.

None
return_date

Specifies whether to return the 'first' or 'last' event date. Default is 'first'.

'first'
Example
codelist = Codelist(name="example_codelist", codes=[...])

# Child phenotype: supplies the events whose occurrences will be counted.
date_range = DateRangeFilter(min_date="2020-01-01", max_date="2020-12-31")
phenotype = CodelistPhenotype(
    name="example_phenotype",
    domain="CONDITION_OCCURRENCE",
    codelist=codelist,
    date_range=date_range,
    return_date='first'
)

tables = {"CONDITION_OCCURRENCE": example_code_table}
# `name` is a required argument of MultipleOccurrencesPhenotype.
multiple_occurrences = MultipleOccurrencesPhenotype(
    name="example_multiple_occurrences",
    phenotype=phenotype,
    n_occurrences=2,
    return_date='second')

result_table = multiple_occurrences.execute(tables)
display(result_table)
Source code in phenex/phenotypes/multiple_occurrences_phenotype.py
class MultipleOccurrencesPhenotype(Phenotype):
    """
    MultipleOccurrencesPhenotype is a class that looks for N occurrences of an event, where the events are the rows of another (child) phenotype's table. Occurrences are counted as distinct event dates per person, so several events on the same date count once. In this Phenotype, the returned VALUE is equal to the number of occurrences (distinct event dates) of the event passing all filters.

    Parameters:
        name: The name of the phenotype.
        phenotype: The phenotype events to look for.
        n_occurrences: The minimum number of occurrences to look for.
        date_range: A date range filter to apply.
        relative_time_range: A relative time range filter to apply.
        return_date: Specifies whether to return the 'first', 'second' or 'last' event date. Default is 'first'.

    Example:
        ```python
        codelist = Codelist(name="example_codelist", codes=[...])

        date_range = DateRangeFilter(min_date="2020-01-01", max_date="2020-12-31")
        phenotype = CodelistPhenotype(
            name="example_phenotype",
            domain="CONDITION_OCCURRENCE",
            codelist=codelist,
            date_range=date_range,
            return_date='first'
        )

        tables = {"CONDITION_OCCURRENCE": example_code_table}
        multiple_occurrences = MultipleOccurrencesPhenotype(
            name="example_multiple_occurrences",
            phenotype=phenotype,
            n_occurrences=2,
            return_date='second')

        result_table = multiple_occurrences.execute(tables)
        display(result_table)
        ```
    """

    def __init__(
        self,
        name: str,
        phenotype: Phenotype,
        n_occurrences: int = 2,
        date_range: DateRangeFilter = None,
        relative_time_range: RelativeTimeRangeFilter = None,
        return_date: str = "first",
    ):
        self.name = name
        self.date_range = date_range
        self.relative_time_range = relative_time_range
        self.return_date = return_date
        self.n_occurrences = n_occurrences
        self.phenotype = phenotype
        # The wrapped phenotype is registered as a child so that execute()
        # computes it before this phenotype's _execute() reads its table.
        self.children = [phenotype]
        super().__init__()

    def _execute(self, tables) -> PhenotypeTable:
        """
        Count distinct event dates per person in the child phenotype's table and keep persons with at least ``n_occurrences`` of them.

        Args:
            tables: The mapped input tables. Not read here; the events come from the already computed child phenotype table.

        Returns:
            table (PhenotypeTable): One row per qualifying person. VALUE is the number of distinct event dates and EVENT_DATE is the first, second or last of those dates, as selected by ``return_date``.

        Raises:
            ValueError: If ``return_date`` is not 'first', 'second' or 'last'.
        """
        # Map each supported return_date onto the aggregate column holding it.
        date_columns = {
            "first": "first_date",
            "second": "second_date",
            "last": "last_date",
        }
        if self.return_date not in date_columns:
            raise ValueError(
                f"return_date must be one of 'first', 'second' or 'last'; got {self.return_date!r}."
            )

        # The child phenotype has already been executed; start from its table
        phenotype_table = self.phenotype.table

        # Apply date range filter if provided
        if self.date_range is not None:
            phenotype_table = self.date_range.filter(phenotype_table)

        # Apply relative time range filter if provided
        if self.relative_time_range is not None:
            phenotype_table = self.relative_time_range.filter(phenotype_table)

        # Select only distinct dates:
        phenotype_table = phenotype_table.select(["PERSON_ID", "EVENT_DATE"]).distinct()

        aggregations = dict(
            VALUE=_.count(),
            first_date=_.EVENT_DATE.min(),
            last_date=_.EVENT_DATE.max(),
        )
        if self.return_date == "second":
            # Dates are distinct per person at this point, so the second date is
            # the earliest date strictly after the person's first date. It is
            # NULL for persons with a single date (only reachable when
            # n_occurrences < 2).
            phenotype_table = phenotype_table.mutate(
                PERSON_FIRST_DATE=_.EVENT_DATE.min().over(group_by="PERSON_ID")
            )
            aggregations["second_date"] = _.EVENT_DATE.min(
                where=_.EVENT_DATE > _.PERSON_FIRST_DATE
            )

        # Count occurrences per PERSON_ID
        occurrence_counts = phenotype_table.group_by("PERSON_ID").aggregate(
            **aggregations
        )

        # Filter to keep only those with at least n_occurrences
        filtered_table = occurrence_counts[
            occurrence_counts.VALUE >= self.n_occurrences
        ]

        # Expose the requested date as EVENT_DATE
        filtered_table = filtered_table.mutate(
            EVENT_DATE=filtered_table[date_columns[self.return_date]],
        )

        # Select the required columns
        result_table = filtered_table.select(PHENOTYPE_TABLE_COLUMNS)

        return result_table

namespaced_table property

A PhenotypeTable has generic column names 'person_id', 'boolean', 'event_date', and 'value'. The namespaced_table appends the phenotype name to all of these columns. This is useful when joining multiple phenotype tables together.

Returns:

Name Type Description
table Table

The namespaced table for the current phenotype.

execute(tables)

Executes the phenotype computation for the current object and its children. This method recursively iterates over the children of the current object and calls their execute method if their table attribute is None.

Parameters:

Name Type Description Default
tables Dict[str, PhenexTable]

A dictionary mapping table names to PhenexTable objects. See phenex.mappers.DomainsDictionary.get_mapped_tables().

required

Returns:

Name Type Description
table PhenotypeTable

The resulting phenotype table containing the required columns. The PhenotypeTable will contain the columns: PERSON_ID, EVENT_DATE, VALUE. EVENT_DATE is determined by the return_date parameter. VALUE is different for each phenotype. For example, AgePhenotype will return the age in the VALUE column. A MeasurementPhenotype will return the observed value for the measurement. See the specific phenotype of interest to understand more.

Source code in phenex/phenotypes/phenotype.py
def execute(self, tables: Dict[str, Table]) -> PhenotypeTable:
    """
    Executes the phenotype computation for the current object and its children. This method iterates over the children of the current object and calls their execute method if their table attribute is None; each child does the same for its own children. The result is also stored on ``self.table``.

    Args:
        tables (Dict[str, PhenexTable]): A dictionary mapping table names to PhenexTable objects. See phenex.mappers.DomainsDictionary.get_mapped_tables().

    Returns:
        table (PhenotypeTable): The resulting phenotype table containing the required columns. The PhenotypeTable will contain the columns: PERSON_ID, EVENT_DATE, VALUE. EVENT_DATE is determined by the return_date parameter. VALUE is different for each phenotype. For example, AgePhenotype will return the age in the VALUE column. A MeasurementPhenotype will return the observed value for the measurement. See the specific phenotype of interest to understand more.

    Raises:
        ValueError: If the table returned by ``_execute`` (after BOOLEAN is added) is missing any of PHENOTYPE_TABLE_COLUMNS.
    """
    logger.info(f"Phenotype '{self.name}': executing...")
    for child in self.children:
        # A child whose table is already set is reused rather than recomputed.
        if child.table is None:
            logger.debug(
                f"Phenotype {self.name}: executing child phenotype '{child.name}'..."
            )
            child.execute(tables)
        else:
            logger.debug(
                f"Phenotype {self.name}: skipping already computed child phenotype '{child.name}'."
            )

    # Every row returned by _execute is marked with BOOLEAN=True.
    table = self._execute(tables).mutate(BOOLEAN=True)

    if not set(PHENOTYPE_TABLE_COLUMNS) <= set(table.columns):
        raise ValueError(
            f"Phenotype {self.name} must return columns {PHENOTYPE_TABLE_COLUMNS}. Found {table.columns}."
        )

    self.table = table.select(PHENOTYPE_TABLE_COLUMNS)
    # for some reason, having NULL datatype screws up writing the table to disk; here we make explicit cast
    if isinstance(self.table.schema()["VALUE"], ibis.expr.datatypes.core.Null):
        self.table = self.table.cast({"VALUE": "float64"})

    assert is_phenex_phenotype_table(self.table)
    logger.info(f"Phenotype '{self.name}': execution completed.")
    return self.table