Bases: Filter
This class filters events in an EventTable based on a specified time range relative to an anchor date. The anchor date can either be provided by an anchor phenotype or by an 'INDEX_DATE' column in the EventTable.
Attributes:
Name |
Type |
Description |
min_days |
Optional[int]
|
Minimum number of days from the anchor date to filter events.
|
max_days |
Optional[int]
|
Maximum number of days from the anchor date to filter events.
|
anchor_phenotype |
Phenotype
|
A phenotype providing the anchor date for filtering.
|
when |
Optional[str]
|
when can be "before" or "after"; if "before", days prior to anchor event_date are positive, and days after are negative; using after, days before the anchor event_date are negative and days after the anchor event_date are positive.
|
Methods:
Name |
Description |
filter |
Filters the given EventTable based on the specified time range relative to the anchor date.
|
Examples:
# filter events to one year before index date, excluding index date
one_year_preindex = RelativeTimeRangeFilter(
max_days = Value('<', 365),
min_days = Value('>', 0),
when = 'before'
)
# filter events to one year after index date, including index date
anytime_after_index = RelativeTimeRangeFilter(
min_days = Value('>=', 0),
when = 'after'
)
Source code in phenex/filters/relative_time_range_filter.py
| class RelativeTimeRangeFilter(Filter):
"""
This class filters events in an EventTable based on a specified time range relative to an anchor date. The anchor date can either be provided by an anchor phenotype or by an 'INDEX_DATE' column in the EventTable.
Attributes:
min_days (Optional[int]): Minimum number of days from the anchor date to filter events.
max_days (Optional[int]): Maximum number of days from the anchor date to filter events.
anchor_phenotype (Phenotype): A phenotype providing the anchor date for filtering.
when (Optional[str]): when can be "before" or "after"; if "before", days prior to anchor event_date are positive, and days after are negative; using after, days before the anchor event_date are negative and days after the anchor event_date are positive.
Methods:
filter: Filters the given EventTable based on the specified time range relative to the anchor date.
Examples:
```
# filter events to one year before index date, excluding index date
one_year_preindex = RelativeTimeRangeFilter(
max_days = Value('<', 365),
min_days = Value('>', 0),
when = 'before'
)
```
```
# filter events to one year after index date, including index date
anytime_after_index = RelativeTimeRangeFilter(
min_days = Value('>=', 0),
when = 'after'
)
```
"""
def __init__(
self,
min_days: Optional[Value] = GreaterThanOrEqualTo(0),
max_days: Optional[Value] = None,
when: Optional[str] = "before",
anchor_phenotype: "Phenotype" = None,
):
verify_relative_time_range_filter_input(min_days, max_days, when)
self.min_days = min_days
self.max_days = max_days
self.when = when
self.anchor_phenotype = anchor_phenotype
super(RelativeTimeRangeFilter, self).__init__()
def _filter(self, table: EventTable):
if self.anchor_phenotype is not None:
if self.anchor_phenotype.table is None:
raise ValueError(
f"Dependent Phenotype {self.anchor_phenotype.name} must be executed before this node can run!"
)
else:
anchor_table = self.anchor_phenotype.table
reference_column = anchor_table.EVENT_DATE
# Note that joins can change column names if the tables have name collisions!
table = table.join(anchor_table, "PERSON_ID")
else:
assert (
"INDEX_DATE" in table.columns
), f"INDEX_DATE column not found in table {table}"
reference_column = table.INDEX_DATE
DAYS_FROM_ANCHOR = reference_column.delta(table.EVENT_DATE, "day")
if self.when == "after":
DAYS_FROM_ANCHOR = -DAYS_FROM_ANCHOR
table = table.mutate(DAYS_FROM_ANCHOR=DAYS_FROM_ANCHOR)
conditions = []
# Fix this, this logic needs to be abstracted to a ValueFilter
if self.min_days is not None:
if self.min_days.operator == ">":
conditions.append(table.DAYS_FROM_ANCHOR > self.min_days.value)
elif self.min_days.operator == ">=":
conditions.append(table.DAYS_FROM_ANCHOR >= self.min_days.value)
else:
raise ValueError("Operator for min days be > or >=")
if self.max_days is not None:
if self.max_days.operator == "<":
conditions.append(table.DAYS_FROM_ANCHOR < self.max_days.value)
elif self.max_days.operator == "<=":
conditions.append(table.DAYS_FROM_ANCHOR <= self.max_days.value)
else:
raise ValueError("Operator for max days be < or <=")
if conditions:
table = table.filter(conditions)
return table
|
filter(table)
Filters the given table according to the rules of the Filter.
Parameters:
Name |
Type |
Description |
Default |
table
|
PhenexTable
|
The table to be filtered.
|
required
|
Returns:
Name | Type |
Description |
PhenexTable |
PhenexTable
|
The filtered table. The returned table has the exact same schema as the input table but has rows removed.
|
Source code in phenex/filters/filter.py
| def filter(self, table: PhenexTable) -> PhenexTable:
"""
Filters the given table according to the rules of the Filter.
Args:
table (PhenexTable): The table to be filtered.
Returns:
PhenexTable: The filtered table. The returned table has the exact same schema as the input table but has rows removed.
"""
input_columns = table.columns
filtered_table = self._filter(table)
if not set(input_columns) <= set(filtered_table.columns):
raise ValueError(f"Filter must not remove columns.")
filtered_table = filtered_table.select(input_columns)
if isinstance(table, PhenexTable):
return type(table)(filtered_table)
else:
return filtered_table
|