Skip to content

CodelistFilter

Bases: Filter

CodelistFilter is a class designed to filter a CodeTable based on a specified codelist.

Attributes:

Name Type Description
codelist Codelist

The codelist used for filtering the CodeTable.

name str

The name of the filter. Defaults to the name of the codelist if not provided.

use_code_type bool

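A flag indicating whether to use the code type in the filtering process. Defaults to True. Note that the filter reads this flag from the codelist (`codelist.use_code_type`); it is not a constructor argument of CodelistFilter.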

Source code in phenex/filters/codelist_filter.py
class CodelistFilter(Filter):
    """
    CodelistFilter is a class designed to filter a CodeTable based on a specified codelist.

    A row is kept when its CODE (and, if the codelist requests it, its
    CODE_TYPE) matches an entry of the codelist. Matching is either literal
    (equality join against the codelist) or fuzzy (the CODE column's ``like``
    method), as selected by ``codelist.fuzzy_match``.

    Attributes:
        codelist (Codelist): The codelist used for filtering the CodeTable.
        name (str): The name of the filter. Defaults to the name of the codelist if not provided.
        codelist_as_tuples (List[Tuple[str, str]]): The codelist flattened into
            (code_type, code) pairs.

    Note:
        Whether the code type takes part in matching is controlled by
        ``codelist.use_code_type``; it is not an argument of this filter.
    """

    def __init__(self, codelist: Codelist, name=None):
        """
        Args:
            codelist (Codelist): The codelist to filter by.
            name (str, optional): Name of the filter; falls back to ``codelist.name``.

        Raises:
            ValueError: If ``codelist`` is neither None nor a Codelist instance.
        """
        self.codelist = codelist
        # Flatten (and thereby type-check) the codelist before reading any of
        # its attributes, so a wrong type surfaces as the intended ValueError
        # instead of an incidental AttributeError on `.name`.
        self.codelist_as_tuples = self._convert_codelist_to_tuples()
        self.name = name or self.codelist.name
        super().__init__()

    def _convert_codelist_to_tuples(self) -> List[Tuple[str, str]]:
        """
        Flatten ``self.codelist.codelist`` (code_type -> codes) into a list of
        (code_type, code) pairs. Returns an empty list when no codelist is set.

        Raises:
            ValueError: If the codelist is set but is not a Codelist instance.
        """
        if self.codelist is None:
            return []
        if not isinstance(self.codelist, Codelist):
            raise ValueError("Codelist must be an instance of Codelist")
        return [
            (code_type, code)
            for code_type, codes in self.codelist.codelist.items()
            for code in codes
        ]

    def _filter(self, code_table: CodeTable) -> CodeTable:
        """
        Dispatch to fuzzy or literal matching depending on ``codelist.fuzzy_match``.
        """
        assert is_phenex_code_table(code_table)

        if self.codelist.fuzzy_match:
            return self._filter_fuzzy_codelist(code_table)
        return self._filter_literal_codelist(code_table)

    def _filter_fuzzy_codelist(self, code_table):
        """
        Keep rows whose CODE matches any pattern of the codelist via ``like``.

        When ``codelist.use_code_type`` is set, a pattern only applies to rows
        whose CODE_TYPE equals the code type the pattern is listed under.
        """
        # Cast once and use it in both branches, so matching behaves the same
        # with or without code types (previously only one branch cast to str).
        code_as_str = code_table.CODE.cast("str")

        # Start from False and OR in one condition per code type.
        filter_condition = False
        for code_type, codes in self.codelist.codelist.items():
            patterns = [str(code) for code in codes]
            matches = code_as_str.like(patterns)
            if self.codelist.use_code_type:
                matches = (code_table.CODE_TYPE == code_type) & matches
            filter_condition = filter_condition | matches

        return code_table.filter(filter_condition)

    def _filter_literal_codelist(self, code_table):
        """
        Keep rows whose CODE (and CODE_TYPE, when ``codelist.use_code_type`` is
        set) equals an entry of the codelist, using an inner join against an
        in-memory codelist table.
        """
        use_code_type = self.codelist.use_code_type

        # Generate the codelist table as an Ibis literal set
        codelist_df = pd.DataFrame(
            self.codelist_as_tuples, columns=["code_type", "code"]
        ).fillna("")

        # De-duplicate on the join keys. Without this, a code that is listed
        # more than once (or, when code types are ignored, under several code
        # types) would match the same input row several times and the inner
        # join would multiply rows instead of only removing them.
        join_keys = ["code_type", "code"] if use_code_type else ["code"]
        codelist_df = codelist_df.drop_duplicates(subset=join_keys)
        codelist_table = ibis.memtable(codelist_df)

        # Create a join condition based on code and possibly code_type
        join_condition = code_table.CODE == codelist_table.code
        if use_code_type:
            join_condition = join_condition & (
                code_table.CODE_TYPE == codelist_table.code_type
            )

        # return table with downselected columns, of same type as input table
        return code_table.inner_join(codelist_table, join_condition).select(
            code_table.columns
        )

filter(table)

Filters the given table according to the rules of the Filter.

Parameters:

Name Type Description Default
table PhenexTable

The table to be filtered.

required

Returns:

Name Type Description
PhenexTable PhenexTable

The filtered table. The returned table has the exact same schema as the input table but has rows removed.

Source code in phenex/filters/filter.py
def filter(self, table: PhenexTable) -> PhenexTable:
    """
    Apply this Filter to ``table`` and return the surviving rows.

    Args:
        table (PhenexTable): The table to be filtered.

    Returns:
        PhenexTable: The filtered table, restricted to the input table's columns.
            When the input is a PhenexTable the result is re-wrapped in the
            input's own class; otherwise the selected result is returned as-is.

    Raises:
        ValueError: If the table produced by ``_filter`` lacks any column of the input table.
    """
    expected_columns = table.columns
    result = self._filter(table)

    # A filter may only drop rows; every input column must still be present.
    dropped = set(expected_columns) - set(result.columns)
    if dropped:
        raise ValueError("Filter must not remove columns.")

    # Discard any columns the filter added, keeping the input's columns.
    result = result.select(expected_columns)
    if not isinstance(table, PhenexTable):
        return result
    return type(table)(result)