Skip to content

LocalCSVCodelistFactory

LocalCSVCodelistFactory allows for the creation of multiple codelists from a single CSV file. Use this class when you have a single CSV file that contains multiple codelists.

To use, create an instance of the class and then call the get_codelist method with the name of the codelist you want to retrieve; this codelist name must be an entry in the name_codelist_column.

Source code in phenex/codelists/codelists.py
class LocalCSVCodelistFactory:
    """
    LocalCSVCodelistFactory allows for the creation of multiple codelists from a single CSV file. Use this class when you have a single CSV file that contains multiple codelists.

    To use, create an instance of the class and then call the `get_codelist` method with the name of the codelist you want to retrieve; this codelist name must be an entry in the name_codelist_column. Call `get_codelists` to list the names available in the file.
    """

    def __init__(
        self,
        path: str,
        name_code_column: str = "code",
        name_codelist_column: str = "codelist",
        name_code_type_column: str = "code_type",
    ) -> None:
        """
        Load the CSV into a DataFrame and check that the required columns exist.

        Parameters:
            path: Path to the CSV file.
            name_code_column: The name of the column containing the codes.
            name_codelist_column: The name of the column containing the codelist names.
            name_code_type_column: The name of the column containing the code types.

        Raises:
            ValueError: If the file cannot be read, or if any of the three
                configured columns is missing from the CSV.
        """
        self.path = path
        self.name_code_column = name_code_column
        self.name_codelist_column = name_codelist_column
        self.name_code_type_column = name_code_type_column
        # NOTE(review): read_csv infers column dtypes, so purely numeric codes
        # may lose leading zeros -- confirm whether codes should be read as str.
        try:
            self.df = pd.read_csv(path)
        except Exception as err:
            # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
            # still propagate, and chain the cause so the real failure is visible.
            raise ValueError("Could not read the file at the given path.") from err

        # Check if the required columns exist in the DataFrame
        required_columns = [
            name_code_column,
            name_codelist_column,
            name_code_type_column,
        ]
        missing_columns = [
            col for col in required_columns if col not in self.df.columns
        ]
        if missing_columns:
            raise ValueError(
                f"The following required columns are missing in the CSV: {', '.join(missing_columns)}"
            )

    def get_codelists(self) -> List[str]:
        """
        Get a list of all codelists in the supplied CSV.

        Returns:
            The unique values of the codelist-name column, in order of first appearance.
        """
        return self.df[self.name_codelist_column].unique().tolist()

    def get_codelist(self, name: str) -> Codelist:
        """
        Retrieve a single codelist by name.

        Parameters:
            name: The codelist name; must be a value present in the codelist-name column.

        Returns:
            A Codelist built from a dict that maps each code type to the list
            of codes found for this codelist name.

        Raises:
            ValueError: If no row in the CSV carries the given name, or if the
                codelist cannot be built from the matching rows.
        """
        df_codelist = self.df[self.df[self.name_codelist_column] == name]
        # Filtering on an unknown name does not raise; it yields an empty frame.
        # Report it explicitly instead of silently building an empty codelist.
        if df_codelist.empty:
            raise ValueError("Could not find the codelist with the given name.")
        try:
            code_dict = (
                df_codelist.groupby(self.name_code_type_column)[self.name_code_column]
                .apply(list)
                .to_dict()
            )
            return Codelist(name=name, codelist=code_dict)
        except Exception as err:
            # Keep the ValueError contract callers rely on, but preserve the cause.
            raise ValueError(
                "Could not find the codelist with the given name."
            ) from err

__init__(path, name_code_column='code', name_codelist_column='codelist', name_code_type_column='code_type')

Parameters:

Name Type Description Default
path str

Path to the CSV file.

required
name_code_column str

The name of the column containing the codes.

'code'
name_codelist_column str

The name of the column containing the codelist names.

'codelist'
name_code_type_column str

The name of the column containing the code types.

'code_type'
Source code in phenex/codelists/codelists.py
def __init__(
    self,
    path: str,
    name_code_column: str = "code",
    name_codelist_column: str = "codelist",
    name_code_type_column: str = "code_type",
) -> None:
    """
    Load the CSV into a DataFrame and check that the required columns exist.

    Parameters:
        path: Path to the CSV file.
        name_code_column: The name of the column containing the codes.
        name_codelist_column: The name of the column containing the codelist names.
        name_code_type_column: The name of the column containing the code types.

    Raises:
        ValueError: If the file cannot be read, or if any of the three
            configured columns is missing from the CSV.
    """
    self.path = path
    self.name_code_column = name_code_column
    self.name_codelist_column = name_codelist_column
    self.name_code_type_column = name_code_type_column
    # NOTE(review): read_csv infers column dtypes, so purely numeric codes
    # may lose leading zeros -- confirm whether codes should be read as str.
    try:
        self.df = pd.read_csv(path)
    except Exception as err:
        # Catch Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate, and chain the cause so the real failure is visible.
        raise ValueError("Could not read the file at the given path.") from err

    # Check if the required columns exist in the DataFrame
    required_columns = [
        name_code_column,
        name_codelist_column,
        name_code_type_column,
    ]
    missing_columns = [
        col for col in required_columns if col not in self.df.columns
    ]
    if missing_columns:
        raise ValueError(
            f"The following required columns are missing in the CSV: {', '.join(missing_columns)}"
        )

get_codelist(name)

Retrieve a single codelist by name.

Source code in phenex/codelists/codelists.py
def get_codelist(self, name: str) -> Codelist:
    """
    Retrieve a single codelist by name.

    Parameters:
        name: The codelist name; must be a value present in the codelist-name column.

    Returns:
        A Codelist built from a dict that maps each code type to the list
        of codes found for this codelist name.

    Raises:
        ValueError: If no row in the CSV carries the given name, or if the
            codelist cannot be built from the matching rows.
    """
    df_codelist = self.df[self.df[self.name_codelist_column] == name]
    # Filtering on an unknown name does not raise; it yields an empty frame.
    # Report it explicitly instead of silently building an empty codelist.
    if df_codelist.empty:
        raise ValueError("Could not find the codelist with the given name.")
    try:
        code_dict = (
            df_codelist.groupby(self.name_code_type_column)[self.name_code_column]
            .apply(list)
            .to_dict()
        )
        return Codelist(name=name, codelist=code_dict)
    except Exception as err:
        # Keep the ValueError contract callers rely on, but preserve the cause.
        raise ValueError(
            "Could not find the codelist with the given name."
        ) from err

get_codelists()

Get a list of all codelists in the supplied CSV.

Source code in phenex/codelists/codelists.py
def get_codelists(self) -> List[str]:
    """
    List the name of every codelist present in the supplied CSV.

    Returns:
        The unique values of the codelist-name column, in order of first appearance.
    """
    codelist_names = self.df[self.name_codelist_column]
    distinct_names = codelist_names.unique()
    return distinct_names.tolist()