LocalCSVCodelistFactory allows for the creation of multiple codelists from a single CSV file. Use this class when you have a single CSV file that contains multiple codelists.
To use, create an instance of the class and then call the get_codelist
method with the name of the codelist you want to retrieve; this codelist name must be an entry in the name_codelist_column.
Source code in phenex/codelists/codelists.py
class LocalCSVCodelistFactory:
    """
    Build multiple codelists from a single CSV file.

    Use this class when one CSV file holds several codelists. Create an
    instance, then call `get_codelist` with the name of the codelist to
    retrieve; that name must be an entry in the column named by
    `name_codelist_column`.
    """

    def __init__(
        self,
        path: str,
        name_code_column: str = "code",
        name_codelist_column: str = "codelist",
        name_code_type_column: str = "code_type",
    ) -> None:
        """
        Read the CSV file and check that the required columns are present.

        Parameters:
            path: Path to the CSV file.
            name_code_column: The name of the column containing the codes.
            name_codelist_column: The name of the column containing the codelist names.
            name_code_type_column: The name of the column containing the code types.

        Raises:
            ValueError: If the file cannot be read, or if any of the three
                named columns is missing from the CSV.
        """
        self.path = path
        self.name_code_column = name_code_column
        self.name_codelist_column = name_codelist_column
        self.name_code_type_column = name_code_type_column
        try:
            self.df = pd.read_csv(path)
        except Exception as err:
            # `except Exception` (not a bare `except`) lets KeyboardInterrupt
            # and SystemExit propagate; chaining keeps the real cause visible.
            raise ValueError("Could not read the file at the given path.") from err
        # Check if the required columns exist in the DataFrame
        required_columns = [
            name_code_column,
            name_codelist_column,
            name_code_type_column,
        ]
        missing_columns = [
            col for col in required_columns if col not in self.df.columns
        ]
        if missing_columns:
            raise ValueError(
                f"The following required columns are missing in the CSV: {', '.join(missing_columns)}"
            )

    def get_codelists(self) -> List[str]:
        """
        Get a list of all codelists in the supplied CSV.

        Returns:
            The distinct values found in the codelist-name column.
        """
        return self.df[self.name_codelist_column].unique().tolist()

    def get_codelist(self, name: str) -> Codelist:
        """
        Retrieve a single codelist by name.

        Parameters:
            name: The codelist name; must be an entry in the codelist-name column.

        Returns:
            A Codelist built from a dictionary that maps each code type to
            the list of codes of that type belonging to the named codelist.

        Raises:
            ValueError: If no row in the CSV carries the given codelist name,
                or if the codelist cannot be built from the matching rows.
        """
        not_found = "Could not find the codelist with the given name."
        try:
            df_codelist = self.df[self.df[self.name_codelist_column] == name]
        except Exception as err:
            raise ValueError(not_found) from err
        # Filtering on an unknown name does not fail, it just yields zero rows;
        # without this check an empty codelist would be returned silently.
        if df_codelist.empty:
            raise ValueError(not_found)
        try:
            code_dict = (
                df_codelist.groupby(self.name_code_type_column)[self.name_code_column]
                .apply(list)
                .to_dict()
            )
            return Codelist(name=name, codelist=code_dict)
        except Exception as err:
            # Same exception type and message as before, now with the cause chained.
            raise ValueError(not_found) from err
|
__init__(path, name_code_column='code', name_codelist_column='codelist', name_code_type_column='code_type')
Parameters:
| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `path` | `str` | Path to the CSV file. | required |
| `name_code_column` | `str` | The name of the column containing the codes. | `'code'` |
| `name_codelist_column` | `str` | The name of the column containing the codelist names. | `'codelist'` |
| `name_code_type_column` | `str` | The name of the column containing the code types. | `'code_type'` |
Source code in phenex/codelists/codelists.py
def __init__(
    self,
    path: str,
    name_code_column: str = "code",
    name_codelist_column: str = "codelist",
    name_code_type_column: str = "code_type",
) -> None:
    """
    Read the CSV file and check that the required columns are present.

    Parameters:
        path: Path to the CSV file.
        name_code_column: The name of the column containing the codes.
        name_codelist_column: The name of the column containing the codelist names.
        name_code_type_column: The name of the column containing the code types.

    Raises:
        ValueError: If the file cannot be read, or if any of the three
            named columns is missing from the CSV.
    """
    self.path = path
    self.name_code_column = name_code_column
    self.name_codelist_column = name_codelist_column
    self.name_code_type_column = name_code_type_column
    try:
        self.df = pd.read_csv(path)
    except Exception as err:
        # `except Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate; chaining keeps the real cause visible.
        raise ValueError("Could not read the file at the given path.") from err
    # Check if the required columns exist in the DataFrame
    required_columns = [
        name_code_column,
        name_codelist_column,
        name_code_type_column,
    ]
    missing_columns = [
        col for col in required_columns if col not in self.df.columns
    ]
    if missing_columns:
        raise ValueError(
            f"The following required columns are missing in the CSV: {', '.join(missing_columns)}"
        )
|
get_codelist(name)
Retrieve a single codelist by name.
Source code in phenex/codelists/codelists.py
def get_codelist(self, name: str) -> Codelist:
    """
    Retrieve a single codelist by name.

    Parameters:
        name: The codelist name; must be an entry in the codelist-name column.

    Returns:
        A Codelist built from a dictionary that maps each code type to the
        list of codes of that type belonging to the named codelist.

    Raises:
        ValueError: If no row in the CSV carries the given codelist name,
            or if the codelist cannot be built from the matching rows.
    """
    not_found = "Could not find the codelist with the given name."
    try:
        df_codelist = self.df[self.df[self.name_codelist_column] == name]
    except Exception as err:
        raise ValueError(not_found) from err
    # Filtering on an unknown name does not fail, it just yields zero rows;
    # without this check an empty codelist would be returned silently.
    if df_codelist.empty:
        raise ValueError(not_found)
    try:
        code_dict = (
            df_codelist.groupby(self.name_code_type_column)[self.name_code_column]
            .apply(list)
            .to_dict()
        )
        return Codelist(name=name, codelist=code_dict)
    except Exception as err:
        # Same exception type and message as before, now with the cause chained.
        raise ValueError(not_found) from err
|
get_codelists()
Get a list of all codelists in the supplied CSV.
Source code in phenex/codelists/codelists.py
def get_codelists(self) -> List[str]:
    """
    List the names of all codelists present in the supplied CSV.

    Returns:
        The distinct values found in the codelist-name column.
    """
    name_column = self.df[self.name_codelist_column]
    distinct_names = name_column.unique()
    return distinct_names.tolist()
|