Skip to content

DomainsDictionary

A DomainsDictionary is used to map an entire database from an arbitrary schema to a PhenEx internal representation.

Attributes:

Name Type Description
domains_dict Dict[str, class]

A dictionary where keys are domain names and values are uninstantiated PhenexTable class objects.

Methods:

Name Description
get_mapped_tables

Get all tables mapped to PhenEx representation using the given connection.

get_source_tables

Get all source tables using the given connection.

set_mapped_tables

Create a view for all mapped tables in the destination database.

Source code in phenex/mappers.py
class DomainsDictionary:
    """
    A DomainsDictionary is used to map an entire database from an arbitrary schema to a PhenEx internal representation.

    Attributes:
        domains_dict (Dict[str, class]): A dictionary where keys are domain names and values are uninstantiated PhenexTable class objects.

    Methods:
        get_mapped_tables(con) -> Dict[str, PhenexTable]:
            Get all tables mapped to PhenEx representation using the given connection.
        get_source_tables(con) -> Dict[str, Table]:
            Get all source tables using the given connection.
        set_mapped_tables(con, overwrite=False) -> None:
            Create a view for all mapped tables in the destination database.
        to_dict() -> dict:
            Serialize the domain-to-mapper-class configuration.
        from_dict(data) -> DomainsDictionary:
            Rebuild a DomainsDictionary from the output of to_dict().
    """

    def __init__(self, domains_dict):
        self.domains_dict = domains_dict

    def set_mapped_tables(self, con, overwrite=False) -> None:
        """
        Create a view for all mapped tables in the destination database.

        Domains whose name already appears in the destination database's
        table listing are skipped; for every other domain the source table is
        fetched, wrapped in its mapper and a view is created from the
        mapper's ``table`` attribute.

        Args:
            con: The connection to the database.
            overwrite: Forwarded unchanged to ``con.create_view``, which is
                responsible for overwrite handling.

        Returns:
            None. The views are created as a side effect.
        """
        existing_tables = con.dest_connection.list_tables(
            database=con.SNOWFLAKE_DEST_DATABASE
        )
        for domain, mapper in self.domains_dict.items():
            # NOTE(review): the existence check uses the domain key while the
            # view is created under mapper.NAME_TABLE - confirm these are
            # meant to be the same identifier.
            if domain not in existing_tables:
                t = con.get_source_table(mapper.NAME_TABLE)
                mapped_table = mapper(t).table
                # overwrite error handling handled in create_view call
                con.create_view(
                    mapped_table, name_table=mapper.NAME_TABLE, overwrite=overwrite
                )

    def get_mapped_tables(self, con) -> Dict[str, PhenexTable]:
        """
        Get all tables mapped to PhenEx representation using the given connection.

        Each mapper class is instantiated with the source table returned by
        ``con.get_source_table(mapper.NAME_TABLE)``.

        Args:
            con: The connection to the database.

        Returns:
            Dict[str, PhenexTable]: A dictionary where keys are domain names and values are mapped tables.
            A domain whose source table cannot be found is kept with the value None.

        Raises:
            exc.IbisError: Re-raised when the error message does not contain "Table not found".
        """
        mapped_tables = {}
        for domain, mapper in self.domains_dict.items():
            try:
                source_table = con.get_source_table(mapper.NAME_TABLE)
                mapped_tables[domain] = mapper(source_table)
            except exc.IbisError as e:
                # Only a missing table is tolerated; anything else propagates.
                if "Table not found" in str(e):
                    logger.warning(
                        f"Required table '{mapper.NAME_TABLE}' for domain '{domain}' not found in the database@ adding None for this domain"
                    )
                    mapped_tables[domain] = None
                else:
                    raise
        return mapped_tables

    def get_source_tables(self, con) -> Dict[str, Table]:
        """
        Get all source tables using the given connection.

        Several domains may share one source table; each distinct table name
        is fetched only once.

        Args:
            con: The connection to the database.

        Returns:
            Dict[str, Table]: A dictionary where keys are source table names and values are
            the objects returned by ``con.get_source_table`` for those names.
        """
        source_tables = {}
        for mapper in self.domains_dict.values():
            table_name = mapper.NAME_TABLE
            if table_name not in source_tables:
                source_tables[table_name] = con.get_source_table(table_name)
        return source_tables

    def to_dict(self) -> dict:
        """
        Serialize the DomainsDictionary configuration.

        This serializes the mapping of domain names to PhenexTable classes,
        storing only the class configuration, not any actual table data.

        Returns:
            dict: ``{"class_name": "DomainsDictionary", "domains_dict": {...}}`` where the inner
            dictionary maps each domain name to the result of the mapper class's ``to_dict()``.
        """
        serialized_domains = {}
        for domain_name, mapper_class in self.domains_dict.items():
            # Store the class configuration
            serialized_domains[domain_name] = mapper_class.to_dict()

        return {
            "class_name": "DomainsDictionary",
            "domains_dict": serialized_domains,
        }

    @staticmethod
    def from_dict(data: dict) -> "DomainsDictionary":
        """
        Reconstruct a DomainsDictionary from serialized data.

        Each entry of ``data["domains_dict"]`` must carry a ``"__table_class__"``
        key naming the mapper class. The class is looked up in this module's
        globals first and otherwise imported from the entry's ``"__module__"``
        (default ``"phenex.mappers"``).

        Args:
            data: Serialized DomainsDictionary configuration

        Returns:
            DomainsDictionary instance with mapper classes

        Raises:
            KeyError: If an entry has no ``"__table_class__"`` key.
            ValueError: If the mapper class cannot be imported; the underlying
                ImportError/AttributeError is attached as ``__cause__``.
        """
        import importlib

        domains_dict = {}
        for domain_name, table_config in data.get("domains_dict", {}).items():
            table_class_name = table_config["__table_class__"]

            # Try to find the class in the current module (mappers.py)
            if table_class_name in globals():
                mapper_class = globals()[table_class_name]
            else:
                # Try to import from the module specified
                module_name = table_config.get("__module__", "phenex.mappers")
                try:
                    module = importlib.import_module(module_name)
                    mapper_class = getattr(module, table_class_name)
                except (ImportError, AttributeError) as err:
                    # Chain the original error so the real reason stays visible.
                    raise ValueError(
                        f"Cannot find mapper class '{table_class_name}' in module '{module_name}'"
                    ) from err

            domains_dict[domain_name] = mapper_class

        return DomainsDictionary(domains_dict)

from_dict(data) staticmethod

Reconstruct a DomainsDictionary from serialized data.

Parameters:

Name Type Description Default
data dict

Serialized DomainsDictionary configuration

required

Returns:

Type Description
DomainsDictionary

DomainsDictionary instance with mapper classes

Source code in phenex/mappers.py
@staticmethod
def from_dict(data: dict) -> "DomainsDictionary":
    """
    Reconstruct a DomainsDictionary from serialized data.

    Each entry of ``data["domains_dict"]`` must carry a ``"__table_class__"``
    key naming the mapper class. The class is looked up in this module's
    globals first and otherwise imported from the entry's ``"__module__"``
    (default ``"phenex.mappers"``).

    Args:
        data: Serialized DomainsDictionary configuration

    Returns:
        DomainsDictionary instance with mapper classes

    Raises:
        KeyError: If an entry has no ``"__table_class__"`` key.
        ValueError: If the mapper class cannot be imported; the underlying
            ImportError/AttributeError is attached as ``__cause__``.
    """
    import importlib

    domains_dict = {}
    for domain_name, table_config in data.get("domains_dict", {}).items():
        table_class_name = table_config["__table_class__"]

        # Try to find the class in the current module (mappers.py)
        if table_class_name in globals():
            mapper_class = globals()[table_class_name]
        else:
            # Try to import from the module specified
            module_name = table_config.get("__module__", "phenex.mappers")
            try:
                module = importlib.import_module(module_name)
                mapper_class = getattr(module, table_class_name)
            except (ImportError, AttributeError) as err:
                # Chain the original error so the real reason stays visible.
                raise ValueError(
                    f"Cannot find mapper class '{table_class_name}' in module '{module_name}'"
                ) from err

        domains_dict[domain_name] = mapper_class

    return DomainsDictionary(domains_dict)

get_mapped_tables(con)

Get all tables mapped to PhenEx representation using the given connection.

If a database is not provided, the current database of the connection is used to find the tables.

Parameters:

Name Type Description Default
con

The connection to the database.

required

Returns:

Type Description
Dict[str, PhenexTable]

A dictionary where keys are domain names and values are mapped tables (None for a domain whose source table was not found).

Raises:

Type Description
ValueError

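Documented for a missing required table, but the implementation shown below does not raise it: a missing table is logged as a warning and its domain is mapped to None, while other Ibis errors are re-raised.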

Source code in phenex/mappers.py
def get_mapped_tables(self, con) -> Dict[str, PhenexTable]:
    """
    Build the PhenEx-mapped table for every configured domain.

    For each domain, the mapper class is instantiated with the source table
    that ``con.get_source_table(mapper.NAME_TABLE)`` returns.

    Args:
        con: The connection to the database.

    Returns:
        Dict[str, PhenexTable]: Domain name -> mapped table. A domain whose
        source table cannot be found is kept with the value None.

    Raises:
        exc.IbisError: Re-raised when the error message does not contain
            "Table not found".
    """
    tables_by_domain = {}
    for domain, mapper in self.domains_dict.items():
        try:
            tables_by_domain[domain] = mapper(con.get_source_table(mapper.NAME_TABLE))
        except exc.IbisError as err:
            # Anything other than a missing table is a real failure.
            if "Table not found" not in str(err):
                raise
            logger.warning(
                f"Required table '{mapper.NAME_TABLE}' for domain '{domain}' not found in the database@ adding None for this domain"
            )
            tables_by_domain[domain] = None
    return tables_by_domain

get_source_tables(con)

Get all source tables using the given connection.

Parameters:

Name Type Description Default
con

The connection to the database.

required

Returns:

Type Description
Dict[str, str]

A dictionary where keys are source table names and values are the objects returned by con.get_source_table for those names.

Source code in phenex/mappers.py
def get_source_tables(self, con) -> Dict[str, Table]:
    """
    Get all source tables using the given connection.

    Several domains may share one source table; each distinct table name is
    fetched only once.

    Args:
        con: The connection to the database.

    Returns:
        Dict[str, Table]: A dictionary where keys are source table names and values are
        the objects returned by ``con.get_source_table`` for those names.
    """
    source_tables = {}
    for mapper in self.domains_dict.values():
        table_name = mapper.NAME_TABLE
        # Skip names already fetched so shared source tables are read once.
        if table_name not in source_tables:
            source_tables[table_name] = con.get_source_table(table_name)
    return source_tables

set_mapped_tables(con, overwrite=False)

Create a view for all mapped tables in the destination database.

Parameters:

Name Type Description Default
con

The connection to the database.

required
overwrite

Whether to overwrite existing views if found. Otherwise, throws an error.

False

Returns:

Type Description
Dict[str, Table]

The implementation shown below has no return statement, so the call returns None; the views are created as a side effect.

Source code in phenex/mappers.py
def set_mapped_tables(self, con, overwrite=False) -> None:
    """
    Create a view for all mapped tables in the destination database.

    Domains whose name already appears in the destination database's table
    listing are skipped; for every other domain the source table is fetched,
    wrapped in its mapper and a view is created from the mapper's ``table``
    attribute.

    Args:
        con: The connection to the database.
        overwrite: Forwarded unchanged to ``con.create_view``, which is
            responsible for overwrite handling.

    Returns:
        None. The views are created as a side effect.
    """
    existing_tables = con.dest_connection.list_tables(
        database=con.SNOWFLAKE_DEST_DATABASE
    )
    for domain, mapper in self.domains_dict.items():
        # NOTE(review): the existence check uses the domain key while the view
        # is created under mapper.NAME_TABLE - confirm these are meant to be
        # the same identifier.
        if domain not in existing_tables:
            t = con.get_source_table(mapper.NAME_TABLE)
            mapped_table = mapper(t).table
            # overwrite error handling handled in create_view call
            con.create_view(
                mapped_table, name_table=mapper.NAME_TABLE, overwrite=overwrite
            )

to_dict()

Serialize the DomainsDictionary configuration.

This serializes the mapping of domain names to PhenexTable classes, storing only the class configuration, not any actual table data.

Returns:

Name Type Description
dict dict

Serialized domains dictionary configuration

Source code in phenex/mappers.py
def to_dict(self) -> dict:
    """
    Serialize the domain-to-mapper-class configuration.

    Only class configuration is captured: every mapper class contributes the
    result of its own ``to_dict()``; no table data is involved.

    Returns:
        dict: ``{"class_name": "DomainsDictionary", "domains_dict": {...}}``
        where the inner dictionary maps each domain name to its mapper
        class's serialized form.
    """
    per_domain = {
        name: table_cls.to_dict() for name, table_cls in self.domains_dict.items()
    }
    return {"class_name": "DomainsDictionary", "domains_dict": per_domain}