Skip to content

Counts

Bases: Reporter

Get counts of inclusion and exclusion criteria

Source code in phenex/reporting/counts.py
class InExCounts(Reporter):
    """
    Get counts of inclusion and exclusion criteria.

    For every inclusion and exclusion phenotype of a cohort, counts the
    distinct ``PERSON_ID`` values in the phenotype's table and reports them in
    one DataFrame with the columns ``phenotype``, ``n`` and ``category``.
    """

    def execute(self, cohort: "Cohort") -> pd.DataFrame:
        """
        Compute the per-phenotype counts for ``cohort``.

        Args:
            cohort: Object exposing ``inclusions`` and ``exclusions``, each an
                iterable of phenotypes.

        Returns:
            pd.DataFrame: Inclusion rows followed by exclusion rows. The
            original row labels of each part are kept (no index reset).
        """
        self.cohort = cohort
        self.df_counts_inclusion = self.get_counts_for_phenotypes(
            self.cohort.inclusions, "inclusion"
        )
        self.df_counts_exclusion = self.get_counts_for_phenotypes(
            self.cohort.exclusions, "exclusion"
        )
        # Keep the combined result on self.df: the export helpers (to_csv,
        # to_excel, to_html, ...) refuse to run when that attribute is missing.
        self.df = pd.concat([self.df_counts_inclusion, self.df_counts_exclusion])
        return self.df

    def get_counts_for_phenotypes(
        self, phenotypes: List["Phenotype"], category: str = None
    ) -> pd.DataFrame:
        """
        Build a table with one row per phenotype and its distinct person count.

        Args:
            phenotypes: Phenotypes to count; each must expose ``name`` and
                ``table``.
            category: Optional label written to a ``category`` column on every
                row. The column is omitted when this is None.

        Returns:
            pd.DataFrame: Columns ``phenotype`` and ``n`` (plus ``category``
            when requested). The columns are present even when ``phenotypes``
            is empty.
        """
        # NOTE(review): pt.table is presumably an ibis table expression, so
        # .count().to_pandas() materialises a scalar -- confirm.
        records = [
            {
                "phenotype": pt.name,
                "n": pt.table.select("PERSON_ID").distinct().count().to_pandas(),
            }
            for pt in phenotypes
        ]
        # Name the columns explicitly so an empty phenotype list still yields
        # a frame with the expected schema.
        _df = pd.DataFrame.from_records(records, columns=["phenotype", "n"])
        if category is not None:
            _df["category"] = category
        return _df

get_pretty_display()

Return a formatted version of the reporter's results for display.

Default implementation returns a copy of self.df with numeric values rounded to decimal_places and NaN values replaced with empty strings for cleaner display.

Subclasses can override this method for custom formatting (e.g., phenotype display names).

Returns:

Type Description
DataFrame

pd.DataFrame: Formatted copy of the results

Raises:

Type Description
AttributeError

If self.df is not defined

Source code in phenex/reporting/reporter.py
def get_pretty_display(self) -> pd.DataFrame:
    """
    Build a display-friendly copy of the reporter's results.

    The copy of ``self.df`` has its numeric columns rounded to
    ``self.decimal_places`` and every missing value replaced by an empty
    string. ``self.df`` itself is left untouched.

    Subclasses can override this method for custom formatting.

    Returns:
        pd.DataFrame: Formatted copy of the results

    Raises:
        AttributeError: If self.df is not defined
    """
    if not hasattr(self, "df"):
        raise AttributeError(
            f"{self.__class__.__name__} does not have a 'df' attribute. "
            "Call execute() first or implement a custom get_pretty_display() method."
        )

    # Work on a copy so the stored results are never modified.
    formatted = self.df.copy()

    # Only numeric columns are rounded; all other columns pass through as-is.
    number_cols = formatted.select_dtypes(include=["number"]).columns
    rounded = formatted[number_cols].round(self.decimal_places)
    formatted[number_cols] = rounded

    # Blank out missing values for cleaner display.
    return formatted.fillna("")

to_csv(filename)

Export reporter results to CSV format.

Default implementation exports self.df if it exists. Subclasses can override for custom behavior. If pretty_display=True, formats the DataFrame before export.

Parameters:

Name Type Description Default
filename str

Path to the output file (relative or absolute, with or without .csv extension)

required

Returns:

Name Type Description
str str

Full path to the created file

Raises:

Type Description
AttributeError

If self.df is not defined (call execute() first)

Source code in phenex/reporting/reporter.py
def to_csv(self, filename: str) -> str:
    """
    Write the reporter's results to a CSV file.

    Exports ``self.df``, or the output of ``get_pretty_display()`` when
    ``self.pretty_display`` is truthy. The row index is not written.
    Subclasses can override for custom behavior.

    Args:
        filename: Path to the output file (relative or absolute). Any suffix
            other than ``.csv`` is replaced by ``.csv``; a missing suffix is added.

    Returns:
        str: Absolute path of the file that was written

    Raises:
        AttributeError: If self.df is not defined (call execute() first)
    """
    if not hasattr(self, "df"):
        raise AttributeError(
            f"{self.__class__.__name__} does not have a 'df' attribute. "
            "Call execute() first or implement a custom to_csv() method."
        )

    # Normalise the target so it always ends in .csv
    target = Path(filename)
    target = target if target.suffix == ".csv" else target.with_suffix(".csv")

    # Missing parent directories are created on the fly
    target.parent.mkdir(parents=True, exist_ok=True)

    # Choose between the formatted and the raw results
    if self.pretty_display:
        frame = self.get_pretty_display()
    else:
        frame = self.df

    frame.to_csv(target, index=False)

    return str(target.absolute())

to_excel(filename)

Export reporter results to Excel format.

Default implementation exports self.df if it exists. Subclasses can override for custom behavior. If pretty_display=True, formats the DataFrame before export using get_pretty_display().

Parameters:

Name Type Description Default
filename str

Path to the output file (relative or absolute, with or without .xlsx extension)

required

Returns:

Name Type Description
str str

Full path to the created file

Raises:

Type Description
AttributeError

If self.df is not defined (call execute() first)

ImportError

If openpyxl is not installed

Source code in phenex/reporting/reporter.py
def to_excel(self, filename: str) -> str:
    """
    Write the reporter's results to an Excel workbook.

    Exports ``self.df``, or the output of ``get_pretty_display()`` when
    ``self.pretty_display`` is truthy. The row index is not written.
    Subclasses can override for custom behavior.

    Args:
        filename: Path to the output file (relative or absolute). Any suffix
            other than ``.xlsx`` is replaced by ``.xlsx``; a missing suffix is added.

    Returns:
        str: Absolute path of the file that was written

    Raises:
        AttributeError: If self.df is not defined (call execute() first)
        ImportError: If openpyxl is not installed
    """
    if not hasattr(self, "df"):
        raise AttributeError(
            f"{self.__class__.__name__} does not have a 'df' attribute. "
            "Call execute() first or implement a custom to_excel() method."
        )

    # Normalise the target so it always ends in .xlsx
    target = Path(filename)
    target = target if target.suffix == ".xlsx" else target.with_suffix(".xlsx")

    # Missing parent directories are created on the fly
    target.parent.mkdir(parents=True, exist_ok=True)

    # Choose between the formatted and the raw results
    if self.pretty_display:
        frame = self.get_pretty_display()
    else:
        frame = self.df

    frame.to_excel(target, index=False)

    return str(target.absolute())

to_html(filename)

Export reporter results to HTML format.

Default implementation exports self.df if it exists. Subclasses can override for custom behavior. If pretty_display=True, formats the DataFrame before export.

Parameters:

Name Type Description Default
filename str

Path to the output file (relative or absolute, with or without .html extension)

required

Returns:

Name Type Description
str str

Full path to the created file

Raises:

Type Description
AttributeError

If self.df is not defined (call execute() first)

Source code in phenex/reporting/reporter.py
def to_html(self, filename: str) -> str:
    """
    Write the reporter's results to an HTML file.

    Exports ``self.df``, or the output of ``get_pretty_display()`` when
    ``self.pretty_display`` is truthy. The row index is not written.
    Subclasses can override for custom behavior.

    Args:
        filename: Path to the output file (relative or absolute). Any suffix
            other than ``.html`` is replaced by ``.html``; a missing suffix is added.

    Returns:
        str: Absolute path of the file that was written

    Raises:
        AttributeError: If self.df is not defined (call execute() first)
    """
    if not hasattr(self, "df"):
        raise AttributeError(
            f"{self.__class__.__name__} does not have a 'df' attribute. "
            "Call execute() first or implement a custom to_html() method."
        )

    # Normalise the target so it always ends in .html
    target = Path(filename)
    target = target if target.suffix == ".html" else target.with_suffix(".html")

    # Missing parent directories are created on the fly
    target.parent.mkdir(parents=True, exist_ok=True)

    # Choose between the formatted and the raw results
    if self.pretty_display:
        frame = self.get_pretty_display()
    else:
        frame = self.df

    frame.to_html(target, index=False)

    return str(target.absolute())

to_markdown(filename)

Export reporter results to Markdown format.

Default implementation exports self.df if it exists. Subclasses can override for custom behavior. If pretty_display=True, formats the DataFrame before export.

Parameters:

Name Type Description Default
filename str

Path to the output file (relative or absolute, with or without .md extension)

required

Returns:

Name Type Description
str str

Full path to the created file

Raises:

Type Description
AttributeError

If self.df is not defined (call execute() first)

ImportError

If tabulate is not installed (required for df.to_markdown())

Source code in phenex/reporting/reporter.py
def to_markdown(self, filename: str) -> str:
    """
    Export reporter results to Markdown format.

    Exports ``self.df``, or the output of ``get_pretty_display()`` when
    ``self.pretty_display`` is truthy. The row index is not written and the
    file is always encoded as UTF-8. Subclasses can override for custom behavior.

    Args:
        filename: Path to the output file (relative or absolute). Any suffix
            other than ``.md`` is replaced by ``.md``; a missing suffix is added.

    Returns:
        str: Full path to the created file

    Raises:
        AttributeError: If self.df is not defined (call execute() first)
        ImportError: If tabulate is not installed (required for df.to_markdown())
    """
    if not hasattr(self, "df"):
        raise AttributeError(
            f"{self.__class__.__name__} does not have a 'df' attribute. "
            "Call execute() first or implement a custom to_markdown() method."
        )

    # Convert to Path object and ensure .md extension
    filepath = Path(filename)
    if filepath.suffix != ".md":
        filepath = filepath.with_suffix(".md")

    # Create parent directories if needed
    filepath.parent.mkdir(parents=True, exist_ok=True)

    # Apply pretty display if requested
    df_to_export = self.get_pretty_display() if self.pretty_display else self.df

    # Only the rendering step needs tabulate, so only it is guarded. The
    # original ImportError is chained so the real cause stays in the traceback.
    try:
        markdown_content = df_to_export.to_markdown(index=False)
    except ImportError as err:
        raise ImportError(
            "tabulate is required for Markdown export. Install with: pip install tabulate"
        ) from err

    # Explicit UTF-8 keeps non-ASCII cell values intact regardless of the
    # platform's default encoding.
    filepath.write_text(markdown_content, encoding="utf-8")

    return str(filepath.absolute())

to_word(filename)

Export reporter results to Microsoft Word format.

Default implementation exports self.df as a simple table if it exists. Subclasses can override for custom formatting (headers, styling, etc). If pretty_display=True, formats the DataFrame before export using get_pretty_display().

Parameters:

Name Type Description Default
filename str

Path to the output file (relative or absolute, with or without .docx extension)

required

Returns:

Name Type Description
str str

Full path to the created file

Raises:

Type Description
AttributeError

If self.df is not defined (call execute() first)

ImportError

If python-docx is not installed

Source code in phenex/reporting/reporter.py
def to_word(self, filename: str) -> str:
    """
    Export reporter results to Microsoft Word format.

    Writes ``self.df`` (or the output of ``get_pretty_display()`` when
    ``self.pretty_display`` is truthy) as a single table: one header row with
    the column names, then one row per DataFrame row. Every value is written
    as ``str(value)``. Subclasses can override for custom formatting.

    Args:
        filename: Path to the output file (relative or absolute). Any suffix
            other than ``.docx`` is replaced by ``.docx``; a missing suffix is added.

    Returns:
        str: Full path to the created file

    Raises:
        AttributeError: If self.df is not defined (call execute() first)
        ImportError: If python-docx is not installed
    """
    if not hasattr(self, "df"):
        raise AttributeError(
            f"{self.__class__.__name__} does not have a 'df' attribute. "
            "Call execute() first or implement a custom to_word() method."
        )

    # Imported lazily so python-docx stays an optional dependency; the
    # original error is chained to keep the real cause in the traceback.
    try:
        from docx import Document
    except ImportError as err:
        raise ImportError(
            "python-docx is required for Word export. Install with: pip install python-docx"
        ) from err

    # Convert to Path object and ensure .docx extension
    filepath = Path(filename)
    if filepath.suffix != ".docx":
        filepath = filepath.with_suffix(".docx")

    # Create parent directories if needed
    filepath.parent.mkdir(parents=True, exist_ok=True)

    # Apply pretty display if requested
    df_to_export = self.get_pretty_display() if self.pretty_display else self.df

    # Create Word document with table
    doc = Document()

    # Add table (rows + 1 for header)
    table = doc.add_table(
        rows=len(df_to_export) + 1, cols=len(df_to_export.columns)
    )
    table.style = "Light Grid Accent 1"

    # Add header row
    header_cells = table.rows[0].cells
    for col_idx, column_name in enumerate(df_to_export.columns):
        header_cells[col_idx].text = str(column_name)

    # Add data rows. itertuples keeps each column's own dtype, whereas
    # iterrows coerces a row to one common dtype and would render integers
    # of an all-numeric frame as "5.0".
    data_rows = df_to_export.itertuples(index=False, name=None)
    for row_idx, row_values in enumerate(data_rows, start=1):
        # Look the row's cells up once instead of once per column.
        row_cells = table.rows[row_idx].cells
        for col_idx, value in enumerate(row_values):
            row_cells[col_idx].text = str(value)

    # Save document
    doc.save(str(filepath))

    return str(filepath.absolute())