"""Pandas DataFrame utilities."""

# 1. Standard python modules
from typing import Any, Dict, Optional

# 2. Third party modules
import pandas as pd

# 3. Aquaveo modules

# 4. Local modules


class DataFrameBuilder:
    """
    A utility class for dynamically creating a pandas DataFrame.

    Columns are based on predefined column names and types.
    Rows are added one by one, and the DataFrame is created at the end.

    Attributes:
        schema: Mapping of column name to pandas dtype, as passed to the constructor.
        defaults: Mapping of column name to the value used when a row omits that column.
        data: Mapping of column name to the list of values collected so far (one entry per row).
    """

    def __init__(self, schema: Dict[str, str], defaults: Optional[Dict[str, Any]] = None):
        """
        Initializes the DataFrameBuilder with a schema.

        Args:
            schema: A dictionary defining column names and their pandas dtypes.
            defaults: A dictionary defining default column values. Columns without an entry
                default to None.
        """
        self.schema = schema
        self.defaults = defaults or {}
        self.data = {col: [] for col in schema}

    def add(self, **kwargs: Any) -> None:
        """
        Adds a new row's data to the internal dictionary.

        Args:
            kwargs : key-value pairs
                Column names as keys and the corresponding row values.
                Missing columns in input will be added using the default value or None.

        Raises:
            KeyError: If any keyword is not a column name in the schema. Nothing is appended
                in that case.
        """
        # Validate before touching self.data so a rejected row never leaves columns with
        # unequal lengths.
        unexpected = set(kwargs.keys()) - set(self.schema.keys())
        if unexpected:
            raise KeyError(f"Unexpected column(s): {unexpected}")
        for col in self.schema:
            # Explicit value wins, then the configured default, then None.
            self.data[col].append(kwargs.get(col, self.defaults.get(col, None)))

    def build(self) -> pd.DataFrame:
        """
        Constructs a pandas DataFrame from the collected data and enforces the schema.

        Returns:
            A DataFrame with the schema-enforced column names and types.

        Raises:
            ValueError: If pandas raises a ValueError while constructing the DataFrame or
                casting it to the schema dtypes. The original error is chained as the cause.
        """
        try:
            return pd.DataFrame(self.data).astype(self.schema)
        except ValueError as e:
            # Chain explicitly so the traceback presents the pandas error as the direct cause
            # rather than as an unrelated exception raised during handling.
            raise ValueError(f"Error while enforcing schema types: {e}") from e

    def clear(self) -> None:
        """Clears all collected rows, resetting the internal data."""
        self.data = {col: [] for col in self.schema}


def create_default_dataframe(column_types: Dict[str, str]) -> pd.DataFrame:
    """
    Builds a zero-row DataFrame whose columns carry the requested dtypes.

    Args:
        column_types: Mapping of column name to the dtype that column should have.

    Returns:
        pd.DataFrame: A DataFrame with no rows and one typed column per mapping entry.
    """
    empty_columns = {}
    for name, dtype in column_types.items():
        # An empty Series with an explicit dtype fixes the column's type up front.
        empty_columns[name] = pd.Series(dtype=dtype)
    return pd.DataFrame(empty_columns)
