"""PestObsDataGenerator class."""
__copyright__ = "(C) Copyright Aquaveo 2025"
__license__ = "All rights reserved"

# 1. Standard Python modules
import csv
from datetime import datetime
from enum import Enum
import os
from typing import Sequence

# 2. Third party modules
from flopy.utils import GridIntersect
from shapely.geometry import Point

# 3. Aquaveo modules
from xms.coverage.xy.xy_series import XySeries
from xms.data_objects.parameters import Coverage, FilterLocation
from xms.guipy import file_io_util
from xms.interp.interpolate.interp_idw import InterpIdw

# 4. Local modules
from xms.mf6.components import dis_builder
from xms.mf6.data import grid_info, oc_data, time_util
from xms.mf6.data.grid_info import GridInfo
from xms.mf6.file_io import io_util
from xms.mf6.file_io.pest.pest_obs_util import ObsCovData
from xms.mf6.mapping import grid_intersector, map_util
from xms.mf6.misc.settings import Settings

# String constants
# Column names used to index rows read from the coverage attribute table files
OBS_TYPE = 'Type'
OBS_PT = 'obs. pt'  # value compared against the 'Type' column (not itself a column name)

# Head
OBS_HEAD = 'Obs. Head'
OBS_HEAD_INTERVAL = 'Obs. Head interval'
OBS_HEAD_STD_DEV = 'Obs. Head std. dev'

# Trans. Head
OBS_TRANS_HEAD = 'Obs. Trans. Head'
OBS_TRANS_HEAD_INTERVAL = 'Obs. Trans. Head interval'
OBS_TRANS_HEAD_STD_DEV = 'Obs. Trans. Head std. dev'

# MF6 Concentration (which is just like Trans. Head)
OBS_MF6_CONC = 'Obs. MF6 Concentration'
OBS_MF6_CONC_INTERVAL = 'Obs. MF6 Concentration interval'
OBS_MF6_CONC_STD_DEV = 'Obs. MF6 Concentration std. dev'

# MF6 Temperature (which is just like Trans. Head)
OBS_MF6_TEMP = 'Obs. MF6 Temperature'
OBS_MF6_TEMP_INTERVAL = 'Obs. MF6 Temperature interval'
OBS_MF6_TEMP_STD_DEV = 'Obs. MF6 Temperature std. dev'

# Observed Flow / Trans. Observed Flow
OBS_FLOW = 'Obs. flow'
OBS_FLOW_RATE = 'Obs. flow rate'
OBS_FLOW_INTERVAL = 'Obs. flow interval'
OBS_FLOW_STD_DEV = 'Obs. flow std. dev.'
OBS_TRANS_FLOW_STD_DEV = 'Obs. flow std. dev.'  # Trans is the same as steady state (OBS_FLOW_STD_DEV)

# Columns shared by the observation types above
OBS_LAYER = 'Layer'
OBS_TOP_SCR = 'Top scr.'
OBS_BOT_SCR = 'Bot. scr.'
OBS_NAME = 'Name'
OBS_ID = 'ID'

# misc
# We need both the PEST zero date and date with time formats below.
# The *_STRFTIME* strings are the strftime/strptime formats matching the two zero-date strings.
PEST_ZERO_DATE = '01/01/1950'
PEST_ZERO_DATE_TIME = '01/01/1950 00:00:00'
PEST_STRFTIME_DATE = '%m/%d/%Y'
PEST_STRFTIME = '%m/%d/%Y %H:%M:%S'


class ErrorEnum(Enum):
    """Kinds of problems recorded while generating the PEST observation data.

    PestObsDataGenerator stores one of these under the 'error' key of each entry in its error list, and
    turns each one into a message string in ``_error_string_from_errors``.
    """
    HEAD_MUST_USE_TRANSIENT = 1  # coverage has "Head" observations but the simulation is transient
    HEAD_MUST_NOT_USE_TRANSIENT = 2  # coverage has "Trans. Head" observations but the simulation is steady state
    FLOW_MUST_USE_TRANSIENT = 3  # coverage has "Observed Flow" observations but the simulation is transient
    FLOW_MUST_NOT_USE_TRANSIENT = 4  # coverage has "Trans. Observed Flow" but the simulation is steady state
    SERIES_NOT_FOUND = 5  # the time series referenced by a feature object was not found
    TIME_OUTSIDE_RANGE = 6  # an observation time is before the start or after the end of the simulation
    NO_CELLGRP = 7  # no CELLGRP data was found for a flow observation
    NO_BCS_FOR_CELLGRP = 8  # no boundary conditions were found for a flow observation's CELLGRP
    CURVES_MUST_USE_DATES = 9  # MODFLOW uses dates but the time series does not
    CURVES_MUST_NOT_USE_DATES = 10  # MODFLOW does not use dates but the time series does
    POINT_OUTSIDE_GRID = 11  # the observation point's layer is outside the grid, so the point was skipped


class PestObsDataGenerator:
    """Generates the files used by the PEST observation utilities.

    .b2b - "Bore-to-Budget File". Info on how to read computed flow from the budget file
    .b2map - Only used by GMS. Relates aliases to map ids.
    .blisting - "Bore Listing File". Names of point observation locations.
    .bsamp - "Bore Sample File". Observed values at discrete times.
    .bwt - Only used by GMS. Just like .bsamp but with weighted values (1 / std. dev.)
    .n2b - "Node-to-Bore Interpolation File". Has N closest values and interpolation weights
    .fsamp - "Bore Sample File". Observed flows at discrete times
    .fwt - Only used by GMS. Just like .fsamp but with weighted values (1 / std. dev.)
    .samp - NOT generated here. An output file from mf6bud2smp.exe and used in smp2smp.exe. Has computed flows.

    1) These files are generated when the user pushes the "Generate PEST Observation Data" button in the dialog.

    2) When we save the simulation, these files are copied to a "<model>_pest" subfolder with the simulation files.
       Batch files are also created in that subfolder at that time.

    3) On Read Solution, the batch files are run and the output files are created:
        - <model>.samp
        - <model>.bsamp.out
        - <model>.fsamp.out
        - temp.rec

    """
    def __init__(self, package):
        """Initializer.

        Args:
            package: PEST Obs package.
        """
        self._nearest_n_points = 0
        self._files = []
        self._alias_count = 1
        self._alias_set = set()
        self._alias_list = []
        self._package = package
        self._model_ftype: str = package.model.ftype if package else ''
        self._b2map_data = {}  # List of tuples of: alias, mapid, geom
        # dependent variable (head, concentration, or temperature)
        self._dep_var = []  # observed tuples: alias, time, head, (1/std dev)
        self._flow = []  # observed flow tuples: alias, time, flow, (1/std dev)
        self._flow_weighted = []  # observed flow
        self._flows_b2b = []  # rows in the .b2b file
        self._idw_list = []  # InterpIdw per layer
        self._cellids = []
        self._interpolation_weights = []
        self._object_cellgrps = {}  # cellgrp info sorted by coverage, feature type, and feature id
        self._start_date_time = None
        self._end_date_time = None
        self._time_units = None
        self._coverage = None
        self._errors = []
        self._modflow_uses_dates = True
        self._feature_type: str = ''
        self._all_xy_series: dict[int, XySeries] = {}
        self._cogrid = None
        self._ugrid = None  # UGrid of the cogrid, so we only get it once
        self._grid_info: GridInfo | None = None
        self._intersector: GridIntersect | None = None
        self._tops = []
        self._bottoms = []
        self._budget_text_by_ftype = {
            'CHD6': 'CHD',
            'DRN6': 'DRN',
            'EVT6': 'EVT',
            'GHB6': 'GHB',
            'LAK6': 'LAK',
            'MAW6': 'MAW',
            'RCH6': 'RCH',
            'RIV6': 'RIV',
            'SFR6': 'SFR',
            'UZF6': 'UZF-GWRCH',
            'WEL6': 'WEL'
        }

    def generate(self, dep_var_cov_data: ObsCovData, flow_cov_data: ObsCovData, nearest_n_points: int):
        """Generates the dependent variable (head, concentration, or temperature) and flow files.

        We first go through and gather the data we need for the files, then we use the data to write the files.

        NOTE(review): only the alias bookkeeping is reset here. The other accumulators (errors, files, observed
        values, .b2b rows, ...) are not reset in this method, so calling it twice on the same instance appears
        to accumulate results - confirm that a new generator is created for every run.

        Args:
            dep_var_cov_data: coverages and their att files.
            flow_cov_data: coverages and their att files.
            nearest_n_points: Nearest n points used for IDW interpolation.

        Returns:
            tuple[list[str], str]: List of files generated, and the error string (if any).
        """
        # Start the alias numbering over for this run
        self._alias_count = 1
        self._alias_set = set()
        self._alias_list = []
        self._initialize_time_data()
        # MODFLOW "uses dates" when the TDIS start time came back as a datetime
        self._modflow_uses_dates = isinstance(self._start_date_time, datetime)
        # Dependent variable data goes first, then flow data; both append to the shared alias list
        self._generate_dep_var_data(dep_var_cov_data, nearest_n_points)
        self._generate_flow_data(flow_cov_data)
        if self._alias_list:
            # These are only written if at least one alias was created above
            self._write_b2map_file()
            self._write_samp_file_all_times(flow=False)
            # I don't think flows work like this:
            # self._write_samp_file_all_times(flow=True)
        return self._files, self._error_string_from_errors()

    def _initialize_time_data(self):
        """Gets the time stuff from TDIS."""
        tdis = self._package.model.mfsim.tdis
        self._period_count = tdis.get_period_count()
        # self._start_date_time be 0.0 if not using START_DATE_TIME, otherwise it will be a datetime object
        self._start_date_time = tdis.get_start_date_time()
        self._end_date_time = tdis.get_end_date_time()
        self._time_units = tdis.get_time_units()

    def _generate_dep_var_data(self, dep_var_cov_data: ObsCovData, nearest_n_points: int) -> list | None:
        """Generates the dependent variable (head, concentration, or temperature) files.

        We first go through and gather the data we need for the files, then we use the data to write the files.

        Args:
            dep_var_cov_data: dict of coverages and a list of their att files.
            nearest_n_points: Nearest n points used for IDW interpolation.

        Returns:
            (list): List of files generated.
        """
        if not dep_var_cov_data:
            return

        self._nearest_n_points = nearest_n_points
        self._init_grid()
        self._init_interp()

        # Gather data needed to write the files
        data_found = False
        for self._coverage, cov_info in dep_var_cov_data.items():
            # Iterate through attribute files (point, arc, etc)
            for file in cov_info.att_files:
                self._feature_type, _ = self._feature_type_and_ids(file, self._coverage)
                if self._feature_type != 'point':
                    continue  # pragma no cover - this shouldn't happen

                points = self._coverage.get_points(FilterLocation.PT_LOC_DISJOINT)
                self._all_xy_series = io_util.read_xy_series_file(
                    file, self._start_date_time, self._time_units, date_times_to_floats=False
                )
                column_names, transient_obs = self._get_data_from_table_definition(file, flow=False)
                if not column_names:
                    break

                if self._model_ftype == 'GWF6':
                    interval_column = OBS_TRANS_HEAD_INTERVAL if transient_obs else OBS_HEAD_INTERVAL
                elif self._model_ftype == 'GWT6':
                    interval_column = OBS_MF6_CONC_INTERVAL
                elif self._model_ftype == 'GWE6':
                    interval_column = OBS_MF6_TEMP_INTERVAL
                else:
                    raise ValueError(f'Model ftype "{self._model_ftype}" not supported for PEST observations.')

                # Read att file while iterating through points
                with open(file, 'r') as att_csv_file:
                    reader = csv.DictReader(att_csv_file, fieldnames=column_names)
                    # skip the header row
                    header = next(reader)  # noqa F841 local variable 'header' is assigned to but never used
                    for point, row in zip(points, reader):
                        if row[OBS_TYPE] != OBS_PT:
                            continue
                        layer = self._get_layer(row, [point.x, point.y, point.z])
                        if layer < 1 or layer > self._grid_info.nlay:
                            self._add_error_point_outside_grid(point.id, row[OBS_NAME])
                            continue

                        # The order here is important
                        if self._add_to_bsamp_and_bwt(point.id, row):
                            self._add_to_n2b(point, layer)
                            self._add_to_b2map(
                                self._alias_list[-1],
                                cov_info.tree_path,
                                self._coverage,
                                self._feature_type,
                                row[OBS_ID],
                                row[interval_column], [point.x, point.y, point.z],
                                flow=False
                            )
                            data_found = True

        # Write the files
        if data_found:
            self._write_blisting_file()
            self._write_samp_file(flow=False)
            self._write_weighted_file(flow=False)
            self._write_n2b_file()
        return self._files

    def _generate_flow_data(self, flow_cov_data: ObsCovData) -> list | None:
        """Generates the flow files.

        We first go through and gather the data we need for the files, then we use the data to write the files.

        Args:
            flow_cov_data: List of tuples of coverages and their att files.

        Returns:
            (list): List of files generated.
        """
        if not flow_cov_data:
            return

        self._object_cellgrps = self.read_cellgrp_info(self._package.model)

        # Gather data needed to write the files
        data_found = False
        for self._coverage, cov_info in flow_cov_data.items():
            # Iterate through attribute files (point, arc, etc)
            for file in cov_info.att_files:
                self._feature_type, feature_ids = self._feature_type_and_ids(file, self._coverage)
                if not feature_ids:
                    continue

                arc_groups = self._coverage.arc_groups if self._feature_type == 'arc_group' else None
                self._all_xy_series = io_util.read_xy_series_file(
                    file, self._start_date_time, self._time_units, date_times_to_floats=False
                )
                column_names, transient_obs = self._get_data_from_table_definition(file, flow=True)
                if not column_names:
                    break

                # Read att file
                with open(file, 'r') as att_csv_file:
                    reader = csv.DictReader(att_csv_file, fieldnames=column_names)
                    # skip the header row
                    header = next(reader)  # noqa F841 local variable 'header' is assigned to but never used
                    for feature_id, row in zip(feature_ids, reader):
                        if not int(row[OBS_FLOW]):
                            continue
                        if self._add_to_flow_data(
                            cov_info.tree_path, self._coverage.uuid, feature_id, row, arc_groups, transient_obs
                        ):
                            data_found = True

        # Write the files
        if data_found:
            self._write_samp_file(flow=True)
            self._write_weighted_file(flow=True)
            self._write_b2b_file()
        return self._files

    def _get_data_from_table_definition(self, file, flow: bool):
        """Reads the table definition that goes with an attribute file and checks it against TDIS.

        Args:
            file: The attribute table file.
            flow: True for flow observations, False for dependent variable observations.

        Returns:
            (tuple): The column names and whether the observations are transient, or (None, None) if the
            observations (steady state vs. transient) do not match the simulation.
        """
        definition = map_util.read_table_definition_file(file)
        if flow:
            is_transient = _transient_flow_obs(definition)
        else:
            is_transient = _transient_dep_var_obs(definition, self._model_ftype)

        if self._temporal_atts_match_tdis(is_transient, flow=flow):
            names = [column['name'] for column in definition['columns']]
            return names, is_transient
        return None, None

    def _temporal_atts_match_tdis(self, transient_obs: bool, flow: bool):
        """Returns True if TDIS is steady state and obs atts are steady state, or transient and transient."""
        if self._period_count > 1 and not transient_obs:
            if not flow:
                self._add_error_head_must_use_transient()
            else:
                self._add_error_flow_must_use_transient()
            return False
        if self._period_count == 1 and transient_obs:
            if not flow:
                self._add_error_head_must_not_use_transient()
            else:
                self._add_error_flow_must_not_use_transient()
            return False
        return True

    def _feature_type_and_ids(self, att_file, coverage):
        """Returns the type of features in the attribute table file and the list of feature ids."""
        if os.path.basename(att_file).startswith('points'):
            points = coverage.get_points(FilterLocation.PT_LOC_DISJOINT)
            return 'point', [point.id for point in points]
        elif os.path.basename(att_file).startswith('arcs'):
            return 'arc', [arc.id for arc in coverage.arcs]
        elif os.path.basename(att_file).startswith('arc_groups'):
            return 'arc_group', list(coverage.arc_groups.keys())
        elif os.path.basename(att_file).startswith('polys'):
            return 'polygon', [polygon.id for polygon in coverage.polygons]
        return ''  # pragma no cover - should never happen

    @staticmethod
    def read_cellgrp_info(model):
        """Reads the CELLGRP info from the package settings.json files.

        Args:
            model: The GwfModel.

        Returns:
            (dict):
        """
        object_cellgrps = {}
        for package in model.packages:
            settings = Settings.read_settings(package.filename)
            if not settings:
                continue
            cellgrp_dict = settings.get('CELLGRP')
            cellgrp_geom_dict = settings.get('CELLGRP_GEOM')
            if not cellgrp_dict or not cellgrp_geom_dict:
                continue

            for map_id, cellgrp in cellgrp_dict.items():
                coverage_uuid, feature_type, feature_id = map_id_tuple_from_map_id(map_id)
                if coverage_uuid not in object_cellgrps:
                    object_cellgrps[coverage_uuid] = {}
                if feature_type not in object_cellgrps[coverage_uuid]:
                    object_cellgrps[coverage_uuid][feature_type] = {}
                geom = cellgrp_geom_dict[map_id]
                object_cellgrps[coverage_uuid][feature_type][feature_id] = (
                    package.ftype, package.filename, cellgrp, geom
                )
        return object_cellgrps

    def _add_to_xsamp_and_xwt(
        self, transient: bool, ss_col, trans_col, ss_std_dev_col, trans_std_dev_col, well: bool, dep_vars_or_flows,
        feature_id: int, row
    ) -> bool:
        """Adds data for the .bsamp and .bwt files, or .fsamp and .fwt files."""
        if transient:
            weight = self._weight_from_std_dev(row, trans_std_dev_col)
            return self._add_from_xy_series(feature_id, row, dep_vars_or_flows, trans_col, weight, well=well, alias='')
        else:
            weight = self._weight_from_std_dev(row, ss_std_dev_col)
            alias = self._get_alias_and_append_if_new(row[OBS_NAME], '', well=well)
            dep_vars_or_flows.append((alias, PEST_ZERO_DATE_TIME, row[ss_col], weight))
            return True

    def _add_to_bsamp_and_bwt(self, feature_id: int, row) -> bool:
        """Adds data for the .bsamp and .bwt files."""
        if self._model_ftype == 'GWF6':
            return self._add_to_xsamp_and_xwt(
                OBS_TRANS_HEAD in row, OBS_HEAD, OBS_TRANS_HEAD, OBS_HEAD_STD_DEV, OBS_TRANS_HEAD_STD_DEV, True,
                self._dep_var, feature_id, row
            )
        elif self._model_ftype == 'GWT6':
            return self._add_to_xsamp_and_xwt(
                OBS_MF6_CONC in row, '', OBS_MF6_CONC, '', OBS_MF6_CONC_STD_DEV, True, self._dep_var, feature_id, row
            )
        elif self._model_ftype == 'GWE6':
            return self._add_to_xsamp_and_xwt(
                OBS_MF6_TEMP in row, '', OBS_MF6_TEMP, '', OBS_MF6_TEMP_STD_DEV, True, self._dep_var, feature_id, row
            )
        else:
            raise ValueError(f'Model ftype "{self._model_ftype}" not supported for PEST observations.')

    def _add_to_fsamp_and_fwt(self, feature_id: int, row, transient_obs) -> bool:
        """Adds data for the .fsamp and .fwt files.

        Steady state and transient flows use the same flow rate and standard deviation columns.

        Args:
            feature_id: Id of the feature the row belongs to.
            row: Row of the attribute table, keyed by column name.
            transient_obs: True if the flow observations are transient.

        Returns:
            (bool): True if data was added.
        """
        return self._add_to_xsamp_and_xwt(
            transient=transient_obs,
            ss_col=OBS_FLOW_RATE,
            trans_col=OBS_FLOW_RATE,
            ss_std_dev_col=OBS_FLOW_STD_DEV,
            trans_std_dev_col=OBS_FLOW_STD_DEV,
            well=False,
            dep_vars_or_flows=self._flow,
            feature_id=feature_id,
            row=row
        )

    def _get_alias_and_append_if_new(self, name, alias, well):
        if not alias:
            alias = self._make_alias(name, well=well, str_set=self._alias_set)
            self._alias_list.append(alias)
        return alias

    def _add_from_xy_series(self, feature_id, row, row_list, trans_column, weight, well, alias):
        """Add lines for xy series rows.

        Returns:
            (bool): True if successful (although there may still be errors).
        """
        xy_series_id = int(row[trans_column])
        if xy_series_id not in self._all_xy_series:
            self._add_error_series_not_found(feature_id, row[OBS_NAME])
            return False
        else:
            xy_series = self._all_xy_series[xy_series_id]
            found = False
            for xy_time, xy_value in zip(xy_series.x, xy_series.y):
                if not self._compatible_time_format(xy_time, feature_id, row[OBS_NAME]):
                    break
                if xy_time < self._start_date_time or xy_time > self._end_date_time:
                    self._add_error_time_outside_of_range(feature_id, row[OBS_NAME], xy_time)
                else:
                    found = True
                    alias = self._get_alias_and_append_if_new(row[OBS_NAME], alias, well=well)
                    xy_time = self._ensure_date_time(xy_time)
                    row_list.append((alias, xy_time.strftime(PEST_STRFTIME), xy_value, weight))
            return found

    def _compatible_time_format(self, xy_time, feature_id, name):
        """Returns True if the xy series times are consistent with MODFLOW (datetimes vs. floats)."""
        if self._modflow_uses_dates and not isinstance(xy_time, datetime):
            self._add_error_curves_must_use_dates(feature_id, name)
            return False
        elif not self._modflow_uses_dates and isinstance(xy_time, datetime):
            self._add_error_curves_must_not_use_dates(feature_id, name)
            return False
        return True

    def _ensure_date_time(self, xy_time):
        """Converts the float time to a datetime if necessary."""
        if isinstance(xy_time, datetime):
            return xy_time
        start = datetime.strptime(PEST_ZERO_DATE_TIME, PEST_STRFTIME)
        return time_util.compute_end_date_py(start, xy_time, self._time_units)

    def _weight_from_std_dev(self, row, column_name):
        """Returns 1 / std_dev."""
        weight = None
        std_dev = float(row[column_name])
        if std_dev != 0.0:
            weight = 1 / std_dev
        return weight

    def _add_error_head_must_use_transient(self):
        """Records a HEAD_MUST_USE_TRANSIENT error for the current coverage."""
        entry = {'error': ErrorEnum.HEAD_MUST_USE_TRANSIENT, 'coverage': self._coverage.name}
        self._errors.append(entry)

    def _add_error_head_must_not_use_transient(self):
        """Records a HEAD_MUST_NOT_USE_TRANSIENT error for the current coverage."""
        entry = {'error': ErrorEnum.HEAD_MUST_NOT_USE_TRANSIENT, 'coverage': self._coverage.name}
        self._errors.append(entry)

    def _add_error_flow_must_use_transient(self):
        """Records a FLOW_MUST_USE_TRANSIENT error for the current coverage."""
        entry = {'error': ErrorEnum.FLOW_MUST_USE_TRANSIENT, 'coverage': self._coverage.name}
        self._errors.append(entry)

    def _add_error_flow_must_not_use_transient(self):
        """Records a FLOW_MUST_NOT_USE_TRANSIENT error for the current coverage."""
        entry = {'error': ErrorEnum.FLOW_MUST_NOT_USE_TRANSIENT, 'coverage': self._coverage.name}
        self._errors.append(entry)

    def _add_error_time_outside_of_range(self, feature_id, name, xy_date_time):
        """Records a TIME_OUTSIDE_RANGE error for a feature in the current coverage.

        Args:
            feature_id: Id of the feature.
            name: Name of the feature.
            xy_date_time: The observation time that is outside the range.
        """
        entry = {
            'error': ErrorEnum.TIME_OUTSIDE_RANGE,
            'coverage': self._coverage.name,
            'type': self._feature_type,
            'id': feature_id,
            'name': name,
            'time': xy_date_time,
        }
        self._errors.append(entry)

    def _add_error_series_not_found(self, feature_id, name):
        """Records a SERIES_NOT_FOUND error for a feature in the current coverage.

        Args:
            feature_id: Id of the feature.
            name: Name of the feature.
        """
        entry = {
            'error': ErrorEnum.SERIES_NOT_FOUND,
            'coverage': self._coverage.name,
            'type': self._feature_type,
            'id': feature_id,
            'name': name,
        }
        self._errors.append(entry)

    def _add_error_no_cellgrp(self, feature_id, name):
        """Records a NO_CELLGRP error for a feature in the current coverage.

        Args:
            feature_id: Id of the feature.
            name: Name of the feature.
        """
        entry = {
            'error': ErrorEnum.NO_CELLGRP,
            'coverage': self._coverage.name,
            'type': self._feature_type,
            'id': feature_id,
            'name': name,
        }
        self._errors.append(entry)

    def _add_error_no_bcs_for_cellgrp(self, feature_id, name):
        """Records a NO_BCS_FOR_CELLGRP error for a feature in the current coverage.

        Args:
            feature_id: Id of the feature.
            name: Name of the feature.
        """
        entry = {
            'error': ErrorEnum.NO_BCS_FOR_CELLGRP,
            'coverage': self._coverage.name,
            'type': self._feature_type,
            'id': feature_id,
            'name': name,
        }
        self._errors.append(entry)

    def _add_error_curves_must_use_dates(self, feature_id, name):
        """Records a CURVES_MUST_USE_DATES error for a feature in the current coverage.

        Args:
            feature_id: Id of the feature.
            name: Name of the feature.
        """
        entry = {
            'error': ErrorEnum.CURVES_MUST_USE_DATES,
            'coverage': self._coverage.name,
            'type': self._feature_type,
            'id': feature_id,
            'name': name,
        }
        self._errors.append(entry)

    def _add_error_curves_must_not_use_dates(self, feature_id, name):
        """Records a CURVES_MUST_NOT_USE_DATES error for a feature in the current coverage.

        Args:
            feature_id: Id of the feature.
            name: Name of the feature.
        """
        entry = {
            'error': ErrorEnum.CURVES_MUST_NOT_USE_DATES,
            'coverage': self._coverage.name,
            'type': self._feature_type,
            'id': feature_id,
            'name': name,
        }
        self._errors.append(entry)

    def _add_error_point_outside_grid(self, feature_id, name):
        """Records a POINT_OUTSIDE_GRID error for a feature in the current coverage.

        Args:
            feature_id: Id of the feature.
            name: Name of the feature.
        """
        entry = {
            'error': ErrorEnum.POINT_OUTSIDE_GRID,
            'coverage': self._coverage.name,
            'type': self._feature_type,
            'id': feature_id,
            'name': name,
        }
        self._errors.append(entry)

    def _error_identifying_string(self, error):
        """Returns the error string with identifying info about the error."""
        string = ''
        if 'coverage' in error:
            string += f'Coverage: {error["coverage"]}; '
        if 'type' in error:
            string += f'Feature type: {error["type"]}; '
        if 'id' in error:
            string += f'Id: {error["id"]}; '
        if 'name' in error:
            string += f'Name: {error["name"]}; '
        if 'time' in error:
            string += f'Time: {error["time"]}; '
        return string

    def _error_string_from_errors(self):
        """Combines the list of errors into one string."""
        if not self._errors:
            return ''

        error_str = 'Errors encountered:\n\n'
        for error in self._errors:
            if error['error'] == ErrorEnum.HEAD_MUST_USE_TRANSIENT:
                error_str += (
                    'The following coverage has "Head" observations. The '
                    'current MODFLOW simulation is transient and only coverages with '
                    '"Trans. Head" observations will be used.\n'
                )
                error_str += self._error_identifying_string(error)
            elif error['error'] == ErrorEnum.HEAD_MUST_NOT_USE_TRANSIENT:
                error_str += (
                    'The following coverage has "Trans. Head" observations. The '
                    'current MODFLOW simulation is steady state and only coverages with '
                    '"Head" observations will be used.\n'
                )
                error_str += self._error_identifying_string(error)
            elif error['error'] == ErrorEnum.FLOW_MUST_USE_TRANSIENT:
                error_str += (
                    'The following coverage has "Observed Flow" observations. The '
                    'current MODFLOW simulation is transient and only coverages with '
                    '"Trans. Observed Flow" observations will be used.\n'
                )
                error_str += self._error_identifying_string(error)
            elif error['error'] == ErrorEnum.FLOW_MUST_NOT_USE_TRANSIENT:
                error_str += (
                    'The following coverage has "Trans. Observed Flow" observations. The '
                    'current MODFLOW simulation is steady state and only coverages with '
                    '"Observed Flow" observations will be used.\n'
                )
                error_str += self._error_identifying_string(error)
            elif error['error'] == ErrorEnum.SERIES_NOT_FOUND:
                error_str += 'The time series for the following feature object was not found:\n'
                error_str += self._error_identifying_string(error)
            elif error['error'] == ErrorEnum.TIME_OUTSIDE_RANGE:
                error_str += 'Observation time outside of MODFLOW output time range.\n'
                error_str += self._error_identifying_string(error)
            elif error['error'] == ErrorEnum.NO_CELLGRP:
                error_str += (
                    'CELLGRP data for the following flow observation was not found. You may need to'
                    ' execute "Map from Coverage".\n'
                )
                error_str += self._error_identifying_string(error)
            elif error['error'] == ErrorEnum.NO_BCS_FOR_CELLGRP:
                error_str += (
                    'Boundary conditions for the following flow observation was not found. You may need to'
                    ' execute "Map from Coverage".\n'
                )
                error_str += self._error_identifying_string(error)
            elif error['error'] == ErrorEnum.CURVES_MUST_USE_DATES:
                error_str += 'MODFLOW is using dates so the observation time series curves must also use dates.\n'
                error_str += self._error_identifying_string(error)
            elif error['error'] == ErrorEnum.CURVES_MUST_NOT_USE_DATES:
                error_str += 'MODFLOW is not using dates so the observation time series curves must not use dates.\n'
                error_str += self._error_identifying_string(error)
            elif error['error'] == ErrorEnum.POINT_OUTSIDE_GRID:
                error_str += (
                    'The point is outside the model grid and was skipped.'
                    '\nCheck the "3D Grid layer option for obs. pts." option in the Coverage Setup dialog.'
                    '\nAlso, check that any well screens intersect the model grid.\n'
                )
                error_str += self._error_identifying_string(error)

            error_str += '\n\n'
        return error_str

    def _add_to_n2b(self, point, layer):
        """Adds data for the .n2b file."""
        ALMOST_ZERO = 1e-7  # noqa: N806 (should be lowercase)
        idxs_and_weights = self._idw_list[layer - 1].interpolate_weights((point.x, point.y, point.z))
        cellids = []
        weights = []
        cells_per_layer = self._grid_info.cells_per_layer()
        for idx, weight in zip(idxs_and_weights[0], idxs_and_weights[1]):
            if abs(weight) < ALMOST_ZERO:
                continue
            cellid = grid_info.cell_index_from_lay_cell2d(layer, idx + 1, cells_per_layer, one_based=True) + 1
            cellids.append(cellid)
            weights.append(weight)
        self._cellids.append(cellids)
        self._interpolation_weights.append(weights)

    def _cellgrp_info_found(self, coverage_uuid, feature_type, feature_id):
        """Returns True if we have cellgrp data for this feature."""
        return (
            coverage_uuid in self._object_cellgrps and feature_type in self._object_cellgrps[coverage_uuid]  # noqa W503
            and feature_id in self._object_cellgrps[coverage_uuid][feature_type]  # noqa W503
        )  # noqa W503

    def _add_to_flow_data(
        self, cov_tree_path: str, coverage_uuid: str, feature_id: int, row, arc_groups, transient_obs: bool
    ) -> bool:
        """Adds data for the flow files (.fsamp, .fwt, .b2b) for one observation row.

        For anything but an arc group, one b2b row tied to the feature's CELLGRP is added. For an arc group, each arc
        in the group that has cellgrp info gets its own alias and b2b row, and the group itself is added as a
        COMPOUND of those aliases.

        Args:
            cov_tree_path: Tree path of the coverage, stored with the b2map data.
            coverage_uuid: Uuid of the coverage, used to look up the cellgrp info.
            feature_id: ID of the feature (or arc group) the row belongs to.
            row: Attribute table row of the observation.
            arc_groups: Indexed by arc group ID to get the IDs of the arcs in the group.
            transient_obs: Passed on to _add_to_fsamp_and_fwt().

        Returns:
            (bool): True if the observation was added to the .fsamp/.fwt data, False otherwise.
        """
        if self._feature_type != 'arc_group':
            if not self._cellgrp_info_found(coverage_uuid, self._feature_type, feature_id):
                self._add_error_no_cellgrp(feature_id, row[OBS_NAME])
                return False

            ftype, file_name, cellgrp, geom = self._object_cellgrps[coverage_uuid][self._feature_type][feature_id]
            if not self._bcs_with_cellgrp_exist(file_name, cellgrp):
                self._add_error_no_bcs_for_cellgrp(feature_id, row[OBS_NAME])
                return False

            if not self._add_to_fsamp_and_fwt(feature_id, row, transient_obs):
                return False

            # The alias of the observation just added is the last one in the list
            alias = self._alias_list[-1]
            self._flows_b2b.append(f'{alias} {self._budget_text_by_ftype[ftype]} AUX CELLGRP {cellgrp}')
            self._add_to_b2map(
                alias, cov_tree_path, self._coverage, self._feature_type, row[OBS_ID], row[OBS_FLOW_INTERVAL], geom,
                flow=True
            )
            return True

        # arc_group
        if not self._add_to_fsamp_and_fwt(feature_id, row, transient_obs):
            return False
        group_alias = self._alias_list[-1]

        # Add the arcs in the arc group
        member_geoms = []
        member_aliases = []
        for arc_id in arc_groups[feature_id]:
            if not self._cellgrp_info_found(coverage_uuid, 'arc', arc_id):
                self._add_error_no_cellgrp(arc_id, name='')
                continue

            # Same condition _cellgrp_info_found() just checked; kept as a defensive guard
            cellgrps_by_type = self._object_cellgrps[coverage_uuid]
            if 'arc' not in cellgrps_by_type or arc_id not in cellgrps_by_type['arc']:
                continue

            ftype, file_name, cellgrp, geom = cellgrps_by_type['arc'][arc_id]
            member_geoms.append(geom)
            if not self._bcs_with_cellgrp_exist(file_name, cellgrp):
                # Don't think this is a showstopper. Include the arc and the error message.
                # TODO: Get the arc name. It's in a different att file.
                # NOTE(review): this reports the arc group's ID, not arc_id - confirm that is intended.
                self._add_error_no_bcs_for_cellgrp(feature_id, '<unknown>')

            member_alias = self._get_alias_and_append_if_new('FLOW', '', well=False)
            member_aliases.append(member_alias)
            self._flows_b2b.append(f'{member_alias} {self._budget_text_by_ftype[ftype]} AUX CELLGRP {cellgrp}')
            self._add_to_b2map(member_alias, cov_tree_path, self._coverage, 'arc', arc_id, '0.0', geom, flow=True)

        # Add the arc group as a COMPOUND
        joined_aliases = ' '.join(member_aliases)
        self._flows_b2b.append(f'{group_alias} COMPOUND {len(member_aliases)} {joined_aliases}')
        self._add_to_b2map(
            group_alias, cov_tree_path, self._coverage, self._feature_type, row[OBS_ID], row[OBS_FLOW_INTERVAL],
            member_geoms, flow=True
        )
        return True

    def _add_to_b2map(
        self, alias: str, cov_tree_path: str, coverage: Coverage, feature_type: str, feature_id: int, interval: str,
        geom, flow: bool
    ):
        """Stores the .b2map entry for an alias in self._b2map_data.

        Args:
            alias: The observation alias, used as the key of the entry.
            cov_tree_path: Tree path of the coverage.
            coverage: The coverage; its uuid goes into the map id.
            feature_type: Feature type that goes into the map id.
            feature_id: Feature ID that goes into the map id.
            interval: The interval, converted to a float.
            geom: Geometry stored with the entry as is.
            flow: True for a flow observation ('flow'), False for a dependent variable observation ('dep_var').
        """
        entry = {
            'map_id': make_map_id(coverage.uuid, feature_type, feature_id),
            'coverage_tree_path': cov_tree_path,
            'interval': float(interval),
            'geometry': geom,
            'obs_type': 'flow' if flow else 'dep_var',
        }
        self._b2map_data[alias] = entry

    def _bcs_with_cellgrp_exist(self, file_name, cellgrp):
        """Asks the package with the given file name whether it has boundary conditions with the given cellgrp.

        Args:
            file_name: File name used to find the package in the model.
            cellgrp: The cellgrp to look for.

        Returns:
            Whatever the package's bcs_with_cellgrp_exist() returns for cellgrp.
        """
        model = self._package.model
        return model.package_from_filename(file_name).bcs_with_cellgrp_exist(cellgrp)

    def _get_layer(self, row, loc: Sequence[float]) -> int:
        """Returns the layer (1-based) the obs point is in, or -1 if no layer is found.

        If the row has a layer column, that value is used. Otherwise the layer is found by elevation: the middle of
        the screen if the row has both screen columns, else the z of the location.

        Args:
            row: Attribute table row of the observation point.
            loc: x, y, z location of the observation point.

        Returns:
            (int): See description.
        """
        elevation = loc[2]
        if OBS_LAYER in row:
            return int(row[OBS_LAYER])
        if OBS_TOP_SCR in row and OBS_BOT_SCR in row:
            elevation = (float(row[OBS_TOP_SCR]) + float(row[OBS_BOT_SCR])) / 2.0

        # No layer given, so search for it by elevation
        if not self._intersector:
            self._init_intersector()

        # Find the 2D cell containing the point
        hits = self._intersector.intersect(Point(loc[0], loc[1]))
        if hits is None or hits.size == 0:
            return -1
        first_hit = hits.cellids.tolist()[0]
        mfcellid = self._grid_info.fix_cellid(first_hit, 1)
        cellid_2d = self._grid_info.cell_index_from_modflow_cellid(mfcellid) + 1

        # Go through the layers until the cell in that layer brackets the elevation
        cells_per_layer = self._grid_info.cells_per_layer()
        for layer in range(1, self._grid_info.nlay + 1):
            cell_idx = grid_info.cell_index_from_lay_cell2d(layer, cellid_2d, cells_per_layer, True)
            top = self._tops[cell_idx]
            bottom = self._bottoms[cell_idx]
            if bottom <= elevation <= top:
                return layer
        return -1

    def _init_intersector(self) -> None:
        """Sets up the intersector and the cell tops and bottoms used to find a cell from an xyz location."""
        model = self._package.model
        cogrid = self._cogrid
        _, intersector = grid_intersector.create_flopy_grid_and_intersector(model.get_dis(), cogrid, self._ugrid)
        self._intersector = intersector
        self._tops = cogrid.get_cell_tops()
        self._bottoms = cogrid.get_cell_bottoms()

    def _write_samp_file(self, flow: bool):
        """Writes the .bsamp and .fsamp files.

        This file contains observed data at discrete times: alias, starting date/time, observed value.
        """
        extension = '.bsamp' if not flow else '.fsamp'
        dep_vars_or_flows = self._dep_var if not flow else self._flow

        # Handle file name
        filename = os.path.splitext(self._package.filename)[0] + extension
        self._files.append(filename)
        # Write file
        with open(filename, 'w') as file:
            for row in dep_vars_or_flows:
                file.write(f'{row[0]} {row[1]} {row[2]}\n')

    def _write_weighted_file(self, flow: bool):
        """Writes the .bwt and .fwt files.

        This file contains observed data at discrete times: alias, starting date/time, weighted observed value.
        """
        extension = '.bwt' if not flow else '.fwt'
        dep_vars_or_flows = self._dep_var if not flow else self._flow

        # Handle file name
        filename = os.path.splitext(self._package.filename)[0] + extension
        # self._files.append(filename)  These files are only for GMS.
        # Write file
        with open(filename, 'w') as file:
            for row in dep_vars_or_flows:
                file.write(f'{row[0]} {row[1]} {row[3]}\n')

    def _write_n2b_file(self):
        """Writes the .n2b file.

        This file has the aliases and the list of interpolation points and weights.
        """
        filename = os.path.splitext(self._package.filename)[0] + '.n2b'
        self._files.append(filename)
        with open(filename, 'w') as file:
            for row in range(len(self._alias_list)):
                string = ''
                n = len(self._cellids[row])
                for id_, weight in zip(self._cellids[row], self._interpolation_weights[row]):
                    string += f' {id_} {weight}'
                line = f'{self._alias_list[row]} {n}{string}'
                file.write(f'{line}\n')

    def _write_blisting_file(self):
        """Writes the .blisting file.

        This file just lists the aliases.
        """
        filename = os.path.splitext(self._package.filename)[0] + '.blisting'
        self._files.append(filename)
        with open(filename, 'w') as file:
            for alias in self._alias_list:
                file.write(f'{alias}\n')

    def _write_b2map_file(self):
        """Writes self._b2map_data to the .b2map file as JSON.

        This file is only used by GMS and relates the aliases to the map ids.
        """
        base_path, _ext = os.path.splitext(self._package.filename)
        file_io_util.write_json_file(self._b2map_data, base_path + '.b2map')

    def _write_b2b_file(self):
        """Writes the .b2b file containing info on how to obtain flow values from the budget file."""
        filename = os.path.splitext(self._package.filename)[0] + '.b2b'
        self._files.append(filename)
        with open(filename, 'w') as file:
            for b2b_row in self._flows_b2b:
                file.write(b2b_row)
                file.write('\n')

    def _write_samp_file_all_times(self, flow: bool):
        """Writes a file containing the times of all the time steps that have output according to the OC package.

        Args:
            flow: True if doing flow obs.

        This is needed by GMS for flow observations.
        """
        if not self._alias_list:
            return  # pragma no cover - should never happen

        output_times = get_output_times(self._package.model)
        if not output_times:
            return  # pragma no cover - should never happen
        extension = '.mftimes-bsamp' if not flow else '.mftimes-fsamp'
        filename = os.path.splitext(self._package.filename)[0] + extension
        with open(filename, 'w') as file:
            # for alias in self._alias_list:
            for alias, data in self._b2map_data.items():
                dep_var_ok = (not flow and data['obs_type'] == 'dep_var')
                flow_ok = (flow and data['obs_type'] == 'flow')
                if dep_var_ok or flow_ok:
                    for _period, timesteps in output_times.items():
                        for _, date_time in timesteps.items():
                            file.write(f'{alias} {date_time.strftime(PEST_STRFTIME)} 0.0\n')  # mm/dd/yyyy

    def _make_alias(self, name, well, str_set):
        """Returns the alias from make_alias(), using self._alias_count as the count.

        Args:
            name: Name of the feature.
            well: True if a well, false if flow.
            str_set: Set of aliases already used.
        """
        return make_alias(name=name, well=well, str_set=str_set, count=self._alias_count)

    def _init_grid(self):
        """Gets the grid from the model and initializes the grid variables (cogrid, ugrid, grid info)."""
        cogrid = self._package.model.get_cogrid()
        self._cogrid = cogrid
        if cogrid:
            self._ugrid = cogrid.ugrid
        else:
            self._ugrid = None
        # Using model grid_info assumes grid still matches DIS
        self._grid_info = self._package.grid_info()

    def _init_interp(self):
        """Rebuilds self._idw_list with one IDW interpolator per grid layer.

        Each interpolator is built from the 2D cell centers of its layer, with all scalars set to 0.0.
        """
        self._idw_list.clear()
        centers = dis_builder.get_cell_centers2d(self._cogrid, self._ugrid)
        per_layer = self._grid_info.cells_per_layer()
        for layer in range(self._grid_info.nlay):
            first = layer * per_layer
            interpolator = InterpIdw(points=centers[first:first + per_layer])
            interpolator.scalars = [0.0] * per_layer
            interpolator.set_search_options(nearest_point=self._nearest_n_points + 1, quadrant_oct_search=False)
            self._idw_list.append(interpolator)


def get_output_times(model):
    """Returns the output times based on the TDIS package and, if there is one, the OC package.

    Args:
        model: The GWF or GWT model object.

    Returns:
        The result of the first OC package's output_times(), or of oc_data.output_times() if the model has no OC
        package.
    """
    tdis = model.mfsim.tdis
    oc_list = model.packages_from_ftype('OC6')
    if oc_list:
        return oc_list[0].output_times(tdis)
    return oc_data.output_times(tdis.period_df, tdis.get_start_date_time(), tdis.get_time_units(), None, None)


def make_map_id(coverage_uuid: str, feature_type: str, feature_id: int) -> str:
    """Returns the map id for use with CELLGRP.

    The map id is the coverage uuid, the feature type, and the feature id joined by dashes.

    Args:
        coverage_uuid (str): Coverage uuid.
        feature_type (str): Feature type ('point', 'arc', or 'polygon')
        feature_id (int): ID of the feature.

    Returns:
        (str): See description.
    """
    return '{}-{}-{}'.format(coverage_uuid, feature_type, feature_id)


def make_alias(name, well, str_set, count):
    """Returns an alias that is not already in str_set, and adds it to str_set.

    If the name is a valid alias, or can become one, uses it. Otherwise creates an alias from scratch. See
    iCreateAlias() in GMS.

    Args:
        name (str): Name of the feature. If empty, 'well' or 'flow' is used.
        well (bool): True if a well, false if flow.
        str_set (set[str]): Set of aliases already used. The returned alias is added to it.
        count (int): First number tried when a numbered alias ('W' or 'F' followed by 9 digits) has to be made.

    Returns:
        (str): The alias
    """
    max_length = 10
    cleaned = name or ('well' if well else 'flow')
    for char in (' ', ','):
        cleaned = cleaned.replace(char, '_')
    # Apostrophes are dropped after truncating, so the alias can end up shorter than max_length
    candidate = cleaned[:max_length].replace("'", '').upper()

    # Aliases are kept in upper case so they can't differ only by case
    if candidate in str_set:
        prefix = 'W' if well else 'F'
        while candidate in str_set:
            candidate = f'{prefix}{count:>09d}'
            count += 1
    str_set.add(candidate)
    return candidate


def coverage_uuid_from_map_id(map_id):
    """Returns the coverage uuid given a map_id.

    Args:
        map_id (str): The map_id.

    Returns:
        (str): The first 36 characters of the map_id, which is the coverage uuid.
    """
    uuid_length = 36
    return map_id[:uuid_length]


def feature_type_from_map_id(map_id):
    """Returns the feature object type (arc, point, polygon, arc_group) given a map_id.

    Args:
        map_id (str): The map_id.

    Returns:
        (str): The text between the dash after the coverage uuid and the last dash.
    """
    uuid_length = 36  # coverage uuid is the first 36 characters of the map_id
    type_start = uuid_length + 1  # Skip the dash after the uuid
    type_end = map_id.rfind('-')  # The feature id comes after the last dash
    return map_id[type_start:type_end]


def feature_id_from_map_id(map_id):
    """Returns the feature object id given a map_id.

    Args:
        map_id (str): The map_id.

    Returns:
        (int): The number after the last dash in the map_id.
    """
    _head, _dash, id_text = map_id.rpartition('-')  # Everything after the last '-'
    return int(id_text)


def map_id_tuple_from_map_id(map_id):
    """Returns a tuple of the coverage uuid, the feature type, and the feature id.

    Args:
        map_id (str): The map_id.

    Returns:
        (tuple): Coverage uuid (str), feature type (str), feature id (int).
    """
    uuid_length = 36  # coverage uuid is the first 36 characters of the map_id
    id_dash = map_id.rfind('-')  # The feature id comes after the last '-'
    return (
        map_id[:uuid_length],
        map_id[uuid_length + 1:id_dash],  # Feature type is between the uuid's dash and the last dash
        int(map_id[id_dash + 1:]),
    )


def _transient_dep_var_obs(table_def, model_ftype: str) -> bool:
    """Returns True if attribute file indicates transient dependent variable obs (head, concentration, or temperature).

    The obs are transient if the table definition has the transient column that goes with the model type.

    Args:
        table_def: Table definition with a 'columns' list whose items each have a 'name'.
        model_ftype: 'GWF6', 'GWT6', 'GWE6'.

    Returns:
        (bool): True if transient, False if not

    Raises:
        ValueError: If model_ftype is not one of the supported model types.
    """
    column_by_ftype = {'GWF6': OBS_TRANS_HEAD, 'GWT6': OBS_MF6_CONC, 'GWE6': OBS_MF6_TEMP}
    if model_ftype not in column_by_ftype:
        raise ValueError(f'Model ftype "{model_ftype}" not supported for PEST observations.')

    wanted = column_by_ftype[model_ftype]
    return any(column['name'] == wanted for column in table_def['columns'])


def _transient_flow_obs(table_def):
    """Returns True if the attribute file indicates transient flow obs.

    The 'Obs. flow rate' will be an int for the XY series ID instead of a double.

    Args:
        table_def: Table definition with a 'columns' list whose items each have a 'name' and a 'type'.

    Returns:
        (bool): True if transient, False if not, (None if error)
    """
    for column in table_def['columns']:
        if column['name'] != OBS_FLOW_RATE:
            continue
        # Only the first flow rate column is looked at
        return column['type'] == 'int'
