From ebba06b2caa379425ec7456a53f513252385ff64 Mon Sep 17 00:00:00 2001 From: Bryon Lewis Date: Mon, 17 Mar 2025 14:22:37 -0400 Subject: [PATCH 01/10] initial configuration models, api --- client/src/api/UVDATApi.ts | 22 +++++ client/src/types.ts | 6 ++ docker-compose.override.yml | 9 -- docker-compose.prod.yml | 9 -- .../migrations/0004_displayconfiguration.py | 22 +++++ uvdat/core/models/__init__.py | 2 + uvdat/core/models/display_configuration.py | 34 ++++++++ uvdat/core/rest/__init__.py | 2 + uvdat/core/rest/display_configuration.py | 82 +++++++++++++++++++ uvdat/urls.py | 2 + 10 files changed, 172 insertions(+), 18 deletions(-) create mode 100644 uvdat/core/migrations/0004_displayconfiguration.py create mode 100644 uvdat/core/models/display_configuration.py create mode 100644 uvdat/core/rest/display_configuration.py diff --git a/client/src/api/UVDATApi.ts b/client/src/api/UVDATApi.ts index b1797af..fdce29d 100644 --- a/client/src/api/UVDATApi.ts +++ b/client/src/api/UVDATApi.ts @@ -8,6 +8,7 @@ import { ContextWithIds, Dataset, DerivedRegion, + DisplayConfiguration, FeatureGraphData, FileItem, LayerCollection, @@ -590,4 +591,25 @@ export default class UVdatApi { public static async searchVectorFeatures(requestData: SearchableVectorDataRequest): Promise { return (await UVdatApi.apiClient.post('/map-layers/search-features/', requestData)).data; } + + public static async getDisplayConfiguration(): Promise { + const response = await UVdatApi.apiClient.get('display_configuration/'); + return response.data; + } + + // Fully update the display configuration (PUT /display_configuration/) + public static async updateDisplayConfiguration( + config: DisplayConfiguration, + ): Promise { + const response = await UVdatApi.apiClient.put('display_configuration/', config); + return response.data; + } + + // Partially update the display configuration (PATCH /display_configuration/) + public static async partialUpdateDisplayConfiguration( + config: Partial, + ): Promise { + const 
response = await UVdatApi.apiClient.patch('display_configuration/', config); + return response.data; + } } diff --git a/client/src/types.ts b/client/src/types.ts index 8cfa31d..99bf571 100644 --- a/client/src/types.ts +++ b/client/src/types.ts @@ -901,3 +901,9 @@ export interface SearchableVectorFeatureResponse { details: { key: string; value: string }[]; center: { lat: number, lon: number }; } + +export interface DisplayConfiguration { + enabled_ui: string[]; + default_tab: string; + default_displayed_layers: Array<{ type: string; [key: string]: string }>; +} diff --git a/docker-compose.override.yml b/docker-compose.override.yml index 0c38a91..a13b5e3 100644 --- a/docker-compose.override.yml +++ b/docker-compose.override.yml @@ -43,15 +43,6 @@ services: - minio platform: linux/amd64 - gen-vector-basemap: - build: - context: ./docker/tilemaker - dockerfile: Dockerfile - restart: no - volumes: - - vector-workdir:/work - - ./client/public/vectortiles:/data - docs: image: minidocks/mkdocs working_dir: /docs diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 94665c7..a236dde 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -63,15 +63,6 @@ services: - minio platform: linux/amd64 - gen-vector-basemap: - build: - context: ./docker/tilemaker - dockerfile: Dockerfile - restart: no - volumes: - - vector-workdir:/work - - ./client/public/vectortiles:/data - web: build: context: . 
diff --git a/uvdat/core/migrations/0004_displayconfiguration.py b/uvdat/core/migrations/0004_displayconfiguration.py new file mode 100644 index 0000000..3e45aad --- /dev/null +++ b/uvdat/core/migrations/0004_displayconfiguration.py @@ -0,0 +1,22 @@ +# Generated by Django 5.0.7 on 2025-03-17 18:02 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0003_netcdfdata_bounds_rastermaplayer_bounds_and_more'), + ] + + operations = [ + migrations.CreateModel( + name='DisplayConfiguration', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('enabled_ui', models.JSONField(default=list, help_text="List of enabled UI elements: 'Collections', 'Datasets', 'Metadata'.")), + ('default_tab', models.CharField(choices=[('Collections', 'Collections'), ('Datasets', 'Datasets'), ('Metadata', 'Metadata')], help_text='The default tab must be one of the enabled features.', max_length=256)), + ('default_displayed_layers', models.JSONField(default=list, help_text="List of map_layers enabled: [{type: 'netcdf', id: 1}. 
{type: 'vector', id: 3}, {type: 'raster', id: 4}]")), + ], + ), + ] diff --git a/uvdat/core/models/__init__.py b/uvdat/core/models/__init__.py index ec9d257..a254929 100644 --- a/uvdat/core/models/__init__.py +++ b/uvdat/core/models/__init__.py @@ -1,6 +1,7 @@ from .chart import Chart from .context import Context from .dataset import Dataset +from .display_configuration import DisplayConfiguration from .file_item import FileItem from .layer_collection import LayerCollection from .layer_representation import LayerRepresentation @@ -34,4 +35,5 @@ ProcessingTask, VectorFeatureTableData, VectorFeatureRowData, + DisplayConfiguration, ] diff --git a/uvdat/core/models/display_configuration.py b/uvdat/core/models/display_configuration.py new file mode 100644 index 0000000..f5a06bc --- /dev/null +++ b/uvdat/core/models/display_configuration.py @@ -0,0 +1,34 @@ +from django.db import models + + +class DisplayConfiguration(models.Model): + ENABLED_FEATURES_CHOICES = [ + ('Collections', 'Collections'), + ('Datasets', 'Datasets'), + ('Metadata', 'Metadata'), + ] + + enabled_ui = models.JSONField( + default=list, + help_text="List of enabled UI elements: 'Collections', 'Datasets', 'Metadata'.", + ) + + default_tab = models.CharField( + max_length=256, + choices=ENABLED_FEATURES_CHOICES, + help_text='The default tab must be one of the enabled features.', + ) + + default_displayed_layers = models.JSONField( + default=list, + help_text="List of map_layers enabled: [{type: 'netcdf', id: 1}. 
{type: 'vector', id: 3}, {type: 'raster', id: 4}]", +    ) + +    def clean(self): +        """Ensure default_tab is within enabled_ui.""" +        super().clean() +        if self.default_tab not in self.enabled_ui: +            raise ValueError('The default tab must be one of the enabled features.') + +    def __str__(self): +        return f"Configuration ({', '.join(self.enabled_ui)})" diff --git a/uvdat/core/rest/__init__.py b/uvdat/core/rest/__init__.py index 8bed03e..1695000 100644 --- a/uvdat/core/rest/__init__.py +++ b/uvdat/core/rest/__init__.py @@ -14,6 +14,7 @@ from .tasks import TasksAPIView from .user import UserViewSet from .vector_feature_table_data import VectorFeatureTableDataViewSet +from .display_configuration import DisplayConfigurationViewSet __all__ = [ ContextViewSet, @@ -38,4 +39,5 @@ VectorFeatureTableDataViewSet, TasksAPIView, MetadataFilterViewSet, + DisplayConfigurationViewSet, ] diff --git a/uvdat/core/rest/display_configuration.py b/uvdat/core/rest/display_configuration.py new file mode 100644 index 0000000..f905bf1 --- /dev/null +++ b/uvdat/core/rest/display_configuration.py @@ -0,0 +1,82 @@ +from typing import Any, Dict, List + +from django.core.exceptions import ValidationError +from rest_framework import status, viewsets +from rest_framework.decorators import action +from rest_framework.request import Request +from rest_framework.response import Response +from rest_framework.serializers import CharField, DictField, ListField, ModelSerializer + +from uvdat.core.models import DisplayConfiguration + + +class DisplayConfigurationSerializer(ModelSerializer): + enabled_ui = ListField( + child=CharField(), + help_text='List of enabled features: "Collections", "Datasets", "Metadata".', + ) + default_tab = CharField(help_text='Default tab, must be one of the enabled features.') + default_displayed_layers = ListField( + child=DictField(child=CharField()), + help_text='List of map layers: [{type: "netcdf"}, {type: "vector"}, {type: "raster"}].', + ) + + class Meta:
model = DisplayConfiguration + fields = ['enabled_ui', 'default_tab', 'default_displayed_layers'] + + def validate(self, data: Dict[str, Any]) -> Dict[str, Any]: + '''Ensure `default_tab` is within `enabled_ui` and validate `default_displayed_layers`.''' + enabled_ui = data.get('enabled_ui', []) + default_tab = data.get('default_tab') + default_displayed_layers = data.get('default_displayed_layers', []) + + if default_tab not in enabled_ui: + raise ValidationError({'default_tab': 'The default tab must be one of the enabled features.'}) + + if not all(isinstance(layer, dict) and 'type' in layer for layer in default_displayed_layers): + raise ValidationError({'default_displayed_layers': 'Each entry must be a dictionary with a "type" field.'}) + + return data + + +class DisplayConfigurationViewSet(viewsets.GenericViewSet): + ''' + ViewSet for managing the single Display Configuration instance. + + - `GET /display_configuration/`: Retrieve the current configuration. + - `PATCH /display_configuration/`: Partially update the configuration. + - `PUT /display_configuration/`: Fully update the configuration. 
+ ''' + + queryset = DisplayConfiguration.objects.all() + serializer_class = DisplayConfigurationSerializer + + def get_object(self) -> DisplayConfiguration: + '''Retrieve or create the single Configuration instance.''' + return DisplayConfiguration.objects.first() or DisplayConfiguration.objects.create() + + @action(detail=False, methods=['get'], url_path='display-configuration') + def retrieve(self, request: Request, *args: Any, **kwargs: Any) -> Response: + '''Retrieve the single Display Configuration.''' + instance = self.get_object() + serializer = self.get_serializer(instance) + return Response(serializer.data) + + @action(detail=False, methods=['put'], url_path='display-configuration') + def update(self, request: Request, *args: Any, **kwargs: Any) -> Response: + '''Fully replace the configuration (PUT request).''' + instance = self.get_object() + serializer = self.get_serializer(instance, data=request.data) + serializer.is_valid(raise_exception=True) + serializer.save() + return Response(serializer.data, status=status.HTTP_200_OK) + + @action(detail=False, methods=['patch'], url_path='display-configuration') + def partial_update(self, request: Request, *args: Any, **kwargs: Any) -> Response: + '''Partially update the configuration (PATCH request).''' + instance = self.get_object() + serializer = self.get_serializer(instance, data=request.data, partial=True) + serializer.is_valid(raise_exception=True) + serializer.save() + return Response(serializer.data, status=status.HTTP_200_OK) diff --git a/uvdat/urls.py b/uvdat/urls.py index a8052d9..0c8c62d 100644 --- a/uvdat/urls.py +++ b/uvdat/urls.py @@ -27,6 +27,7 @@ UserViewSet, VectorFeatureTableDataViewSet, VectorMapLayerViewSet, + DisplayConfigurationViewSet, ) router = routers.SimpleRouter() @@ -67,6 +68,7 @@ path('accounts/', include('allauth.urls')), path('oauth/', include('oauth2_provider.urls')), path('admin/', admin.site.urls), + path('api/v1/display-configuration/', 
DisplayConfigurationViewSet.as_view({'get': 'retrieve', 'put': 'update', 'patch': 'partial_update'})), path('api/v1/s3-upload/', include('s3_file_field.urls')), path('api/v1/', include(router.urls)), path('api/docs/redoc/', schema_view.with_ui('redoc'), name='docs-redoc'), From 708346f7a2f75bbc0bc2426d8806609da1a9016d Mon Sep 17 00:00:00 2001 From: Bryon Lewis Date: Tue, 18 Mar 2025 15:10:44 -0400 Subject: [PATCH 02/10] usgs/hydrology scripts --- .../googleneuralhydrology/extractHruIds.py | 27 ++ scripts/googleneuralhydrology/getStats.py | 314 ++++++++++++++++++ scripts/googleneuralhydrology/ncToJson.py | 123 +++++++ .../googleneuralhydrology/runExperiment.py | 100 ++++++ uvdat/core/tasks/dataset.py | 2 +- 5 files changed, 565 insertions(+), 1 deletion(-) create mode 100644 scripts/googleneuralhydrology/extractHruIds.py create mode 100755 scripts/googleneuralhydrology/getStats.py create mode 100644 scripts/googleneuralhydrology/ncToJson.py create mode 100644 scripts/googleneuralhydrology/runExperiment.py diff --git a/scripts/googleneuralhydrology/extractHruIds.py b/scripts/googleneuralhydrology/extractHruIds.py new file mode 100644 index 0000000..97e1fab --- /dev/null +++ b/scripts/googleneuralhydrology/extractHruIds.py @@ -0,0 +1,27 @@ +import json +import click + +@click.command() +@click.argument("geojson_file", type=click.Path(exists=True)) +@click.argument("output_file", type=click.Path()) +def extract_hru_ids(geojson_file, output_file): + """Extracts hru_id from a GeoJSON file and saves them as a JSON array.""" + + with open(geojson_file, "r") as f: + geojson_data = json.load(f) + + # Extract hru_id from each feature and ensure it's 8 digits with leading zeros + hru_ids = [ + f"{int(feature['properties']['hru_id']):08d}" + for feature in geojson_data.get("features", []) + if "hru_id" in feature.get("properties", {}) + ] + + # Save output JSON + with open(output_file, "w") as f: + json.dump(hru_ids, f, indent=4) + + click.echo(f"Extracted {len(hru_ids)} HRU 
IDs to {output_file}") + +if __name__ == "__main__": + extract_hru_ids() diff --git a/scripts/googleneuralhydrology/getStats.py b/scripts/googleneuralhydrology/getStats.py new file mode 100755 index 0000000..d3664c9 --- /dev/null +++ b/scripts/googleneuralhydrology/getStats.py @@ -0,0 +1,314 @@ +import click +import json +import pathlib +import pandas as pd +from dataretrieval import nwis +import numpy as np +import time +import calendar + +def get_unix_time(row, year_index, month_index, day_index): + if year_index is not None: + year = int(row[year_index]) + else: + year = 1970 + if month_index is not None: + month = int(row[month_index]) + else: + month = 1 + if day_index is not None: + day = int(row[day_index]) + else: + day = 1 + if month < 1 or month > 12: + month = 1 + if day < 1 or day > 31: + day = 1 + + return calendar.timegm(time.strptime(f"{year}-{month}-{day}", "%Y-%m-%d")) +# Define the function for fetching and formatting data +@click.command() +@click.option('-p', '--param-codes', default=['00060'], multiple=True, required=True, help="List of parameter codes to query.") +@click.option('--input', required=True, type=click.Path(exists=True), help="JSON file containing an array of site numbers.") +@click.option('--start-date', default='1989-10-01', help="Start date for the data query.") +@click.option('--end-date', default='1999-09-30', help="End date for the data query.") +@click.option('--output', default='output.json', help="Output JSON file where the results will be saved.") +@click.option('--usgs-parameters', default='../nwis/USGSParameters.tsv', type=click.Path(exists=True), help="Path to the USGSParameters.tsv file.") +def fetch_data(param_codes, input, start_date, end_date, output, usgs_parameters): + """Fetch data from NWIS and save it in a JSON format with descriptions for each table.""" + # Load the USGS parameters file + usgs_df = pd.read_csv(usgs_parameters, sep='\t', comment='#') + # Create a dictionary mapping parameter codes to their 
descriptions + param_desc = dict(zip(usgs_df['parm_cd'], usgs_df['parm_nm'])) + + # Load geojson file + with open(input, 'r') as f: + site_loaded = json.load(f) # Expecting a JSON array of site numbers + + site_numbers = [str(item) for item in site_loaded] + # Split the site numbers into chunks of 10 (because NWIS allows a max of 10 sites per request) + def split_list(l): + n = 10 + for i in range(0, len(l), n): + yield l[i:i + n] + + site_lists = list(split_list(site_numbers)) + + # Prepare the result container + result = {} + + # Fetch data for each site and each report type (monthly, daily, annual) + report_types = ['daily'] # Assuming you want monthly, daily, and annual reports + + for report_type in report_types: + for i, site_list in enumerate(site_lists): + try: + response = nwis.get_stats( + sites=site_list, + startDt=start_date, + endDt=end_date, + statReportType=report_type, + parameterCd=",".join(param_codes), + ) + df, meta = response + except Exception as e: + print(f"Error fetching {report_type} data for sites {site_list}: {e}") + continue + df, meta = response + + # Replace NaN values with empty strings + df = df.fillna('') + + # Add the data to the result dictionary with site_number as the key + for site_number in site_list: + site_data = df[df['site_no'] == site_number] + if not site_data.empty: + # Remove the 'ts_id' column if it exists + site_data = site_data.drop(columns=['site_no', 'loc_web_ds'], errors='ignore') + + unique_param_codes = site_data['parameter_cd'].unique() + # Create a description of the parameters + param_names = [param_desc.get(code, 'Unknown parameter') for code in unique_param_codes] + description = f"This is a table of the mean {report_type} values for the following parameters: {', '.join([f'{code} - {name}' for code, name in zip(unique_param_codes, param_names)])}" + header = site_data.columns.tolist() + parameter_cd_index = header.index('parameter_cd') + mean_va_index = header.index('mean_va') + + # Prepare table object + 
rows = site_data.values.tolist() + + base_set = set() + year_index = None + month_index = None + day_index = None + if 'year_nu' in header: + year_index = header.index('year_nu') + if 'begin_yr' in header: + year_index = header.index('begin_yr') + if 'month_nu' in header: + month_index = header.index('month_nu') + if 'day_nu' in header: + day_index = header.index('day_nu') + base_string_param_map = {} + ts_id_index = header.index('ts_id') + # update to take only the latest ts_id + ts_id_map = {} + for row in rows: + ts_id = row[ts_id_index] + year = row[year_index] if year_index is not None else 0000 + month = row[month_index] if month_index is not None else 00 + day = row[day_index] if day_index is not None else 00 + base_string = str(f'{str(year).zfill(4)}{str(month).zfill(2)}{str(day).zfill(2)}') + base_set.add(base_string) + if base_string_param_map.get(base_string, None) is None: + base_string_param_map[base_string] = {} + for param in unique_param_codes: + base_string_param_map[base_string][param] = None + val = row[mean_va_index] + for code in unique_param_codes: + if row[parameter_cd_index] == code: + if ts_id_map.get(f'{base_string}_{code}', None) is None: + ts_id_map[f'{base_string}_{code}'] = ts_id + if base_string_param_map[base_string][code] is None or val is not None: + base_string_param_map[base_string][code] = val + if ts_id > ts_id_map[f'{base_string}_{code}']: + ts_id_map[f'{base_string}_{code}'] = ts_id + base_string_param_map[base_string][code] = val + base_order = list(base_set) + base_order.sort() + time_base_index = {} + header.append('index') + header.append('unix_time') + for code in unique_param_codes: + header.append(code) + parameter_cd_index = header.index('parameter_cd') + header.pop(parameter_cd_index) + mean_va_index = header.index('mean_va') + header.pop(mean_va_index) + unix_mapping = {} + for row in rows: + year = row[year_index] if year_index is not None else 0000 + month = row[month_index] if month_index is not None else 00 + 
day = row[day_index] if day_index is not None else 00 + base_string = str(f'{str(year).zfill(4)}{str(month).zfill(2)}{str(day).zfill(2)}') + row.append(base_order.index(base_string)) + unix_timestamp = get_unix_time(row, year_index, month_index, day_index) + row.append(unix_timestamp) + row.pop(parameter_cd_index) + row.pop(mean_va_index) + if time_base_index.get(base_string, None) is None: + for param in unique_param_codes: + param_val = base_string_param_map.get(base_string, {}).get(param, None) + row.append(param_val) + if unix_mapping.get(unix_timestamp, None) is None: + unix_mapping[unix_timestamp] = row + else: # Combine the so we get rid of any missing data + old_row = unix_mapping[unix_timestamp] + for index in range(len(row)): + if row[index] is not None and old_row[index] is None: + old_row[index] = row[index] + + # row_length = len(rows[0]) + # for row in rows: + # if len(row) != row_length: + # print(f'{site_number} - {row} != {row_length}') + # else: + # print(f'{site_number} - {row} == {row_length}') + sorted_values = [value for _, value in sorted(unix_mapping.items())] + + updated_df = pd.DataFrame(sorted_values, columns=header) + table_object = { + "name": f"{site_number}_{report_type}", + "description": description, + "type": f'USGS_gauge_{report_type}_{"_".join(param_codes)}', + "header": header, + "summary": generate_summary(updated_df, param_desc, updated_df.columns.tolist()), + "rows": sorted_values, + } + if site_number not in result: + result[site_number] = [] + + result[site_number].append(table_object) + + print(f"Fetched {report_type} data for {len(site_list)} sites.") + + # Save the result to the output file as JSON + with open(output, 'w') as f: + json.dump(result, f, indent=4) + + print(f"Results saved to {output}.") + +limit = 100 +def generate_summary(df, param_desc, rows): + """Generate a summary object for the table with column type and stats, focusing on unique parameter_cd.""" + summary = {} + for header in rows: + # Iterate over 
each unique parameter_cd + if header == 'parameter_cd': + if header not in summary.keys(): + summary[header] = {'type':'parameter_cd'} + for parameter_cd in df['parameter_cd'].unique(): + param_data = df[df['parameter_cd'] == parameter_cd] + + # Assuming the 'value' column contains the actual data values + value_col = param_data['mean_va'] + + # Calculate the min, max, and mean for each parameter_cd + summary[header][parameter_cd] = { + "parameter_cd": parameter_cd, + "parameter_name": param_desc.get(parameter_cd, 'Unknown parameter'), + "min": float(value_col.min()), + "max": float(value_col.max()), + "mean": float(value_col.mean()) + } + else: # Calculate type/min/max and other fields + + if header not in summary.keys(): + summary[header] = {'type': None, 'values': set(), 'value_count': 0} + parameter_cd = param_desc.get(header, None) + if parameter_cd: + summary[header]["description"] = parameter_cd[0] if isinstance(parameter_cd, tuple) else parameter_cd + for value in df[header].unique(): + if isinstance(value, bool): + summary[header]['type'] = 'bool' + summary[header]['value_count'] += 1 + elif isinstance(value, (int, float, np.float64, np.int32, np.int64)): + summary[header]['type'] = 'number' + summary[header]['value_count'] += 1 + if 'min' not in summary[header] or value < summary[header]['min']: + if np.isnan(float(value)) or value is None and summary[header].get('min', None) is None: + summary[header]['min'] = float('inf') + else: + summary[header]['min'] = float(value) + if 'max' not in summary[header] or value > summary[header]['max']: + if np.isnan(float(value)) or value is None and summary[header].get('max', None) is None: + summary[header]['max'] = float('-inf') + else: + summary[header]['max'] = float(value) + elif isinstance(value, str): + if 'values' not in summary[header]: + summary[header]['values'] = set() + summary[header]['value_count'] += 1 + summary[header]['type'] = 'string' + summary[header]['values'].add(value) + for header in 
summary.keys(): + if summary[header]['type'] is None: + summary[header]['type'] = 'unknown' + del summary[header]['values'] + continue + if summary[header]['type'] == 'number': + if summary[header]['value_count'] == 1: + summary[header]['values'] = summary[header].get('min', summary[header].get('max')) + del summary[header]['min'] + del summary[header]['max'] + elif summary[header]['min'] == summary[header]['max']: + val = summary[header]['min'] + del summary[header]['values'] + del summary[header]['min'] + del summary[header]['max'] + summary[header]['static'] = True + summary[header]['value'] = val + else: + if np.isnan(summary[header]['min']): + summary[header]['min'] = None + if np.isnan(summary[header]['max']): + summary[header]['max'] = None + del summary[header]['values'] + elif ( + summary[header]['type'] == 'string' + and 'values' in summary[header] + and not summary[header].get('searchable') + ): + summary[header]['values'] = sorted(summary[header]['values']) + if len(summary[header]['values']) >= limit: + summary[header]['searchable'] = True + summary[header]['unique'] = len(summary[header]['values']) + del summary[header]['values'] + elif summary[header]['type'] == 'bool': + del summary[header]['values'] + check_json_validity(summary) + return summary + +def check_json_validity(obj, path="root"): + valid_types = (str, int, float, bool, type(None), list, dict) + + if isinstance(obj, (np.float64, np.float128, np.int64, np.int32)): + print(f"Invalid type at {path}: {type(obj).__name__} (Value: {obj})") + return False + + if isinstance(obj, dict): + for key, value in obj.items(): + check_json_validity(value, path=f"{path}.{key}") + + elif isinstance(obj, list): + for idx, item in enumerate(obj): + check_json_validity(item, path=f"{path}[{idx}]") + + elif not isinstance(obj, valid_types): + print(f"Invalid type at {path}: {type(obj).__name__} (Value: {obj})") + return False + + +if __name__ == '__main__': + fetch_data() + diff --git 
a/scripts/googleneuralhydrology/ncToJson.py b/scripts/googleneuralhydrology/ncToJson.py new file mode 100644 index 0000000..dd79fd0 --- /dev/null +++ b/scripts/googleneuralhydrology/ncToJson.py @@ -0,0 +1,123 @@ +import click +import xarray as xr +import json +import numpy as np +import os +import pandas as pd + +def summarize_data(ds): + """Generate a summary of the dataset with min/max values for numerical variables.""" + summary = {} + + for var in ds.data_vars: + data = ds[var].values.flatten() # Convert to 1D array + if np.issubdtype(data.dtype, np.number): # Check if it's numeric + summary[var] = { + "type": "number", + "min": np.nanmin(data).item() if np.any(~np.isnan(data)) else None, + "max": np.nanmax(data).item() if np.any(~np.isnan(data)) else None + } + + summary["columns"] = list(ds.data_vars.keys()) # List of available variables + if 'date' in ds: + time_values = ds['date'].values + start_time = pd.to_datetime(time_values.min()) + end_time = pd.to_datetime(time_values.max()) + + summary["time"] = { + "type": "string", + "start": start_time.strftime("%Y-%m-%d"), + "end": end_time.strftime("%Y-%m-%d") + } + + # Convert to Unix time (seconds since the Unix epoch) + summary["unix_time"] = { + "type": "number", + "min": start_time.value // 10**9, # Convert to seconds + "max": end_time.value // 10**9 # Convert to seconds + } + + return summary + +def convert_netcdf_to_json(nc_file): + """Load NetCDF file and convert it to JSON format.""" + ds = xr.open_dataset(nc_file, engine="netcdf4") # Open NetCDF file + + # Extract key variables + time = ds["date"].values # Extract dates + obs_values = ds["QObs(mm/d)_obs"].values.flatten() + sim_values = ds["QObs(mm/d)_sim"].values.flatten() + + # Format time correctly and convert to UNIX time + if "units" in ds["date"].attrs and "since" in ds["date"].attrs["units"]: + time_origin = ds["date"].attrs["units"].split("since")[1].strip() + time = pd.to_datetime(time, origin=time_origin, unit="D") + else: + time = 
pd.to_datetime(time) + + # Convert time to UNIX time + unix_time = time.astype(int) // 10**9 # Convert to seconds + + # Format time as strings + time_str = time.strftime("%Y-%m-%d").tolist() + + # Construct rows with UNIX time + rows = [[time_str[i], int(unix_time[i]), float(obs_values[i]), float(sim_values[i])] for i in range(len(time))] + + # Generate summary + summary = summarize_data(ds) + + # Construct final JSON output + output_json = { + "summary": summary, + "rows": rows + } + + return output_json + +def process_folder(folder_path): + """Process all .nc files in a folder and generate a combined JSON object.""" + json_output = {} + + # Loop through all .nc files in the folder + for filename in os.listdir(folder_path): + if filename.endswith(".nc"): + base_filename = os.path.splitext(filename)[0] # Get base filename without extension + nc_file = os.path.join(folder_path, filename) + + # Convert NetCDF to JSON format + json_data = convert_netcdf_to_json(nc_file) + + # Create the structure for each file + json_output[base_filename] = [{ + "name": f"{base_filename}_daily", + "description": "This is a table of the daily values", + "type": f"GoogleNeuralHydrology_{base_filename}", + "summary": { + "time": json_data["summary"].get("time", {}), + "unix_time": json_data["summary"].get("unix_time", {}), + "QObs(mm/d)_obs": json_data["summary"].get("QObs(mm/d)_obs", {}), + "QObs(mm/d)_sim": json_data["summary"].get("QObs(mm/d)_sim", {}), + }, + "header": ["time", "unix_time", "QObs(mm/d)_obs", "QObs(mm/d)_sim"], + "rows": json_data["rows"] + }] + + return json_output +@click.command() +@click.argument("folder", type=click.Path(exists=True)) +@click.argument("output", type=click.Path()) +def convert_netcdf(folder, output): + """CLI tool to convert a NetCDF (.nc) file to JSON format.""" + json_data = process_folder(folder) + + # Set default output file + + # Save JSON + with open(output, "w") as json_file: + json.dump(json_data, json_file, indent=4) + + 
click.echo(f"Converted {folder} to {output}") + +if __name__ == "__main__": + convert_netcdf() diff --git a/scripts/googleneuralhydrology/runExperiment.py b/scripts/googleneuralhydrology/runExperiment.py new file mode 100644 index 0000000..73a2751 --- /dev/null +++ b/scripts/googleneuralhydrology/runExperiment.py @@ -0,0 +1,100 @@ +import json +import os +import subprocess +import shutil +import time +import logging +import click + +# Define the shell script path +SHELL_SCRIPT = "/media/bryon.lewis/Elements/DIVERSH/GoogleNeuralHydrology/run_demo1.sh" # Adjust as necessary +EXPERIMENTS_PATH="/media/bryon.lewis/Elements/DIVERSH/GoogleNeuralHydrology/Experiments" +# Configure logging +logging.basicConfig( + filename="hru_processing.log", + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +@click.command() +@click.argument("hru_ids_file", type=click.Path(exists=True)) +@click.option("--wait-time", default=60, help="Max seconds to wait for test_results.nc.") +@click.option("--poll-interval", default=5, help="Time in seconds between file existence checks.") +def run_hru_tasks(hru_ids_file, wait_time, poll_interval): + """Runs a shell script for each HRU ID, waits for output, and organizes NetCDF files.""" + + with open(hru_ids_file, "r") as f: + hru_ids = json.load(f) + + total_ids = len(hru_ids) + results_dir = "netcdf_results" + os.makedirs(results_dir, exist_ok=True) + + logging.info(f"Starting HRU processing for {total_ids} HRU IDs.") + + for index, hru_id in enumerate(hru_ids, start=1): + click.echo(f"Processing HRU ID {hru_id} ({index}/{total_ids})...") + logging.info(f"Processing HRU ID: {hru_id} ({index}/{total_ids})") + + # Check if the files already exist + existing_netcdf_file = os.path.join(results_dir, f"{hru_id}.nc") + existing_exp_dir = os.path.join(EXPERIMENTS_PATH, hru_id) + + if os.path.exists(existing_netcdf_file): + click.echo(f"Skipping HRU ID {hru_id} because the data already exists.") + logging.info(f"Skipping HRU ID 
{hru_id} because the data already exists.") + continue # Skip this HRU ID + test_results_path = None + if os.path.exists(existing_exp_dir): + for root, _, files in os.walk(existing_exp_dir): + if "test_results.nc" in files: + test_results_path = os.path.join(root, "test_results.nc") + break + + if test_results_path: + new_file_path = os.path.join(results_dir, f"{hru_id}.nc") + shutil.move(test_results_path, new_file_path) + click.echo(f"Moved {test_results_path} → {new_file_path} ({index}/{total_ids} complete)") + logging.info(f"Moved {test_results_path} → {new_file_path} ({index}/{total_ids} complete)") + continue + + # Run shell script and wait for it to complete (suppress subprocess logging) + cmd = [SHELL_SCRIPT, "-b", hru_id] + try: + subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + logging.info(f"Successfully executed shell script for HRU ID: {hru_id}") + except subprocess.CalledProcessError as e: + logging.error(f"Shell script failed for HRU ID {hru_id}: {e}") + continue + + # Wait and check for the test_results.nc file + exp_base = os.path.join(EXPERIMENTS_PATH, hru_id) + test_results_path = None + elapsed_time = 0 + + while elapsed_time < wait_time: + for root, _, files in os.walk(exp_base): + if "test_results.nc" in files: + test_results_path = os.path.join(root, "test_results.nc") + break + + if test_results_path: + break # Exit loop once the file is found + + time.sleep(poll_interval) + click.echo(f"Waiting for test_results.nc for HRU ID {hru_id}... ({elapsed_time}s elapsed)") + logging.info(f"Waiting for test_results.nc for HRU ID {hru_id}...
({elapsed_time}s elapsed)") + elapsed_time += poll_interval + + if test_results_path: + new_file_path = os.path.join(results_dir, f"{hru_id}.nc") + shutil.move(test_results_path, new_file_path) + click.echo(f"Moved {test_results_path} → {new_file_path} ({index}/{total_ids} complete)") + logging.info(f"Moved {test_results_path} → {new_file_path} ({index}/{total_ids} complete)") + else: + click.echo(f"Warning: test_results.nc not found for {hru_id} after {wait_time} seconds.") + logging.warning(f"test_results.nc not found for HRU ID {hru_id} after {wait_time} seconds.") + + logging.info(f"HRU processing completed. {index}/{total_ids} processed.") + +if __name__ == "__main__": + run_hru_tasks() diff --git a/uvdat/core/tasks/dataset.py b/uvdat/core/tasks/dataset.py index d7e2e0f..d8e0c89 100644 --- a/uvdat/core/tasks/dataset.py +++ b/uvdat/core/tasks/dataset.py @@ -161,7 +161,7 @@ def process_file_item(self, file_item_id): status=ProcessingTask.Status.ERROR, error=str(f'Unsupported file type: {file_name}') ) - for item in raster_map_layer: + for item in raster_map_layers: item.set_bounds() except Exception as e: processing_task.update(status=ProcessingTask.Status.ERROR, error=str(e)) From d2a856073172ec5b34a2c2f6e98bae9629bf0353 Mon Sep 17 00:00:00 2001 From: Bryon Lewis Date: Mon, 24 Mar 2025 11:12:19 -0400 Subject: [PATCH 03/10] scripts for googleNeuralHydrology processing and uploading --- sample_data/google.json | 157 ++++++++++++++++ .../googleneuralhydrology/combineRowData.py | 39 ++++ .../createAndUploadInputJSON.py | 167 ++++++++++++++++++ .../{getStats.py => getDailyStats.py} | 143 +++++---------- scripts/googleneuralhydrology/ncToJson.py | 55 ++++-- .../googleneuralhydrology/subSampleHruIds.py | 49 +++++ uvdat/core/rest/vector_feature_table_data.py | 6 +- uvdat/core/tasks/map_layers.py | 2 +- 8 files changed, 505 insertions(+), 113 deletions(-) create mode 100644 sample_data/google.json create mode 100644 scripts/googleneuralhydrology/combineRowData.py create 
mode 100644 scripts/googleneuralhydrology/createAndUploadInputJSON.py rename scripts/googleneuralhydrology/{getStats.py => getDailyStats.py} (63%) create mode 100644 scripts/googleneuralhydrology/subSampleHruIds.py diff --git a/sample_data/google.json b/sample_data/google.json new file mode 100644 index 0000000..0e3efea --- /dev/null +++ b/sample_data/google.json @@ -0,0 +1,157 @@ +[ + { + "type": "Context", + "name": "Google Neural Hydrology", + "default_map_center": [ + 34.8019, + -86.1794 + ], + "default_map_zoom": 6, + "datasets": [ + { + "name": "Google Neural Hydrology", + "description": "Google Neural Hydrology", + "category": "Google Neural Hydrology", + "metadata": {}, + "files": [ + { + "name": "Google Eural Hydrology Data", + "path": "./data/GoogleNeuralHydrology/matching_hru_ids.geojson", + "url": "https://data.kitware.com/api/v1/file/67e175bf3e5f3e5e96b97512/download", + "type": "geojson", + "metadata": { + "tabular": { + "path": "./data/GoogleNeuralHydrology/combined.json", + "name": "Google Neural Hydrology Tabular Data", + "url": "https://data.kitware.com/api/v1/file/67e175c03e5f3e5e96b97515/download", + "featurePropertyMatcher": "hru_id" + }, + "default_style": { + "layers": { + "fill": { + "color": "#888888", + "enabled": true, + "selectable": "singleSelect", + "selectColor": "#00FFFF" + }, + "line": { + "size": 1, + "color": "#000000", + "enabled": true + }, + "text": { + "color": "#888888", + "enabled": false + }, + "circle": { + "size": 3, + "color": "#888888", + "enabled": false + }, + "fill-extrusion": { + "color": "#888888", + "enabled": false + } + }, + "vectorFeatureTableGraphs": [ + { + "name": "QObs(mm/d)_obs", + "type": "GoogleNeuralHydrology", + "xAxis": "unix_time", + "yAxis": "QObs(mm/d)_obs", + "indexer": "hru_id", + "xAxisLabel": "Date", + "yAxisLabel": "QObs(mm/d)_obs" + }, + { + "name": "QObs(mm/d)_sim", + "type": "GoogleNeuralHydrology", + "xAxis": "unix_time", + "yAxis": "QObs(mm/d)_sim", + "indexer": "hru_id", + "xAxisLabel": 
"Date", + "yAxisLabel": "QObs(mm/d)_sim" + }, + { + "name": "Obs - Sim", + "type": "GoogleNeuralHydrology", + "xAxis": "unix_time", + "yAxis": "Obs - Sim", + "indexer": "hru_id", + "xAxisLabel": "Date", + "yAxisLabel": "Obs - Sim" + }, + { + "name": "Percentage Off", + "type": "GoogleNeuralHydrology", + "xAxis": "unix_time", + "yAxis": "Percentage Off", + "indexer": "hru_id", + "xAxisLabel": "Date", + "yAxisLabel": "Percentage Off" + }, + { + "name": "00060_Mean", + "type": "USGS_gauge_daily_00060", + "xAxis": "unix_time", + "yAxis": "00060_Mean", + "indexer": "hru_id", + "xAxisLabel": "Date", + "yAxisLabel": "00060_Mean" + } + ], + "mapLayerFeatureTableGraphs": [ + { + "name": "QObs(mm/d)_obs", + "type": "GoogleNeuralHydrology", + "xAxis": "unix_time", + "yAxis": "QObs(mm/d)_obs", + "indexer": "hru_id", + "xAxisLabel": "Date", + "yAxisLabel": "QObs(mm/d)_obs" + }, + { + "name": "QObs(mm/d)_sim", + "type": "GoogleNeuralHydrology", + "xAxis": "unix_time", + "yAxis": "QObs(mm/d)_sim", + "indexer": "hru_id", + "xAxisLabel": "Date", + "yAxisLabel": "QObs(mm/d)_sim" + }, + { + "name": "Obs - Sim", + "type": "GoogleNeuralHydrology", + "xAxis": "unix_time", + "yAxis": "Obs - Sim", + "indexer": "hru_id", + "xAxisLabel": "Date", + "yAxisLabel": "Obs - Sim" + }, + { + "name": "Percentage Off", + "type": "GoogleNeuralHydrology", + "xAxis": "unix_time", + "yAxis": "Percentage Off", + "indexer": "hru_id", + "xAxisLabel": "Date", + "yAxisLabel": "Percentage Off" + }, + { + "name": "00060_Mean", + "type": "USGS_gauge_daily_00060", + "xAxis": "unix_time", + "yAxis": "00060_Mean", + "indexer": "hru_id", + "xAxisLabel": "Date", + "yAxisLabel": "00060_Mean" + } + ] + } + } + } + ] + } + ] + } +] \ No newline at end of file diff --git a/scripts/googleneuralhydrology/combineRowData.py b/scripts/googleneuralhydrology/combineRowData.py new file mode 100644 index 0000000..7aa4141 --- /dev/null +++ b/scripts/googleneuralhydrology/combineRowData.py @@ -0,0 +1,39 @@ +import click +import 
# --- combineRowData.py ---

@click.command()
@click.argument("json_file1", type=click.Path(exists=True))
@click.argument("json_file2", type=click.Path(exists=True))
@click.argument("output", default="combined.json", type=click.Path())
def combine_json(json_file1, json_file2, output):
    """Combine two JSON files by concatenating arrays for matching keys.

    A key present in only one file keeps that file's array (the other side
    defaults to an empty list); a key whose values are not both lists raises
    ValueError.
    """
    with open(json_file1, "r") as f:
        data1 = json.load(f)
    with open(json_file2, "r") as f:
        data2 = json.load(f)

    combined_data = {}
    for key in set(data1.keys()).union(data2.keys()):
        array1 = data1.get(key, [])
        array2 = data2.get(key, [])
        if not (isinstance(array1, list) and isinstance(array2, list)):
            raise ValueError(f"Values for key '{key}' are not both arrays.")
        combined_data[key] = array1 + array2

    with open(output, "w") as f:
        json.dump(combined_data, f, indent=4)

    click.echo(f"Combined JSON saved to {output}")


if __name__ == "__main__":
    combine_json()


# --- createAndUploadInputJSON.py ---

# SECURITY NOTE(review): a hard-coded Girder API key should live in an
# environment variable or local config, not in committed source.
baseApiKey = 'OxhoPyCrzkGsnQiiRYZm3BgTlPhscKvILVyRdasT'


def authenticate(client: GirderClient):
    """Authenticate the Girder client with the module-level API key."""
    client.authenticate(apiKey=baseApiKey)


def create_folder(client, parent_id, name):
    """Create (or reuse) a folder named `name` under `parent_id`; return its id."""
    folder = client.createFolder(parent_id, name, reuseExisting=True)
    return folder['_id']


def get_public_folder(gc: GirderClient):
    """Return the current user's Public folder document.

    Raises:
        RuntimeError: if the user has no Public folder.
    """
    current_user = gc.sendRestRequest('GET', 'user/me')
    userId = current_user['_id']
    folders = gc.sendRestRequest(
        'GET',
        f'folder?parentType=user&parentId={userId}&text=Public&limit=50&sort=lowerName&sortdir=1',
    )
    # BUG FIX: the original printed a message and then fell through to
    # `return uploadFolder` with the variable unbound, raising
    # UnboundLocalError. Fail explicitly instead.
    if not folders:
        raise RuntimeError('No folder found for the user')
    return folders[0]


def upload_data(client: GirderClient, remote_folder_id, base_geojson, base_tab_json, base_default_style):
    """Upload (or reuse) the GeoJSON and tabular JSON items and build the
    file-metadata entry, deriving per-column graph definitions from the
    tabular headers.

    Returns a one-element list suitable for a dataset's ``files`` array.
    """
    with open(base_default_style, "r") as f:
        base_default_style_dict = json.load(f)

    def _download_url(existing_items, local_path):
        # Reuse an already-uploaded item when present; otherwise upload it.
        if existing_items:
            file_id = existing_items[0]['_id']
        else:
            file_id = client.uploadFileToFolder(remote_folder_id, local_path)['_id']
        return f'https://data.kitware.com/api/v1/file/{file_id}/download'

    geojson_url = _download_url(
        list(client.listItem(remote_folder_id, name=base_geojson)), base_geojson
    )
    tab_url = _download_url(
        list(client.listItem(remote_folder_id, name=base_tab_json)), base_tab_json
    )

    # Inspect the first site's tables to discover the table type and headers.
    with open(base_tab_json, "r") as f:
        tabular_data = json.load(f)
    tab_data_charts = next(iter(tabular_data.values()))

    vectorFeatureTableGraphs = []
    mapLayerFeatureTableGraphs = []
    for tab_data in tab_data_charts:
        headers = tab_data['header']
        table_name = tab_data['type']
        for column in headers:
            # Raw date components are redundant with unix_time; skip them all.
            if column in ('time', 'date_str', 'year_nu', 'month_nu', 'day_nu', 'unix_time'):
                continue
            graph = {
                "name": column,
                "type": table_name,
                "xAxis": "unix_time",
                "yAxis": column,
                "indexer": "hru_id",
                "xAxisLabel": "Date",
                "yAxisLabel": column,
            }
            vectorFeatureTableGraphs.append(graph)
            mapLayerFeatureTableGraphs.append(dict(graph))  # independent copy

    default_style = base_default_style_dict.copy()
    default_style['vectorFeatureTableGraphs'] = vectorFeatureTableGraphs
    default_style['mapLayerFeatureTableGraphs'] = mapLayerFeatureTableGraphs

    # BUG FIX: dataset display name read "Google Eural Hydrology Data";
    # corrected to "Neural" (matching the rename later in this series).
    # Also removed the unused `powerplant_url` local and a debug print.
    return [{
        'name': 'Google Neural Hydrology Data',
        'path': f'./data/GoogleNeuralHydrology/{base_geojson}',
        'url': geojson_url,
        'type': 'geojson',
        'metadata': {
            'tabular': {
                'path': f'./data/GoogleNeuralHydrology/{base_tab_json}',
                'name': 'Google Neural Hydrology Tabular Data',
                'url': tab_url,
                'featurePropertyMatcher': 'hru_id',
            },
            'default_style': default_style,
        },
    }]


@click.command()
@click.argument('base_geojson', type=click.Path(exists=True))
@click.argument('base_tabular', type=click.Path(exists=True))
@click.argument('base_default_style', default='base_default_style.json', type=click.Path(exists=True))
@click.option('--save-path', default='uploaded_file_context.json', help='Path to save the context JSON.')
def main(base_geojson, base_tabular, base_default_style, save_path):
    """Upload the input files to Girder and write a Context JSON pointing at them."""
    client = GirderClient(apiUrl='https://data.kitware.com/api/v1')
    authenticate(client)

    # Locate Public/UVDAT and create (or reuse) the GoogleNeuralHydrology folder.
    public_folder = get_public_folder(client)
    uvdat_folder = list(client.listFolder(public_folder['_id'], name='UVDAT'))[0]
    google_hydrology_id = create_folder(client, uvdat_folder['_id'], 'GoogleNeuralHydrology')

    tabular_files = upload_data(client, google_hydrology_id, base_geojson, base_tabular, base_default_style)

    context = {
        "type": "Context",
        "name": "Google Neural Hydrology",
        "default_map_center": [34.8019, -86.1794],
        "default_map_zoom": 6,
        "datasets": [],
    }
    google_dataset = {
        "name": "Google Neural Hydrology",
        "description": "Google Neural Hydrology",
        "category": "Google Neural Hydrology",
        "metadata": {},
        "files": tabular_files,
    }
    context['datasets'].append(google_dataset)

    with open(save_path, 'w') as f:
        json.dump([context], f, indent=4)

    click.echo(f'Context with download URLs saved to {save_path}')


if __name__ == '__main__':
    main()
+64,10 @@ def split_list(l): for report_type in report_types: for i, site_list in enumerate(site_lists): try: - response = nwis.get_stats( + response = nwis.get_dv( sites=site_list, - startDt=start_date, - endDt=end_date, - statReportType=report_type, + start=start_date, + end=end_date, parameterCd=",".join(param_codes), ) df, meta = response @@ -77,7 +77,22 @@ def split_list(l): df, meta = response # Replace NaN values with empty strings - df = df.fillna('') + print(df.index) + df.index = pd.MultiIndex.from_tuples([(site, pd.to_datetime(dt)) for site, dt in df.index], + names=['site_number', 'datetime']) + + # Extract the datetime level from the MultiIndex + datetime_index = df.index.get_level_values("datetime") + + # Add new columns + df["date_str"] = datetime_index.strftime("%Y-%m-%d") # Convert to 'YYYY-MM-DD' string + df["year_nu"] = datetime_index.year # Extract year + df["month_nu"] = datetime_index.month # Extract month + df["day_nu"] = datetime_index.day # Extract day + df["unix_time"] = datetime_index.astype(int) // 10**9 # Convert to Unix timestamp + df["site_no"] = df.index.get_level_values("site_number") + + # Add the data to the result dictionary with site_number as the key for site_number in site_list: @@ -85,105 +100,32 @@ def split_list(l): if not site_data.empty: # Remove the 'ts_id' column if it exists site_data = site_data.drop(columns=['site_no', 'loc_web_ds'], errors='ignore') - - unique_param_codes = site_data['parameter_cd'].unique() + # Create a description of the parameters + param_headers = [] + header = site_data.columns.tolist() + for item in header: + print(item) + if item.endswith('_Mean_cd'): + site_data = site_data.drop(columns=[item], errors='ignore') + header.remove(item) + if item.endswith('_Mean'): + param_headers.append(item) + unique_param_codes = [item.replace('_Mean', '') for item in param_headers] param_names = [param_desc.get(code, 'Unknown parameter') for code in unique_param_codes] description = f"This is a table of the 
mean {report_type} values for the following parameters: {', '.join([f'{code} - {name}' for code, name in zip(unique_param_codes, param_names)])}" - header = site_data.columns.tolist() - parameter_cd_index = header.index('parameter_cd') - mean_va_index = header.index('mean_va') + # Prepare table object rows = site_data.values.tolist() - base_set = set() - year_index = None - month_index = None - day_index = None - if 'year_nu' in header: - year_index = header.index('year_nu') - if 'begin_yr' in header: - year_index = header.index('begin_yr') - if 'month_nu' in header: - month_index = header.index('month_nu') - if 'day_nu' in header: - day_index = header.index('day_nu') - base_string_param_map = {} - ts_id_index = header.index('ts_id') - # update to take only the latest ts_id - ts_id_map = {} - for row in rows: - ts_id = row[ts_id_index] - year = row[year_index] if year_index is not None else 0000 - month = row[month_index] if month_index is not None else 00 - day = row[day_index] if day_index is not None else 00 - base_string = str(f'{str(year).zfill(4)}{str(month).zfill(2)}{str(day).zfill(2)}') - base_set.add(base_string) - if base_string_param_map.get(base_string, None) is None: - base_string_param_map[base_string] = {} - for param in unique_param_codes: - base_string_param_map[base_string][param] = None - val = row[mean_va_index] - for code in unique_param_codes: - if row[parameter_cd_index] == code: - if ts_id_map.get(f'{base_string}_{code}', None) is None: - ts_id_map[f'{base_string}_{code}'] = ts_id - if base_string_param_map[base_string][code] is None or val is not None: - base_string_param_map[base_string][code] = val - if ts_id > ts_id_map[f'{base_string}_{code}']: - ts_id_map[f'{base_string}_{code}'] = ts_id - base_string_param_map[base_string][code] = val - base_order = list(base_set) - base_order.sort() - time_base_index = {} - header.append('index') - header.append('unix_time') - for code in unique_param_codes: - header.append(code) - parameter_cd_index 
= header.index('parameter_cd') - header.pop(parameter_cd_index) - mean_va_index = header.index('mean_va') - header.pop(mean_va_index) - unix_mapping = {} - for row in rows: - year = row[year_index] if year_index is not None else 0000 - month = row[month_index] if month_index is not None else 00 - day = row[day_index] if day_index is not None else 00 - base_string = str(f'{str(year).zfill(4)}{str(month).zfill(2)}{str(day).zfill(2)}') - row.append(base_order.index(base_string)) - unix_timestamp = get_unix_time(row, year_index, month_index, day_index) - row.append(unix_timestamp) - row.pop(parameter_cd_index) - row.pop(mean_va_index) - if time_base_index.get(base_string, None) is None: - for param in unique_param_codes: - param_val = base_string_param_map.get(base_string, {}).get(param, None) - row.append(param_val) - if unix_mapping.get(unix_timestamp, None) is None: - unix_mapping[unix_timestamp] = row - else: # Combine the so we get rid of any missing data - old_row = unix_mapping[unix_timestamp] - for index in range(len(row)): - if row[index] is not None and old_row[index] is None: - old_row[index] = row[index] - - # row_length = len(rows[0]) - # for row in rows: - # if len(row) != row_length: - # print(f'{site_number} - {row} != {row_length}') - # else: - # print(f'{site_number} - {row} == {row_length}') - sorted_values = [value for _, value in sorted(unix_mapping.items())] - - updated_df = pd.DataFrame(sorted_values, columns=header) table_object = { "name": f"{site_number}_{report_type}", "description": description, "type": f'USGS_gauge_{report_type}_{"_".join(param_codes)}', "header": header, - "summary": generate_summary(updated_df, param_desc, updated_df.columns.tolist()), - "rows": sorted_values, + "summary": generate_summary(site_data, param_desc, site_data.columns.tolist()), + "rows": rows, } if site_number not in result: result[site_number] = [] @@ -203,8 +145,21 @@ def generate_summary(df, param_desc, rows): """Generate a summary object for the table 
with column type and stats, focusing on unique parameter_cd.""" summary = {} for header in rows: + if header.endswith('_Mean'): + if header not in summary.keys(): + summary[header] = {'type':'parameter_cd'} + parameter_cd = header.replace('_Mean', '') + param_data = df[df[header] == header] + value_col = df[header] + summary[header][parameter_cd] = { + "parameter_cd": parameter_cd, + "parameter_name": param_desc.get(parameter_cd, 'Unknown parameter'), + "min": float(value_col.min()), + "max": float(value_col.max()), + "mean": float(value_col.mean()) + } # Iterate over each unique parameter_cd - if header == 'parameter_cd': + elif header == 'parameter_cd': if header not in summary.keys(): summary[header] = {'type':'parameter_cd'} for parameter_cd in df['parameter_cd'].unique(): diff --git a/scripts/googleneuralhydrology/ncToJson.py b/scripts/googleneuralhydrology/ncToJson.py index dd79fd0..d1bfb90 100644 --- a/scripts/googleneuralhydrology/ncToJson.py +++ b/scripts/googleneuralhydrology/ncToJson.py @@ -5,7 +5,7 @@ import os import pandas as pd -def summarize_data(ds): +def summarize_data(ds, diff_values, percentage_off): """Generate a summary of the dataset with min/max values for numerical variables.""" summary = {} @@ -17,24 +17,41 @@ def summarize_data(ds): "min": np.nanmin(data).item() if np.any(~np.isnan(data)) else None, "max": np.nanmax(data).item() if np.any(~np.isnan(data)) else None } - - summary["columns"] = list(ds.data_vars.keys()) # List of available variables + + # Add the new computed values to the summary + summary["Obs - Sim"] = { + "type": "number", + "min": np.nanmin(diff_values).item() if np.any(~np.isnan(diff_values)) else None, + "max": np.nanmax(diff_values).item() if np.any(~np.isnan(diff_values)) else None + } + + summary["Percentage Off"] = { + "type": "number", + "min": np.nanmin(percentage_off).item() if np.any(~np.isnan(percentage_off)) else None, + "max": np.nanmax(percentage_off).item() if np.any(~np.isnan(percentage_off)) else None + 
} + + summary["columns"] = list(ds.data_vars.keys()) + ["Obs - Sim", "Percentage Off"] + if 'date' in ds: time_values = ds['date'].values start_time = pd.to_datetime(time_values.min()) end_time = pd.to_datetime(time_values.max()) - + unique_dates = np.unique(time_values) + summary["time"] = { "type": "string", "start": start_time.strftime("%Y-%m-%d"), - "end": end_time.strftime("%Y-%m-%d") + "end": end_time.strftime("%Y-%m-%d"), + "value_count": len(unique_dates) } # Convert to Unix time (seconds since the Unix epoch) summary["unix_time"] = { "type": "number", "min": start_time.value // 10**9, # Convert to seconds - "max": end_time.value // 10**9 # Convert to seconds + "max": end_time.value // 10**9, # Convert to seconds + "value_count": len(unique_dates) } return summary @@ -48,6 +65,10 @@ def convert_netcdf_to_json(nc_file): obs_values = ds["QObs(mm/d)_obs"].values.flatten() sim_values = ds["QObs(mm/d)_sim"].values.flatten() + # Compute differences and percentage differences + diff_values = obs_values - sim_values + percentage_off = np.where(obs_values != 0, (diff_values / obs_values) * 100, np.nan) # Avoid division by zero + # Format time correctly and convert to UNIX time if "units" in ds["date"].attrs and "since" in ds["date"].attrs["units"]: time_origin = ds["date"].attrs["units"].split("since")[1].strip() @@ -61,11 +82,14 @@ def convert_netcdf_to_json(nc_file): # Format time as strings time_str = time.strftime("%Y-%m-%d").tolist() - # Construct rows with UNIX time - rows = [[time_str[i], int(unix_time[i]), float(obs_values[i]), float(sim_values[i])] for i in range(len(time))] + # Construct rows with the new columns + rows = [ + [time_str[i], int(unix_time[i]), float(obs_values[i]), float(sim_values[i]), float(diff_values[i]), float(percentage_off[i])] + for i in range(len(time)) + ] - # Generate summary - summary = summarize_data(ds) + # Generate summary including new values + summary = summarize_data(ds, diff_values, percentage_off) # Construct final JSON 
@click.command()
@click.argument('geojson_file', type=click.Path(exists=True))
@click.argument('id_list_file', type=click.Path(exists=True))
@click.option('--output', default='matching_hru_ids.geojson', help='Output GeoJSON filename')
def filter_geojson(geojson_file, id_list_file, output):
    """Filter features from a GEOJSON file based on matching HRU IDs from a JSON array."""
    # Read the HRU ID list, normalizing every entry to a zero-padded 8-digit string.
    with open(id_list_file, 'r') as fp:
        try:
            wanted_ids = {str(raw_id).zfill(8) for raw_id in json.load(fp)}
        except (ValueError, TypeError) as err:
            click.echo(f"Error reading ID list file: {err}")
            return

    with open(geojson_file, 'r') as fp:
        collection = json.load(fp)

    if 'features' not in collection:
        click.echo("Invalid GeoJSON file: No 'features' key found.")
        return

    # Keep only features whose zero-padded hru_id appears in the wanted set,
    # rewriting the property so the output is consistently 8 digits.
    kept = []
    for feature in collection['features']:
        if 'properties' not in feature:
            continue
        props = feature['properties']
        if 'hru_id' not in props:
            continue
        padded = str(props['hru_id']).zfill(8)
        if padded in wanted_ids:
            props['hru_id'] = padded
            kept.append(feature)

    result = {
        "type": "FeatureCollection",
        "features": kept,
    }

    with open(output, 'w') as fp:
        json.dump(result, fp, indent=2)

    click.echo(f"Filtered GeoJSON saved to {output}")


if __name__ == '__main__':
    filter_geojson()
column_summaries[table_type][column]['value_count'] = column_summaries[ table_type - ][column].get('value_count', 0) + stats.get('value_count', 0) + ][column].get('value_count', 0) + stats_col.get('value_count', 0) if stats_col.get('description', None): - column_summaries[table_type][column]['description'] = stats.get( + column_summaries[table_type][column]['description'] = stats_col.get( 'description', 'Unknown' ) # Construct the response diff --git a/uvdat/core/tasks/map_layers.py b/uvdat/core/tasks/map_layers.py index 701b584..fc8b0e2 100644 --- a/uvdat/core/tasks/map_layers.py +++ b/uvdat/core/tasks/map_layers.py @@ -646,7 +646,7 @@ def process_tabular_vector_feature_data(map_layer_id, json_data, matcher): # Ensure the matcher value exists in json_data if matcher_value not in json_data: logger.info( - f'Could not find a match for the property {matcher} in vectorFeature properties' + f'Could not find a match for the property {matcher} : {type(matcher)} in vectorFeature properties with match_value: {matcher_value}: {type(matcher_value)}' ) logger.info(feature.properties) continue From 84b64c2b7d5a605f8cb37f32a4523798caa92810 Mon Sep 17 00:00:00 2001 From: Bryon Lewis Date: Thu, 27 Mar 2025 13:14:55 -0400 Subject: [PATCH 04/10] google hydrology functionality --- client/src/components/MapLegends/ColorKey.vue | 7 +- sample_data/google.json | 26 +- scripts/googleneuralhydrology/README.md | 30 ++ .../addStatsToGeoJSON.py | 60 ++++ .../googleneuralhydrology/combineRowData.py | 16 +- .../createAndUploadInputJSON.py | 4 +- .../googleneuralhydrology/flatJsonToJson.py | 34 ++ .../googleneuralhydrology/getDailyStats.py | 313 ++++++------------ scripts/googleneuralhydrology/ncToJson.py | 55 ++- .../googleneuralhydrology/subSampleHruIds.py | 110 +++++- uvdat/core/rest/vector_feature_table_data.py | 2 +- 11 files changed, 398 insertions(+), 259 deletions(-) create mode 100644 scripts/googleneuralhydrology/README.md create mode 100644 
scripts/googleneuralhydrology/addStatsToGeoJSON.py create mode 100644 scripts/googleneuralhydrology/flatJsonToJson.py mode change 100755 => 100644 scripts/googleneuralhydrology/getDailyStats.py diff --git a/client/src/components/MapLegends/ColorKey.vue b/client/src/components/MapLegends/ColorKey.vue index 724448f..974f6d4 100644 --- a/client/src/components/MapLegends/ColorKey.vue +++ b/client/src/components/MapLegends/ColorKey.vue @@ -269,8 +269,11 @@ export default defineComponent({ // D3 allows color strings but says it requires numbers for type definitions .range(colors); // Recalculate percentage of width for gradient - const max = domain[domain.length - 1]; - const percent = domain.map((item) => (max === 0 ? 0 : item / max)); + const min = Math.min(...domain); + const max = Math.max(...domain); + const range = max - min; + + const percent = domain.map((item) => (range === 0 ? 0 : (item - min) / range)); // Append multiple color stops using data/enter step linearGradient.selectAll('stop').remove(); linearGradient diff --git a/sample_data/google.json b/sample_data/google.json index 0e3efea..495dbf5 100644 --- a/sample_data/google.json +++ b/sample_data/google.json @@ -15,15 +15,15 @@ "metadata": {}, "files": [ { - "name": "Google Eural Hydrology Data", - "path": "./data/GoogleNeuralHydrology/matching_hru_ids.geojson", - "url": "https://data.kitware.com/api/v1/file/67e175bf3e5f3e5e96b97512/download", + "name": "Google Neural Hydrology Data", + "path": "./data/GoogleNeuralHydrology/CAMEL_USGS_Stats.geojson", + "url": "https://data.kitware.com/api/v1/file/67e579ae3e5f3e5e96b97cb5/download", "type": "geojson", "metadata": { "tabular": { "path": "./data/GoogleNeuralHydrology/combined.json", "name": "Google Neural Hydrology Tabular Data", - "url": "https://data.kitware.com/api/v1/file/67e175c03e5f3e5e96b97515/download", + "url": "https://data.kitware.com/api/v1/file/67e579d63e5f3e5e96b97cb8/download", "featurePropertyMatcher": "hru_id" }, "default_style": { @@ -89,15 
+89,6 @@ "indexer": "hru_id", "xAxisLabel": "Date", "yAxisLabel": "Percentage Off" - }, - { - "name": "00060_Mean", - "type": "USGS_gauge_daily_00060", - "xAxis": "unix_time", - "yAxis": "00060_Mean", - "indexer": "hru_id", - "xAxisLabel": "Date", - "yAxisLabel": "00060_Mean" } ], "mapLayerFeatureTableGraphs": [ @@ -136,15 +127,6 @@ "indexer": "hru_id", "xAxisLabel": "Date", "yAxisLabel": "Percentage Off" - }, - { - "name": "00060_Mean", - "type": "USGS_gauge_daily_00060", - "xAxis": "unix_time", - "yAxis": "00060_Mean", - "indexer": "hru_id", - "xAxisLabel": "Date", - "yAxisLabel": "00060_Mean" } ] } diff --git a/scripts/googleneuralhydrology/README.md b/scripts/googleneuralhydrology/README.md new file mode 100644 index 0000000..63ebac6 --- /dev/null +++ b/scripts/googleneuralhydrology/README.md @@ -0,0 +1,30 @@ +# Google Neural Hydrology Processing + +## extractHruIds.py + +This when given a geojson with properties.hru_id will extract all of the HRUIds from the geojson and make sure they are 8 digit codes and place them into a single list. +This was used to create a small sample that could be used to run testing data on + +## runExperiment.py + +`python runExperiment.py hru_ids.json` + +Requires the `run_demo.sh` script as well as properly downloading the docker and relevant data for running Google Neural Hydrology. +This file will go through the hru_ids.json and will run the experiments on each one and then extract the resulting .nc file and name it to the hru_id for further processing + +# subSampleHruIds.py + +`python subSampleHruIds.py camelsData.geojson hru_ids.json` + +Takes the base camerlsData.geojson with all hru_ids and updates the hru_ids to be an 8 digit code with leading zeros. I've also updated this to gather the USGS gauge data and add it as a point as well as transfer some information betwene the two Features in the geojson such as Name and other information. +Outputs a new geojson file 'matching_hru_ids.geojson' as well as HRU_AREA_MAP.json. 
The second file is used for converting USGS daily calculations from cuft/s to mm/d values. + +## ncToJson.py + +`python ncToJson.py ./folder` + +Takes all *.nc files from the runExperiment.py results and converts them into row data that can be used by DIVERS-H. It also creates a nc_stat_mapping file so that the stats can be added to the output geojson from subSampleHruIds.py. Results are written by default to a ncToJSONOutput.json as well as a nc_stat_mapping.json file + +## addStatsToGeoJSON.py + +Takes the output from ncToJson.py (nc_stat_mapping.json) and adds this information to the output from subSampleHruIds.py so that sample data can be \ No newline at end of file diff --git a/scripts/googleneuralhydrology/addStatsToGeoJSON.py b/scripts/googleneuralhydrology/addStatsToGeoJSON.py new file mode 100644 index 0000000..e3c137a --- /dev/null +++ b/scripts/googleneuralhydrology/addStatsToGeoJSON.py @@ -0,0 +1,60 @@ +import click +import json +import geojson +from collections import defaultdict + +def flatten_dict(d, parent_key='', sep='.', result=None): + """Flattens a nested dictionary.""" + if result is None: + result = {} + for key, value in d.items(): + new_key = f"{parent_key}{sep}{key}" if parent_key else key + if isinstance(value, dict): + flatten_dict(value, new_key, sep=sep, result=result) + else: + result[new_key] = value + return result + +def process_geojson(geojson_file, json_file, output_file): + """Process GeoJSON and JSON to combine data based on hru_id.""" + + # Load the geojson file + with open(geojson_file, 'r') as f: + geo_data = geojson.load(f) + + # Load the JSON file with HRU stats + with open(json_file, 'r') as f: + stats_data = json.load(f) + + # Iterate over the features in the GeoJSON file + for feature in geo_data['features']: + hru_id = feature['properties'].get('hru_id') + + if hru_id and str(hru_id) in stats_data: + stats = stats_data[str(hru_id)] + + # Flatten the statistics for 'Obs-Sim' and 'Percentage_Off' + flattened_stats = {} + for
category in ['Obs-Sim', 'Percentage_Off']: + if category in stats: + flattened_stats.update(flatten_dict(stats[category], parent_key=category)) + + # Add the flattened stats to the GeoJSON feature properties + feature['properties'].update(flattened_stats) + + # Save the modified GeoJSON to the output file + with open(output_file, 'w') as f: + geojson.dump(geo_data, f, separators=(',', ':')) + + click.echo(f"Processed GeoJSON saved to {output_file}") + +@click.command() +@click.argument('geojson_file', default='matching_hru_ids.geojson', type=click.Path(exists=True)) +@click.argument('json_file', default='nc_stat_mapping.json', type=click.Path(exists=True)) +@click.argument('output_file', default='CAMEL_USGS_Stats.geojson', type=click.Path()) +def process_files(geojson_file, json_file, output_file): + """CLI tool to merge HRU stats from a JSON file into a GeoJSON file.""" + process_geojson(geojson_file, json_file, output_file) + +if __name__ == "__main__": + process_files() diff --git a/scripts/googleneuralhydrology/combineRowData.py b/scripts/googleneuralhydrology/combineRowData.py index 7aa4141..4798681 100644 --- a/scripts/googleneuralhydrology/combineRowData.py +++ b/scripts/googleneuralhydrology/combineRowData.py @@ -21,17 +21,21 @@ def combine_json(json_file1, json_file2, output): all_keys = set(data1.keys()).union(data2.keys()) for key in all_keys: - array1 = data1.get(key, []) - array2 = data2.get(key, []) + value1 = data1.get(key, None) + value2 = data2.get(key, None) - if isinstance(array1, list) and isinstance(array2, list): - combined_data[key] = array1 + array2 + if isinstance(value1, list) and isinstance(value2, list): + combined_data[key] = value1 + value2 + elif isinstance(value1, list): + combined_data[key] = value1 + elif isinstance(value2, list): + combined_data[key] = value2 else: - raise ValueError(f"Values for key '{key}' are not both arrays.") + ValueError('Key is not valid in either data') # Save combined JSON with open(output, "w") as f: - 
json.dump(combined_data, f, indent=4) + json.dump(combined_data, f, separators=(',', ':')) click.echo(f"Combined JSON saved to {output}") diff --git a/scripts/googleneuralhydrology/createAndUploadInputJSON.py b/scripts/googleneuralhydrology/createAndUploadInputJSON.py index f9bb03c..a8f0510 100644 --- a/scripts/googleneuralhydrology/createAndUploadInputJSON.py +++ b/scripts/googleneuralhydrology/createAndUploadInputJSON.py @@ -6,7 +6,7 @@ from girder_client import GirderClient import matplotlib.pyplot as plt -baseApiKey = 'OxhoPyCrzkGsnQiiRYZm3BgTlPhscKvILVyRdasT' +baseApiKey = 'ePEu0WAw7o7DCAzCR3xdSgAmtAOReWrUOIUCiCHC' def authenticate(client: GirderClient): @@ -97,7 +97,7 @@ def upload_data(client: GirderClient, remote_folder_id, base_geojson, base_tab_j default_style['mapLayerFeatureTableGraphs'] = mapLayerFeatureTableGraphs print(default_style['mapLayerFeatureTableGraphs']) file_metadata.append({ - 'name': f'Google Eural Hydrology Data', + 'name': f'Google Neural Hydrology Data', 'path': f'./data/GoogleNeuralHydrology/{base_geojson}', 'url': geojson_url, 'type': 'geojson', diff --git a/scripts/googleneuralhydrology/flatJsonToJson.py b/scripts/googleneuralhydrology/flatJsonToJson.py new file mode 100644 index 0000000..a7f601f --- /dev/null +++ b/scripts/googleneuralhydrology/flatJsonToJson.py @@ -0,0 +1,34 @@ +import json +import click + +@click.command() +@click.argument('input', required=True, type=click.Path(exists=True)) +@click.argument('output', required=True, type=click.Path()) +def convert_file(input, output): + """Convert a line-by-line JSON file to a single JSON object.""" + result = {} + + # Read the input file line by line + with open(input, 'r') as infile: + for line in infile: + try: + # Parse the JSON object from the line + data = json.loads(line.strip()) + # Extract the 8-digit number from the "name" field + name = data.get("name", "") + site_number = name.split('_')[0] if name else None + + if site_number and len(site_number) == 8: # Check if 
the site_number is valid (8 digits) + # Add the data to the result dictionary with the 8-digit number as the key + result[site_number] = {k: v for k, v in data.items() if k != "name"} # Exclude "name" field + except json.JSONDecodeError: + print(f"Skipping invalid JSON line: {line.strip()}") + + # Write the result to the output file in a dense JSON format (no indentation or newlines) + with open(output, 'w') as outfile: + json.dump(result, outfile, separators=(',', ':')) + + print(f"Conversion complete. Output written to {output}") + +if __name__ == '__main__': + convert_file() diff --git a/scripts/googleneuralhydrology/getDailyStats.py b/scripts/googleneuralhydrology/getDailyStats.py old mode 100755 new mode 100644 index 79d7f62..0221338 --- a/scripts/googleneuralhydrology/getDailyStats.py +++ b/scripts/googleneuralhydrology/getDailyStats.py @@ -7,6 +7,7 @@ import time import calendar import datetime +import os def get_unix_time(row, year_index, month_index, day_index): if year_index is not None: @@ -27,26 +28,37 @@ def get_unix_time(row, year_index, month_index, day_index): day = 1 return calendar.timegm(time.strptime(f"{year}-{month}-{day}", "%Y-%m-%d")) -# Define the function for fetching and formatting data + @click.command() @click.option('-p', '--param-codes', default=['00060'], multiple=True, required=True, help="List of parameter codes to query.") @click.option('--input', required=True, type=click.Path(exists=True), help="JSON file containing an array of site numbers.") @click.option('--start-date', default='1989-10-01', help="Start date for the data query.") @click.option('--end-date', default='1999-09-30', help="End date for the data query.") -@click.option('--output', default='dailyStats.json', help="Output JSON file where the results will be saved.") +@click.option('--output', default='./dailyStats.json', help="Output JSON file where the results will be saved.") @click.option('--usgs-parameters', default='../nwis/USGSParameters.tsv', 
type=click.Path(exists=True), help="Path to the USGSParameters.tsv file.") -def fetch_data(param_codes, input, start_date, end_date, output, usgs_parameters): +@click.option('--area-mapping', default='./HRU_AREA_MAP.json', type=click.Path(exists=True), help="Path to the HRU area mapping file.") +def fetch_data(param_codes, input, start_date, end_date, output, usgs_parameters, area_mapping): """Fetch data from NWIS and save it in a JSON format with descriptions for each table.""" + # Load the USGS parameters file usgs_df = pd.read_csv(usgs_parameters, sep='\t', comment='#') - # Create a dictionary mapping parameter codes to their descriptions param_desc = dict(zip(usgs_df['parm_cd'], usgs_df['parm_nm'])) - # Load geojson file with open(input, 'r') as f: site_loaded = json.load(f) # Expecting a JSON array of site numbers + with open(area_mapping, 'r') as f: + area_map = json.load(f) # Expecting a JSON array of site numbers + site_numbers = [str(item) for item in site_loaded] + + if os.path.exists(output): + with open(output, 'r') as f: + processed_sites = json.load(f) # Expecting a JSON array of site numbers + + loaded_sites = processed_sites.keys() + site_numbers = [site for site in site_numbers if site not in loaded_sites] + # Split the site numbers into chunks of 10 (because NWIS allows a max of 10 sites per request) def split_list(l): n = 10 @@ -55,215 +67,102 @@ def split_list(l): site_lists = list(split_list(site_numbers)) - # Prepare the result container - result = {} + # Open output file in append mode + with open(output, 'a') as f: + # Fetch data for each site and each report type (daily) + report_types = ['daily'] + for report_type in report_types: + for i, site_list in enumerate(site_lists): + try: + response = nwis.get_dv( + sites=site_list, + start=start_date, + end=end_date, + parameterCd=",".join(param_codes), + ) + df, meta = response + except Exception as e: + print(f"Error fetching {report_type} data for sites {site_list}: {e}") + continue + + # 
Prepare data for writing + site_data = process_site_data(df, meta, area_map, site_list, param_codes, param_desc) + + # Append processed data to file incrementally + with open(output, 'a') as f: + for entry in site_data: + json_str = json.dumps(entry, default=lambda x: None if isinstance(x, float) and np.isnan(x) else x, separators=(',', ':')) + f.write(f"{json_str}\n") # Write each object on a single line, each key on a new line + + print(f"Fetched {report_type} data for {len(site_list)} sites.") + + print(f"Results saved to {output}.") - # Fetch data for each site and each report type (monthly, daily, annual) - report_types = ['daily'] # Assuming you want monthly, daily, and annual reports +def process_site_data(df, meta, area_map, site_list, param_codes, param_desc): + """Process the data for a single site list.""" + valid_index_tuples = [] + for site, dt in df.index: + try: + valid_dt = pd.to_datetime(dt, errors='raise') # Raise an error for invalid dates + valid_index_tuples.append((site, valid_dt)) + except Exception as e: + print(f"Skipping site {site} due to invalid date: {dt} ({e})") + + if valid_index_tuples: + df.index = pd.MultiIndex.from_tuples(valid_index_tuples, names=['site_number', 'datetime']) + else: + print("No valid sites found due to date errors. 
Skipping further processing.") + return [] + + # Add additional columns and process data + datetime_index = df.index.get_level_values("datetime") + df["date_str"] = datetime_index.strftime("%Y-%m-%d") + df["year_nu"] = datetime_index.year + df["month_nu"] = datetime_index.month + df["day_nu"] = datetime_index.day + df["unix_time"] = datetime_index.astype(int) // 10**9 + df["site_no"] = df.index.get_level_values("site_number") + + # Prepare and append the data for each site + site_data = [] + endsWith = ['date_str', 'year_nu', 'month_nu', 'day_nu', 'unix_time', '00060_Mean'] + for site_number in site_list: + site_data_frame = df[df['site_no'] == site_number] + area = area_map.get(str(site_number)) + + if not site_data_frame.empty: + site_data_frame = site_data_frame.drop(columns=['site_no', 'loc_web_ds'], errors='ignore') + param_headers = [col for col in site_data_frame.columns if col.endswith('_Mean')] + columns = site_data_frame.columns.tolist() + for col in columns: + if col not in endsWith: + site_data_frame = site_data_frame.drop(columns=[col]) + param_names = [param_desc.get(code.replace('_Mean', ''), 'Unknown parameter') for code in param_headers] + + site_data_frame = site_data_frame.where(pd.notna(site_data_frame), 0) + + # Prepare table object for the site + table_object = { + "name": f"{site_number}_daily", + "description": "This is a table of mean daily values for USGS gauges converted to mm/d for the whole watershed", + "type": f'USGS_gauge_daily_{"_".join(param_codes)}', + "header": site_data_frame.columns.tolist(), + "summary": generate_summary(site_data_frame, param_desc, site_data_frame.columns.tolist()), + "rows": site_data_frame.values.tolist(), + } + site_data.append(table_object) - for report_type in report_types: - for i, site_list in enumerate(site_lists): - try: - response = nwis.get_dv( - sites=site_list, - start=start_date, - end=end_date, - parameterCd=",".join(param_codes), - ) - df, meta = response - except Exception as e: - 
print(f"Error fetching {report_type} data for sites {site_list}: {e}") - continue - df, meta = response - - # Replace NaN values with empty strings - print(df.index) - df.index = pd.MultiIndex.from_tuples([(site, pd.to_datetime(dt)) for site, dt in df.index], - names=['site_number', 'datetime']) - - # Extract the datetime level from the MultiIndex - datetime_index = df.index.get_level_values("datetime") - - # Add new columns - df["date_str"] = datetime_index.strftime("%Y-%m-%d") # Convert to 'YYYY-MM-DD' string - df["year_nu"] = datetime_index.year # Extract year - df["month_nu"] = datetime_index.month # Extract month - df["day_nu"] = datetime_index.day # Extract day - df["unix_time"] = datetime_index.astype(int) // 10**9 # Convert to Unix timestamp - df["site_no"] = df.index.get_level_values("site_number") - - - - # Add the data to the result dictionary with site_number as the key - for site_number in site_list: - site_data = df[df['site_no'] == site_number] - if not site_data.empty: - # Remove the 'ts_id' column if it exists - site_data = site_data.drop(columns=['site_no', 'loc_web_ds'], errors='ignore') - - # Create a description of the parameters - param_headers = [] - header = site_data.columns.tolist() - for item in header: - print(item) - if item.endswith('_Mean_cd'): - site_data = site_data.drop(columns=[item], errors='ignore') - header.remove(item) - if item.endswith('_Mean'): - param_headers.append(item) - unique_param_codes = [item.replace('_Mean', '') for item in param_headers] - param_names = [param_desc.get(code, 'Unknown parameter') for code in unique_param_codes] - description = f"This is a table of the mean {report_type} values for the following parameters: {', '.join([f'{code} - {name}' for code, name in zip(unique_param_codes, param_names)])}" - + return site_data - # Prepare table object - rows = site_data.values.tolist() - - table_object = { - "name": f"{site_number}_{report_type}", - "description": description, - "type": 
f'USGS_gauge_{report_type}_{"_".join(param_codes)}', - "header": header, - "summary": generate_summary(site_data, param_desc, site_data.columns.tolist()), - "rows": rows, - } - if site_number not in result: - result[site_number] = [] - - result[site_number].append(table_object) - - print(f"Fetched {report_type} data for {len(site_list)} sites.") - - # Save the result to the output file as JSON - with open(output, 'w') as f: - json.dump(result, f, indent=4) - - print(f"Results saved to {output}.") - -limit = 100 def generate_summary(df, param_desc, rows): - """Generate a summary object for the table with column type and stats, focusing on unique parameter_cd.""" + """Generate a summary object for the table.""" summary = {} for header in rows: - if header.endswith('_Mean'): - if header not in summary.keys(): - summary[header] = {'type':'parameter_cd'} - parameter_cd = header.replace('_Mean', '') - param_data = df[df[header] == header] - value_col = df[header] - summary[header][parameter_cd] = { - "parameter_cd": parameter_cd, - "parameter_name": param_desc.get(parameter_cd, 'Unknown parameter'), - "min": float(value_col.min()), - "max": float(value_col.max()), - "mean": float(value_col.mean()) - } - # Iterate over each unique parameter_cd - elif header == 'parameter_cd': - if header not in summary.keys(): - summary[header] = {'type':'parameter_cd'} - for parameter_cd in df['parameter_cd'].unique(): - param_data = df[df['parameter_cd'] == parameter_cd] - - # Assuming the 'value' column contains the actual data values - value_col = param_data['mean_va'] - - # Calculate the min, max, and mean for each parameter_cd - summary[header][parameter_cd] = { - "parameter_cd": parameter_cd, - "parameter_name": param_desc.get(parameter_cd, 'Unknown parameter'), - "min": float(value_col.min()), - "max": float(value_col.max()), - "mean": float(value_col.mean()) - } - else: # Calculate type/min/max and other fields - - if header not in summary.keys(): - summary[header] = {'type': 
None, 'values': set(), 'value_count': 0} - parameter_cd = param_desc.get(header, None) - if parameter_cd: - summary[header]["description"] = parameter_cd[0] if isinstance(parameter_cd, tuple) else parameter_cd - for value in df[header].unique(): - if isinstance(value, bool): - summary[header]['type'] = 'bool' - summary[header]['value_count'] += 1 - elif isinstance(value, (int, float, np.float64, np.int32, np.int64)): - summary[header]['type'] = 'number' - summary[header]['value_count'] += 1 - if 'min' not in summary[header] or value < summary[header]['min']: - if np.isnan(float(value)) or value is None and summary[header].get('min', None) is None: - summary[header]['min'] = float('inf') - else: - summary[header]['min'] = float(value) - if 'max' not in summary[header] or value > summary[header]['max']: - if np.isnan(float(value)) or value is None and summary[header].get('max', None) is None: - summary[header]['max'] = float('-inf') - else: - summary[header]['max'] = float(value) - elif isinstance(value, str): - if 'values' not in summary[header]: - summary[header]['values'] = set() - summary[header]['value_count'] += 1 - summary[header]['type'] = 'string' - summary[header]['values'].add(value) - for header in summary.keys(): - if summary[header]['type'] is None: - summary[header]['type'] = 'unknown' - del summary[header]['values'] - continue - if summary[header]['type'] == 'number': - if summary[header]['value_count'] == 1: - summary[header]['values'] = summary[header].get('min', summary[header].get('max')) - del summary[header]['min'] - del summary[header]['max'] - elif summary[header]['min'] == summary[header]['max']: - val = summary[header]['min'] - del summary[header]['values'] - del summary[header]['min'] - del summary[header]['max'] - summary[header]['static'] = True - summary[header]['value'] = val - else: - if np.isnan(summary[header]['min']): - summary[header]['min'] = None - if np.isnan(summary[header]['max']): - summary[header]['max'] = None - del 
summary[header]['values'] - elif ( - summary[header]['type'] == 'string' - and 'values' in summary[header] - and not summary[header].get('searchable') - ): - summary[header]['values'] = sorted(summary[header]['values']) - if len(summary[header]['values']) >= limit: - summary[header]['searchable'] = True - summary[header]['unique'] = len(summary[header]['values']) - del summary[header]['values'] - elif summary[header]['type'] == 'bool': - del summary[header]['values'] - check_json_validity(summary) + if header.endswith('00060_Mean'): + summary[header] = {'type': 'parameter_cd', 'parameter_name': 'USGS cuft/s converted to mm/d for CAMELS data'} + elif header.endswith('_Mean'): + summary[header] = {'type': 'parameter_cd', 'parameter_name': param_desc.get(header.replace('_Mean', ''), 'Unknown parameter')} return summary -def check_json_validity(obj, path="root"): - valid_types = (str, int, float, bool, type(None), list, dict) - - if isinstance(obj, (np.float64, np.float128, np.int64, np.int32)): - print(f"Invalid type at {path}: {type(obj).__name__} (Value: {obj})") - return False - - if isinstance(obj, dict): - for key, value in obj.items(): - check_json_validity(value, path=f"{path}.{key}") - - elif isinstance(obj, list): - for idx, item in enumerate(obj): - check_json_validity(item, path=f"{path}[{idx}]") - - elif not isinstance(obj, valid_types): - print(f"Invalid type at {path}: {type(obj).__name__} (Value: {obj})") - return False - - if __name__ == '__main__': fetch_data() - diff --git a/scripts/googleneuralhydrology/ncToJson.py b/scripts/googleneuralhydrology/ncToJson.py index d1bfb90..480d401 100644 --- a/scripts/googleneuralhydrology/ncToJson.py +++ b/scripts/googleneuralhydrology/ncToJson.py @@ -56,18 +56,35 @@ def summarize_data(ds, diff_values, percentage_off): return summary +def calculate_stats_per_site(site_data): + """Calculate the statistics (min, max, mean, median) per site for 'Obs - Sim' and 'Percentage Off'.""" + stats = {} + stats["Obs-Sim"] = { + 
"min": float(np.nanmin(site_data["Obs - Sim"])), + "max": float(np.nanmax(site_data["Obs - Sim"])), + "mean": float(np.nanmean(site_data["Obs - Sim"])), + "median": float(np.nanmedian(site_data["Obs - Sim"])), + } + stats["Percentage_Off"] = { + "min": float(np.nanmin(site_data["Percentage Off"])), + "max": float(np.nanmax(site_data["Percentage Off"])), + "mean": float(np.nanmean(site_data["Percentage Off"])), + "median": float(np.nanmedian(site_data["Percentage Off"])), + } + return stats + def convert_netcdf_to_json(nc_file): """Load NetCDF file and convert it to JSON format.""" ds = xr.open_dataset(nc_file, engine="netcdf4") # Open NetCDF file # Extract key variables time = ds["date"].values # Extract dates - obs_values = ds["QObs(mm/d)_obs"].values.flatten() - sim_values = ds["QObs(mm/d)_sim"].values.flatten() + obs_values = np.nan_to_num(ds["QObs(mm/d)_obs"].values.flatten(), nan=0.0) + sim_values = np.nan_to_num(ds["QObs(mm/d)_sim"].values.flatten(), nan=0.0) # Compute differences and percentage differences diff_values = obs_values - sim_values - percentage_off = np.where(obs_values != 0, (diff_values / obs_values) * 100, np.nan) # Avoid division by zero + percentage_off = np.where(obs_values != 0, (diff_values / obs_values) * 100, 0) # Avoid division by zero # Format time correctly and convert to UNIX time if "units" in ds["date"].attrs and "since" in ds["date"].attrs["units"]: @@ -97,11 +114,12 @@ def convert_netcdf_to_json(nc_file): "rows": rows } - return output_json + return output_json, diff_values, percentage_off def process_folder(folder_path): """Process all .nc files in a folder and generate a combined JSON object.""" json_output = {} + nc_stat_mapping = {} # Loop through all .nc files in the folder for filename in os.listdir(folder_path): @@ -110,7 +128,7 @@ def process_folder(folder_path): nc_file = os.path.join(folder_path, filename) # Convert NetCDF to JSON format - json_data = convert_netcdf_to_json(nc_file) + json_data, diff_values, 
percentage_off = convert_netcdf_to_json(nc_file) # Create the structure for each file json_output[base_filename] = [{ @@ -129,20 +147,35 @@ def process_folder(folder_path): "rows": json_data["rows"] }] - return json_output + # Calculate site statistics + site_data = pd.DataFrame({ + "Obs - Sim": diff_values, + "Percentage Off": percentage_off + }) + + site_stats = calculate_stats_per_site(site_data) + site_number = base_filename # Assuming the site number is embedded in the file name (you can adjust this part) + nc_stat_mapping[site_number] = site_stats + + return json_output, nc_stat_mapping @click.command() @click.argument("folder", type=click.Path(exists=True)) @click.argument("output", default='ncToJSONOutput.json') -def convert_netcdf(folder, output): +@click.argument("stat_output", default='nc_stat_mapping.json') +def convert_netcdf(folder, output, stat_output): """CLI tool to convert a NetCDF (.nc) file to JSON format.""" - json_data = process_folder(folder) + json_data, nc_stat_mapping_data = process_folder(folder) - # Save JSON + # Save the main JSON output with open(output, "w") as json_file: - json.dump(json_data, json_file, indent=4) + json.dump(json_data, json_file, separators=(',', ':')) + + # Save the secondary JSON output for site statistics + with open(stat_output, "w") as stat_file: + json.dump(nc_stat_mapping_data, stat_file, separators=(',', ':')) - click.echo(f"Converted {folder} to {output}") + click.echo(f"Converted {folder} to {output} and {stat_output}") if __name__ == "__main__": convert_netcdf() diff --git a/scripts/googleneuralhydrology/subSampleHruIds.py b/scripts/googleneuralhydrology/subSampleHruIds.py index ae12707..335296f 100644 --- a/scripts/googleneuralhydrology/subSampleHruIds.py +++ b/scripts/googleneuralhydrology/subSampleHruIds.py @@ -1,17 +1,84 @@ import json import click +import dataretrieval.nwis as nwis +import pandas as pd +import geopandas as gpd +import us + +COUNTY_SHP_URL = 
"https://www2.census.gov/geo/tiger/TIGER2022/COUNTY/tl_2022_us_county.zip" +counties = gpd.read_file(COUNTY_SHP_URL)[["STATEFP", "COUNTYFP", "NAME"]] + +site_types_df = pd.read_csv('../nwis/SiteTypes.tsv', sep='\t') +site_type_map = site_types_df.set_index('Name')['Long name'].to_dict() + +def split_list(l, n=10): + """Split a list into chunks of size n.""" + for i in range(0, len(l), n): + yield l[i:i + n] + +def get_county_name(state_cd, county_cd): + """Convert state and county FIPS codes to a county name.""" + state_cd = str(state_cd).zfill(2) # Ensure it's two-digit + county_cd = str(county_cd).zfill(3) # Ensure it's three-digit + match = counties[(counties["STATEFP"] == state_cd) & (counties["COUNTYFP"] == county_cd)] + + return match.iloc[0]["NAME"] if not match.empty else None + +def get_state_abbr(state_cd): + state = us.states.lookup(str(state_cd)) + return state.abbr if state else None + +def get_usgs_gauge_data(hru_ids): + """Retrieve USGS gauge information for a list of HRU IDs in batches.""" + usgs_data = {} + + for site_batch in split_list(hru_ids, 10): # Process in batches of 10 + try: + response = nwis.get_info(sites=site_batch) + sites, meta = response + if sites is None or "site_no" not in sites: + continue + + # Convert all values to JSON-serializable types + sites = sites.map(lambda x: x.item() if isinstance(x, (pd.Int64Dtype, pd.Float64Dtype, pd.Timestamp)) else x) + + for _, row in sites.iterrows(): + site_id = row.get("site_no") + elevation = row.get("alt_va") + + # Replace NaN elevation values with -1 + if pd.isna(elevation): + elevation = -1 + + usgs_data[site_id] = { + "usgs_site_id": site_id, + "name": row.get("station_nm"), + "latitude": row.get("dec_lat_va"), + "longitude": row.get("dec_long_va"), + "site_type": site_type_map.get(row.get("site_tp_cd"), 'Unknown'), + "agency": row.get("agency_cd"), + "state": get_state_abbr(str(row.get("state_cd")).zfill(2)), + "county": get_county_name(str(row.get("state_cd")).zfill(2), 
str(row.get("county_cd")).zfill(3)), + "elevation": elevation, + } + except Exception as e: + click.echo(f"Error retrieving USGS gauge data: {e}") + continue + + return usgs_data @click.command() @click.argument('geojson_file', type=click.Path(exists=True)) @click.argument('id_list_file', type=click.Path(exists=True)) @click.option('--output', default='matching_hru_ids.geojson', help='Output GeoJSON filename') -def filter_geojson(geojson_file, id_list_file, output): - """Filter features from a GEOJSON file based on matching HRU IDs from a JSON array.""" +@click.option('--area_output', default='HRU_AREA_MAP.json', help='Output JSON filename for HRU area mapping') +def filter_geojson(geojson_file, id_list_file, output, area_output): + """Filter features from a GEOJSON file based on matching HRU IDs and add USGS gauge data.""" # Load the list of HRU IDs with open(id_list_file, 'r') as f: try: - hru_ids = {str(id).zfill(8) for id in json.load(f)} # Convert values to a set of zero-padded strings + hru_ids = {str(id).zfill(8) for id in json.load(f)} # Ensure zero-padded strings except (ValueError, TypeError) as e: click.echo(f"Error reading ID list file: {e}") return @@ -24,14 +91,37 @@ def filter_geojson(geojson_file, id_list_file, output): click.echo("Invalid GeoJSON file: No 'features' key found.") return - # Filter features based on matching hru_id, ensuring zero-padding + # Fetch USGS data for all HRU IDs in batches + usgs_data = get_usgs_gauge_data(list(hru_ids)) + + # Filter features and merge USGS data filtered_features = [] + hru_area_map = {} # Dictionary to store HRU ID -> AREA mapping + for feature in geojson_data['features']: if 'properties' in feature and 'hru_id' in feature['properties']: - hru_id = str(feature['properties']['hru_id']).zfill(8) # Ensure 8-digit format + hru_id = str(feature['properties']['hru_id']).zfill(8) if hru_id in hru_ids: - feature['properties']['hru_id'] = hru_id # Update the feature to ensure zero-padding - 
filtered_features.append(feature) + feature['properties']['hru_id'] = hru_id # Ensure consistent formatting + + # Store AREA property in the HRU area map + if 'AREA' in feature['properties']: + hru_area_map[hru_id] = feature['properties']['AREA'] + + # Add a new point feature for USGS gauge data if available + if hru_id in usgs_data: + gauge_info = usgs_data[hru_id] + point_feature = { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [gauge_info["longitude"], gauge_info["latitude"]] + }, + "properties": gauge_info + } + feature['properties']['Name'] = gauge_info['name'] + filtered_features.append(point_feature) + filtered_features.append(feature) # Keep original polygon # Create new GeoJSON structure filtered_geojson = { @@ -39,11 +129,15 @@ def filter_geojson(geojson_file, id_list_file, output): "features": filtered_features } - # Write the output file + # Write the output files with open(output, 'w') as f: json.dump(filtered_geojson, f, indent=2) + with open(area_output, 'w') as f: + json.dump(hru_area_map, f, indent=2) + click.echo(f"Filtered GeoJSON saved to {output}") + click.echo(f"HRU Area mapping saved to {area_output}") if __name__ == '__main__': filter_geojson() diff --git a/uvdat/core/rest/vector_feature_table_data.py b/uvdat/core/rest/vector_feature_table_data.py index 40899f4..1dd83ef 100644 --- a/uvdat/core/rest/vector_feature_table_data.py +++ b/uvdat/core/rest/vector_feature_table_data.py @@ -239,7 +239,7 @@ def get_graphs( if 'movingAverage' in data_types and moving_avg_val is not None and moving_avg_val > 1: moving_avg = np.convolve( - y_vals, np.ones(moving_avg_val) / moving_avg_val, mode='valid' + y_vals, np.ones(moving_avg_val) / moving_avg_val, mode='same' ) moving_avg_x = x_vals[moving_avg_val - 1 :] result['movingAverage'] = list(zip(moving_avg_x, moving_avg)) From 68abeae07db10273260bc6dc9199d570db098461 Mon Sep 17 00:00:00 2001 From: Bryon Lewis Date: Thu, 27 Mar 2025 13:22:06 -0400 Subject: [PATCH 05/10] revoked 
API Key and replace text --- scripts/googleneuralhydrology/createAndUploadInputJSON.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/googleneuralhydrology/createAndUploadInputJSON.py b/scripts/googleneuralhydrology/createAndUploadInputJSON.py index a8f0510..f3da232 100644 --- a/scripts/googleneuralhydrology/createAndUploadInputJSON.py +++ b/scripts/googleneuralhydrology/createAndUploadInputJSON.py @@ -6,7 +6,7 @@ from girder_client import GirderClient import matplotlib.pyplot as plt -baseApiKey = 'ePEu0WAw7o7DCAzCR3xdSgAmtAOReWrUOIUCiCHC' +baseApiKey = 'GIRDER API KEY HERE' def authenticate(client: GirderClient): From 1fc6c83244ccb05fb59b69f354189de9f459b766 Mon Sep 17 00:00:00 2001 From: Bryon Lewis Date: Thu, 27 Mar 2025 13:25:06 -0400 Subject: [PATCH 06/10] linting --- tox.ini | 2 ++ .../migrations/0004_displayconfiguration.py | 36 ++++++++++++++++--- uvdat/core/rest/__init__.py | 2 +- uvdat/core/rest/display_configuration.py | 26 ++++++-------- uvdat/urls.py | 9 +++-- 5 files changed, 52 insertions(+), 23 deletions(-) diff --git a/tox.ini b/tox.ini index 6214ad0..232bd10 100644 --- a/tox.ini +++ b/tox.ini @@ -82,6 +82,8 @@ ignore = E203, # line break before binary operator W503, + # Line too long + E501, # Missing docstring in * D10, exclude = diff --git a/uvdat/core/migrations/0004_displayconfiguration.py b/uvdat/core/migrations/0004_displayconfiguration.py index 3e45aad..8e14fe8 100644 --- a/uvdat/core/migrations/0004_displayconfiguration.py +++ b/uvdat/core/migrations/0004_displayconfiguration.py @@ -13,10 +13,38 @@ class Migration(migrations.Migration): migrations.CreateModel( name='DisplayConfiguration', fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('enabled_ui', models.JSONField(default=list, help_text="List of enabled UI elements: 'Collections', 'Datasets', 'Metadata'.")), - ('default_tab', models.CharField(choices=[('Collections', 'Collections'), 
('Datasets', 'Datasets'), ('Metadata', 'Metadata')], help_text='The default tab must be one of the enabled features.', max_length=256)), - ('default_displayed_layers', models.JSONField(default=list, help_text="List of map_layers enabled: [{type: 'netcdf', id: 1}. {type: 'vector', id: 3}, {type: 'raster', id: 4}]")), + ( + 'id', + models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name='ID' + ), + ), + ( + 'enabled_ui', + models.JSONField( + default=list, + help_text="List of enabled UI elements: 'Collections', 'Datasets', 'Metadata'.", + ), + ), + ( + 'default_tab', + models.CharField( + choices=[ + ('Collections', 'Collections'), + ('Datasets', 'Datasets'), + ('Metadata', 'Metadata'), + ], + help_text='The default tab must be one of the enabled features.', + max_length=256, + ), + ), + ( + 'default_displayed_layers', + models.JSONField( + default=list, + help_text="List of map_layers enabled: [{type: 'netcdf', id: 1}. {type: 'vector', id: 3}, {type: 'raster', id: 4}]", + ), + ), ], ), ] diff --git a/uvdat/core/rest/__init__.py b/uvdat/core/rest/__init__.py index 1695000..81a524e 100644 --- a/uvdat/core/rest/__init__.py +++ b/uvdat/core/rest/__init__.py @@ -1,6 +1,7 @@ from .chart import ChartViewSet from .context import ContextViewSet from .dataset import DatasetViewSet +from .display_configuration import DisplayConfigurationViewSet from .file_item import FileItemViewSet from .filter_metadata import MetadataFilterViewSet from .layer_collection import LayerCollectionViewSet @@ -14,7 +15,6 @@ from .tasks import TasksAPIView from .user import UserViewSet from .vector_feature_table_data import VectorFeatureTableDataViewSet -from .display_configuration import DisplayConfigurationViewSet __all__ = [ ContextViewSet, diff --git a/uvdat/core/rest/display_configuration.py b/uvdat/core/rest/display_configuration.py index f905bf1..d5d5a28 100644 --- a/uvdat/core/rest/display_configuration.py +++ b/uvdat/core/rest/display_configuration.py @@ 
-1,4 +1,4 @@ -from typing import Any, Dict, List +from typing import Any, Dict from django.core.exceptions import ValidationError from rest_framework import status, viewsets @@ -26,46 +26,41 @@ class Meta: fields = ['enabled_ui', 'default_tab', 'default_displayed_layers'] def validate(self, data: Dict[str, Any]) -> Dict[str, Any]: - '''Ensure `default_tab` is within `enabled_ui` and validate `default_displayed_layers`.''' enabled_ui = data.get('enabled_ui', []) default_tab = data.get('default_tab') default_displayed_layers = data.get('default_displayed_layers', []) if default_tab not in enabled_ui: - raise ValidationError({'default_tab': 'The default tab must be one of the enabled features.'}) + raise ValidationError( + {'default_tab': 'The default tab must be one of the enabled features.'} + ) - if not all(isinstance(layer, dict) and 'type' in layer for layer in default_displayed_layers): - raise ValidationError({'default_displayed_layers': 'Each entry must be a dictionary with a "type" field.'}) + if not all( + isinstance(layer, dict) and 'type' in layer for layer in default_displayed_layers + ): + raise ValidationError( + {'default_displayed_layers': 'Each entry must be a dictionary with a "type" field.'} + ) return data class DisplayConfigurationViewSet(viewsets.GenericViewSet): - ''' - ViewSet for managing the single Display Configuration instance. - - - `GET /display_configuration/`: Retrieve the current configuration. - - `PATCH /display_configuration/`: Partially update the configuration. - - `PUT /display_configuration/`: Fully update the configuration. 
- ''' queryset = DisplayConfiguration.objects.all() serializer_class = DisplayConfigurationSerializer def get_object(self) -> DisplayConfiguration: - '''Retrieve or create the single Configuration instance.''' return DisplayConfiguration.objects.first() or DisplayConfiguration.objects.create() @action(detail=False, methods=['get'], url_path='display-configuration') def retrieve(self, request: Request, *args: Any, **kwargs: Any) -> Response: - '''Retrieve the single Display Configuration.''' instance = self.get_object() serializer = self.get_serializer(instance) return Response(serializer.data) @action(detail=False, methods=['put'], url_path='display-configuration') def update(self, request: Request, *args: Any, **kwargs: Any) -> Response: - '''Fully replace the configuration (PUT request).''' instance = self.get_object() serializer = self.get_serializer(instance, data=request.data) serializer.is_valid(raise_exception=True) @@ -74,7 +69,6 @@ def update(self, request: Request, *args: Any, **kwargs: Any) -> Response: @action(detail=False, methods=['patch'], url_path='display-configuration') def partial_update(self, request: Request, *args: Any, **kwargs: Any) -> Response: - '''Partially update the configuration (PATCH request).''' instance = self.get_object() serializer = self.get_serializer(instance, data=request.data, partial=True) serializer.is_valid(raise_exception=True) diff --git a/uvdat/urls.py b/uvdat/urls.py index 0c8c62d..ec37311 100644 --- a/uvdat/urls.py +++ b/uvdat/urls.py @@ -10,6 +10,7 @@ ContextViewSet, DatasetViewSet, DerivedRegionViewSet, + DisplayConfigurationViewSet, FileItemViewSet, LayerCollectionViewSet, LayerRepresentationViewSet, @@ -27,7 +28,6 @@ UserViewSet, VectorFeatureTableDataViewSet, VectorMapLayerViewSet, - DisplayConfigurationViewSet, ) router = routers.SimpleRouter() @@ -68,7 +68,12 @@ path('accounts/', include('allauth.urls')), path('oauth/', include('oauth2_provider.urls')), path('admin/', admin.site.urls), - 
path('api/v1/display-configuration/', DisplayConfigurationViewSet.as_view({'get': 'retrieve', 'put': 'update', 'patch': 'partial_update'})), + path( + 'api/v1/display-configuration/', + DisplayConfigurationViewSet.as_view( + {'get': 'retrieve', 'put': 'update', 'patch': 'partial_update'} + ), + ), path('api/v1/s3-upload/', include('s3_file_field.urls')), path('api/v1/', include(router.urls)), path('api/docs/redoc/', schema_view.with_ui('redoc'), name='docs-redoc'), From 3e5d779fe2f550d740833299ae92a240463c88eb Mon Sep 17 00:00:00 2001 From: Bryon Lewis Date: Mon, 31 Mar 2025 15:29:32 -0400 Subject: [PATCH 07/10] adding admin interface --- client/src/MapStore.ts | 7 + client/src/api/UVDATApi.ts | 14 +- .../vectorFeatureGraphUtils.ts | 2 +- client/src/router/index.ts | 7 + client/src/types.ts | 15 +- client/src/views/Admin/DisplayAdmin.vue | 129 ++++++++++++++++++ client/src/views/HomePage.vue | 7 +- client/src/views/SourceSelection.vue | 17 ++- uvdat/core/admin.py | 10 ++ .../migrations/0004_displayconfiguration.py | 38 +----- uvdat/core/models/__init__.py | 3 +- uvdat/core/models/display_configuration.py | 7 +- uvdat/core/rest/display_configuration.py | 2 +- uvdat/core/rest/map_layers.py | 40 ++++++ uvdat/core/rest/serializers.py | 28 +++- 15 files changed, 269 insertions(+), 57 deletions(-) create mode 100644 client/src/views/Admin/DisplayAdmin.vue diff --git a/client/src/MapStore.ts b/client/src/MapStore.ts index 02fa62e..bb4818a 100644 --- a/client/src/MapStore.ts +++ b/client/src/MapStore.ts @@ -7,6 +7,7 @@ import { ColorFilters, Context, Dataset, + DisplayConfiguration, LayerCollection, NetCDFData, NetCDFLayer, @@ -33,6 +34,8 @@ export default class MapStore { public static proModeButtonEnabled = ref(true); + public static displayConfiguration: Ref = ref({ default_displayed_layers: [], enabled_ui: ['Collections', 'Datasets', 'Metadata'], default_tab: 'Scenarios' }); + // Ability to toggle proMode so Staff users can see what other users see. 
public static proMode = computed(() => MapStore.userIsStaff.value && MapStore.proModeButtonEnabled.value); @@ -103,6 +106,10 @@ export default class MapStore { MapStore.mapLayersByDataset[datasetId] = await UVdatApi.getDatasetLayers(datasetId); } + public static async getDisplayConfiguration() { + MapStore.displayConfiguration.value = await UVdatApi.getDisplayConfiguration(); + } + public static mapLayerFeatureGraphs = computed(() => { const foundMapLayerFeatureGraphs: { name: string, id: number; graphs: VectorFeatureTableGraph[] }[] = []; MapStore.selectedVectorMapLayers.value.forEach((item) => { diff --git a/client/src/api/UVDATApi.ts b/client/src/api/UVDATApi.ts index fdce29d..1900649 100644 --- a/client/src/api/UVDATApi.ts +++ b/client/src/api/UVDATApi.ts @@ -3,6 +3,7 @@ import { ref } from 'vue'; import OauthClient from '@girder/oauth-client/dist/oauth-client'; import { AbstractMapLayer, + AbstractMapLayerListItem, Chart, Context, ContextWithIds, @@ -214,7 +215,8 @@ export default class UVdatApi { return (await UVdatApi.apiClient.delete(`/files/${fileItemId}/`)).data; } - public static async getGlobalDatasets(filter: { unconnected: boolean }): Promise<(Dataset & { contextCount: number })[]> { + + public static async getGlobalDatasets(filter?: { unconnected: boolean }): Promise<(Dataset & { contextCount: number })[]> { return (await UVdatApi.apiClient.get('datasets', { params: { ...filter } })).data.results; } @@ -588,12 +590,16 @@ export default class UVdatApi { return (await UVdatApi.apiClient.get('/map-layers/', { params })).data; } + public static async getMapLayerAll(): Promise { + return (await UVdatApi.apiClient.get('/map-layers/all')).data; + } + public static async searchVectorFeatures(requestData: SearchableVectorDataRequest): Promise { return (await UVdatApi.apiClient.post('/map-layers/search-features/', requestData)).data; } public static async getDisplayConfiguration(): Promise { - const response = await 
UVdatApi.apiClient.get('display_configuration/'); + const response = await UVdatApi.apiClient.get('display-configuration/'); return response.data; } @@ -601,7 +607,7 @@ export default class UVdatApi { public static async updateDisplayConfiguration( config: DisplayConfiguration, ): Promise { - const response = await UVdatApi.apiClient.put('display_configuration/', config); + const response = await UVdatApi.apiClient.put('display-configuration/', config); return response.data; } @@ -609,7 +615,7 @@ export default class UVdatApi { public static async partialUpdateDisplayConfiguration( config: Partial, ): Promise { - const response = await UVdatApi.apiClient.patch('display_configuration/', config); + const response = await UVdatApi.apiClient.patch('display-configuration/', config); return response.data; } } diff --git a/client/src/components/FeatureSelection/vectorFeatureGraphUtils.ts b/client/src/components/FeatureSelection/vectorFeatureGraphUtils.ts index 8a391ed..4194b20 100644 --- a/client/src/components/FeatureSelection/vectorFeatureGraphUtils.ts +++ b/client/src/components/FeatureSelection/vectorFeatureGraphUtils.ts @@ -230,7 +230,7 @@ const renderVectorFeatureGraph = ( g.append('path') .datum(graph.movingAverage) .attr('fill', 'none') - .attr('stroke', '#00FFFF') + .attr('stroke', '#FFFF00') .attr('stroke-width', 5) .attr('d', line) .attr('class', `moving-average moving-average-${key}`); diff --git a/client/src/router/index.ts b/client/src/router/index.ts index 0ed8bc9..e446a28 100644 --- a/client/src/router/index.ts +++ b/client/src/router/index.ts @@ -1,5 +1,6 @@ import { RouterOptions, createWebHistory } from 'vue-router'; import HomePage from '../views/HomePage.vue'; +import DisplayAdmin from '../views/Admin/DisplayAdmin.vue'; function makeOptions(): RouterOptions { return { @@ -10,6 +11,12 @@ function makeOptions(): RouterOptions { // component: HomePage, component: HomePage, }, + { + path: '/admin', + // component: HomePage, + component: DisplayAdmin, + }, 
+ ], }; } diff --git a/client/src/types.ts b/client/src/types.ts index 99bf571..6f9941f 100644 --- a/client/src/types.ts +++ b/client/src/types.ts @@ -903,7 +903,16 @@ export interface SearchableVectorFeatureResponse { } export interface DisplayConfiguration { - enabled_ui: string[]; - default_tab: string; - default_displayed_layers: Array<{ type: string; [key: string]: string }>; + enabled_ui: ('Scenarios' | 'Collections' | 'Datasets' | 'Metadata')[]; + default_tab: 'Scenarios' | 'Collections' | 'Datasets' | 'Metadata'; + default_displayed_layers: Array<{ type: AbstractMapLayer['type']; id: number; name: string }>; +} + +export interface AbstractMapLayerListItem { + id: number; + name: string; + type: AbstractMapLayer['type']; + datset_id: number; + file_item: { id: number, name: string }[]; + processing_tasks?: null | ProcessingTask[] } diff --git a/client/src/views/Admin/DisplayAdmin.vue b/client/src/views/Admin/DisplayAdmin.vue new file mode 100644 index 0000000..eeaf0c7 --- /dev/null +++ b/client/src/views/Admin/DisplayAdmin.vue @@ -0,0 +1,129 @@ + + + + diff --git a/client/src/views/HomePage.vue b/client/src/views/HomePage.vue index 012af9e..951cbf4 100644 --- a/client/src/views/HomePage.vue +++ b/client/src/views/HomePage.vue @@ -1,6 +1,6 @@ + + + + diff --git a/client/src/components/Map.vue b/client/src/components/Map.vue index ce00021..128f887 100644 --- a/client/src/components/Map.vue +++ b/client/src/components/Map.vue @@ -66,6 +66,8 @@ export default defineComponent({ const initializeMap = () => { if (mapContainer.value) { + const center = MapStore.displayConfiguration.value.default_map_settings?.location.center || [-86.1794, 34.8019]; + const zoom = MapStore.displayConfiguration.value.default_map_settings?.location.zoom || 6; map.value = new maplibregl.Map({ container: mapContainer.value, style: { @@ -156,8 +158,8 @@ export default defineComponent({ sprite: 'https://maputnik.github.io/osm-liberty/sprites/osm-liberty', glyphs: 
'https://orangemug.github.io/font-glyphs/glyphs/{fontstack}/{range}.pbf', }, - center: [-86.1794, 34.8019], // Coordinates for the relative center of the TVA - zoom: 6, // Initial zoom level + center, + zoom, }); if (map.value) { setInternalMap(map as Ref); diff --git a/client/src/types.ts b/client/src/types.ts index 955c0c0..11fdf7d 100644 --- a/client/src/types.ts +++ b/client/src/types.ts @@ -906,6 +906,7 @@ export interface DisplayConfiguration { enabled_ui: ('Scenarios' | 'Collections' | 'Datasets' | 'Metadata')[]; default_tab: 'Scenarios' | 'Collections' | 'Datasets' | 'Metadata'; default_displayed_layers: Array<{ type: AbstractMapLayer['type']; id: number; dataset_id: number; name: string }>; + default_map_settings?: { location: { center: [number, number], zoom: number } }; } export interface AbstractMapLayerListItem { diff --git a/client/src/views/Admin/DisplayAdmin.vue b/client/src/views/Admin/DisplayAdmin.vue index 28dfbf0..b1efb2e 100644 --- a/client/src/views/Admin/DisplayAdmin.vue +++ b/client/src/views/Admin/DisplayAdmin.vue @@ -6,9 +6,13 @@ import { AbstractMapLayerListItem, DisplayConfiguration, } from '../../types'; import UVdatApi from '../../api/UVDATApi'; +import MapSelection from '../../components/Admin/MapSelection.vue'; export default defineComponent({ name: 'DisplayConfigurationEditor', + components: { + MapSelection, + }, setup() { const config: Ref = ref(null); const enabledUiOptions = ['Scenarios', 'Collections', 'Datasets', 'Metadata']; @@ -31,7 +35,11 @@ export default defineComponent({ const availableLayers = computed(() => { if (layers.value) { return layers.value.map((item) => ({ - id: item.id, name: item.name, dataset_id: item.dataset_id, type: item.type, index: `${item.type}_${item.id}_${item.dataset_id}`, + id: item.id, + name: item.name, + dataset_id: item.dataset_id, + type: item.type, + index: `${item.type}_${item.id}_${item.dataset_id}`, })); } return []; @@ -57,6 +65,12 @@ export default defineComponent({ } }; + const 
updateDefaultMapSettings = (settings: { location: { center: [number, number]; zoom: number; }; }) => { + if (config.value) { + config.value.default_map_settings = settings; + } + }; + return { config, enabledUiOptions, @@ -65,6 +79,7 @@ export default defineComponent({ snackbar, availableLayers, selectedLayers, + updateDefaultMapSettings, }; }, }); @@ -114,6 +129,10 @@ export default defineComponent({ + diff --git a/client/src/views/HomePage.vue b/client/src/views/HomePage.vue index 7ae0520..a304e5f 100644 --- a/client/src/views/HomePage.vue +++ b/client/src/views/HomePage.vue @@ -32,6 +32,7 @@ export default defineComponent({ }, setup() { const oauthClient = inject('oauthClient'); + const loading = ref(false); const drawerOpen = ref(true); if (oauthClient === undefined) { throw new Error('Must provide "oauthClient" into component.'); @@ -49,7 +50,9 @@ export default defineComponent({ }; onMounted(async () => { + loading.value = true; const layers = await MapStore.getDisplayConfiguration(true); + loading.value = false; layers.forEach((layer) => toggleLayerSelection(layer)); }); @@ -149,6 +152,7 @@ export default defineComponent({ activeSideBar: MapStore.activeSideBarCard, rightSideBarPadding, SideBarHasData, + loading, }; }, }); @@ -313,7 +317,7 @@ export default defineComponent({ > - + diff --git a/uvdat/core/admin.py b/uvdat/core/admin.py index 7bcc1da..9bea800 100644 --- a/uvdat/core/admin.py +++ b/uvdat/core/admin.py @@ -218,6 +218,7 @@ class DisplayConfigurationAdmin(admin.ModelAdmin): 'enabled_ui', 'default_tab', 'default_displayed_layers', + 'default_map_settings', ] diff --git a/uvdat/core/migrations/0004_displayconfiguration.py b/uvdat/core/migrations/0004_displayconfiguration.py index 1b883a7..15e8b5f 100644 --- a/uvdat/core/migrations/0004_displayconfiguration.py +++ b/uvdat/core/migrations/0004_displayconfiguration.py @@ -1,4 +1,4 @@ -# Generated by Django 5.0.7 on 2025-03-31 17:03 +# Generated by Django 5.0.7 on 2025-04-09 13:07 from django.db import 
migrations, models @@ -46,6 +46,15 @@ class Migration(migrations.Migration): help_text="List of map_layers enabled: [{type: 'netcdf', id: 1}. {type: 'vector', id: 3}, {type: 'raster', id: 4}]", ), ), + ( + 'default_map_settings', + models.JSONField( + blank=True, + default=None, + help_text='{location: { center: [x, y], zoom: 5 }}', + null=True, + ), + ), ], ), ] diff --git a/uvdat/core/models/display_configuration.py b/uvdat/core/models/display_configuration.py index 7e99492..928e314 100644 --- a/uvdat/core/models/display_configuration.py +++ b/uvdat/core/models/display_configuration.py @@ -25,6 +25,13 @@ class DisplayConfiguration(models.Model): help_text="List of map_layers enabled: [{type: 'netcdf', id: 1}. {type: 'vector', id: 3}, {type: 'raster', id: 4}]", ) + default_map_settings = models.JSONField( + blank=True, + null=True, + default=None, + help_text='{location: { center: [x, y], zoom: 5 }}', + ) + def clean(self): """Ensure default_tab is within enabled_features.""" super().clean() diff --git a/uvdat/core/rest/display_configuration.py b/uvdat/core/rest/display_configuration.py index 0df6254..80561c0 100644 --- a/uvdat/core/rest/display_configuration.py +++ b/uvdat/core/rest/display_configuration.py @@ -25,10 +25,13 @@ class DisplayConfigurationSerializer(serializers.ModelSerializer): help_text='Default tab, must be one of the enabled features.' ) default_displayed_layers = serializers.ListField(child=LayerSerializer()) + default_map_settings = serializers.JSONField( + help_text='Map settings, e.g., {"location": {"center": [x, y], "zoom": 5}}.' + ) class Meta: model = DisplayConfiguration - fields = ['enabled_ui', 'default_tab', 'default_displayed_layers'] + fields = ['enabled_ui', 'default_tab', 'default_displayed_layers', 'default_map_settings'] def validate(self, data: Dict[str, Any]) -> Dict[str, Any]: enabled_ui = data.get('enabled_ui', [])