diff --git a/README.md b/README.md index 5a01ca1..a6c9f1a 100644 --- a/README.md +++ b/README.md @@ -100,9 +100,10 @@ Your workflow may determine how you use NEMOSIS. Because the GUI relies on data ```python from nemosis import dynamic_data_compiler +from datetime import datetime -start_time = '2017/01/01 00:00:00' -end_time = '2017/01/01 00:05:00' +start_time = datetime(2017, 1, 1, 0, 0) +end_time = datetime(2017, 1, 1, 0, 5) table = 'DISPATCHPRICE' raw_data_cache = 'C:/Users/your_data_storage' @@ -113,6 +114,8 @@ Using the default settings of `dynamic_data_compiler` will download CSV data fro A number of options are available to configure filtering (i.e. what data NEMOSIS returns as a pandas DataFrame) and caching. +For `start_time` and `end_time` you can pass a timezone-unaware `datetime` (treated as market time), a `date`, or a string of the form "YYYY/MM/DD HH:MM:SS", e.g. `2017/01/01 00:00:00`. A `date` passed as `start_time` means midnight at the start of that day; a `date` passed as `end_time` means midnight at the end of that day, so the whole day is covered. + ###### Filter options `dynamic_data_compiler` can be used to filter data before returning results. @@ -206,8 +209,8 @@ from nemosis import defaults defaults.table_columns['BIDPEROFFER_D'] += ['PASAAVAILABILITY'] -start_time = '2017/01/01 00:00:00' -end_time = '2017/01/01 00:05:00' +start_time = datetime(2017, 1, 1, 0, 0) +end_time = datetime(2017, 1, 1, 0, 5) table = 'BIDPEROFFER_D' raw_data_cache = 'C:/Users/your_data_storage' diff --git a/src/nemosis/__init__.py b/src/nemosis/__init__.py index 8631110..547f763 100644 --- a/src/nemosis/__init__.py +++ b/src/nemosis/__init__.py @@ -1,6 +1,6 @@ import logging import sys -from .value_parser import _parse_datetime, _parse_column, _infer_column_data_types +from .value_parser import _parse_column, _infer_column_data_types from .data_fetch_methods import * name = "osdan" diff --git a/src/nemosis/custom_tables.py b/src/nemosis/custom_tables.py index 429a1c9..1826fec 100644 --- a/src/nemosis/custom_tables.py +++ b/src/nemosis/custom_tables.py @@ -2,8 +2,10 @@ from datetime import timedelta, datetime import math import numpy as np -from nemosis 
import defaults, data_fetch_methods, filters - +from nemosis import defaults +from nemosis.date_generators import parse_datetime_py +from nemosis.filters import filter_on_column_value +from nemosis.data_fetch_methods import dynamic_data_compiler, static_table def fcas4s_scada_match( start_time, @@ -17,12 +19,12 @@ def fcas4s_scada_match( # Pull in the 4 second fcas data. table_name_fcas4s = "FCAS_4_SECOND" - fcas4s = data_fetch_methods.dynamic_data_compiler( + fcas4s = dynamic_data_compiler( start_time, end_time, table_name_fcas4s, raw_data_location ) # Pull in the 4 second fcas variable types. table_name_variable_types = "VARIABLES_FCAS_4_SECOND" - fcas4s_variable_types = data_fetch_methods.static_table( + fcas4s_variable_types = static_table( table_name_variable_types, raw_data_location ) @@ -52,7 +54,7 @@ def fcas4s_scada_match( # Pull in the dispatch unit scada data. table_name_scada = "DISPATCH_UNIT_SCADA" - scada = data_fetch_methods.dynamic_data_compiler( + scada = dynamic_data_compiler( start_time, end_time, table_name_scada, raw_data_location ) scada["SETTLEMENTDATE"] = scada["SETTLEMENTDATE"] - timedelta(minutes=5) @@ -62,7 +64,7 @@ def fcas4s_scada_match( # Pull in the interconnector scada data and use the intervention records where the exist. 
table_name_inter_flow = "DISPATCHINTERCONNECTORRES" - inter_flows = data_fetch_methods.dynamic_data_compiler( + inter_flows = dynamic_data_compiler( start_time, end_time, table_name_inter_flow, raw_data_location ) inter_flows["METEREDMWFLOW"] = pd.to_numeric(inter_flows["METEREDMWFLOW"]) @@ -144,7 +146,7 @@ def fcas4s_scada_match( best_matches_scada = best_matches_scada.loc[:, select_columns] if filter_cols is not None: - best_matches_scada = filters.filter_on_column_value( + best_matches_scada = filter_on_column_value( best_matches_scada, filter_cols, filter_values ) @@ -408,15 +410,15 @@ def plant_stats( ): ix = pd.date_range( - start=datetime.strptime(start_time, "%Y/%m/%d %H:%M:%S"), - end=datetime.strptime(end_time, "%Y/%m/%d %H:%M:%S") - timedelta(minutes=5), + start=parse_datetime_py(start_time, midnight='start'), + end=parse_datetime_py(end_time, midnight='end') - timedelta(minutes=5), freq="5min", ) timeseries_df = pd.DataFrame(index=ix) timeseries_df.reset_index(inplace=True) timeseries_df.columns = ["SETTLEMENTDATE"] - gen_max_cap = data_fetch_methods.dynamic_data_compiler( + gen_max_cap = dynamic_data_compiler( start_time, end_time, "DUDETAIL", @@ -428,7 +430,7 @@ def plant_stats( gen_max_cap = select_highest_version_number( gen_max_cap, defaults.table_primary_keys["DUDETAIL"] ) - gen_region = data_fetch_methods.dynamic_data_compiler( + gen_region = dynamic_data_compiler( start_time, end_time, "DUDETAILSUMMARY", @@ -437,14 +439,14 @@ def plant_stats( filter_cols=filter_cols, filter_values=filter_values, ) - scada = data_fetch_methods.dynamic_data_compiler( + scada = dynamic_data_compiler( start_time, end_time, "DISPATCH_UNIT_SCADA", raw_data_location, select_columns=["SETTLEMENTDATE", "DUID", "SCADAVALUE"], ) - dispatch_price = data_fetch_methods.dynamic_data_compiler( + dispatch_price = dynamic_data_compiler( start_time, end_time, "DISPATCHPRICE", @@ -454,7 +456,7 @@ def plant_stats( dispatch_price = select_intervention_if_present( dispatch_price, 
defaults.table_primary_keys["DISPATCHPRICE"] ) - trading_price = data_fetch_methods.dynamic_data_compiler( + trading_price = dynamic_data_compiler( start_time, end_time, "TRADINGPRICE", @@ -465,7 +467,7 @@ def plant_stats( trading_price["RRP"] = pd.to_numeric(trading_price["RRP"]) # trading_price = calc_trading_price(dispatch_price) - region_summary = data_fetch_methods.dynamic_data_compiler( + region_summary = dynamic_data_compiler( start_time, end_time, "DISPATCHREGIONSUM", @@ -517,14 +519,14 @@ def plant_stats( def trading_and_dispatch_cost(): - gen_region = data_fetch_methods.dynamic_data_compiler( + gen_region = dynamic_data_compiler( "2017/01/01 00:05:00", "2018/01/01 00:05:00", "DUDETAILSUMMARY", defaults.raw_data_cache, select_columns=["START_DATE", "END_DATE", "DUID", "REGIONID"], ) - scada = data_fetch_methods.dynamic_data_compiler( + scada = dynamic_data_compiler( "2017/01/01 00:05:00", "2018/01/01 00:05:00", "DISPATCH_UNIT_SCADA", @@ -532,8 +534,8 @@ def trading_and_dispatch_cost(): ) ix = pd.date_range( - start=datetime.strptime("2017/01/01 00:00:00", "%Y/%m/%d %H:%M:%S"), - end=datetime.strptime("2018/01/01 00:00:00", "%Y/%m/%d %H:%M:%S"), + start=datetime(2017, 1, 1), + end=datetime(2018, 1, 1), freq="5min", ) timeseries_df = pd.DataFrame(index=ix) @@ -551,7 +553,7 @@ def trading_and_dispatch_cost(): scada = pd.concat(scada_list) - dispatch_price = data_fetch_methods.dynamic_data_compiler( + dispatch_price = dynamic_data_compiler( "2017/01/01 00:00:00", "2018/01/01 00:05:00", "DISPATCHPRICE", diff --git a/src/nemosis/data_fetch_methods.py b/src/nemosis/data_fetch_methods.py index c82d829..d61501a 100644 --- a/src/nemosis/data_fetch_methods.py +++ b/src/nemosis/data_fetch_methods.py @@ -3,13 +3,13 @@ import glob as _glob import pandas as _pd from datetime import datetime as _datetime, timedelta as _timedelta -from nemosis import filters as _filters from nemosis import downloader as _downloader +from nemosis.filters import filter_on_column_value as 
_filter_on_column_value from nemosis import processing_info_maps as _processing_info_maps from nemosis import date_generators as _date_generators from nemosis import defaults as _defaults -from nemosis import custom_tables as _custom_tables -from nemosis import _infer_column_data_types +from nemosis.value_parser import _infer_column_data_types +from nemosis.date_generators import parse_datetime_py as _parse_datetime_py from nemosis.custom_errors import UserInputError, NoDataToReturn, DataMismatchError logger = logging.getLogger(__name__) @@ -35,8 +35,12 @@ def dynamic_data_compiler( will save data typed as strings/objects. To save typed data (e.g. appropriate cols are Float or Int), use cache_compiler. Args: - start_time (str): format 'yyyy/mm/dd HH:MM:SS'. - end_time (str): format 'yyyy/mm/dd HH:MM:SS'. + start_time (datetime): A native datetime. (Timezone unaware) + For legacy reasons, may be a string + of format 'yyyy/mm/dd HH:MM:SS'. + end_time (datetime): A native datetime. (Timezone unaware) + For legacy reasons, may be a string + of format 'yyyy/mm/dd HH:MM:SS'. table_name (str): table as per Wiki. raw_data_location (str): directory to download and cache data to. existing data will be used if in this dir. 
@@ -107,9 +111,10 @@ def dynamic_data_compiler( logger.info(f"Compiling data for table {table_name}") - start_time = _datetime.strptime(start_time, "%Y/%m/%d %H:%M:%S") - end_time = _datetime.strptime(end_time, "%Y/%m/%d %H:%M:%S") - start_search = _datetime.strptime(start_search, "%Y/%m/%d %H:%M:%S") + # cast from string to datetime, if not already datetime + start_time = _parse_datetime_py(start_time, midnight='start') + end_time = _parse_datetime_py(end_time, midnight='end') + start_search = _parse_datetime_py(start_search, midnight='start') data_tables = _dynamic_data_fetch_loop( start_search, start_time, @@ -140,7 +145,7 @@ def dynamic_data_compiler( ] UserInputError(f"Filter columns {missing_columns} not in data.") else: - all_data = _filters.filter_on_column_value( + all_data = _filter_on_column_value( all_data, filter_cols, filter_values ) logger.info(f"Returning {table_name}.") @@ -178,8 +183,12 @@ def cache_compiler( that is used to store csvs (such as the cache for the GUI). Args: - start_time (str): format 'yyyy/mm/dd HH:MM:SS'. - end_time (str): format 'yyyy/mm/dd HH:MM:SS'. + start_time (datetime): A native datetime. (Timezone unaware) + For legacy reasons, may be a string + of format 'yyyy/mm/dd HH:MM:SS'. + end_time (datetime): A native datetime. (Timezone unaware) + For legacy reasons, may be a string + of format 'yyyy/mm/dd HH:MM:SS'. table_name (str): table as per Wiki. raw_data_location (str): directory to download and cache data to. existing data will be used if in this dir. 
@@ -239,9 +248,9 @@ def cache_compiler( _, start_search, ) = _set_up_dynamic_compilers(table_name, start_time, end_time, select_columns) - start_time = _datetime.strptime(start_time, "%Y/%m/%d %H:%M:%S") - end_time = _datetime.strptime(end_time, "%Y/%m/%d %H:%M:%S") - start_search = _datetime.strptime(start_search, "%Y/%m/%d %H:%M:%S") + start_time = _parse_datetime_py(start_time, midnight='start') + end_time = _parse_datetime_py(end_time, midnight='end') + start_search = _parse_datetime_py(start_search, midnight='start') _dynamic_data_fetch_loop( start_search, @@ -355,7 +364,7 @@ def static_table( missing_columns = [col for col in filter_cols if col not in table.columns] UserInputError(f"Filter columns {missing_columns} not in data.") else: - table = _filters.filter_on_column_value(table, filter_cols, filter_values) + table = _filter_on_column_value(table, filter_cols, filter_values) static_table_finalisers = static_data_finaliser_map[table_name] for finaliser in static_table_finalisers: @@ -878,44 +887,3 @@ def _static_table_wrapper_for_gui( ) return table - -_method_map = { - "DISPATCHLOAD": _dynamic_data_wrapper_for_gui, - "DISPATCHPRICE": _dynamic_data_wrapper_for_gui, - "TRADINGLOAD": _dynamic_data_wrapper_for_gui, - "TRADINGPRICE": _dynamic_data_wrapper_for_gui, - "TRADINGREGIONSUM": _dynamic_data_wrapper_for_gui, - "TRADINGINTERCONNECT": _dynamic_data_wrapper_for_gui, - "DISPATCH_UNIT_SCADA": _dynamic_data_wrapper_for_gui, - "DISPATCHCONSTRAINT": _dynamic_data_wrapper_for_gui, - "DUDETAILSUMMARY": _dynamic_data_wrapper_for_gui, - "PARTICIPANT": _dynamic_data_wrapper_for_gui, - "DUDETAIL": _dynamic_data_wrapper_for_gui, - "GENCONDATA": _dynamic_data_wrapper_for_gui, - "SPDREGIONCONSTRAINT": _dynamic_data_wrapper_for_gui, - "SPDCONNECTIONPOINTCONSTRAINT": _dynamic_data_wrapper_for_gui, - "SPDINTERCONNECTORCONSTRAINT": _dynamic_data_wrapper_for_gui, - "FCAS_4_SECOND": _dynamic_data_wrapper_for_gui, - "ELEMENTS_FCAS_4_SECOND": _static_table_wrapper_for_gui, - 
"VARIABLES_FCAS_4_SECOND": _static_table_wrapper_for_gui, - "Generators and Scheduled Loads": _static_table_wrapper_for_gui, - "FCAS Providers": _static_table_wrapper_for_gui, - "BIDDAYOFFER_D": _dynamic_data_wrapper_for_gui, - "BIDPEROFFER_D": _dynamic_data_wrapper_for_gui, - "FCAS_4s_SCADA_MAP": _custom_tables.fcas4s_scada_match, - "PLANTSTATS": _custom_tables.plant_stats, - "DISPATCHINTERCONNECTORRES": _dynamic_data_wrapper_for_gui, - "DISPATCHREGIONSUM": _dynamic_data_wrapper_for_gui, - "LOSSMODEL": _dynamic_data_wrapper_for_gui, - "LOSSFACTORMODEL": _dynamic_data_wrapper_for_gui, - "MNSP_DAYOFFER": _dynamic_data_wrapper_for_gui, - "MNSP_PEROFFER": _dynamic_data_wrapper_for_gui, - "MNSP_INTERCONNECTOR": _dynamic_data_wrapper_for_gui, - "INTERCONNECTOR": _dynamic_data_wrapper_for_gui, - "INTERCONNECTORCONSTRAINT": _dynamic_data_wrapper_for_gui, - "MARKET_PRICE_THRESHOLDS": _dynamic_data_wrapper_for_gui, - "DAILY_REGION_SUMMARY": _dynamic_data_wrapper_for_gui, - "NEXT_DAY_DISPATCHLOAD": _dynamic_data_wrapper_for_gui, - "INTERMITTENT_GEN_SCADA": _dynamic_data_wrapper_for_gui, - "ROOFTOP_PV_ACTUAL": _dynamic_data_wrapper_for_gui -} diff --git a/src/nemosis/date_generators.py b/src/nemosis/date_generators.py index 69c5897..b57b98c 100644 --- a/src/nemosis/date_generators.py +++ b/src/nemosis/date_generators.py @@ -1,7 +1,7 @@ import logging from nemosis import defaults from calendar import monthrange -from datetime import timedelta +from datetime import timedelta, datetime, date logger = logging.getLogger(__name__) @@ -134,3 +134,34 @@ def current_gen(start_time, end_time): ): continue yield str(year), month, str(day).zfill(2), None + +def parse_datetime_py(t, midnight='start'): + """ + Takes in a string of a datetime, native datetime or date + Returns a datetime. + If midnight='end', dates will be converted to the midnight at the end of the day. + If midnight='start', dates will be converted to the midnight at the start of the day. 
+ If not a date, the midnight argument is ignored. + This is not intended to be used for conversions within Pandas/CSV/Parquet etc. + """ + if isinstance(t, str): + return datetime.strptime(t, defaults.nemosis_date_format) + elif isinstance(t, datetime): + if t.utcoffset() is not None: + raise ValueError(f"Conversion between timezones not implemented. (Even if it's market time.) " + f"For {t}, pass a timezone unaware version, which will be treated as market time.") + return t + elif isinstance(t, date): + if midnight not in ['start', 'end']: + raise ValueError(f"midnight must be 'start' or 'end', got {midnight}") + + elif midnight == 'end': + # end of this day + # is the start of the next day + t = t + timedelta(days=1) + midnight = 'start' + + return datetime.combine(t, datetime.min.time()) + + else: + raise ValueError(f"Unsure how to handle datetime {t} of unexpected type {type(t)}") \ No newline at end of file diff --git a/src/nemosis/defaults.py b/src/nemosis/defaults.py index b348029..1459d4c 100644 --- a/src/nemosis/defaults.py +++ b/src/nemosis/defaults.py @@ -892,6 +892,10 @@ '%Y-%m-%d %H:%M:%S' ] +# If a user passes a datetime to us, as a string, +# it should be in this format. +nemosis_date_format = "%Y/%m/%d %H:%M:%S" + # data is available from 2009 # however for 2009-2014, the structure in MMSDM is different. # It's one zip per month, with all tables combined. 
diff --git a/src/nemosis/filters.py b/src/nemosis/filters.py index f4a0247..4926024 100644 --- a/src/nemosis/filters.py +++ b/src/nemosis/filters.py @@ -2,30 +2,30 @@ from datetime import datetime, timedelta import numpy as np -from nemosis.value_parser import _parse_datetime +from nemosis.value_parser import _parse_datetime_np logger = logging.getLogger(__name__) def filter_on_start_and_end_date(data, start_time, end_time): - data["START_DATE"] = _parse_datetime(data["START_DATE"]) + data["START_DATE"] = _parse_datetime_np(data["START_DATE"]) data["END_DATE"] = np.where( data["END_DATE"] == "2999/12/31 00:00:00", "2100/12/31 00:00:00", data["END_DATE"], ) - data["END_DATE"] = _parse_datetime(data["END_DATE"]) + data["END_DATE"] = _parse_datetime_np(data["END_DATE"]) data = data[(data["START_DATE"] < end_time) & (data["END_DATE"] > start_time)] return data def filter_on_effective_date(data, start_time, end_time): - data["EFFECTIVEDATE"] = _parse_datetime(data["EFFECTIVEDATE"]) + data["EFFECTIVEDATE"] = _parse_datetime_np(data["EFFECTIVEDATE"]) data = data[data["EFFECTIVEDATE"] < end_time] return data def filter_on_settlementdate(data, start_time, end_time): - data["SETTLEMENTDATE"] = _parse_datetime(data["SETTLEMENTDATE"]) + data["SETTLEMENTDATE"] = _parse_datetime_np(data["SETTLEMENTDATE"]) data = data[ (data["SETTLEMENTDATE"] > start_time) & (data["SETTLEMENTDATE"] <= end_time) ] @@ -33,7 +33,7 @@ def filter_on_settlementdate(data, start_time, end_time): def filter_on_run_datetime(data, start_time, end_time): - data["RUN_DATETIME"] = _parse_datetime(data["RUN_DATETIME"]) + data["RUN_DATETIME"] = _parse_datetime_np(data["RUN_DATETIME"]) data = data[ (data["RUN_DATETIME"] > start_time) & (data["RUN_DATETIME"] <= end_time) ] @@ -42,14 +42,14 @@ def filter_on_run_datetime(data, start_time, end_time): def filter_on_timestamp(data, start_time, end_time): try: - data["TIMESTAMP"] = _parse_datetime(data["TIMESTAMP"]) + data["TIMESTAMP"] = 
_parse_datetime_np(data["TIMESTAMP"]) except ValueError as e: logger.error(e) # if date format is wrong, str may be too short med_str_len = np.median(data["TIMESTAMP"].str.len()) not_data = data.loc[data["TIMESTAMP"].str.len() < med_str_len, :] data = data.loc[data["TIMESTAMP"].str.len() >= med_str_len, :] - data["TIMESTAMP"] = _parse_datetime(data["TIMESTAMP"]) + data["TIMESTAMP"] = _parse_datetime_np(data["TIMESTAMP"]) logger.warning("Rows with incorrect data formats omitted") logger.warning(not_data.head()) finally: @@ -58,7 +58,7 @@ def filter_on_timestamp(data, start_time, end_time): def filter_on_interval_datetime(data, start_time, end_time): - data["INTERVAL_DATETIME"] = _parse_datetime(data["INTERVAL_DATETIME"]) + data["INTERVAL_DATETIME"] = _parse_datetime_np(data["INTERVAL_DATETIME"]) data = data[ (data["INTERVAL_DATETIME"] > start_time) & (data["INTERVAL_DATETIME"] <= end_time) @@ -78,7 +78,7 @@ def filter_on_date_and_peroid(data, start_time, end_time): # Not tested, just for nemlite integration. def filter_on_date_and_interval(data, start_time, end_time): - data["SETTLEMENTDATE"] = _parse_datetime(data["SETTLEMENTDATE"]) + data["SETTLEMENTDATE"] = _parse_datetime_np(data["SETTLEMENTDATE"]) data = data[ (data["SETTLEMENTDATE"] > start_time) & (data["SETTLEMENTDATE"] <= end_time) ] @@ -87,7 +87,7 @@ def filter_on_date_and_interval(data, start_time, end_time): # Not tested, just for nemlite integration. 
def filter_on_last_changed(data, start_time, end_time): - data["LASTCHANGED"] = _parse_datetime(data["LASTCHANGED"]) + data["LASTCHANGED"] = _parse_datetime_np(data["LASTCHANGED"]) data = data[data["LASTCHANGED"] < end_time] return data diff --git a/src/nemosis/gui.py b/src/nemosis/gui.py index 2adfb42..5b88e96 100644 --- a/src/nemosis/gui.py +++ b/src/nemosis/gui.py @@ -1,4 +1,5 @@ -from nemosis import rows, defaults, data_fetch_methods +from nemosis import rows, defaults +from nemosis.method_map import _method_map import pandas as pd import tkinter as tk import tkinter.ttk as ttk @@ -362,7 +363,7 @@ def run_query(self, row, raw_data_location): start_time = row.start_time.get() end_time = row.end_time.get() # Call the query using the tables predefined wraper function. - result = data_fetch_methods._method_map[table]( + result = _method_map[table]( start_time, end_time, table, diff --git a/src/nemosis/method_map.py b/src/nemosis/method_map.py new file mode 100644 index 0000000..cafd613 --- /dev/null +++ b/src/nemosis/method_map.py @@ -0,0 +1,43 @@ +from nemosis.custom_tables import fcas4s_scada_match, plant_stats +from nemosis.data_fetch_methods import _dynamic_data_wrapper_for_gui, _static_table_wrapper_for_gui + +_method_map = { + "DISPATCHLOAD": _dynamic_data_wrapper_for_gui, + "DISPATCHPRICE": _dynamic_data_wrapper_for_gui, + "TRADINGLOAD": _dynamic_data_wrapper_for_gui, + "TRADINGPRICE": _dynamic_data_wrapper_for_gui, + "TRADINGREGIONSUM": _dynamic_data_wrapper_for_gui, + "TRADINGINTERCONNECT": _dynamic_data_wrapper_for_gui, + "DISPATCH_UNIT_SCADA": _dynamic_data_wrapper_for_gui, + "DISPATCHCONSTRAINT": _dynamic_data_wrapper_for_gui, + "DUDETAILSUMMARY": _dynamic_data_wrapper_for_gui, + "PARTICIPANT": _dynamic_data_wrapper_for_gui, + "DUDETAIL": _dynamic_data_wrapper_for_gui, + "GENCONDATA": _dynamic_data_wrapper_for_gui, + "SPDREGIONCONSTRAINT": _dynamic_data_wrapper_for_gui, + "SPDCONNECTIONPOINTCONSTRAINT": _dynamic_data_wrapper_for_gui, + 
"SPDINTERCONNECTORCONSTRAINT": _dynamic_data_wrapper_for_gui, + "FCAS_4_SECOND": _dynamic_data_wrapper_for_gui, + "ELEMENTS_FCAS_4_SECOND": _static_table_wrapper_for_gui, + "VARIABLES_FCAS_4_SECOND": _static_table_wrapper_for_gui, + "Generators and Scheduled Loads": _static_table_wrapper_for_gui, + "FCAS Providers": _static_table_wrapper_for_gui, + "BIDDAYOFFER_D": _dynamic_data_wrapper_for_gui, + "BIDPEROFFER_D": _dynamic_data_wrapper_for_gui, + "FCAS_4s_SCADA_MAP": fcas4s_scada_match, + "PLANTSTATS": plant_stats, + "DISPATCHINTERCONNECTORRES": _dynamic_data_wrapper_for_gui, + "DISPATCHREGIONSUM": _dynamic_data_wrapper_for_gui, + "LOSSMODEL": _dynamic_data_wrapper_for_gui, + "LOSSFACTORMODEL": _dynamic_data_wrapper_for_gui, + "MNSP_DAYOFFER": _dynamic_data_wrapper_for_gui, + "MNSP_PEROFFER": _dynamic_data_wrapper_for_gui, + "MNSP_INTERCONNECTOR": _dynamic_data_wrapper_for_gui, + "INTERCONNECTOR": _dynamic_data_wrapper_for_gui, + "INTERCONNECTORCONSTRAINT": _dynamic_data_wrapper_for_gui, + "MARKET_PRICE_THRESHOLDS": _dynamic_data_wrapper_for_gui, + "DAILY_REGION_SUMMARY": _dynamic_data_wrapper_for_gui, + "NEXT_DAY_DISPATCHLOAD": _dynamic_data_wrapper_for_gui, + "INTERMITTENT_GEN_SCADA": _dynamic_data_wrapper_for_gui, + "ROOFTOP_PV_ACTUAL": _dynamic_data_wrapper_for_gui +} diff --git a/src/nemosis/query_wrappers.py b/src/nemosis/query_wrappers.py index 34506c2..7474bd0 100644 --- a/src/nemosis/query_wrappers.py +++ b/src/nemosis/query_wrappers.py @@ -1,15 +1,25 @@ import pandas as pd from datetime import datetime, timedelta -from nemosis import defaults, _parse_datetime +from nemosis import defaults +from nemosis.date_generators import parse_datetime_py +from nemosis.value_parser import _parse_datetime_np + + +# Setup functions normalise their start_time / end_time inputs via +# parse_datetime_py so they accept any of the public input shapes +# (str / datetime / date) — see #44, #53. 
They still return strings +# because the existing downstream contract feeds back through +# _parse_datetime_py again in dynamic_data_compiler / cache_compiler; +# returning datetimes would also work but enlarges the diff. def dispatch_date_setup(start_time, end_time): - start_time = datetime.strptime(start_time, "%Y/%m/%d %H:%M:%S") + start_time = parse_datetime_py(start_time, midnight='start') start_time = start_time - timedelta(hours=4) start_time = start_time.replace(hour=0, minute=0) start_time = start_time - timedelta(seconds=1) start_time = datetime.isoformat(start_time).replace("-", "/").replace("T", " ") - end_time = datetime.strptime(end_time, "%Y/%m/%d %H:%M:%S") + end_time = parse_datetime_py(end_time, midnight='end') end_time = end_time - timedelta(hours=4, seconds=1) end_time = datetime.isoformat(end_time).replace("-", "/").replace("T", " ") end_time = end_time[:10] @@ -19,7 +29,7 @@ def dispatch_date_setup(start_time, end_time): def dispatch_half_hour_setup(start_time, end_time): - start_time = datetime.strptime(start_time, "%Y/%m/%d %H:%M:%S") + start_time = parse_datetime_py(start_time, midnight='start') start_time = datetime( year=start_time.year, month=start_time.month, @@ -70,5 +80,5 @@ def drop_duplicates_by_primary_key(data, start_time, table_name): def convert_genconid_effectivedate_to_datetime_format(data, start_time, table_name): if "GENCONID_EFFECTIVEDATE" in data.columns: - data["GENCONID_EFFECTIVEDATE"] = _parse_datetime(data["GENCONID_EFFECTIVEDATE"]) + data["GENCONID_EFFECTIVEDATE"] = _parse_datetime_np(data["GENCONID_EFFECTIVEDATE"]) return data diff --git a/src/nemosis/value_parser.py b/src/nemosis/value_parser.py index b6dc92f..7dbcb4e 100644 --- a/src/nemosis/value_parser.py +++ b/src/nemosis/value_parser.py @@ -3,7 +3,7 @@ from nemosis import defaults as _defaults -def _parse_datetime(series): +def _parse_datetime_np(series): """ Attempts to parse a column into a datetime If unable to (because the data is not a datetime), will 
raise a ValueError @@ -38,7 +38,7 @@ def _parse_column(series): """ try: - return _parse_datetime(series) + return _parse_datetime_np(series) except ValueError: try: col_new = pd.to_numeric(series) diff --git a/tests/end_to_end_table_tests/test_datetime_inputs.py b/tests/end_to_end_table_tests/test_datetime_inputs.py new file mode 100644 index 0000000..050ffa5 --- /dev/null +++ b/tests/end_to_end_table_tests/test_datetime_inputs.py @@ -0,0 +1,117 @@ +"""End-to-end tests for accepting datetime / date inputs to the public +compilers (#44, #53). + +For legacy reasons NEMOSIS has always taken `start_time` / `end_time` +as strings of `"YYYY/MM/DD HH:MM:SS"`. As of this change, callers may +also pass: + +- a timezone-naive `datetime.datetime` — used verbatim (treated as + market time) +- a `datetime.date` — converted to midnight; `end_time` resolves to + midnight at the *start of the next day* so a request bounded by a + date covers the whole of that date + +These tests verify the equivalence at the public API level: the same +window expressed three different ways must produce the same data. +""" +from datetime import date, datetime + +import pandas as pd +import pytest +from pandas.testing import assert_frame_equal + +from nemosis import cache_compiler, dynamic_data_compiler + + +# 30-minute window in May 2018 — long enough to fetch real dispatch +# price rows from the fixture, short enough to keep the tests fast. 
+START_STR = "2018/05/01 00:00:00" +END_STR = "2018/05/01 00:30:00" +START_DT = datetime(2018, 5, 1, 0, 0, 0) +END_DT = datetime(2018, 5, 1, 0, 30, 0) + + +def _sort_for_compare(df): + """Order rows deterministically so frame equality is robust to + upstream sort instability.""" + return df.sort_values(list(df.columns)).reset_index(drop=True) + + +def test_dynamic_data_compiler_accepts_datetime(nemosis_fixture): + """A `datetime` and a matching string MUST return identical data.""" + via_str = dynamic_data_compiler( + start_time=START_STR, end_time=END_STR, + table_name="DISPATCHPRICE", + raw_data_location=str(nemosis_fixture), + ) + via_dt = dynamic_data_compiler( + start_time=START_DT, end_time=END_DT, + table_name="DISPATCHPRICE", + raw_data_location=str(nemosis_fixture), + ) + assert_frame_equal(_sort_for_compare(via_str), _sort_for_compare(via_dt)) + + +def test_dynamic_data_compiler_accepts_date(nemosis_fixture): + """A `date` end_time must resolve to midnight at the *start of the + next day* so the day is fully covered. 
Whole-day-of-May-1 expressed + via two dates must equal the same range expressed via strings.""" + via_str = dynamic_data_compiler( + start_time="2018/05/01 00:00:00", end_time="2018/05/02 00:00:00", + table_name="DISPATCHPRICE", + raw_data_location=str(nemosis_fixture), + ) + via_date = dynamic_data_compiler( + start_time=date(2018, 5, 1), end_time=date(2018, 5, 1), + table_name="DISPATCHPRICE", + raw_data_location=str(nemosis_fixture), + ) + assert_frame_equal(_sort_for_compare(via_str), _sort_for_compare(via_date)) + + +def test_cache_compiler_accepts_datetime(nemosis_fixture): + """cache_compiler must accept the same input shapes as + dynamic_data_compiler — just confirms no exception and that a + feather lands in the cache.""" + cache_compiler( + start_time=START_DT, end_time=END_DT, + table_name="DISPATCHPRICE", + raw_data_location=str(nemosis_fixture), + ) + assert list(nemosis_fixture.glob("*DISPATCHPRICE*.feather")), ( + "cache should be populated when datetime inputs are supplied" + ) + + +def test_cache_compiler_accepts_date(nemosis_fixture): + """date inputs flow through cache_compiler the same way.""" + cache_compiler( + start_time=date(2018, 5, 1), end_time=date(2018, 5, 1), + table_name="DISPATCHPRICE", + raw_data_location=str(nemosis_fixture), + ) + assert list(nemosis_fixture.glob("*DISPATCHPRICE*.feather")) + + +# --------------------------------------------------------------------------- +# Tables with a `setup_function` in processing_info_maps massage start/end +# before the main fetch loop. Those wrappers (dispatch_date_setup, +# dispatch_half_hour_setup) used to call `datetime.strptime` directly, +# which broke when the public API started accepting datetime/date inputs. +# This regression test covers the BIDDAYOFFER_D path (dispatch_date_setup). 
+# --------------------------------------------------------------------------- + +def test_dispatch_date_setup_path_accepts_datetime(nemosis_fixture): + """BIDDAYOFFER_D routes through dispatch_date_setup before the main + fetch loop — verify datetime inputs survive that pre-processing.""" + via_str = dynamic_data_compiler( + start_time="2018/05/01 00:00:00", end_time="2018/05/01 00:30:00", + table_name="BIDDAYOFFER_D", + raw_data_location=str(nemosis_fixture), + ) + via_dt = dynamic_data_compiler( + start_time=datetime(2018, 5, 1), end_time=datetime(2018, 5, 1, 0, 30), + table_name="BIDDAYOFFER_D", + raw_data_location=str(nemosis_fixture), + ) + assert_frame_equal(_sort_for_compare(via_str), _sort_for_compare(via_dt)) diff --git a/tests/test_date_generators.py b/tests/test_date_generators.py index 9d6d7b4..292f682 100644 --- a/tests/test_date_generators.py +++ b/tests/test_date_generators.py @@ -1,6 +1,9 @@ import unittest -from datetime import datetime -from nemosis import date_generators +from datetime import datetime, date + +import pytz + +from nemosis import defaults, date_generators class TestYearAndMonthGen(unittest.TestCase): @@ -357,3 +360,50 @@ def test_no_missing_values_in_a_week(self): self.assertEqual(times[-1][2], "07") self.assertEqual(times[-1][3], "0000") self.assertEqual(len(times), 1740) + +class TestParseDatetimePy(unittest.TestCase): + def test_dt_to_dt(self): + # if we pass in a dt (no timezone) we get the same thing back + dt = datetime(2023, 1, 1, 12, 0, 0) + result = date_generators.parse_datetime_py(dt) + self.assertEqual(result, dt) + self.assertIsNone(result.tzinfo) + + def test_dt_to_dt_with_tz(self): + # if we pass in a dt with a timezone specified, we get an exception thrown + tz = pytz.timezone('Australia/Brisbane') + dt_with_tz = tz.localize(datetime(2023, 1, 1, 12, 0, 0)) + with self.assertRaises((ValueError, TypeError, AssertionError)): + date_generators.parse_datetime_py(dt_with_tz) + + def test_valid_s_to_dt(self): + # if we 
pass in a string of right format, we get the corresponding datetime back + dt_string = "2023/01/01 12:00:00" + expected = datetime(2023, 1, 1, 12, 0, 0) + result = date_generators.parse_datetime_py(dt_string) + self.assertEqual(result, expected) + self.assertIsNone(result.tzinfo) + + def test_invalid_s_to_dt(self): + # if we pass in a string in the wrong format, + # we get an exception thrown + # ("T" is the wrong part) + dt_string_invalid = "2023/01/01T12:00:00" + with self.assertRaises((ValueError, TypeError, AssertionError)): + date_generators.parse_datetime_py(dt_string_invalid) + + def test_date_to_dt(self): + d = date(2026, 1, 2) + expected_t_start = datetime(2026, 1, 2) + expected_t_end = datetime(2026, 1, 3) + + actual_t_start = date_generators.parse_datetime_py(d, midnight='start') + actual_t_end = date_generators.parse_datetime_py(d, midnight='end') + + self.assertEqual(expected_t_start, actual_t_start) + self.assertEqual(expected_t_end, actual_t_end) + + with self.assertRaises(ValueError): + actual_t_start = date_generators.parse_datetime_py(d, midnight='Start') + + self.assertEqual(date_generators.parse_datetime_py(d, midnight='start'), date_generators.parse_datetime_py(d))