Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,10 @@ Your workflow may determine how you use NEMOSIS. Because the GUI relies on data

```python
from nemosis import dynamic_data_compiler
from datetime import datetime

start_time = '2017/01/01 00:00:00'
end_time = '2017/01/01 00:05:00'
start_time = datetime(2017, 1, 1, 0, 0)
end_time = datetime(2017, 1, 1, 0, 5)
table = 'DISPATCHPRICE'
raw_data_cache = 'C:/Users/your_data_storage'

Expand All @@ -113,6 +114,8 @@ Using the default settings of `dynamic_data_compiler` will download CSV data fro

A number of options are available to configure filtering (i.e. what data NEMOSIS returns as a pandas DataFrame) and caching.

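For `start_time` and `end_time` you can pass a `datetime` (timezone unaware), a `date`, or a string of the form "YYYY/MM/DD HH:MM:SS", e.g. `2017/01/01 00:00:00`. A `date` passed as `start_time` is treated as midnight at the start of that day; a `date` passed as `end_time` is treated as midnight at the end of that day (i.e. the start of the following day).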

###### Filter options

`dynamic_data_compiler` can be used to filter data before returning results.
Expand Down Expand Up @@ -206,8 +209,8 @@ from nemosis import defaults

defaults.table_columns['BIDPEROFFER_D'] += ['PASAAVAILABILITY']

start_time = '2017/01/01 00:00:00'
end_time = '2017/01/01 00:05:00'
start_time = datetime(2017, 1, 1, 0, 0)
end_time = datetime(2017, 1, 1, 0, 5)
table = 'BIDPEROFFER_D'
raw_data_cache = 'C:/Users/your_data_storage'

Expand Down
2 changes: 1 addition & 1 deletion src/nemosis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import sys
from .value_parser import _parse_datetime, _parse_column, _infer_column_data_types
from .value_parser import _parse_column, _infer_column_data_types
from .data_fetch_methods import *

name = "osdan"
Expand Down
42 changes: 22 additions & 20 deletions src/nemosis/custom_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
from datetime import timedelta, datetime
import math
import numpy as np
from nemosis import defaults, data_fetch_methods, filters

from nemosis import defaults
from nemosis.date_generators import parse_datetime_py
from nemosis.filters import filter_on_column_value
from nemosis.data_fetch_methods import dynamic_data_compiler, static_table

def fcas4s_scada_match(
start_time,
Expand All @@ -17,12 +19,12 @@ def fcas4s_scada_match(

# Pull in the 4 second fcas data.
table_name_fcas4s = "FCAS_4_SECOND"
fcas4s = data_fetch_methods.dynamic_data_compiler(
fcas4s = dynamic_data_compiler(
start_time, end_time, table_name_fcas4s, raw_data_location
)
# Pull in the 4 second fcas variable types.
table_name_variable_types = "VARIABLES_FCAS_4_SECOND"
fcas4s_variable_types = data_fetch_methods.static_table(
fcas4s_variable_types = static_table(
table_name_variable_types, raw_data_location
)

Expand Down Expand Up @@ -52,7 +54,7 @@ def fcas4s_scada_match(

# Pull in the dispatch unit scada data.
table_name_scada = "DISPATCH_UNIT_SCADA"
scada = data_fetch_methods.dynamic_data_compiler(
scada = dynamic_data_compiler(
start_time, end_time, table_name_scada, raw_data_location
)
scada["SETTLEMENTDATE"] = scada["SETTLEMENTDATE"] - timedelta(minutes=5)
Expand All @@ -62,7 +64,7 @@ def fcas4s_scada_match(

# Pull in the interconnector scada data and use the intervention records where they exist.
table_name_inter_flow = "DISPATCHINTERCONNECTORRES"
inter_flows = data_fetch_methods.dynamic_data_compiler(
inter_flows = dynamic_data_compiler(
start_time, end_time, table_name_inter_flow, raw_data_location
)
inter_flows["METEREDMWFLOW"] = pd.to_numeric(inter_flows["METEREDMWFLOW"])
Expand Down Expand Up @@ -144,7 +146,7 @@ def fcas4s_scada_match(
best_matches_scada = best_matches_scada.loc[:, select_columns]

if filter_cols is not None:
best_matches_scada = filters.filter_on_column_value(
best_matches_scada = filter_on_column_value(
best_matches_scada, filter_cols, filter_values
)

Expand Down Expand Up @@ -408,15 +410,15 @@ def plant_stats(
):

ix = pd.date_range(
start=datetime.strptime(start_time, "%Y/%m/%d %H:%M:%S"),
end=datetime.strptime(end_time, "%Y/%m/%d %H:%M:%S") - timedelta(minutes=5),
start=parse_datetime_py(start_time, midnight='start'),
end=parse_datetime_py(end_time, midnight='end') - timedelta(minutes=5),
freq="5min",
)
timeseries_df = pd.DataFrame(index=ix)
timeseries_df.reset_index(inplace=True)
timeseries_df.columns = ["SETTLEMENTDATE"]

gen_max_cap = data_fetch_methods.dynamic_data_compiler(
gen_max_cap = dynamic_data_compiler(
start_time,
end_time,
"DUDETAIL",
Expand All @@ -428,7 +430,7 @@ def plant_stats(
gen_max_cap = select_highest_version_number(
gen_max_cap, defaults.table_primary_keys["DUDETAIL"]
)
gen_region = data_fetch_methods.dynamic_data_compiler(
gen_region = dynamic_data_compiler(
start_time,
end_time,
"DUDETAILSUMMARY",
Expand All @@ -437,14 +439,14 @@ def plant_stats(
filter_cols=filter_cols,
filter_values=filter_values,
)
scada = data_fetch_methods.dynamic_data_compiler(
scada = dynamic_data_compiler(
start_time,
end_time,
"DISPATCH_UNIT_SCADA",
raw_data_location,
select_columns=["SETTLEMENTDATE", "DUID", "SCADAVALUE"],
)
dispatch_price = data_fetch_methods.dynamic_data_compiler(
dispatch_price = dynamic_data_compiler(
start_time,
end_time,
"DISPATCHPRICE",
Expand All @@ -454,7 +456,7 @@ def plant_stats(
dispatch_price = select_intervention_if_present(
dispatch_price, defaults.table_primary_keys["DISPATCHPRICE"]
)
trading_price = data_fetch_methods.dynamic_data_compiler(
trading_price = dynamic_data_compiler(
start_time,
end_time,
"TRADINGPRICE",
Expand All @@ -465,7 +467,7 @@ def plant_stats(
trading_price["RRP"] = pd.to_numeric(trading_price["RRP"])
# trading_price = calc_trading_price(dispatch_price)

region_summary = data_fetch_methods.dynamic_data_compiler(
region_summary = dynamic_data_compiler(
start_time,
end_time,
"DISPATCHREGIONSUM",
Expand Down Expand Up @@ -517,23 +519,23 @@ def plant_stats(


def trading_and_dispatch_cost():
gen_region = data_fetch_methods.dynamic_data_compiler(
gen_region = dynamic_data_compiler(
"2017/01/01 00:05:00",
"2018/01/01 00:05:00",
"DUDETAILSUMMARY",
defaults.raw_data_cache,
select_columns=["START_DATE", "END_DATE", "DUID", "REGIONID"],
)
scada = data_fetch_methods.dynamic_data_compiler(
scada = dynamic_data_compiler(
"2017/01/01 00:05:00",
"2018/01/01 00:05:00",
"DISPATCH_UNIT_SCADA",
defaults.raw_data_cache,
)

ix = pd.date_range(
start=datetime.strptime("2017/01/01 00:00:00", "%Y/%m/%d %H:%M:%S"),
end=datetime.strptime("2018/01/01 00:00:00", "%Y/%m/%d %H:%M:%S"),
start=datetime(2017, 1, 1),
end=datetime(2018, 1, 1),
freq="5min",
)
timeseries_df = pd.DataFrame(index=ix)
Expand All @@ -551,7 +553,7 @@ def trading_and_dispatch_cost():

scada = pd.concat(scada_list)

dispatch_price = data_fetch_methods.dynamic_data_compiler(
dispatch_price = dynamic_data_compiler(
"2017/01/01 00:00:00",
"2018/01/01 00:05:00",
"DISPATCHPRICE",
Expand Down
80 changes: 24 additions & 56 deletions src/nemosis/data_fetch_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
import glob as _glob
import pandas as _pd
from datetime import datetime as _datetime, timedelta as _timedelta
from nemosis import filters as _filters
from nemosis import downloader as _downloader
from nemosis.filters import filter_on_column_value as _filter_on_column_value
from nemosis import processing_info_maps as _processing_info_maps
from nemosis import date_generators as _date_generators
from nemosis import defaults as _defaults
from nemosis import custom_tables as _custom_tables
from nemosis import _infer_column_data_types
from nemosis.value_parser import _infer_column_data_types
from nemosis.date_generators import parse_datetime_py as _parse_datetime_py
from nemosis.custom_errors import UserInputError, NoDataToReturn, DataMismatchError

logger = logging.getLogger(__name__)
Expand All @@ -35,8 +35,12 @@ def dynamic_data_compiler(
will save data typed as strings/objects. To save typed data (e.g.
appropriate cols are Float or Int), use cache_compiler.
Args:
start_time (str): format 'yyyy/mm/dd HH:MM:SS'.
end_time (str): format 'yyyy/mm/dd HH:MM:SS'.
start_time (datetime): A native datetime. (Timezone unaware)
For legacy reasons, may be a string
of format 'yyyy/mm/dd HH:MM:SS'.
end_time (datetime): A native datetime. (Timezone unaware)
For legacy reasons, may be a string
of format 'yyyy/mm/dd HH:MM:SS'.
table_name (str): table as per Wiki.
raw_data_location (str): directory to download and cache data to.
existing data will be used if in this dir.
Expand Down Expand Up @@ -107,9 +111,10 @@ def dynamic_data_compiler(

logger.info(f"Compiling data for table {table_name}")

start_time = _datetime.strptime(start_time, "%Y/%m/%d %H:%M:%S")
end_time = _datetime.strptime(end_time, "%Y/%m/%d %H:%M:%S")
start_search = _datetime.strptime(start_search, "%Y/%m/%d %H:%M:%S")
# cast from string to datetime, if not already datetime
start_time = _parse_datetime_py(start_time, midnight='start')
end_time = _parse_datetime_py(end_time, midnight='end')
start_search = _parse_datetime_py(start_search, midnight='start')
data_tables = _dynamic_data_fetch_loop(
start_search,
start_time,
Expand Down Expand Up @@ -140,7 +145,7 @@ def dynamic_data_compiler(
]
UserInputError(f"Filter columns {missing_columns} not in data.")
else:
all_data = _filters.filter_on_column_value(
all_data = _filter_on_column_value(
all_data, filter_cols, filter_values
)
logger.info(f"Returning {table_name}.")
Expand Down Expand Up @@ -178,8 +183,12 @@ def cache_compiler(
that is used to store csvs (such as the cache for the GUI).

Args:
start_time (str): format 'yyyy/mm/dd HH:MM:SS'.
end_time (str): format 'yyyy/mm/dd HH:MM:SS'.
start_time (datetime): A native datetime. (Timezone unaware)
For legacy reasons, may be a string
of format 'yyyy/mm/dd HH:MM:SS'.
end_time (datetime): A native datetime. (Timezone unaware)
For legacy reasons, may be a string
of format 'yyyy/mm/dd HH:MM:SS'.
table_name (str): table as per Wiki.
raw_data_location (str): directory to download and cache data to.
existing data will be used if in this dir.
Expand Down Expand Up @@ -239,9 +248,9 @@ def cache_compiler(
_,
start_search,
) = _set_up_dynamic_compilers(table_name, start_time, end_time, select_columns)
start_time = _datetime.strptime(start_time, "%Y/%m/%d %H:%M:%S")
end_time = _datetime.strptime(end_time, "%Y/%m/%d %H:%M:%S")
start_search = _datetime.strptime(start_search, "%Y/%m/%d %H:%M:%S")
start_time = _parse_datetime_py(start_time, midnight='start')
end_time = _parse_datetime_py(end_time, midnight='end')
start_search = _parse_datetime_py(start_search, midnight='start')

_dynamic_data_fetch_loop(
start_search,
Expand Down Expand Up @@ -355,7 +364,7 @@ def static_table(
missing_columns = [col for col in filter_cols if col not in table.columns]
UserInputError(f"Filter columns {missing_columns} not in data.")
else:
table = _filters.filter_on_column_value(table, filter_cols, filter_values)
table = _filter_on_column_value(table, filter_cols, filter_values)

static_table_finalisers = static_data_finaliser_map[table_name]
for finaliser in static_table_finalisers:
Expand Down Expand Up @@ -878,44 +887,3 @@ def _static_table_wrapper_for_gui(
)
return table


# Dispatch table keyed by table name; each value is the callable used to
# compile that table. Most entries point at one of the two *_for_gui wrappers,
# while "FCAS_4s_SCADA_MAP" and "PLANTSTATS" point directly at functions in
# _custom_tables.
# NOTE(review): presumably consumed by the GUI (going by the wrapper names) --
# confirm against callers before relying on that.
_method_map = {
"DISPATCHLOAD": _dynamic_data_wrapper_for_gui,
"DISPATCHPRICE": _dynamic_data_wrapper_for_gui,
"TRADINGLOAD": _dynamic_data_wrapper_for_gui,
"TRADINGPRICE": _dynamic_data_wrapper_for_gui,
"TRADINGREGIONSUM": _dynamic_data_wrapper_for_gui,
"TRADINGINTERCONNECT": _dynamic_data_wrapper_for_gui,
"DISPATCH_UNIT_SCADA": _dynamic_data_wrapper_for_gui,
"DISPATCHCONSTRAINT": _dynamic_data_wrapper_for_gui,
"DUDETAILSUMMARY": _dynamic_data_wrapper_for_gui,
"PARTICIPANT": _dynamic_data_wrapper_for_gui,
"DUDETAIL": _dynamic_data_wrapper_for_gui,
"GENCONDATA": _dynamic_data_wrapper_for_gui,
"SPDREGIONCONSTRAINT": _dynamic_data_wrapper_for_gui,
"SPDCONNECTIONPOINTCONSTRAINT": _dynamic_data_wrapper_for_gui,
"SPDINTERCONNECTORCONSTRAINT": _dynamic_data_wrapper_for_gui,
"FCAS_4_SECOND": _dynamic_data_wrapper_for_gui,
"ELEMENTS_FCAS_4_SECOND": _static_table_wrapper_for_gui,
"VARIABLES_FCAS_4_SECOND": _static_table_wrapper_for_gui,
"Generators and Scheduled Loads": _static_table_wrapper_for_gui,
"FCAS Providers": _static_table_wrapper_for_gui,
"BIDDAYOFFER_D": _dynamic_data_wrapper_for_gui,
"BIDPEROFFER_D": _dynamic_data_wrapper_for_gui,
"FCAS_4s_SCADA_MAP": _custom_tables.fcas4s_scada_match,
"PLANTSTATS": _custom_tables.plant_stats,
"DISPATCHINTERCONNECTORRES": _dynamic_data_wrapper_for_gui,
"DISPATCHREGIONSUM": _dynamic_data_wrapper_for_gui,
"LOSSMODEL": _dynamic_data_wrapper_for_gui,
"LOSSFACTORMODEL": _dynamic_data_wrapper_for_gui,
"MNSP_DAYOFFER": _dynamic_data_wrapper_for_gui,
"MNSP_PEROFFER": _dynamic_data_wrapper_for_gui,
"MNSP_INTERCONNECTOR": _dynamic_data_wrapper_for_gui,
"INTERCONNECTOR": _dynamic_data_wrapper_for_gui,
"INTERCONNECTORCONSTRAINT": _dynamic_data_wrapper_for_gui,
"MARKET_PRICE_THRESHOLDS": _dynamic_data_wrapper_for_gui,
"DAILY_REGION_SUMMARY": _dynamic_data_wrapper_for_gui,
"NEXT_DAY_DISPATCHLOAD": _dynamic_data_wrapper_for_gui,
"INTERMITTENT_GEN_SCADA": _dynamic_data_wrapper_for_gui,
"ROOFTOP_PV_ACTUAL": _dynamic_data_wrapper_for_gui
}
33 changes: 32 additions & 1 deletion src/nemosis/date_generators.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from nemosis import defaults
from calendar import monthrange
from datetime import timedelta
from datetime import timedelta, datetime, date

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -134,3 +134,34 @@ def current_gen(start_time, end_time):
):
continue
yield str(year), month, str(day).zfill(2), None

def parse_datetime_py(t, midnight='start'):
    """
    Convert a user-supplied time value into a native, timezone unaware datetime.

    This is not intended to be used for conversions within Pandas/CSV/Parquet etc.

    Args:
        t (str | datetime | date): the value to convert.
            A string is parsed with `defaults.nemosis_date_format`.
            A datetime is returned unchanged, provided it is timezone unaware.
            A date is expanded to a midnight, chosen by `midnight`.
        midnight (str): only consulted when `t` is a date (and not a datetime).
            'start' gives the midnight at the start of that day.
            'end' gives the midnight at the end of that day, i.e. the start
            of the following day.

    Returns:
        datetime: the converted value.

    Raises:
        ValueError: if `t` is a timezone aware datetime, if `t` is a string
            that does not match the expected format, if `t` is a date and
            `midnight` is neither 'start' nor 'end', or if `t` is of an
            unsupported type.
    """
    if isinstance(t, str):
        return datetime.strptime(t, defaults.nemosis_date_format)

    # datetime is a subclass of date, so it must be tested before date.
    if isinstance(t, datetime):
        if t.utcoffset() is not None:
            raise ValueError(
                "Conversion between timezones not implemented. (Even if it's market time.) "
                f"For {t}, pass a timezone unaware version, which will be treated as market time."
            )
        return t

    if isinstance(t, date):
        if midnight not in ('start', 'end'):
            raise ValueError(f"midnight must be 'start' or 'end', got {midnight}")

        if midnight == 'end':
            # The end of this day is the start of the next day.
            t = t + timedelta(days=1)

        return datetime.combine(t, datetime.min.time())

    raise ValueError(f"Unsure how to handle datetime {t} of unexpected type {type(t)}")
4 changes: 4 additions & 0 deletions src/nemosis/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,6 +892,10 @@
'%Y-%m-%d %H:%M:%S'
]

# If a user passes a datetime to us, as a string,
# it should be in this format.
nemosis_date_format = "%Y/%m/%d %H:%M:%S"

# data is available from 2009
# however for 2009-2014, the structure in MMSDM is different.
# It's one zip per month, with all tables combined.
Expand Down
Loading
Loading