diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..f6c5c9ec --- /dev/null +++ b/.coveragerc @@ -0,0 +1,3 @@ +[run] +core = ctrace +branch = True diff --git a/.github/workflows/pull-request-ci.yml b/.github/workflows/pull-request-ci.yml index bd8bac8d..b06de470 100644 --- a/.github/workflows/pull-request-ci.yml +++ b/.github/workflows/pull-request-ci.yml @@ -37,22 +37,22 @@ jobs: ref: ${{ github.head_ref }} fetch_depth: 0 - - name: Get ruff log file from Python 3.11 build + - name: Get ruff log file from Python 3.14 build uses: actions/download-artifact@v4 with: - name: ruff-results-3.11 + name: ruff-results-3.14 path: . - - name: Get mypy log file from Python 3.11 build + - name: Get mypy log file from Python 3.14 build uses: actions/download-artifact@v4 with: - name: mypy-results-3.11 + name: mypy-results-3.14 path: . - - name: Get coverage log file from Python 3.11 build + - name: Get coverage log file from Python 3.14 build uses: actions/download-artifact@v4 with: - name: coverage-results-3.11 + name: coverage-results-3.14 path: . - name: Get coverage log file from Python latest build @@ -61,14 +61,14 @@ jobs: name: coverage-results-3.14 path: . 
- - name: Generate and push badges for with python 3.11 + - name: Generate and push badges for with python 3.14 run: | git fetch origin "${{ github.head_ref }}" || true git checkout "${{ github.head_ref }}" pip install anybadge - anybadge -o -l ruff -v $(cat ruff-results-3.11.log | wc -l) -f $BADGES_DIR/ruff.svg -u 1=green 2=red - anybadge -o -l mypy -v $([ -n "$(tail -n 1 mypy-3.11.log | grep -e '^Succes')" ] && echo pass || echo fail) -f $BADGES_DIR/mypy.svg fail=red pass=green - COVERAGE_PERC=$(grep "TOTAL" coverage-3.11.log | grep -Eo '[0-9.]+%' | sed 's/%//') + anybadge -o -l ruff -v $(cat ruff-results-3.14.log | wc -l) -f $BADGES_DIR/ruff.svg -u 1=green 2=red + anybadge -o -l mypy -v $([ -n "$(tail -n 1 mypy-3.14.log | grep -e '^Succes')" ] && echo pass || echo fail) -f $BADGES_DIR/mypy.svg fail=red pass=green + COVERAGE_PERC=$(grep "TOTAL" coverage-3.14.log | grep -Eo '[0-9.]+%' | sed 's/%//') anybadge -o -l coverage -v "$COVERAGE_PERC%" -f $BADGES_DIR/coverage.svg 60=red 80=orange 100=green git config user.name "Badge Bot" git config user.email "<>" diff --git a/.github/workflows/reusable-ci-workflows.yml b/.github/workflows/reusable-ci-workflows.yml index 33141520..c84a5f27 100644 --- a/.github/workflows/reusable-ci-workflows.yml +++ b/.github/workflows/reusable-ci-workflows.yml @@ -40,7 +40,7 @@ jobs: - name: Run ruff run: | - ruff check --output-file "ruff-results-${{ inputs.python-version }}.log" --output-format pylint + ruff check --output-file "ruff-results-${{ inputs.python-version }}.log" --output-format pylint --exit-zero - name: Upload ruff results uses: actions/upload-artifact@v7 with: diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cfeaf67..ecb22c58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Functions to `qmi.instruments.yokogawa.dlm4308` for obtaining trace data from the instrument waveform channels via Ethernet. 
All data formats are enabled. - Due to possibility of obtaining data in various data formats with Yokogawa device, and the fact that the returned data string decoding varies depending on the data format, an option for setting the `decoder` for `ScpiProtocol.ask` method was added. This enabled the trace data acquisition in all data formats for Yokogawa. +- Added an HDF5 file-to-QMI `DataSet` conversion function in `qmi.data.dataset`. +- Added the possibility to add a QMI `DataSet` to an existing HDF5 file. ### Changed - Replace `pylint` linter with `ruff`. @@ -20,6 +22,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 All tuple-like usages of the output from `discover_peer_contexts` will still work. - Specifying an explicit `None` argument for the `config_file` parameter to `qmi.start()` will now always use an empty configuration. The default behavior when `config_file` is not specified has not changed. +- Refactored the QMI `DataSet` class so that it also allows raw (single) dataset column(s) without an axis (or axes) definition. +- QMI datasets now have a clear `QMI_Dataset: 1` attribute in the dataset root. +- Updated the `qmi.data.datastore` module to work with the changes in the `QMI_DataSet` class. 
## [0.52.0] - 2026-04-01 diff --git a/documentation/sphinx/source/images/class_diagram_main.drawio.svg b/documentation/sphinx/source/images/class_diagram_main.drawio.svg index 6ed9358c..24bc9b22 100644 --- a/documentation/sphinx/source/images/class_diagram_main.drawio.svg +++ b/documentation/sphinx/source/images/class_diagram_main.drawio.svg @@ -1,4 +1,4 @@ -corecoreqmiqmi+ context_singleton: core.context::QMI_Context [0..1]- _object_registry: core.object_registry::ObjectRegistry+ context_singleton: core.context::QMI_Context [0..1]...contextcontextconfig_defsconfig_defsrpcrpcpubsubpubsubmessagingmessagingtasktask«use»«use»QMI_RpcObjectQMI_RpcObjectinstrument::QMI_Instrumentinstrument::QMI_InstrumentQMI_RpcProxyQMI_RpcProxy+ rpc_nonblocking: QMI_RpcNonBlockingProxy+ rpc_nonblocking:...QMI_RpcNonBlockingProxyQMI_RpcNonBlockingProxyQMI_TaskRunnerQMI_TaskRunner- _thread: _TaskThread- _thread: _TaskThread_TaskThread_TaskThread+ task: QMI_Task+ task: QMI_Taskthread::QMI_Threadthread::QMI_Threadthreading::Threadthreading::ThreadSignalManagerSignalManager- _local_subscriptions: dict<String, set<QMI_SignalReceiver>>- _local_subscriptions:...QMI_SignalReceiverQMI_SignalReceiverMessageRouterMessageRouter- _thread: _EventDrivenThread [0..1]- _socket_manager: _SocketManager [0..1]- _thread: _EventDrivenThread [0..1]...+ start_tcp_server(tcp_server_port: Integer)+ start_udp_responder(udp_server_port: Integer)+ connect_to_peer(peer_context_name: String, peer_address: String)+ disconnect_from_peer(peer_context_name: String)+ start_tcp_server(tcp_server_port: Integer)..._SocketManager_SocketManager- _socket_wrappers: list[_SocketWrapper]- _peer_context_map: dict<String, _PeerTcpConnection>- _socket_wrappers: list[_SocketWrapper]..._SocketWrapper_SocketWrapper_UdpResponder_UdpResponder_TcpServer_TcpServer_PeerTcpConnection_PeerTcpConnectionQMI_MessageHandlerQMI_MessageHandler+ address: QMI_MessageHandlerAddress+ address: 
QMI_MessageHandlerAddressQMI_MessageHandlerAddressQMI_MessageHandlerAddress+ context_id: String+ object_id: String+ context_id: String...QMI_RpcFutureQMI_RpcFutureCfgQmiCfgQmi+ config_file: String [0..1]+ workgroup: String+ qmi_home: String [0..1]+ log_dir: String [0..1]+ datastore: String [0..1]+ logging: CfgLogging+ contexts: dict<String, CfgContext>+ process_management: CfgProcessManagement+ config_file: String [0..1]...CfgContextCfgContext+ host: String [0..1]+ tcp_server_port: Integer [0..1]+ connect_to_peers: list<String>+ enabled: Boolean+ program_module: String [0..1] + program_args: list<String>+ python_path: String [0..1]+ virtualenv_path: String [0..1]+ host: String [0..1]...«use»«use»QMI_ContextQMI_Context- _config: config_defs::CfgQmi- _rpc_object_map: dict<String, rpc::RpcObjectManager [0..1]>- _signal_manager: pubsub::SignalManager- _message_router: messaging::MessageRouter- _config: config_defs::CfgQmi...qmi.core.context::_ContextRpcObjectqmi.core.context::_ContextRpcObjectRpcObjectManagerRpcObjectManager- _rpc_thread: _RpcThread [0..1]- _rpc_object_maker: Callable<(): QMI_RpcObject>- _rpc_thread: _RpcThread [0..1]...+ make_proxy(): QMI_RpcProxy+ make_proxy(): QMI_RpcProxy«use»«use»«use»«use»_RpcThread_RpcThread- _rpc_object: QMI_RpcObject [0..1]- _rpc_object: QMI_RpcObject [0..1]CreateCreate«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»_EventDrivenThread_EventDrivenThread«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»QMI_TaskQMI_Task+ settings: Any [0..1]+ status: Any [0..1]# _task_runner: QMI_TaskRunner+ settings: Any [0..1]...QMI_LoopTaskQMI_LoopTask«use»«use»Text is not SVG - cannot display \ No newline at end of file +corecoreqmiqmi+ context_singleton: core.context::QMI_Context [0..1]- _object_registry: core.object_registry::ObjectRegistry+ context_singleton: core.context::QMI_Context 
[0..1]...contextcontextconfig_defsconfig_defsrpcrpcpubsubpubsubmessagingmessagingtasktask«use»«use»QMI_RpcObjectQMI_RpcObjectinstrument::QMI_Instrumentinstrument::QMI_InstrumentQMI_RpcProxyQMI_RpcProxy+ rpc_nonblocking: QMI_RpcNonBlockingProxy+ rpc_nonblocking:...QMI_RpcNonBlockingProxyQMI_RpcNonBlockingProxyQMI_TaskRunnerQMI_TaskRunner- _thread: _TaskThread- _thread: _TaskThread_TaskThread_TaskThread+ task: QMI_Task+ task: QMI_Taskthread::QMI_Threadthread::QMI_Threadthreading::Threadthreading::ThreadSignalManagerSignalManager- _local_subscriptions: dict<String, set<QMI_SignalReceiver>>- _local_subscriptions:...QMI_SignalReceiverQMI_SignalReceiverMessageRouterMessageRouter- _thread: _EventDrivenThread [0..1]- _socket_manager: _SocketManager [0..1]- _thread: _EventDrivenThread [0..1]...+ start_tcp_server(tcp_server_port: Integer)+ start_udp_responder(udp_server_port: Integer)+ connect_to_peer(peer_context_name: String, peer_address: String)+ disconnect_from_peer(peer_context_name: String)+ start_tcp_server(tcp_server_port: Integer)..._SocketManager_SocketManager- _socket_wrappers: list[_SocketWrapper]- _peer_context_map: dict<String, _PeerTcpConnection>- _socket_wrappers: list[_SocketWrapper]..._SocketWrapper_SocketWrapper_UdpResponder_UdpResponder_TcpServer_TcpServer_PeerTcpConnection_PeerTcpConnectionQMI_MessageHandlerQMI_MessageHandler+ address: QMI_MessageHandlerAddress+ address: QMI_MessageHandlerAddressQMI_MessageHandlerAddressQMI_MessageHandlerAddress+ context_id: String+ object_id: String+ context_id: String...QMI_RpcFutureQMI_RpcFutureCfgQmiCfgQmi+ config_file: String [0..1]+ workgroup: String+ qmi_home: String [0..1]+ log_dir: String [0..1]+ datastore: String [0..1]+ logging: CfgLogging+ contexts: dict<String, CfgContext>+ process_management: CfgProcessManagement+ config_file: String [0..1]...CfgContextCfgContext+ host: String [0..1]+ tcp_server_port: Integer [0..1]+ connect_to_peers: list<String>+ enabled: Boolean+ program_module: String [0..1] + 
program_args: list<String>+ python_path: String [0..1]+ virtualenv_path: String [0..1]+ host: String [0..1]...«use»«use»QMI_ContextQMI_Context- _config: config_defs::CfgQmi- _rpc_object_map: dict<String, rpc::RpcObjectManager [0..1]>- _signal_manager: pubsub::SignalManager- _message_router: messaging::MessageRouter- _config: config_defs::CfgQmi...qmi.core.context::_ContextRpcObjectqmi.core.context::_ContextRpcObjectRpcObjectManagerRpcObjectManager- _rpc_thread: _RpcThread [0..1]- _rpc_object_maker: Callable<(): QMI_RpcObject>- _rpc_thread: _RpcThread [0..1]...+ make_proxy(): QMI_RpcProxy+ make_proxy(): QMI_RpcProxy«use»«use»«use»«use»_RpcThread_RpcThread- _rpc_object: QMI_RpcObject [0..1]- _rpc_object: QMI_RpcObject [0..1]CreateCreate«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»_EventDrivenThread_EventDrivenThread«use»«use»«use»«use»«use»«use»«use»«use»«use»«use»QMI_TaskQMI_Task+ settings: Any [0..1]+ status: Any [0..1]# _task_runner: QMI_TaskRunner+ settings: Any [0..1]...QMI_LoopTaskQMI_LoopTask«use»«use»Text is not SVG - cannot display diff --git a/documentation/sphinx/source/images/class_diagram_messaging.drawio.svg b/documentation/sphinx/source/images/class_diagram_messaging.drawio.svg index 9c5e59a5..25494dc3 100644 --- a/documentation/sphinx/source/images/class_diagram_messaging.drawio.svg +++ b/documentation/sphinx/source/images/class_diagram_messaging.drawio.svg @@ -1,4 +1,4 @@ -qmi.coreqmi.corerpcrpcmessagingmessagingpubsubpubsubMessageRouterMessageRouter+ send_message(message: QMI_Message)+ deliver_message(message: QMI_Message)+ register_message_handler( message_handler: QMI_MessageHandler)+ unregister_message_handler( message_handler: QMI_MessageHandler)+ send_message(message: QMI_Message)...QMI_MessageHandlerQMI_MessageHandler+ address: QMI_MessageHandlerAddress+ address: QMI_MessageHandlerAddress+ handle_message(message: QMI_Message)+ handle_message(message: 
QMI_Message)QMI_RpcFutureQMI_RpcFutureRpcObjectManagerRpcObjectManager- _rpc_thread: _RpcThread [0..1]- _rpc_object_maker: Callable<(): QMI_RpcObject>- _rpc_thread: _RpcThread [0..1]...context::QMI_Contextcontext::QMI_Context- _message_router: messaging::MessageRouter- _message_router: messaging::MessageRouter+ send_message(message: QMI_Message)+ send_message(message: QMI_Message)«use»«use»«call»«call»QMI_MessageQMI_Message+ source_address: QMI_MessageHandlerAddress+ destination_address: QMI_MessageHandlerAddress+ source_address: QMI_MessageHandlerAddress...QMI_InitialHandshakeMessageQMI_InitialHandshakeMessage+ version: String+ is_server_handshake: Boolean+ version: String...QMI_RequestMessageQMI_RequestMessage+ request_id: String+ request_id: StringQMI_ReplyMessageQMI_ReplyMessage+ request_id: String+ request_id: StringQMI_ErrorReplyMessageQMI_ErrorReplyMessage+ error_msg: String+ error_msg: StringQMI_SignalSubscriptionRequestQMI_SignalSubscriptionRequest+ publisher_name: String+ signal_name: String+ subscribe: Boolean+ publisher_name: String...QMI_SignalSubscriptionReplyQMI_SignalSubscriptionReply+ success: Boolean+ error_msg: String+ success: Boolean...«use»«use»«use»«use»«use»«use»Text is not SVG - cannot display \ No newline at end of file +qmi.coreqmi.corerpcrpcmessagingmessagingpubsubpubsubMessageRouterMessageRouter+ send_message(message: QMI_Message)+ deliver_message(message: QMI_Message)+ register_message_handler( message_handler: QMI_MessageHandler)+ unregister_message_handler( message_handler: QMI_MessageHandler)+ send_message(message: QMI_Message)...QMI_MessageHandlerQMI_MessageHandler+ address: QMI_MessageHandlerAddress+ address: QMI_MessageHandlerAddress+ handle_message(message: QMI_Message)+ handle_message(message: QMI_Message)QMI_RpcFutureQMI_RpcFutureRpcObjectManagerRpcObjectManager- _rpc_thread: _RpcThread [0..1]- _rpc_object_maker: Callable<(): QMI_RpcObject>- _rpc_thread: _RpcThread [0..1]...context::QMI_Contextcontext::QMI_Context- 
_message_router: messaging::MessageRouter- _message_router: messaging::MessageRouter+ send_message(message: QMI_Message)+ send_message(message: QMI_Message)«use»«use»«call»«call»QMI_MessageQMI_Message+ source_address: QMI_MessageHandlerAddress+ destination_address: QMI_MessageHandlerAddress+ source_address: QMI_MessageHandlerAddress...QMI_InitialHandshakeMessageQMI_InitialHandshakeMessage+ version: String+ is_server_handshake: Boolean+ version: String...QMI_RequestMessageQMI_RequestMessage+ request_id: String+ request_id: StringQMI_ReplyMessageQMI_ReplyMessage+ request_id: String+ request_id: StringQMI_ErrorReplyMessageQMI_ErrorReplyMessage+ error_msg: String+ error_msg: StringQMI_SignalSubscriptionRequestQMI_SignalSubscriptionRequest+ publisher_name: String+ signal_name: String+ subscribe: Boolean+ publisher_name: String...QMI_SignalSubscriptionReplyQMI_SignalSubscriptionReply+ success: Boolean+ error_msg: String+ success: Boolean...«use»«use»«use»«use»«use»«use»Text is not SVG - cannot display diff --git a/documentation/sphinx/source/images/class_diagram_signalling.drawio.svg b/documentation/sphinx/source/images/class_diagram_signalling.drawio.svg index 6203acc6..70e40cc2 100644 --- a/documentation/sphinx/source/images/class_diagram_signalling.drawio.svg +++ b/documentation/sphinx/source/images/class_diagram_signalling.drawio.svg @@ -1,4 +1,4 @@ -messagingmessagingpubsubpubsubtasktaskQMI_MessageHandlerQMI_MessageHandler+ address: QMI_MessageHandlerAddress+ address: QMI_MessageHandlerAddress+ handle_message(message: QMI_Message)+ handle_message(message: QMI_Message)context::QMI_Contextcontext::QMI_Context+ publish_signal( publisher_name: String, signal_name: String, args: tuple<Any>)+ publish_signal(...+ subscribe_signal( publisher_context: String, publisher_name: String, signal_name: String, receiver: messaging::QMI_SignalReceiver)+ subscribe_signal(...+ unsubscribe_signal( publisher_context: String, publisher_name: String, signal_name: String, receiver: 
messaging::QMI_SignalReceiver)+ unsubscribe_signal(...+ send_message( message: messaging::QMI_Message)+ send_message(...QMI_MessageQMI_Message+ source_address: QMI_MessageHandlerAddress+ destination_address: QMI_MessageHandlerAddress+ source_address:...QMI_SignalRemovedMessageQMI_SignalRemovedMessage+ publisher_name: String+ signal_name: String+ publisher_name: String...QMI_SignalMessageQMI_SignalMessage+ signal_name: String+ args: tuple<Any>+ signal_name: String...QMI_TaskQMI_Task+ sig_settings_updated: messaging::QMI_Signal# _qmi_signals: tuple<messaging::SignalDescription>+ sig_settings_updated:...+ update_settings(): Boolean+ update_settings(): BooleanQMI_LoopTaskQMI_LoopTask+ sig_status_updated: messaging::QMI_Signal+ sig_status_updated:...+ run()+ run()QMI_RegisteredSignalQMI_RegisteredSignal + publisher_name: String+ signal_name: String+ arg_types: tuple<Type>- _context: context::QMI_Context+ publisher_name: String...+ publish(args: Any [*]) + publish(args: Any [*]) 0..10..1**QMI_SignalSubscriber QMI_SignalSubscriber + context: context::QMI_Context+ published_context: String+ publisher_name: String+ signal_name: String+ signal_arg_types: String+ context: context::QMI_Context...+ subscribe( receiver: QMI_SignalReceiver)+ subscribe(...+ unsubscribe( receiver: QMI_SignalReceiver)+ unsubscribe(...SignalManagerSignalManager- _context: context::QMI_Context- _local_subscriptions: dict<String, set<QMI_SignalReceiver>>- _pending_subscription_request_by_request_id: dict<String, _PendingSubscriptionRequest>- _pending_subscription_request_by_signal_name: dict<String, _PendingSubscriptionRequest>- _context: context::QMI_Context...+ publish_signal( publisher_name: String, signal_name: String, args: tuple<Any>)+ publish_signal(...+ unsubscribe_signal( publisher_context: String, publisher_name: String, signal_name: String, receiver: QMI_SignalReceiver)+ unsubscribe_signal(...+ subscribe_signal( publisher_context: String, publisher_name: String, signal_name: String, 
receiver: QMI_SignalReceiver)+ subscribe_signal(...+ handle_message(message: messaging::QMI_Message)+ handle_message(message:...QMI_SignalReceiverQMI_SignalReceiver- _queue: deque<ReceivedSignal>- _queue: deque<ReceivedSignal>- _receive_signal( message: QMI_SignalMessage)- _receive_signal(...QMI_SignalQMI_Signal+ arg_types: tuple<Type>+ arg_types: tuple<Type>+ publish(args: Any [*])+ publish(args: Any [*])«call»«call»«call»«call»«call»«call»«call»«call»«call»«call»«call»«call»«call»«call»Text is not SVG - cannot display \ No newline at end of file +messagingmessagingpubsubpubsubtasktaskQMI_MessageHandlerQMI_MessageHandler+ address: QMI_MessageHandlerAddress+ address: QMI_MessageHandlerAddress+ handle_message(message: QMI_Message)+ handle_message(message: QMI_Message)context::QMI_Contextcontext::QMI_Context+ publish_signal( publisher_name: String, signal_name: String, args: tuple<Any>)+ publish_signal(...+ subscribe_signal( publisher_context: String, publisher_name: String, signal_name: String, receiver: messaging::QMI_SignalReceiver)+ subscribe_signal(...+ unsubscribe_signal( publisher_context: String, publisher_name: String, signal_name: String, receiver: messaging::QMI_SignalReceiver)+ unsubscribe_signal(...+ send_message( message: messaging::QMI_Message)+ send_message(...QMI_MessageQMI_Message+ source_address: QMI_MessageHandlerAddress+ destination_address: QMI_MessageHandlerAddress+ source_address:...QMI_SignalRemovedMessageQMI_SignalRemovedMessage+ publisher_name: String+ signal_name: String+ publisher_name: String...QMI_SignalMessageQMI_SignalMessage+ signal_name: String+ args: tuple<Any>+ signal_name: String...QMI_TaskQMI_Task+ sig_settings_updated: messaging::QMI_Signal# _qmi_signals: tuple<messaging::SignalDescription>+ sig_settings_updated:...+ update_settings(): Boolean+ update_settings(): BooleanQMI_LoopTaskQMI_LoopTask+ sig_status_updated: messaging::QMI_Signal+ sig_status_updated:...+ run()+ run()QMI_RegisteredSignalQMI_RegisteredSignal + 
publisher_name: String+ signal_name: String+ arg_types: tuple<Type>- _context: context::QMI_Context+ publisher_name: String...+ publish(args: Any [*]) + publish(args: Any [*]) 0..10..1**QMI_SignalSubscriber QMI_SignalSubscriber + context: context::QMI_Context+ published_context: String+ publisher_name: String+ signal_name: String+ signal_arg_types: String+ context: context::QMI_Context...+ subscribe( receiver: QMI_SignalReceiver)+ subscribe(...+ unsubscribe( receiver: QMI_SignalReceiver)+ unsubscribe(...SignalManagerSignalManager- _context: context::QMI_Context- _local_subscriptions: dict<String, set<QMI_SignalReceiver>>- _pending_subscription_request_by_request_id: dict<String, _PendingSubscriptionRequest>- _pending_subscription_request_by_signal_name: dict<String, _PendingSubscriptionRequest>- _context: context::QMI_Context...+ publish_signal( publisher_name: String, signal_name: String, args: tuple<Any>)+ publish_signal(...+ unsubscribe_signal( publisher_context: String, publisher_name: String, signal_name: String, receiver: QMI_SignalReceiver)+ unsubscribe_signal(...+ subscribe_signal( publisher_context: String, publisher_name: String, signal_name: String, receiver: QMI_SignalReceiver)+ subscribe_signal(...+ handle_message(message: messaging::QMI_Message)+ handle_message(message:...QMI_SignalReceiverQMI_SignalReceiver- _queue: deque<ReceivedSignal>- _queue: deque<ReceivedSignal>- _receive_signal( message: QMI_SignalMessage)- _receive_signal(...QMI_SignalQMI_Signal+ arg_types: tuple<Type>+ arg_types: tuple<Type>+ publish(args: Any [*])+ publish(args: Any [*])«call»«call»«call»«call»«call»«call»«call»«call»«call»«call»«call»«call»«call»«call»Text is not SVG - cannot display diff --git a/documentation/sphinx/source/images/example_contexts.drawio.svg b/documentation/sphinx/source/images/example_contexts.drawio.svg index 09c87a5f..d1377005 100644 --- a/documentation/sphinx/source/images/example_contexts.drawio.svg +++ 
b/documentation/sphinx/source/images/example_contexts.drawio.svg @@ -1,4 +1,4 @@ -Context_1:qmi.core.context::QMI_ContextContext_1:...Instrument_1:qmi.core.instrument::QMI_InstrumentInstrument_1:...Instrument_2:qmi.core.instrument::QMI_InstrumentInstrument_2:...Context_2:qmi.core.context::QMI_ContextContext_2:...Task_1:qmi.core.task::QMI_LoopTaskTask_1:...Context_3:qmi.core.context::QMI_ContextContext_3:...Task_2:qmi.core.task::QMI_LoopTaskTask_2:...«flow»«flow»Control instrumentsControl instruments«flow»«flow»Control instrumentsControl instruments«flow»«flow»Instrument statusInstrument statusDatabaseDatabase«flow»«flow»ControltaskControl...«flow»«flow»settings, status:qmi.core.pubsub::QMI_Signalsettings, status:...«flow»«flow»status:qmi.core.pubsub::QMI_Signalstatus:...Text is not SVG - cannot display \ No newline at end of file +Context_1:qmi.core.context::QMI_ContextContext_1:...Instrument_1:qmi.core.instrument::QMI_InstrumentInstrument_1:...Instrument_2:qmi.core.instrument::QMI_InstrumentInstrument_2:...Context_2:qmi.core.context::QMI_ContextContext_2:...Task_1:qmi.core.task::QMI_LoopTaskTask_1:...Context_3:qmi.core.context::QMI_ContextContext_3:...Task_2:qmi.core.task::QMI_LoopTaskTask_2:...«flow»«flow»Control instrumentsControl instruments«flow»«flow»Control instrumentsControl instruments«flow»«flow»Instrument statusInstrument statusDatabaseDatabase«flow»«flow»ControltaskControl...«flow»«flow»settings, status:qmi.core.pubsub::QMI_Signalsettings, status:...«flow»«flow»status:qmi.core.pubsub::QMI_Signalstatus:...Text is not SVG - cannot display diff --git a/qmi/data/dataset.py b/qmi/data/dataset.py index 00545eca..fcf0f6ae 100644 --- a/qmi/data/dataset.py +++ b/qmi/data/dataset.py @@ -9,18 +9,30 @@ import h5netcdf import h5py +# Constant names +QMI_DATASET_MARKER = "QMI_Dataset" +QMI_DATASET_NAME = "QMI_Dataset_name" +QMI_DATASET_LAYOUT = "QMI_Dataset_layout" +QMI_DATASET_TIMESTAMP = "QMI_Dataset_timestamp" +QMI_DATASET_TIME_STR = "QMI_Dataset_time_str" 
+QMI_DATASET_DATA_NDIM = "QMI_Dataset_data_ndim" +QMI_DATASET_N_AXES = "QMI_Dataset_n_axes" +QMI_DATASET_NCOL = "QMI_Dataset_ncol" + class DataSet: """A dataset is a series of values obtained during a measurement. - A dataset contains an array of values in the form of a N-dimensional Numpy array (N >= 2). + A dataset contains an array of values in the form of a N-dimensional Numpy array. + + For raw datasets without measurement axes, the array may be one-dimensional ``(n,)`` for a single data column, + or two-dimensional ``(nrow, ncol)`` for tabular data with multiple columns. - The first (N-1) axes of the Numpy array represent independent variables or iterations in the measurement. - Each of these axes may have an optional label, a physical unit, and a mapping of array indices to values - on the physical axis. + For axis-based datasets, the last axis of the array acts as a "column index" while the first axes represent + independent variables or iterations in the measurement. Each of these axes may have an optional label, + a physical unit, and a mapping of array indices to values on the physical axis. - The last axis of the Numpy array acts as a "column index". Each column may have an associated label and - physical unit. + Each data column may have an associated label and physical unit. A dataset may have attributes. Each attribute has a name, which is a short string, unique to the dataset. Each attribute has a value which may be a string or a number. @@ -37,9 +49,9 @@ class DataSet: ~DataSet.name: Name of the dataset. ~DataSet.data: Numpy array containing the actual data. ~DataSet.timestamp: POSIX time stamp associated with the data. - axis_label: List of strings specifying labels for the first (N-1) axes. - axis_unit: List of strings specifying units for the first (N-1) axes. - axis_scale: List of optional 1D Numpy arrays specifying value mappings for the first (N-1) axes. + axis_label: List of strings specifying labels for the measurement axes. 
+ axis_unit: List of strings specifying units for the measurement axes. + axis_scale: List of optional 1D Numpy arrays specifying value mappings for the measurement axes. column_label: List of strings specifying column labels. column_unit: List of strings specifying column units. attrs: Dictionary of application-specific attributes. @@ -60,7 +72,8 @@ def __init__( name: Name of the dataset. This should be a short string without spaces or strange symbols, suitable for use as part of a file name. shape: Tuple of axis dimensions. Used to create a zero-initialized dataset if the actual data - are not yet available. The last axis dimension represents the number of columns in the dataset. + are not yet available. For raw datasets this may be ``(n,)`` or ``(nrow, ncol)``. + For axis-based datasets, the last axis dimension represents the number of columns in the dataset. dtype: Type of value in each data point. If not specified, the default is np.float64. data: Optional Numpy array containing the actual data. The new dataset instance will contain a reference to the specified Numpy array. Modifying the Numpy array will cause the contents of the dataset @@ -71,11 +84,9 @@ def __init__( self.timestamp = time.time() if data is not None: - # Check that the specified data is a Numpy array. if not isinstance(data, np.ndarray): raise TypeError("Specified 'data' parameter must be a Numpy array.") - # Check shape and data type. if shape is not None: if data.shape != tuple(shape): raise ValueError("Data does not match specified shape.") @@ -88,7 +99,6 @@ def __init__( self.data = data else: - if shape is None: raise TypeError("Either 'shape' or 'data' parameter must be specified.") @@ -99,38 +109,97 @@ def __init__( self.data = np.zeros(tuple(shape), dtype=dtype) # Check shape. 
- ndim = len(self.data.shape) - if ndim < 2: - raise ValueError("Dataset must have at least 2 axes.") + if self.data.ndim < 1: + raise ValueError("DataSet requires at least one dimension.") + if np.min(self.data.shape) < 1: raise ValueError("Zero-size or negative size axes are not allowed.") - # Initialize axis labels. - self.axis_label: list[str] = [""] * (ndim - 1) - self.axis_unit: list[str] = [""] * (ndim - 1) - self.axis_scale: list[np.ndarray | None] = [None] * (ndim - 1) + if self.data.ndim == 1: + self.__axis_capacity = 0 + self.__axis_ndim = 0 + self.__raw_mode = True + ncol = 1 + + elif self.data.ndim == 2: + self.__axis_capacity = 1 + self.__axis_ndim = 0 + self.__raw_mode = True + ncol = self.data.shape[-1] + + else: + self.__axis_capacity = self.data.ndim - 1 + self.__axis_ndim = self.data.ndim - 1 + self.__raw_mode = False + ncol = self.data.shape[-1] + + self.axis_label: list[str] = [""] * self.__axis_capacity if self.__axis_capacity > 0 else [] + self.axis_unit: list[str] = [""] * self.__axis_capacity if self.__axis_capacity > 0 else [] + self.axis_name: list[str] = [""] * self.__axis_capacity if self.__axis_capacity > 0 else [] + self.axis_scale: list[np.ndarray | None] = [None] * self.__axis_capacity if self.__axis_capacity > 0 else [] - # Initialize column labels. - ncol = self.data.shape[-1] self.column_label: list[str] = ncol * [""] self.column_unit: list[str] = ncol * [""] + self.column_name: list[str] = ncol * [""] - # Initialize empty set of attributes. - self.attrs: dict[str, str | int | float] = {} + # Initialize set of attributes. + self.attrs: dict[str, str | int | float | bool] = {} - def set_axis_label(self, axis: int, label: str) -> None: - """Specify an axis label. 
+ @property + def _ndim(self) -> int: + return self.__axis_ndim + + @property + def n_axes(self) -> int: + return self.__axis_ndim + + @property + def ncol(self) -> int: + return len(self.column_label) + + @property + def is_raw(self) -> bool: + return self.__raw_mode + + def _activate_axis_mode(self) -> None: + """If two-dimensional data has an axis, create it here. + + This won't have any effect on three-dimensional data. + """ + if self.data.ndim == 2 and self.__raw_mode: + self.__raw_mode = False + self.__axis_ndim = 1 + if len(self.axis_label) == 0: + self.axis_label = [""] + self.axis_unit = [""] + self.axis_name = [""] + self.axis_scale = [None] + + def _check_axis_number(self, axis: int) -> None: + """Check that an axis number is valid. Parameters: axis: Axis number (0, 1, ...). - label: Label string of the axis. + + Raises: + TypeError: If axis parameter is not an integer. + ValueError: If the axis value is not valid, e.g. larger than defined axes at dataset initialization. """ if not isinstance(axis, int): raise TypeError("Parameter 'axis' must be an integer.") - if axis < 0 or axis >= len(self.axis_label): + if axis < 0 or axis >= self.__axis_capacity: raise ValueError("Invalid value for parameter 'axis'.") + def set_axis_label(self, axis: int, label: str) -> None: + """Specify an axis label. + + Parameters: + axis: Axis number (0, 1, ...). + label: Label string of the axis. + """ + self._check_axis_number(axis) + self._activate_axis_mode() self.axis_label[axis] = label def set_axis_unit(self, axis: int, unit: str) -> None: @@ -140,13 +209,20 @@ def set_axis_unit(self, axis: int, unit: str) -> None: axis: Axis number (0, 1, ...). unit: Unit string of the axis. 
""" - if not isinstance(axis, int): - raise TypeError("Parameter 'axis' must be an integer.") + self._check_axis_number(axis) + self._activate_axis_mode() + self.axis_unit[axis] = unit - if axis < 0 or axis >= len(self.axis_unit): - raise ValueError("Invalid value for parameter 'axis'.") + def set_axis_name(self, axis: int, name: str) -> None: + """Specify an axis 'long' name. - self.axis_unit[axis] = unit + Parameters: + axis: Axis number (0, 1, ...). + name: 'Long' name string of the axis. + """ + self._check_axis_number(axis) + self._activate_axis_mode() + self.axis_name[axis] = name def set_axis_scale(self, axis: int, scale: np.ndarray) -> None: """Specify a mapping from array indices to physical values along an axis. @@ -155,12 +231,8 @@ def set_axis_scale(self, axis: int, scale: np.ndarray) -> None: axis: Axis to which the mapping applies (the first axis has number 0). scale: 1D Numpy array of values along the axis. The length must match the size of the axis. """ - if not isinstance(axis, int): - raise TypeError("Parameter 'axis' must be an integer.") - - if axis < 0 or axis >= len(self.data.shape) - 1: - raise ValueError("Invalid value for parameter 'axis'.") - + self._check_axis_number(axis) + self._activate_axis_mode() v = np.array(scale) if v.shape != (self.data.shape[axis],): raise ValueError("Invalid shape for scale array.") @@ -200,8 +272,23 @@ def set_column_unit(self, col: int, unit: str) -> None: self.column_unit[col] = unit + def set_column_name(self, col: int, name: str) -> None: + """Specify a name for a column in a multi-column data set. -def _parse_attribute_value(s: str) -> int | float | str: + Parameters: + col: Column number (0, 1, ...). + name: Descriptive name for column data. 
def _dataset_layout(dataset: DataSet) -> str:
    """Return the layout tag stored with a dataset: "raw" if `dataset.is_raw`, else "axis"."""
    return "raw" if dataset.is_raw else "axis"


def _unique_keys(bases: list[str], reserved: list[str] | None = None) -> list[str]:
    """Make a list of names unique by appending "_1", "_2", ... to repeated entries.

    Parameters:
        bases:    Preferred names, in output order.
        reserved: Optional names that are already taken and must not be produced.

    Returns:
        keys: One unique name per entry of `bases`.
    """
    used: set[str] = set(reserved or ())
    keys: list[str] = []
    for base in bases:
        key = base
        suffix = 1
        while key in used:
            key = f"{base}_{suffix}"
            suffix += 1
        used.add(key)
        keys.append(key)
    return keys


def _column_keys(dataset: DataSet) -> list[str]:
    """Return one unique HDF5 node name per data column.

    A column uses its label when it has one. Unlabelled columns fall back to the dataset
    name for one-dimensional data and to "column_<index>" otherwise.
    """
    bases = [
        dataset.column_label[col] or (dataset.name if dataset.data.ndim == 1 else f"column_{col}")
        for col in range(dataset.ncol)
    ]
    return _unique_keys(bases)


def _axis_scale_keys(dataset: DataSet, reserved: list[str] | None = None) -> list[str]:
    """Return one unique HDF5 node name per axis scale.

    An axis uses its label when it has one, otherwise "axis_<index>_scale".

    Parameters:
        dataset:  DataSet whose axes are named.
        reserved: Optional names already used by other nodes in the same container
                  (e.g. the column keys); the returned keys avoid them.
    """
    bases = [dataset.axis_label[axis] or f"axis_{axis}_scale" for axis in range(dataset.n_axes)]
    return _unique_keys(bases, reserved)


def _write_common_metadata(
    container: h5py.Group | h5netcdf.Group | h5py.File | h5netcdf.File,
    dataset: DataSet,
) -> tuple[str, list[str], list[str]]:
    """Write the QMI bookkeeping attributes and the custom attributes of a dataset to a container.

    Parameters:
        container: HDF5 file or group that receives the attributes.
        dataset:   DataSet instance that is described.

    Returns:
        group_name:      Prefix used for the per-dataset attribute names.
        column_keys:     Node name for every data column.
        axis_scale_keys: Node name for every axis scale.

    Raises:
        ValueError: If a custom attribute name starts with a reserved prefix.
    """
    if isinstance(container, (h5py.File, h5netcdf.File)):
        group_name = dataset.name
    else:
        # A root group has an empty basename; an empty prefix would match (and so reject)
        # every custom attribute name below, so fall back to the dataset name.
        group_name = container.name.split("/")[-1] or dataset.name

    # Validate the custom attributes before anything is written, so that a rejected name
    # does not leave a half-written set of attributes behind.
    for name in dataset.attrs:
        if name.startswith(("QMI_Dataset", group_name, "DIMENSION_")):
            raise ValueError(f"Invalid use of special attribute name {name!r}")

    container.attrs[QMI_DATASET_MARKER] = 1
    container.attrs[QMI_DATASET_NAME] = dataset.name
    container.attrs[QMI_DATASET_LAYOUT] = _dataset_layout(dataset)
    container.attrs[f"{group_name}_timestamp"] = dataset.timestamp
    container.attrs[f"{group_name}_time_str"] = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(dataset.timestamp))
    container.attrs[f"{group_name}_data_ndim"] = dataset.data.ndim
    container.attrs[f"{group_name}_n_axes"] = dataset.n_axes
    container.attrs[f"{group_name}_ncol"] = dataset.ncol

    for dim_index, dim_size in enumerate(dataset.data.shape):
        container.attrs[f"{group_name}_dim{dim_index}_size"] = dim_size

    column_keys = _column_keys(dataset)
    # Columns and axis scales become sibling nodes, so scale names must not reuse column names.
    axis_scale_keys = _axis_scale_keys(dataset, reserved=column_keys)

    for axis in range(dataset.n_axes):
        prefix = f"{group_name}_axis{axis}"
        if dataset.axis_label[axis]:
            container.attrs[f"{prefix}_label"] = dataset.axis_label[axis]
        if dataset.axis_unit[axis]:
            container.attrs[f"{prefix}_unit"] = dataset.axis_unit[axis]
        if dataset.axis_name[axis]:
            container.attrs[f"{prefix}_name"] = dataset.axis_name[axis]
        if dataset.axis_scale[axis] is not None:
            container.attrs[f"{prefix}_key"] = axis_scale_keys[axis]

    for col in range(dataset.ncol):
        prefix = f"{group_name}_column{col}"
        container.attrs[f"{prefix}_key"] = column_keys[col]
        if dataset.column_label[col]:
            container.attrs[f"{prefix}_label"] = dataset.column_label[col]
        if dataset.column_unit[col]:
            container.attrs[f"{prefix}_unit"] = dataset.column_unit[col]
        if dataset.column_name[col]:
            container.attrs[f"{prefix}_name"] = dataset.column_name[col]

    for name, value in dataset.attrs.items():
        container.attrs[name] = value

    return group_name, column_keys, axis_scale_keys


def _create_dataset_node(
    container: h5py.Group | h5netcdf.Group | h5py.File | h5netcdf.File,
    key: str,
    data: np.ndarray,
    dim_names: tuple[str, ...],
) -> h5py.Dataset | h5netcdf.Variable:
    """Create one data node in the container using the API of its backend.

    Parameters:
        container: HDF5 file or group in which the node is created.
        key:       Name of the new node.
        data:      Array stored in the node.
        dim_names: Dimension names, one per array dimension. Only used by the h5netcdf
                   backend; dimensions that do not exist yet are created with the size of
                   the matching array dimension.

    Returns:
        node: The created h5py dataset or h5netcdf variable.
    """
    if isinstance(container, (h5py.File, h5py.Group)):
        return container.create_dataset(key, data=data)

    for dim_name, dim_size in zip(dim_names, data.shape):
        if dim_name not in container.dimensions:
            container.dimensions[dim_name] = dim_size
    return container.create_variable(key, dimensions=dim_names, data=data)


def _read_shape(attrs: dict, group_name: str) -> tuple[int, ...]:
    """Rebuild the data shape from the "<group_name>_data_ndim" and "<group_name>_dim<i>_size" attributes."""
    data_ndim = int(attrs[f"{group_name}_data_ndim"])
    return tuple(int(attrs[f"{group_name}_dim{dim_index}_size"]) for dim_index in range(data_ndim))
def _read_qmi_dataset(container: h5py.Group | h5netcdf.Group | h5py.File | h5netcdf.File) -> DataSet:
    """Rebuild a DataSet from a container that carries the QMI dataset attributes.

    The data is read from one node per column (named by the "<prefix>_column<i>_key"
    attributes) and stacked along a new last dimension, unless the stored data is
    one-dimensional. Axis labels, units, names and scales, column labels, units and
    names, the timestamp and all custom attributes are restored from the attributes.

    Parameters:
        container: HDF5 file or group holding the column nodes and the attributes.

    Returns:
        dataset: The reconstructed DataSet instance.

    Raises:
        KeyError:   If a required attribute or node is missing.
        ValueError: If the column nodes differ in shape, or an axis scale does not match
                    the size of its axis.
    """
    attrs = dict(container.attrs)
    basename = container.name.split("/")[-1]
    name = str(attrs.get(QMI_DATASET_NAME) or basename or "dataset")

    # The attribute prefix is the dataset name when the container is a file, and the
    # group basename when it is a group; these differ if the group is not named after the dataset.
    group_name = name
    if f"{group_name}_data_ndim" not in attrs and f"{basename}_data_ndim" in attrs:
        group_name = basename

    layout = str(attrs.get(QMI_DATASET_LAYOUT, "axis"))
    data_ndim = int(attrs[f"{group_name}_data_ndim"])
    n_axes = int(attrs.get(f"{group_name}_n_axes", 0))
    ncol = int(attrs[f"{group_name}_ncol"])
    shape = _read_shape(attrs, group_name)

    column_keys = [
        str(attrs.get(f"{group_name}_column{col}_key", attrs.get(f"{group_name}_column{col}_label", f"column_{col}")))
        for col in range(ncol)
    ]

    if data_ndim == 1:
        data = np.asarray(container[column_keys[0]])
    else:
        column_arrays = [np.asarray(container[key]) for key in column_keys]
        base_shape = column_arrays[0].shape
        for arr in column_arrays[1:]:
            if arr.shape != base_shape:
                raise ValueError("Column datasets do not have matching shapes")
        data = np.stack(column_arrays, axis=-1)
        if data.shape != shape:
            data = data.reshape(shape)

    dataset = DataSet(name=name, data=data)
    dataset.timestamp = float(attrs[f"{group_name}_timestamp"])

    if layout == "axis" and n_axes > 0 and dataset.is_raw:
        dataset._activate_axis_mode()
        # Name mangling only happens inside a class body. Written here as `dataset.__axis_ndim`
        # the assignment would create an unrelated attribute and leave the private state of
        # the DataSet unchanged, so the mangled names are spelled out.
        dataset._DataSet__axis_ndim = n_axes
        dataset._DataSet__raw_mode = False

    for axis in range(n_axes):
        dataset.axis_label[axis] = str(attrs.get(f"{group_name}_axis{axis}_label", ""))
        dataset.axis_unit[axis] = str(attrs.get(f"{group_name}_axis{axis}_unit", ""))
        dataset.axis_name[axis] = str(attrs.get(f"{group_name}_axis{axis}_name", ""))
        scale_key = attrs.get(f"{group_name}_axis{axis}_key")
        if scale_key:
            scale = np.asarray(container[str(scale_key)])
            if scale.shape != (dataset.data.shape[axis],):
                raise ValueError(f"Invalid shape of dimension scale for axis {axis}")
            dataset.axis_scale[axis] = scale

    for col in range(ncol):
        dataset.column_label[col] = str(attrs.get(f"{group_name}_column{col}_label", ""))
        dataset.column_unit[col] = str(attrs.get(f"{group_name}_column{col}_unit", ""))
        dataset.column_name[col] = str(attrs.get(f"{group_name}_column{col}_name", ""))

    # Everything that does not use a reserved prefix is a custom attribute.
    reserved_prefixes = (group_name, "QMI_Dataset", "DIMENSION_")
    for attr_name, value in attrs.items():
        if not attr_name.startswith(reserved_prefixes):
            dataset.attrs[attr_name] = value

    return dataset
def _annotate_node(node: h5py.Dataset | h5netcdf.Variable, label: str, unit: str, long_name: str) -> None:
    """Store the non-empty label, unit and long name of a column or axis as node attributes."""
    for key, value in (("name", label), ("unit", unit), ("long_name", long_name)):
        if value:
            node.attrs[key] = value


def _node_unit(node: h5py.Dataset | h5netcdf.Variable) -> str:
    """Return the "units" attribute of a node, else its "unit" attribute, else an empty string."""
    return str(node.attrs.get("units", node.attrs.get("unit", "")))


def write_dataset_to_hdf5(dataset: DataSet, hdf_group: h5py.Group | h5netcdf.Group | h5py.File | h5netcdf.File) -> None:
    """Write the specified dataset to the specified HDF5 file or group.

    The dataset metadata is stored as attributes of `hdf_group`. Every data column is
    written as a separate node named after its column key, and every axis scale that is
    set is written as an additional node named after its axis-scale key. With the h5py
    backend each scale is attached to the matching dimension of every column node.

    Parameters:
        dataset:   DataSet instance to write to HDF5.
        hdf_group: HDF5 File or Group instance to which the dataset is written.

    Raises:
        ValueError: If a custom attribute of the dataset uses a reserved attribute name.
    """
    group_name, column_keys, axis_scale_keys = _write_common_metadata(hdf_group, dataset)

    if dataset.data.ndim == 1:
        node = _create_dataset_node(hdf_group, column_keys[0], dataset.data, (group_name,))
        _annotate_node(node, dataset.column_label[0], dataset.column_unit[0], dataset.column_name[0])
        return

    if dataset.n_axes == 0:
        dim_names: tuple[str, ...] = ("row",)
    else:
        dim_names = tuple(dataset.axis_label[axis] or f"dim_{axis}" for axis in range(dataset.n_axes))

    column_nodes: list[h5py.Dataset | h5netcdf.Variable] = []
    for col, key in enumerate(column_keys):
        node = _create_dataset_node(hdf_group, key, dataset.data[..., col], dim_names)
        _annotate_node(node, dataset.column_label[col], dataset.column_unit[col], dataset.column_name[col])
        column_nodes.append(node)

    # Without axes this loop does not run and only the column nodes are written.
    for axis in range(dataset.n_axes):
        axis_scale = dataset.axis_scale[axis]
        if axis_scale is None:
            continue

        scale_key = axis_scale_keys[axis]
        scale_node = _create_dataset_node(hdf_group, scale_key, axis_scale, (dim_names[axis],))
        _annotate_node(scale_node, dataset.axis_label[axis], dataset.axis_unit[axis], dataset.axis_name[axis])

        if isinstance(hdf_group, (h5py.File, h5py.Group)):
            # Only h5py offers the dimension-scale API.
            scale_node.make_scale(scale_key)
            for node in column_nodes:
                node.dims[axis].label = dataset.axis_label[axis]
                node.dims[axis].attach_scale(scale_node)


def read_dataset_from_hdf5(
    parent: h5py.File | h5netcdf.File | h5py.Group | h5netcdf.Group | h5py.Dataset | h5netcdf.Variable,
    container: h5py.File | h5netcdf.File | h5py.Group | h5netcdf.Group | None = None,
) -> DataSet:
    """Extract a QMI DataSet instance from the specified HDF5 file, group or dataset.

    If the object that carries the metadata (`container` when given, otherwise `parent`)
    has the QMI dataset marker attribute, the dataset is rebuilt from the QMI attributes.
    Otherwise `parent` is converted with `convert_to_qmi_dataset`.

    Parameters:
        parent:    HDF5 file/group container, or a child dataset for backwards compatibility.
        container: Optional explicit parent file/group if `parent` is a child dataset.

    Returns:
        dataset: DataSet instance.
    """
    # Compare with None explicitly: HDF5 objects define their own truth value
    # (an empty h5netcdf group has length 0), so `container or parent` could ignore `container`.
    meta_container = parent if container is None else container

    known_types = (h5py.Dataset, h5netcdf.Variable, h5py.File, h5py.Group, h5netcdf.File, h5netcdf.Group)
    if isinstance(meta_container, known_types) and meta_container.attrs.get(QMI_DATASET_MARKER) == 1:
        return _read_qmi_dataset(meta_container)

    return convert_to_qmi_dataset(parent)


def convert_to_qmi_dataset(
    parent: h5py.File | h5netcdf.File | h5py.Group | h5netcdf.Group | h5py.Dataset | h5netcdf.Variable,
) -> DataSet:
    """Convert an HDF5 dataset, a group, or the contents of a file root into a QMI dataset.

    If the input is a h5py.Dataset | h5netcdf.Variable, it is converted as-is; the dataset can have
    one or more dimensions.

    If the input is a h5py.Group | h5netcdf.Group, its members are split into axes (h5py dimension
    scales, or h5netcdf variables named after one of their own dimensions) and data columns. The
    columns are stacked into a single QMI dataset and the axes supply axis labels, units, names and
    scales. A group with a single column and no axes is converted as that one array.

    If the input is a h5py.File | h5netcdf.File without groups, it is handled like a group. If there is
    a single group present, that group is converted.

    Parameters:
        parent: HDF5 object to convert.

    Returns:
        dataset: The converted DataSet instance.

    Raises:
        RuntimeError: If a file holds more than one group, a group holds nested groups, or no
                      datasets are found.
        ValueError:   If the column datasets differ in shape.
    """
    if isinstance(parent, (h5py.Dataset, h5netcdf.Variable)):
        name = parent.name.split("/")[-1] or str(parent.attrs.get("name", "dataset"))
        dataset = DataSet(name=name, data=np.asarray(parent))
        if dataset.ncol > 0:
            dataset.column_label[0] = str(parent.attrs.get("label", parent.attrs.get("name", name)))
            dataset.column_unit[0] = _node_unit(parent)
            dataset.column_name[0] = str(parent.attrs.get("long_name", ""))
        for attr_name, value in parent.attrs.items():
            if not str(attr_name).startswith(("_", "DIMENSION_")):
                dataset.attrs[str(attr_name)] = value
        return dataset

    group: h5py.Group | h5netcdf.Group = parent
    if isinstance(parent, (h5py.File, h5netcdf.File)):
        groups = [obj for obj in parent.values() if isinstance(obj, (h5py.Group, h5netcdf.Group))]
        if len(groups) > 1:
            raise RuntimeError("Cannot convert multiple groups from a HDF5 file.")
        if groups:
            group = groups[0]

    axes: list[tuple[str, h5py.Dataset | h5netcdf.Variable]] = []
    columns: list[tuple[str, h5py.Dataset | h5netcdf.Variable]] = []

    for item, hdf5_obj in group.items():
        if isinstance(hdf5_obj, (h5py.Group, h5netcdf.Group)):
            raise RuntimeError("Cannot convert nested groups to a single QMI dataset.")

        label = str(hdf5_obj.attrs.get("label", hdf5_obj.attrs.get("name", item)))
        if isinstance(hdf5_obj, h5py.Dataset):
            is_axis = hdf5_obj.is_scale
        else:
            # A variable named after one of its own dimensions is treated as an axis.
            is_axis = item in hdf5_obj.dimensions
        (axes if is_axis else columns).append((label, hdf5_obj))

    if not columns and not axes:
        raise RuntimeError("No datasets found to convert from the HDF5 file.")

    name = group.name.split("/")[-1]
    if not name:
        name = str(group.attrs["name"]) if "name" in group.attrs else "dataset"

    if columns:
        column_arrays = [np.asarray(ds) for _, ds in columns]
        base_shape = column_arrays[0].shape
        for arr in column_arrays[1:]:
            if arr.shape != base_shape:
                raise ValueError("Column datasets do not have matching shapes")

        if len(column_arrays) == 1 and not axes:
            qmi_dataset = DataSet(name=name, data=column_arrays[0])
        else:
            qmi_dataset = DataSet(name=name, data=np.stack(column_arrays, axis=-1))
    else:
        # Only axes were found: the first one becomes the data of the dataset.
        label, axis_ds = axes[0]
        qmi_dataset = DataSet(name=name, data=np.asarray(axis_ds))
        qmi_dataset.column_label[0] = label
        qmi_dataset.column_unit[0] = _node_unit(axis_ds)
        qmi_dataset.column_name[0] = str(axis_ds.attrs.get("long_name", ""))

    if axes and qmi_dataset.data.ndim >= 2:
        for axis_index, (label, axis_ds) in enumerate(axes):
            qmi_dataset.set_axis_label(axis_index, label)
            qmi_dataset.set_axis_unit(axis_index, _node_unit(axis_ds))
            qmi_dataset.set_axis_name(axis_index, str(axis_ds.attrs.get("long_name", "")))
            axis_values = np.asarray(axis_ds)
            # Only finite numeric values are used as an axis scale.
            if np.issubdtype(axis_values.dtype, np.number) and np.all(np.isfinite(axis_values)):
                qmi_dataset.set_axis_scale(axis_index, axis_values)

    for col_index, (label, column_ds) in enumerate(columns[:qmi_dataset.ncol]):
        qmi_dataset.column_label[col_index] = label
        qmi_dataset.column_unit[col_index] = _node_unit(column_ds)
        qmi_dataset.column_name[col_index] = str(column_ds.attrs.get("long_name", ""))

    for attr_name, value in group.attrs.items():
        if not str(attr_name).startswith(("_", "DIMENSION_")):
            qmi_dataset.attrs[str(attr_name)] = value

    return qmi_dataset
""" - ndim = len(dataset.data.shape) - ncol = dataset.data.shape[-1] + attrs: dict[str, int | float | str | bool] = collections.OrderedDict() + attrs[QMI_DATASET_NAME] = dataset.name + attrs[QMI_DATASET_LAYOUT] = _dataset_layout(dataset) + attrs[QMI_DATASET_TIMESTAMP] = dataset.timestamp + attrs[QMI_DATASET_TIME_STR] = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(dataset.timestamp)) + attrs[QMI_DATASET_DATA_NDIM] = dataset.data.ndim + attrs[QMI_DATASET_N_AXES] = dataset.n_axes + attrs[QMI_DATASET_NCOL] = dataset.ncol + + for dim_index, dim_size in enumerate(dataset.data.shape): + attrs[f"QMI_Dataset_dim{dim_index}_size"] = dim_size + + for axis in range(dataset.n_axes): + if dataset.axis_label[axis]: + attrs[f"QMI_Dataset_axis{axis}_label"] = dataset.axis_label[axis] + + if dataset.axis_unit[axis]: + attrs[f"QMI_Dataset_axis{axis}_unit"] = dataset.axis_unit[axis] + + if dataset.axis_name[axis]: + attrs[f"QMI_Dataset_axis{axis}_name"] = dataset.axis_name[axis] - # Create special columns if needed. special_column_label = [] special_column_unit = [] - if ndim > 2: - # Create special axis index columns. - for axis in range(ndim - 1): + if dataset.n_axes > 1: + for axis in range(dataset.n_axes): special_column_label.append(f"axis{axis}_index") special_column_unit.append("") - # Create special axis scale columns if needed. - for axis in range(ndim - 1): + for axis in range(dataset.n_axes): if dataset.axis_scale[axis] is not None: special_column_label.append(f"axis{axis}_scale") special_column_unit.append(dataset.axis_unit[axis]) - # Prepare attributes. - attrs: dict[str, int | float | str] = collections.OrderedDict() - - # Dataset name. - attrs["QMI_DataSet_name"] = dataset.name - - # Timestamp. - attrs["QMI_DataSet_timestamp"] = dataset.timestamp - attrs["QMI_DataSet_time_str"] = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(dataset.timestamp)) - - # Data shape. - attrs["QMI_DataSet_ndim"] = ndim - attrs["QMI_DataSet_ncol"] = ncol - - # Axis labels / units. 
- for axis in range(ndim - 1): - attrs[f"QMI_DataSet_axis{axis}_size"] = dataset.data.shape[axis] - if dataset.axis_label[axis]: - attrs[f"QMI_DataSet_axis{axis}_label"] = dataset.axis_label[axis] - - if dataset.axis_unit[axis]: - attrs[f"QMI_DataSet_axis{axis}_unit"] = dataset.axis_unit[axis] - - # Column labels / units. column_label = special_column_label + dataset.column_label column_unit = special_column_unit + dataset.column_unit for col in range(len(column_label)): if column_label[col]: - attrs[f"QMI_DataSet_column{col}_label"] = column_label[col] + attrs[f"QMI_Dataset_column{col}_label"] = column_label[col] if column_unit[col]: - attrs[f"QMI_DataSet_column{col}_unit"] = column_unit[col] + attrs[f"QMI_Dataset_column{col}_unit"] = column_unit[col] - # Custom attributes. for (name, val) in dataset.attrs.items(): - if name.startswith("QMI_DataSet"): + if name.startswith("QMI_Dataset"): raise ValueError(f"Invalid use of special attribute name {name!r}") if not name: @@ -507,30 +789,29 @@ def write_dataset_to_text(dataset: DataSet, fh: TextIO) -> None: attrs[name] = val - # Reshape data to 2D format. - if ndim > 2: + if dataset.data.ndim == 1: + rawdata = dataset.data.reshape(-1, 1) + elif dataset.data.ndim > 2: nrow = np.prod(dataset.data.shape[:-1]) - rawdata = dataset.data.reshape((nrow, ncol)) - + rawdata = dataset.data.reshape((nrow, dataset.ncol)) else: rawdata = dataset.data - # Insert axis index columns. extra_columns = [] - if ndim > 2: - for axis in range(ndim - 1): + if dataset.n_axes > 1: + for axis in range(dataset.n_axes): n = dataset.data.shape[axis] outer_rows = int(np.prod(dataset.data.shape[:axis], dtype=np.int32)) inner_rows = int(np.prod(dataset.data.shape[axis+1:-1], dtype=np.int32)) extra_columns.append(np.tile(np.repeat(np.arange(n), inner_rows), outer_rows)) - # Insert axis scale columns. 
- for axis in range(ndim - 1): + for axis in range(dataset.n_axes): dataset_axis_scale = dataset.axis_scale[axis] if dataset_axis_scale is not None: - assert dataset_axis_scale.shape == (dataset.data.shape[axis],) outer_rows = int(np.prod(dataset.data.shape[:axis], dtype=np.int32)) inner_rows = int(np.prod(dataset.data.shape[axis+1:-1], dtype=np.int32)) + if dataset.n_axes == 1: + inner_rows = 1 extra_columns.append(np.tile(np.repeat(dataset_axis_scale, inner_rows), outer_rows)) if extra_columns: @@ -539,13 +820,11 @@ def write_dataset_to_text(dataset: DataSet, fh: TextIO) -> None: # Write marker line. fh.write("# QMI_DataSet\n") fh.write("#\n") - # Write attributes. for (name, value) in attrs.items(): fh.write(f"# {name}: {value!r}\n") fh.write("#\n") - # Write actual data. np.savetxt(fh, rawdata) @@ -559,7 +838,6 @@ def read_dataset_from_text(fh: TextIO) -> DataSet: Returns: DataSet instance. """ - # Check marker line. line = fh.readline().strip() if line != "# QMI_DataSet": @@ -573,10 +851,10 @@ def read_dataset_from_text(fh: TextIO) -> DataSet: attrs = {} while True: line = fh.readline().strip() - # Stop at separator between attributes and data. if line == "#": + # Stop at separator between attributes and data. break - + # Read attribute. p = line.find(":") if (not line.startswith("# ")) or (p < 0): @@ -592,64 +870,55 @@ def read_dataset_from_text(fh: TextIO) -> DataSet: # Read raw data. rawdata = np.loadtxt(fh, ndmin=2) (nrow, total_columns) = rawdata.shape - # Determine dataset name. - dataset_name = attrs.get("QMI_DataSet_name") + dataset_name = attrs.get(QMI_DATASET_NAME) if not isinstance(dataset_name, str): raise ValueError("Missing required attribute QMI_DataSet_name") - # Determine dataset shape. 
- ndim = int(attrs["QMI_DataSet_ndim"]) - ncol = int(attrs["QMI_DataSet_ncol"]) - assert ndim >= 2 - - shape_list: list[int] = [] - for axis in range(ndim - 1): - axis_size = attrs[f"QMI_DataSet_axis{axis}_size"] - if not isinstance(axis_size, int): - raise ValueError(f"Invalid value for attribute QMI_DataSet_axis{axis}_size") - - shape_list.append(axis_size) - - shape_list.append(ncol) - shape = tuple(shape_list) - - # Verify number of rows. - expect_rows = np.prod(shape[:-1]) - if nrow != expect_rows: - raise ValueError(f"Expecting {expect_rows} rows but got {nrow} rows") - - # Verify number of columns. + # Determine dataset shape + data_ndim = int(attrs[QMI_DATASET_DATA_NDIM]) + n_axes = int(attrs.get(QMI_DATASET_N_AXES, 0)) + ncol = int(attrs[QMI_DATASET_NCOL]) + shape = tuple(int(attrs[f"QMI_Dataset_dim{dim_index}_size"]) for dim_index in range(data_ndim)) + # Verify data dimensions. if total_columns < ncol: raise ValueError(f"Expecting at least {ncol} columns but got {total_columns} columns") + num_special_columns = total_columns - ncol + if data_ndim == 1: + data = rawdata[:, num_special_columns] - # Extract and reshape actual data. - data = rawdata[:, num_special_columns:].reshape(*shape) + else: + expect_rows = np.prod(shape[:-1]) if len(shape) > 1 else shape[0] + if nrow != expect_rows: + raise ValueError(f"Expecting {expect_rows} rows but got {nrow} rows") + + data = rawdata[:, num_special_columns:].reshape(shape) - # Create DataSet instance. + # Create dataset instance. dataset = DataSet(name=dataset_name, data=data) + dataset.timestamp = float(attrs[QMI_DATASET_TIMESTAMP]) - # Set timestamp. - dataset.timestamp = float(attrs["QMI_DataSet_timestamp"]) + if n_axes > 0 and dataset.is_raw: + dataset._activate_axis_mode() + dataset.__axis_ndim = n_axes + dataset.__raw_mode = False - # Set axis labels and units. 
- for axis in range(ndim - 1): - dataset.axis_label[axis] = str(attrs.get(f"QMI_DataSet_axis{axis}_label", "")) - dataset.axis_unit[axis] = str(attrs.get(f"QMI_DataSet_axis{axis}_unit", "")) + for axis in range(n_axes): + dataset.axis_label[axis] = str(attrs.get(f"QMI_Dataset_axis{axis}_label", "")) + dataset.axis_unit[axis] = str(attrs.get(f"QMI_Dataset_axis{axis}_unit", "")) + dataset.axis_name[axis] = str(attrs.get(f"QMI_Dataset_axis{axis}_name", "")) for col in range(ncol): - dataset.column_label[col] = str(attrs.get(f"QMI_DataSet_column{num_special_columns + col}_label", "")) - dataset.column_unit[col] = str(attrs.get(f"QMI_DataSet_column{num_special_columns + col}_unit", "")) + dataset.column_label[col] = str(attrs.get(f"QMI_Dataset_column{num_special_columns + col}_label", "")) + dataset.column_unit[col] = str(attrs.get(f"QMI_Dataset_column{num_special_columns + col}_unit", "")) - # Verify index columns and extract axis scales. for col in range(num_special_columns): - label = attrs.get(f"QMI_DataSet_column{col}_label") + label = attrs.get(f"QMI_Dataset_column{col}_label") if not isinstance(label, str): raise ValueError(f"Missing label for special column {col}") if label.startswith("axis") and label.endswith("_index"): - # Verify index column. axis = int(label[4:-6]) n = dataset.data.shape[axis] outer_rows = int(np.prod(dataset.data.shape[:axis], dtype=np.int32)) @@ -659,11 +928,13 @@ def read_dataset_from_text(fh: TextIO) -> DataSet: raise ValueError(f"Inconsistent index data for axis {axis}") elif label.startswith("axis") and label.endswith("_scale"): - # Set axis scale. 
axis = int(label[4:-6]) n = dataset.data.shape[axis] outer_rows = int(np.prod(dataset.data.shape[:axis], dtype=np.int32)) inner_rows = int(np.prod(dataset.data.shape[axis+1:-1], dtype=np.int32)) + if dataset.n_axes == 1: + inner_rows = 1 + scale = rawdata[0:n*inner_rows:inner_rows, col] scale_raw = np.tile(np.repeat(scale, inner_rows), outer_rows) if not np.all(rawdata[:, col] == scale_raw): @@ -671,9 +942,8 @@ def read_dataset_from_text(fh: TextIO) -> DataSet: dataset.axis_scale[axis] = scale - # Read custom attributes. for attribute_name, attribute_value in attrs.items(): - if not attribute_name.startswith("QMI_DataSet"): + if not attribute_name.startswith("QMI_Dataset"): dataset.attrs[attribute_name] = attribute_value return dataset diff --git a/qmi/data/datastore.py b/qmi/data/datastore.py index 8998547f..bc964ab0 100644 --- a/qmi/data/datastore.py +++ b/qmi/data/datastore.py @@ -1,20 +1,24 @@ """Routines for data storage.""" +import json import os import os.path import re +import shutil import time from typing import Any -import json import h5netcdf import h5py -import shutil +import numpy as np -from qmi.core.exceptions import QMI_UsageException +import qmi import qmi.data.dataset -from qmi.data.dataset import DataSet from qmi.core.config_struct import config_struct_to_dict +from qmi.core.exceptions import QMI_UsageException +from qmi.data.dataset import DataSet + +QMI_DATASET = "QMI_Dataset_name_{ds_count}" def _relative_folder_path(date_str: str, time_str: str, label: str) -> str: @@ -49,7 +53,6 @@ def __init__( Raises: FileNotFoundError: If the specified DataFolder does not exist. 
""" - self.folder_path = folder_path self.label = label self.date_str = date_str @@ -61,49 +64,46 @@ def __init__( def __repr__(self) -> str: return f"DataFolder({self.folder_path!r})" - def _hdf5_file(self, name: str, mode: str, backend: str) -> h5py.File | h5netcdf.File: + def _hdf5_file(self, filename: str, mode: str, backend: str) -> h5py.File | h5netcdf.File: """Create a new, or open, HDF5 file in the data folder. Parameters: - name: Base name of the HDF5 file, without the extension ".h5" or ".hdf5". - mode: File mode. - backend: Backend for HDF5 file format. Options are "hdf5" (default) and "h5netcdf". + filename: Base name of the HDF5 file, with the extension ".h5" or ".hdf5". + mode: File mode. + backend: Backend for HDF5 file format. Options are "hdf5" (default) and "h5netcdf". Returns: A `File` object representing the HDF5 file. Raises: - ValueError: If the `name` has non-latin character(s). - IOError: If the file is already present and file mode does not allow truncation. - FileNotFoundError: If the file mode is set to read a file and the file was not found. - ValueError: Invalid HDF5 file backend. + ValueError: If the `name` has non-latin character(s). + QMI_UsageException: Only '.h5' and '.hdf5' file name extensions are allowed. + IOError: If the file is already present and file mode does not allow truncation. + FileNotFoundError: If the file mode is set to read a file and the file was not found. + ValueError: Invalid HDF5 file backend. """ - if not re.match(r"^[-_a-zA-Z0-9(),]+$", name): - raise ValueError(f"Invalid name {name!r}") + if not re.match(r"^[-_a-zA-Z0-9(),]+$", os.path.splitext(filename)[0]): + raise ValueError(f"Invalid name {filename!r}") - if mode in ["r+", "r", "x"]: - for ext in [".h5", ".hdf5"]: - filename = name + ext - # Let's check the file exists. 
- file_path = os.path.join(self.folder_path, filename) - file_found = os.path.isfile(file_path) - if file_found: - break + if not filename.endswith((".h5", ".hdf5", ".H5", ".HDF5")): + raise QMI_UsageException("Invalid HDF5 file extension, only '.h5' and '.hdf5' extensions are allowed.") + file_path = os.path.join(self.folder_path, filename) + if mode in ["r+", "r", "x"]: + # Let's check the file exists. + file_found = os.path.isfile(file_path) if mode == "x" and file_found: raise IOError(f"The file {filename} already exists and not allowed to overwrite.") - if mode in ["r+", "r"] and not file_found: - raise FileNotFoundError(f"Could not find HDF5 file with name {name}.") + raise FileNotFoundError(f"Could not find HDF5 file with name {filename}.") if backend == "h5py": return h5py.File(file_path, mode=mode) - - elif backend == "h5netcdf": + + if backend == "h5netcdf": return h5netcdf.File(file_path, mode=mode, decode_vlen_strings=False) - - else: - raise ValueError(f"Invalid backend type {backend}") + + raise ValueError(f"Invalid backend type {backend}") def write_config(self, config: Any) -> None: """Write QMI configuration to a file in the data folder.""" @@ -122,9 +122,7 @@ def write_config(self, config: Any) -> None: config_fn = "config" full_path_fn = os.path.join(self.folder_path, config_fn + ".json") - config_dict = config_struct_to_dict(config) - with open(full_path_fn, "w") as output: json.dump(config_dict, output, indent=4, sort_keys=True) @@ -140,7 +138,11 @@ def copy_file(self, filename: str) -> None: shutil.copy2(filename, self.folder_path) def write_dataset( - self, ds: DataSet, file_format: str = "hdf5", overwrite: bool = False, backend: str = "h5py" + self, + ds: DataSet, + file_format: str = "hdf5", + overwrite: bool = False, + backend: str = "h5py", ) -> None: """Write the specified DataSet to a new or existing file in the data folder. @@ -149,15 +151,16 @@ def write_dataset( Parameters: ds: DataSet instance to write. 
file_format: File format specification. - "hdf5" - selects HDF5 format with (default) - "text" - selects a space-separated text format + - "hdf5". Selects HDF5 format with (default); + - "text". Selects a space-separated text format. overwrite: Allow user to overwrite an existing dataset. Default is False. - backend: Select backend for HDF5 file format. Options are "hdf5" (default) and "h5netcdf". + backend: Select backend for HDF5 file format. Options are "hdf5" (default) and "h5netcdf".` Raises: - ValueError: Dataset name is invalid. - ValueError: Invalid HDF5 file backend. - OSError: If the data folder already contains a file with the same name. + ValueError: Dataset name is invalid. + ValueError: Invalid HDF5 file backend. + OSError: If the data folder already contains a file with the same name. + QMI_UsageException: Dataset name already exists and overwrite not allowed. """ if not re.match(r"^[-_a-zA-Z0-9(),]+$", ds.name): raise ValueError(f"Invalid DataSet name {ds.name!r}") @@ -166,16 +169,44 @@ def write_dataset( filename = ds.name + ".hdf5" file_path = os.path.join(self.folder_path, filename) if backend == "h5py": - with h5py.File(file_path, "w" if overwrite else "x") as f: - qmi.data.dataset.write_dataset_to_hdf5(ds, f) + f = h5py.File(file_path, "w" if overwrite else "x") elif backend == "h5netcdf": - with h5netcdf.File(file_path, "w" if overwrite else "x", decode_vlen_strings=False) as f: - qmi.data.dataset.write_dataset_to_hdf5(ds, f) + f = h5netcdf.File(file_path, "w" if overwrite else "x", decode_vlen_strings=False) else: raise ValueError(f"Invalid backend type {backend}.") + try: + if ds.is_raw: + target = f + f.attrs[QMI_DATASET.format(ds_count="").rstrip("_")] = ds.name + + else: + target = f.create_group(ds.name) + ds_count = 0 + # Check for existing datasets + while True: + check_name = QMI_DATASET.format(ds_count=ds_count) + if not check_name in f.attrs: + break + + if check_name == ds.name and overwrite: + break + + if check_name == ds.name 
and not overwrite: + raise QMI_UsageException(f"Dataset {ds.name} already exists and not allowed to overwrite.") + + f.attrs[QMI_DATASET.format(ds_count=ds_count)] = ds.name + + # Add QMI version to file + f.attrs["QMI_version"] = qmi.__version__ + f.attrs["QMI_Dataset"] = 1 + qmi.data.dataset.write_dataset_to_hdf5(ds, target) + + finally: + f.close() + elif file_format == "text": filename = ds.name + ".dat" file_path = os.path.join(self.folder_path, filename) @@ -214,8 +245,7 @@ def read_dataset(self, name: str, backend: str = "h5py") -> DataSet: return qmi.data.dataset.read_dataset_from_text(f) # Look for a HDF5 file with matching name. - extensions = [".hdf5", ".h5"] - for ext in extensions: + for ext in [".hdf5", ".h5"]: file_path = os.path.join(self.folder_path, name + ext) if os.path.isfile(file_path): if backend == "h5py": @@ -230,7 +260,7 @@ def read_dataset(self, name: str, backend: str = "h5py") -> DataSet: if name not in f: raise FileNotFoundError(f"No dataset {name!r} found in {file_path}.") - return qmi.data.dataset.read_dataset_from_hdf5(f[name], f) + return qmi.data.dataset.read_dataset_from_hdf5(f[name]) else: raise ValueError(f"Invalid backend type {backend}.") @@ -244,14 +274,18 @@ def make_hdf5file(self, name: str, backend: str = "h5py") -> h5py.File | h5netcd An error occurs if the specified file already exists. Parameters: - name: Base name of the HDF5 file, without the extension ".h5" or ".hdf5". - backend: Select backend for HDF5 file format. Options are "hdf5" (default) and "h5netcdf". + name: Base name of the HDF5 file, without the extension ".h5" or ".hdf5". + backend: Select backend for HDF5 file format. Options are "hdf5" (default) and "h5netcdf". Returns: - A `File` object representing the HDF5 file. - See http://docs.h5py.org/ for information on how to use this object. + hdf5_file: A file object representing a HDF5 file. 
""" - return self._hdf5_file(name, "x", backend) + hdf5_file = self._hdf5_file(name, "x", backend) + # Add QMI version to file + hdf5_file.attrs["QMI_version"] = qmi.__version__ + hdf5_file.attrs["QMI_Dataset"] = 1 + + return hdf5_file def open_hdf5file(self, name: str, write_mode: bool = False, backend: str = "h5py") -> h5py.File | h5netcdf.File: """Open an existing HDF5 file in the data folder. @@ -267,7 +301,49 @@ def open_hdf5file(self, name: str, write_mode: bool = False, backend: str = "h5p """ mode = "r+" if write_mode else "r" return self._hdf5_file(name, mode, backend) - + + def add_dataset_to_file( + self, + hdf5_file: h5py.File | h5netcdf.File, + ds: DataSet, + root_attrs: dict[ + str, + str | int | float | complex | str | np.ndarray | np.integer | list[int | float | complex | str] | tuple[ + int | float | complex | str, ... + ], + ] | None = None, + ) -> None: + """Add a dataset, and optional file attributes to an existing HDF5 file. + + Parameters: + hdf5_file: The file instance to add the dataset in. + ds: A QMI dataset instance that is to be added. + root_attrs: A dictionary of attributes that should be written in the file root. Default is `None`. + + Raises: + QMI_UsageException: If an attribute with the dataset name already exists in the file. + """ + if ds.name in hdf5_file: + raise QMI_UsageException(f"Data file already has an attribute named {ds.name}") + + if isinstance(hdf5_file, h5netcdf.File): + hdf5_file.dimensions[ds.column_label] = None + + keys = list(dict(hdf5_file.attrs).keys()) + ds_count = 0 + while True: + if not QMI_DATASET.format(ds_count=ds_count) in keys: + break + + ds_count += 1 + + hdf5_file.attrs[QMI_DATASET.format(ds_count=ds_count)] = ds.name + dataset_group = hdf5_file.create_group(ds.name) + qmi.data.dataset.write_dataset_to_hdf5(ds, dataset_group) + # Add root attributes + for attr, val in (root_attrs or {}).items(): + hdf5_file.attrs[attr] = val + class DataStore: """A DataStore represents a collection of stored data. 
@@ -334,18 +410,16 @@ def make_folder( Raises: FileExistsError: If the DataFolder already exists. """ - # Determine date_str and time_str. if (date_str is None) or (time_str is None): if (date_str is not None) or (time_str is not None): raise ValueError("Specify both date_str and time_str or neither.") + # Generate date_str and time_str from timestamp. if timestamp is None: timestamp = time.time() - if self.USE_LOCAL_TIME: - tm = time.localtime(timestamp) - else: - tm = time.gmtime(timestamp) + + tm = time.localtime(timestamp) if self.USE_LOCAL_TIME else time.gmtime(timestamp) date_str = time.strftime("%Y%m%d", tm) time_str = time.strftime("%H%M%S", tm) @@ -379,8 +453,8 @@ def make_folder( full_path = os.path.join(self.basedir, rel_path) if os.path.exists(full_path): raise FileExistsError(f"Directory {full_path!r} already exists.") - os.mkdir(full_path) + os.mkdir(full_path) return DataFolder(full_path, label, date_str, time_str) def get_folder(self, label: str, date_str: str, time_str: str) -> DataFolder: @@ -437,9 +511,7 @@ def list_folders(self, label: str | None = None) -> list[DataFolder]: Returns: ret: List of matching DataFolder items. 
""" - ret = [] - date_dirs = os.listdir(self.basedir) date_dirs.sort() for dd in date_dirs: diff --git a/tests/data/test_dataset.py b/tests/data/test_dataset.py index 69adfe94..0070f3d7 100644 --- a/tests/data/test_dataset.py +++ b/tests/data/test_dataset.py @@ -73,13 +73,6 @@ def test_04_no_shape_nor_data_specified_raises_exception(self): with self.assertRaises(TypeError): DataSet("empty_dataset", shape=None, data=None) - def test_05_data_dimensions_not_two_or_more(self): - """An exception must be raised if neither data dimension and shape is less than 2""" - with self.assertRaises(ValueError) as exc: - DataSet("empty_dataset", shape=(3,), data=np.array([1.0, 2.0, 3.0])) - - self.assertEqual( "Dataset must have at least 2 axes.", str(exc.exception)) - def test_06_data_dimension_invalid(self): """Zero (and negative) dimension values raise an exception""" with self.assertRaises(ValueError) as exc: @@ -89,23 +82,24 @@ def test_06_data_dimension_invalid(self): def test_10_create_empty_dataset(self): """Create a simple, empty DataSet instance.""" - + # Arrange + expected_root_attrs = {} ds_name = "empty_dataset" ds_shape = (8, 3) + # Act ds = DataSet(ds_name, shape=ds_shape) - + # Assert self.assertEqual(ds_name, ds.name) self.assertIsInstance(ds.data, np.ndarray) self.assertEqual(ds_shape, ds.data.shape) self.assertEqual(np.float64, ds.data.dtype) self.assertTrue(np.all(ds.data == 0)) - - self.assertEqual([""], ds.axis_label) - self.assertEqual([""], ds.axis_unit) - self.assertEqual([None], ds.axis_scale) - self.assertEqual(["", "", ""], ds.column_label) - self.assertEqual(["", "", ""], ds.column_unit) - self.assertEqual({}, ds.attrs) + self.assertListEqual([""], ds.axis_label) + self.assertListEqual([""], ds.axis_unit) + self.assertListEqual([None], ds.axis_scale) + self.assertListEqual(["", "", ""], ds.column_label) + self.assertListEqual(["", "", ""], ds.column_unit) + self.assertDictEqual(expected_root_attrs, ds.attrs) def test_11_create_initialized_dataset(self): 
"""Create a simple dataset from existing Numpy data.""" @@ -121,9 +115,23 @@ def test_11_create_initialized_dataset(self): self.assertEqual(np.float64, ds.data.dtype) self.assertTrue(np.all(ds.data == data)) - def test_20_labels(self): - """Setting labels.""" + def test_12_create_initialized_3d_dataset(self): + """Create a 3d dataset from existing Numpy data.""" + + ds_name = "my_3d_dataset" + ds_shape = (3, 5, 2) + data = np.sqrt(np.arange(30)).reshape(ds_shape[0], ds_shape[1], ds_shape[2]) + ds = DataSet(ds_name, data=data) + + self.assertEqual(ds_name, ds.name) + self.assertIsInstance(ds.data, np.ndarray) + self.assertEqual(ds_shape, ds.data.shape) + self.assertEqual(np.float64, ds.data.dtype) + self.assertTrue(np.all(ds.data == data)) + def test_20_labels(self): + """Setting labels on dataset with three columns and two axes.""" + # Arrange ds_name = "my_dataset" ds_shape = (2, 8, 3) ds = DataSet(ds_name, shape=ds_shape) @@ -132,7 +140,7 @@ def test_20_labels(self): axis_scale = [None, None] column_labels = ["power", "countrate", "temperature"] column_units = ["mW", "kHz", "K"] - + # Act ds.set_axis_label(0, "X") ds.set_axis_unit(0, "um") ds.set_axis_label(1, "Z") @@ -143,10 +151,10 @@ def test_20_labels(self): ds.set_column_unit(1, "kHz") ds.set_column_label(2, "temperature") ds.set_column_unit(2, "K") - + # Assert self.assertEqual(ds_name, ds.name) self.assertEqual(ds_shape, ds.data.shape) - self.assertEqual(axis_labels, ds.axis_label, ) + self.assertEqual(axis_labels, ds.axis_label) self.assertEqual(axis_unit, ds.axis_unit) self.assertEqual(axis_scale, ds.axis_scale) self.assertEqual(column_labels, ds.column_label) @@ -240,15 +248,17 @@ def test_26_invalid_column_unit(self): def test_30_dataset_1col(self): """Create a 3-dimensional, 1-column dataset and setting labels.""" - + # Arrange + expected_root_attrs = {} ds_name = "my_dataset" ds_shape = (2, 5, 1) data = np.arange(10).reshape(*ds_shape) + # Act ds = DataSet(ds_name, data=data) ds.set_axis_label(0, 
"X") ds.set_axis_label(1, "Y") ds.set_column_label(0, "Z") - + # Assert self.assertEqual(ds_name, ds.name) self.assertEqual(ds_shape, ds.data.shape) self.assertEqual(ds.axis_label, ["X", "Y"]) @@ -256,7 +266,79 @@ def test_30_dataset_1col(self): self.assertEqual(ds.axis_scale, [None, None]) self.assertEqual(ds.column_label, ["Z"]) self.assertEqual(ds.column_unit, [""]) - self.assertEqual(ds.attrs, {}) + self.assertEqual(expected_root_attrs, ds.attrs) + + def test_31_create_raw_1d_dataset(self): + """Create a 1-dimensional raw dataset.""" + # Arrange + ds_name = "raw_trace" + data = np.arange(5, dtype=np.int16) + # Act + ds = DataSet(ds_name, data=data) + ds.set_column_label(0, "signal") + ds.set_column_unit(0, "V") + ds.set_column_name(0, "Measured signal") + # Assert + self.assertEqual(ds_name, ds.name) + self.assertTrue(ds.is_raw) + self.assertEqual(0, ds.n_axes) + self.assertEqual(1, ds.ncol) + self.assertListEqual([], ds.axis_label) + self.assertListEqual(["signal"], ds.column_label) + self.assertListEqual(["V"], ds.column_unit) + self.assertListEqual(["Measured signal"], ds.column_name) + self.assertTrue(np.all(ds.data == data)) + + def test_32_axis_mode_is_activated_for_2d_axis_metadata(self): + """Setting 2D axis metadata changes a raw tabular dataset into an axis dataset.""" + # Arrange + ds = DataSet("axis_trace", shape=(4, 2)) + axis_scale = 0.5 * np.arange(4) + # Act + ds.set_axis_label(0, "time") + ds.set_axis_unit(0, "s") + ds.set_axis_name(0, "Elapsed time") + ds.set_axis_scale(0, axis_scale) + # Assert + self.assertFalse(ds.is_raw) + self.assertEqual(1, ds.n_axes) + self.assertListEqual(["time"], ds.axis_label) + self.assertListEqual(["s"], ds.axis_unit) + self.assertListEqual(["Elapsed time"], ds.axis_name) + self.assertTrue(np.all(ds.axis_scale[0] == axis_scale)) + + def test_33_invalid_column_name(self): + """Setting invalid column names raises exception.""" + ds = DataSet("my_dataset", shape=(2, 8, 3)) + + with self.assertRaises(TypeError): + 
ds.set_column_name("0", "X") + + with self.assertRaises(ValueError): + ds.set_column_name(-1, "X") + + def test_34_internal_properties_and_unusual_axis_activation(self): + """Cover internal dimension state and axis activation list initialization.""" + ds = DataSet("typed_dataset", shape=(4,), dtype=np.int32) + + self.assertEqual(0, ds._ndim) + self.assertEqual(np.int32, ds.data.dtype) + + with self.assertRaises(ValueError): + DataSet("scalar_dataset", data=np.array(1.0)) + + ds2 = DataSet("manual_axis_dataset", shape=(3, 2)) + ds2.axis_label = [] + ds2.axis_unit = [] + ds2.axis_name = [] + ds2.axis_scale = [] + ds2.set_axis_label(0, "row") + + self.assertFalse(ds2.is_raw) + self.assertListEqual(["row"], ds2.axis_label) + self.assertListEqual([""], ds2.axis_unit) + self.assertListEqual([""], ds2.axis_name) + self.assertListEqual([None], ds2.axis_scale) def test_40_write_hdf5_simple(self): """Writing a simple dataset as HDF5 with h5py backend.""" @@ -273,70 +355,74 @@ def test_40_write_hdf5_simple(self): ds.attrs["hello"] = "world" ds.attrs["number"] = 2.71 - f = h5py.File("test.h5", "w", driver="core", backing_store=False) - qmi.data.dataset.write_dataset_to_hdf5(ds, f) + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + grp = f.create_group(ds.name) + qmi.data.dataset.write_dataset_to_hdf5(ds, grp) - self.assertEqual(set(f.keys()), {ds_name, "my_dataset_axis0_scale"}) - self.assertEqual(f[ds_name].shape, (8, 3)) - self.assertEqual(f[ds_name].dtype, np.float64) - self.assertTrue(np.all(data == f[ds_name])) + self.assertListEqual([ds_name], list(f.keys())) + for e, col in enumerate(ds.column_label): + self.assertEqual((8,), f[ds_name][col].shape) + self.assertEqual(np.float64, f[ds_name][col].dtype) + self.assertTrue(np.all(data[:, e] == f[ds_name][col])) + self.assertEqual(ds.column_label[e], f[ds_name].attrs[f"{ds_name}_column{e}_label"]) + self.assertEqual(ds.column_unit[e], f[ds_name].attrs[f"{ds_name}_column{e}_unit"],) - 
self.assertEqual(f[ds_name].attrs["QMI_DataSet_axis0_label"], "X axis") - self.assertEqual(f[ds_name].attrs["QMI_DataSet_axis0_unit"], "mm") - self.assertEqual(f[ds_name].attrs["QMI_DataSet_column1_label"], "green") - self.assertEqual(f[ds_name].attrs["QMI_DataSet_column1_unit"], "nm") + self.assertCountEqual(list(f[ds_name].keys()), ds.axis_label + ds.column_label) + self.assertEqual(f[ds_name].attrs[f"{ds_name}_axis0_label"], "X axis") + self.assertEqual(f[ds_name].attrs[f"{ds_name}_axis0_unit"], "mm") - self.assertEqual(f[ds_name].dims[0].label, "X axis") - self.assertTrue(np.all(scale_data == f[ds_name].dims[0][0])) + self.assertTrue(np.all(scale_data == np.array(f[ds_name]["X axis"]))) - self.assertEqual(f[ds_name].attrs["hello"], "world") - self.assertEqual(f[ds_name].attrs["number"], 2.71) - - f.close() + self.assertEqual(f[ds_name].attrs["hello"], "world") + self.assertEqual(f[ds_name].attrs["number"], 2.71) def test_41_write_read_hdf5(self): """Writing and reading various datasets as HDF5 with h5py backend.""" - f = h5py.File("test.h5", "w", driver="core", backing_store=False) datasets = [] + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + # Create datasets and write to file. + for (name, shape, dtype) in [ + ("t1", (4, 4), np.float64), + ("t2", (4, 4), np.float64), + ("t3", (3, 3, 3, 3), np.float64), + ("t4", (4, 4, 1), np.int32)]: + ds = _internal_create_dataset( + name, + shape, + dtype, + add_labels=True, + add_scale=True, + add_attributes=True + ) + datasets.append(ds) + grp = f.create_group(name) + qmi.data.dataset.write_dataset_to_hdf5(ds, grp) - # Create datasets and write to file. 
- for (name, shape, dtype) in [ - ("t1", (4, 4), np.float64), - ("t2", (4, 4), np.float64), - ("t3", (3, 3, 3, 3), np.float64), - ("t4", (4, 4, 1), np.int32)]: - ds = _internal_create_dataset(name, - shape, - dtype, - add_labels=True, - add_scale=True, - add_attributes=True) - datasets.append(ds) - qmi.data.dataset.write_dataset_to_hdf5(ds, f) + # Read back from file and verify. + for d, ds in enumerate(datasets): + ds2 = qmi.data.dataset.read_dataset_from_hdf5(f[ds.name]) + self.assertEqual(ds2.name, ds.name) + self.assertIsInstance(ds2.data, np.ndarray) + self.assertEqual(ds.data.shape, ds2.data.shape) + self.assertEqual(ds.data.dtype, ds2.data.dtype) + self.assertTrue(np.all(ds2.data == ds.data)) + self.assertEqual(ds.column_label, ds2.column_label) + self.assertEqual(ds.column_unit, ds2.column_unit) + # All ds attributes should be in ds2. + for attr in ds.attrs: + self.assertIn(attr, ds2.attrs) - # Read back from file and verify. - for ds in datasets: - ds2 = qmi.data.dataset.read_dataset_from_hdf5(f[ds.name]) - self.assertEqual(ds2.name, ds.name) - self.assertIsInstance(ds2.data, np.ndarray) - self.assertEqual(ds2.data.shape, ds.data.shape) - self.assertEqual(ds2.data.dtype, ds.data.dtype) - self.assertTrue(np.all(ds2.data == ds.data)) - self.assertEqual(ds2.axis_label, ds.axis_label) - self.assertEqual(ds2.axis_unit, ds.axis_unit) - self.assertEqual(len(ds2.axis_scale), len(ds.axis_scale)) - for axis in range(len(ds.axis_scale)): - if ds.axis_scale[axis] is None: - self.assertIsNone(ds2.axis_scale[axis]) - else: - self.assertTrue(np.all(ds2.axis_scale[axis] == ds.axis_scale[axis])) - - self.assertEqual(ds2.column_label, ds.column_label) - self.assertEqual(ds2.column_unit, ds.column_unit) - self.assertEqual(ds2.attrs, ds.attrs) + self.assertEqual(len(ds.axis_scale), len(ds2.axis_scale)) + for e, label in enumerate(ds.axis_label): + self.assertEqual(ds2.axis_label[e], label) + self.assertEqual(ds2.axis_unit[e], ds.axis_unit[e]) - f.close() + for axis in 
range(len(ds.axis_scale)): + if ds.axis_scale[axis] is None: + self.assertIsNone(ds2.axis_scale[axis]) + else: + self.assertTrue(np.all(ds2.axis_scale[axis] == ds.axis_scale[axis])) def test_42_write_hdf5_raises_exception_on_invalid_attrs_name(self): """If attrs name is equal or starting with 'QMI_DataSet' or 'DIMENSION_', an exception is raised.""" @@ -349,30 +435,29 @@ def test_42_write_hdf5_raises_exception_on_invalid_attrs_name(self): ds.column_unit = ["MHz", "nm", "K"] scale_data = 0.1 * np.arange(8) ds.set_axis_scale(0, scale_data) - ds.attrs["QMI_DataSet"] = "Hello" # <-- Error + ds.attrs["QMI_Dataset"] = "Hello" # <-- Error ds.attrs["number"] = 2.71 - f = h5py.File("test.h5", "w", driver="core", backing_store=False) - with self.assertRaises(ValueError): - qmi.data.dataset.write_dataset_to_hdf5(ds, f) + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + with self.assertRaises(ValueError): + qmi.data.dataset.write_dataset_to_hdf5(ds, f) - ds2 = DataSet(ds_name, data=data) - ds2.axis_label[0] = "X axis" - ds2.axis_unit[0] = "mm" - ds2.column_label = ["red", "green", "blue"] - ds2.column_unit = ["MHz", "nm", "K"] - scale_data = 0.1 * np.arange(8) - ds2.set_axis_scale(0, scale_data) - ds2.attrs["DIMENSION_"] = "Hello" # <-- Error - ds2.attrs["number"] = 2.71 + ds2 = DataSet(ds_name, data=data) + ds2.axis_label[0] = "X axis" + ds2.axis_unit[0] = "mm" + ds2.column_label = ["red", "green", "blue"] + ds2.column_unit = ["MHz", "nm", "K"] + scale_data = 0.1 * np.arange(8) + ds2.set_axis_scale(0, scale_data) + ds2.attrs["DIMENSION_"] = "Hello" # <-- Error + ds2.attrs["number"] = 2.71 - with self.assertRaises(ValueError): - qmi.data.dataset.write_dataset_to_hdf5(ds2, f) + with self.assertRaises(ValueError): + qmi.data.dataset.write_dataset_to_hdf5(ds2, f) def test_43_write_hdf5_simple(self): """Writing a simple dataset as HDF5 with h5netcdf backend.""" ds_name = "my_dataset" - ds_scale_name = ds_name + "_axis0_scale" data = 1.4142 * 
(np.arange(24).reshape(8, 3) - 1) ds = DataSet(ds_name, data=data) ds.axis_label[0] = "X axis" @@ -384,43 +469,39 @@ def test_43_write_hdf5_simple(self): ds.attrs["hello"] = "world" ds.attrs["number"] = 2.71 - f = h5netcdf.File("test.h5", "w", driver="core", backing_store=False, decode_vlen_strings=False) - qmi.data.dataset.write_dataset_to_hdf5(ds, f) - - self.assertEqual(set(f.keys()), {ds_name, ds_scale_name}) - self.assertEqual(f[ds_name].shape, (8, 3)) - self.assertEqual(f[ds_name].dtype, np.float64) - self.assertTrue(np.all(data == f[ds_name])) + with h5netcdf.File("test.h5", "w", driver="core", backing_store=False, decode_vlen_strings=False) as f: + grp = f.create_group(ds.name) + qmi.data.dataset.write_dataset_to_hdf5(ds, grp) + self.assertListEqual([ds_name], list(f.keys())) + for e, col in enumerate(ds.column_label): + self.assertEqual((8,), f[ds_name][col].shape) + self.assertEqual(np.float64, f[ds_name][col].dtype) + self.assertTrue(np.all(data[:, e] == f[ds_name][col])) + self.assertEqual(ds.column_label[e], f[ds_name].attrs[f"{ds_name}_column{e}_label"]) + self.assertEqual(ds.column_unit[e], f[ds_name].attrs[f"{ds_name}_column{e}_unit"],) - self.assertEqual(f[ds_name].attrs["QMI_DataSet_axis0_label"], "X axis") - self.assertEqual(f[ds_name].attrs["QMI_DataSet_axis0_unit"], "mm") - self.assertEqual(f[ds_name].attrs["QMI_DataSet_column1_label"], "green") - self.assertEqual(f[ds_name].attrs["QMI_DataSet_column1_unit"], "nm") + self.assertCountEqual(list(f[ds_name].keys()), ds.axis_label + ds.column_label) + self.assertEqual(f[ds_name].attrs[f"{ds_name}_axis0_label"], "X axis") + self.assertEqual(f[ds_name].attrs[f"{ds_name}_axis0_unit"], "mm") - self.assertEqual(f[ds_name].dimensions[0], "X axis") - self.assertTrue(np.all(scale_data == f[ds_name].attrs.get(ds_scale_name))) + self.assertTrue(np.all(scale_data == np.array(f[ds_name]["X axis"]))) - self.assertEqual(f[ds_name].attrs["hello"], "world") - self.assertEqual(f[ds_name].attrs["number"], 2.71) - - 
f.close() + self.assertEqual(f[ds_name].attrs["hello"], "world") + self.assertEqual(f[ds_name].attrs["number"], 2.71) def test_44_write_read_hdf5(self): - """Writing and reading various datasets as HDF5.""" + """Writing and reading various datasets as HDF5 with h5netcdf backend.""" datasets = [] - - # Create datasets and write to file. - for (name, shape, dtype) in [ - ("t1", (4, 4), np.float64), - ("t2", (4, 4), np.float64), - ("t3", (3, 3, 3, 3), np.float64), - ("t4", (4, 4, 1), np.int32) - ]: - # h5netcdf does not clear the dataset from the backend. Thus we need to open-close the file on each test - with h5netcdf.File( - "test.h5", "w", driver="core", backing_store=False, decode_vlen_strings=False - ) as f: + # h5netcdf does not clear the dataset from the backend. Thus we need to open-close the file on each test + with h5netcdf.File("test.h5", "w", driver="core", backing_store=False, decode_vlen_strings=False) as f: + # Create datasets and write to file. + for (name, shape, dtype) in [ + ("t1", (4, 4), np.float64), + ("t2", (4, 4), np.float64), + ("t3", (3, 3, 3, 3), np.float64), + ("t4", (4, 4, 1), np.int32) + ]: ds = _internal_create_dataset( name, shape, @@ -430,30 +511,38 @@ def test_44_write_read_hdf5(self): add_attributes=True ) datasets.append(ds) - qmi.data.dataset.write_dataset_to_hdf5(ds, f) + grp = f.create_group(name) + qmi.data.dataset.write_dataset_to_hdf5(ds, grp) - # Read back from file and verify. - ds2 = qmi.data.dataset.read_dataset_from_hdf5(f[ds.name], f) + # Read back from file and verify. 
+ for ds in datasets: + ds2 = qmi.data.dataset.read_dataset_from_hdf5(f[ds.name]) self.assertEqual(ds2.name, ds.name) self.assertIsInstance(ds2.data, np.ndarray) - self.assertEqual(ds2.data.shape, ds.data.shape) - self.assertEqual(ds2.data.dtype, ds.data.dtype) + self.assertEqual(ds.data.shape, ds2.data.shape) + self.assertEqual(ds.data.dtype, ds2.data.dtype) self.assertTrue(np.all(ds2.data == ds.data)) - self.assertEqual(ds2.axis_label, ds.axis_label) - self.assertEqual(ds2.axis_unit, ds.axis_unit) - self.assertEqual(len(ds2.axis_scale), len(ds.axis_scale)) + self.assertEqual(ds.column_label, ds2.column_label) + self.assertEqual(ds.column_unit, ds2.column_unit) + # All ds attributes should be in ds2. + for attr in ds.attrs: + self.assertIn(attr, ds2.attrs) + + self.assertEqual(len(ds.axis_scale), len(ds2.axis_scale)) + for e, label in enumerate(ds.axis_label): + self.assertEqual(ds2.axis_label[e], label) + self.assertEqual(ds2.axis_unit[e], ds.axis_unit[e]) + for axis in range(len(ds.axis_scale)): if ds.axis_scale[axis] is None: self.assertIsNone(ds2.axis_scale[axis]) else: self.assertTrue(np.all(ds2.axis_scale[axis] == ds.axis_scale[axis])) - self.assertEqual(ds2.column_label, ds.column_label) - self.assertEqual(ds2.column_unit, ds.column_unit) - self.assertEqual(ds2.attrs, ds.attrs) - def test_45_write_hdf5_raises_exception_on_invalid_attrs_name(self): - """If attrs name is equal or starting with 'QMI_DataSet' or 'DIMENSION_', and exception is raised""" + """If attrs name is equal or starting with 'QMI_DataSet' or 'DIMENSION_', and exception is raised. + This is done now with h5netcdf backend. 
+ """ ds_name = "my_dataset" data = 1.4142 * (np.arange(24).reshape(8, 3) - 1) ds = DataSet(ds_name, data=data) @@ -463,12 +552,12 @@ def test_45_write_hdf5_raises_exception_on_invalid_attrs_name(self): ds.column_unit = ["MHz", "nm", "K"] scale_data = 0.1 * np.arange(8) ds.set_axis_scale(0, scale_data) - ds.attrs["QMI_DataSet"] = "Hello" # <-- Error + ds.attrs["QMI_Dataset"] = "Hello" # <-- Error ds.attrs["number"] = 2.71 - f = h5netcdf.File("test.h5", "w", driver="core", backing_store=False, decode_vlen_strings=False) - with self.assertRaises(ValueError): - qmi.data.dataset.write_dataset_to_hdf5(ds, f) + with h5netcdf.File("test.h5", "w", driver="core", backing_store=False, decode_vlen_strings=False) as f: + with self.assertRaises(ValueError): + qmi.data.dataset.write_dataset_to_hdf5(ds, f) ds2 = DataSet(ds_name, data=data) ds2.axis_label[0] = "X axis" @@ -483,6 +572,286 @@ def test_45_write_hdf5_raises_exception_on_invalid_attrs_name(self): with self.assertRaises(ValueError): qmi.data.dataset.write_dataset_to_hdf5(ds2, f) + def test_46_write_read_hdf5_axis_and_column_names(self): + """Writing and reading long axis and column names as HDF5.""" + ds_name = "named_dataset" + data = np.arange(12, dtype=np.float64).reshape(3, 2, 2) + ds = DataSet(ds_name, data=data) + ds.set_axis_label(0, "x") + ds.set_axis_unit(0, "mm") + ds.set_axis_name(0, "Sample x position") + ds.set_axis_label(1, "y") + ds.set_axis_unit(1, "mm") + ds.set_axis_name(1, "Sample y position") + ds.set_column_label(0, "red") + ds.set_column_unit(0, "counts") + ds.set_column_name(0, "Red detector counts") + ds.set_column_label(1, "blue") + ds.set_column_unit(1, "counts") + ds.set_column_name(1, "Blue detector counts") + + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + grp = f.create_group(ds.name) + qmi.data.dataset.write_dataset_to_hdf5(ds, grp) + + ds2 = qmi.data.dataset.read_dataset_from_hdf5(f[ds.name]) + + self.assertListEqual(ds.axis_name, ds2.axis_name) + 
self.assertListEqual(ds.column_name, ds2.column_name) + self.assertListEqual(ds.axis_label, ds2.axis_label) + self.assertListEqual(ds.column_label, ds2.column_label) + self.assertTrue(np.all(ds2.data == ds.data)) + + def test_47_write_hdf5_duplicate_column_labels_get_unique_keys(self): + """Duplicate column labels are stored under unique HDF5 keys.""" + ds_name = "duplicate_columns" + data = np.arange(12, dtype=np.float64).reshape(4, 3) + ds = DataSet(ds_name, data=data) + ds.set_column_label(0, "signal") + ds.set_column_label(1, "signal") + ds.set_column_label(2, "signal") + + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + qmi.data.dataset.write_dataset_to_hdf5(ds, f) + + self.assertCountEqual(["signal", "signal_1", "signal_2"], list(f.keys())) + ds2 = qmi.data.dataset.read_dataset_from_hdf5(f) + + self.assertListEqual(ds.column_label, ds2.column_label) + self.assertTrue(np.all(ds2.data == ds.data)) + + def test_48_convert_plain_hdf5_dataset_to_qmi_dataset(self): + """Convert a plain HDF5 dataset with common metadata attributes.""" + data = np.array([1.0, 2.5, 4.0]) + + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + hdf_ds = f.create_dataset("trace", data=data) + hdf_ds.attrs["name"] = "Signal" + hdf_ds.attrs["unit"] = "V" + hdf_ds.attrs["long_name"] = "Input signal" + hdf_ds.attrs["experiment"] = "calibration" + + ds = qmi.data.dataset.read_dataset_from_hdf5(hdf_ds) + + self.assertEqual("trace", ds.name) + self.assertTrue(ds.is_raw) + self.assertListEqual(["Signal"], ds.column_label) + self.assertListEqual(["V"], ds.column_unit) + self.assertListEqual(["Input signal"], ds.column_name) + self.assertEqual("calibration", ds.attrs["experiment"]) + self.assertTrue(np.all(ds.data == data)) + + def test_49_write_read_hdf5_raw_1d_dataset(self): + """Writing and reading a raw 1D dataset as HDF5.""" + data = np.arange(5, dtype=np.float64) + ds = DataSet("raw_signal", data=data) + ds.set_column_label(0, "signal") + 
ds.set_column_unit(0, "V") + ds.set_column_name(0, "Input signal") + + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + qmi.data.dataset.write_dataset_to_hdf5(ds, f) + + self.assertIn("raw_signal_column0_key", f.attrs) + self.assertEqual("signal", f["signal"].attrs["name"]) + self.assertEqual("V", f["signal"].attrs["unit"]) + self.assertEqual("Input signal", f["signal"].attrs["long_name"]) + ds2 = qmi.data.dataset.read_dataset_from_hdf5(f) + + self.assertTrue(ds2.is_raw) + self.assertTrue(np.all(ds2.data == data)) + self.assertListEqual(ds.column_label, ds2.column_label) + self.assertListEqual(ds.column_unit, ds2.column_unit) + self.assertListEqual(ds.column_name, ds2.column_name) + + def test_49b_write_hdf5_raw_2d_dataset(self): + """Writing a raw 2D dataset as HDF5 creates one dataset per column.""" + data = np.arange(12, dtype=np.float64).reshape(4, 3) + ds = DataSet("raw_table", data=data) + ds.set_column_label(0, "x") + ds.set_column_unit(0, "mm") + ds.set_column_name(0, "X position") + ds.set_column_label(1, "y") + ds.set_column_unit(1, "mm") + ds.set_column_name(1, "Y position") + ds.set_column_name(2, "Signal") + + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + qmi.data.dataset.write_dataset_to_hdf5(ds, f) + + self.assertCountEqual(["x", "y", "column_2"], list(f.keys())) + self.assertEqual("x", f["x"].attrs["name"]) + self.assertEqual("mm", f["x"].attrs["unit"]) + self.assertEqual("X position", f["x"].attrs["long_name"]) + self.assertEqual("Signal", f["column_2"].attrs["long_name"]) + + def test_49c_write_hdf5_duplicate_axis_labels_get_unique_scale_keys(self): + """Duplicate axis labels are stored under unique HDF5 scale keys.""" + data = np.arange(12, dtype=np.float64).reshape(2, 3, 2) + ds = DataSet("duplicate_axes", data=data) + ds.set_axis_label(0, "axis") + ds.set_axis_unit(0, "s") + ds.set_axis_name(0, "First axis") + ds.set_axis_scale(0, np.array([1.0, 2.0])) + ds.set_axis_label(1, "axis") + 
ds.set_axis_unit(1, "m") + ds.set_axis_name(1, "Second axis") + ds.set_axis_scale(1, np.array([3.0, 4.0, 5.0])) + ds.set_column_label(0, "a") + ds.set_column_label(1, "b") + + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + grp = f.create_group(ds.name) + qmi.data.dataset.write_dataset_to_hdf5(ds, grp) + + self.assertIn("axis", grp) + self.assertIn("axis_1", grp) + self.assertEqual("First axis", grp["axis"].attrs["long_name"]) + self.assertEqual("Second axis", grp["axis_1"].attrs["long_name"]) + self.assertEqual("axis_1", grp.attrs["duplicate_axes_axis1_key"]) + + def test_49d_read_hdf5_rejects_mismatched_column_shapes(self): + """Reading QMI HDF5 metadata rejects columns with mismatched shapes.""" + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + f.attrs["QMI_Dataset"] = 1 + f.attrs["QMI_Dataset_name"] = "bad_columns" + f.attrs["QMI_Dataset_layout"] = "raw" + f.attrs["bad_columns_timestamp"] = 1.0 + f.attrs["bad_columns_data_ndim"] = 2 + f.attrs["bad_columns_n_axes"] = 0 + f.attrs["bad_columns_ncol"] = 2 + f.attrs["bad_columns_dim0_size"] = 2 + f.attrs["bad_columns_dim1_size"] = 2 + f.attrs["bad_columns_column0_key"] = "a" + f.attrs["bad_columns_column1_key"] = "b" + f.create_dataset("a", data=np.arange(2)) + f.create_dataset("b", data=np.arange(3)) + + with self.assertRaises(ValueError) as exc: + qmi.data.dataset.read_dataset_from_hdf5(f) + + self.assertIn("matching shapes", str(exc.exception)) + + def test_49e_read_hdf5_rejects_invalid_axis_scale_shape(self): + """Reading QMI HDF5 metadata rejects invalid axis scale shapes.""" + ds = DataSet("bad_scale", data=np.arange(6, dtype=np.float64).reshape(3, 2)) + ds.set_axis_label(0, "time") + ds.set_column_label(0, "a") + ds.set_column_label(1, "b") + + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + qmi.data.dataset.write_dataset_to_hdf5(ds, f) + f.attrs["bad_scale_axis0_key"] = "bad_time" + f.create_dataset("bad_time", 
data=np.arange(2)) + + with self.assertRaises(ValueError) as exc: + qmi.data.dataset.read_dataset_from_hdf5(f) + + self.assertIn("Invalid shape of dimension scale", str(exc.exception)) + + def test_49f_convert_plain_hdf5_group_with_axes_and_columns(self): + """Convert a plain HDF5 group with dimension scales and columns.""" + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + grp = f.create_group("plain_group") + x = grp.create_dataset("x", data=np.array([0.0, 0.5])) + x.attrs["name"] = "x" + x.attrs["unit"] = "mm" + x.attrs["long_name"] = "X position" + x.make_scale("x") + y = grp.create_dataset("y", data=np.array([1.0, 2.0, 3.0])) + y.attrs["name"] = "y" + y.attrs["units"] = "s" + y.attrs["long_name"] = "Y delay" + y.make_scale("y") + signal = grp.create_dataset("signal", data=np.arange(6, dtype=np.float64).reshape(2, 3)) + signal.attrs["name"] = "counts" + signal.attrs["unit"] = "Hz" + signal.attrs["long_name"] = "Photon counts" + background = grp.create_dataset("background", data=np.ones((2, 3))) + background.attrs["label"] = "bg" + grp.attrs["experiment"] = "scan" + + ds = qmi.data.dataset.convert_to_qmi_dataset(grp) + + self.assertEqual("plain_group", ds.name) + self.assertFalse(ds.is_raw) + self.assertListEqual(["x", "y"], ds.axis_label) + self.assertListEqual(["mm", "s"], ds.axis_unit) + self.assertListEqual(["X position", "Y delay"], ds.axis_name) + self.assertTrue(np.all(ds.axis_scale[0] == np.array([0.0, 0.5]))) + self.assertCountEqual(["counts", "bg"], ds.column_label) + counts_index = ds.column_label.index("counts") + self.assertEqual("Hz", ds.column_unit[counts_index]) + self.assertEqual("Photon counts", ds.column_name[counts_index]) + self.assertEqual("scan", ds.attrs["experiment"]) + self.assertEqual((2, 3, 2), ds.data.shape) + + def test_49g_convert_plain_hdf5_file_with_single_group(self): + """Convert a plain HDF5 file containing one group.""" + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + grp 
= f.create_group("single") + grp.create_dataset("signal", data=np.array([1.0, 2.0])) + + ds = qmi.data.dataset.convert_to_qmi_dataset(f) + + self.assertEqual("single", ds.name) + self.assertListEqual(["signal"], ds.column_label) + self.assertTrue(np.all(ds.data == np.array([1.0, 2.0]))) + + def test_49h_convert_plain_hdf5_file_with_root_datasets(self): + """Convert a plain HDF5 file with root datasets and a root name attribute.""" + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + f.attrs["name"] = "root_dataset" + f.create_dataset("signal", data=np.array([1.0, 2.0])) + + ds = qmi.data.dataset.convert_to_qmi_dataset(f) + + self.assertEqual("root_dataset", ds.name) + self.assertListEqual(["signal"], ds.column_label) + + def test_49i_convert_plain_hdf5_axis_only_group(self): + """Convert a plain HDF5 group that only contains an axis scale dataset.""" + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + grp = f.create_group("axis_only") + axis = grp.create_dataset("time", data=np.array([1.0, 2.0, 3.0])) + axis.attrs["name"] = "time" + axis.attrs["unit"] = "s" + axis.attrs["long_name"] = "Elapsed time" + axis.make_scale("time") + + ds = qmi.data.dataset.convert_to_qmi_dataset(grp) + + self.assertTrue(ds.is_raw) + self.assertListEqual(["time"], ds.column_label) + self.assertListEqual(["s"], ds.column_unit) + self.assertListEqual(["Elapsed time"], ds.column_name) + self.assertTrue(np.all(ds.data == np.array([1.0, 2.0, 3.0]))) + + def test_49j_convert_plain_hdf5_group_error_cases(self): + """Converting plain HDF5 groups rejects ambiguous or invalid inputs.""" + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + f.create_group("one") + f.create_group("two") + with self.assertRaises(RuntimeError): + qmi.data.dataset.convert_to_qmi_dataset(f) + + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + grp = f.create_group("nested") + grp.create_group("inner") + with 
self.assertRaises(RuntimeError): + qmi.data.dataset.convert_to_qmi_dataset(grp) + + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + with self.assertRaises(RuntimeError): + qmi.data.dataset.convert_to_qmi_dataset(f) + + with h5py.File("test.h5", "w", driver="core", backing_store=False) as f: + grp = f.create_group("mismatch") + grp.create_dataset("a", data=np.arange(2)) + grp.create_dataset("b", data=np.arange(3)) + with self.assertRaises(ValueError): + qmi.data.dataset.convert_to_qmi_dataset(grp) + def test_50_write_text_simple(self): """Writing a simple dataset as text.""" ds_name = "my_dataset" @@ -530,12 +899,12 @@ def test_50_write_text_simple(self): self.assertEqual(rawdata.shape, (8, 4)) self.assertTrue(np.all(rawdata == np.column_stack([scale_data, data]))) - self.assertEqual(attrs["QMI_DataSet_name"], repr(ds_name)) - self.assertEqual(attrs["QMI_DataSet_axis0_label"], repr("X axis")) - self.assertEqual(attrs["QMI_DataSet_axis0_unit"], repr("mm")) - self.assertEqual(attrs["QMI_DataSet_column1_label"], repr("red")) - self.assertEqual(attrs["QMI_DataSet_column2_label"], repr("green")) - self.assertEqual(attrs["QMI_DataSet_column2_unit"], repr("nm")) + self.assertEqual(attrs["QMI_Dataset_name"], repr(ds_name)) + self.assertEqual(attrs["QMI_Dataset_axis0_label"], repr("X axis")) + self.assertEqual(attrs["QMI_Dataset_axis0_unit"], repr("mm")) + self.assertEqual(attrs["QMI_Dataset_column1_label"], repr("red")) + self.assertEqual(attrs["QMI_Dataset_column2_label"], repr("green")) + self.assertEqual(attrs["QMI_Dataset_column2_unit"], repr("nm")) self.assertEqual(attrs["hello"], repr("world")) self.assertEqual(attrs["number"], repr(2.71)) @@ -586,7 +955,7 @@ def test_52_write_text_raises_exception_on_invalid_attrs_name(self): ds.column_unit = ["MHz", "nm", "K"] scale_data = 0.1 * np.arange(8) ds.set_axis_scale(0, scale_data) - ds.attrs["QMI_DataSet"] = "Hello" # <-- Error + ds.attrs["QMI_Dataset"] = "Hello" # <-- Error ds.attrs["number"] = 
2.71 with self.assertRaises(ValueError), io.StringIO() as f: @@ -669,6 +1038,170 @@ def test_60_parse_repr(self): else: self.assertEqual(w, v) + def test_61_read_text_rejects_invalid_marker(self): + """Reading text data rejects files without a QMI dataset marker.""" + with self.assertRaises(ValueError) as exc, io.StringIO("# Not_QMI_DataSet\n#\n") as f: + qmi.data.dataset.read_dataset_from_text(f) + + self.assertIn("expecting marker", str(exc.exception)) + + def test_61b_parse_attribute_value_edge_cases(self): + """Parsing text attributes handles booleans, octal escapes and invalid syntax.""" + self.assertTrue(qmi.data.dataset._parse_attribute_value("True")) + self.assertFalse(qmi.data.dataset._parse_attribute_value("False")) + self.assertEqual("S", qmi.data.dataset._parse_attribute_value("'\\123'")) + + for invalid_value in ["x'abc'", "'abc", "'a'b'"]: + with self.assertRaises(ValueError): + qmi.data.dataset._parse_attribute_value(invalid_value) + + def test_61c_read_text_rejects_invalid_separator(self): + """Reading text data rejects an invalid separator after the marker.""" + with self.assertRaises(ValueError) as exc, io.StringIO("# QMI_DataSet\n# not empty\n") as f: + qmi.data.dataset.read_dataset_from_text(f) + + self.assertIn("expecting separator", str(exc.exception)) + + def test_61d_read_text_rejects_invalid_attribute_lines(self): + """Reading text data rejects malformed attribute lines.""" + texts = [ + "# QMI_DataSet\n#\nnot an attribute\n", + "# QMI_DataSet\n#\n# : 1\n", + ] + for text in texts: + with self.assertRaises(ValueError), io.StringIO(text) as f: + qmi.data.dataset.read_dataset_from_text(f) + + def test_61e_read_text_rejects_missing_dataset_name(self): + """Reading text data rejects files without a string dataset name.""" + text = "\n".join([ + "# QMI_DataSet", + "#", + "# QMI_Dataset_name: 123", + "# QMI_Dataset_timestamp: 1.0", + "# QMI_Dataset_data_ndim: 1", + "# QMI_Dataset_ncol: 1", + "# QMI_Dataset_dim0_size: 1", + "#", + "1.0", + ]) + + 
with self.assertRaises(ValueError) as exc, io.StringIO(text) as f: + qmi.data.dataset.read_dataset_from_text(f) + + self.assertIn("Missing required attribute", str(exc.exception)) + + def test_61f_read_text_rejects_too_few_columns(self): + """Reading text data rejects files with fewer data columns than metadata declares.""" + text = "\n".join([ + "# QMI_DataSet", + "#", + "# QMI_Dataset_name: 'few_columns'", + "# QMI_Dataset_timestamp: 1.0", + "# QMI_Dataset_data_ndim: 1", + "# QMI_Dataset_ncol: 2", + "# QMI_Dataset_dim0_size: 1", + "#", + "1.0", + ]) + + with self.assertRaises(ValueError) as exc, io.StringIO(text) as f: + qmi.data.dataset.read_dataset_from_text(f) + + self.assertIn("Expecting at least 2 columns", str(exc.exception)) + + def test_61g_read_text_rejects_wrong_number_of_rows(self): + """Reading text data rejects files with the wrong number of rows.""" + text = "\n".join([ + "# QMI_DataSet", + "#", + "# QMI_Dataset_name: 'wrong_rows'", + "# QMI_Dataset_timestamp: 1.0", + "# QMI_Dataset_data_ndim: 2", + "# QMI_Dataset_ncol: 1", + "# QMI_Dataset_dim0_size: 3", + "# QMI_Dataset_dim1_size: 1", + "#", + "1.0", + "2.0", + ]) + + with self.assertRaises(ValueError) as exc, io.StringIO(text) as f: + qmi.data.dataset.read_dataset_from_text(f) + + self.assertIn("Expecting 3 rows", str(exc.exception)) + + def test_61h_read_text_rejects_missing_special_column_label(self): + """Reading text data rejects special columns without a label.""" + text = "\n".join([ + "# QMI_DataSet", + "#", + "# QMI_Dataset_name: 'missing_special_label'", + "# QMI_Dataset_timestamp: 1.0", + "# QMI_Dataset_data_ndim: 2", + "# QMI_Dataset_n_axes: 1", + "# QMI_Dataset_ncol: 1", + "# QMI_Dataset_dim0_size: 2", + "# QMI_Dataset_dim1_size: 1", + "#", + "0.0 1.0", + "1.0 2.0", + ]) + + with self.assertRaises(ValueError) as exc, io.StringIO(text) as f: + qmi.data.dataset.read_dataset_from_text(f) + + self.assertIn("Missing label for special column", str(exc.exception)) + + def 
test_62_read_text_rejects_inconsistent_axis_index_data(self): + """Reading text data rejects inconsistent special axis index columns.""" + ds = DataSet("indexed_dataset", data=np.arange(12, dtype=np.float64).reshape(2, 3, 2)) + ds.set_axis_label(0, "x") + ds.set_axis_label(1, "y") + + with io.StringIO() as f: + qmi.data.dataset.write_dataset_to_text(ds, f) + lines = f.getvalue().splitlines() + + for line_index, line in enumerate(lines): + if line and not line.startswith("#"): + words = line.split() + words[0] = "99" + lines[line_index] = " ".join(words) + break + + with self.assertRaises(ValueError) as exc, io.StringIO("\n".join(lines)) as f: + qmi.data.dataset.read_dataset_from_text(f) + + self.assertIn("Inconsistent index data", str(exc.exception)) + + def test_63_read_text_rejects_inconsistent_axis_scale_data(self): + """Reading text data rejects inconsistent special axis scale columns.""" + ds = DataSet("scaled_dataset", data=np.arange(6, dtype=np.float64).reshape(2, 3, 1)) + ds.set_axis_label(0, "x") + ds.set_axis_label(1, "y") + ds.set_axis_scale(0, np.array([1.0, 2.0])) + + with io.StringIO() as f: + qmi.data.dataset.write_dataset_to_text(ds, f) + lines = f.getvalue().splitlines() + + data_line_count = 0 + for line_index, line in enumerate(lines): + if line and not line.startswith("#"): + data_line_count += 1 + if data_line_count == 1: + continue + words = line.split() + words[2] = "99" + lines[line_index] = " ".join(words) + break + + with self.assertRaises(ValueError) as exc, io.StringIO("\n".join(lines)) as f: + qmi.data.dataset.read_dataset_from_text(f) + + self.assertIn("Inconsistent scale data", str(exc.exception)) + if __name__ == "__main__": unittest.main() diff --git a/tests/data/test_datastore.py b/tests/data/test_datastore.py index 63a7508e..9df18ae2 100644 --- a/tests/data/test_datastore.py +++ b/tests/data/test_datastore.py @@ -3,6 +3,7 @@ """Test datastore module.""" import unittest +import unittest.mock import os import inspect @@ -138,7 
+139,30 @@ def test_05_write_dataset_wrong_file_format_raises_exception(self): def test_06_read_dataset_in_hdf5(self): """See that we can read in a data set in HDF5 format.""" # Arrange - expected_dataset = _create_dataset() + expected_timestamp = 1776671773.6601706 + # expected_time_str = "2026-04-20T07:56:13" + with unittest.mock.patch("qmi.data.dataset.time") as time_patch: + time_patch.time = unittest.mock.Mock(return_value=expected_timestamp) + expected_dataset = _create_dataset() + + # ds_name = expected_dataset.name + # expected_attrs = expected_dataset.attrs.copy() + # expected_attrs.update( + # { + # f"{ds_name}_axis0_label": "X", + # f"{ds_name}_axis0_unit": "um", + # f"{ds_name}_axis1_label": "Z", + # f"{ds_name}_axis1_unit": "mm", + # f"{ds_name}_column0_label": "power", + # f"{ds_name}_column0_unit": "mW", + # f"{ds_name}_column1_label": "countrate", + # f"{ds_name}_column1_unit": "kHz", + # f"{ds_name}_column2_label": "temperature", + # f"{ds_name}_column2_unit": "K", + # f"{ds_name}_time_str": expected_time_str, + # f"{ds_name}_timestamp": expected_timestamp, + # } + # ) expected_file = os.path.join(os.getcwd(), expected_dataset.name + ".hdf5") try: self.datafolder.write_dataset(expected_dataset) @@ -163,7 +187,12 @@ def test_06_read_dataset_in_hdf5(self): def test_06b_read_dataset_in_hdf5_h5netcdf_backend(self): """See that we can read in a data set in HDF5 format with h5netcdf backend.""" # Arrange - expected_dataset = _create_dataset() + expected_timestamp = 1776671773.6601706 + # expected_time_str = "2026-04-20T07:56:13" + with unittest.mock.patch("qmi.data.dataset.time") as time_patch: + time_patch.time = unittest.mock.Mock(return_value=expected_timestamp) + expected_dataset = _create_dataset() + expected_file = os.path.join(os.getcwd(), expected_dataset.name + ".hdf5") try: self.datafolder.write_dataset(expected_dataset, backend="h5netcdf") @@ -220,8 +249,8 @@ def test_08_read_dataset_wrong_file_name_raises_exception(self): def 
test_09_make_hdf5_file(self): """Make a hdf5 file.""" # Arrange - name = "expected" - expected_file = os.path.join(os.getcwd(), name + ".hdf5") + name = "expected" + ".hdf5" + expected_file = os.path.join(os.getcwd(), name) # Act try: with self.datafolder.make_hdf5file(name) as hdf5_file: @@ -234,8 +263,8 @@ def test_09_make_hdf5_file(self): def test_09b_make_hdf5_file_h5netcdf_backend(self): """Make a hdf5 file with h5netcdf backend.""" # Arrange - name = "expected" - expected_file = os.path.join(os.getcwd(), name + ".hdf5") + name = "expected" + ".hdf5" + expected_file = os.path.join(os.getcwd(), name) # Act try: with self.datafolder.make_hdf5file(name, backend="h5netcdf") as hdf5_file: @@ -256,8 +285,8 @@ def test_10_make_hdf5_file_raises_exception_with_non_latin_characters(self): def test_10b_make_hdf5_file_raises_exception_if_h5_file_exists(self): """Making a HDF5 file fails when a matching .h5 file already exists.""" # Arrange - name = "existing_h5" - existing_file = os.path.join(os.getcwd(), name + ".h5") + name = "existing_h5" + ".h5" + existing_file = os.path.join(os.getcwd(), name) try: with File(existing_file, "x"): pass @@ -272,8 +301,8 @@ def test_10b_make_hdf5_file_raises_exception_if_h5_file_exists(self): def test_10c_make_hdf5_file_raises_exception_if_hdf5_file_exists(self): """Making a HDF5 file fails when a matching .hdf5 file already exists.""" # Arrange - name = "existing_hdf5" - existing_file = os.path.join(os.getcwd(), name + ".hdf5") + name = "existing_hdf5" + ".hdf5" + existing_file = os.path.join(os.getcwd(), name) try: with File(existing_file, "x"): pass @@ -288,8 +317,8 @@ def test_10c_make_hdf5_file_raises_exception_if_hdf5_file_exists(self): def test_11_open_hdf5_file(self): """Open a hdf5 file.""" # Arrange - name = "expected" - expected_file = os.path.join(os.getcwd(), name + ".hdf5") + name = "expected" + ".hdf5" + expected_file = os.path.join(os.getcwd(), name) # Act and Assert try: with self.datafolder.make_hdf5file(name): @@ 
-304,8 +333,8 @@ def test_11_open_hdf5_file(self): def test_11b_open_hdf5_file_h5netcdf_backend(self): """Open a hdf5 file with h5netcdf backend.""" # Arrange - name = "expected" - expected_file = os.path.join(os.getcwd(), name + ".hdf5") + name = "expected" + ".hdf5" + expected_file = os.path.join(os.getcwd(), name) # Act and Assert try: with self.datafolder.make_hdf5file(name, backend="h5netcdf"): @@ -328,7 +357,7 @@ def test_12_open_hdf5_file_raises_exception_with_non_latin_characters(self): def test_12b_open_hdf5_file_raises_exception_if_file_does_not_exist(self): """Opening a HDF5 file fails when no matching file exists.""" # Arrange - name = "missing_file" + name = "missing_file.h5" # Act and Assert with self.assertRaises(FileNotFoundError): self.datafolder.open_hdf5file(name) @@ -336,8 +365,8 @@ def test_12b_open_hdf5_file_raises_exception_if_file_does_not_exist(self): def test_12c_open_hdf5_file_finds_h5_extension(self): """Opening a HDF5 file also resolves files stored with the .h5 extension.""" # Arrange - name = "expected_h5" - expected_file = os.path.join(os.getcwd(), name + ".h5") + name = "expected_h5" + ".h5" + expected_file = os.path.join(os.getcwd(), name) try: with File(expected_file, "x"): pass @@ -378,6 +407,168 @@ def test_14_write_dataset_again(self): finally: os.remove(expected_file) + def test_15_make_hdf5_file_wrong_extension_raises_exception(self): + """Making a HDF5 file fails with an unsupported file extension.""" + with self.assertRaises(qmi.core.exceptions.QMI_UsageException): + self.datafolder.make_hdf5file("wrong_extension.txt") + + def test_16_make_hdf5_file_invalid_backend_raises_exception(self): + """Making a HDF5 file fails with an unsupported backend.""" + with self.assertRaises(ValueError): + self.datafolder.make_hdf5file("invalid_backend.hdf5", backend="boh") + + def test_17_open_hdf5_file_in_write_mode(self): + """Open an existing hdf5 file in read/write mode.""" + # Arrange + name = "write_mode" + ".hdf5" + expected_file = 
os.path.join(os.getcwd(), name) + # Act and Assert + try: + with self.datafolder.make_hdf5file(name): + self.assertTrue(os.path.isfile(expected_file)) + + with self.datafolder.open_hdf5file(name, write_mode=True) as hdf5_file: + hdf5_file.attrs["extra"] = "value" + + with self.datafolder.open_hdf5file(name) as hdf5_file: + self.assertEqual("value", hdf5_file.attrs["extra"]) + + finally: + os.remove(expected_file) + + def test_18_read_dataset_with_path_name_raises_exception(self): + """Reading a dataset rejects names that contain a path component.""" + with self.assertRaises(ValueError): + self.datafolder.read_dataset(os.path.join("subfolder", "dataset")) + + def test_19_add_dataset_to_hdf5_file(self): + """Add a dataset and root attributes to an existing HDF5 file.""" + # Arrange + name = "combined" + ".hdf5" + expected_file = os.path.join(os.getcwd(), name) + dataset = _create_dataset() + root_attrs = {"operator": "QMI", "run": 7} + # Act and Assert + try: + with self.datafolder.make_hdf5file(name) as hdf5_file: + self.datafolder.add_dataset_to_file(hdf5_file, dataset, root_attrs=root_attrs) + self.assertIn(dataset.name, hdf5_file) + self.assertEqual(dataset.name, hdf5_file.attrs["QMI_Dataset_name_0"]) + self.assertEqual("QMI", hdf5_file.attrs["operator"]) + self.assertEqual(7, hdf5_file.attrs["run"]) + + with self.datafolder.open_hdf5file(name) as hdf5_file: + read_dataset = qmi.data.dataset.read_dataset_from_hdf5(hdf5_file[dataset.name]) + + self.assertEqual(dataset.name, read_dataset.name) + self.assertListEqual(dataset.axis_label, read_dataset.axis_label) + self.assertListEqual(dataset.column_label, read_dataset.column_label) + self.assertEqual(dataset.data.shape, read_dataset.data.shape) + + finally: + os.remove(expected_file) + + def test_20_repr(self): + """DataFolder repr includes its folder path.""" + self.assertEqual("DataFolder({!r})".format(os.getcwd()), repr(self.datafolder)) + + def test_21_create_config_json_file_with_full_name(self): + """Create a 
JSON file with label, date and time in the file name.""" + datafolder = DataFolder(os.getcwd(), "test", "20200102", "030405") + expected_file = os.path.join(os.getcwd(), "test-20200102-030405.json") + try: + datafolder.write_config(CfgLogging()) + self.assertTrue(os.path.isfile(expected_file)) + + finally: + os.remove(expected_file) + + def test_22_write_dataset_invalid_name_raises_exception(self): + """Writing a dataset with an invalid name raises an exception.""" + dataset = _create_dataset() + dataset.name = "wrong name" + + with self.assertRaises(ValueError): + self.datafolder.write_dataset(dataset) + + def test_23_write_dataset_invalid_backend_raises_exception(self): + """Writing a HDF5 dataset with an invalid backend raises an exception.""" + dataset = _create_dataset() + + with self.assertRaises(ValueError): + self.datafolder.write_dataset(dataset, backend="boh") + + def test_24_read_hdf5_file_without_named_dataset_raises_exception(self): + """Reading HDF5 files rejects files that do not contain the named dataset.""" + name = "missing_in_file" + expected_file = os.path.join(os.getcwd(), name + ".hdf5") + try: + with File(expected_file, "x") as hdf5_file: + hdf5_file.create_group("other") + + with self.assertRaises(FileNotFoundError): + self.datafolder.read_dataset(name) + + finally: + os.remove(expected_file) + + def test_25_read_hdf5_file_without_named_dataset_h5netcdf_raises_exception(self): + """Reading h5netcdf files rejects files that do not contain the named dataset.""" + name = "missing_in_netcdf_file" + expected_file = os.path.join(os.getcwd(), name + ".hdf5") + try: + with NetCdfFile(expected_file, "x", decode_vlen_strings=False) as hdf5_file: + hdf5_file.create_group("other") + + with self.assertRaises(FileNotFoundError): + self.datafolder.read_dataset(name, backend="h5netcdf") + + finally: + os.remove(expected_file) + + def test_26_read_dataset_invalid_backend_raises_exception(self): + """Reading an existing HDF5 dataset with an invalid backend 
raises an exception.""" + dataset = _create_dataset() + expected_file = os.path.join(os.getcwd(), dataset.name + ".hdf5") + try: + self.datafolder.write_dataset(dataset) + + with self.assertRaises(ValueError): + self.datafolder.read_dataset(dataset.name, backend="boh") + + finally: + os.remove(expected_file) + + def test_27_add_dataset_to_file_duplicate_name_raises_exception(self): + """Adding a dataset to a file fails if a matching group already exists.""" + name = "duplicate_add" + ".hdf5" + expected_file = os.path.join(os.getcwd(), name) + dataset = _create_dataset() + try: + with self.datafolder.make_hdf5file(name) as hdf5_file: + hdf5_file.create_group(dataset.name) + + with self.assertRaises(qmi.core.exceptions.QMI_UsageException): + self.datafolder.add_dataset_to_file(hdf5_file, dataset) + + finally: + os.remove(expected_file) + + def test_28_add_dataset_to_file_uses_next_dataset_counter(self): + """Adding a dataset uses the next free QMI dataset counter attribute.""" + name = "counter_add" + ".hdf5" + expected_file = os.path.join(os.getcwd(), name) + dataset = _create_dataset() + try: + with self.datafolder.make_hdf5file(name) as hdf5_file: + hdf5_file.attrs["QMI_Dataset_name_0"] = "existing" + self.datafolder.add_dataset_to_file(hdf5_file, dataset) + + self.assertEqual(dataset.name, hdf5_file.attrs["QMI_Dataset_name_1"]) + + finally: + os.remove(expected_file) + class TestDataFolderNoLabel(unittest.TestCase): @@ -727,6 +918,130 @@ def test_15_find_latest_with_specific_date(self): time_folder = os.path.join(day_folder, "{}_{}".format(time_str, name)) os.removedirs(time_folder) + def test_16_get_folder_from_relative_datastore_path(self): + """Get a datastore folder from a path relative to the datastore base directory.""" + # Arrange + name = "temp" + date_str = "20200102" + time_str = "030405" + relative_path = os.path.join(date_str, "{}_{}".format(time_str, name)) + expected_folder = os.path.join(os.getcwd(), relative_path) + 
self.datastore.make_folder(name, date_str=date_str, time_str=time_str) + try: + # Act + datafolder = self.datastore.get_folder_from_path(relative_path) + # Assert + self.assertEqual(type(datafolder), DataFolder) + self.assertEqual(os.path.abspath(expected_folder), os.path.abspath(datafolder.folder_path)) + + finally: + os.removedirs(expected_folder) + + def test_17_list_folders_without_label_returns_all_labels(self): + """List data folders across labels when no label filter is given.""" + # Arrange + date_str = "20200103" + time_str = "040506" + labels = ["alpha", "beta"] + for label in labels: + self.datastore.make_folder(label, date_str=date_str, time_str=time_str) + + # Act + try: + data_folders = self.datastore.list_folders() + folder_names = [os.path.split(data_folder.folder_path)[-1] for data_folder in data_folders] + # Assert + for label in labels: + self.assertIn("{}_{}".format(time_str, label), folder_names) + + finally: + for label in labels: + day_folder = os.path.join(os.getcwd(), date_str) + time_folder = os.path.join(day_folder, "{}_{}".format(time_str, label)) + os.removedirs(time_folder) + + def test_18_find_latest_folder_returns_none_without_match(self): + """Finding the latest folder returns None if no matching folder exists.""" + self.assertIsNone(self.datastore.find_latest_folder("missing_label")) + + def test_19_repr(self): + """DataStore repr includes its base directory.""" + self.assertEqual("DataStore({!r})".format(os.getcwd()), repr(self.datastore)) + + def test_20_create_datastore_with_missing_base_directory_raises_exception(self): + """Creating a datastore fails if the base directory does not exist.""" + missing_basedir = os.path.join(os.getcwd(), "missing_datastore_base") + + with self.assertRaises(FileNotFoundError): + DataStore(missing_basedir) + + def test_21_make_folder_with_timestamp_and_date_time_raises_exception(self): + """Making a folder rejects timestamp together with date and time strings.""" + with 
self.assertRaises(ValueError): + self.datastore.make_folder("temp", timestamp=1.0, date_str="20200102", time_str="030405") + + def test_22_make_folder_uses_utc_when_local_time_is_disabled(self): + """Make a datastore folder using UTC time when configured.""" + name = "utc" + timestamp = 0.0 + date_str = "19700101" + time_str = "000000" + expected_folder = os.path.join(os.getcwd(), date_str, "{}_{}".format(time_str, name)) + old_use_local_time = DataStore.USE_LOCAL_TIME + DataStore.USE_LOCAL_TIME = False + try: + datafolder = self.datastore.make_folder(name, timestamp=timestamp) + + self.assertEqual(type(datafolder), DataFolder) + self.assertTrue(os.path.isdir(expected_folder)) + + finally: + DataStore.USE_LOCAL_TIME = old_use_local_time + os.removedirs(expected_folder) + + def test_23_get_folder_from_path_relative_to_base_directory(self): + """Get a folder from a relative path that is resolved against the datastore base directory.""" + base_dir = os.path.join(os.getcwd(), "tmp_datastore_base") + name = "temp" + date_str = "20200104" + time_str = "050607" + relative_path = os.path.join(date_str, "{}_{}".format(time_str, name)) + expected_folder = os.path.join(base_dir, relative_path) + os.mkdir(base_dir) + datastore = DataStore(base_dir) + try: + datastore.make_folder(name, date_str=date_str, time_str=time_str) + + datafolder = datastore.get_folder_from_path(relative_path) + + self.assertEqual(os.path.abspath(expected_folder), os.path.abspath(datafolder.folder_path)) + + finally: + os.removedirs(expected_folder) + + def test_24_list_folders_ignores_non_matching_entries(self): + """Listing folders ignores non-date directories, files and malformed folder names.""" + date_str = "20200105" + name = "wanted" + time_str = "060708" + day_folder = os.path.join(os.getcwd(), date_str) + wanted_folder = os.path.join(day_folder, "{}_{}".format(time_str, name)) + malformed_folder = os.path.join(day_folder, "not_a_measurement") + non_date_folder = os.path.join(os.getcwd(), 
"notadate") + self.datastore.make_folder(name, date_str=date_str, time_str=time_str) + os.mkdir(malformed_folder) + os.mkdir(non_date_folder) + try: + data_folders = self.datastore.list_folders(name) + + self.assertEqual(1, len(data_folders)) + self.assertEqual(wanted_folder, data_folders[0].folder_path) + + finally: + os.rmdir(malformed_folder) + os.rmdir(non_date_folder) + os.removedirs(wanted_folder) + if __name__ == "__main__": unittest.main()