From b201010313ed70470377459b5460df89f100b4da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=9Eevvval=20Ulus?= <85807807+sevvaluluss@users.noreply.github.com> Date: Thu, 2 Apr 2026 10:35:03 -0500 Subject: [PATCH 1/5] Add download source and required files documentation to eICUDataset (fixes #883) --- pyhealth/datasets/eicu.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/pyhealth/datasets/eicu.py b/pyhealth/datasets/eicu.py index 3cf4e9166..bc439e78d 100644 --- a/pyhealth/datasets/eicu.py +++ b/pyhealth/datasets/eicu.py @@ -8,7 +8,23 @@ class eICUDataset(BaseDataset): - """ + """ + Download: https://eicu-crd.mit.edu/ + Access: Requires PhysioNet credentialed access and DUA agreement. + + Required files: + - patient.csv + - hospital.csv + - diagnosis.csv + - treatment.csv + - medication.csv + - lab.csv + - physicalExam.csv + - admissionDx.csv + + Compatible formats: .csv + Note: All files must be placed in the root directory. + A dataset class for handling eICU data. The eICU dataset is a large dataset of de-identified health records of ICU From 59e0ad9a86d3717fd1a1016b986475c29964146c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=9Eevvval=20Ulus?= <85807807+sevvaluluss@users.noreply.github.com> Date: Thu, 2 Apr 2026 10:40:09 -0500 Subject: [PATCH 2/5] Add download source and required files documentation to MIMIC3Dataset --- pyhealth/datasets/mimic3.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pyhealth/datasets/mimic3.py b/pyhealth/datasets/mimic3.py index 7e569d2f3..d99bcd5eb 100644 --- a/pyhealth/datasets/mimic3.py +++ b/pyhealth/datasets/mimic3.py @@ -13,7 +13,20 @@ class MIMIC3Dataset(BaseDataset): """ A dataset class for handling MIMIC-III data. + Download: https://physionet.org/content/mimiciii/1.4/ + Access: Requires PhysioNet credentialed access and DUA agreement. 
+ Required files: + - ADMISSIONS.csv + - PATIENTS.csv + - DIAGNOSES_ICD.csv + - PROCEDURES_ICD.csv + - PRESCRIPTIONS.csv + - LABEVENTS.csv + - ICUSTAYS.csv + + Compatible formats: .csv + Note: All files must be placed in the root directory. This class is responsible for loading and managing the MIMIC-III dataset, which includes tables such as patients, admissions, and icustays. From cf3f91644ffb97f15e51d8fd48737b1cab75fdce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=9Eevvval=20Ulus?= <85807807+sevvaluluss@users.noreply.github.com> Date: Thu, 2 Apr 2026 10:42:15 -0500 Subject: [PATCH 3/5] Add download source and required files documentation to MIMIC4EHRDataset --- pyhealth/datasets/mimic4.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pyhealth/datasets/mimic4.py b/pyhealth/datasets/mimic4.py index 9d1aa55d8..c056dec63 100644 --- a/pyhealth/datasets/mimic4.py +++ b/pyhealth/datasets/mimic4.py @@ -31,6 +31,20 @@ def log_memory_usage(tag=""): class MIMIC4EHRDataset(BaseDataset): """ MIMIC-IV EHR dataset. + Download: https://physionet.org/content/mimiciv/2.2/ + Access: Requires PhysioNet credentialed access and DUA agreement. + + Required files: + - admissions.csv + - patients.csv + - diagnoses_icd.csv + - procedures_icd.csv + - prescriptions.csv + - labevents.csv + - icustays.csv + + Compatible formats: .csv + Note: All files must be placed in the root directory. This class is responsible for loading and managing the MIMIC-IV EHR dataset, which includes tables such as patients, admissions, and icustays. 
From b72cbb1aceed1ef8a5e59341f19ef7355b986eaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=9Eevvval=20Ulus?= <85807807+sevvaluluss@users.noreply.github.com> Date: Thu, 2 Apr 2026 10:44:04 -0500 Subject: [PATCH 4/5] Add download source and required files documentation to EHRShotDataset --- pyhealth/datasets/ehrshot.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pyhealth/datasets/ehrshot.py b/pyhealth/datasets/ehrshot.py index 878295bea..e26561390 100644 --- a/pyhealth/datasets/ehrshot.py +++ b/pyhealth/datasets/ehrshot.py @@ -10,7 +10,14 @@ class EHRShotDataset(BaseDataset): """ A dataset class for handling EHRShot data. + Download: https://huggingface.co/datasets/StanfordShahLab/ehrshot + Access: Requires HuggingFace account and dataset agreement. + Required files: + - ehrshot.csv + + Compatible formats: .csv + Note: All files must be placed in the root directory. This class is responsible for loading and managing the EHRShot dataset. Website: https://som-shahlab.github.io/ehrshot-website/ From 4681adf46303f1378d6623bcae6032c120367a49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=9Eevvval=20Ulus?= <85807807+sevvaluluss@users.noreply.github.com> Date: Thu, 2 Apr 2026 10:47:33 -0500 Subject: [PATCH 5/5] Add download source and required files documentation to CardiologyDataset --- pyhealth/datasets/cardiology.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pyhealth/datasets/cardiology.py b/pyhealth/datasets/cardiology.py index d37690b1b..2df3f02f6 100644 --- a/pyhealth/datasets/cardiology.py +++ b/pyhealth/datasets/cardiology.py @@ -10,7 +10,20 @@ class CardiologyDataset(BaseSignalDataset): """Base ECG dataset for Cardiology - + + Download: https://physionet.org/content/challenge-2020/1.0.2/ + Access: Requires PhysioNet account. 
+ + Required files (dataset directories): + - cpsc_2018/ (directory) + - cpsc_2018_extra/ (directory) + - georgia/ (directory) + - ptb/ (directory) + - ptb-xl/ (directory) + - st_petersburg_incart/ (directory) + + Compatible formats: .mat, .hea + Note: All dataset directories must be placed in the root directory. Dataset is available at https://physionet.org/content/challenge-2020/1.0.2/ Args: