diff --git a/lectern/pcgl/pcgl_analysis.json b/lectern/pcgl/pcgl_analysis.json new file mode 100644 index 0000000..efd9dd0 --- /dev/null +++ b/lectern/pcgl/pcgl_analysis.json @@ -0,0 +1,574 @@ +{ + "name": "pcgl_analysis", + "description" : "Analysis schema summarizing PCGL's analysis entities managed by SONG", + "version" : "1.0", + "schemas": [ + { + "name": "analysis", + "fields": [ + { + "name": "studyId", + "description": "Unique identifier of the study.", + "valueType": "string", + "restrictions": { + "required": true + }, + "meta": { + "mapping": { + "CQDG": "study_id" + }, + "displayName": "studyid", + "notes": "Added as a redundancy measure to prevent submission errors. Clinical API will check study_id against user's selection" + } + }, + { + "name": "submitter_analysis_id", + "description": "Unique identifier of the analysis within the study, assigned by the data provider.", + "valueType": "string", + "unique": true, + "restrictions": { + "required": true, + "regex": "^[A-Za-z0-9\\-\\._]{1,64}$" + }, + "meta": { + "mapping": { + "Beacon V2": "analyses.id" + }, + "displayName": "submitter_analysis_id" + } + }, + { + "name": "analysisType", + "description": "Overall type of the analysis. Term chosen from a controlled vocabulary (CV) list.", + "valueType": "string", + "restrictions": { + "required": true, + "codeList": [ + "sequenceExperiment", + "sequenceAlignment", + "variantCall", + "variantAnnotation" + ] + }, + "meta": { + "source": [ + "ARGO", + "EGA" + ], + "displayName": "analysistype" + } + }, + { + "name": "submitter_participant_id", + "description": "List of unique identifier of the participants within the same analysis. 
Participants must be registered as part of base schema in PCGL.", + "valueType": "string", + "restrictions": { + "regex": "^[A-Za-z0-9\\-\\._]{1,64}$" + }, + "meta": { + "displayName": "submitter_participant_id" + }, + "isArray" : true + }, + { + "name": "submitter_specimen_id", + "description": "List of unique identifiers of the specimens within the study, assigned by the data provider. Specimens must be registered as part of base schema in PCGL.", + "valueType": "string", + "meta": { + "source": [ + "MOH", + "ARGO" + ], + "mapping": { + "FHIR/mCODE": "mCODE STU1: Specimen.Identifier", + "Phenopacket": "biosample.id", + "Beacon V2": "biosample.id" + }, + "displayName": "submitter_specimen_id" + }, + "restrictions": { + "regex": "^[A-Za-z0-9\\-\\._]{1,64}$" + }, + "isArray" : true + }, + { + "name": "submitter_sample_id", + "description": "List of unique identifiers for samples within the same analysis. Samples must be registered as part of base schema in PCGL.", + "valueType": "string", + "meta": { + "displayName": "submitter_sample_id" + }, + "restrictions": { + "regex": "^[A-Za-z0-9\\-\\._]{1,64}$" + }, + "isArray" : true + }, + { + "name": "submitter_experiment_id", + "description": "List of unique identifiers of the experiments within the same analysis. 
Experiments must be registered as part of base schema in PCGL.", + "valueType": "string", + "meta": { + "displayName": "submitter_experiment_id", + "notes": "Required if analysisType is sequenceExperiment, sequenceAlignment, or variantCall" + }, + "restrictions": { + "if": { + "conditions": [ + { + "fields": [ + "analysisType" + ], + "match": { + "codeList": [ + "sequenceExperiment", + "sequenceAlignment", + "variantCall" + ] + }, + "case": "any" + } + ] + }, + "then": { + "required": true, + "regex": "^[A-Za-z0-9\\-\\._]{1,64}$" + }, + "else": { + "required": false, + "regex": "^[A-Za-z0-9\\-\\._]{1,64}$" + } + }, + "isArray" : true + }, + { + "name": "data_category", + "description": "Indicate the category of data produced as outputs from the analysis.", + "valueType": "string", + "restrictions": { + "required": true, + "codeList": [ + "Genomics", + "Transcriptomics", + "Proteomics", + "Epigenomics", + "Imaging", + "Reference" + ] + }, + "meta": { + "displayName": "data_category" + } + }, + { + "name": "variant_class", + "description": "Indicates whether the called variants are germline, somatic, or of uncertain classification.", + "valueType": "string", + "restrictions": { + "if": { + "conditions": [ + { + "fields": [ + "analysisType" + ], + "match": { + "value": "variantCall" + }, + "case": "any" + } + ] + }, + "then": { + "required": true, + "codeList": [ + "Germline", + "Somatic", + "Unknown" + ] + }, + "else": { + "required": false, + "codeList": [ + "Germline", + "Somatic", + "Unknown" + ] + } + }, + "meta": { + "displayName": "variant_class", + "notes": "Required if analysisType is variantCall" + } + }, + { + "name": "variant_calling_strategy", + "description": "Indicate the approach used to identify genetic variants from sequencing data, based on the availability of reference samples (e.g., matched normal) and the study design.", + "valueType": "string", + "restrictions": { + "if": { + "conditions": [ + { + "fields": [ + "analysisType" + ], + "match": { + 
"value": "variantCall" + }, + "case": "any" + } + ] + }, + "then": { + "required": true, + "codeList": [ + "Tumour only", + "Tumour normal", + "Single sample", + "Family based", + "RNA based", + "Joint calling" + ] + }, + "else": { + "required": false, + "codeList": [ + "Tumour only", + "Tumour normal", + "Single sample", + "Family based", + "RNA based", + "Joint calling" + ] + } + }, + "meta": { + "displayName": "variant_calling_strategy", + "notes": "Required if analysisType is variantCall" + } + }, + { + "name": "genome_build", + "description": "The reference genome build that is used in the workflow.", + "valueType": "string", + "restrictions": { + "if": { + "conditions": [ + { + "fields": [ + "analysisType" + ], + "match": { + "codeList": [ + "sequenceAlignment", + "variantCall" + ] + }, + "case": "any" + } + ] + }, + "then": { + "required": true + } + }, + "meta": { + "displayName": "genome_build", + "notes": "Required if analysisType is sequenceAlignment or variantCall" + } + }, + { + "name": "genome_annotation", + "description": "Indicates which genome annotation build or version was used in the analysis (e.g., Ensembl release, RefSeq release, etc.).", + "valueType": "string", + "restrictions": { + "regex": "^(?:Ensembl \\d{1,5}|RefSeq \\d{1,5}|Gencode v\\d{1,5})$" + }, + "meta": { + "displayName": "genome_annotation" + } + } + ], + "meta": { + "reference": "EGA", + "status": "Prod V1.0" + }, + "description": "This entity is intended to contain metadata about a detailed examination of data (mainly data processing protocols) in order to come to some conclusion. It can be of different types (e.g. sequence variation, sequence alignment, phenotype characterization, gene expression, etc.) that will mainly differ in the protocols used to achieve the processed data of the analysis." 
+ }, + { + "name": "file", + "fields": [ + { + "name": "submitter_analysis_id", + "description": "Unique identifier of the analysis within the study, assigned by the data provider.", + "valueType": "string", + "restrictions": { + "required": true, + "regex": "^[A-Za-z0-9\\-\\._]{1,64}$" + }, + "meta": { + "mapping": { + "Beacon V2": "analyses.id" + }, + "displayName": "submitter_analysis_id" + } + }, + { + "name": "fileName", + "description": "Name of the file.", + "valueType": "string", + "restrictions": { + "required": true, + "regex": "^[^\\\\/:\\*\\?\"<>\\|]+$" + }, + "meta": { + "displayName": "filename", + "notes": "No paths are allowed in the file name." + } + }, + { + "name": "fileSize", + "description": "Size of the file, in bytes.", + "valueType": "integer", + "restrictions": { + "required": true, + "range": { + "min": 0 + } + }, + "meta": { + "displayName": "filesize", + "notes": "Integer with a minimum value of 0" + } + }, + { + "name": "fileMd5sum", + "description": "Computed md5sum of the file.", + "valueType": "string", + "restrictions": { + "required": true, + "regex": "^[a-fA-F0-9]{32}$" + }, + "meta": { + "displayName": "filemd5sum" + } + }, + { + "name": "fileType", + "description": "Data format of files.", + "valueType": "string", + "restrictions": { + "required": true, + "codeList": [ + "ADF", + "ASM", + "BAM", + "BAI", + "BAX.H5", + "BAS.H5", + "BCF", + "BIM", + "BGI", + "BGEN", + "CEL", + "CSI", + "CSFASTA", + "CRAM", + "CRAI", + "CSV", + "EXP", + "FAM", + "FAST5", + "FASTQ", + "FASTA", + "GEN", + "GMX", + "GMT", + "GPR", + "GRP", + "GTC", + "HAP", + "HDF5", + "HTML", + "HIC", + "IDAT", + "IDF", + "JPG", + "LOC", + "LOOM", + "MAP", + "MATLAB", + "MEX", + "MD", + "MD5", + "MTX", + "PAIR", + "PED", + "PERL", + "PNG", + "PXF", + "PY", + "qual454", + "qualsolid", + "R", + "SAM", + "SDRF", + "SH", + "SNP", + "SRA", + "SRF", + "SFF", + "SVG", + "TBI", + "TIF", + "TSV", + "TXT", + "VCF", + "XLSX", + "XML" + ] + }, + "meta": { + "source": [ + "EGA", 
+ "ARGO" + ], + "displayName": "filetype" + } + }, + { + "name": "fileAccess", + "description": "The permission level of a file for public access.", + "valueType": "string", + "restrictions": { + "required": true + }, + "meta": { + "displayName": "fileaccess" + } + }, + { + "name": "dataType", + "description": "Indicates the type of data produced as outputs from the analysis.", + "valueType": "string", + "restrictions": { + "required": true, + "codeList": [ + "Raw Sequencing Reads", + "Aligned Reads", + "Aligned Reads Index", + "Single Nucleotide Variants (SNVs)", + "Insertions and Deletions (InDels)", + "Structural Variations (SVs)", + "Copy Number Variations (CNVs)", + "Variant Calls Index", + "Quality Control Metrics", + "Gene Fusions", + "Alternative Splicing", + "Gene Expression Quantification", + "Transcript Expression Quantification", + "Single-Cell Expression Matrices", + "Splicing Junctions", + "Differential Expression Analysis" + ] + }, + "meta": { + "displayName": "datatype" + } + } + ], + "restrictions": { + "foreignKey": [ + { + "schema": "analysis", + "mappings": [ + { + "local": "submitter_analysis_id", + "foreign": "submitter_analysis_id" + } + ] + } + ] + } + }, + { + "name": "workflow", + "fields": [ + { + "name": "submitter_workflow_id", + "description": "Unique identifier of the genomic data processing workflow used for data analysis.", + "valueType": "string", + "unique": true, + "restrictions": { + "required": true, + "regex": "^[A-Za-z0-9\\-\\._]{1,64}$" + }, + "meta": { + "mapping": { + "Beacon V2": "analyses.id" + }, + "displayName": "submitter_workflow_id" + } + }, + { + "name": "submitter_analysis_id", + "description": "Unique identifier of the analysis within the study, assigned by the data provider.", + "valueType": "string", + "restrictions": { + "required": true, + "regex": "^[A-Za-z0-9\\-\\._]{1,64}$" + }, + "meta": { + "mapping": { + "Beacon V2": "analyses.id" + }, + "displayName": "submitter_analysis_id" + } + }, + { + "name": 
"workflow_name", + "description": "The name of the genomic data processing workflow used for data analysis.", + "valueType": "string", + "restrictions": { + "required": true + }, + "meta": { + "displayName": "workflow_name" + } + }, + { + "name": "workflow_version", + "description": "The version of the workflow or pipeline being used.", + "valueType": "string", + "meta": { + "displayName": "workflow_version" + } + }, + { + "name": "workflow_url", + "description": "A URL linking to the workflow's repository or documentation. This could be a reference to a GitHub repository, a website where the workflow is described, or documentation on how to run the workflow.", + "valueType": "string", + "meta": { + "displayName": "workflow_url" + } + } + ], + "meta": { + "reference": "EGA", + "status": "Prod V1.0" + }, + "description": "Contains information about the analysis workflow generating the files", + "restrictions": { + "foreignKey": [ + { + "schema": "analysis", + "mappings": [ + { + "local": "submitter_analysis_id", + "foreign": "submitter_analysis_id" + } + ] + } + ] + } + } + ] +} \ No newline at end of file diff --git a/lectern/pcgl/pcgl_dac.json b/lectern/pcgl/pcgl_dac.json new file mode 100644 index 0000000..b517233 --- /dev/null +++ b/lectern/pcgl/pcgl_dac.json @@ -0,0 +1,72 @@ +{ + "name": "pcgl_dac", + "description" : "DAC schema summarizing PCGL's DAC entity managed by Clinical Submission Service", + "version" : "1.0", + "schemas": [ + { + "name": "dac", + "fields": [ + { + "name": "dac_id", + "description": "Unique identifier of the Data Access Committee (DAC).", + "valueType": "string", + "restrictions": { + "required": true + }, + "meta": { + "displayName": "dac_id" + } + }, + { + "name": "dac_name", + "description": "Full name of the DAC", + "valueType": "string", + "restrictions": { + "required": true + }, + "meta": { + "displayName": "dac_name" + } + }, + { + "name": "dac_description", + "description": "An in-depth description of the DAC, including its 
overall purpose, scope and responsibilities.", + "valueType": "string", + "restrictions": { + "required": true + }, + "meta": { + "displayName": "dac_description" + } + }, + { + "name": "contact_name", + "description": "The full name of the primary contact person. Please provide in the format: FirstName LastName, e.g. John Smith", + "valueType": "string", + "restrictions": { + "required": true + }, + "meta": { + "displayName": "contact_name" + } + }, + { + "name": "contact_email", + "description": "Contact email for communication", + "valueType": "string", + "restrictions": { + "required": true + }, + "meta": { + "displayName": "contact_email" + } + } + ], + "meta": { + "reference": "EGA", + "status": "Prod V1.0" + }, + "description": "A group of individuals responsible for reviewing and approving or denying access requests to data based on established policies." + } + ] +} \ No newline at end of file diff --git a/lectern/pcgl/pcgl_lectern.json b/lectern/pcgl/pcgl_lectern.json index 2870536..e997465 100644 --- a/lectern/pcgl/pcgl_lectern.json +++ b/lectern/pcgl/pcgl_lectern.json @@ -1,6 +1,6 @@ { "name": "prod_pcgl_schema", - "description": "Base clinical schema that incorporates all PCGL base entities", + "description": "Defines the base schema of shared data elements used across all domains, including patient demographics, vital status and laboratory results", "version": "1.1", "schemas": [ { diff --git a/lectern/pcgl/pcgl_study.json b/lectern/pcgl/pcgl_study.json new file mode 100644 index 0000000..7520102 --- /dev/null +++ b/lectern/pcgl/pcgl_study.json @@ -0,0 +1,237 @@ +{ + "name": "pcgl_study", + "description" : "Study schema summarizing PCGL's study entity managed by Clinical Submission Service", + "version" : "1.0", + "schemas": [ + { + "name": "study", + "fields": [ + { + "name": "study_id", + "description": "Unique identifier of the study in PCGL.", + "valueType": "string", + "restrictions": { + "required": true + }, + "meta": { + "source": [ + 
"ARGO:program.program_id", + "CQDG:study.study_id" + ], + "mapping": { + "CQDG": "study_id" + }, + "displayName": "study_id" + } + }, + { + "name": "study_name", + "description": "The official name of the study", + "valueType": "string", + "restrictions": { + "required": true + }, + "meta": { + "source": [ + "ARGO:program.program_name", + "CQDG:study.name" + ], + "mapping": { + "CQDG": "name" + }, + "displayName": "study_name" + } + }, + { + "name": "study_description", + "description": "A detailed description of the study\u2019s purpose, hypothesis, and design.", + "valueType": "string", + "restrictions": { + "required": true + }, + "meta": { + "source": [ + "ARGO:program.description", + "CQDG:study.description" + ], + "mapping": { + "CQDG": "description" + }, + "displayName": "study_description" + } + }, + { + "name": "program_name", + "description": "Indicate the overarching program the study belongs to (if applicable)", + "valueType": "string", + "meta": { + "displayName": "program_name" + } + }, + { + "name": "keywords", + "description": "List of specific terms that describe the focus and content of the study. In case there are several, place each on separate line.", + "valueType": "string", + "meta": { + "displayName": "keywords" + }, + "isArray": true, + "delimiter": "," + }, + { + "name": "status", + "description": "Indicate if the study is completed or ongoing.", + "valueType": "string", + "restrictions": { + "required": true, + "codeList": [ + "Ongoing", + "Completed" + ] + }, + "meta": { + "displayName": "status" + } + }, + { + "name": "context", + "description": "Indicate if the study was conducted in a clinical setting or as part of a research project.", + "valueType": "string", + "restrictions": { + "required": true, + "codeList": [ + "Clinical", + "Research" + ] + }, + "meta": { + "displayName": "context" + } + }, + { + "name": "domain", + "description": "List of specific scientific or clinical domains addressed by the study. 
In case there are several, place each on separate line.", + "valueType": "string", + "isArray": true, + "restrictions": { + "required": true, + "codeList": [ + "Aging", + "Birth Defects", + "Cancer", + "Circulatory and Respiratory Health", + "General Health", + "Infection and Immunity", + "Musculoskeletal Health and Arthritis", + "Neurodevelopmental Conditions", + "Neurosciences, Mental Health and Addiction", + "Nutrition, Metabolism and Diabetes", + "Population Genomics", + "Rare Diseases", + "Other" + ] + }, + "meta": { + "displayName": "domain" + } + }, + { + "name": "dac_id", + "description": "Unique identifier of the Data Access Committee (DAC) in PCGL to which the study is assigned.", + "valueType": "string", + "restrictions": { + "required": true + }, + "meta": { + "displayName": "dac_id" + } + }, + { + "name": "participant_criteria", + "description": "Inclusion/exclusion criteria for participants (e.g., specific cancer type, age range).", + "valueType": "string", + "meta": { + "displayName": "participant_criteria" + } + }, + { + "name": "principal_investigators", + "description": "List of lead researchers responsible for the study. In case there are several, please list each investigator on a separate line in the format: FirstName LastName, Affiliation E.g, John Doe, Example Research Institute", + "valueType": "string", + "isArray": true, + "delimiter": ",", + "restrictions": { + "required": true + }, + "meta": { + "displayName": "principal_investigators", + "notes": "Make sure to use only one comma (,) to separate the name from the affiliation.\nAvoid using additional fields (like emails or roles) in this section.", + "examples": [ + "FirstName LastName, Affiliation", + "John Doe, Example Research Institute" + ] + } + }, + { + "name": "lead_organizations", + "description": "List of institutions or organizations leading the study. 
In case there are several, please list each lead organization on a separate line using its full official name.", + "valueType": "string", + "isArray": true, + "delimiter": ",", + "restrictions": { + "required": true + }, + "meta": { + "displayName": "lead_organizations", + "notes": "Do not include departments or individual names." + } + }, + { + "name": "collaborators", + "description": "List of researchers, institutions or companies involved in the study. In case there are several, please list each collaborator on a separate line in the format: FirstName LastName (if individual) or Organization full official name, Role E.g, Biotech Inc., Industry Partner Emily John, Data Contributor", + "valueType": "string", + "meta": { + "displayName": "collaborators", + "notes": "Include both individuals and organizations as applicable.\nUse a single comma (,) to separate the name or organization from their role.\nIf the collaborator is an individual, include their full name and role.\nIf the collaborator is an organization, use the full official name.", + "examples": [ + "Biotech Inc., Industry Partner", + "Emily John, Data Contributor" + ] + }, + "isArray": true + }, + { + "name": "funding_sources", + "description": "List of organizations or agencies funding the study. 
In case there are several, please list each funding source on a separate line in the format: Funder name, Grant number E.g, National Example Foundation, NEF-12345 Example Health Research Council", + "valueType": "string", + "isArray": true, + "restrictions": { + "required": true + }, + "meta": { + "displayName": "funding_sources", + "notes": "If no grant number is available, only include the funder name.\nDo not include currency amounts or dates.\nUse the full name of the funding organization.", + "examples": [ + "National Example Foundation, NEF-12345", + "Example Health Research Council" + ] + } + }, + { + "name": "publication_links", + "description": "List of URL links to academic papers or reports associated with the study. In case there are several, please list each publication on a separate line by providing a DOI URL formatted as https://doi.org/ followed by the DOI number.", + "valueType": "string", + "meta": { + "displayName": "publication_links" + }, + "isArray": true, + "delimiter": "," + } + ], + "meta": { + "status": "Prod V1.0" + }, + "description": "Study refers to a specific research project conducted as part of a larger program. It involves data collection, analysis, and interpretation related to the program's goals. Each study is focused on particular research questions related to set of objectives." 
+ } + ] +} \ No newline at end of file diff --git a/song_schema/json-schema/dynamic/sequenceAlignment.json b/song_schema/json-schema/dynamic/sequenceAlignment.json index e5033ad..653eb35 100644 --- a/song_schema/json-schema/dynamic/sequenceAlignment.json +++ b/song_schema/json-schema/dynamic/sequenceAlignment.json @@ -9,7 +9,7 @@ ], "externalValidations": [ { - "url": "https://submission.pcgl-dev.cumulus.genomeinformatics.org/health", + "url": "http://submission.submission.svc.cluster.local/validator/entity/experiment/field/submitter_experiment_id/exists?study={study}&value={value}", "jsonPath": "submitter_experiment_id" } ] diff --git a/song_schema/json-schema/dynamic/sequenceExperiment.json b/song_schema/json-schema/dynamic/sequenceExperiment.json index 1afe4f4..9dedbd5 100644 --- a/song_schema/json-schema/dynamic/sequenceExperiment.json +++ b/song_schema/json-schema/dynamic/sequenceExperiment.json @@ -7,7 +7,7 @@ ], "externalValidations": [ { - "url": "https://submission.pcgl-dev.cumulus.genomeinformatics.org/health", + "url": "http://submission.submission.svc.cluster.local/validator/entity/experiment/field/submitter_experiment_id/exists?study={study}&value={value}", "jsonPath": "submitter_experiment_id" } ] diff --git a/song_schema/json-schema/dynamic/variantCall.json b/song_schema/json-schema/dynamic/variantCall.json index d4636c4..e98a67c 100644 --- a/song_schema/json-schema/dynamic/variantCall.json +++ b/song_schema/json-schema/dynamic/variantCall.json @@ -7,7 +7,7 @@ ], "externalValidations": [ { - "url": "https://submission.pcgl-dev.cumulus.genomeinformatics.org/health", + "url": "http://submission.submission.svc.cluster.local/validator/entity/experiment/field/submitter_experiment_id/exists?study={study}&value={value}", "jsonPath": "submitter_experiment_id" } ]