From e0a359f659d5a9b88ddcd7756f1d2f6cf185f8ab Mon Sep 17 00:00:00 2001 From: ali Date: Tue, 24 Feb 2026 13:58:14 -0800 Subject: [PATCH 01/13] fix: Postgres image tag --- bin/postgres17-pgroonga.dockerfile | 37 +++++++++--------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/bin/postgres17-pgroonga.dockerfile b/bin/postgres17-pgroonga.dockerfile index 04369728a68..f799c69e8ec 100644 --- a/bin/postgres17-pgroonga.dockerfile +++ b/bin/postgres17-pgroonga.dockerfile @@ -15,36 +15,21 @@ # specific language governing permissions and limitations # under the License. -FROM bitnami/postgresql:17.4.0-debian-12-r11 +FROM postgres:17-bookworm -USER root - -# Install build tools and Groonga APT repo -RUN install_packages \ - build-essential \ - git \ +# 1. Install prerequisites for adding the Groonga repository +RUN apt-get update && apt-get install -y --no-install-recommends \ wget \ - curl \ ca-certificates \ - pkg-config \ - libmecab-dev \ - mecab \ - gnupg \ - libpq-dev + && rm -rf /var/lib/apt/lists/* -# Add Groonga official APT repo +# 2. 
Add Groonga official APT repo and install pre-compiled PGroonga for PGDG RUN wget https://packages.groonga.org/debian/groonga-apt-source-latest-bookworm.deb && \ dpkg -i groonga-apt-source-latest-bookworm.deb && \ apt-get update && \ - apt-get install -y \ - libgroonga-dev \ - groonga-tokenizer-mecab - -# Clone PGroonga with submodules and build it using Bitnami's pg_config -RUN git clone --recursive https://github.com/pgroonga/pgroonga.git /tmp/pgroonga && \ - cd /tmp/pgroonga && \ - PG_CONFIG=/opt/bitnami/postgresql/bin/pg_config make && \ - PG_CONFIG=/opt/bitnami/postgresql/bin/pg_config make install && \ - rm -rf /tmp/pgroonga - -USER 1001 + apt-get install -y --no-install-recommends \ + postgresql-17-pgdg-pgroonga \ + groonga-tokenizer-mecab \ + mecab \ + && rm -rf /var/lib/apt/lists/* \ + && rm groonga-apt-source-latest-bookworm.deb \ No newline at end of file From 85a18f38dbb26f0690d720ffa8da1fc9ba7aa835 Mon Sep 17 00:00:00 2001 From: ali Date: Wed, 25 Feb 2026 12:23:47 -0800 Subject: [PATCH 02/13] fix: Postgres image tag and registry --- .../access-control-service-deployment.yaml | 2 +- .../templates/config-service-deployment.yaml | 2 +- .../templates/example-data-loader-job.yaml | 2 +- .../templates/file-service-deployment.yaml | 2 +- bin/k8s/templates/webserver-deployment.yaml | 2 +- ...workflow-compiling-service-deployment.yaml | 2 +- ...low-computing-unit-manager-deployment.yaml | 4 +-- ...mputing-unit-master-prepull-daemonset.yaml | 2 +- bin/k8s/values.yaml | 12 +++++-- bin/postgres17-pgroonga.dockerfile | 35 ------------------- 10 files changed, 18 insertions(+), 47 deletions(-) delete mode 100644 bin/postgres17-pgroonga.dockerfile diff --git a/bin/k8s/templates/access-control-service-deployment.yaml b/bin/k8s/templates/access-control-service-deployment.yaml index adeca0b8bbf..1d6739e0349 100644 --- a/bin/k8s/templates/access-control-service-deployment.yaml +++ b/bin/k8s/templates/access-control-service-deployment.yaml @@ -34,7 +34,7 @@ spec: spec: 
containers: - name: {{ .Values.accessControlService.name }} - image: {{ .Values.global.imageRegistry }}/{{ .Values.accessControlService.imageName }}:{{ .Values.global.imageTag }} + image: {{ .Values.texera.imageRegistry }}/{{ .Values.accessControlService.imageName }}:{{ .Values.texera.imageTag }} imagePullPolicy: {{ .Values.texeraImages.pullPolicy }} ports: - containerPort: {{ .Values.accessControlService.service.port }} diff --git a/bin/k8s/templates/config-service-deployment.yaml b/bin/k8s/templates/config-service-deployment.yaml index 45f081b11d7..f0748785c3a 100644 --- a/bin/k8s/templates/config-service-deployment.yaml +++ b/bin/k8s/templates/config-service-deployment.yaml @@ -34,7 +34,7 @@ spec: spec: containers: - name: {{ .Values.configService.name }} - image: {{ .Values.global.imageRegistry }}/{{ .Values.configService.imageName }}:{{ .Values.global.imageTag }} + image: {{ .Values.texera.imageRegistry }}/{{ .Values.configService.imageName }}:{{ .Values.texera.imageTag }} imagePullPolicy: {{ .Values.texeraImages.pullPolicy }} ports: - containerPort: {{ .Values.configService.service.port }} diff --git a/bin/k8s/templates/example-data-loader-job.yaml b/bin/k8s/templates/example-data-loader-job.yaml index 9c5e0491389..15759be4097 100644 --- a/bin/k8s/templates/example-data-loader-job.yaml +++ b/bin/k8s/templates/example-data-loader-job.yaml @@ -30,7 +30,7 @@ spec: restartPolicy: Never containers: - name: example-data-loader - image: {{ .Values.global.imageRegistry }}/{{ .Values.exampleDataLoader.imageName }}:{{ .Values.global.imageTag }} + image: {{ .Values.texera.imageRegistry }}/{{ .Values.exampleDataLoader.imageName }}:{{ .Values.texera.imageTag }} env: - name: TEXERA_EXAMPLE_USERNAME value: {{ .Values.exampleDataLoader.username }} diff --git a/bin/k8s/templates/file-service-deployment.yaml b/bin/k8s/templates/file-service-deployment.yaml index d7cfb7c1995..6c9a4041f98 100644 --- a/bin/k8s/templates/file-service-deployment.yaml +++ 
b/bin/k8s/templates/file-service-deployment.yaml @@ -34,7 +34,7 @@ spec: spec: containers: - name: {{ .Values.fileService.name }} - image: {{ .Values.global.imageRegistry }}/{{ .Values.fileService.imageName }}:{{ .Values.global.imageTag }} + image: {{ .Values.texera.imageRegistry }}/{{ .Values.fileService.imageName }}:{{ .Values.texera.imageTag }} imagePullPolicy: {{ .Values.texeraImages.pullPolicy }} ports: - containerPort: {{ .Values.fileService.service.port }} diff --git a/bin/k8s/templates/webserver-deployment.yaml b/bin/k8s/templates/webserver-deployment.yaml index 0c8656bfe8f..56642c54785 100644 --- a/bin/k8s/templates/webserver-deployment.yaml +++ b/bin/k8s/templates/webserver-deployment.yaml @@ -34,7 +34,7 @@ spec: spec: containers: - name: {{ .Values.webserver.name }} - image: {{ .Values.global.imageRegistry }}/{{ .Values.webserver.imageName }}:{{ .Values.global.imageTag }} + image: {{ .Values.texera.imageRegistry }}/{{ .Values.webserver.imageName }}:{{ .Values.texera.imageTag }} imagePullPolicy: {{ .Values.texeraImages.pullPolicy }} ports: - containerPort: {{ .Values.webserver.service.port }} diff --git a/bin/k8s/templates/workflow-compiling-service-deployment.yaml b/bin/k8s/templates/workflow-compiling-service-deployment.yaml index 4ab10e3f9bc..50a0a04e1b7 100644 --- a/bin/k8s/templates/workflow-compiling-service-deployment.yaml +++ b/bin/k8s/templates/workflow-compiling-service-deployment.yaml @@ -34,7 +34,7 @@ spec: spec: containers: - name: {{ .Values.workflowCompilingService.name }} - image: {{ .Values.global.imageRegistry }}/{{ .Values.workflowCompilingService.imageName }}:{{ .Values.global.imageTag }} + image: {{ .Values.texera.imageRegistry }}/{{ .Values.workflowCompilingService.imageName }}:{{ .Values.texera.imageTag }} imagePullPolicy: {{ .Values.texeraImages.pullPolicy }} ports: - containerPort: {{ .Values.workflowCompilingService.service.port }} diff --git a/bin/k8s/templates/workflow-computing-unit-manager-deployment.yaml 
b/bin/k8s/templates/workflow-computing-unit-manager-deployment.yaml index 75e5e76b30a..5241d9160a3 100644 --- a/bin/k8s/templates/workflow-computing-unit-manager-deployment.yaml +++ b/bin/k8s/templates/workflow-computing-unit-manager-deployment.yaml @@ -35,7 +35,7 @@ spec: serviceAccountName: {{ .Values.workflowComputingUnitManager.serviceAccountName }} containers: - name: {{ .Values.workflowComputingUnitManager.name }} - image: {{ .Values.global.imageRegistry }}/{{ .Values.workflowComputingUnitManager.imageName }}:{{ .Values.global.imageTag }} + image: {{ .Values.texera.imageRegistry }}/{{ .Values.workflowComputingUnitManager.imageName }}:{{ .Values.texera.imageTag }} imagePullPolicy: {{ .Values.texeraImages.pullPolicy }} ports: - containerPort: {{ .Values.workflowComputingUnitManager.service.port }} @@ -46,7 +46,7 @@ spec: - name: KUBERNETES_COMPUTE_UNIT_SERVICE_NAME value: {{ .Values.workflowComputingUnitPool.name }}-svc - name: KUBERNETES_IMAGE_NAME - value: {{ .Values.global.imageRegistry }}/{{ .Values.workflowComputingUnitPool.imageName }}:{{ .Values.global.imageTag }} + value: {{ .Values.texera.imageRegistry }}/{{ .Values.workflowComputingUnitPool.imageName }}:{{ .Values.texera.imageTag }} # TexeraDB Access - name: STORAGE_JDBC_URL value: jdbc:postgresql://{{ .Release.Name }}-postgresql:5432/texera_db?currentSchema=texera_db,public diff --git a/bin/k8s/templates/workflow-computing-unit-master-prepull-daemonset.yaml b/bin/k8s/templates/workflow-computing-unit-master-prepull-daemonset.yaml index b4a8d6634db..4edc0104a89 100644 --- a/bin/k8s/templates/workflow-computing-unit-master-prepull-daemonset.yaml +++ b/bin/k8s/templates/workflow-computing-unit-master-prepull-daemonset.yaml @@ -36,7 +36,7 @@ spec: - operator: "Exists" initContainers: - name: prepuller - image: {{ .Values.global.imageRegistry }}/{{ .Values.workflowComputingUnitPool.imageName }}:{{ .Values.global.imageTag }} + image: {{ .Values.texera.imageRegistry }}/{{ 
.Values.workflowComputingUnitPool.imageName }}:{{ .Values.texera.imageTag }} imagePullPolicy: Always command: ["sh", "-c", "true"] containers: diff --git a/bin/k8s/values.yaml b/bin/k8s/values.yaml index 3e612ef40f5..7558591c4dd 100644 --- a/bin/k8s/values.yaml +++ b/bin/k8s/values.yaml @@ -15,13 +15,16 @@ # specific language governing permissions and limitations # under the License. -global: +texera: # Container image registry and tag for all Texera services # Override these to use a different registry or version imageRegistry: ghcr.io/apache imageTag: latest + +global: + # Required by Bitnami sub-charts (postgresql, minio) to allow custom images security: - allowInsecureImages: true # for custom postgres image + allowInsecureImages: true # Persistence Configuration # This controls how Persistent Volumes (PVs) and Persistent Volume Claims (PVCs) are managed @@ -37,12 +40,15 @@ persistence: # Part 1: the configuration of Postgres, Minio and LakeFS postgresql: image: - repository: texera/postgres17-pgroonga + repository: groonga/pgroonga tag: latest debug: true auth: postgresPassword: root_password # for executing init script with superuser primary: + containerSecurityContext: + # Disabled because groonga/pgroonga needs to write a lock/socket file to /var/run/postgresql + readOnlyRootFilesystem: false livenessProbe: initialDelaySeconds: 30 # increase this if the launching of postgresql is slow on the cluster readinessProbe: diff --git a/bin/postgres17-pgroonga.dockerfile b/bin/postgres17-pgroonga.dockerfile deleted file mode 100644 index f799c69e8ec..00000000000 --- a/bin/postgres17-pgroonga.dockerfile +++ /dev/null @@ -1,35 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -FROM postgres:17-bookworm - -# 1. Install prerequisites for adding the Groonga repository -RUN apt-get update && apt-get install -y --no-install-recommends \ - wget \ - ca-certificates \ - && rm -rf /var/lib/apt/lists/* - -# 2. Add Groonga official APT repo and install pre-compiled PGroonga for PGDG -RUN wget https://packages.groonga.org/debian/groonga-apt-source-latest-bookworm.deb && \ - dpkg -i groonga-apt-source-latest-bookworm.deb && \ - apt-get update && \ - apt-get install -y --no-install-recommends \ - postgresql-17-pgdg-pgroonga \ - groonga-tokenizer-mecab \ - mecab \ - && rm -rf /var/lib/apt/lists/* \ - && rm groonga-apt-source-latest-bookworm.deb \ No newline at end of file From e4a844f027a05c8dc59cf7a9febf4e17e80ec664 Mon Sep 17 00:00:00 2001 From: ali Date: Tue, 3 Mar 2026 12:13:28 -0800 Subject: [PATCH 03/13] fix: header name --- bin/k8s/templates/gateway-security-policy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/k8s/templates/gateway-security-policy.yaml b/bin/k8s/templates/gateway-security-policy.yaml index ad2f5684e58..4128b5f3d89 100644 --- a/bin/k8s/templates/gateway-security-policy.yaml +++ b/bin/k8s/templates/gateway-security-policy.yaml @@ -32,7 +32,7 @@ spec: port: {{ .Values.accessControlService.service.port }} path: /api/auth headersToBackend: - - x-user-cu-access + - x-user-computing-unit-access - x-user-id - x-user-name - x-user-email 
From b5e902481795ea8a5da94ce82c49adc48c01a2c4 Mon Sep 17 00:00:00 2001 From: ali Date: Wed, 11 Mar 2026 13:50:54 -0700 Subject: [PATCH 04/13] fix: image tags --- bin/k8s/values.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/k8s/values.yaml b/bin/k8s/values.yaml index 7558591c4dd..ef46dfbef43 100644 --- a/bin/k8s/values.yaml +++ b/bin/k8s/values.yaml @@ -19,7 +19,7 @@ texera: # Container image registry and tag for all Texera services # Override these to use a different registry or version imageRegistry: ghcr.io/apache - imageTag: latest + imageTag: nightly global: # Required by Bitnami sub-charts (postgresql, minio) to allow custom images From 7612505683066f37aa2bd03beddc6e83c89f864a Mon Sep 17 00:00:00 2001 From: ali Date: Thu, 12 Mar 2026 14:37:39 -0700 Subject: [PATCH 05/13] fix: use latest in CI --- .github/workflows/build-and-push-images.yml | 2 +- bin/k8s/values.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-and-push-images.yml b/.github/workflows/build-and-push-images.yml index e70c81a2d9f..60354af8602 100644 --- a/.github/workflows/build-and-push-images.yml +++ b/.github/workflows/build-and-push-images.yml @@ -87,7 +87,7 @@ jobs: if [[ "${{ github.event_name }}" == "schedule" ]]; then echo "Nightly build detected - using nightly defaults" echo "branch=main" >> $GITHUB_OUTPUT - echo "image_tag=nightly" >> $GITHUB_OUTPUT + echo "image_tag=latest" >> $GITHUB_OUTPUT echo "docker_registry=ghcr.io/apache" >> $GITHUB_OUTPUT echo "services=*" >> $GITHUB_OUTPUT echo "platforms=both" >> $GITHUB_OUTPUT diff --git a/bin/k8s/values.yaml b/bin/k8s/values.yaml index ef46dfbef43..7558591c4dd 100644 --- a/bin/k8s/values.yaml +++ b/bin/k8s/values.yaml @@ -19,7 +19,7 @@ texera: # Container image registry and tag for all Texera services # Override these to use a different registry or version imageRegistry: ghcr.io/apache - imageTag: nightly + imageTag: latest global: # Required by Bitnami sub-charts 
(postgresql, minio) to allow custom images From e08712eb82dbef49aaa6c5377e04dd76b5b0848e Mon Sep 17 00:00:00 2001 From: Kun Woo Park Date: Sat, 2 May 2026 14:18:35 -0700 Subject: [PATCH 06/13] feat(cloudbiomapper): add CloudBioMapper operator for sequence alignment via public cluster services - Add CloudMapperSourceOpDesc, ReferenceGenome, ReferenceGenomeEnum operator classes - Add FileResolver.resolveDirectory for resolving dataset directories by path - Add DatasetFileDocument directory mode: downloads all files as a zip via LakeFS/FileService - Add DocumentFactory.openReadonlyDocument isDirectory parameter - Add ENV_FILE_SERVICE_LIST_DIRECTORY_OBJECTS_ENDPOINT env var - Add Kubernetes Helm chart and PVC for the cloudmapper service Co-Authored-By: Claude Sonnet 4.6 --- bin/k8s/templates/cloudmapper-pvc.yaml | 13 + bin/k8s/templates/cloudmapper.yaml | 82 ++++++ .../amber/config/EnvironmentalVariable.scala | 2 + .../amber/core/storage/DocumentFactory.scala | 7 +- .../amber/core/storage/FileResolver.scala | 90 ++++++ .../storage/model/DatasetFileDocument.scala | 258 ++++++++++++++++-- .../cloudmapper/CloudMapperSourceOpDesc.scala | 243 +++++++++++++++++ .../cloudmapper/ReferenceGenome.scala | 46 ++++ .../cloudmapper/ReferenceGenomeEnum.java | 26 ++ .../texera/amber/operator/LogicalOp.scala | 7 +- 10 files changed, 755 insertions(+), 19 deletions(-) create mode 100755 bin/k8s/templates/cloudmapper-pvc.yaml create mode 100755 bin/k8s/templates/cloudmapper.yaml create mode 100755 common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/CloudMapperSourceOpDesc.scala create mode 100755 common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenome.scala create mode 100755 common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenomeEnum.java diff --git a/bin/k8s/templates/cloudmapper-pvc.yaml b/bin/k8s/templates/cloudmapper-pvc.yaml new file mode 100755 index 00000000000..c02acf83822 --- 
/dev/null +++ b/bin/k8s/templates/cloudmapper-pvc.yaml @@ -0,0 +1,13 @@ + +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: cloudmapper-pvc + namespace: texera +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: local-storage diff --git a/bin/k8s/templates/cloudmapper.yaml b/bin/k8s/templates/cloudmapper.yaml new file mode 100755 index 00000000000..88f2ec51b7e --- /dev/null +++ b/bin/k8s/templates/cloudmapper.yaml @@ -0,0 +1,82 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloudmapper + namespace: texera +spec: + replicas: 1 + selector: + matchLabels: + app: cloudmapper + template: + metadata: + labels: + app: cloudmapper + spec: + volumes: + - name: cloudmapper-file-volume + persistentVolumeClaim: + claimName: cloudmapper-pvc + - name: aws-config-volume + emptyDir: {} + - name: ssh-key-volume + secret: + secretName: ssh-key + defaultMode: 0600 # Secure SSH key permissions + + initContainers: + - name: aws-config-setup + image: ubuntu:22.04 + command: ["/bin/sh", "-c"] + args: + - | + mkdir -p /aws-config; + echo "[default]" > /aws-config/credentials; + echo "aws_access_key_id = $AWS_ACCESS_KEY_ID" >> /aws-config/credentials; + echo "aws_secret_access_key = $AWS_SECRET_ACCESS_KEY" >> /aws-config/credentials; + echo "[default]" > /aws-config/config; + echo "region = us-west-2" >> /aws-config/config; + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: aws-secret + key: AWS_ACCESS_KEY_ID + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: aws-secret + key: AWS_SECRET_ACCESS_KEY + volumeMounts: + - name: aws-config-volume + mountPath: /aws-config + + containers: + - name: cloudmapper + image: kunwp1/cloudmapper:latest + ports: + - containerPort: 4000 + volumeMounts: + - name: cloudmapper-file-volume + mountPath: "/data" + - name: aws-config-volume + mountPath: /root/.aws + - name: ssh-key-volume + mountPath: /root/.ssh + readOnly: true + +--- +apiVersion: 
v1 +kind: Service +metadata: + name: cloudmapper-service + namespace: texera +spec: + selector: + app: cloudmapper + ports: + - protocol: TCP + port: 4000 + targetPort: 4000 + type: ClusterIP + diff --git a/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala b/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala index 9ec52bba653..396bdd3b1d5 100644 --- a/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala +++ b/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala @@ -38,6 +38,8 @@ object EnvironmentalVariable { val ENV_FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT = "FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT" val ENV_FILE_SERVICE_UPLOAD_ONE_FILE_TO_DATASET_ENDPOINT = "FILE_SERVICE_UPLOAD_ONE_FILE_TO_DATASET_ENDPOINT" + val ENV_FILE_SERVICE_LIST_DIRECTORY_OBJECTS_ENDPOINT = + "FILE_SERVICE_LIST_DIRECTORY_OBJECTS_ENDPOINT" /** * Auth related vars diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/DocumentFactory.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/DocumentFactory.scala index 15949ef4717..f25a5aedc4f 100644 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/DocumentFactory.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/DocumentFactory.scala @@ -44,9 +44,12 @@ object DocumentFactory { * @param fileUri the uri of the document * @return ReadonlyVirtualDocument */ - def openReadonlyDocument(fileUri: URI): ReadonlyVirtualDocument[_] = { + def openReadonlyDocument( + fileUri: URI, + isDirectory: Boolean = false + ): ReadonlyVirtualDocument[_] = { fileUri.getScheme match { - case DATASET_FILE_URI_SCHEME => new DatasetFileDocument(fileUri) + case DATASET_FILE_URI_SCHEME => new DatasetFileDocument(fileUri, isDirectory) case "file" => new ReadonlyLocalFileDocument(fileUri) case unsupportedScheme => 
throw new UnsupportedOperationException( diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala index cda5b28779b..eb92e4e58bc 100644 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala @@ -62,6 +62,21 @@ object FileResolver { .getOrElse(throw new FileNotFoundException(fileName)) } + def resolveDirectory(directoryName: String): URI = { + if (isFileResolved(directoryName)) { + return new URI(directoryName) + } + val resolvers: Seq[String => URI] = Seq(localDirectoryResolveFunc, datasetDirectoryResolveFunc) + + // Try each resolver function in sequence + resolvers + .map(resolver => Try(resolver(directoryName))) + .collectFirst { + case Success(output) => output + } + .getOrElse(throw new FileNotFoundException(directoryName)) + } + /** * Attempts to resolve a local file path. * @throws FileNotFoundException if the local file does not exist @@ -75,6 +90,21 @@ object FileResolver { filePath.toUri } + /** + * Attempts to resolve a local directory path. + * @throws FileNotFoundException if the local directory does not exist + * @param directoryName the name of the directory to check + */ + private def localDirectoryResolveFunc(directoryName: String): URI = { + val directoryPath = Paths.get(directoryName) + if (!Files.exists(directoryPath) || !Files.isDirectory(directoryPath)) { + throw new FileNotFoundException( + s"Local directory $directoryName does not exist or is not a directory" + ) + } + directoryPath.toUri + } + /** * Parses a dataset file path and extracts its components. * Expected format: /ownerEmail/datasetName/versionName/fileRelativePath @@ -178,6 +208,66 @@ object FileResolver { } } + /** + * Attempts to resolve a given directoryName to a URI. 
+ * + * The directoryName format should be: /ownerEmail/datasetName/versionName + * e.g. /bob@texera.com/twitterDataset/v1 + * The output dataset URI format is: {DATASET_FILE_URI_SCHEME}:///{repositoryName}/{versionHash}/ + * e.g. {DATASET_FILE_URI_SCHEME}:///dataset-15/adeq233td/ + * + * @param directoryName the name of the directory to attempt resolving as a dataset directory + * @return A URI pointing to the dataset directory + * @throws FileNotFoundException if the dataset directory does not exist or cannot be created + */ + private def datasetDirectoryResolveFunc(directoryName: String): URI = { + val directoryPath = Paths.get(directoryName) + val pathSegments = + (0 until directoryPath.getNameCount).map(directoryPath.getName(_).toString).toArray + + val ownerEmail = pathSegments(0) + val datasetName = pathSegments(1) + val versionName = pathSegments(2) + + val (dataset, datasetVersion) = + withTransaction( + SqlServer + .getInstance() + .createDSLContext() + ) { ctx => + val dataset = ctx + .select(DATASET.fields: _*) + .from(DATASET) + .leftJoin(USER) + .on(USER.UID.eq(DATASET.OWNER_UID)) + .where(USER.EMAIL.eq(ownerEmail)) + .and(DATASET.NAME.eq(datasetName)) + .fetchOneInto(classOf[Dataset]) + + val datasetVersion = ctx + .selectFrom(DATASET_VERSION) + .where(DATASET_VERSION.DID.eq(dataset.getDid)) + .and(DATASET_VERSION.NAME.eq(versionName)) + .fetchOneInto(classOf[DatasetVersion]) + + if (dataset == null || datasetVersion == null) { + throw new FileNotFoundException(s"Dataset directory $directoryName not found.") + } + (dataset, datasetVersion) + } + + val uriSplitter = "/" + val encodedPath = + uriSplitter + dataset.getRepositoryName + uriSplitter + datasetVersion.getVersionHash + uriSplitter + + try { + new URI(DATASET_FILE_URI_SCHEME, "", encodedPath, null) + } catch { + case e: Exception => + throw new FileNotFoundException(s"Dataset directory $directoryName not found.") + } + } + /** * Checks if a given file path has a valid scheme. 
* diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetFileDocument.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetFileDocument.scala index b62b6583658..dd1c68caf81 100644 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetFileDocument.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetFileDocument.scala @@ -23,6 +23,7 @@ import com.typesafe.scalalogging.LazyLogging import org.apache.texera.amber.config.EnvironmentalVariable import org.apache.texera.amber.core.storage.model.DatasetFileDocument.{ fileServiceGetPresignURLEndpoint, + fileServiceListDirectoryObjectsEndpoint, userJwtToken } import org.apache.texera.amber.core.storage.util.LakeFSStorageClient @@ -32,6 +33,7 @@ import java.io.{File, FileOutputStream, InputStream} import java.net._ import java.nio.charset.StandardCharsets import java.nio.file.{Files, Path, Paths} +import java.util.zip.{ZipEntry, ZipOutputStream} import scala.jdk.CollectionConverters.IteratorHasAsScala object DatasetFileDocument { @@ -49,21 +51,33 @@ object DatasetFileDocument { "http://localhost:9092/api/dataset/presign-download" ) .trim + + // The endpoint for listing directory objects from the file service. 
+ lazy val fileServiceListDirectoryObjectsEndpoint: String = + sys.env + .getOrElse( + EnvironmentalVariable.ENV_FILE_SERVICE_LIST_DIRECTORY_OBJECTS_ENDPOINT, + "http://localhost:9092/api/dataset/list-directory-objects" + ) + .trim } -private[storage] class DatasetFileDocument(uri: URI) +private[storage] class DatasetFileDocument(uri: URI, isDirectory: Boolean = false) extends VirtualDocument[Nothing] with OnDataset with LazyLogging { // Utility function to parse and decode URI segments into individual components private def parseUri(uri: URI): (String, String, Path) = { val segments = Paths.get(uri.getPath).iterator().asScala.map(_.toString).toArray - if (segments.length < 3) + if (!isDirectory && segments.length < 3) throw new IllegalArgumentException("URI format is incorrect") // parse uri to dataset components val repositoryName = segments(0) val datasetVersionHash = URLDecoder.decode(segments(1), StandardCharsets.UTF_8) + if (isDirectory) { + return (repositoryName, datasetVersionHash, Paths.get("")) + } val decodedRelativeSegments = segments.drop(2).map(part => URLDecoder.decode(part, StandardCharsets.UTF_8)) val fileRelativePath = Paths.get(decodedRelativeSegments.head, decodedRelativeSegments.tail: _*) @@ -141,24 +155,44 @@ private[storage] class DatasetFileDocument(uri: URI) tempFile match { case Some(file) => file case None => - val tempFilePath = Files.createTempFile("versionedFile", ".tmp") - val tempFileStream = new FileOutputStream(tempFilePath.toFile) - val inputStream = asInputStream() + if (isDirectory) { + val tempZipPath = Files.createTempFile("versionedDirectory", ".zip") + val zipOutputStream = new ZipOutputStream(new FileOutputStream(tempZipPath.toFile)) + + try { + addDirectoryToZip( + zipOutputStream, + "", + getRepositoryName(), + getVersionHash(), + fileRelativePath + ) + } finally { + zipOutputStream.close() + } - val buffer = new Array[Byte](1024) + val file = tempZipPath.toFile + tempFile = Some(file) + file + } else { + val 
tempFilePath = Files.createTempFile("versionedFile", ".tmp") + val tempFileStream = new FileOutputStream(tempFilePath.toFile) + val inputStream = asInputStream() - // Create an iterator to repeatedly call inputStream.read, and direct buffered data to file - Iterator - .continually(inputStream.read(buffer)) - .takeWhile(_ != -1) - .foreach(tempFileStream.write(buffer, 0, _)) + val buffer = new Array[Byte](1024) - inputStream.close() - tempFileStream.close() + Iterator + .continually(inputStream.read(buffer)) + .takeWhile(_ != -1) + .foreach(tempFileStream.write(buffer, 0, _)) - val file = tempFilePath.toFile - tempFile = Some(file) - file + inputStream.close() + tempFileStream.close() + + val file = tempFilePath.toFile + tempFile = Some(file) + file + } } } @@ -191,4 +225,196 @@ private[storage] class DatasetFileDocument(uri: URI) override def getVersionHash(): String = datasetVersionHash override def getFileRelativePath(): String = fileRelativePath.toString + + private def addDirectoryToZip( + zipOutputStream: ZipOutputStream, + basePath: String, + datasetName: String, + versionHash: String, + directoryPath: Path + ): Unit = { + try { + val allObjects = if (userJwtToken.nonEmpty) { + getDirectoryObjectsViaFileService(datasetName, versionHash) + } else { + LakeFSStorageClient.retrieveObjectsOfVersion(datasetName, versionHash) + } + + val directoryPathStr = directoryPath.toString.replace("\\", "/") + + val objectsInDirectory = allObjects.filter { obj => + val objPath = obj.getPath + if (directoryPathStr.isEmpty) { + true + } else { + objPath.startsWith(directoryPathStr + "/") || objPath == directoryPathStr + } + } + + objectsInDirectory.foreach { obj => + val objPath = obj.getPath + val relativePath = if (directoryPathStr.isEmpty) { + if (basePath.isEmpty) objPath else s"$basePath/$objPath" + } else { + val filePathWithinDirectory = objPath.substring(directoryPathStr.length).stripPrefix("/") + if (basePath.isEmpty) filePathWithinDirectory else 
s"$basePath/$filePathWithinDirectory" + } + + if (relativePath.nonEmpty) { + val zipEntry = new ZipEntry(relativePath) + zipOutputStream.putNextEntry(zipEntry) + + val fileInputStream = getFileInputStreamFromLakeFS(datasetName, versionHash, objPath) + val buffer = new Array[Byte](1024) + + try { + Iterator + .continually(fileInputStream.read(buffer)) + .takeWhile(_ != -1) + .foreach(zipOutputStream.write(buffer, 0, _)) + } finally { + fileInputStream.close() + } + + zipOutputStream.closeEntry() + } + } + } catch { + case e: Exception => + logger.warn( + s"Error adding directory to zip via primary method: ${e.getMessage}. Trying fallback.", + e + ) + addDirectoryToZipFallback(zipOutputStream, basePath, datasetName, versionHash, directoryPath) + } + } + + private def getDirectoryObjectsViaFileService( + datasetName: String, + versionHash: String + ): List[io.lakefs.clients.sdk.model.ObjectStats] = { + val requestUrl = + s"$fileServiceListDirectoryObjectsEndpoint?datasetName=${URLEncoder.encode(datasetName, StandardCharsets.UTF_8.name())}&commitHash=${URLEncoder + .encode(versionHash, StandardCharsets.UTF_8.name())}" + + val connection = new URL(requestUrl).openConnection().asInstanceOf[HttpURLConnection] + connection.setRequestMethod("GET") + connection.setRequestProperty("Authorization", s"Bearer $userJwtToken") + + try { + if (connection.getResponseCode != HttpURLConnection.HTTP_OK) { + throw new RuntimeException( + s"Failed to list directory objects: HTTP ${connection.getResponseCode}" + ) + } + + val responseBody = + new String(connection.getInputStream.readAllBytes(), StandardCharsets.UTF_8) + + val objectPattern = """\{"path"\s*:\s*"([^"]+)"\s*,\s*"sizeBytes"\s*:\s*(\d+)\}""".r + + objectPattern.findAllMatchIn(responseBody).toList.map { matchObj => + val path = matchObj.group(1) + val sizeBytes = matchObj.group(2).toLong + val objectStats = new io.lakefs.clients.sdk.model.ObjectStats() + objectStats.setPath(path) + objectStats.setSizeBytes(sizeBytes) + 
objectStats + } + } catch { + case e: Exception => + logger.warn( + s"Failed to get directory objects via FileService: ${e.getMessage}. Falling back to direct LakeFS.", + e + ) + LakeFSStorageClient.retrieveObjectsOfVersion(datasetName, versionHash) + } finally { + connection.disconnect() + } + } + + private def addDirectoryToZipFallback( + zipOutputStream: ZipOutputStream, + basePath: String, + datasetName: String, + versionHash: String, + directoryPath: Path + ): Unit = { + lazy val datasetsRootPath = + Path + .of(sys.env.getOrElse("TEXERA_HOME", ".")) + .resolve("amber") + .resolve("user-resources") + .resolve("datasets") + val datasetPath = datasetsRootPath.resolve("0") + val fullDirectoryPath = datasetPath.resolve(directoryPath) + + if (Files.exists(fullDirectoryPath) && Files.isDirectory(fullDirectoryPath)) { + Files.walk(fullDirectoryPath).forEach { filePath => + if (!Files.isDirectory(filePath)) { + val zipRelativePath = if (basePath.isEmpty) { + directoryPath.relativize(datasetPath.relativize(filePath)).toString.replace("\\", "/") + } else { + s"$basePath/${directoryPath.relativize(datasetPath.relativize(filePath)).toString.replace("\\", "/")}" + } + + val zipEntry = new ZipEntry(zipRelativePath) + zipOutputStream.putNextEntry(zipEntry) + + val fileInputStream = + GitVersionControlLocalFileStorage.retrieveFileContentOfVersionAsInputStream( + datasetPath, + versionHash, + filePath + ) + + val buffer = new Array[Byte](1024) + try { + Iterator + .continually(fileInputStream.read(buffer)) + .takeWhile(_ != -1) + .foreach(zipOutputStream.write(buffer, 0, _)) + } finally { + fileInputStream.close() + } + + zipOutputStream.closeEntry() + } + } + } else { + throw new RuntimeException(s"Failed to create zip file for directory: ${directoryPath}") + } + } + + private def getFileInputStreamFromLakeFS( + datasetName: String, + versionHash: String, + filePath: String + ): InputStream = { + if (userJwtToken.isEmpty) { + val presignUrl = 
LakeFSStorageClient.getFilePresignedUrl(datasetName, versionHash, filePath) + new URL(presignUrl).openStream() + } else { + val presignRequestUrl = + s"$fileServiceGetPresignURLEndpoint?repositoryName=${datasetName}&commitHash=${versionHash}&filePath=${URLEncoder + .encode(filePath, StandardCharsets.UTF_8.name())}" + + val connection = new URL(presignRequestUrl).openConnection().asInstanceOf[HttpURLConnection] + connection.setRequestMethod("GET") + connection.setRequestProperty("Authorization", s"Bearer $userJwtToken") + + if (connection.getResponseCode != HttpURLConnection.HTTP_OK) { + throw new RuntimeException( + s"Failed to retrieve presigned URL: HTTP ${connection.getResponseCode}" + ) + } + + val responseBody = + new String(connection.getInputStream.readAllBytes(), StandardCharsets.UTF_8) + val presignedUrl = responseBody.split("\"presignedUrl\"\\s*:\\s*\"")(1).split("\"")(0) + + connection.disconnect() + new URL(presignedUrl).openStream() + } + } } diff --git a/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/CloudMapperSourceOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/CloudMapperSourceOpDesc.scala new file mode 100755 index 00000000000..c02c40a3beb --- /dev/null +++ b/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/CloudMapperSourceOpDesc.scala @@ -0,0 +1,243 @@ +package org.apache.amber.operator.cloudmapper + +import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle +import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema} +import org.apache.texera.amber.core.workflow.{OutputPort, PortIdentity} +import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo} +import org.apache.texera.amber.operator.source.PythonSourceOperatorDescriptor +import org.apache.texera.amber.core.storage.{DocumentFactory, FileResolver} + 
+class CloudMapperSourceOpDesc extends PythonSourceOperatorDescriptor { + @JsonProperty(required = true) + @JsonSchemaTitle("FastQ Dataset") + @JsonPropertyDescription("Dataset containing fastq files") + val directoryName: String = "" + + @JsonProperty(required = true) + var referenceGenome: ReferenceGenome = _ + + @JsonProperty(required = false) + @JsonSchemaTitle("Additional Reference Genomes") + @JsonPropertyDescription("Add one or more additional reference genomes (optional)") + var additionalReferenceGenomes: List[ReferenceGenome] = List() + + @JsonProperty(required = true) + @JsonSchemaTitle("Cluster") + @JsonPropertyDescription("Cluster") + val cluster: String = "" + + private var clusterLauncherServiceTarget: String = + "http://cloudmapper-service.texera.svc.cluster.local:4000" + + // Getter to retrieve only the id part (cid) from the cluster + def clusterId: String = { + if (cluster.startsWith("#")) { + cluster.split(" ")(0).substring(1) // Extracts the cid part by splitting and removing '#' + } else { + "" + } + } + + override def generatePythonCode(): String = { + val directoryUri = FileResolver.resolveDirectory(directoryName) + println(directoryUri.toASCIIString) + + val directoryDocument = DocumentFactory.openReadonlyDocument(directoryUri, isDirectory = true) + val directoryFile = directoryDocument.asFile() + println(directoryFile.getAbsolutePath) + + // Convert the Scala referenceGenome to a Python string + val pythonReferenceGenome = s"'${referenceGenome.referenceGenome.getName}'" + + // Convert the Scala additionalReferenceGenomes list to a Python list format + val pythonAdditionalReferenceGenomes = additionalReferenceGenomes + .map(_.referenceGenome.getName) + .map(name => s"'$name'") + .mkString("[", ", ", "]") + + // Combine main reference genome with additional ones + val pythonAllReferenceGenomes = + s"[${pythonReferenceGenome}] + ${pythonAdditionalReferenceGenomes}" + + // Convert all reference genomes (main + additional) to a Python list 
format for FASTA files + val pythonFastaFiles = (referenceGenome :: additionalReferenceGenomes) + .flatMap(_.fastAFiles) + .map(file => { + val fileUri = FileResolver.resolve(file) + val fileDocument = DocumentFactory.openReadonlyDocument(fileUri, isDirectory = false) + val fastAFilePath = fileDocument.asFile().getAbsolutePath + s"open(r'$fastAFilePath', 'rb')" + }) + .mkString("[", ", ", "]") + + // Extract GTF file if exists for 'My Reference' (considering both main and additional reference genomes) + val pythonGtfFile = (referenceGenome :: additionalReferenceGenomes) + .find(_.referenceGenome == ReferenceGenomeEnum.MY_REFERENCE) + .flatMap(_.gtfFile) + .map(file => { + val fileUri = FileResolver.resolve(file) + val fileDocument = DocumentFactory.openReadonlyDocument(fileUri, isDirectory = false) + val gtfFilePath = fileDocument.asFile().getAbsolutePath + s"open(r'$gtfFilePath', 'rb')" + }) + .getOrElse("None") + + val pythonGtfFileValue = if (pythonGtfFile == "None") "None" else pythonGtfFile + + s"""from pytexera import * + | + |class GenerateOperator(UDFSourceOperator): + | + | @overrides + | def produce(self) -> Iterator[Union[TupleLike, TableLike, None]]: + | import requests, time, tarfile, io + | + | reads_path = r'${directoryFile.getAbsolutePath}' + | service_url = "${clusterLauncherServiceTarget}" + | cluster_id = ${clusterId} + | + | # ------------------------------------------------------------------ + | # Step 1: Ask the Go service for a presigned S3 PUT URL. + | # The reads zip will be sent directly to S3 from here — the Go + | # service is not in the data path for the large file. 
+ | # ------------------------------------------------------------------ + | upload_meta_resp = requests.post(f"{service_url}/api/job/request-upload") + | upload_meta_resp.raise_for_status() + | upload_meta = upload_meta_resp.json() + | upload_url = upload_meta["upload_url"] + | s3_key = upload_meta["s3_key"] + | job_id = upload_meta["job_id"] + | + | yield # let Texera heartbeat while we upload + | + | # ------------------------------------------------------------------ + | # Step 2: PUT the reads zip directly to S3 (presigned URL, no proxy). + | # ------------------------------------------------------------------ + | with open(reads_path, 'rb') as reads_file: + | put_resp = requests.put(upload_url, data=reads_file) + | put_resp.raise_for_status() + | + | yield # let Texera heartbeat while we notify + | + | # ------------------------------------------------------------------ + | # Step 3: Notify the Go service to start the job. Pass s3_key and + | # job_id so it knows which S3 object to pull on the EC2 head node. + | # FASTA/GTF files (small, annotation-only) still go as multipart. 
+ | # ------------------------------------------------------------------ + | selected_genomes = ${pythonAllReferenceGenomes} + | form_data = { + | 'cid': str(cluster_id), + | 's3_key': s3_key, + | 'job_id': str(job_id), + | } + | for index, genome in enumerate(selected_genomes): + | form_data[f'referenceGenome[{index}]'] = genome + | + | files = {} + | if 'My Reference' in selected_genomes: + | fasta_files = ${pythonFastaFiles} + | for index, fasta_file in enumerate(fasta_files): + | files[f'fastaFiles[{index}]'] = fasta_file + | gtf_file = ${pythonGtfFileValue} + | if gtf_file is not None: + | files['gtfFile'] = gtf_file + | + | response = requests.post(f"{service_url}/api/job/create", + | data=form_data, files=files if files else None) + | response.raise_for_status() + | + | # ------------------------------------------------------------------ + | # Step 4: Poll until the job is finished. + | # ------------------------------------------------------------------ + | while True: + | status_response = requests.get(f'{service_url}/api/job/status/{job_id}') + | status = status_response.json().get("status") + | + | if status == "finished": + | print("Job finished! Downloading the result...") + | break + | elif status == "failed": + | print("Job failed.") + | yield { + | 'Sample': None, + | 'features.tsv.gz': None, + | 'barcodes.tsv.gz': None, + | 'matrix.mtx.gz': None + | } + | return + | + | print("Job is still processing...") + | time.sleep(0.5) + | yield + | + | # ------------------------------------------------------------------ + | # Step 5: Download results. + | # The server streams a tar.gz archive containing all filtered/ + | # output files. We parse it member-by-member so the operator + | # never holds the entire decompressed matrix in RAM at once. 
+ | # ------------------------------------------------------------------ + | download_response = requests.get(f'{service_url}/api/job/download/{job_id}', + | stream=True) + | download_response.raise_for_status() + | + | # urllib3 raw socket; tell it to handle transport-encoding itself + | download_response.raw.decode_content = True + | + | samples = {} + | with tarfile.open(fileobj=download_response.raw, mode='r|gz') as tar: + | for member in tar: + | if not member.isfile(): + | continue + | parts = member.name.split('/') + | # Expected layout: /filtered/.gz + | if len(parts) < 3: + | continue + | sample_name = parts[0] + | fname = parts[-1] + | if fname in ('features.tsv.gz', 'barcodes.tsv.gz', 'matrix.mtx.gz'): + | f = tar.extractfile(member) + | if f is not None: + | samples.setdefault(sample_name, {})[fname] = f.read() + | + | if not samples: + | print("Download succeeded but archive contained no recognised files.") + | yield { + | 'Sample': None, + | 'features.tsv.gz': None, + | 'barcodes.tsv.gz': None, + | 'matrix.mtx.gz': None + | } + | return + | + | for sample_name, files in samples.items(): + | yield { + | 'Sample': sample_name, + | 'features.tsv.gz': files.get('features.tsv.gz'), + | 'barcodes.tsv.gz': files.get('barcodes.tsv.gz'), + | 'matrix.mtx.gz': files.get('matrix.mtx.gz') + | } + """.stripMargin + } + override def operatorInfo: OperatorInfo = + OperatorInfo( + "CloudBioMapper", + "Running sequence alignment using public cluster services", + OperatorGroupConstants.API_GROUP, + inputPorts = List.empty, + outputPorts = List(OutputPort()) + ) + override def asSource() = true + override def sourceSchema(): Schema = + Schema() + .add( + new Attribute("Sample", AttributeType.STRING), + new Attribute("features.tsv.gz", AttributeType.BINARY), + new Attribute("barcodes.tsv.gz", AttributeType.BINARY), + new Attribute("matrix.mtx.gz", AttributeType.BINARY) + ) + + def getOutputSchemas(inputSchemas: Map[PortIdentity, Schema]): Map[PortIdentity, Schema] = { + 
Map(operatorInfo.outputPorts.head.id -> sourceSchema()) + } +} diff --git a/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenome.scala b/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenome.scala new file mode 100755 index 00000000000..923c85ce673 --- /dev/null +++ b/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenome.scala @@ -0,0 +1,46 @@ +package org.apache.amber.operator.cloudmapper + +import com.fasterxml.jackson.annotation.JsonProperty +import com.kjetland.jackson.jsonSchema.annotations.{ + JsonSchemaInject, + JsonSchemaString, + JsonSchemaTitle +} +import org.apache.texera.amber.operator.metadata.annotations.HideAnnotation + +class ReferenceGenome { + // Required field for selecting a reference genome. + // This field is mandatory and must be filled by the user. + @JsonProperty(required = true) + var referenceGenome: ReferenceGenomeEnum = _ + + // Optional field for FastA files. + // This field is shown only if 'referenceGenome' is set to 'MY_REFERENCE'. + @JsonSchemaTitle("FastA Files") + @JsonSchemaInject( + strings = Array( + new JsonSchemaString(path = HideAnnotation.hideTarget, value = "referenceGenome"), + new JsonSchemaString(path = HideAnnotation.hideType, value = HideAnnotation.Type.regex), + new JsonSchemaString( + path = HideAnnotation.hideExpectedValue, + value = "^((?!My Reference).)*$" + ) + ) + ) + val fastAFiles: Option[String] = None + + // Optional field for Gtf files. + // This field is shown only if 'referenceGenome' is set to 'MY_REFERENCE'. 
+ @JsonSchemaTitle("Gtf File") + @JsonSchemaInject( + strings = Array( + new JsonSchemaString(path = HideAnnotation.hideTarget, value = "referenceGenome"), + new JsonSchemaString(path = HideAnnotation.hideType, value = HideAnnotation.Type.regex), + new JsonSchemaString( + path = HideAnnotation.hideExpectedValue, + value = "^((?!My Reference).)*$" + ) + ) + ) + val gtfFile: Option[String] = None +} diff --git a/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenomeEnum.java b/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenomeEnum.java new file mode 100755 index 00000000000..962fb6eb19f --- /dev/null +++ b/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenomeEnum.java @@ -0,0 +1,26 @@ +package org.apache.amber.operator.cloudmapper; + +import com.fasterxml.jackson.annotation.JsonValue; + +public enum ReferenceGenomeEnum { + HUMAN_GRCh38("GRCh38"), + + MOUSE_GRCm39("GRCm39"), + + MOUSE_mm10("mm10"), + + HUMAN_hg19("hg19"), + + MY_REFERENCE("My Reference"); + + private final String name; + + ReferenceGenomeEnum(String name) { + this.name = name; + } + + @JsonValue + public String getName() { + return this.name; + } +} \ No newline at end of file diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala index 4e9d6c6e2cd..43d1ffc03b9 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala @@ -22,6 +22,7 @@ package org.apache.texera.amber.operator import com.fasterxml.jackson.annotation.JsonSubTypes.Type import com.fasterxml.jackson.annotation._ import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle +import org.apache.amber.operator.cloudmapper.CloudMapperSourceOpDesc 
import org.apache.texera.amber.core.executor.OperatorExecutor import org.apache.texera.amber.core.tuple.Schema import org.apache.texera.amber.core.virtualidentity.{ @@ -428,7 +429,11 @@ trait StateTransferFunc value = classOf[SklearnAdvancedSVRTrainerOpDesc], name = "SVRTrainer" ), - new Type(value = classOf[SklearnTestingOpDesc], name = "SklearnTesting") + new Type(value = classOf[SklearnTestingOpDesc], name = "SklearnTesting"), + new Type( + value = classOf[CloudMapperSourceOpDesc], + name = "CloudBioMapper" + ) ) ) abstract class LogicalOp extends PortDescriptor with Serializable { From 0dc36bae6214bb4d9f80841f1238c7d29bb45eb8 Mon Sep 17 00:00:00 2001 From: Kun Woo Park Date: Sat, 2 May 2026 14:37:14 -0700 Subject: [PATCH 07/13] feat(cloudbiomapper): add cluster management UI, backend API, and frontend integration - Add ClusterResource, ClusterCallbackResource, ClusterServiceClient, ClusterUtils backend API for managing EC2 clusters - Add cluster dashboard component with launch/stop/terminate/start actions and management modal - Add ClusterSelectionComponent and ClusterAutoCompleteComponent for operator property panel - Add DirectoryPathInput and DirectorySelection components for dataset directory selection - Add cluster route in app-routing, cluster declarations in app.module - Add cluster_enabled feature flag to gui-config, dashboard sidebar, and admin settings - Add clusterautocomplete and directorypathinput formly field types - Register cluster/directoryName/fastQFiles/fastAFiles/gtfFile fields in operator property editor - Add SQL schema for cluster and cluster_activity tables - Add dknet logo, CloudBioMapper operator icon, and sequence-alignment workflow assets - Add DatasetDirectoryDocument and PathUtils storage utilities Co-Authored-By: Claude Sonnet 4.6 --- .../texera/web/TexeraWebApplication.scala | 6 + .../cluster/ClusterCallbackResource.scala | 198 ++++++++++++++ .../user/cluster/ClusterResource.scala | 254 ++++++++++++++++++ 
.../user/cluster/ClusterServiceClient.scala | 107 ++++++++ .../dashboard/user/cluster/ClusterUtils.scala | 49 ++++ .../model/DatasetDirectoryDocument.scala | 98 +++++++ .../amber/core/storage/model/PathUtils.scala | 35 +++ frontend/src/app/app-routing.module.ts | 5 + frontend/src/app/app.module.ts | 12 + .../src/app/common/formly/formly-config.ts | 4 + .../common/service/cluster/cluster.service.ts | 45 ++++ frontend/src/app/common/type/gui-config.ts | 1 + .../settings/admin-settings.component.html | 9 + .../settings/admin-settings.component.ts | 1 + .../component/dashboard.component.html | 12 + .../component/dashboard.component.ts | 1 + .../cluster-management-modal.component.html | 61 +++++ .../cluster-management-modal.component.scss | 106 ++++++++ .../cluster-management-modal.component.ts | 82 ++++++ .../user/cluster/cluster.component.html | 112 ++++++++ .../user/cluster/cluster.component.scss | 43 +++ .../user/cluster/cluster.component.ts | 194 +++++++++++++ frontend/src/app/dashboard/type/clusters.ts | 9 + .../cluster-autocomplete.component.html | 15 ++ .../cluster-autocomplete.component.scss | 3 + .../cluster-autocomplete.component.ts | 47 ++++ .../cluster-selection.component.html | 75 ++++++ .../cluster-selection.component.scss | 80 ++++++ .../cluster-selection.component.ts | 56 ++++ .../directory-path-input.component.html | 23 ++ .../directory-path-input.component.scss | 32 +++ .../directory-path-input.component.ts | 54 ++++ .../directory-selection.component.html | 64 +++++ .../directory-selection.component.scss | 71 +++++ .../directory-selection.component.ts | 107 ++++++++ .../operator-property-edit-frame.component.ts | 18 +- .../src/assets/logos/dknet-favicon-32x32.png | Bin 0 -> 2085 bytes frontend/src/assets/logos/dknet-logo.png | Bin 0 -> 153082 bytes .../assets/operator_images/CloudBioMapper.png | Bin 0 -> 55237 bytes .../assets/sequence-alignment-workflow.png | Bin 0 -> 467023 bytes sql/updates/cluster.sql | 42 +++ 41 files changed, 2129 insertions(+), 
2 deletions(-) create mode 100755 amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterCallbackResource.scala create mode 100755 amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterResource.scala create mode 100755 amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterServiceClient.scala create mode 100755 amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterUtils.scala create mode 100755 common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetDirectoryDocument.scala create mode 100755 common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/PathUtils.scala create mode 100755 frontend/src/app/common/service/cluster/cluster.service.ts create mode 100755 frontend/src/app/dashboard/component/user/cluster/cluster-management-modal/cluster-management-modal.component.html create mode 100755 frontend/src/app/dashboard/component/user/cluster/cluster-management-modal/cluster-management-modal.component.scss create mode 100755 frontend/src/app/dashboard/component/user/cluster/cluster-management-modal/cluster-management-modal.component.ts create mode 100755 frontend/src/app/dashboard/component/user/cluster/cluster.component.html create mode 100755 frontend/src/app/dashboard/component/user/cluster/cluster.component.scss create mode 100755 frontend/src/app/dashboard/component/user/cluster/cluster.component.ts create mode 100755 frontend/src/app/dashboard/type/clusters.ts create mode 100755 frontend/src/app/workspace/component/cluster-autocomplete/cluster-autocomplete.component.html create mode 100755 frontend/src/app/workspace/component/cluster-autocomplete/cluster-autocomplete.component.scss create mode 100755 frontend/src/app/workspace/component/cluster-autocomplete/cluster-autocomplete.component.ts create mode 100755 frontend/src/app/workspace/component/cluster-selection/cluster-selection.component.html create 
mode 100755 frontend/src/app/workspace/component/cluster-selection/cluster-selection.component.scss create mode 100755 frontend/src/app/workspace/component/cluster-selection/cluster-selection.component.ts create mode 100755 frontend/src/app/workspace/component/directory-path-input/directory-path-input.component.html create mode 100755 frontend/src/app/workspace/component/directory-path-input/directory-path-input.component.scss create mode 100755 frontend/src/app/workspace/component/directory-path-input/directory-path-input.component.ts create mode 100755 frontend/src/app/workspace/component/directory-selection/directory-selection.component.html create mode 100755 frontend/src/app/workspace/component/directory-selection/directory-selection.component.scss create mode 100755 frontend/src/app/workspace/component/directory-selection/directory-selection.component.ts create mode 100755 frontend/src/assets/logos/dknet-favicon-32x32.png create mode 100755 frontend/src/assets/logos/dknet-logo.png create mode 100755 frontend/src/assets/operator_images/CloudBioMapper.png create mode 100755 frontend/src/assets/sequence-alignment-workflow.png create mode 100644 sql/updates/cluster.sql diff --git a/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala b/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala index 98b7c68c974..e48bd8e7b67 100644 --- a/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala +++ b/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala @@ -45,6 +45,10 @@ import org.apache.texera.web.resource.dashboard.user.project.{ ProjectResource, PublicProjectResource } +import org.apache.texera.web.resource.dashboard.user.cluster.{ + ClusterResource, + ClusterCallbackResource +} import org.apache.texera.web.resource.dashboard.user.quota.UserQuotaResource import org.apache.texera.web.resource.dashboard.user.workflow.{ WorkflowAccessResource, @@ -160,6 +164,8 @@ class TexeraWebApplication 
environment.jersey.register(classOf[UserQuotaResource]) environment.jersey.register(classOf[AdminSettingsResource]) environment.jersey.register(classOf[AIAssistantResource]) + environment.jersey.register(classOf[ClusterResource]) + environment.jersey.register(classOf[ClusterCallbackResource]) AuthResource.createAdminUser() diff --git a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterCallbackResource.scala b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterCallbackResource.scala new file mode 100755 index 00000000000..310fecf608d --- /dev/null +++ b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterCallbackResource.scala @@ -0,0 +1,198 @@ +package org.apache.texera.web.resource.dashboard.user.cluster + +import org.apache.texera.dao.SqlServer +import org.apache.texera.dao.jooq.generated.enums.ClusterStatus +import org.apache.texera.dao.jooq.generated.tables.daos.{ClusterActivityDao, ClusterDao} +import org.apache.texera.dao.jooq.generated.tables.pojos.ClusterActivity +import org.apache.texera.dao.jooq.generated.tables.Cluster.CLUSTER + +import javax.ws.rs.{Consumes, POST, Path} +import javax.ws.rs.core.{MediaType, Response} +import org.apache.texera.web.resource.dashboard.user.cluster.ClusterUtils.{ + updateClusterActivityEndTime, + updateClusterStatus +} +import org.apache.texera.web.resource.dashboard.user.cluster.ClusterCallbackResource.{ + clusterActivityDao, + clusterDao, + context +} +import org.jooq.impl.DSL + +import java.sql.Timestamp + +object ClusterCallbackResource { + final private val context = SqlServer + .getInstance() + .createDSLContext() + final private lazy val clusterDao = new ClusterDao(context.configuration) + final private lazy val clusterActivityDao = new ClusterActivityDao(context.configuration) + + // error messages + val ERR_USER_HAS_NO_ACCESS_TO_CLUSTER_MESSAGE = "User has no access to this cluster" +} + +@Path("/callback") +class 
ClusterCallbackResource { + + @POST + @Path("/cluster/created") + @Consumes(Array(MediaType.APPLICATION_JSON)) + def handleClusterCreatedCallback(callbackPayload: CallbackPayload): Response = { + val clusterId = callbackPayload.clusterId + val success = callbackPayload.success + + val cluster = clusterDao.fetchOneByCid(clusterId) + if (cluster == null) { + return Response + .status(Response.Status.NOT_FOUND) + .entity(s"Cluster $clusterId not found") + .build() + } + + if (success && cluster.getStatus == ClusterStatus.PENDING) { + updateClusterStatus(clusterId, ClusterStatus.RUNNING, context) + insertClusterActivity(cluster.getCid, cluster.getCreationTime) + Response.ok("Cluster status updated to RUNNING").build() + } else if (!success) { + // Cluster launch failed — mark it so the UI doesn't stay stuck on PENDING. + updateClusterStatus(clusterId, ClusterStatus.LAUNCH_FAILED, context) + Response.ok("Cluster status updated to LAUNCH_FAILED").build() + } else { + Response + .status(Response.Status.CONFLICT) + .entity(s"Cluster $clusterId status update not allowed (current: ${cluster.getStatus})") + .build() + } + } + + @POST + @Path("/cluster/deleted") + @Consumes(Array(MediaType.APPLICATION_JSON)) + def handleClusterDeletedCallback(callbackPayload: CallbackPayload): Response = { + val clusterId = callbackPayload.clusterId + val success = callbackPayload.success + + val cluster = clusterDao.fetchOneByCid(clusterId) + if (success && cluster != null && cluster.getStatus == ClusterStatus.SHUTTING_DOWN) { + updateClusterStatus(clusterId, ClusterStatus.TERMINATED, context) + updateClusterActivityEndTime(clusterId, context) + Response + .ok(s"Cluster with ID $clusterId marked as TERMINATED and activity end time updated") + .build() + } else { + Response + .status(Response.Status.NOT_FOUND) + .entity("Cluster not found or status update not allowed") + .build() + } + } + + @POST + @Path("/cluster/paused") + @Consumes(Array(MediaType.APPLICATION_JSON)) + def 
handleClusterPausedCallback(callbackPayload: CallbackPayload): Response = { + val clusterId = callbackPayload.clusterId + val success = callbackPayload.success + + val cluster = clusterDao.fetchOneByCid(clusterId) + if (success && cluster != null && cluster.getStatus == ClusterStatus.STOPPING) { + updateClusterStatus(clusterId, ClusterStatus.STOPPED, context) + updateClusterActivityEndTime(clusterId, context) + Response + .ok(s"Cluster with ID $clusterId marked as STOPPED and activity end time updated") + .build() + } else { + Response + .status(Response.Status.NOT_FOUND) + .entity("Cluster not found or status update not allowed") + .build() + } + } + + @POST + @Path("/cluster/resumed") + @Consumes(Array(MediaType.APPLICATION_JSON)) + def handleClusterResumedCallback(callbackPayload: CallbackPayload): Response = { + val clusterId = callbackPayload.clusterId + val success = callbackPayload.success + // Update the cluster status to RUNNING in the database + val cluster = clusterDao.fetchOneByCid(clusterId) + if (success && cluster != null && cluster.getStatus == ClusterStatus.PENDING) { + updateClusterStatus(clusterId, ClusterStatus.RUNNING, context) + insertClusterActivity(cluster.getCid, cluster.getCreationTime) + Response.ok("Cluster status updated to RUNNING").build() + } else { + Response + .status(Response.Status.NOT_FOUND) + .entity("Cluster not found or status update not allowed") + .build() + } + } + + @POST + @Path("/cluster/getid") + @Consumes(Array(MediaType.APPLICATION_JSON)) + def handleClusterGetIdCallback(callbackPayload: CallbackPayload): Response = { + val maxCidResult = context + .select(DSL.max(CLUSTER.CID)) + .from(CLUSTER) + .fetchOne() + + val maxCid = maxCidResult.getValue(0, classOf[Integer]) + + if (maxCid == null) { + Response.ok("Next cluster ID is 1").entity(1).build() + } else { + val nextCid = maxCid + 1 + Response.ok(s"Next cluster ID is $nextCid").entity(nextCid).build() + } + } + + /** + * Handles the callback to change the cluster
status to SHUTTING_DOWN. + * + * @param callbackPayload The payload containing the cluster ID and success status. + * @return A Response indicating the result of the operation. + */ + @POST + @Path("/cluster/shutdown") + @Consumes(Array(MediaType.APPLICATION_JSON)) + def handleClusterShutdownCallback(callbackPayload: CallbackPayload): Response = { + val clusterId = callbackPayload.clusterId + val success = callbackPayload.success + + // Fetch the cluster by its ID + val cluster = clusterDao.fetchOneByCid(clusterId) + + // Check if the operation is successful and the cluster exists + if (success && cluster != null) { + // Update the cluster status to SHUTTING_DOWN + updateClusterStatus(clusterId, ClusterStatus.SHUTTING_DOWN, context) + // Return a success response + Response.ok(s"Cluster with ID $clusterId status updated to SHUTTING_DOWN").build() + } else { + // Return a NOT_FOUND response if the cluster is not found or the status update is not allowed + Response + .status(Response.Status.NOT_FOUND) + .entity("Cluster not found or status update not allowed") + .build() + } + } + + /** + * Inserts a new cluster activity record with the given start time. + * + * @param clusterId The ID of the cluster. + * @param startTime The start time of the activity. 
+ */ + private def insertClusterActivity(clusterId: Int, startTime: Timestamp): Unit = { + val clusterActivity = new ClusterActivity() + clusterActivity.setClusterId(clusterId) + clusterActivity.setStartTime(startTime) + clusterActivityDao.insert(clusterActivity) + } +} + +// Define the payload structure expected from the Go service +case class CallbackPayload(clusterId: Int, success: Boolean) diff --git a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterResource.scala b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterResource.scala new file mode 100755 index 00000000000..a96ff46ff6a --- /dev/null +++ b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterResource.scala @@ -0,0 +1,254 @@ +package org.apache.texera.web.resource.dashboard.user.cluster + +import org.apache.texera.auth.SessionUser +import org.apache.texera.dao.SqlServer +import org.apache.texera.dao.jooq.generated.enums.ClusterStatus +import org.apache.texera.dao.jooq.generated.tables.daos.ClusterDao +import org.apache.texera.dao.jooq.generated.tables.pojos.Cluster +import org.apache.texera.web.resource.dashboard.user.cluster.ClusterResource.{ + ERR_USER_HAS_NO_ACCESS_TO_CLUSTER_MESSAGE, + clusterDao, + context +} +import io.dropwizard.auth.Auth +import org.apache.texera.web.resource.dashboard.user.cluster.ClusterServiceClient.{ + callCreateClusterAPI, + callDeleteClusterAPI, + callPauseClusterAPI, + callResumeClusterAPI +} +import org.apache.texera.web.resource.dashboard.user.cluster.ClusterUtils.updateClusterStatus +import org.apache.texera.dao.jooq.generated.tables.Cluster.CLUSTER + +import java.util +import javax.annotation.security.RolesAllowed +import javax.ws.rs.{Consumes, ForbiddenException, GET, POST, Path, QueryParam} +import javax.ws.rs.core.{MediaType, Response} + +object ClusterResource { + final private lazy val context = SqlServer + .getInstance() + .createDSLContext() + final private lazy val 
clusterDao = new ClusterDao(context.configuration) + + // error messages + val ERR_USER_HAS_NO_ACCESS_TO_CLUSTER_MESSAGE = "User has no access to this cluster" +} + +@RolesAllowed(Array("REGULAR", "ADMIN")) +@Path("/cluster") +class ClusterResource { + + /** + * Launches a new cluster: stores it as LAUNCH_RECEIVED, then asks the Go microservice to create it. + * + * @param user The authenticated user creating the cluster. + * @param launchRequest The launch request containing cluster configuration. + * @return 200 with the stored Cluster (status PENDING), or 500 with an error message (status LAUNCH_FAILED). + */ + @POST + @Path("/launch") + @Consumes(Array(MediaType.APPLICATION_JSON)) + def launchCluster( + @Auth user: SessionUser, + launchRequest: ClusterLaunchRequest + ): Response = { + val cluster = new Cluster() + cluster.setName(launchRequest.name) + cluster.setOwnerId(user.getUid) + cluster.setMachineType(launchRequest.machineType) + cluster.setNumberOfMachines(launchRequest.numberOfMachines) + cluster.setStatus(ClusterStatus.LAUNCH_RECEIVED) + clusterDao.insert(cluster) + + // Call Go microservice to actually create the cluster + callCreateClusterAPI( + cluster.getCid, + launchRequest.machineType, + launchRequest.numberOfMachines + ) match { + case Right(goResponse) => + updateClusterStatus(cluster.getCid, ClusterStatus.PENDING, context) + Response.ok(clusterDao.fetchOneByCid(cluster.getCid)).build() + + case Left(errorMessage) => + updateClusterStatus(cluster.getCid, ClusterStatus.LAUNCH_FAILED, context) + Response + .status(Response.Status.INTERNAL_SERVER_ERROR) + .entity(s"Cluster creation failed: $errorMessage") + .build() + } + } + + /** + * Asks the Go microservice to terminate a cluster the user owns and tracks the outcome in the cluster status. + * + * @param user The authenticated user requesting the deletion. + * @param cluster The cluster to be deleted. + * @return A Response indicating the result of the operation.
+ */ + @POST + @Path("/terminate") + def terminateCluster(@Auth user: SessionUser, cluster: Cluster): Response = { + val clusterId = cluster.getCid + validateClusterOwnership(user, clusterId) + + updateClusterStatus(clusterId, ClusterStatus.TERMINATE_RECEIVED, context) + + // Call Go microservice to actually delete the cluster + callDeleteClusterAPI(clusterId) match { + case Right(goResponse) => + updateClusterStatus(clusterId, ClusterStatus.SHUTTING_DOWN, context) + Response.ok(goResponse).build() + + case Left(errorMessage) => + updateClusterStatus( + clusterId, + ClusterStatus.TERMINATE_FAILED, + context + ) + Response + .status(Response.Status.INTERNAL_SERVER_ERROR) + .entity(s"Cluster deletion failed: $errorMessage") + .build() + } + } + + /** + * Stops a cluster the user owns: marks it STOP_RECEIVED, then STOPPING or STOP_FAILED per the Go microservice's reply. + * + * @param user The authenticated user requesting the pause. + * @param cluster The cluster to be paused (only its cid is read). + * @return 200 with the microservice's response body, or 500 with an error message. + */ + @POST + @Path("/stop") + def stopCluster(@Auth user: SessionUser, cluster: Cluster): Response = { + val clusterId = cluster.getCid + validateClusterOwnership(user, clusterId) + + updateClusterStatus(clusterId, ClusterStatus.STOP_RECEIVED, context) + + callPauseClusterAPI(clusterId) match { + case Right(goResponse) => + updateClusterStatus(clusterId, ClusterStatus.STOPPING, context) + Response.ok(goResponse).build() + + case Left(errorMessage) => + updateClusterStatus( + clusterId, + ClusterStatus.STOP_FAILED, + context + ) + Response + .status(Response.Status.INTERNAL_SERVER_ERROR) + .entity(s"Cluster pause failed: $errorMessage") + .build() + } + + } + + /** + * Resumes a cluster the user owns: marks it START_RECEIVED, then PENDING or START_FAILED per the Go microservice's reply. + * + * @param user The authenticated user requesting the resume. + * @param cluster The cluster to be resumed. + * @return A Response indicating the result of the operation.
+ */ + @POST + @Path("/start") + def startCluster(@Auth user: SessionUser, cluster: Cluster): Response = { + val clusterId = cluster.getCid + validateClusterOwnership(user, clusterId) + + updateClusterStatus(clusterId, ClusterStatus.START_RECEIVED, context) + + callResumeClusterAPI(clusterId) match { + case Right(goResponse) => + updateClusterStatus(clusterId, ClusterStatus.PENDING, context) + Response.ok(goResponse).build() + + case Left(errorMessage) => + updateClusterStatus( + clusterId, + ClusterStatus.START_FAILED, + context + ) + Response + .status(Response.Status.INTERNAL_SERVER_ERROR) + .entity(s"Cluster resume failed: $errorMessage") + .build() + } + + } + + /** + * Updates the name of a cluster the user owns. + * + * @param user The authenticated user requesting the update. + * @param cluster Carries the cid of the cluster to rename and the new name. + * @return An empty 200 Response once the update has been executed. + */ + @POST + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Path("/update/name") + def updateClusterName(@Auth user: SessionUser, cluster: Cluster): Response = { + validateClusterOwnership(user, cluster.getCid) + + context + .update(CLUSTER) + .set(CLUSTER.NAME, cluster.getName) + .where(CLUSTER.CID.eq(cluster.getCid)) + .execute() + + Response.ok().build() + } + + /** + * Lists the authenticated user's clusters, excluding TERMINATED ones. + * + * @param user The authenticated user. + * @param available When true, only clusters whose status is RUNNING are returned. + * @return The matching Clusters owned by the user.
+ */ + @GET + @Path("") + def listClusters( + @Auth user: SessionUser, + @QueryParam("available") available: Boolean + ): util.List[Cluster] = { + /* One owner-scoped query; TERMINATED clusters are always excluded. */ + var steps = context + .select(CLUSTER.asterisk()) + .from(CLUSTER) + .where(CLUSTER.OWNER_ID.eq(user.getUid)) + .and(CLUSTER.STATUS.ne(ClusterStatus.TERMINATED)) + if (available) { + steps = steps.and(CLUSTER.STATUS.eq(ClusterStatus.RUNNING)) + } + steps.fetchInto(classOf[Cluster]) + + } + + /** + * Validates that the authenticated user owns the cluster; throws ForbiddenException otherwise. + * + * @param user The authenticated user. + * @param clusterId The ID of the cluster to validate; an unknown ID is rejected like a foreign one. + */ + private def validateClusterOwnership(user: SessionUser, clusterId: Int): Unit = { + val clusterOwnerId = Option(clusterDao.fetchOneByCid(clusterId)).map(_.getOwnerId).orNull + if (clusterOwnerId != user.getUid) { + throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_CLUSTER_MESSAGE) + } + } +} + +/** + * Request object for launching a cluster. + */ +case class ClusterLaunchRequest( + name: String, + machineType: String, + numberOfMachines: Integer +) diff --git a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterServiceClient.scala b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterServiceClient.scala new file mode 100755 index 00000000000..9cb0b6d7ed1 --- /dev/null +++ b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterServiceClient.scala @@ -0,0 +1,107 @@ +package org.apache.texera.web.resource.dashboard.user.cluster + +import java.net.{HttpURLConnection, URL} +import scala.io.Source +import scala.util.{Failure, Success, Try} + +object ClusterServiceClient { + + /** + * Makes an HTTP POST request to create a cluster in the Go microservice. + * + * @param clusterId The id of the cluster. + * @param machineType The type of the machine for the cluster. + * @param numberOfMachines The number of machines in the cluster.
+ * @return Either an error message in Left, or the response body in Right. + */ + def callCreateClusterAPI( + clusterId: Int, + machineType: String, + numberOfMachines: Int + ): Either[String, String] = { + val url = new URL(s"http://cloudmapper-service:4000/api/cluster/create") + val jsonInputString = + s"""{ + |"provider": "aws", + |"machineType": "$machineType", + |"numberOfNodes": $numberOfMachines, + |"clusterId": $clusterId + |}""".stripMargin + + sendHttpRequest("POST", url, Some(jsonInputString)) + } + + /** + * Makes an HTTP DELETE request to delete a cluster in the Go microservice. + * + * @param clusterId The ID of the cluster to be deleted. + * @return Either an error message in Left, or the response body in Right. + */ + def callDeleteClusterAPI(clusterId: Int): Either[String, String] = { + val url = new URL(s"http://cloudmapper-service:4000/api/cluster/$clusterId") + sendHttpRequest("DELETE", url, None) + } + + /** + * Makes an HTTP PUT request to pause a cluster in the Go microservice. + * + * @param clusterId The ID of the cluster to be paused. + * @return Either an error message in Left, or the response body in Right. + */ + def callPauseClusterAPI(clusterId: Int): Either[String, String] = { + val url = new URL(s"http://cloudmapper-service:4000/api/cluster/$clusterId") + sendHttpRequest("PUT", url, None) + } + + /** + * Makes an HTTP POST request to resume a cluster in the Go microservice. + * + * @param clusterId The ID of the cluster to be resumed. + * @return Either an error message in Left, or the response body in Right. + */ + def callResumeClusterAPI(clusterId: Int): Either[String, String] = { + val url = new URL(s"http://cloudmapper-service:4000/api/cluster/resume/$clusterId") + sendHttpRequest("POST", url, None) + } + + /** + * Helper function to send an HTTP request. + * + * @param method The HTTP method (e.g., POST, DELETE). + * @param url The URL for the HTTP request.
+ * @param jsonInputString The optional JSON payload for the request body, written as UTF-8 when present. + * @return Either an error message in Left (any status other than 200, or an exception), or the UTF-8 decoded response body in Right. + */ + private def sendHttpRequest( + method: String, + url: URL, + jsonInputString: Option[String] + ): Either[String, String] = { + Try { + val conn = url.openConnection().asInstanceOf[HttpURLConnection] + conn.setRequestMethod(method) + conn.setRequestProperty("Content-Type", "application/json") + conn.setDoOutput(jsonInputString.isDefined) + + jsonInputString.foreach { input => + val os = conn.getOutputStream + os.write(input.getBytes("UTF-8")) + os.close() + } + + val responseCode = conn.getResponseCode + val result = if (responseCode == HttpURLConnection.HTTP_OK) { + Right(Source.fromInputStream(conn.getInputStream, "UTF-8").mkString) + } else { + Left(s"Failed: HTTP error code $responseCode") + } + + conn.disconnect() + result + } match { + case Success(result) => result + case Failure(exception) => + Left(s"Error: ${exception.getMessage}") + } + } +} diff --git a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterUtils.scala b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterUtils.scala new file mode 100755 index 00000000000..3ceb6cee879 --- /dev/null +++ b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterUtils.scala @@ -0,0 +1,49 @@ +package org.apache.texera.web.resource.dashboard.user.cluster + +import org.apache.texera.dao.jooq.generated.enums.ClusterStatus + +import java.sql.Timestamp +import java.time.Instant +import org.jooq.impl.DSL.max +import org.jooq.DSLContext +import org.apache.texera.dao.jooq.generated.tables.Cluster.CLUSTER +import org.apache.texera.dao.jooq.generated.tables.ClusterActivity.CLUSTER_ACTIVITY + +object ClusterUtils { + + /** + * Updates the status of a cluster. + * + * @param clusterId The ID of the cluster. + * @param status The new status of the cluster.
+ */ + def updateClusterStatus(clusterId: Int, status: ClusterStatus, context: DSLContext): Unit = { + context + .update(CLUSTER) + .set(CLUSTER.STATUS, status) + .where(CLUSTER.CID.eq(clusterId)) + .execute() + } + + /** + * Updates the end time of the most recent cluster activity to the current time. + * + * @param clusterId The ID of the cluster. + */ + def updateClusterActivityEndTime(clusterId: Int, context: DSLContext): Unit = { + context + .update(CLUSTER_ACTIVITY) + .set(CLUSTER_ACTIVITY.END_TIME, Timestamp.from(Instant.now())) + .where(CLUSTER_ACTIVITY.CLUSTER_ID.eq(clusterId)) + .and( + CLUSTER_ACTIVITY.START_TIME.eq( + context + .select(max(CLUSTER_ACTIVITY.START_TIME)) + .from(CLUSTER_ACTIVITY) + .where(CLUSTER_ACTIVITY.CLUSTER_ID.eq(clusterId)) + .and(CLUSTER_ACTIVITY.END_TIME.isNull) + ) + ) + .execute() + } +} diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetDirectoryDocument.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetDirectoryDocument.scala new file mode 100755 index 00000000000..e949dfceb31 --- /dev/null +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetDirectoryDocument.scala @@ -0,0 +1,98 @@ +package org.apache.texera.amber.core.storage.model + +import org.apache.texera.amber.core.storage.util.dataset.GitVersionControlLocalFileStorage +import org.apache.texera.dao.SqlServer +import org.apache.texera.dao.jooq.generated.tables.Dataset.DATASET +import org.apache.texera.dao.jooq.generated.tables.User.USER +import org.apache.texera.dao.jooq.generated.tables.DatasetVersion.DATASET_VERSION +import org.apache.texera.dao.jooq.generated.tables.pojos.{Dataset, DatasetVersion} + +import java.io.{File, FileOutputStream, InputStream} +import java.nio.file.{Files, Path, Paths} + +class DatasetDirectoryDocument(fileFullPath: Path, shouldContainFile: Boolean = true) { + + private val context = SqlServer + .getInstance() + 
.createDSLContext() + private val (dataset, datasetVersion, fileRelativePath) = + resolvePath(fileFullPath, shouldContainFile) + private var tempFile: Option[File] = None + + private def getDatasetByName(ownerEmail: String, datasetName: String): Dataset = { + context + .select(DATASET.fields: _*) + .from(DATASET) + .leftJoin(USER) + .on(USER.UID.eq(DATASET.OWNER_UID)) + .where(USER.EMAIL.eq(ownerEmail)) + .and(DATASET.NAME.eq(datasetName)) + .fetchOneInto(classOf[Dataset]) + } + + private def getDatasetVersionByName(did: Integer, versionName: String): DatasetVersion = { + context + .selectFrom(DATASET_VERSION) + .where(DATASET_VERSION.DID.eq(did)) + .and(DATASET_VERSION.NAME.eq(versionName)) + .fetchOneInto(classOf[DatasetVersion]) + } + + def resolvePath( + path: java.nio.file.Path, + shouldContainFile: Boolean + ): (Dataset, DatasetVersion, Option[Path]) = { + val pathSegments = (0 until path.getNameCount).map(path.getName(_).toString).toArray + + val ownerEmail = pathSegments(0) + val datasetName = pathSegments(1) + val versionName = pathSegments(2) + + val fileRelativePath = + if (shouldContainFile) Some(Paths.get(pathSegments.drop(3).mkString("/"))) else None + + val dataset = getDatasetByName(ownerEmail, datasetName) + val datasetVersion = getDatasetVersionByName(dataset.getDid, versionName) + (dataset, datasetVersion, fileRelativePath) + } + + def asFile(): File = { + tempFile match { + case Some(file) => file + case None => + val tempFilePath = Files.createTempFile("versionedFile", ".tmp") + val tempFileStream = new FileOutputStream(tempFilePath.toFile) + val inputStream = asInputStream() + + val buffer = new Array[Byte](1024) + + // Create an iterator to repeatedly call inputStream.read, and direct buffered data to file + Iterator + .continually(inputStream.read(buffer)) + .takeWhile(_ != -1) + .foreach(tempFileStream.write(buffer, 0, _)) + + inputStream.close() + tempFileStream.close() + + val file = tempFilePath.toFile + tempFile = Some(file) + file + } 
+ } + + def asInputStream(): InputStream = { + val datasetAbsolutePath = PathUtils.getDatasetPath(dataset.getDid) + GitVersionControlLocalFileStorage + .retrieveFileContentOfVersionAsInputStream( + datasetAbsolutePath, + datasetVersion.getVersionHash, + datasetAbsolutePath.resolve(fileRelativePath.get) + ) + } + + def asDirectory(): String = { + + PathUtils.getDatasetPath(dataset.getDid).toString + } +} diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/PathUtils.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/PathUtils.scala new file mode 100755 index 00000000000..2a0cccd472f --- /dev/null +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/PathUtils.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.texera.amber.core.storage.model + +import java.nio.file.Path + +object PathUtils { + lazy val datasetsRootPath: Path = + Path + .of(sys.env.getOrElse("TEXERA_HOME", ".")) + .resolve("amber") + .resolve("user-resources") + .resolve("datasets") + + def getDatasetPath(did: Integer): Path = { + datasetsRootPath.resolve(did.toString) + } +} diff --git a/frontend/src/app/app-routing.module.ts b/frontend/src/app/app-routing.module.ts index 179caf5c088..ab585a9e930 100644 --- a/frontend/src/app/app-routing.module.ts +++ b/frontend/src/app/app-routing.module.ts @@ -25,6 +25,7 @@ import { UserQuotaComponent } from "./dashboard/component/user/user-quota/user-q import { UserProjectSectionComponent } from "./dashboard/component/user/user-project/user-project-section/user-project-section.component"; import { UserProjectComponent } from "./dashboard/component/user/user-project/user-project.component"; import { UserComputingUnitComponent } from "./dashboard/component/user/user-computing-unit/user-computing-unit.component"; +import { ClusterComponent } from "./dashboard/component/user/cluster/cluster.component"; import { WorkspaceComponent } from "./workspace/component/workspace.component"; import { AboutComponent } from "./hub/component/about/about.component"; import { AuthGuardService } from "./common/service/user/auth-guard.service"; @@ -135,6 +136,10 @@ routes.push({ path: "compute", component: UserComputingUnitComponent, }, + { + path: "cluster", + component: ClusterComponent, + }, { path: "quota", component: UserQuotaComponent, diff --git a/frontend/src/app/app.module.ts b/frontend/src/app/app.module.ts index 6006fb22ad4..0e905bf61bd 100644 --- a/frontend/src/app/app.module.ts +++ b/frontend/src/app/app.module.ts @@ -190,6 +190,12 @@ import { NzCheckboxModule } from "ng-zorro-antd/checkbox"; import { RegistrationRequestModalComponent } from "./common/service/user/registration-request-modal/registration-request-modal.component"; import { 
UserComputingUnitComponent } from "./dashboard/component/user/user-computing-unit/user-computing-unit.component"; import { UserComputingUnitListItemComponent } from "./dashboard/component/user/user-computing-unit/user-computing-unit-list-item/user-computing-unit-list-item.component"; +import { DirectoryPathInputComponent } from "./workspace/component/directory-path-input/directory-path-input.component"; +import { DirectorySelectionComponent } from "./workspace/component/directory-selection/directory-selection.component"; +import { ClusterComponent } from "./dashboard/component/user/cluster/cluster.component"; +import { ClusterManagementModalComponent } from "./dashboard/component/user/cluster/cluster-management-modal/cluster-management-modal.component"; +import { ClusterSelectionComponent } from "./workspace/component/cluster-selection/cluster-selection.component"; +import { ClusterAutoCompleteComponent } from "./workspace/component/cluster-autocomplete/cluster-autocomplete.component"; registerLocaleData(en); @@ -290,6 +296,12 @@ registerLocaleData(en); MarkdownDescriptionComponent, UserComputingUnitComponent, UserComputingUnitListItemComponent, + DirectoryPathInputComponent, + DirectorySelectionComponent, + ClusterComponent, + ClusterManagementModalComponent, + ClusterSelectionComponent, + ClusterAutoCompleteComponent, ], imports: [ BrowserModule, diff --git a/frontend/src/app/common/formly/formly-config.ts b/frontend/src/app/common/formly/formly-config.ts index c3995abb544..ccebf25b797 100644 --- a/frontend/src/app/common/formly/formly-config.ts +++ b/frontend/src/app/common/formly/formly-config.ts @@ -28,6 +28,8 @@ import { DatasetFileSelectorComponent } from "../../workspace/component/dataset- import { CollabWrapperComponent } from "./collab-wrapper/collab-wrapper/collab-wrapper.component"; import { FormlyRepeatDndComponent } from "./repeat-dnd/repeat-dnd.component"; import { DatasetVersionSelectorComponent } from 
"../../workspace/component/dataset-version-selector/dataset-version-selector.component"; +import { DirectoryPathInputComponent } from "../../workspace/component/directory-path-input/directory-path-input.component"; +import { ClusterAutoCompleteComponent } from "src/app/workspace/component/cluster-autocomplete/cluster-autocomplete.component"; /** * Configuration for using Json Schema with Formly. @@ -80,6 +82,8 @@ export const TEXERA_FORMLY_CONFIG = { { name: "inputautocomplete", component: DatasetFileSelectorComponent, wrappers: ["form-field"] }, { name: "datasetversionselector", component: DatasetVersionSelectorComponent, wrappers: ["form-field"] }, { name: "repeat-section-dnd", component: FormlyRepeatDndComponent }, + { name: "clusterautocomplete", component: ClusterAutoCompleteComponent, wrappers: ["form-field"] }, + { name: "directorypathinput", component: DirectoryPathInputComponent, wrappers: ["form-field"] }, ], wrappers: [ { name: "preset-wrapper", component: PresetWrapperComponent }, diff --git a/frontend/src/app/common/service/cluster/cluster.service.ts b/frontend/src/app/common/service/cluster/cluster.service.ts new file mode 100755 index 00000000000..8049eca37ff --- /dev/null +++ b/frontend/src/app/common/service/cluster/cluster.service.ts @@ -0,0 +1,45 @@ +import { Injectable } from "@angular/core"; +import { HttpClient } from "@angular/common/http"; +import { Observable } from "rxjs"; +import { Clusters } from "../../../dashboard/type/clusters"; +import { AppSettings } from "../../app-setting"; + +@Injectable({ + providedIn: "root", +}) +export class ClusterService { + public CLUSTER_BASE_URL = "cluster"; + public CLUSTER_LAUNCH_URL = this.CLUSTER_BASE_URL + "/launch"; + public CLUSTER_TERMINATE_URL = this.CLUSTER_BASE_URL + "/terminate"; + public CLUSTER_STOP_URL = this.CLUSTER_BASE_URL + "/stop"; + public CLUSTER_START_URL = this.CLUSTER_BASE_URL + "/start"; + public CLUSTER_UPDATE_URL = this.CLUSTER_BASE_URL + "/update/name"; + + 
constructor(private http: HttpClient) {} + + getClusters(available = false): Observable { + return this.http.get(`${AppSettings.getApiEndpoint()}/${this.CLUSTER_BASE_URL}`, { + params: { available }, + }); + } + + launchCluster(clusterConfig: { name: string; machineType: string; numberOfMachines: number }): Observable { + return this.http.post(`${AppSettings.getApiEndpoint()}/${this.CLUSTER_LAUNCH_URL}`, clusterConfig); + } + + terminateCluster(cluster: Clusters): Observable { + return this.http.post(`${AppSettings.getApiEndpoint()}/${this.CLUSTER_TERMINATE_URL}`, cluster); + } + + stopCluster(cluster: Clusters): Observable { + return this.http.post(`${AppSettings.getApiEndpoint()}/${this.CLUSTER_STOP_URL}`, cluster); + } + + startCluster(cluster: Clusters): Observable { + return this.http.post(`${AppSettings.getApiEndpoint()}/${this.CLUSTER_START_URL}`, cluster); + } + + updateCluster(cluster: Clusters): Observable { + return this.http.post(`${AppSettings.getApiEndpoint()}/${this.CLUSTER_UPDATE_URL}`, cluster); + } +} diff --git a/frontend/src/app/common/type/gui-config.ts b/frontend/src/app/common/type/gui-config.ts index d8786c1dc08..72e3401fccc 100644 --- a/frontend/src/app/common/type/gui-config.ts +++ b/frontend/src/app/common/type/gui-config.ts @@ -54,6 +54,7 @@ export interface SidebarTabs { projects_enabled: boolean; workflows_enabled: boolean; datasets_enabled: boolean; + cluster_enabled: boolean; quota_enabled: boolean; forum_enabled: boolean; about_enabled: boolean; diff --git a/frontend/src/app/dashboard/component/admin/settings/admin-settings.component.html b/frontend/src/app/dashboard/component/admin/settings/admin-settings.component.html index a0ea29e77b5..0c395c2f17d 100644 --- a/frontend/src/app/dashboard/component/admin/settings/admin-settings.component.html +++ b/frontend/src/app/dashboard/component/admin/settings/admin-settings.component.html @@ -190,6 +190,15 @@

General Settings

+ +

Cluster Information

- +
Name: - {{ selectedCluster.name }} + {{ cluster.name }}
Machine Type: - {{ selectedCluster.machineType }} + {{ cluster.machineType }}
Machines: - {{ selectedCluster.numberOfMachines }} + {{ cluster.numberOfMachines }}
Status: - {{ selectedCluster.status }} + {{ cluster.status }}
Created At: @@ -49,7 +49,7 @@

Cluster Information

class="info-value" nz-tooltip [nzTooltipTitle]="createdAtTooltip"> - {{ selectedCluster.creationTime | date:'medium' }} + {{ cluster.creationTime | date:'medium' }}
diff --git a/frontend/src/app/workspace/component/cluster-selection/cluster-selection.component.ts b/frontend/src/app/workspace/component/cluster-selection/cluster-selection.component.ts index 85754d207fb..c7a8afe0367 100755 --- a/frontend/src/app/workspace/component/cluster-selection/cluster-selection.component.ts +++ b/frontend/src/app/workspace/component/cluster-selection/cluster-selection.component.ts @@ -5,6 +5,7 @@ import { Clusters } from "src/app/dashboard/type/clusters"; @UntilDestroy() @Component({ + standalone: false, selector: "texera-cluster-selection", templateUrl: "cluster-selection.component.html", styleUrls: ["cluster-selection.component.scss"], diff --git a/frontend/src/app/workspace/component/directory-path-input/directory-path-input.component.ts b/frontend/src/app/workspace/component/directory-path-input/directory-path-input.component.ts index 59deb3e9164..388f891dc25 100755 --- a/frontend/src/app/workspace/component/directory-path-input/directory-path-input.component.ts +++ b/frontend/src/app/workspace/component/directory-path-input/directory-path-input.component.ts @@ -10,6 +10,7 @@ import { DatasetService } from "../../../dashboard/service/user/dataset/dataset. @UntilDestroy() @Component({ + standalone: false, selector: "texera-directory-path-input", templateUrl: "./directory-path-input.component.html", styleUrls: ["./directory-path-input.component.scss"], diff --git a/frontend/src/app/workspace/component/directory-selection/directory-selection.component.ts b/frontend/src/app/workspace/component/directory-selection/directory-selection.component.ts index ee17ccb2c1a..799fc7dd0aa 100755 --- a/frontend/src/app/workspace/component/directory-selection/directory-selection.component.ts +++ b/frontend/src/app/workspace/component/directory-selection/directory-selection.component.ts @@ -8,6 +8,7 @@ import { DatasetService } from "../../../dashboard/service/user/dataset/dataset. 
@UntilDestroy() @Component({ + standalone: false, selector: "texera-directory-selection-modal", templateUrl: "directory-selection.component.html", styleUrls: ["directory-selection.component.scss"], From d8e67364ef612fb8cba873493f34e5e0dee8ece3 Mon Sep 17 00:00:00 2001 From: ali risheh Date: Thu, 7 May 2026 09:59:40 -0700 Subject: [PATCH 10/13] Revert "Feat/cloudbiomapper" --- .../texera/web/TexeraWebApplication.scala | 6 - .../cluster/ClusterCallbackResource.scala | 198 -------------- .../user/cluster/ClusterResource.scala | 254 ----------------- .../user/cluster/ClusterServiceClient.scala | 107 -------- .../dashboard/user/cluster/ClusterUtils.scala | 49 ---- bin/k8s/templates/cloudmapper-pvc.yaml | 13 - bin/k8s/templates/cloudmapper.yaml | 82 ------ .../src/main/resources/application.conf | 5 - common/config/src/main/resources/default.conf | 3 - .../amber/config/EnvironmentalVariable.scala | 2 - .../amber/core/storage/DocumentFactory.scala | 7 +- .../amber/core/storage/FileResolver.scala | 90 ------ .../model/DatasetDirectoryDocument.scala | 98 ------- .../storage/model/DatasetFileDocument.scala | 258 ++---------------- .../amber/core/storage/model/PathUtils.scala | 35 --- .../cloudmapper/CloudMapperSourceOpDesc.scala | 243 ----------------- .../cloudmapper/ReferenceGenome.scala | 46 ---- .../cloudmapper/ReferenceGenomeEnum.java | 26 -- .../texera/amber/operator/LogicalOp.scala | 7 +- frontend/src/app/app-routing.module.ts | 5 - frontend/src/app/app.module.ts | 12 - .../src/app/common/formly/formly-config.ts | 4 - .../common/service/cluster/cluster.service.ts | 45 --- frontend/src/app/common/type/gui-config.ts | 1 - .../settings/admin-settings.component.html | 9 - .../settings/admin-settings.component.ts | 1 - .../component/dashboard.component.html | 12 - .../component/dashboard.component.ts | 1 - .../cluster-management-modal.component.html | 61 ----- .../cluster-management-modal.component.scss | 106 ------- .../cluster-management-modal.component.ts | 83 ------ 
.../user/cluster/cluster.component.html | 112 -------- .../user/cluster/cluster.component.scss | 43 --- .../user/cluster/cluster.component.ts | 195 ------------- frontend/src/app/dashboard/type/clusters.ts | 9 - .../cluster-autocomplete.component.html | 15 - .../cluster-autocomplete.component.scss | 3 - .../cluster-autocomplete.component.ts | 48 ---- .../cluster-selection.component.html | 75 ----- .../cluster-selection.component.scss | 80 ------ .../cluster-selection.component.ts | 57 ---- .../directory-path-input.component.html | 23 -- .../directory-path-input.component.scss | 32 --- .../directory-path-input.component.ts | 55 ---- .../directory-selection.component.html | 64 ----- .../directory-selection.component.scss | 71 ----- .../directory-selection.component.ts | 108 -------- .../operator-property-edit-frame.component.ts | 18 +- .../src/assets/logos/dknet-favicon-32x32.png | Bin 2085 -> 0 bytes frontend/src/assets/logos/dknet-logo.png | Bin 153082 -> 0 bytes .../assets/operator_images/CloudBioMapper.png | Bin 55237 -> 0 bytes .../assets/sequence-alignment-workflow.png | Bin 467023 -> 0 bytes sql/updates/cluster.sql | 42 --- 53 files changed, 21 insertions(+), 2898 deletions(-) delete mode 100755 amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterCallbackResource.scala delete mode 100755 amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterResource.scala delete mode 100755 amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterServiceClient.scala delete mode 100755 amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterUtils.scala delete mode 100755 bin/k8s/templates/cloudmapper-pvc.yaml delete mode 100755 bin/k8s/templates/cloudmapper.yaml delete mode 100755 common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetDirectoryDocument.scala delete mode 100755 
common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/PathUtils.scala delete mode 100755 common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/CloudMapperSourceOpDesc.scala delete mode 100755 common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenome.scala delete mode 100755 common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenomeEnum.java delete mode 100755 frontend/src/app/common/service/cluster/cluster.service.ts delete mode 100755 frontend/src/app/dashboard/component/user/cluster/cluster-management-modal/cluster-management-modal.component.html delete mode 100755 frontend/src/app/dashboard/component/user/cluster/cluster-management-modal/cluster-management-modal.component.scss delete mode 100755 frontend/src/app/dashboard/component/user/cluster/cluster-management-modal/cluster-management-modal.component.ts delete mode 100755 frontend/src/app/dashboard/component/user/cluster/cluster.component.html delete mode 100755 frontend/src/app/dashboard/component/user/cluster/cluster.component.scss delete mode 100755 frontend/src/app/dashboard/component/user/cluster/cluster.component.ts delete mode 100755 frontend/src/app/dashboard/type/clusters.ts delete mode 100755 frontend/src/app/workspace/component/cluster-autocomplete/cluster-autocomplete.component.html delete mode 100755 frontend/src/app/workspace/component/cluster-autocomplete/cluster-autocomplete.component.scss delete mode 100755 frontend/src/app/workspace/component/cluster-autocomplete/cluster-autocomplete.component.ts delete mode 100755 frontend/src/app/workspace/component/cluster-selection/cluster-selection.component.html delete mode 100755 frontend/src/app/workspace/component/cluster-selection/cluster-selection.component.scss delete mode 100755 frontend/src/app/workspace/component/cluster-selection/cluster-selection.component.ts delete mode 100755 
frontend/src/app/workspace/component/directory-path-input/directory-path-input.component.html delete mode 100755 frontend/src/app/workspace/component/directory-path-input/directory-path-input.component.scss delete mode 100755 frontend/src/app/workspace/component/directory-path-input/directory-path-input.component.ts delete mode 100755 frontend/src/app/workspace/component/directory-selection/directory-selection.component.html delete mode 100755 frontend/src/app/workspace/component/directory-selection/directory-selection.component.scss delete mode 100755 frontend/src/app/workspace/component/directory-selection/directory-selection.component.ts delete mode 100755 frontend/src/assets/logos/dknet-favicon-32x32.png delete mode 100755 frontend/src/assets/logos/dknet-logo.png delete mode 100755 frontend/src/assets/operator_images/CloudBioMapper.png delete mode 100755 frontend/src/assets/sequence-alignment-workflow.png delete mode 100644 sql/updates/cluster.sql diff --git a/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala b/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala index e48bd8e7b67..98b7c68c974 100644 --- a/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala +++ b/amber/src/main/scala/org/apache/texera/web/TexeraWebApplication.scala @@ -45,10 +45,6 @@ import org.apache.texera.web.resource.dashboard.user.project.{ ProjectResource, PublicProjectResource } -import org.apache.texera.web.resource.dashboard.user.cluster.{ - ClusterResource, - ClusterCallbackResource -} import org.apache.texera.web.resource.dashboard.user.quota.UserQuotaResource import org.apache.texera.web.resource.dashboard.user.workflow.{ WorkflowAccessResource, @@ -164,8 +160,6 @@ class TexeraWebApplication environment.jersey.register(classOf[UserQuotaResource]) environment.jersey.register(classOf[AdminSettingsResource]) environment.jersey.register(classOf[AIAssistantResource]) - environment.jersey.register(classOf[ClusterResource]) - 
environment.jersey.register(classOf[ClusterCallbackResource]) AuthResource.createAdminUser() diff --git a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterCallbackResource.scala b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterCallbackResource.scala deleted file mode 100755 index 310fecf608d..00000000000 --- a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterCallbackResource.scala +++ /dev/null @@ -1,198 +0,0 @@ -package org.apache.texera.web.resource.dashboard.user.cluster - -import org.apache.texera.dao.SqlServer -import org.apache.texera.dao.jooq.generated.enums.ClusterStatus -import org.apache.texera.dao.jooq.generated.tables.daos.{ClusterActivityDao, ClusterDao} -import org.apache.texera.dao.jooq.generated.tables.pojos.ClusterActivity -import org.apache.texera.dao.jooq.generated.tables.Cluster.CLUSTER - -import javax.ws.rs.{Consumes, POST, Path} -import javax.ws.rs.core.{MediaType, Response} -import org.apache.texera.web.resource.dashboard.user.cluster.ClusterUtils.{ - updateClusterActivityEndTime, - updateClusterStatus -} -import org.apache.texera.web.resource.dashboard.user.cluster.ClusterCallbackResource.{ - clusterActivityDao, - clusterDao, - context -} -import org.jooq.impl.DSL - -import java.sql.Timestamp - -object ClusterCallbackResource { - final private val context = SqlServer - .getInstance() - .createDSLContext() - final private lazy val clusterDao = new ClusterDao(context.configuration) - final private lazy val clusterActivityDao = new ClusterActivityDao(context.configuration) - - // error messages - val ERR_USER_HAS_NO_ACCESS_TO_CLUSTER_MESSAGE = "User has no access to this cluster" -} - -@Path("/callback") -class ClusterCallbackResource { - - @POST - @Path("/cluster/created") - @Consumes(Array(MediaType.APPLICATION_JSON)) - def handleClusterCreatedCallback(callbackPayload: CallbackPayload): Response = { - val clusterId = callbackPayload.clusterId 
- val success = callbackPayload.success - - val cluster = clusterDao.fetchOneByCid(clusterId) - if (cluster == null) { - return Response - .status(Response.Status.NOT_FOUND) - .entity(s"Cluster $clusterId not found") - .build() - } - - if (success && cluster.getStatus == ClusterStatus.PENDING) { - updateClusterStatus(clusterId, ClusterStatus.RUNNING, context) - insertClusterActivity(cluster.getCid, cluster.getCreationTime) - Response.ok("Cluster status updated to RUNNING").build() - } else if (!success) { - // Cluster launch failed — mark it so the UI doesn't stay stuck on PENDING. - updateClusterStatus(clusterId, ClusterStatus.LAUNCH_FAILED, context) - Response.ok("Cluster status updated to LAUNCH_FAILED").build() - } else { - Response - .status(Response.Status.CONFLICT) - .entity(s"Cluster $clusterId status update not allowed (current: ${cluster.getStatus})") - .build() - } - } - - @POST - @Path("/cluster/deleted") - @Consumes(Array(MediaType.APPLICATION_JSON)) - def handleClusterDeletedCallback(callbackPayload: CallbackPayload): Response = { - val clusterId = callbackPayload.clusterId - val success = callbackPayload.success - - val cluster = clusterDao.fetchOneByCid(clusterId) - if (success && cluster != null && cluster.getStatus == ClusterStatus.SHUTTING_DOWN) { - updateClusterStatus(clusterId, ClusterStatus.TERMINATED, context) - updateClusterActivityEndTime(clusterId, context) - Response - .ok(s"Cluster with ID $clusterId marked as TERMINATED and activity end time updated") - .build() - } else { - Response - .status(Response.Status.NOT_FOUND) - .entity("Cluster not found or status update not allowed") - .build() - } - } - - @POST - @Path("/cluster/paused") - @Consumes(Array(MediaType.APPLICATION_JSON)) - def handleClusterPausedCallback(callbackPayload: CallbackPayload): Response = { - val clusterId = callbackPayload.clusterId - val success = callbackPayload.success - - val cluster = clusterDao.fetchOneByCid(clusterId) - if (success && cluster != null && 
cluster.getStatus == ClusterStatus.STOPPING) { - updateClusterStatus(clusterId, ClusterStatus.STOPPED, context) - updateClusterActivityEndTime(clusterId, context) - Response - .ok(s"Cluster with ID $clusterId marked as STOPPED and activity end time updated") - .build() - } else { - Response - .status(Response.Status.NOT_FOUND) - .entity("Cluster not found or status update not allowed") - .build() - } - } - - @POST - @Path("/cluster/resumed") - @Consumes(Array(MediaType.APPLICATION_JSON)) - def handleClusterResumedCallback(callbackPayload: CallbackPayload): Response = { - val clusterId = callbackPayload.clusterId - val success = callbackPayload.success - // Update the cluster status to LAUNCHED in the database - val cluster = clusterDao.fetchOneByCid(clusterId) - if (success && cluster != null && cluster.getStatus == ClusterStatus.PENDING) { - updateClusterStatus(clusterId, ClusterStatus.RUNNING, context) - insertClusterActivity(cluster.getCid, cluster.getCreationTime) - Response.ok("Cluster status updated to RUNNING").build() - } else { - Response - .status(Response.Status.NOT_FOUND) - .entity("Cluster not found or status update not allowed") - .build() - } - } - - @POST - @Path("/cluster/getid") - @Consumes(Array(MediaType.APPLICATION_JSON)) - def handleClusterGetIdCallback(callbackPayload: CallbackPayload): Response = { - val maxCidResult = context - .select(DSL.max(CLUSTER.CID)) - .from(CLUSTER) - .fetchOne() - - val maxCid = maxCidResult.getValue(0, classOf[Integer]) - - if (maxCid == null) { - Response.ok("Next cluster ID is 1").entity(1).build() - } else { - val nextCid = maxCid + 1 - Response.ok(s"Next cluster ID is $nextCid").entity(nextCid).build() - } - } - - /** - * Handles the callback to change the cluster status to SHUTTING_DOWN. - * - * @param callbackPayload The payload containing the cluster ID and success status. - * @return A Response indicating the result of the operation. 
- */ - @POST - @Path("/cluster/shutdown") - @Consumes(Array(MediaType.APPLICATION_JSON)) - def handleClusterShutdownCallback(callbackPayload: CallbackPayload): Response = { - val clusterId = callbackPayload.clusterId - val success = callbackPayload.success - - // Fetch the cluster by its ID - val cluster = clusterDao.fetchOneByCid(clusterId) - - // Check if the operation is successful and the cluster exists - if (success && cluster != null) { - // Update the cluster status to SHUTTING_DOWN - updateClusterStatus(clusterId, ClusterStatus.SHUTTING_DOWN, context) - // Return a success response - Response.ok(s"Cluster with ID $clusterId status updated to SHUTTING_DOWN").build() - } else { - // Return a NOT_FOUND response if the cluster is not found or the status update is not allowed - Response - .status(Response.Status.NOT_FOUND) - .entity("Cluster not found or status update not allowed") - .build() - } - } - - /** - * Inserts a new cluster activity record with the given start time. - * - * @param clusterId The ID of the cluster. - * @param startTime The start time of the activity. 
- */ - private def insertClusterActivity(clusterId: Int, startTime: Timestamp): Unit = { - val clusterActivity = new ClusterActivity() - clusterActivity.setClusterId(clusterId) - clusterActivity.setStartTime(startTime) - clusterActivityDao.insert(clusterActivity) - } -} - -// Define the payload structure expected from the Go service -case class CallbackPayload(clusterId: Int, success: Boolean) diff --git a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterResource.scala b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterResource.scala deleted file mode 100755 index a96ff46ff6a..00000000000 --- a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterResource.scala +++ /dev/null @@ -1,254 +0,0 @@ -package org.apache.texera.web.resource.dashboard.user.cluster - -import org.apache.texera.auth.SessionUser -import org.apache.texera.dao.SqlServer -import org.apache.texera.dao.jooq.generated.enums.ClusterStatus -import org.apache.texera.dao.jooq.generated.tables.daos.ClusterDao -import org.apache.texera.dao.jooq.generated.tables.pojos.Cluster -import org.apache.texera.web.resource.dashboard.user.cluster.ClusterResource.{ - ERR_USER_HAS_NO_ACCESS_TO_CLUSTER_MESSAGE, - clusterDao, - context -} -import io.dropwizard.auth.Auth -import org.apache.texera.web.resource.dashboard.user.cluster.ClusterServiceClient.{ - callCreateClusterAPI, - callDeleteClusterAPI, - callPauseClusterAPI, - callResumeClusterAPI -} -import org.apache.texera.web.resource.dashboard.user.cluster.ClusterUtils.updateClusterStatus -import org.apache.texera.dao.jooq.generated.tables.Cluster.CLUSTER - -import java.util -import javax.annotation.security.RolesAllowed -import javax.ws.rs.{Consumes, ForbiddenException, GET, POST, Path, QueryParam} -import javax.ws.rs.core.{MediaType, Response} - -object ClusterResource { - final private lazy val context = SqlServer - .getInstance() - .createDSLContext() - final private lazy 
val clusterDao = new ClusterDao(context.configuration) - - // error messages - val ERR_USER_HAS_NO_ACCESS_TO_CLUSTER_MESSAGE = "User has no access to this cluster" -} - -@RolesAllowed(Array("REGULAR", "ADMIN")) -@Path("/cluster") -class ClusterResource { - - /** - * Launchs a new cluster and records the start time in cluster_activity. - * - * @param user The authenticated user creating the cluster. - * @param launchRequest The launch request containing cluster configuration. - * @return The created Cluster object. - */ - @POST - @Path("/launch") - @Consumes(Array(MediaType.APPLICATION_JSON)) - def launchCluster( - @Auth user: SessionUser, - launchRequest: ClusterLaunchRequest - ): Response = { - val cluster = new Cluster() - cluster.setName(launchRequest.name) - cluster.setOwnerId(user.getUid) - cluster.setMachineType(launchRequest.machineType) - cluster.setNumberOfMachines(launchRequest.numberOfMachines) - cluster.setStatus(ClusterStatus.LAUNCH_RECEIVED) - clusterDao.insert(cluster) - - // Call Go microservice to actually create the cluster - callCreateClusterAPI( - cluster.getCid, - launchRequest.machineType, - launchRequest.numberOfMachines - ) match { - case Right(goResponse) => - updateClusterStatus(cluster.getCid, ClusterStatus.PENDING, context) - Response.ok(clusterDao.fetchOneByCid(cluster.getCid)).build() - - case Left(errorMessage) => - updateClusterStatus(cluster.getCid, ClusterStatus.LAUNCH_FAILED, context) - Response - .status(Response.Status.INTERNAL_SERVER_ERROR) - .entity(s"Cluster creation failed: $errorMessage") - .build() - } - } - - /** - * Terminates a cluster and records the termination time in cluster_activity. - * - * @param user The authenticated user requesting the deletion. - * @param cluster The cluster to be deleted. - * @return A Response indicating the result of the operation. 
- */ - @POST - @Path("/terminate") - def terminateCluster(@Auth user: SessionUser, cluster: Cluster): Response = { - val clusterId = cluster.getCid - validateClusterOwnership(user, clusterId) - - updateClusterStatus(clusterId, ClusterStatus.TERMINATE_RECEIVED, context) - - // Call Go microservice to actually delete the cluster - callDeleteClusterAPI(clusterId) match { - case Right(goResponse) => - updateClusterStatus(clusterId, ClusterStatus.SHUTTING_DOWN, context) - Response.ok(goResponse).build() - - case Left(errorMessage) => - updateClusterStatus( - clusterId, - ClusterStatus.TERMINATE_FAILED, - context - ) - Response - .status(Response.Status.INTERNAL_SERVER_ERROR) - .entity(s"Cluster deletion failed: $errorMessage") - .build() - } - } - - /** - * Stops a cluster and records the stop time in cluster_activity. - * - * @param user The authenticated user requesting the pause. - * @param cluster The cluster to be paused. - * @return A Response indicating the result of the operation. - */ - @POST - @Path("/stop") - def stopCluster(@Auth user: SessionUser, cluster: Cluster): Response = { - val clusterId = cluster.getCid - validateClusterOwnership(user, clusterId) - - updateClusterStatus(clusterId, ClusterStatus.STOP_RECEIVED, context) - - callPauseClusterAPI(clusterId) match { - case Right(goResponse) => - updateClusterStatus(clusterId, ClusterStatus.STOPPING, context) - Response.ok(goResponse).build() - - case Left(errorMessage) => - updateClusterStatus( - clusterId, - ClusterStatus.STOP_FAILED, - context - ) - Response - .status(Response.Status.INTERNAL_SERVER_ERROR) - .entity(s"Cluster pause failed: $errorMessage") - .build() - } - - } - - /** - * Starts a stopped cluster and records the start time in cluster_activity. - * - * @param user The authenticated user requesting the resume. - * @param cluster The cluster to be resumed. - * @return A Response indicating the result of the operation. 
- */ - @POST - @Path("/start") - def startCluster(@Auth user: SessionUser, cluster: Cluster): Response = { - val clusterId = cluster.getCid - validateClusterOwnership(user, clusterId) - - updateClusterStatus(clusterId, ClusterStatus.START_RECEIVED, context) - - callResumeClusterAPI(clusterId) match { - case Right(goResponse) => - updateClusterStatus(clusterId, ClusterStatus.PENDING, context) - Response.ok(goResponse).build() - - case Left(errorMessage) => - updateClusterStatus( - clusterId, - ClusterStatus.START_FAILED, - context - ) - Response - .status(Response.Status.INTERNAL_SERVER_ERROR) - .entity(s"Cluster resume failed: $errorMessage") - .build() - } - - } - - /** - * Updates the name of a cluster. - * - * @param user The authenticated user requesting the update. - * @param cluster The cluster with the new name. - * @return A Response indicating the result of the operation. - */ - @POST - @Consumes(Array(MediaType.APPLICATION_JSON)) - @Path("/update/name") - def updateClusterName(@Auth user: SessionUser, cluster: Cluster): Response = { - validateClusterOwnership(user, cluster.getCid) - - context - .update(CLUSTER) - .set(CLUSTER.NAME, cluster.getName) - .where(CLUSTER.CID.eq(cluster.getCid)) - .execute() - - Response.ok().build() - } - - /** - * Lists all clusters owned by the authenticated user. - * - * @param user The authenticated user. - * @param available Boolean to indicate whether to return available (Launched) clusters only - * @return A list of Clusters owned by the user. 
- */ - @GET - @Path("") - def listClusters( - @Auth user: SessionUser, - @QueryParam("available") available: Boolean - ): util.List[Cluster] = { - clusterDao.fetchByOwnerId(user.getUid) - var steps = context - .select(CLUSTER.asterisk()) - .from(CLUSTER) - .where(CLUSTER.OWNER_ID.eq(user.getUid)) - .and(CLUSTER.STATUS.ne(ClusterStatus.TERMINATED)) - if (available) { - steps = steps.and(CLUSTER.STATUS.eq(ClusterStatus.RUNNING)) - } - steps.fetchInto(classOf[Cluster]) - - } - - /** - * Validates that the authenticated user has ownership of the cluster. - * - * @param user The authenticated user. - * @param clusterId The ID of the cluster to validate ownership. - */ - private def validateClusterOwnership(user: SessionUser, clusterId: Int): Unit = { - val clusterOwnerId = clusterDao.fetchOneByCid(clusterId).getOwnerId - if (clusterOwnerId != user.getUid) { - throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_CLUSTER_MESSAGE) - } - } -} - -/** - * Request object for launching a cluster. - */ -case class ClusterLaunchRequest( - name: String, - machineType: String, - numberOfMachines: Integer -) diff --git a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterServiceClient.scala b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterServiceClient.scala deleted file mode 100755 index 9cb0b6d7ed1..00000000000 --- a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterServiceClient.scala +++ /dev/null @@ -1,107 +0,0 @@ -package org.apache.texera.web.resource.dashboard.user.cluster - -import java.net.{HttpURLConnection, URL} -import scala.io.Source -import scala.util.{Failure, Success, Try} - -object ClusterServiceClient { - - /** - * Makes an HTTP POST request to create a cluster in the Go microservice. - * - * @param clusterId The id of the cluster. - * @param machineType The type of the machine for the cluster. - * @param numberOfMachines The number of machines in the cluster. 
- * @return Either an error message in Left, or the response body in Right. - */ - def callCreateClusterAPI( - clusterId: Int, - machineType: String, - numberOfMachines: Int - ): Either[String, String] = { - val url = new URL(s"http://cloudmapper-service:4000/api/cluster/create") - val jsonInputString = - s"""{ - |"provider": "aws", - |"machineType": "$machineType", - |"numberOfNodes": $numberOfMachines, - |"clusterId": $clusterId - |}""".stripMargin - - sendHttpRequest("POST", url, Some(jsonInputString)) - } - - /** - * Makes an HTTP DELETE request to delete a cluster in the Go microservice. - * - * @param clusterId The ID of the cluster to be deleted. - * @return Either an error message in Left, or the response body in Right. - */ - def callDeleteClusterAPI(clusterId: Int): Either[String, String] = { - val url = new URL(s"http://cloudmapper-service:4000/api/cluster/$clusterId") - sendHttpRequest("DELETE", url, None) - } - - /** - * Makes an HTTP DELETE request to delete a cluster in the Go microservice. - * - * @param clusterId The ID of the cluster to be paused. - * @return Either an error message in Left, or the response body in Right. - */ - def callPauseClusterAPI(clusterId: Int): Either[String, String] = { - val url = new URL(s"http://cloudmapper-service:4000/api/cluster/$clusterId") - sendHttpRequest("PUT", url, None) - } - - /** - * Makes an HTTP DELETE request to delete a cluster in the Go microservice. - * - * @param clusterId The ID of the cluster to be resumed. - * @return Either an error message in Left, or the response body in Right. - */ - def callResumeClusterAPI(clusterId: Int): Either[String, String] = { - val url = new URL(s"http://cloudmapper-service:4000/api/cluster/resume/$clusterId") - sendHttpRequest("POST", url, None) - } - - /** - * Helper function to send an HTTP request. - * - * @param method The HTTP method (e.g., POST, DELETE). - * @param url The URL for the HTTP request. 
- * @param jsonInputString The optional JSON payload for the request body (for POST requests). - * @return Either an error message in Left, or the response body in Right. - */ - private def sendHttpRequest( - method: String, - url: URL, - jsonInputString: Option[String] - ): Either[String, String] = { - Try { - val conn = url.openConnection().asInstanceOf[HttpURLConnection] - conn.setRequestMethod(method) - conn.setRequestProperty("Content-Type", "application/json") - conn.setDoOutput(jsonInputString.isDefined) - - jsonInputString.foreach { input => - val os = conn.getOutputStream - os.write(input.getBytes("UTF-8")) - os.close() - } - - val responseCode = conn.getResponseCode - val result = if (responseCode == HttpURLConnection.HTTP_OK) { - Right(Source.fromInputStream(conn.getInputStream).mkString) - } else { - Left(s"Failed: HTTP error code $responseCode") - } - - conn.disconnect() - result - } match { - case Success(result) => result - case Failure(exception) => - Left(s"Error: ${exception.getMessage}") - } - } -} diff --git a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterUtils.scala b/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterUtils.scala deleted file mode 100755 index 3ceb6cee879..00000000000 --- a/amber/src/main/scala/org/apache/texera/web/resource/dashboard/user/cluster/ClusterUtils.scala +++ /dev/null @@ -1,49 +0,0 @@ -package org.apache.texera.web.resource.dashboard.user.cluster - -import org.apache.texera.dao.jooq.generated.enums.ClusterStatus - -import java.sql.Timestamp -import java.time.Instant -import org.jooq.impl.DSL.max -import org.jooq.DSLContext -import org.apache.texera.dao.jooq.generated.tables.Cluster.CLUSTER -import org.apache.texera.dao.jooq.generated.tables.ClusterActivity.CLUSTER_ACTIVITY - -object ClusterUtils { - - /** - * Updates the status of a cluster. - * - * @param clusterId The ID of the cluster. - * @param status The new status of the cluster. 
- */ - def updateClusterStatus(clusterId: Int, status: ClusterStatus, context: DSLContext): Unit = { - context - .update(CLUSTER) - .set(CLUSTER.STATUS, status) - .where(CLUSTER.CID.eq(clusterId)) - .execute() - } - - /** - * Updates the end time of the most recent cluster activity to the current time. - * - * @param clusterId The ID of the cluster. - */ - def updateClusterActivityEndTime(clusterId: Int, context: DSLContext): Unit = { - context - .update(CLUSTER_ACTIVITY) - .set(CLUSTER_ACTIVITY.END_TIME, Timestamp.from(Instant.now())) - .where(CLUSTER_ACTIVITY.CLUSTER_ID.eq(clusterId)) - .and( - CLUSTER_ACTIVITY.START_TIME.eq( - context - .select(max(CLUSTER_ACTIVITY.START_TIME)) - .from(CLUSTER_ACTIVITY) - .where(CLUSTER_ACTIVITY.CLUSTER_ID.eq(clusterId)) - .and(CLUSTER_ACTIVITY.END_TIME.isNull) - ) - ) - .execute() - } -} diff --git a/bin/k8s/templates/cloudmapper-pvc.yaml b/bin/k8s/templates/cloudmapper-pvc.yaml deleted file mode 100755 index c02acf83822..00000000000 --- a/bin/k8s/templates/cloudmapper-pvc.yaml +++ /dev/null @@ -1,13 +0,0 @@ - -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: cloudmapper-pvc - namespace: texera -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 10Gi - storageClassName: local-storage diff --git a/bin/k8s/templates/cloudmapper.yaml b/bin/k8s/templates/cloudmapper.yaml deleted file mode 100755 index 88f2ec51b7e..00000000000 --- a/bin/k8s/templates/cloudmapper.yaml +++ /dev/null @@ -1,82 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: cloudmapper - namespace: texera -spec: - replicas: 1 - selector: - matchLabels: - app: cloudmapper - template: - metadata: - labels: - app: cloudmapper - spec: - volumes: - - name: cloudmapper-file-volume - persistentVolumeClaim: - claimName: cloudmapper-pvc - - name: aws-config-volume - emptyDir: {} - - name: ssh-key-volume - secret: - secretName: ssh-key - defaultMode: 0600 # Secure SSH key permissions - - initContainers: - - name: 
aws-config-setup - image: ubuntu:22.04 - command: ["/bin/sh", "-c"] - args: - - | - mkdir -p /aws-config; - echo "[default]" > /aws-config/credentials; - echo "aws_access_key_id = $AWS_ACCESS_KEY_ID" >> /aws-config/credentials; - echo "aws_secret_access_key = $AWS_SECRET_ACCESS_KEY" >> /aws-config/credentials; - echo "[default]" > /aws-config/config; - echo "region = us-west-2" >> /aws-config/config; - env: - - name: AWS_ACCESS_KEY_ID - valueFrom: - secretKeyRef: - name: aws-secret - key: AWS_ACCESS_KEY_ID - - name: AWS_SECRET_ACCESS_KEY - valueFrom: - secretKeyRef: - name: aws-secret - key: AWS_SECRET_ACCESS_KEY - volumeMounts: - - name: aws-config-volume - mountPath: /aws-config - - containers: - - name: cloudmapper - image: kunwp1/cloudmapper:latest - ports: - - containerPort: 4000 - volumeMounts: - - name: cloudmapper-file-volume - mountPath: "/data" - - name: aws-config-volume - mountPath: /root/.aws - - name: ssh-key-volume - mountPath: /root/.ssh - readOnly: true - ---- -apiVersion: v1 -kind: Service -metadata: - name: cloudmapper-service - namespace: texera -spec: - selector: - app: cloudmapper - ports: - - protocol: TCP - port: 4000 - targetPort: 4000 - type: ClusterIP - diff --git a/common/config/src/main/resources/application.conf b/common/config/src/main/resources/application.conf index 59abc83589b..c7a7af24180 100644 --- a/common/config/src/main/resources/application.conf +++ b/common/config/src/main/resources/application.conf @@ -148,8 +148,3 @@ ai-assistant-server { ai-service-url = "" ai-service-url = ${?AI_ASSISTANT_SERVER_AI_SERVICE_URL} } - -cluster-launcher-service { - target = "http://localhost:3000" - target = ${?CLUSTER_LAUNCHER_SERVICE_TARGET} -} diff --git a/common/config/src/main/resources/default.conf b/common/config/src/main/resources/default.conf index 1fdf3148885..0ce2a2e38b6 100644 --- a/common/config/src/main/resources/default.conf +++ b/common/config/src/main/resources/default.conf @@ -64,9 +64,6 @@ gui { datasets_enabled = true 
datasets_enabled = ${?GUI_TABS_DATASETS_ENABLED} - cluster_enabled = true - cluster_enabled = ${?GUI_TABS_CLUSTER_ENABLED} - quota_enabled = true quota_enabled = ${?GUI_TABS_QUOTA_ENABLED} diff --git a/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala b/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala index 396bdd3b1d5..9ec52bba653 100644 --- a/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala +++ b/common/config/src/main/scala/org/apache/texera/amber/config/EnvironmentalVariable.scala @@ -38,8 +38,6 @@ object EnvironmentalVariable { val ENV_FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT = "FILE_SERVICE_GET_PRESIGNED_URL_ENDPOINT" val ENV_FILE_SERVICE_UPLOAD_ONE_FILE_TO_DATASET_ENDPOINT = "FILE_SERVICE_UPLOAD_ONE_FILE_TO_DATASET_ENDPOINT" - val ENV_FILE_SERVICE_LIST_DIRECTORY_OBJECTS_ENDPOINT = - "FILE_SERVICE_LIST_DIRECTORY_OBJECTS_ENDPOINT" /** * Auth related vars diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/DocumentFactory.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/DocumentFactory.scala index f25a5aedc4f..15949ef4717 100644 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/DocumentFactory.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/DocumentFactory.scala @@ -44,12 +44,9 @@ object DocumentFactory { * @param fileUri the uri of the document * @return ReadonlyVirtualDocument */ - def openReadonlyDocument( - fileUri: URI, - isDirectory: Boolean = false - ): ReadonlyVirtualDocument[_] = { + def openReadonlyDocument(fileUri: URI): ReadonlyVirtualDocument[_] = { fileUri.getScheme match { - case DATASET_FILE_URI_SCHEME => new DatasetFileDocument(fileUri, isDirectory) + case DATASET_FILE_URI_SCHEME => new DatasetFileDocument(fileUri) case "file" => new ReadonlyLocalFileDocument(fileUri) case unsupportedScheme => throw 
new UnsupportedOperationException( diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala index eb92e4e58bc..cda5b28779b 100644 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala +++ b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/FileResolver.scala @@ -62,21 +62,6 @@ object FileResolver { .getOrElse(throw new FileNotFoundException(fileName)) } - def resolveDirectory(directoryName: String): URI = { - if (isFileResolved(directoryName)) { - return new URI(directoryName) - } - val resolvers: Seq[String => URI] = Seq(localDirectoryResolveFunc, datasetDirectoryResolveFunc) - - // Try each resolver function in sequence - resolvers - .map(resolver => Try(resolver(directoryName))) - .collectFirst { - case Success(output) => output - } - .getOrElse(throw new FileNotFoundException(directoryName)) - } - /** * Attempts to resolve a local file path. * @throws FileNotFoundException if the local file does not exist @@ -90,21 +75,6 @@ object FileResolver { filePath.toUri } - /** - * Attempts to resolve a local directory path. - * @throws FileNotFoundException if the local directory does not exist - * @param directoryName the name of the directory to check - */ - private def localDirectoryResolveFunc(directoryName: String): URI = { - val directoryPath = Paths.get(directoryName) - if (!Files.exists(directoryPath) || !Files.isDirectory(directoryPath)) { - throw new FileNotFoundException( - s"Local directory $directoryName does not exist or is not a directory" - ) - } - directoryPath.toUri - } - /** * Parses a dataset file path and extracts its components. * Expected format: /ownerEmail/datasetName/versionName/fileRelativePath @@ -208,66 +178,6 @@ object FileResolver { } } - /** - * Attempts to resolve a given directoryName to a URI. 
- * - * The directoryName format should be: /ownerEmail/datasetName/versionName - * e.g. /bob@texera.com/twitterDataset/v1 - * The output dataset URI format is: {DATASET_FILE_URI_SCHEME}:///{repositoryName}/{versionHash}/ - * e.g. {DATASET_FILE_URI_SCHEME}:///dataset-15/adeq233td/ - * - * @param directoryName the name of the directory to attempt resolving as a dataset directory - * @return A URI pointing to the dataset directory - * @throws FileNotFoundException if the dataset directory does not exist or cannot be created - */ - private def datasetDirectoryResolveFunc(directoryName: String): URI = { - val directoryPath = Paths.get(directoryName) - val pathSegments = - (0 until directoryPath.getNameCount).map(directoryPath.getName(_).toString).toArray - - val ownerEmail = pathSegments(0) - val datasetName = pathSegments(1) - val versionName = pathSegments(2) - - val (dataset, datasetVersion) = - withTransaction( - SqlServer - .getInstance() - .createDSLContext() - ) { ctx => - val dataset = ctx - .select(DATASET.fields: _*) - .from(DATASET) - .leftJoin(USER) - .on(USER.UID.eq(DATASET.OWNER_UID)) - .where(USER.EMAIL.eq(ownerEmail)) - .and(DATASET.NAME.eq(datasetName)) - .fetchOneInto(classOf[Dataset]) - - val datasetVersion = ctx - .selectFrom(DATASET_VERSION) - .where(DATASET_VERSION.DID.eq(dataset.getDid)) - .and(DATASET_VERSION.NAME.eq(versionName)) - .fetchOneInto(classOf[DatasetVersion]) - - if (dataset == null || datasetVersion == null) { - throw new FileNotFoundException(s"Dataset directory $directoryName not found.") - } - (dataset, datasetVersion) - } - - val uriSplitter = "/" - val encodedPath = - uriSplitter + dataset.getRepositoryName + uriSplitter + datasetVersion.getVersionHash + uriSplitter - - try { - new URI(DATASET_FILE_URI_SCHEME, "", encodedPath, null) - } catch { - case e: Exception => - throw new FileNotFoundException(s"Dataset directory $directoryName not found.") - } - } - /** * Checks if a given file path has a valid scheme. 
* diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetDirectoryDocument.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetDirectoryDocument.scala deleted file mode 100755 index e949dfceb31..00000000000 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetDirectoryDocument.scala +++ /dev/null @@ -1,98 +0,0 @@ -package org.apache.texera.amber.core.storage.model - -import org.apache.texera.amber.core.storage.util.dataset.GitVersionControlLocalFileStorage -import org.apache.texera.dao.SqlServer -import org.apache.texera.dao.jooq.generated.tables.Dataset.DATASET -import org.apache.texera.dao.jooq.generated.tables.User.USER -import org.apache.texera.dao.jooq.generated.tables.DatasetVersion.DATASET_VERSION -import org.apache.texera.dao.jooq.generated.tables.pojos.{Dataset, DatasetVersion} - -import java.io.{File, FileOutputStream, InputStream} -import java.nio.file.{Files, Path, Paths} - -class DatasetDirectoryDocument(fileFullPath: Path, shouldContainFile: Boolean = true) { - - private val context = SqlServer - .getInstance() - .createDSLContext() - private val (dataset, datasetVersion, fileRelativePath) = - resolvePath(fileFullPath, shouldContainFile) - private var tempFile: Option[File] = None - - private def getDatasetByName(ownerEmail: String, datasetName: String): Dataset = { - context - .select(DATASET.fields: _*) - .from(DATASET) - .leftJoin(USER) - .on(USER.UID.eq(DATASET.OWNER_UID)) - .where(USER.EMAIL.eq(ownerEmail)) - .and(DATASET.NAME.eq(datasetName)) - .fetchOneInto(classOf[Dataset]) - } - - private def getDatasetVersionByName(did: Integer, versionName: String): DatasetVersion = { - context - .selectFrom(DATASET_VERSION) - .where(DATASET_VERSION.DID.eq(did)) - .and(DATASET_VERSION.NAME.eq(versionName)) - .fetchOneInto(classOf[DatasetVersion]) - } - - def resolvePath( - path: java.nio.file.Path, - shouldContainFile: Boolean - ): 
(Dataset, DatasetVersion, Option[Path]) = { - val pathSegments = (0 until path.getNameCount).map(path.getName(_).toString).toArray - - val ownerEmail = pathSegments(0) - val datasetName = pathSegments(1) - val versionName = pathSegments(2) - - val fileRelativePath = - if (shouldContainFile) Some(Paths.get(pathSegments.drop(3).mkString("/"))) else None - - val dataset = getDatasetByName(ownerEmail, datasetName) - val datasetVersion = getDatasetVersionByName(dataset.getDid, versionName) - (dataset, datasetVersion, fileRelativePath) - } - - def asFile(): File = { - tempFile match { - case Some(file) => file - case None => - val tempFilePath = Files.createTempFile("versionedFile", ".tmp") - val tempFileStream = new FileOutputStream(tempFilePath.toFile) - val inputStream = asInputStream() - - val buffer = new Array[Byte](1024) - - // Create an iterator to repeatedly call inputStream.read, and direct buffered data to file - Iterator - .continually(inputStream.read(buffer)) - .takeWhile(_ != -1) - .foreach(tempFileStream.write(buffer, 0, _)) - - inputStream.close() - tempFileStream.close() - - val file = tempFilePath.toFile - tempFile = Some(file) - file - } - } - - def asInputStream(): InputStream = { - val datasetAbsolutePath = PathUtils.getDatasetPath(dataset.getDid) - GitVersionControlLocalFileStorage - .retrieveFileContentOfVersionAsInputStream( - datasetAbsolutePath, - datasetVersion.getVersionHash, - datasetAbsolutePath.resolve(fileRelativePath.get) - ) - } - - def asDirectory(): String = { - - PathUtils.getDatasetPath(dataset.getDid).toString - } -} diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetFileDocument.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetFileDocument.scala index dd1c68caf81..b62b6583658 100644 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetFileDocument.scala +++ 
b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/DatasetFileDocument.scala @@ -23,7 +23,6 @@ import com.typesafe.scalalogging.LazyLogging import org.apache.texera.amber.config.EnvironmentalVariable import org.apache.texera.amber.core.storage.model.DatasetFileDocument.{ fileServiceGetPresignURLEndpoint, - fileServiceListDirectoryObjectsEndpoint, userJwtToken } import org.apache.texera.amber.core.storage.util.LakeFSStorageClient @@ -33,7 +32,6 @@ import java.io.{File, FileOutputStream, InputStream} import java.net._ import java.nio.charset.StandardCharsets import java.nio.file.{Files, Path, Paths} -import java.util.zip.{ZipEntry, ZipOutputStream} import scala.jdk.CollectionConverters.IteratorHasAsScala object DatasetFileDocument { @@ -51,33 +49,21 @@ object DatasetFileDocument { "http://localhost:9092/api/dataset/presign-download" ) .trim - - // The endpoint for listing directory objects from the file service. - lazy val fileServiceListDirectoryObjectsEndpoint: String = - sys.env - .getOrElse( - EnvironmentalVariable.ENV_FILE_SERVICE_LIST_DIRECTORY_OBJECTS_ENDPOINT, - "http://localhost:9092/api/dataset/list-directory-objects" - ) - .trim } -private[storage] class DatasetFileDocument(uri: URI, isDirectory: Boolean = false) +private[storage] class DatasetFileDocument(uri: URI) extends VirtualDocument[Nothing] with OnDataset with LazyLogging { // Utility function to parse and decode URI segments into individual components private def parseUri(uri: URI): (String, String, Path) = { val segments = Paths.get(uri.getPath).iterator().asScala.map(_.toString).toArray - if (!isDirectory && segments.length < 3) + if (segments.length < 3) throw new IllegalArgumentException("URI format is incorrect") // parse uri to dataset components val repositoryName = segments(0) val datasetVersionHash = URLDecoder.decode(segments(1), StandardCharsets.UTF_8) - if (isDirectory) { - return (repositoryName, datasetVersionHash, Paths.get("")) - } val 
decodedRelativeSegments = segments.drop(2).map(part => URLDecoder.decode(part, StandardCharsets.UTF_8)) val fileRelativePath = Paths.get(decodedRelativeSegments.head, decodedRelativeSegments.tail: _*) @@ -155,44 +141,24 @@ private[storage] class DatasetFileDocument(uri: URI, isDirectory: Boolean = fals tempFile match { case Some(file) => file case None => - if (isDirectory) { - val tempZipPath = Files.createTempFile("versionedDirectory", ".zip") - val zipOutputStream = new ZipOutputStream(new FileOutputStream(tempZipPath.toFile)) - - try { - addDirectoryToZip( - zipOutputStream, - "", - getRepositoryName(), - getVersionHash(), - fileRelativePath - ) - } finally { - zipOutputStream.close() - } + val tempFilePath = Files.createTempFile("versionedFile", ".tmp") + val tempFileStream = new FileOutputStream(tempFilePath.toFile) + val inputStream = asInputStream() - val file = tempZipPath.toFile - tempFile = Some(file) - file - } else { - val tempFilePath = Files.createTempFile("versionedFile", ".tmp") - val tempFileStream = new FileOutputStream(tempFilePath.toFile) - val inputStream = asInputStream() + val buffer = new Array[Byte](1024) - val buffer = new Array[Byte](1024) + // Create an iterator to repeatedly call inputStream.read, and direct buffered data to file + Iterator + .continually(inputStream.read(buffer)) + .takeWhile(_ != -1) + .foreach(tempFileStream.write(buffer, 0, _)) - Iterator - .continually(inputStream.read(buffer)) - .takeWhile(_ != -1) - .foreach(tempFileStream.write(buffer, 0, _)) + inputStream.close() + tempFileStream.close() - inputStream.close() - tempFileStream.close() - - val file = tempFilePath.toFile - tempFile = Some(file) - file - } + val file = tempFilePath.toFile + tempFile = Some(file) + file } } @@ -225,196 +191,4 @@ private[storage] class DatasetFileDocument(uri: URI, isDirectory: Boolean = fals override def getVersionHash(): String = datasetVersionHash override def getFileRelativePath(): String = fileRelativePath.toString - - private 
def addDirectoryToZip( - zipOutputStream: ZipOutputStream, - basePath: String, - datasetName: String, - versionHash: String, - directoryPath: Path - ): Unit = { - try { - val allObjects = if (userJwtToken.nonEmpty) { - getDirectoryObjectsViaFileService(datasetName, versionHash) - } else { - LakeFSStorageClient.retrieveObjectsOfVersion(datasetName, versionHash) - } - - val directoryPathStr = directoryPath.toString.replace("\\", "/") - - val objectsInDirectory = allObjects.filter { obj => - val objPath = obj.getPath - if (directoryPathStr.isEmpty) { - true - } else { - objPath.startsWith(directoryPathStr + "/") || objPath == directoryPathStr - } - } - - objectsInDirectory.foreach { obj => - val objPath = obj.getPath - val relativePath = if (directoryPathStr.isEmpty) { - if (basePath.isEmpty) objPath else s"$basePath/$objPath" - } else { - val filePathWithinDirectory = objPath.substring(directoryPathStr.length).stripPrefix("/") - if (basePath.isEmpty) filePathWithinDirectory else s"$basePath/$filePathWithinDirectory" - } - - if (relativePath.nonEmpty) { - val zipEntry = new ZipEntry(relativePath) - zipOutputStream.putNextEntry(zipEntry) - - val fileInputStream = getFileInputStreamFromLakeFS(datasetName, versionHash, objPath) - val buffer = new Array[Byte](1024) - - try { - Iterator - .continually(fileInputStream.read(buffer)) - .takeWhile(_ != -1) - .foreach(zipOutputStream.write(buffer, 0, _)) - } finally { - fileInputStream.close() - } - - zipOutputStream.closeEntry() - } - } - } catch { - case e: Exception => - logger.warn( - s"Error adding directory to zip via primary method: ${e.getMessage}. 
Trying fallback.", - e - ) - addDirectoryToZipFallback(zipOutputStream, basePath, datasetName, versionHash, directoryPath) - } - } - - private def getDirectoryObjectsViaFileService( - datasetName: String, - versionHash: String - ): List[io.lakefs.clients.sdk.model.ObjectStats] = { - val requestUrl = - s"$fileServiceListDirectoryObjectsEndpoint?datasetName=${URLEncoder.encode(datasetName, StandardCharsets.UTF_8.name())}&commitHash=${URLEncoder - .encode(versionHash, StandardCharsets.UTF_8.name())}" - - val connection = new URL(requestUrl).openConnection().asInstanceOf[HttpURLConnection] - connection.setRequestMethod("GET") - connection.setRequestProperty("Authorization", s"Bearer $userJwtToken") - - try { - if (connection.getResponseCode != HttpURLConnection.HTTP_OK) { - throw new RuntimeException( - s"Failed to list directory objects: HTTP ${connection.getResponseCode}" - ) - } - - val responseBody = - new String(connection.getInputStream.readAllBytes(), StandardCharsets.UTF_8) - - val objectPattern = """\{"path"\s*:\s*"([^"]+)"\s*,\s*"sizeBytes"\s*:\s*(\d+)\}""".r - - objectPattern.findAllMatchIn(responseBody).toList.map { matchObj => - val path = matchObj.group(1) - val sizeBytes = matchObj.group(2).toLong - val objectStats = new io.lakefs.clients.sdk.model.ObjectStats() - objectStats.setPath(path) - objectStats.setSizeBytes(sizeBytes) - objectStats - } - } catch { - case e: Exception => - logger.warn( - s"Failed to get directory objects via FileService: ${e.getMessage}. 
Falling back to direct LakeFS.", - e - ) - LakeFSStorageClient.retrieveObjectsOfVersion(datasetName, versionHash) - } finally { - connection.disconnect() - } - } - - private def addDirectoryToZipFallback( - zipOutputStream: ZipOutputStream, - basePath: String, - datasetName: String, - versionHash: String, - directoryPath: Path - ): Unit = { - lazy val datasetsRootPath = - Path - .of(sys.env.getOrElse("TEXERA_HOME", ".")) - .resolve("amber") - .resolve("user-resources") - .resolve("datasets") - val datasetPath = datasetsRootPath.resolve("0") - val fullDirectoryPath = datasetPath.resolve(directoryPath) - - if (Files.exists(fullDirectoryPath) && Files.isDirectory(fullDirectoryPath)) { - Files.walk(fullDirectoryPath).forEach { filePath => - if (!Files.isDirectory(filePath)) { - val zipRelativePath = if (basePath.isEmpty) { - directoryPath.relativize(datasetPath.relativize(filePath)).toString.replace("\\", "/") - } else { - s"$basePath/${directoryPath.relativize(datasetPath.relativize(filePath)).toString.replace("\\", "/")}" - } - - val zipEntry = new ZipEntry(zipRelativePath) - zipOutputStream.putNextEntry(zipEntry) - - val fileInputStream = - GitVersionControlLocalFileStorage.retrieveFileContentOfVersionAsInputStream( - datasetPath, - versionHash, - filePath - ) - - val buffer = new Array[Byte](1024) - try { - Iterator - .continually(fileInputStream.read(buffer)) - .takeWhile(_ != -1) - .foreach(zipOutputStream.write(buffer, 0, _)) - } finally { - fileInputStream.close() - } - - zipOutputStream.closeEntry() - } - } - } else { - throw new RuntimeException(s"Failed to create zip file for directory: ${directoryPath}") - } - } - - private def getFileInputStreamFromLakeFS( - datasetName: String, - versionHash: String, - filePath: String - ): InputStream = { - if (userJwtToken.isEmpty) { - val presignUrl = LakeFSStorageClient.getFilePresignedUrl(datasetName, versionHash, filePath) - new URL(presignUrl).openStream() - } else { - val presignRequestUrl = - 
s"$fileServiceGetPresignURLEndpoint?repositoryName=${datasetName}&commitHash=${versionHash}&filePath=${URLEncoder - .encode(filePath, StandardCharsets.UTF_8.name())}" - - val connection = new URL(presignRequestUrl).openConnection().asInstanceOf[HttpURLConnection] - connection.setRequestMethod("GET") - connection.setRequestProperty("Authorization", s"Bearer $userJwtToken") - - if (connection.getResponseCode != HttpURLConnection.HTTP_OK) { - throw new RuntimeException( - s"Failed to retrieve presigned URL: HTTP ${connection.getResponseCode}" - ) - } - - val responseBody = - new String(connection.getInputStream.readAllBytes(), StandardCharsets.UTF_8) - val presignedUrl = responseBody.split("\"presignedUrl\"\\s*:\\s*\"")(1).split("\"")(0) - - connection.disconnect() - new URL(presignedUrl).openStream() - } - } } diff --git a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/PathUtils.scala b/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/PathUtils.scala deleted file mode 100755 index 2a0cccd472f..00000000000 --- a/common/workflow-core/src/main/scala/org/apache/texera/amber/core/storage/model/PathUtils.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.texera.amber.core.storage.model - -import java.nio.file.Path - -object PathUtils { - lazy val datasetsRootPath: Path = - Path - .of(sys.env.getOrElse("TEXERA_HOME", ".")) - .resolve("amber") - .resolve("user-resources") - .resolve("datasets") - - def getDatasetPath(did: Integer): Path = { - datasetsRootPath.resolve(did.toString) - } -} diff --git a/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/CloudMapperSourceOpDesc.scala b/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/CloudMapperSourceOpDesc.scala deleted file mode 100755 index c02c40a3beb..00000000000 --- a/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/CloudMapperSourceOpDesc.scala +++ /dev/null @@ -1,243 +0,0 @@ -package org.apache.amber.operator.cloudmapper - -import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} -import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle -import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema} -import org.apache.texera.amber.core.workflow.{OutputPort, PortIdentity} -import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo} -import org.apache.texera.amber.operator.source.PythonSourceOperatorDescriptor -import org.apache.texera.amber.core.storage.{DocumentFactory, FileResolver} - -class CloudMapperSourceOpDesc extends PythonSourceOperatorDescriptor { - @JsonProperty(required = true) - @JsonSchemaTitle("FastQ Dataset") - @JsonPropertyDescription("Dataset containing fastq files") - val directoryName: String = "" - - @JsonProperty(required = true) - var referenceGenome: ReferenceGenome = _ - - @JsonProperty(required = false) - @JsonSchemaTitle("Additional Reference Genomes") - @JsonPropertyDescription("Add one or more additional reference genomes (optional)") - var 
additionalReferenceGenomes: List[ReferenceGenome] = List() - - @JsonProperty(required = true) - @JsonSchemaTitle("Cluster") - @JsonPropertyDescription("Cluster") - val cluster: String = "" - - private var clusterLauncherServiceTarget: String = - "http://cloudmapper-service.texera.svc.cluster.local:4000" - - // Getter to retrieve only the id part (cid) from the cluster - def clusterId: String = { - if (cluster.startsWith("#")) { - cluster.split(" ")(0).substring(1) // Extracts the cid part by splitting and removing '#' - } else { - "" - } - } - - override def generatePythonCode(): String = { - val directoryUri = FileResolver.resolveDirectory(directoryName) - println(directoryUri.toASCIIString) - - val directoryDocument = DocumentFactory.openReadonlyDocument(directoryUri, isDirectory = true) - val directoryFile = directoryDocument.asFile() - println(directoryFile.getAbsolutePath) - - // Convert the Scala referenceGenome to a Python string - val pythonReferenceGenome = s"'${referenceGenome.referenceGenome.getName}'" - - // Convert the Scala additionalReferenceGenomes list to a Python list format - val pythonAdditionalReferenceGenomes = additionalReferenceGenomes - .map(_.referenceGenome.getName) - .map(name => s"'$name'") - .mkString("[", ", ", "]") - - // Combine main reference genome with additional ones - val pythonAllReferenceGenomes = - s"[${pythonReferenceGenome}] + ${pythonAdditionalReferenceGenomes}" - - // Convert all reference genomes (main + additional) to a Python list format for FASTA files - val pythonFastaFiles = (referenceGenome :: additionalReferenceGenomes) - .flatMap(_.fastAFiles) - .map(file => { - val fileUri = FileResolver.resolve(file) - val fileDocument = DocumentFactory.openReadonlyDocument(fileUri, isDirectory = false) - val fastAFilePath = fileDocument.asFile().getAbsolutePath - s"open(r'$fastAFilePath', 'rb')" - }) - .mkString("[", ", ", "]") - - // Extract GTF file if exists for 'My Reference' (considering both main and additional 
reference genomes) - val pythonGtfFile = (referenceGenome :: additionalReferenceGenomes) - .find(_.referenceGenome == ReferenceGenomeEnum.MY_REFERENCE) - .flatMap(_.gtfFile) - .map(file => { - val fileUri = FileResolver.resolve(file) - val fileDocument = DocumentFactory.openReadonlyDocument(fileUri, isDirectory = false) - val gtfFilePath = fileDocument.asFile().getAbsolutePath - s"open(r'$gtfFilePath', 'rb')" - }) - .getOrElse("None") - - val pythonGtfFileValue = if (pythonGtfFile == "None") "None" else pythonGtfFile - - s"""from pytexera import * - | - |class GenerateOperator(UDFSourceOperator): - | - | @overrides - | def produce(self) -> Iterator[Union[TupleLike, TableLike, None]]: - | import requests, time, tarfile, io - | - | reads_path = r'${directoryFile.getAbsolutePath}' - | service_url = "${clusterLauncherServiceTarget}" - | cluster_id = ${clusterId} - | - | # ------------------------------------------------------------------ - | # Step 1: Ask the Go service for a presigned S3 PUT URL. - | # The reads zip will be sent directly to S3 from here — the Go - | # service is not in the data path for the large file. - | # ------------------------------------------------------------------ - | upload_meta_resp = requests.post(f"{service_url}/api/job/request-upload") - | upload_meta_resp.raise_for_status() - | upload_meta = upload_meta_resp.json() - | upload_url = upload_meta["upload_url"] - | s3_key = upload_meta["s3_key"] - | job_id = upload_meta["job_id"] - | - | yield # let Texera heartbeat while we upload - | - | # ------------------------------------------------------------------ - | # Step 2: PUT the reads zip directly to S3 (presigned URL, no proxy). 
- | # ------------------------------------------------------------------ - | with open(reads_path, 'rb') as reads_file: - | put_resp = requests.put(upload_url, data=reads_file) - | put_resp.raise_for_status() - | - | yield # let Texera heartbeat while we notify - | - | # ------------------------------------------------------------------ - | # Step 3: Notify the Go service to start the job. Pass s3_key and - | # job_id so it knows which S3 object to pull on the EC2 head node. - | # FASTA/GTF files (small, annotation-only) still go as multipart. - | # ------------------------------------------------------------------ - | selected_genomes = ${pythonAllReferenceGenomes} - | form_data = { - | 'cid': str(cluster_id), - | 's3_key': s3_key, - | 'job_id': str(job_id), - | } - | for index, genome in enumerate(selected_genomes): - | form_data[f'referenceGenome[{index}]'] = genome - | - | files = {} - | if 'My Reference' in selected_genomes: - | fasta_files = ${pythonFastaFiles} - | for index, fasta_file in enumerate(fasta_files): - | files[f'fastaFiles[{index}]'] = fasta_file - | gtf_file = ${pythonGtfFileValue} - | if gtf_file is not None: - | files['gtfFile'] = gtf_file - | - | response = requests.post(f"{service_url}/api/job/create", - | data=form_data, files=files if files else None) - | response.raise_for_status() - | - | # ------------------------------------------------------------------ - | # Step 4: Poll until the job is finished. - | # ------------------------------------------------------------------ - | while True: - | status_response = requests.get(f'{service_url}/api/job/status/{job_id}') - | status = status_response.json().get("status") - | - | if status == "finished": - | print("Job finished! 
Downloading the result...") - | break - | elif status == "failed": - | print("Job failed.") - | yield { - | 'Sample': None, - | 'features.tsv.gz': None, - | 'barcodes.tsv.gz': None, - | 'matrix.mtx.gz': None - | } - | return - | - | print("Job is still processing...") - | time.sleep(0.5) - | yield - | - | # ------------------------------------------------------------------ - | # Step 5: Download results. - | # The server streams a tar.gz archive containing all filtered/ - | # output files. We parse it member-by-member so the operator - | # never holds the entire decompressed matrix in RAM at once. - | # ------------------------------------------------------------------ - | download_response = requests.get(f'{service_url}/api/job/download/{job_id}', - | stream=True) - | download_response.raise_for_status() - | - | # urllib3 raw socket; tell it to handle transport-encoding itself - | download_response.raw.decode_content = True - | - | samples = {} - | with tarfile.open(fileobj=download_response.raw, mode='r|gz') as tar: - | for member in tar: - | if not member.isfile(): - | continue - | parts = member.name.split('/') - | # Expected layout: /filtered/.gz - | if len(parts) < 3: - | continue - | sample_name = parts[0] - | fname = parts[-1] - | if fname in ('features.tsv.gz', 'barcodes.tsv.gz', 'matrix.mtx.gz'): - | f = tar.extractfile(member) - | if f is not None: - | samples.setdefault(sample_name, {})[fname] = f.read() - | - | if not samples: - | print("Download succeeded but archive contained no recognised files.") - | yield { - | 'Sample': None, - | 'features.tsv.gz': None, - | 'barcodes.tsv.gz': None, - | 'matrix.mtx.gz': None - | } - | return - | - | for sample_name, files in samples.items(): - | yield { - | 'Sample': sample_name, - | 'features.tsv.gz': files.get('features.tsv.gz'), - | 'barcodes.tsv.gz': files.get('barcodes.tsv.gz'), - | 'matrix.mtx.gz': files.get('matrix.mtx.gz') - | } - """.stripMargin - } - override def operatorInfo: OperatorInfo = - 
OperatorInfo( - "CloudBioMapper", - "Running sequence alignment using public cluster services", - OperatorGroupConstants.API_GROUP, - inputPorts = List.empty, - outputPorts = List(OutputPort()) - ) - override def asSource() = true - override def sourceSchema(): Schema = - Schema() - .add( - new Attribute("Sample", AttributeType.STRING), - new Attribute("features.tsv.gz", AttributeType.BINARY), - new Attribute("barcodes.tsv.gz", AttributeType.BINARY), - new Attribute("matrix.mtx.gz", AttributeType.BINARY) - ) - - def getOutputSchemas(inputSchemas: Map[PortIdentity, Schema]): Map[PortIdentity, Schema] = { - Map(operatorInfo.outputPorts.head.id -> sourceSchema()) - } -} diff --git a/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenome.scala b/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenome.scala deleted file mode 100755 index 923c85ce673..00000000000 --- a/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenome.scala +++ /dev/null @@ -1,46 +0,0 @@ -package org.apache.amber.operator.cloudmapper - -import com.fasterxml.jackson.annotation.JsonProperty -import com.kjetland.jackson.jsonSchema.annotations.{ - JsonSchemaInject, - JsonSchemaString, - JsonSchemaTitle -} -import org.apache.texera.amber.operator.metadata.annotations.HideAnnotation - -class ReferenceGenome { - // Required field for selecting a reference genome. - // This field is mandatory and must be filled by the user. - @JsonProperty(required = true) - var referenceGenome: ReferenceGenomeEnum = _ - - // Optional field for FastA files. - // This field is shown only if 'referenceGenome' is set to 'MY_REFERENCE'. 
- @JsonSchemaTitle("FastA Files") - @JsonSchemaInject( - strings = Array( - new JsonSchemaString(path = HideAnnotation.hideTarget, value = "referenceGenome"), - new JsonSchemaString(path = HideAnnotation.hideType, value = HideAnnotation.Type.regex), - new JsonSchemaString( - path = HideAnnotation.hideExpectedValue, - value = "^((?!My Reference).)*$" - ) - ) - ) - val fastAFiles: Option[String] = None - - // Optional field for Gtf files. - // This field is shown only if 'referenceGenome' is set to 'MY_REFERENCE'. - @JsonSchemaTitle("Gtf File") - @JsonSchemaInject( - strings = Array( - new JsonSchemaString(path = HideAnnotation.hideTarget, value = "referenceGenome"), - new JsonSchemaString(path = HideAnnotation.hideType, value = HideAnnotation.Type.regex), - new JsonSchemaString( - path = HideAnnotation.hideExpectedValue, - value = "^((?!My Reference).)*$" - ) - ) - ) - val gtfFile: Option[String] = None -} diff --git a/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenomeEnum.java b/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenomeEnum.java deleted file mode 100755 index 962fb6eb19f..00000000000 --- a/common/workflow-operator/src/main/scala/org/apache/amber/operator/cloudmapper/ReferenceGenomeEnum.java +++ /dev/null @@ -1,26 +0,0 @@ -package org.apache.amber.operator.cloudmapper; - -import com.fasterxml.jackson.annotation.JsonValue; - -public enum ReferenceGenomeEnum { - HUMAN_GRCh38("GRCh38"), - - MOUSE_GRCm39("GRCm39"), - - MOUSE_mm10("mm10"), - - HUMAN_hg19("hg19"), - - MY_REFERENCE("My Reference"); - - private final String name; - - ReferenceGenomeEnum(String name) { - this.name = name; - } - - @JsonValue - public String getName() { - return this.name; - } -} \ No newline at end of file diff --git a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala 
index 43d1ffc03b9..4e9d6c6e2cd 100644 --- a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala +++ b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/LogicalOp.scala @@ -22,7 +22,6 @@ package org.apache.texera.amber.operator import com.fasterxml.jackson.annotation.JsonSubTypes.Type import com.fasterxml.jackson.annotation._ import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle -import org.apache.amber.operator.cloudmapper.CloudMapperSourceOpDesc import org.apache.texera.amber.core.executor.OperatorExecutor import org.apache.texera.amber.core.tuple.Schema import org.apache.texera.amber.core.virtualidentity.{ @@ -429,11 +428,7 @@ trait StateTransferFunc value = classOf[SklearnAdvancedSVRTrainerOpDesc], name = "SVRTrainer" ), - new Type(value = classOf[SklearnTestingOpDesc], name = "SklearnTesting"), - new Type( - value = classOf[CloudMapperSourceOpDesc], - name = "CloudBioMapper" - ) + new Type(value = classOf[SklearnTestingOpDesc], name = "SklearnTesting") ) ) abstract class LogicalOp extends PortDescriptor with Serializable { diff --git a/frontend/src/app/app-routing.module.ts b/frontend/src/app/app-routing.module.ts index ab585a9e930..179caf5c088 100644 --- a/frontend/src/app/app-routing.module.ts +++ b/frontend/src/app/app-routing.module.ts @@ -25,7 +25,6 @@ import { UserQuotaComponent } from "./dashboard/component/user/user-quota/user-q import { UserProjectSectionComponent } from "./dashboard/component/user/user-project/user-project-section/user-project-section.component"; import { UserProjectComponent } from "./dashboard/component/user/user-project/user-project.component"; import { UserComputingUnitComponent } from "./dashboard/component/user/user-computing-unit/user-computing-unit.component"; -import { ClusterComponent } from "./dashboard/component/user/cluster/cluster.component"; import { WorkspaceComponent } from "./workspace/component/workspace.component"; import { AboutComponent } 
from "./hub/component/about/about.component"; import { AuthGuardService } from "./common/service/user/auth-guard.service"; @@ -136,10 +135,6 @@ routes.push({ path: "compute", component: UserComputingUnitComponent, }, - { - path: "cluster", - component: ClusterComponent, - }, { path: "quota", component: UserQuotaComponent, diff --git a/frontend/src/app/app.module.ts b/frontend/src/app/app.module.ts index 0e905bf61bd..6006fb22ad4 100644 --- a/frontend/src/app/app.module.ts +++ b/frontend/src/app/app.module.ts @@ -190,12 +190,6 @@ import { NzCheckboxModule } from "ng-zorro-antd/checkbox"; import { RegistrationRequestModalComponent } from "./common/service/user/registration-request-modal/registration-request-modal.component"; import { UserComputingUnitComponent } from "./dashboard/component/user/user-computing-unit/user-computing-unit.component"; import { UserComputingUnitListItemComponent } from "./dashboard/component/user/user-computing-unit/user-computing-unit-list-item/user-computing-unit-list-item.component"; -import { DirectoryPathInputComponent } from "./workspace/component/directory-path-input/directory-path-input.component"; -import { DirectorySelectionComponent } from "./workspace/component/directory-selection/directory-selection.component"; -import { ClusterComponent } from "./dashboard/component/user/cluster/cluster.component"; -import { ClusterManagementModalComponent } from "./dashboard/component/user/cluster/cluster-management-modal/cluster-management-modal.component"; -import { ClusterSelectionComponent } from "./workspace/component/cluster-selection/cluster-selection.component"; -import { ClusterAutoCompleteComponent } from "./workspace/component/cluster-autocomplete/cluster-autocomplete.component"; registerLocaleData(en); @@ -296,12 +290,6 @@ registerLocaleData(en); MarkdownDescriptionComponent, UserComputingUnitComponent, UserComputingUnitListItemComponent, - DirectoryPathInputComponent, - DirectorySelectionComponent, - ClusterComponent, - 
ClusterManagementModalComponent, - ClusterSelectionComponent, - ClusterAutoCompleteComponent, ], imports: [ BrowserModule, diff --git a/frontend/src/app/common/formly/formly-config.ts b/frontend/src/app/common/formly/formly-config.ts index ccebf25b797..c3995abb544 100644 --- a/frontend/src/app/common/formly/formly-config.ts +++ b/frontend/src/app/common/formly/formly-config.ts @@ -28,8 +28,6 @@ import { DatasetFileSelectorComponent } from "../../workspace/component/dataset- import { CollabWrapperComponent } from "./collab-wrapper/collab-wrapper/collab-wrapper.component"; import { FormlyRepeatDndComponent } from "./repeat-dnd/repeat-dnd.component"; import { DatasetVersionSelectorComponent } from "../../workspace/component/dataset-version-selector/dataset-version-selector.component"; -import { DirectoryPathInputComponent } from "../../workspace/component/directory-path-input/directory-path-input.component"; -import { ClusterAutoCompleteComponent } from "src/app/workspace/component/cluster-autocomplete/cluster-autocomplete.component"; /** * Configuration for using Json Schema with Formly. 
@@ -82,8 +80,6 @@ export const TEXERA_FORMLY_CONFIG = { { name: "inputautocomplete", component: DatasetFileSelectorComponent, wrappers: ["form-field"] }, { name: "datasetversionselector", component: DatasetVersionSelectorComponent, wrappers: ["form-field"] }, { name: "repeat-section-dnd", component: FormlyRepeatDndComponent }, - { name: "clusterautocomplete", component: ClusterAutoCompleteComponent, wrappers: ["form-field"] }, - { name: "directorypathinput", component: DirectoryPathInputComponent, wrappers: ["form-field"] }, ], wrappers: [ { name: "preset-wrapper", component: PresetWrapperComponent }, diff --git a/frontend/src/app/common/service/cluster/cluster.service.ts b/frontend/src/app/common/service/cluster/cluster.service.ts deleted file mode 100755 index 8049eca37ff..00000000000 --- a/frontend/src/app/common/service/cluster/cluster.service.ts +++ /dev/null @@ -1,45 +0,0 @@ -import { Injectable } from "@angular/core"; -import { HttpClient } from "@angular/common/http"; -import { Observable } from "rxjs"; -import { Clusters } from "../../../dashboard/type/clusters"; -import { AppSettings } from "../../app-setting"; - -@Injectable({ - providedIn: "root", -}) -export class ClusterService { - public CLUSTER_BASE_URL = "cluster"; - public CLUSTER_LAUNCH_URL = this.CLUSTER_BASE_URL + "/launch"; - public CLUSTER_TERMINATE_URL = this.CLUSTER_BASE_URL + "/terminate"; - public CLUSTER_STOP_URL = this.CLUSTER_BASE_URL + "/stop"; - public CLUSTER_START_URL = this.CLUSTER_BASE_URL + "/start"; - public CLUSTER_UPDATE_URL = this.CLUSTER_BASE_URL + "/update/name"; - - constructor(private http: HttpClient) {} - - getClusters(available = false): Observable { - return this.http.get(`${AppSettings.getApiEndpoint()}/${this.CLUSTER_BASE_URL}`, { - params: { available }, - }); - } - - launchCluster(clusterConfig: { name: string; machineType: string; numberOfMachines: number }): Observable { - return this.http.post(`${AppSettings.getApiEndpoint()}/${this.CLUSTER_LAUNCH_URL}`, 
clusterConfig); - } - - terminateCluster(cluster: Clusters): Observable { - return this.http.post(`${AppSettings.getApiEndpoint()}/${this.CLUSTER_TERMINATE_URL}`, cluster); - } - - stopCluster(cluster: Clusters): Observable { - return this.http.post(`${AppSettings.getApiEndpoint()}/${this.CLUSTER_STOP_URL}`, cluster); - } - - startCluster(cluster: Clusters): Observable { - return this.http.post(`${AppSettings.getApiEndpoint()}/${this.CLUSTER_START_URL}`, cluster); - } - - updateCluster(cluster: Clusters): Observable { - return this.http.post(`${AppSettings.getApiEndpoint()}/${this.CLUSTER_UPDATE_URL}`, cluster); - } -} diff --git a/frontend/src/app/common/type/gui-config.ts b/frontend/src/app/common/type/gui-config.ts index 72e3401fccc..d8786c1dc08 100644 --- a/frontend/src/app/common/type/gui-config.ts +++ b/frontend/src/app/common/type/gui-config.ts @@ -54,7 +54,6 @@ export interface SidebarTabs { projects_enabled: boolean; workflows_enabled: boolean; datasets_enabled: boolean; - cluster_enabled: boolean; quota_enabled: boolean; forum_enabled: boolean; about_enabled: boolean; diff --git a/frontend/src/app/dashboard/component/admin/settings/admin-settings.component.html b/frontend/src/app/dashboard/component/admin/settings/admin-settings.component.html index 0c395c2f17d..a0ea29e77b5 100644 --- a/frontend/src/app/dashboard/component/admin/settings/admin-settings.component.html +++ b/frontend/src/app/dashboard/component/admin/settings/admin-settings.component.html @@ -190,15 +190,6 @@

General Settings

- -