diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 05a81306669..74483115591 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,23 +1,3 @@ -# -------------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed -# with this work for additional information regarding copyright -# ownership. The ASF licenses this file to You under the Apache -# License, Version 2.0 (the "License"); you may not use this file -# except in compliance with the License. You may obtain a copy of the -# License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -# implied. See the License for the specific language governing -# permissions and limitations under the License. 
-# -# -------------------------------------------------------------------- - blank_issues_enabled: true contact_links: - name: 🙏🏻 Q&A diff --git a/.github/workflows/build-cloudberry-rocky8.yml b/.github/workflows/build-cloudberry-rocky8.yml index 2abf88060e3..4986eae11b2 100644 --- a/.github/workflows/build-cloudberry-rocky8.yml +++ b/.github/workflows/build-cloudberry-rocky8.yml @@ -320,6 +320,10 @@ jobs: "gpcontrib/gp_sparse_vector:installcheck", "gpcontrib/gp_toolkit:installcheck"] }, + {"test":"gpcontrib-gp-stats-collector", + "make_configs":["gpcontrib/gp_stats_collector:installcheck"], + "extension":"gp_stats_collector" + }, {"test":"ic-fixme", "make_configs":["src/test/regress:installcheck-fixme"], "enable_core_check":false @@ -339,6 +343,10 @@ jobs: }, {"test":"ic-cbdb-parallel", "make_configs":["src/test/regress:installcheck-cbdb-parallel"] + }, + {"test":"ic-recovery", + "make_configs":["src/test/recovery:installcheck"], + "enable_core_check":false } ] }' @@ -1396,6 +1404,7 @@ jobs: if: success() && needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} + BUILD_DESTINATION: /usr/local/cloudberry-db shell: bash {0} run: | set -o pipefail @@ -1419,6 +1428,30 @@ jobs: # 2. Follow the same pattern as optimizer # 3. Update matrix entries to include the new setting + # Create extension if required + if [[ "${{ matrix.extension != '' }}" == "true" ]]; then + case "${{ matrix.extension }}" in + gp_stats_collector) + if ! 
su - gpadmin -c "source ${BUILD_DESTINATION}/cloudberry-env.sh && \ + source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh && \ + gpconfig -c shared_preload_libraries -v 'gp_stats_collector' && \ + gpstop -ra && \ + echo 'CREATE EXTENSION IF NOT EXISTS gp_stats_collector; \ + SHOW shared_preload_libraries; \ + TABLE pg_extension;' | \ + psql postgres" + then + echo "Error creating gp_stats_collector extension" + exit 1 + fi + ;; + *) + echo "Unknown extension: ${{ matrix.extension }}" + exit 1 + ;; + esac + fi + # Set PostgreSQL options if defined PG_OPTS="" if [[ "${{ matrix.pg_settings.optimizer != '' }}" == "true" ]]; then @@ -1643,6 +1676,7 @@ jobs: - name: Check and Display Regression Diffs if: always() + shell: bash {0} run: | # Search for regression.diffs recursively found_file=$(find . -type f -name "regression.diffs" | head -n 1) diff --git a/.github/workflows/build-cloudberry.yml b/.github/workflows/build-cloudberry.yml index ca75f7b42e7..c00dcde0486 100644 --- a/.github/workflows/build-cloudberry.yml +++ b/.github/workflows/build-cloudberry.yml @@ -271,6 +271,10 @@ jobs: }, "enable_core_check":false }, + {"test":"gpcontrib-gp-stats-collector", + "make_configs":["gpcontrib/gp_stats_collector:installcheck"], + "extension":"gp_stats_collector" + }, {"test":"ic-expandshrink", "make_configs":["src/test/isolation2:installcheck-expandshrink"] }, @@ -312,7 +316,8 @@ jobs: "gpcontrib/zstd:installcheck", "gpcontrib/gp_sparse_vector:installcheck", "gpcontrib/gp_toolkit:installcheck", - "gpcontrib/gp_exttable_fdw:installcheck"] + "gpcontrib/gp_exttable_fdw:installcheck", + "gpcontrib/gp_internal_tools:installcheck"] }, {"test":"ic-diskquota", "make_configs":["gpcontrib/diskquota:installcheck"], @@ -337,6 +342,9 @@ jobs: }, {"test":"ic-cbdb-parallel", "make_configs":["src/test/regress:installcheck-cbdb-parallel"] + }, + {"test":"ic-orca-parallel", + "make_configs":["src/test/regress:installcheck-orca-parallel"] } ] }' @@ -1402,6 +1410,7 @@ jobs: if: success() && 
needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} + BUILD_DESTINATION: /usr/local/cloudberry-db shell: bash {0} run: | set -o pipefail @@ -1431,6 +1440,30 @@ jobs: PG_OPTS="$PG_OPTS -c optimizer=${{ matrix.pg_settings.optimizer }}" fi + # Create extension if required + if [[ "${{ matrix.extension != '' }}" == "true" ]]; then + case "${{ matrix.extension }}" in + gp_stats_collector) + if ! su - gpadmin -c "source ${BUILD_DESTINATION}/cloudberry-env.sh && \ + source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh && \ + gpconfig -c shared_preload_libraries -v 'gp_stats_collector' && \ + gpstop -ra && \ + echo 'CREATE EXTENSION IF NOT EXISTS gp_stats_collector; \ + SHOW shared_preload_libraries; \ + TABLE pg_extension;' | \ + psql postgres" + then + echo "Error creating gp_stats_collector extension" + exit 1 + fi + ;; + *) + echo "Unknown extension: ${{ matrix.extension }}" + exit 1 + ;; + esac + fi + if [[ "${{ matrix.pg_settings.default_table_access_method != '' }}" == "true" ]]; then PG_OPTS="$PG_OPTS -c default_table_access_method=${{ matrix.pg_settings.default_table_access_method }}" fi @@ -1649,6 +1682,7 @@ jobs: - name: Check and Display Regression Diffs if: always() + shell: bash {0} run: | # Search for regression.diffs recursively found_file=$(find . -type f -name "regression.diffs" | head -n 1) diff --git a/.github/workflows/build-deb-cloudberry-ubuntu24.04.yml b/.github/workflows/build-deb-cloudberry-ubuntu24.04.yml new file mode 100644 index 00000000000..041eabc252b --- /dev/null +++ b/.github/workflows/build-deb-cloudberry-ubuntu24.04.yml @@ -0,0 +1,1892 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. 
The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# GitHub Actions Workflow: Apache Cloudberry Build Pipeline +# -------------------------------------------------------------------- +# Description: +# +# This workflow builds, tests, and packages Apache Cloudberry on +# Ubuntu 24.04. It ensures artifact integrity and performs installation +# tests. +# +# Workflow Overview: +# 1. **Build Job**: +# - Configures and builds Apache Cloudberry. +# - Supports debug build configuration via ENABLE_DEBUG flag. +# - Runs unit tests and verifies build artifacts. +# - Creates DEB packages (regular and debug), source tarball +# and additional files for dupload utility. +# - **Key Artifacts**: DEB package, source tarball, changes and dsc files, build logs. +# +# 2. **DEB Install Test Job**: +# - Verifies DEB integrity and installs Cloudberry. +# - Validates successful installation. +# - **Key Artifacts**: Installation logs, verification results. +# +# 3. **Report Job**: +# - Aggregates job results into a final report. +# - Sends failure notifications if any step fails. +# +# Execution Environment: +# - **Runs On**: ubuntu-22.04 with ubuntu-24.04 containers. +# - **Resource Requirements**: +# - Disk: Minimum 20GB free space. +# - Memory: Minimum 8GB RAM. +# - CPU: Recommended 4+ cores. +# +# Triggers: +# - Push to `main` or `REL_2_STABLE` branches. +# - Pull request that modifies this workflow file. 
+# - Scheduled: Every Monday at 02:00 UTC. +# - Manual workflow dispatch. +# +# Container Images: +# - **Build**: `apache/incubator-cloudberry:cbdb-build-ubuntu24.04-latest` +# - **Test**: `apache/incubator-cloudberry:cbdb-test-ubuntu24.04-latest` +# +# Artifacts: +# - DEB Package (retention: ${{ env.LOG_RETENTION_DAYS }} days). +# - Changes and DSC files (retention: ${{ env.LOG_RETENTION_DAYS }} days). +# - Source Tarball (retention: ${{ env.LOG_RETENTION_DAYS }} days). +# - Logs and Test Results (retention: ${{ env.LOG_RETENTION_DAYS }} days). +# +# Notes: +# - Supports concurrent job execution. +# - Supports debug builds with preserved symbols. +# -------------------------------------------------------------------- + +name: Apache Cloudberry Debian Build + +on: + push: + branches: [main, REL_2_STABLE] + pull_request: + paths: + - '.github/workflows/build-deb-cloudberry-ubuntu24.04.yml' + # We can enable the PR test when needed + # branches: [main, REL_2_STABLE] + # types: [opened, synchronize, reopened, edited] + schedule: + # Run every Monday at 02:00 UTC + - cron: '0 2 * * 1' + workflow_dispatch: # Manual trigger + inputs: + test_selection: + # Example names must match the "test" values defined in the test matrix of this workflow + description: 'Select tests to run (comma-separated). Examples: ic-deb-good-opt-off,ic-deb-contrib' + required: false + default: 'all' + type: string + reuse_artifacts_from_run_id: + description: 'Reuse build artifacts from a previous run ID (leave empty to build fresh)' + required: false + default: '' + type: string + +# Note: Step details, logs, and artifacts require users to be logged into GitHub +# even for public repositories. This is a GitHub security feature and cannot +# be overridden by permissions. 
+ +permissions: + # READ permissions allow viewing repository contents + contents: read # Required for checking out code and reading repository files + + # READ permissions for packages (Container registry, etc) + packages: read # Allows reading from GitHub package registry + + # WRITE permissions for actions includes read access to: + # - Workflow runs + # - Artifacts (requires GitHub login) + # - Logs (requires GitHub login) + actions: write + + # READ permissions for checks API: + # - Step details visibility (requires GitHub login) + # - Check run status and details + checks: read + + # READ permissions for pull request metadata: + # - PR status + # - Associated checks + # - Review states + pull-requests: read + +env: + LOG_RETENTION_DAYS: 7 + ENABLE_DEBUG: false + +jobs: + + ## ====================================================================== + ## Job: check-skip + ## ====================================================================== + + check-skip: + runs-on: ubuntu-22.04 + outputs: + should_skip: ${{ steps.skip-check.outputs.should_skip }} + steps: + - id: skip-check + shell: bash + env: + EVENT_NAME: ${{ github.event_name }} + PR_TITLE: ${{ github.event.pull_request.title || '' }} + PR_BODY: ${{ github.event.pull_request.body || '' }} + run: | + # Default to not skipping + echo "should_skip=false" >> "$GITHUB_OUTPUT" + + # Apply skip logic only for pull_request events + if [[ "$EVENT_NAME" == "pull_request" ]]; then + # Combine PR title and body for skip check + MESSAGE="${PR_TITLE}\n${PR_BODY}" + + # Escape special characters using printf %s + ESCAPED_MESSAGE=$(printf "%s" "$MESSAGE") + + echo "Checking PR title and body (escaped): $ESCAPED_MESSAGE" + + # Check for skip patterns + if echo -e "$ESCAPED_MESSAGE" | grep -qEi '\[skip[ -]ci\]|\[ci[ -]skip\]|\[no[ -]ci\]'; then + echo "should_skip=true" >> "$GITHUB_OUTPUT" + fi + else + echo "Skip logic is not applied for $EVENT_NAME events." 
+ fi + + - name: Report Skip Status + if: steps.skip-check.outputs.should_skip == 'true' + run: | + echo "CI Skip flag detected in PR - skipping all checks." + exit 0 + + ## ====================================================================== + ## Job: prepare-test-matrix-deb + ## ====================================================================== + + prepare-test-matrix-deb: + runs-on: ubuntu-22.04 + needs: [check-skip] + if: needs.check-skip.outputs.should_skip != 'true' + outputs: + test-matrix: ${{ steps.set-matrix.outputs.matrix }} + + steps: + - id: set-matrix + run: | + echo "=== Matrix Preparation Diagnostics ===" + echo "Event type: ${{ github.event_name }}" + echo "Test selection input: '${{ github.event.inputs.test_selection }}'" + + # Define defaults + DEFAULT_NUM_PRIMARY_MIRROR_PAIRS=3 + DEFAULT_ENABLE_CGROUPS=false + DEFAULT_ENABLE_CORE_CHECK=true + DEFAULT_PG_SETTINGS_OPTIMIZER="" + + # Define base test configurations + ALL_TESTS='{ + "include": [ + {"test":"ic-deb-good-opt-off", + "make_configs":["src/test/regress:installcheck-good"], + "pg_settings":{"optimizer":"off"} + }, + {"test":"ic-deb-good-opt-on", + "make_configs":["src/test/regress:installcheck-good"], + "pg_settings":{"optimizer":"on"} + }, + {"test":"pax-ic-deb-good-opt-off", + "make_configs":[ + "contrib/pax_storage/:pax-test", + "contrib/pax_storage/:regress_test" + ], + "pg_settings":{ + "optimizer":"off", + "default_table_access_method":"pax" + } + }, + {"test":"pax-ic-deb-good-opt-on", + "make_configs":[ + "contrib/pax_storage/:pax-test", + "contrib/pax_storage/:regress_test" + ], + "pg_settings":{ + "optimizer":"on", + "default_table_access_method":"pax" + } + }, + {"test":"ic-deb-contrib", + "make_configs":["contrib/auto_explain:installcheck", + "contrib/amcheck:installcheck", + "contrib/citext:installcheck", + "contrib/btree_gin:installcheck", + "contrib/btree_gist:installcheck", + "contrib/dblink:installcheck", + "contrib/dict_int:installcheck", + 
"contrib/dict_xsyn:installcheck", + "contrib/extprotocol:installcheck", + "contrib/file_fdw:installcheck", + "contrib/formatter_fixedwidth:installcheck", + "contrib/hstore:installcheck", + "contrib/indexscan:installcheck", + "contrib/pg_trgm:installcheck", + "contrib/pgcrypto:installcheck", + "contrib/pgstattuple:installcheck", + "contrib/tablefunc:installcheck", + "contrib/passwordcheck:installcheck", + "contrib/pg_buffercache:installcheck", + "contrib/sslinfo:installcheck"] + }, + {"test":"ic-deb-gpcontrib", + "make_configs":["gpcontrib/orafce:installcheck", + "gpcontrib/zstd:installcheck", + "gpcontrib/gp_sparse_vector:installcheck", + "gpcontrib/gp_toolkit:installcheck"] + }, + {"test":"gpcontrib-gp-stats-collector", + "make_configs":["gpcontrib/gp_stats_collector:installcheck"], + "extension":"gp_stats_collector" + }, + {"test":"ic-cbdb-parallel", + "make_configs":["src/test/regress:installcheck-cbdb-parallel"] + } + ] + }' + + # Function to apply defaults + apply_defaults() { + echo "$1" | jq --arg npm "$DEFAULT_NUM_PRIMARY_MIRROR_PAIRS" \ + --argjson ec "$DEFAULT_ENABLE_CGROUPS" \ + --argjson ecc "$DEFAULT_ENABLE_CORE_CHECK" \ + --arg opt "$DEFAULT_PG_SETTINGS_OPTIMIZER" \ + 'def get_defaults: + { + num_primary_mirror_pairs: ($npm|tonumber), + enable_cgroups: $ec, + enable_core_check: $ecc, + pg_settings: { + optimizer: $opt + } + }; + get_defaults * .' 
+ } + + # Extract all valid test names from ALL_TESTS + VALID_TESTS=$(echo "$ALL_TESTS" | jq -r '.include[].test') + + # Parse input test selection + IFS=',' read -ra SELECTED_TESTS <<< "${{ github.event.inputs.test_selection }}" + + # Default to all tests if selection is empty or 'all' + if [[ "${SELECTED_TESTS[*]}" == "all" || -z "${SELECTED_TESTS[*]}" ]]; then + mapfile -t SELECTED_TESTS <<< "$VALID_TESTS" + fi + + # Validate and filter selected tests + INVALID_TESTS=() + FILTERED_TESTS=() + for TEST in "${SELECTED_TESTS[@]}"; do + TEST=$(echo "$TEST" | tr -d '[:space:]') # Trim whitespace + # Require an exact, literal match of a whole test name; a word match (-w) + # would also accept fragments such as "ic" or "contrib" and later + # produce an empty matrix entry. + if echo "$VALID_TESTS" | grep -qxF -- "$TEST"; then + FILTERED_TESTS+=("$TEST") + else + INVALID_TESTS+=("$TEST") + fi + done + + # Handle invalid tests + if [[ ${#INVALID_TESTS[@]} -gt 0 ]]; then + echo "::error::Invalid test(s) selected: ${INVALID_TESTS[*]}" + echo "Valid tests are: $(echo "$VALID_TESTS" | tr '\n' ', ')" + exit 1 + fi + + # Build result JSON with defaults applied + RESULT='{"include":[' + FIRST=true + for TEST in "${FILTERED_TESTS[@]}"; do + CONFIG=$(jq -c --arg test "$TEST" '.include[] | select(.test == $test)' <<< "$ALL_TESTS") + FILTERED_WITH_DEFAULTS=$(apply_defaults "$CONFIG") + if [[ "$FIRST" == true ]]; then + FIRST=false + else + RESULT="${RESULT}," + fi + RESULT="${RESULT}${FILTERED_WITH_DEFAULTS}" + done + RESULT="${RESULT}]}" + + # Output the matrix for GitHub Actions + echo "Final matrix configuration:" + echo "$RESULT" | jq . 
+ + # Fix: Use block redirection + # The matrix is written with the multiline "name<<DELIMITER" output syntax + { + echo "matrix<<EOF" + echo "$RESULT" + echo "EOF" + } >> "$GITHUB_OUTPUT" + + echo "=== Matrix Preparation Complete ===" + + ## ====================================================================== + ## Job: build-deb + ## ====================================================================== + + build-deb: + name: Build Apache Cloudberry DEB (Ubuntu 24.04) + env: + JOB_TYPE: build + needs: [check-skip] + runs-on: ubuntu-22.04 + timeout-minutes: 120 + if: github.event.inputs.reuse_artifacts_from_run_id == '' + outputs: + build_timestamp: ${{ steps.set_timestamp.outputs.timestamp }} + + container: + image: apache/incubator-cloudberry:cbdb-build-ubuntu24.04-latest + options: >- + --user root + -h cdw + -v /usr/share:/host_usr_share + -v /usr/local:/host_usr_local + -v /opt:/host_opt + + steps: + - name: Free Disk Space + if: needs.check-skip.outputs.should_skip != 'true' + run: | + echo "=== Disk space before cleanup ===" + df -h / + + # Remove pre-installed tools from host to free disk space + rm -rf /host_opt/hostedtoolcache || true # GitHub Actions tool cache + rm -rf /host_usr_local/lib/android || true # Android SDK + rm -rf /host_usr_share/dotnet || true # .NET SDK + rm -rf /host_opt/ghc || true # Haskell GHC + rm -rf /host_usr_local/.ghcup || true # Haskell GHCup + rm -rf /host_usr_share/swift || true # Swift + rm -rf /host_usr_local/share/powershell || true # PowerShell + rm -rf /host_usr_local/share/chromium || true # Chromium + rm -rf /host_usr_share/miniconda || true # Miniconda + rm -rf /host_opt/az || true # Azure CLI + rm -rf /host_usr_share/sbt || true # Scala Build Tool + + echo "=== Disk space after cleanup ===" + df -h / + + - name: Skip Check + if: needs.check-skip.outputs.should_skip == 'true' + run: | + echo "Build skipped via CI skip flag" >> "$GITHUB_STEP_SUMMARY" + exit 0 + + - name: Set build timestamp + id: set_timestamp # Add an ID to reference this step + run: | + timestamp=$(date +'%Y%m%d_%H%M%S') + echo "timestamp=$timestamp" | tee -a 
"$GITHUB_OUTPUT" # Use GITHUB_OUTPUT for job outputs + echo "BUILD_TIMESTAMP=$timestamp" | tee -a "$GITHUB_ENV" # Also set as environment variable + + - name: Checkout Apache Cloudberry + uses: actions/checkout@v4 + with: + fetch-depth: 1 + submodules: true + + - name: Cloudberry Environment Initialization + shell: bash + env: + LOGS_DIR: build-logs + run: | + set -eo pipefail + if ! su - gpadmin -c "/tmp/init_system.sh"; then + echo "::error::Container initialization failed" + exit 1 + fi + + mkdir -p "${LOGS_DIR}/details" + chown -R gpadmin:gpadmin . + chmod -R 755 . + chmod 777 "${LOGS_DIR}" + + df -kh / + rm -rf /__t/* + df -kh / + + df -h | tee -a "${LOGS_DIR}/details/disk-usage.log" + free -h | tee -a "${LOGS_DIR}/details/memory-usage.log" + + { + echo "=== Environment Information ===" + uname -a + df -h + free -h + env + } | tee -a "${LOGS_DIR}/details/environment.log" + + echo "SRC_DIR=${GITHUB_WORKSPACE}" | tee -a "$GITHUB_ENV" + + - name: Generate Build Job Summary Start + run: | + { + echo "# Build Job Summary (Ubuntu 24.04)" + echo "## Environment" + echo "- Start Time: $(date -u +'%Y-%m-%d %H:%M:%S UTC')" + echo "- ENABLE_DEBUG: ${{ env.ENABLE_DEBUG }}" + echo "- OS Version: $(lsb_release -sd)" + echo "- GCC Version: $(gcc --version | head -n1)" + } >> "$GITHUB_STEP_SUMMARY" + + - name: Run Apache Cloudberry configure script + shell: bash + env: + SRC_DIR: ${{ github.workspace }} + run: | + set -eo pipefail + + export BUILD_DESTINATION=${SRC_DIR}/debian/build + + chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh + if ! 
time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ENABLE_DEBUG=${{ env.ENABLE_DEBUG }} BUILD_DESTINATION=${BUILD_DESTINATION} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh"; then + echo "::error::Configure script failed" + exit 1 + fi + + - name: Run Apache Cloudberry build script + shell: bash + env: + SRC_DIR: ${{ github.workspace }} + run: | + set -eo pipefail + + export BUILD_DESTINATION=${SRC_DIR}/debian/build + + chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/build-cloudberry.sh + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} BUILD_DESTINATION=${BUILD_DESTINATION} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/build-cloudberry.sh"; then + echo "::error::Build script failed" + exit 1 + fi + + - name: Verify build artifacts + shell: bash + run: | + set -eo pipefail + + export BUILD_DESTINATION=${SRC_DIR}/debian/build + + echo "Verifying build artifacts..." + { + echo "=== Build Artifacts Verification ===" + echo "Timestamp: $(date -u)" + + if [ ! -d "${BUILD_DESTINATION}" ]; then + echo "::error::Build artifacts directory not found" + exit 1 + fi + + # Verify critical binaries + critical_binaries=( + "${BUILD_DESTINATION}/bin/postgres" + "${BUILD_DESTINATION}/bin/psql" + ) + + echo "Checking critical binaries..." + for binary in "${critical_binaries[@]}"; do + if [ ! -f "$binary" ]; then + echo "::error::Critical binary missing: $binary" + exit 1 + fi + if [ ! -x "$binary" ]; then + echo "::error::Binary not executable: $binary" + exit 1 + fi + echo "Binary verified: $binary" + ls -l "$binary" + done + + # Test binary execution + echo "Testing binary execution..." + if ! ${BUILD_DESTINATION}/bin/postgres --version; then + echo "::error::postgres binary verification failed" + exit 1 + fi + if ! 
${BUILD_DESTINATION}/bin/psql --version; then + echo "::error::psql binary verification failed" + exit 1 + fi + + echo "All build artifacts verified successfully" + } 2>&1 | tee -a build-logs/details/build-verification.log + + - name: Create Source tarball, create DEB and verify artifacts + shell: bash + env: + CBDB_VERSION: 99.0.0 + BUILD_NUMBER: 1 + SRC_DIR: ${{ github.workspace }} + run: | + set -eo pipefail + + { + echo "=== Artifact Creation Log ===" + echo "Timestamp: $(date -u)" + + cp -r "${SRC_DIR}"/devops/build/packaging/deb/ubuntu24.04/* debian/ + chown -R "$(whoami)" debian + chmod -x debian/*install + + # replace not supported symbols in version + CBDB_VERSION=$(echo "$CBDB_VERSION" | sed "s/\//./g") + CBDB_VERSION=$(echo "$CBDB_VERSION" | sed "s/_/-/g") + + echo "We will built ${CBDB_VERSION}" + export BUILD_DESTINATION=${SRC_DIR}/debian/build + + if ! ${SRC_DIR}/devops/build/packaging/deb/build-deb.sh -v $CBDB_VERSION; then + echo "::error::Build script failed" + exit 1 + fi + + ARCH=$(dpkg --print-architecture) + # Detect OS distribution (e.g., ubuntu24.04, debian12) + if [ -f /etc/os-release ]; then + . 
/etc/os-release + OS_DISTRO=$(echo "${ID}${VERSION_ID}" | tr '[:upper:]' '[:lower:]') + else + OS_DISTRO="unknown" + fi + CBDB_PKG_VERSION=${CBDB_VERSION}-${BUILD_NUMBER}-${OS_DISTRO} + + echo "Produced artifacts" + ls -l ../ + + echo "Copy artifacts to subdirectory for sign/upload" + mkdir ${SRC_DIR}/deb + DEB_FILE="apache-cloudberry-db-incubating_${CBDB_PKG_VERSION}"_"${ARCH}".deb + DBG_DEB_FILE="apache-cloudberry-db-incubating-dbgsym_${CBDB_PKG_VERSION}"_"${ARCH}".ddeb + CHANGES_DEB_FILE="apache-cloudberry-db-incubating_${CBDB_PKG_VERSION}"_"${ARCH}".changes + BUILDINFO_DEB_FILE="apache-cloudberry-db-incubating_${CBDB_PKG_VERSION}"_"${ARCH}".buildinfo + DSC_DEB_FILE="apache-cloudberry-db-incubating_${CBDB_PKG_VERSION}".dsc + SOURCE_FILE="apache-cloudberry-db-incubating_${CBDB_PKG_VERSION}".tar.xz + cp ../"${DEB_FILE}" "${SRC_DIR}/deb" + cp ../"${DBG_DEB_FILE}" "${SRC_DIR}/deb" + cp ../"${CHANGES_DEB_FILE}" "${SRC_DIR}/deb" + cp ../"${BUILDINFO_DEB_FILE}" "${SRC_DIR}/deb" + cp ../"${DSC_DEB_FILE}" "${SRC_DIR}/deb" + cp ../"${SOURCE_FILE}" "${SRC_DIR}/deb" + mkdir "${SRC_DIR}/deb/debian" + cp debian/changelog "${SRC_DIR}/deb/debian" + + # Get package information + echo "Package Information:" + dpkg --info "${SRC_DIR}/deb/${DEB_FILE}" + dpkg --contents "${SRC_DIR}/deb/${DEB_FILE}" + + # Verify critical files in DEB + echo "Verifying critical files in DEB..." + for binary in "bin/postgres" "bin/psql"; do + if ! dpkg --contents "${SRC_DIR}/deb/${DEB_FILE}" | grep -c "${binary}$"; then + echo "::error::Critical binary '${binary}' not found in DEB" + exit 1 + fi + done + + # Record checksums + echo "Calculating checksums..." 
+ sha256sum "${SRC_DIR}/deb/${DEB_FILE}" | tee -a build-logs/details/checksums.log + + echo "Artifacts created and verified successfully" + + + } 2>&1 | tee -a build-logs/details/artifact-creation.log + + - name: Run Apache Cloudberry unittest script + if: needs.check-skip.outputs.should_skip != 'true' + shell: bash + env: + SRC_DIR: ${{ github.workspace }} + run: | + set -eo pipefail + chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/unittest-cloudberry.sh + if ! time su - gpadmin -c "cd ${SRC_DIR} && SRC_DIR=${SRC_DIR} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/unittest-cloudberry.sh"; then + echo "::error::Unittest script failed" + exit 1 + fi + + - name: Generate Build Job Summary End + run: | + { + echo "## Build Results" + echo "- End Time: $(date -u +'%Y-%m-%d %H:%M:%S UTC')" + } >> "$GITHUB_STEP_SUMMARY" + + - name: Upload build logs + uses: actions/upload-artifact@v4 + with: + name: build-logs-ubuntu24.04-${{ env.BUILD_TIMESTAMP }} + path: | + build-logs/ + retention-days: ${{ env.LOG_RETENTION_DAYS }} + + - name: Upload Cloudberry DEB build artifacts + uses: actions/upload-artifact@v4 + with: + name: apache-cloudberry-db-incubating-deb-ubuntu24.04-build-artifacts + retention-days: ${{ env.LOG_RETENTION_DAYS }} + if-no-files-found: error + path: | + deb/*.deb + deb/*.ddeb + + - name: Upload Cloudberry deb source build artifacts + uses: actions/upload-artifact@v4 + with: + name: apache-cloudberry-db-incubating-deb-source-build-artifacts + retention-days: ${{ env.LOG_RETENTION_DAYS }} + if-no-files-found: error + path: | + deb/*.tar.xz + deb/*.changes + deb/*.dsc + deb/*.buildinfo + deb/debian/changelog + + ## ====================================================================== + ## Job: deb-install-test + ## ====================================================================== + + deb-install-test: + name: DEB Install Test Apache Cloudberry (Ubuntu 24.04) + needs: [check-skip, build-deb] + if: | + !cancelled() && + 
(needs.build-deb.result == 'success' || needs.build-deb.result == 'skipped') && + github.event.inputs.reuse_artifacts_from_run_id == '' + runs-on: ubuntu-22.04 + timeout-minutes: 120 + + container: + image: apache/incubator-cloudberry:cbdb-test-ubuntu24.04-latest + options: >- + --user root + -h cdw + -v /usr/share:/host_usr_share + -v /usr/local:/host_usr_local + -v /opt:/host_opt + + steps: + - name: Free Disk Space + if: needs.check-skip.outputs.should_skip != 'true' + run: | + echo "=== Disk space before cleanup ===" + df -h / + + # Remove pre-installed tools from host to free disk space + rm -rf /host_opt/hostedtoolcache || true # GitHub Actions tool cache + rm -rf /host_usr_local/lib/android || true # Android SDK + rm -rf /host_usr_share/dotnet || true # .NET SDK + rm -rf /host_opt/ghc || true # Haskell GHC + rm -rf /host_usr_local/.ghcup || true # Haskell GHCup + rm -rf /host_usr_share/swift || true # Swift + rm -rf /host_usr_local/share/powershell || true # PowerShell + rm -rf /host_usr_local/share/chromium || true # Chromium + rm -rf /host_usr_share/miniconda || true # Miniconda + rm -rf /host_opt/az || true # Azure CLI + rm -rf /host_usr_share/sbt || true # Scala Build Tool + + echo "=== Disk space after cleanup ===" + df -h / + + - name: Skip Check + if: needs.check-skip.outputs.should_skip == 'true' + run: | + echo "DEB install test skipped via CI skip flag" >> "$GITHUB_STEP_SUMMARY" + exit 0 + + - name: Download Cloudberry DEB build artifacts + if: needs.check-skip.outputs.should_skip != 'true' + uses: actions/download-artifact@v4 + with: + name: apache-cloudberry-db-incubating-deb-ubuntu24.04-build-artifacts + path: ${{ github.workspace }}/deb_build_artifacts + run-id: ${{ github.event.inputs.reuse_artifacts_from_run_id || github.run_id }} + merge-multiple: false + + - name: Cloudberry Environment Initialization + if: needs.check-skip.outputs.should_skip != 'true' + shell: bash + env: + LOGS_DIR: install-logs + run: | + set -eo pipefail + if ! 
su - gpadmin -c "/tmp/init_system.sh"; then + echo "::error::Container initialization failed" + exit 1 + fi + + mkdir -p "${LOGS_DIR}/details" + chown -R gpadmin:gpadmin . + chmod -R 755 . + chmod 777 "${LOGS_DIR}" + + df -kh / + rm -rf /__t/* + df -kh / + + df -h | tee -a "${LOGS_DIR}/details/disk-usage.log" + free -h | tee -a "${LOGS_DIR}/details/memory-usage.log" + + { + echo "=== Environment Information ===" + uname -a + df -h + free -h + env + } | tee -a "${LOGS_DIR}/details/environment.log" + + echo "SRC_DIR=${GITHUB_WORKSPACE}" | tee -a "$GITHUB_ENV" + + - name: Verify DEB artifacts + id: verify-artifacts + shell: bash + run: | + set -eo pipefail + + DEB_FILE=$(ls "${GITHUB_WORKSPACE}"/deb_build_artifacts/*.deb) + if [ ! -f "${DEB_FILE}" ]; then + echo "::error::DEB file not found" + exit 1 + fi + + echo "deb_file=${DEB_FILE}" >> "$GITHUB_OUTPUT" + + echo "Verifying DEB artifacts..." + { + echo "=== DEB Verification Summary ===" + echo "Timestamp: $(date -u)" + echo "DEB File: ${DEB_FILE}" + + # Get DEB metadata and verify contents + echo "Package Information:" + dpkg-deb -f "${DEB_FILE}" + + # Get key DEB attributes for verification + DEB_VERSION=$(dpkg-deb -f "${DEB_FILE}" Version | cut -d'-' -f 1) + DEB_RELEASE=$(dpkg-deb -f "${DEB_FILE}" Version | cut -d'-' -f 3) + echo "version=${DEB_VERSION}" >> "$GITHUB_OUTPUT" + echo "release=${DEB_RELEASE}" >> "$GITHUB_OUTPUT" + + # Verify expected binaries are in the DEB + echo "Verifying critical files in DEB..." + for binary in "bin/postgres" "bin/psql"; do + if ! 
dpkg-deb -c "${DEB_FILE}" | grep "${binary}" > /dev/null; then + echo "::error::Critical binary '${binary}' not found in DEB" + exit 1 + fi + done + + echo "DEB Details:" + echo "- Version: ${DEB_VERSION}" + echo "- Release: ${DEB_RELEASE}" + + # Calculate and store checksum + echo "Checksum:" + sha256sum "${DEB_FILE}" + + } 2>&1 | tee -a install-logs/details/deb-verification.log + + - name: Install Cloudberry DEB + shell: bash + env: + DEB_FILE: ${{ steps.verify-artifacts.outputs.deb_file }} + DEB_VERSION: ${{ steps.verify-artifacts.outputs.version }} + DEB_RELEASE: ${{ steps.verify-artifacts.outputs.release }} + run: | + set -eo pipefail + + if [ -z "${DEB_FILE}" ]; then + echo "::error::DEB_FILE environment variable is not set" + exit 1 + fi + + { + echo "=== DEB Installation Log ===" + echo "Timestamp: $(date -u)" + echo "DEB File: ${DEB_FILE}" + echo "Version: ${DEB_VERSION}" + echo "Release: ${DEB_RELEASE}" + + # Clean install location + rm -rf /usr/local/cloudberry-db + + # Install DEB + echo "Starting installation..." + apt-get update + if ! 
apt-get -y install "${DEB_FILE}"; then + echo "::error::DEB installation failed" + exit 1 + fi + + # Change ownership back to gpadmin - it is needed for future tests + chown -R gpadmin:gpadmin /usr/local/cloudberry-db + + echo "Installation completed successfully" + dpkg-query -s apache-cloudberry-db-incubating + echo "Installed files:" + dpkg-query -L apache-cloudberry-db-incubating + } 2>&1 | tee -a install-logs/details/deb-installation.log + + - name: Upload install logs + uses: actions/upload-artifact@v4 + with: + # This job defines no strategy matrix, so the name uses a fixed platform tag + name: install-logs-ubuntu24.04-${{ needs.build-deb.outputs.build_timestamp }} + path: | + install-logs/ + retention-days: ${{ env.LOG_RETENTION_DAYS }} + + - name: Generate Install Test Job Summary End + if: always() + shell: bash {0} + run: | + { + echo "# Installed Package Summary (Ubuntu 24.04)" + echo "\`\`\`" + + dpkg-query -s apache-cloudberry-db-incubating + echo "\`\`\`" + } >> "$GITHUB_STEP_SUMMARY" || true + + ## ====================================================================== + ## Job: test-deb + ## ====================================================================== + + test-deb: + name: ${{ matrix.test }} (Ubuntu 24.04) + needs: [check-skip, build-deb, prepare-test-matrix-deb] + if: | + !cancelled() && + (needs.build-deb.result == 'success' || needs.build-deb.result == 'skipped') + runs-on: ubuntu-22.04 + timeout-minutes: 120 + # actionlint-allow matrix[*].pg_settings + strategy: + fail-fast: false # Continue with other tests if one fails + matrix: ${{ fromJson(needs.prepare-test-matrix-deb.outputs.test-matrix) }} + + container: + image: apache/incubator-cloudberry:cbdb-build-ubuntu24.04-latest + options: >- + --privileged + --user root + --hostname cdw + --shm-size=2gb + --ulimit core=-1 + --cgroupns=host + -v /sys/fs/cgroup:/sys/fs/cgroup:rw + -v /usr/share:/host_usr_share + -v /usr/local:/host_usr_local + -v /opt:/host_opt + + steps: + - name: Free Disk Space + if: needs.check-skip.outputs.should_skip != 'true' + run: | 
+ echo "=== Disk space before cleanup ===" + df -h / + + # Remove pre-installed tools from host to free disk space + rm -rf /host_opt/hostedtoolcache || true # GitHub Actions tool cache + rm -rf /host_usr_local/lib/android || true # Android SDK + rm -rf /host_usr_share/dotnet || true # .NET SDK + rm -rf /host_opt/ghc || true # Haskell GHC + rm -rf /host_usr_local/.ghcup || true # Haskell GHCup + rm -rf /host_usr_share/swift || true # Swift + rm -rf /host_usr_local/share/powershell || true # PowerShell + rm -rf /host_usr_local/share/chromium || true # Chromium + rm -rf /host_usr_share/miniconda || true # Miniconda + rm -rf /host_opt/az || true # Azure CLI + rm -rf /host_usr_share/sbt || true # Scala Build Tool + + echo "=== Disk space after cleanup ===" + df -h / + + - name: Skip Check + if: needs.check-skip.outputs.should_skip == 'true' + run: | + echo "Test ${{ matrix.test }} skipped via CI skip flag" >> "$GITHUB_STEP_SUMMARY" + exit 0 + + - name: Use timestamp from previous job + if: needs.check-skip.outputs.should_skip != 'true' + run: | + echo "Timestamp from output: ${{ needs.build-deb.outputs.build_timestamp }}" + + - name: Cloudberry Environment Initialization + shell: bash + env: + LOGS_DIR: build-logs + run: | + set -eo pipefail + if ! su - gpadmin -c "/tmp/init_system.sh"; then + echo "::error::Container initialization failed" + exit 1 + fi + + mkdir -p "${LOGS_DIR}/details" + chown -R gpadmin:gpadmin . + chmod -R 755 . 
+ chmod 777 "${LOGS_DIR}" + + df -kh / + rm -rf /__t/* + df -kh / + + df -h | tee -a "${LOGS_DIR}/details/disk-usage.log" + free -h | tee -a "${LOGS_DIR}/details/memory-usage.log" + + { + echo "=== Environment Information ===" + uname -a + df -h + free -h + env + } | tee -a "${LOGS_DIR}/details/environment.log" + + echo "SRC_DIR=${GITHUB_WORKSPACE}" | tee -a "$GITHUB_ENV" + + - name: Setup cgroups + if: needs.check-skip.outputs.should_skip != 'true' + shell: bash + run: | + set -uxo pipefail + + if [ "${{ matrix.enable_cgroups }}" = "true" ]; then + + echo "Current mounts:" + mount | grep cgroup + + CGROUP_BASEDIR=/sys/fs/cgroup + + # 1. Basic setup with permissions + sudo chmod -R 777 ${CGROUP_BASEDIR}/ + sudo mkdir -p ${CGROUP_BASEDIR}/gpdb + sudo chmod -R 777 ${CGROUP_BASEDIR}/gpdb + sudo chown -R gpadmin:gpadmin ${CGROUP_BASEDIR}/gpdb + + # 2. Enable controllers + sudo bash -c "echo '+cpu +cpuset +memory +io' > ${CGROUP_BASEDIR}/cgroup.subtree_control" || true + sudo bash -c "echo '+cpu +cpuset +memory +io' > ${CGROUP_BASEDIR}/gpdb/cgroup.subtree_control" || true + + # 3. CPU settings + sudo bash -c "echo 'max 100000' > ${CGROUP_BASEDIR}/gpdb/cpu.max" || true + sudo bash -c "echo '100' > ${CGROUP_BASEDIR}/gpdb/cpu.weight" || true + sudo bash -c "echo '0' > ${CGROUP_BASEDIR}/gpdb/cpu.weight.nice" || true + sudo bash -c "echo 0-$(( $(nproc) - 1 )) > ${CGROUP_BASEDIR}/gpdb/cpuset.cpus" || true + sudo bash -c "echo '0' > ${CGROUP_BASEDIR}/gpdb/cpuset.mems" || true + + # 4. Memory settings + sudo bash -c "echo 'max' > ${CGROUP_BASEDIR}/gpdb/memory.max" || true + sudo bash -c "echo '0' > ${CGROUP_BASEDIR}/gpdb/memory.min" || true + sudo bash -c "echo 'max' > ${CGROUP_BASEDIR}/gpdb/memory.high" || true + + # 5. IO settings + echo "Available block devices:" + lsblk + + sudo bash -c " + if [ -f ${CGROUP_BASEDIR}/gpdb/io.stat ]; then + echo 'Detected IO devices:' + cat ${CGROUP_BASEDIR}/gpdb/io.stat + fi + echo '' > ${CGROUP_BASEDIR}/gpdb/io.max || true + " + + # 6.
Fix permissions again after all writes + sudo chmod -R 777 ${CGROUP_BASEDIR}/gpdb + sudo chown -R gpadmin:gpadmin ${CGROUP_BASEDIR}/gpdb + + # 7. Check required files + echo "Checking required files:" + required_files=( + "cgroup.procs" + "cpu.max" + "cpu.pressure" + "cpu.weight" + "cpu.weight.nice" + "cpu.stat" + "cpuset.cpus" + "cpuset.mems" + "cpuset.cpus.effective" + "cpuset.mems.effective" + "memory.current" + "io.max" + ) + + for file in "${required_files[@]}"; do + if [ -f "${CGROUP_BASEDIR}/gpdb/$file" ]; then + echo "✓ $file exists" + ls -l "${CGROUP_BASEDIR}/gpdb/$file" + else + echo "✗ $file missing" + fi + done + + # 8. Test subdirectory creation + echo "Testing subdirectory creation..." + sudo -u gpadmin bash -c " + TEST_DIR=\${CGROUP_BASEDIR}/gpdb/test6448 + if mkdir -p \$TEST_DIR; then + echo 'Created test directory' + sudo chmod -R 777 \$TEST_DIR + if echo \$\$ > \$TEST_DIR/cgroup.procs; then + echo 'Successfully wrote to cgroup.procs' + cat \$TEST_DIR/cgroup.procs + # Move processes back to parent before cleanup + echo \$\$ > \${CGROUP_BASEDIR}/gpdb/cgroup.procs + else + echo 'Failed to write to cgroup.procs' + ls -la \$TEST_DIR/cgroup.procs + fi + ls -la \$TEST_DIR/ + rmdir \$TEST_DIR || { + echo 'Moving all processes to parent before cleanup' + cat \$TEST_DIR/cgroup.procs | while read pid; do + echo \$pid > \${CGROUP_BASEDIR}/gpdb/cgroup.procs 2>/dev/null || true + done + rmdir \$TEST_DIR + } + else + echo 'Failed to create test directory' + fi + " + + # 9. Verify setup as gpadmin user + echo "Testing cgroup access as gpadmin..." + sudo -u gpadmin bash -c " + echo 'Checking mounts...' + mount | grep cgroup + + echo 'Checking /proc/self/mounts...' + cat /proc/self/mounts | grep cgroup + + if ! grep -q cgroup2 /proc/self/mounts; then + echo 'ERROR: cgroup2 mount NOT visible to gpadmin' + exit 1 + fi + echo 'SUCCESS: cgroup2 mount visible to gpadmin' + + if ! 
[ -w ${CGROUP_BASEDIR}/gpdb ]; then + echo 'ERROR: gpadmin cannot write to gpdb cgroup' + exit 1 + fi + echo 'SUCCESS: gpadmin can write to gpdb cgroup' + + echo 'Verifying key files content:' + echo 'cpu.max:' + cat ${CGROUP_BASEDIR}/gpdb/cpu.max || echo 'Failed to read cpu.max' + echo 'cpuset.cpus:' + cat ${CGROUP_BASEDIR}/gpdb/cpuset.cpus || echo 'Failed to read cpuset.cpus' + echo 'cgroup.subtree_control:' + cat ${CGROUP_BASEDIR}/gpdb/cgroup.subtree_control || echo 'Failed to read cgroup.subtree_control' + " + + # 10. Show final state + echo "Final cgroup state:" + ls -la ${CGROUP_BASEDIR}/gpdb/ + echo "Cgroup setup completed successfully" + else + echo "Cgroup setup skipped" + fi + + - name: "Generate Test Job Summary Start: ${{ matrix.test }}" + if: always() + run: | + { + echo "# Test Job Summary: ${{ matrix.test }} (Ubuntu 24.04)" + echo "## Environment" + echo "- Start Time: $(date -u +'%Y-%m-%d %H:%M:%S UTC')" + + if [[ "${{ needs.check-skip.outputs.should_skip }}" == "true" ]]; then + echo "## Skip Status" + echo "✓ Test execution skipped via CI skip flag" + else + echo "- OS Version: $(. /etc/os-release && echo "${PRETTY_NAME}")" + fi + } >> "$GITHUB_STEP_SUMMARY" + + - name: Download Cloudberry DEB build artifacts + if: needs.check-skip.outputs.should_skip != 'true' + uses: actions/download-artifact@v4 + with: + name: apache-cloudberry-db-incubating-deb-ubuntu24.04-build-artifacts + path: ${{ github.workspace }}/deb_build_artifacts + merge-multiple: false + run-id: ${{ github.event.inputs.reuse_artifacts_from_run_id || github.run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Download Cloudberry Source build artifacts + if: needs.check-skip.outputs.should_skip != 'true' + uses: actions/download-artifact@v4 + with: + name: apache-cloudberry-db-incubating-deb-source-build-artifacts + path: ${{ github.workspace }}/source_build_artifacts + merge-multiple: false + run-id: ${{ github.event.inputs.reuse_artifacts_from_run_id || github.run_id }} + github-token:
${{ secrets.GITHUB_TOKEN }} + + - name: Verify DEB artifacts + if: needs.check-skip.outputs.should_skip != 'true' + id: verify-artifacts + shell: bash + run: | + set -eo pipefail + + SRC_TARBALL_FILE=$(ls "${GITHUB_WORKSPACE}"/source_build_artifacts/apache-cloudberry-db-incubating_*.tar.xz) + if [ ! -f "${SRC_TARBALL_FILE}" ]; then + echo "::error::SRC TARBALL file not found" + exit 1 + fi + + echo "src_tarball_file=${SRC_TARBALL_FILE}" >> "$GITHUB_OUTPUT" + + echo "Verifying SRC TARBALL artifacts..." + { + echo "=== SRC TARBALL Verification Summary ===" + echo "Timestamp: $(date -u)" + echo "SRC TARBALL File: ${SRC_TARBALL_FILE}" + + # Calculate and store checksum + echo "Checksum:" + sha256sum "${SRC_TARBALL_FILE}" + + } 2>&1 | tee -a build-logs/details/src-tarball-verification.log + + DEB_FILE=$(ls "${GITHUB_WORKSPACE}"/deb_build_artifacts/*.deb) + if [ ! -f "${DEB_FILE}" ]; then + echo "::error::DEB file not found" + exit 1 + fi + + echo "deb_file=${DEB_FILE}" >> "$GITHUB_OUTPUT" + + echo "Verifying DEB artifacts..." + { + echo "=== DEB Verification Summary ===" + echo "Timestamp: $(date -u)" + echo "DEB File: ${DEB_FILE}" + + # Get DEB metadata and verify contents + echo "Package Information:" + dpkg-deb -f "${DEB_FILE}" + + # Get key DEB attributes for verification + DEB_VERSION=$(dpkg-deb -f "${DEB_FILE}" Version | cut -d'-' -f 1) + DEB_RELEASE=$(dpkg-deb -f "${DEB_FILE}" Version | cut -d'-' -f 3) + echo "version=${DEB_VERSION}" >> "$GITHUB_OUTPUT" + echo "release=${DEB_RELEASE}" >> "$GITHUB_OUTPUT" + + # Verify expected binaries are in the DEB + echo "Verifying critical files in DEB..." + for binary in "bin/postgres" "bin/psql"; do + if ! 
dpkg-deb -c "${DEB_FILE}" | grep "${binary}" > /dev/null; then + echo "::error::Critical binary '${binary}' not found in DEB" + exit 1 + fi + done + + echo "DEB Details:" + echo "- Version: ${DEB_VERSION}" + echo "- Release: ${DEB_RELEASE}" + + # Calculate and store checksum + echo "Checksum:" + sha256sum "${DEB_FILE}" + + } 2>&1 | tee -a build-logs/details/deb-verification.log + + - name: Install Cloudberry DEB + if: success() && needs.check-skip.outputs.should_skip != 'true' + shell: bash + env: + DEB_FILE: ${{ steps.verify-artifacts.outputs.deb_file }} + DEB_VERSION: ${{ steps.verify-artifacts.outputs.version }} + DEB_RELEASE: ${{ steps.verify-artifacts.outputs.release }} + run: | + set -eo pipefail + + if [ -z "${DEB_FILE}" ]; then + echo "::error::DEB_FILE environment variable is not set" + exit 1 + fi + + { + echo "=== DEB Installation Log ===" + echo "Timestamp: $(date -u)" + echo "DEB File: ${DEB_FILE}" + echo "Version: ${DEB_VERSION}" + echo "Release: ${DEB_RELEASE}" + + # Clean install location + rm -rf /usr/local/cloudberry-db + + # Install DEB + echo "Starting installation..." + apt-get update + if ! apt-get -y install "${DEB_FILE}"; then + echo "::error::DEB installation failed" + exit 1 + fi + + # Change ownership back to gpadmin - it is needed for future tests + chown -R gpadmin:gpadmin /usr/local/cloudberry-db + + echo "Installation completed successfully" + dpkg-query -s apache-cloudberry-db-incubating + echo "Installed files:" + dpkg-query -L apache-cloudberry-db-incubating + } 2>&1 | tee -a build-logs/details/deb-installation.log + + - name: Extract source tarball + if: success() && needs.check-skip.outputs.should_skip != 'true' + shell: bash + env: + SRC_TARBALL_FILE: ${{ steps.verify-artifacts.outputs.src_tarball_file }} + SRC_DIR: ${{ github.workspace }} + run: | + set -eo pipefail + + { + echo "=== Source Extraction Log ===" + echo "Timestamp: $(date -u)" + + echo "Starting extraction..." + file "${SRC_TARBALL_FILE}" + if ! 
time tar xf "${SRC_TARBALL_FILE}" -C "${SRC_DIR}"/.. ; then + echo "::error::Source extraction failed" + exit 1 + fi + + echo "Extraction completed successfully" + echo "Extracted contents:" + ls -la "${SRC_DIR}/../cloudberry" + echo "Directory size:" + du -sh "${SRC_DIR}/../cloudberry" + } 2>&1 | tee -a build-logs/details/source-extraction.log + + - name: Prepare DEB Environment + if: success() && needs.check-skip.outputs.should_skip != 'true' + shell: bash + env: + SRC_DIR: ${{ github.workspace }} + run: | + set -eo pipefail + + { + + # change ownership to gpadmin + chown -R gpadmin "${SRC_DIR}/../cloudberry" + touch build-logs/sections.log + chown gpadmin build-logs/sections.log + chmod 777 build-logs + + # configure link lib directory to temporary location, fix it + rm -rf "${SRC_DIR}"/debian/build/lib + ln -sf /usr/cloudberry-db/lib "${SRC_DIR}"/debian/build/lib + + # check if regress.so exists in src directory - it is needed for contrib/dblink tests + if [ ! -f ${SRC_DIR}/src/test/regress/regress.so ]; then + ln -sf /usr/cloudberry-db/lib/postgresql/regress.so ${SRC_DIR}/src/test/regress/regress.so + fi + + # FIXME + # temporarily install gdb - delete after creating new docker build/test containers + apt-get update + apt-get -y install gdb + + } 2>&1 | tee -a build-logs/details/prepare-deb-env.log + + - name: Create Apache Cloudberry demo cluster + if: success() && needs.check-skip.outputs.should_skip != 'true' + shell: bash + env: + SRC_DIR: ${{ github.workspace }} + run: | + set -eo pipefail + + { + chmod +x "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh + + # Build BLDWRAP_POSTGRES_CONF_ADDONS for shared_preload_libraries if specified + EXTRA_CONF="" + if [[ -n "${{ matrix.shared_preload_libraries }}" ]]; then + EXTRA_CONF="shared_preload_libraries='${{ matrix.shared_preload_libraries }}'" + echo "Adding shared_preload_libraries: ${{ matrix.shared_preload_libraries }}" + fi + + if !
time su - gpadmin -c "cd ${SRC_DIR} && NUM_PRIMARY_MIRROR_PAIRS='${{ matrix.num_primary_mirror_pairs }}' BLDWRAP_POSTGRES_CONF_ADDONS=\"${EXTRA_CONF}\" SRC_DIR=${SRC_DIR} ${SRC_DIR}/devops/build/automation/cloudberry/scripts/create-cloudberry-demo-cluster.sh"; then + echo "::error::Demo cluster creation failed" + exit 1 + fi + + } 2>&1 | tee -a build-logs/details/create-cloudberry-demo-cluster.log + + - name: "Run Tests: ${{ matrix.test }}" + if: success() && needs.check-skip.outputs.should_skip != 'true' + env: + SRC_DIR: ${{ github.workspace }} + BUILD_DESTINATION: ${{ github.workspace }}/debian/build + shell: bash {0} + run: | + set -o pipefail + + # Initialize test status + overall_status=0 + + # Create logs directory structure + mkdir -p build-logs/details + + # Core file config + mkdir -p "/tmp/cloudberry-cores" + chmod 1777 "/tmp/cloudberry-cores" + sysctl -w kernel.core_pattern="/tmp/cloudberry-cores/core-%e-%s-%u-%g-%p-%t" + sysctl kernel.core_pattern + su - gpadmin -c "ulimit -c" + + # WARNING: PostgreSQL Settings + # When adding new pg_settings key/value pairs: + # 1. Add a new check below for the setting + # 2. Follow the same pattern as optimizer + # 3. Update matrix entries to include the new setting + + + # Create extension if required + if [[ "${{ matrix.extension != '' }}" == "true" ]]; then + case "${{ matrix.extension }}" in + gp_stats_collector) + if ! 
su - gpadmin -c "source ${BUILD_DESTINATION}/cloudberry-env.sh && \ + source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh && \ + gpconfig -c shared_preload_libraries -v 'gp_stats_collector' && \ + gpstop -ra && \ + echo 'CREATE EXTENSION IF NOT EXISTS gp_stats_collector; \ + SHOW shared_preload_libraries; \ + TABLE pg_extension;' | \ + psql postgres" + then + echo "Error creating gp_stats_collector extension" + exit 1 + fi + ;; + *) + echo "Unknown extension: ${{ matrix.extension }}" + exit 1 + ;; + esac + fi + + # Set PostgreSQL options if defined + PG_OPTS="" + if [[ "${{ matrix.pg_settings.optimizer != '' }}" == "true" ]]; then + PG_OPTS="$PG_OPTS -c optimizer=${{ matrix.pg_settings.optimizer }}" + fi + + if [[ "${{ matrix.pg_settings.default_table_access_method != '' }}" == "true" ]]; then + PG_OPTS="$PG_OPTS -c default_table_access_method=${{ matrix.pg_settings.default_table_access_method }}" + fi + + # Read configs into array + IFS=' ' read -r -a configs <<< "${{ join(matrix.make_configs, ' ') }}" + + echo "=== Starting test execution for ${{ matrix.test }} ===" + echo "Number of configurations to execute: ${#configs[@]}" + echo "" + + # Execute each config separately + for ((i=0; i<${#configs[@]}; i++)); do + config="${configs[$i]}" + IFS=':' read -r dir target <<< "$config" + + echo "=== Executing configuration $((i+1))/${#configs[@]} ===" + echo "Make command: make -C $dir $target" + echo "Environment:" + echo "- PGOPTIONS: ${PG_OPTS}" + + # Create unique log file for this configuration + config_log="build-logs/details/make-${{ matrix.test }}-config$i.log" + + # Clean up any existing core files + echo "Cleaning up existing core files..." + rm -f /tmp/cloudberry-cores/core-* + + # Execute test script with proper environment setup + if ! 
time su - gpadmin -c "cd ${SRC_DIR} && \ + MAKE_NAME='${{ matrix.test }}-config$i' \ + MAKE_TARGET='$target' \ + MAKE_DIRECTORY='-C $dir' \ + PGOPTIONS='${PG_OPTS}' \ + SRC_DIR='${SRC_DIR}' \ + ${SRC_DIR}/devops/build/automation/cloudberry/scripts/test-cloudberry.sh" \ + 2>&1 | tee "$config_log"; then + echo "::warning::Test execution failed for configuration $((i+1)): make -C $dir $target" + overall_status=1 + fi + + # Check for results directory + results_dir="${dir}/results" + + if [[ -d "$results_dir" ]]; then + echo "-----------------------------------------" | tee -a build-logs/details/make-${{ matrix.test }}-config$i-results.log + echo "Found results directory: $results_dir" | tee -a build-logs/details/make-${{ matrix.test }}-config$i-results.log + echo "Contents of results directory:" | tee -a build-logs/details/make-${{ matrix.test }}-config$i-results.log + + find "$results_dir" -type f -ls 2>&1 | tee -a build-logs/details/make-${{ matrix.test }}-config$i-results.log + echo "-----------------------------------------" | tee -a build-logs/details/make-${{ matrix.test }}-config$i-results.log + else + echo "-----------------------------------------" + echo "Results directory $results_dir does not exist" + echo "-----------------------------------------" + fi + + # Analyze any core files generated by this test configuration + echo "Analyzing core files for configuration ${{ matrix.test }}-config$i..." + test_id="${{ matrix.test }}-config$i" + + # List the cores directory + echo "-----------------------------------------" + echo "Cores directory: /tmp/cloudberry-cores" + echo "Contents of cores directory:" + ls -Rl "/tmp/cloudberry-cores" + echo "-----------------------------------------" + + "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/analyze_core_dumps.sh "$test_id" + core_analysis_rc=$?
+ case "$core_analysis_rc" in + 0) echo "No core dumps found for this configuration" ;; + 1) echo "Core dumps were found and analyzed successfully" ;; + 2) echo "::warning::Issues encountered during core dump analysis" ;; + *) echo "::error::Unexpected return code from core dump analysis: $core_analysis_rc" ;; + esac + + echo "Log file: $config_log" + echo "=== End configuration $((i+1)) execution ===" + echo "" + done + + echo "=== Test execution completed ===" + echo "Log files:" + ls -l build-logs/details/ + + # Store number of configurations for parsing step + echo "NUM_CONFIGS=${#configs[@]}" >> "$GITHUB_ENV" + + # Report overall status + if [ $overall_status -eq 0 ]; then + echo "All test executions completed successfully" + else + echo "::warning::Some test executions failed, check individual logs for details" + fi + + exit $overall_status + + - name: "Parse Test Results: ${{ matrix.test }}" + id: test-results + if: always() && needs.check-skip.outputs.should_skip != 'true' + env: + SRC_DIR: ${{ github.workspace }} + shell: bash {0} + run: | + set -o pipefail + + overall_status=0 + + # Get configs array to create context for results + IFS=' ' read -r -a configs <<< "${{ join(matrix.make_configs, ' ') }}" + + echo "=== Starting results parsing for ${{ matrix.test }} ===" + echo "Number of configurations to parse: ${#configs[@]}" + echo "" + + # Parse each configuration's results independently + for ((i=0; i "test_results.$i.txt" + overall_status=1 + continue + fi + + # Parse this configuration's results + + MAKE_NAME="${{ matrix.test }}-config$i" \ + "${SRC_DIR}"/devops/build/automation/cloudberry/scripts/parse-test-results.sh "$config_log" + status_code=$? 
+ + { + echo "SUITE_NAME=${{ matrix.test }}" + echo "DIR=${dir}" + echo "TARGET=${target}" + } >> test_results.txt + + # Process return code + case $status_code in + 0) # All tests passed + echo "All tests passed successfully" + if [ -f test_results.txt ]; then + (echo "MAKE_COMMAND=\"make -C $dir $target\""; cat test_results.txt) | tee "test_results.${{ matrix.test }}.$i.txt" + rm test_results.txt + fi + ;; + 1) # Tests failed but parsed successfully + echo "Test failures detected but properly parsed" + if [ -f test_results.txt ]; then + (echo "MAKE_COMMAND=\"make -C $dir $target\""; cat test_results.txt) | tee "test_results.${{ matrix.test }}.$i.txt" + rm test_results.txt + fi + overall_status=1 + ;; + 2) # Parse error or missing file + echo "::warning::Could not parse test results properly for configuration $((i+1))" + { + echo "MAKE_COMMAND=\"make -C $dir $target\"" + echo "STATUS=parse_error" + echo "TOTAL_TESTS=0" + echo "FAILED_TESTS=0" + echo "PASSED_TESTS=0" + echo "IGNORED_TESTS=0" + } | tee "test_results.${{ matrix.test }}.$i.txt" + overall_status=1 + ;; + *) # Unexpected error + echo "::warning::Unexpected error during test results parsing for configuration $((i+1))" + { + echo "MAKE_COMMAND=\"make -C $dir $target\"" + echo "STATUS=unknown_error" + echo "TOTAL_TESTS=0" + echo "FAILED_TESTS=0" + echo "PASSED_TESTS=0" + echo "IGNORED_TESTS=0" + } | tee "test_results.${{ matrix.test }}.$i.txt" + overall_status=1 + ;; + esac + + echo "Results stored in test_results.$i.txt" + echo "=== End parsing for configuration $((i+1)) ===" + echo "" + done + + # Report status of results files + echo "=== Results file status ===" + echo "Generated results files:" + for ((i=0; i> "$GITHUB_STEP_SUMMARY" || true + + - name: Upload test logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-logs-${{ matrix.test }}-${{ needs.build-deb.outputs.build_timestamp }} + path: | + build-logs/ + retention-days: ${{ env.LOG_RETENTION_DAYS }} + + - name: Upload 
Test Metadata + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-metadata-${{ matrix.test }} + path: | + test_results*.txt + retention-days: ${{ env.LOG_RETENTION_DAYS }} + + - name: Upload test results files + uses: actions/upload-artifact@v4 + with: + name: results-${{ matrix.test }}-${{ needs.build-deb.outputs.build_timestamp }} + path: | + **/regression.out + **/regression.diffs + **/results/ + retention-days: ${{ env.LOG_RETENTION_DAYS }} + + - name: Upload test regression logs + if: failure() || cancelled() + uses: actions/upload-artifact@v4 + with: + name: regression-logs-${{ matrix.test }}-${{ needs.build-deb.outputs.build_timestamp }} + path: | + **/regression.out + **/regression.diffs + **/results/ + gpAux/gpdemo/datadirs/standby/log/ + gpAux/gpdemo/datadirs/qddir/demoDataDir-1/log/ + gpAux/gpdemo/datadirs/dbfast1/demoDataDir0/log/ + gpAux/gpdemo/datadirs/dbfast2/demoDataDir1/log/ + gpAux/gpdemo/datadirs/dbfast3/demoDataDir2/log/ + gpAux/gpdemo/datadirs/dbfast_mirror1/demoDataDir0/log/ + gpAux/gpdemo/datadirs/dbfast_mirror2/demoDataDir1/log/ + gpAux/gpdemo/datadirs/dbfast_mirror3/demoDataDir2/log/ + retention-days: ${{ env.LOG_RETENTION_DAYS }} + + ## ====================================================================== + ## Job: report-deb + ## ====================================================================== + + report-deb: + name: Generate Apache Cloudberry Build Report (Ubuntu 24.04) + needs: [check-skip, build-deb, prepare-test-matrix-deb, deb-install-test, test-deb] + if: always() + runs-on: ubuntu-22.04 + steps: + - name: Generate Final Report + run: | + { + echo "# Apache Cloudberry Build Pipeline Report" + + if [[ "${{ needs.check-skip.outputs.should_skip }}" == "true" ]]; then + echo "## CI Skip Status" + echo "✅ CI checks skipped via skip flag" + echo "- Completion Time: $(date -u +'%Y-%m-%d %H:%M:%S UTC')" + else + echo "## Job Status" + echo "- Build Job: ${{ needs.build-deb.result }}" + echo "- Test Job: ${{ 
needs.test-deb.result }}" + echo "- Completion Time: $(date -u +'%Y-%m-%d %H:%M:%S UTC')" + + if [[ "${{ needs.build-deb.result }}" == "success" && "${{ needs.test-deb.result }}" == "success" ]]; then + echo "✅ Pipeline completed successfully" + else + echo "⚠️ Pipeline completed with failures" + + if [[ "${{ needs.build-deb.result }}" != "success" ]]; then + echo "### Build Job Failure" + echo "Check build logs for details" + fi + + if [[ "${{ needs.test-deb.result }}" != "success" ]]; then + echo "### Test Job Failure" + echo "Check test logs and regression files for details" + fi + fi + fi + } >> "$GITHUB_STEP_SUMMARY" + + - name: Notify on failure + if: | + needs.check-skip.outputs.should_skip != 'true' && + (needs.build-deb.result != 'success' || needs.test-deb.result != 'success') + run: | + echo "::error::Build/Test pipeline failed! Check job summaries and logs for details" + echo "Timestamp: $(date -u +'%Y-%m-%d %H:%M:%S UTC')" + echo "Build Result: ${{ needs.build-deb.result }}" + echo "Test Result: ${{ needs.test-deb.result }}" diff --git a/.github/workflows/build-deb-cloudberry.yml b/.github/workflows/build-deb-cloudberry.yml index 85d917b8ff0..f8eadee3c8f 100644 --- a/.github/workflows/build-deb-cloudberry.yml +++ b/.github/workflows/build-deb-cloudberry.yml @@ -252,6 +252,10 @@ jobs: "gpcontrib/gp_sparse_vector:installcheck", "gpcontrib/gp_toolkit:installcheck"] }, + {"test":"gpcontrib-gp-stats-collector", + "make_configs":["gpcontrib/gp_stats_collector:installcheck"], + "extension":"gp_stats_collector" + }, {"test":"ic-cbdb-parallel", "make_configs":["src/test/regress:installcheck-cbdb-parallel"] } @@ -1341,6 +1345,7 @@ jobs: if: success() && needs.check-skip.outputs.should_skip != 'true' env: SRC_DIR: ${{ github.workspace }} + BUILD_DESTINATION: ${{ github.workspace }}/debian/build shell: bash {0} run: | set -o pipefail @@ -1365,6 +1370,30 @@ jobs: # 3. 
Update matrix entries to include the new setting + # Create extension if required + if [[ "${{ matrix.extension != '' }}" == "true" ]]; then + case "${{ matrix.extension }}" in + gp_stats_collector) + if ! su - gpadmin -c "source ${BUILD_DESTINATION}/cloudberry-env.sh && \ + source ${SRC_DIR}/gpAux/gpdemo/gpdemo-env.sh && \ + gpconfig -c shared_preload_libraries -v 'gp_stats_collector' && \ + gpstop -ra && \ + echo 'CREATE EXTENSION IF NOT EXISTS gp_stats_collector; \ + SHOW shared_preload_libraries; \ + TABLE pg_extension;' | \ + psql postgres" + then + echo "Error creating gp_stats_collector extension" + exit 1 + fi + ;; + *) + echo "Unknown extension: ${{ matrix.extension }}" + exit 1 + ;; + esac + fi + # Set PostgreSQL options if defined PG_OPTS="" if [[ "${{ matrix.pg_settings.optimizer != '' }}" == "true" ]]; then @@ -1589,6 +1618,7 @@ jobs: - name: Check and Display Regression Diffs if: always() + shell: bash {0} run: | # Search for regression.diffs recursively found_file=$(find . 
-type f -name "regression.diffs" | head -n 1) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 4f7f74d54b2..2b6a81c91f4 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -90,7 +90,7 @@ jobs: /usr/local/cloudberry-db/lib sudo chown -R gpadmin:gpadmin /usr/local/cloudberry-db su - gpadmin -c "cd $WORKSPACE" - export LD_LIBRARY_PATH=/usr/local/cloudberry-db/lib:LD_LIBRARY_PATH + export LD_LIBRARY_PATH=/usr/local/cloudberry-db/lib:${LD_LIBRARY_PATH:-""} export PATH=$WORKSPACE/coverity_tool/bin:$PATH ./configure --prefix=/usr/local/cloudberry-db \ --disable-external-fts \ diff --git a/.github/workflows/docker-cbdb-build-containers.yml b/.github/workflows/docker-cbdb-build-containers.yml index dd9ea9acd27..538b4e9b179 100644 --- a/.github/workflows/docker-cbdb-build-containers.yml +++ b/.github/workflows/docker-cbdb-build-containers.yml @@ -60,6 +60,7 @@ on: paths: - 'devops/deploy/docker/build/rocky8/**' - 'devops/deploy/docker/build/rocky9/**' + - 'devops/deploy/docker/build/rocky10/**' - 'devops/deploy/docker/build/ubuntu22.04/**' - 'devops/deploy/docker/build/ubuntu24.04/**' pull_request: @@ -81,7 +82,7 @@ jobs: # Matrix strategy to build for both Rocky Linux 8 and 9, Ubuntu 22.04 and 24.04 strategy: matrix: - platform: ['rocky8', 'rocky9', 'ubuntu22.04', 'ubuntu24.04'] + platform: ['rocky8', 'rocky9', 'rocky10', 'ubuntu22.04', 'ubuntu24.04'] steps: # Checkout repository code with full history @@ -108,6 +109,8 @@ jobs: - 'devops/deploy/docker/build/rocky8/**' rocky9: - 'devops/deploy/docker/build/rocky9/**' + rocky10: + - 'devops/deploy/docker/build/rocky10/**' ubuntu22.04: - 'devops/deploy/docker/build/ubuntu22.04/**' ubuntu24.04: @@ -117,13 +120,13 @@ jobs: # This allows building ARM64 images on AMD64 infrastructure and vice versa - name: Set up QEMU if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' }} - uses: docker/setup-qemu-action@v3 + uses: 
docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0 # Login to DockerHub for pushing images # Requires DOCKERHUB_USER and DOCKERHUB_TOKEN secrets to be set - name: Login to Docker Hub if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' && github.event_name == 'push' && github.ref == 'refs/heads/main' }} - uses: docker/login-action@v3 + uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0 with: username: ${{ secrets.DOCKERHUB_USER }} password: ${{ secrets.DOCKERHUB_TOKEN }} @@ -132,7 +135,7 @@ jobs: # Enable debug mode for better troubleshooting - name: Set up Docker Buildx if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' }} - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0 with: buildkitd-flags: --debug @@ -172,7 +175,7 @@ jobs: # This creates a manifest list that supports both architectures - name: Build and Push Multi-arch Docker images if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' && github.event_name == 'push' && github.ref == 'refs/heads/main' }} - uses: docker/build-push-action@v6 + uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2 with: context: ./devops/deploy/docker/build/${{ matrix.platform }} push: true diff --git a/.github/workflows/docker-cbdb-test-containers.yml b/.github/workflows/docker-cbdb-test-containers.yml index 1c8e1c8a9a2..4d0fb8def33 100644 --- a/.github/workflows/docker-cbdb-test-containers.yml +++ b/.github/workflows/docker-cbdb-test-containers.yml @@ -49,6 +49,7 @@ on: paths: - 'devops/deploy/docker/test/rocky8/**' - 'devops/deploy/docker/test/rocky9/**' + - 'devops/deploy/docker/test/rocky10/**' - 'devops/deploy/docker/test/ubuntu22.04/**' - 'devops/deploy/docker/test/ubuntu24.04/**' pull_request: @@ -68,7 +69,7 @@ jobs: strategy: matrix: # Build for Rocky Linux 8 and 9, Ubuntu 22.04 and 24.04 - platform: ['rocky8', 'rocky9', 'ubuntu22.04', 
'ubuntu24.04'] + platform: ['rocky8', 'rocky9', 'rocky10', 'ubuntu22.04', 'ubuntu24.04'] steps: # Checkout repository code @@ -92,6 +93,8 @@ jobs: - 'devops/deploy/docker/test/rocky8/**' rocky9: - 'devops/deploy/docker/test/rocky9/**' + rocky10: + - 'devops/deploy/docker/test/rocky10/**' ubuntu22.04: - 'devops/deploy/docker/test/ubuntu22.04/**' ubuntu24.04: @@ -106,12 +109,12 @@ jobs: # This allows building ARM64 images on AMD64 infrastructure and vice versa - name: Set up QEMU if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' }} - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0 # Login to DockerHub for pushing images - name: Login to Docker Hub if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' && github.event_name == 'push' && github.ref == 'refs/heads/main' }} - uses: docker/login-action@v3 + uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0 with: username: ${{ secrets.DOCKERHUB_USER }} password: ${{ secrets.DOCKERHUB_TOKEN }} @@ -119,7 +122,7 @@ jobs: # Setup Docker Buildx for efficient multi-architecture builds - name: Set up Docker Buildx if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' }} - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0 with: buildkitd-flags: --debug @@ -142,7 +145,7 @@ jobs: # Creates a manifest list that supports both architectures - name: Build and Push Multi-arch Docker images if: ${{ steps.platform-filter.outputs[matrix.platform] == 'true' && github.event_name == 'push' && github.ref == 'refs/heads/main' }} - uses: docker/build-push-action@v6 + uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2 with: context: ./devops/deploy/docker/test/${{ matrix.platform }} push: true diff --git a/.github/workflows/sonarqube.yml b/.github/workflows/sonarqube.yml index e67c2d96a54..93379d184ea 100644 --- 
a/.github/workflows/sonarqube.yml +++ b/.github/workflows/sonarqube.yml @@ -94,7 +94,7 @@ jobs: /usr/local/xerces-c/lib/libxerces-c-3.3.so \ /usr/local/cloudberry-db/lib sudo chown -R gpadmin:gpadmin /usr/local/cloudberry-db - export LD_LIBRARY_PATH=/usr/local/cloudberry-db/lib:LD_LIBRARY_PATH + export LD_LIBRARY_PATH=/usr/local/cloudberry-db/lib:${LD_LIBRARY_PATH:-""} ./configure --prefix=/usr/local/cloudberry-db \ --disable-external-fts \ --enable-gpcloud \ diff --git a/LICENSE b/LICENSE index 28796e982e1..0ccd7072122 100644 --- a/LICENSE +++ b/LICENSE @@ -246,7 +246,7 @@ The PostgreSQL software includes: src/backend/utils/adt/inet_cidr_ntop.c src/backend/utils/adt/inet_net_pton.c - see licenses/licenses/LICENSE-isc.txt + see licenses/LICENSE-isc.txt ---------------------------- Perl Artistic License 2.0 (exception) diff --git a/configure b/configure index e91414fb52c..74d1415d637 100755 --- a/configure +++ b/configure @@ -721,6 +721,9 @@ GREP with_apr_config with_libcurl with_rt +PROTOC +with_gp_stats_collector +with_diskquota with_zstd with_libbz2 LZ4_LIBS @@ -943,6 +946,8 @@ with_zlib with_lz4 with_libbz2 with_zstd +with_diskquota +with_gp_stats_collector with_rt with_libcurl with_apr_config @@ -1703,11 +1708,14 @@ Optional Packages: --with-lz4 build with LZ4 support --without-libbz2 do not use bzip2 --without-zstd do not build with Zstandard + --with-diskquota build with diskquota extension + --with-gp_stats_collector + build with stats collector extension --without-rt do not use Realtime Library --without-libcurl do not use libcurl --with-apr-config=PATH path to apr-1-config utility --with-gnu-ld assume the C compiler uses GNU ld [default=no] - --without-mdblocales build without MDB locales + --with-mdblocales build with MDB locales --with-ssl=LIB use LIB for SSL/TLS support (openssl) --with-openssl obsolete spelling of --with-ssl=openssl @@ -2924,6 +2932,7 @@ PG_PACKAGE_VERSION=14.7 + ac_aux_dir= for ac_dir in config "$srcdir"/config; do if test -f 
"$ac_dir/install-sh"; then @@ -11157,6 +11166,184 @@ fi $as_echo "$with_zstd" >&6; } +# +# diskquota +# + + + +# Check whether --with-diskquota was given. +if test "${with_diskquota+set}" = set; then : + withval=$with_diskquota; + case $withval in + yes) + : + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --with-diskquota option" "$LINENO" 5 + ;; + esac + +else + with_diskquota=no + +fi + + + + +# +# gp_stats_collector +# + + + +# Check whether --with-gp_stats_collector was given. +if test "${with_gp_stats_collector+set}" = set; then : + withval=$with_gp_stats_collector; + case $withval in + yes) + : + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --with-gp_stats_collector option" "$LINENO" 5 + ;; + esac + +else + with_gp_stats_collector=no + +fi + + + + +if test "$with_gp_stats_collector" = yes; then + +pkg_failed=no +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for protobuf >= 3.0.0" >&5 +$as_echo_n "checking for protobuf >= 3.0.0... " >&6; } + +if test -n "$PROTOBUF_CFLAGS"; then + pkg_cv_PROTOBUF_CFLAGS="$PROTOBUF_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"protobuf >= 3.0.0\""; } >&5 + ($PKG_CONFIG --exists --print-errors "protobuf >= 3.0.0") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_PROTOBUF_CFLAGS=`$PKG_CONFIG --cflags "protobuf >= 3.0.0" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$PROTOBUF_LIBS"; then + pkg_cv_PROTOBUF_LIBS="$PROTOBUF_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"protobuf >= 3.0.0\""; } >&5 + ($PKG_CONFIG --exists --print-errors "protobuf >= 3.0.0") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_PROTOBUF_LIBS=`$PKG_CONFIG --libs "protobuf >= 3.0.0" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + PROTOBUF_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "protobuf >= 3.0.0" 2>&1` + else + PROTOBUF_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "protobuf >= 3.0.0" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$PROTOBUF_PKG_ERRORS" >&5 + + as_fn_error $? "protobuf >= 3.0.0 is required for gp_stats_collector" "$LINENO" 5 + +elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + as_fn_error $? "protobuf >= 3.0.0 is required for gp_stats_collector" "$LINENO" 5 + +else + PROTOBUF_CFLAGS=$pkg_cv_PROTOBUF_CFLAGS + PROTOBUF_LIBS=$pkg_cv_PROTOBUF_LIBS + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + +fi + # Extract the first word of "protoc", so it can be a program name with args. +set dummy protoc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_path_PROTOC+:} false; then : + $as_echo_n "(cached) " >&6 +else + case $PROTOC in + [\\/]* | ?:[\\/]*) + ac_cv_path_PROTOC="$PROTOC" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_path_PROTOC="$as_dir/$ac_word$ac_exec_ext" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + + test -z "$ac_cv_path_PROTOC" && ac_cv_path_PROTOC="no" + ;; +esac +fi +PROTOC=$ac_cv_path_PROTOC +if test -n "$PROTOC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PROTOC" >&5 +$as_echo "$PROTOC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + if test "$PROTOC" = no; then + as_fn_error $? "protoc is required for gp_stats_collector but was not found in PATH" "$LINENO" 5 + fi +fi + if test "$with_zstd" = yes; then pkg_failed=no @@ -12979,56 +13166,6 @@ $as_echo "${python_libspec} ${python_additional_libs}" >&6; } -fi - -if test "$with_mdblocales" = yes; then - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for mdb_setlocale in -lmdblocales" >&5 -$as_echo_n "checking for mdb_setlocale in -lmdblocales... " >&6; } -if ${ac_cv_lib_mdblocales_mdb_setlocale+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_check_lib_save_LIBS=$LIBS -LIBS="-lmdblocales $LIBS" -cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char mdb_setlocale (); -int -main () -{ -return mdb_setlocale (); - ; - return 0; -} -_ACEOF -if ac_fn_c_try_link "$LINENO"; then : - ac_cv_lib_mdblocales_mdb_setlocale=yes -else - ac_cv_lib_mdblocales_mdb_setlocale=no -fi -rm -f core conftest.err conftest.$ac_objext \ - conftest$ac_exeext conftest.$ac_ext -LIBS=$ac_check_lib_save_LIBS -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mdblocales_mdb_setlocale" >&5 -$as_echo "$ac_cv_lib_mdblocales_mdb_setlocale" >&6; } -if test "x$ac_cv_lib_mdblocales_mdb_setlocale" = xyes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE_LIBMDBLOCALES 1 -_ACEOF - - LIBS="-lmdblocales $LIBS" - -else - as_fn_error $? "mdblocales library not found" "$LINENO" 5 -fi - fi if test x"$cross_compiling" = x"yes" && test -z "$with_system_tzdata"; then @@ -14953,6 +15090,56 @@ fi fi +if test "$with_mdblocales" = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for mdb_setlocale in -lmdblocales" >&5 +$as_echo_n "checking for mdb_setlocale in -lmdblocales... " >&6; } +if ${ac_cv_lib_mdblocales_mdb_setlocale+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lmdblocales $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char mdb_setlocale (); +int +main () +{ +return mdb_setlocale (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_mdblocales_mdb_setlocale=yes +else + ac_cv_lib_mdblocales_mdb_setlocale=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mdblocales_mdb_setlocale" >&5 +$as_echo "$ac_cv_lib_mdblocales_mdb_setlocale" >&6; } +if test "x$ac_cv_lib_mdblocales_mdb_setlocale" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBMDBLOCALES 1 +_ACEOF + + LIBS="-lmdblocales $LIBS" + +else + as_fn_error $? "mdblocales library not found" "$LINENO" 5 +fi + +fi + if test "$enable_external_fts" = yes; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for jansson_version_str in -ljansson" >&5 $as_echo_n "checking for jansson_version_str in -ljansson... " >&6; } diff --git a/configure.ac b/configure.ac index 9a07159cecf..f0584d65076 100644 --- a/configure.ac +++ b/configure.ac @@ -1368,6 +1368,31 @@ PGAC_ARG_BOOL(with, zstd, yes, [do not build with Zstandard], AC_MSG_RESULT([$with_zstd]) AC_SUBST(with_zstd) +# +# diskquota +# +PGAC_ARG_BOOL(with, diskquota, no, + [build with diskquota extension]) +AC_SUBST(with_diskquota) + +# +# gp_stats_collector +# +PGAC_ARG_BOOL(with, gp_stats_collector, no, + [build with stats collector extension]) +AC_SUBST(with_gp_stats_collector) + +if test "$with_gp_stats_collector" = yes; then + PKG_CHECK_MODULES([PROTOBUF], [protobuf >= 3.0.0], + [], + [AC_MSG_ERROR([protobuf >= 3.0.0 is required for gp_stats_collector])] + ) + AC_PATH_PROG([PROTOC], [protoc], [no]) + if test "$PROTOC" = no; then + AC_MSG_ERROR([protoc is required for gp_stats_collector but was not found in PATH]) + fi +fi + if test "$with_zstd" = yes; then dnl zstd_errors.h was renamed from error_public.h in v1.4.0 PKG_CHECK_MODULES([ZSTD], [libzstd >= 1.4.0]) @@ -1469,7 
+1494,7 @@ AC_SUBST(install_bin) # MDB locales # -PGAC_ARG_BOOL(with, mdblocales, yes, [build without MDB locales], +PGAC_ARG_BOOL(with, mdblocales, no, [build with MDB locales], [AC_DEFINE([USE_MDBLOCALES], 1, [Define to 1 to build with MDB locales. (--with-mdblocales)])]) AC_SUBST(USE_MDBLOCALES) @@ -3194,4 +3219,11 @@ AC_OUTPUT # The configure args contain '-Wl,-rpath,\$$ORIGIN`, when it falls # as a C literal string, it's invalid, so converting `\` to `\\` # to be correct for C program. -sed -i '/define CONFIGURE_ARGS/s,\([[^\\]]\)\\\$\$,\1\\\\$$,g' src/include/pg_config.h +case $build_os in +darwin*) + sed -i '' '/define CONFIGURE_ARGS/s,\([[^\\]]\)\\\$\$,\1\\\\$$,g' src/include/pg_config.h + ;; +*) + sed -i '/define CONFIGURE_ARGS/s,\([[^\\]]\)\\\$\$,\1\\\\$$,g' src/include/pg_config.h + ;; +esac diff --git a/contrib/btree_gist/expected/cash_optimizer.out b/contrib/btree_gist/expected/cash_optimizer.out index 171dec7e511..f2c9ac07420 100644 --- a/contrib/btree_gist/expected/cash_optimizer.out +++ b/contrib/btree_gist/expected/cash_optimizer.out @@ -77,12 +77,11 @@ SELECT a, a <-> '21472.79' FROM moneytmp ORDER BY a <-> '21472.79' LIMIT 3; QUERY PLAN ------------------------------------------------------------ Limit - -> Sort - Sort Key: ((a <-> '$21,472.79'::money)) - -> Result - -> Gather Motion 3:1 (slice1; segments: 3) - -> Seq Scan on moneytmp - Optimizer: GPORCA + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: ((a <-> '$21,472.79'::money)) + -> Limit + -> Index Only Scan using moneyidx on moneytmp + Order By: (a <-> '$21,472.79'::money) (7 rows) SELECT a, a <-> '21472.79' FROM moneytmp ORDER BY a <-> '21472.79' LIMIT 3; diff --git a/contrib/btree_gist/expected/date_optimizer.out b/contrib/btree_gist/expected/date_optimizer.out index a77041f847f..12269cf169b 100644 --- a/contrib/btree_gist/expected/date_optimizer.out +++ b/contrib/btree_gist/expected/date_optimizer.out @@ -79,10 +79,9 @@ SELECT a, a <-> '2001-02-13' FROM datetmp ORDER BY a <-> 
'2001-02-13' LIMIT 3; Limit -> Gather Motion 3:1 (slice1; segments: 3) Merge Key: ((a <-> '02-13-2001'::date)) - -> Sort - Sort Key: ((a <-> '02-13-2001'::date)) - -> Seq Scan on datetmp - Optimizer: GPORCA + -> Limit + -> Index Only Scan using dateidx on datetmp + Order By: (a <-> '02-13-2001'::date) (7 rows) SELECT a, a <-> '2001-02-13' FROM datetmp ORDER BY a <-> '2001-02-13' LIMIT 3; diff --git a/contrib/btree_gist/expected/float4_optimizer.out b/contrib/btree_gist/expected/float4_optimizer.out index cc40e9bd1ae..7b71a2f5112 100644 --- a/contrib/btree_gist/expected/float4_optimizer.out +++ b/contrib/btree_gist/expected/float4_optimizer.out @@ -79,10 +79,9 @@ SELECT a, a <-> '-179.0' FROM float4tmp ORDER BY a <-> '-179.0' LIMIT 3; Limit -> Gather Motion 3:1 (slice1; segments: 3) Merge Key: ((a <-> '-179'::real)) - -> Sort - Sort Key: ((a <-> '-179'::real)) - -> Seq Scan on float4tmp - Optimizer: GPORCA + -> Limit + -> Index Only Scan using float4idx on float4tmp + Order By: (a <-> '-179'::real) (7 rows) SELECT a, a <-> '-179.0' FROM float4tmp ORDER BY a <-> '-179.0' LIMIT 3; diff --git a/contrib/btree_gist/expected/float8_optimizer.out b/contrib/btree_gist/expected/float8_optimizer.out index 1bd96c44d3b..18e5c195286 100644 --- a/contrib/btree_gist/expected/float8_optimizer.out +++ b/contrib/btree_gist/expected/float8_optimizer.out @@ -79,10 +79,9 @@ SELECT a, a <-> '-1890.0' FROM float8tmp ORDER BY a <-> '-1890.0' LIMIT 3; Limit -> Gather Motion 3:1 (slice1; segments: 3) Merge Key: ((a <-> '-1890'::double precision)) - -> Sort - Sort Key: ((a <-> '-1890'::double precision)) - -> Seq Scan on float8tmp - Optimizer: GPORCA + -> Limit + -> Index Only Scan using float8idx on float8tmp + Order By: (a <-> '-1890'::double precision) (7 rows) SELECT a, a <-> '-1890.0' FROM float8tmp ORDER BY a <-> '-1890.0' LIMIT 3; diff --git a/contrib/btree_gist/expected/int2_optimizer.out b/contrib/btree_gist/expected/int2_optimizer.out index fdfc859097b..f8f6a428b93 100644 --- 
a/contrib/btree_gist/expected/int2_optimizer.out +++ b/contrib/btree_gist/expected/int2_optimizer.out @@ -79,10 +79,9 @@ SELECT a, a <-> '237' FROM int2tmp ORDER BY a <-> '237' LIMIT 3; Limit -> Gather Motion 3:1 (slice1; segments: 3) Merge Key: ((a <-> '237'::smallint)) - -> Sort - Sort Key: ((a <-> '237'::smallint)) - -> Seq Scan on int2tmp - Optimizer: GPORCA + -> Limit + -> Index Only Scan using int2idx on int2tmp + Order By: (a <-> '237'::smallint) (7 rows) SELECT a, a <-> '237' FROM int2tmp ORDER BY a <-> '237' LIMIT 3; diff --git a/contrib/btree_gist/expected/int4_optimizer.out b/contrib/btree_gist/expected/int4_optimizer.out index 67107e63bfa..6877fb09af5 100644 --- a/contrib/btree_gist/expected/int4_optimizer.out +++ b/contrib/btree_gist/expected/int4_optimizer.out @@ -79,10 +79,9 @@ SELECT a, a <-> '237' FROM int4tmp ORDER BY a <-> '237' LIMIT 3; Limit -> Gather Motion 3:1 (slice1; segments: 3) Merge Key: ((a <-> 237)) - -> Sort - Sort Key: ((a <-> 237)) - -> Seq Scan on int4tmp - Optimizer: GPORCA + -> Limit + -> Index Only Scan using int4idx on int4tmp + Order By: (a <-> 237) (7 rows) SELECT a, a <-> '237' FROM int4tmp ORDER BY a <-> '237' LIMIT 3; diff --git a/contrib/btree_gist/expected/int8_optimizer.out b/contrib/btree_gist/expected/int8_optimizer.out index ba8e21135e8..962dd314661 100644 --- a/contrib/btree_gist/expected/int8_optimizer.out +++ b/contrib/btree_gist/expected/int8_optimizer.out @@ -79,10 +79,9 @@ SELECT a, a <-> '464571291354841' FROM int8tmp ORDER BY a <-> '464571291354841' Limit -> Gather Motion 3:1 (slice1; segments: 3) Merge Key: ((a <-> '464571291354841'::bigint)) - -> Sort - Sort Key: ((a <-> '464571291354841'::bigint)) - -> Seq Scan on int8tmp - Optimizer: GPORCA + -> Limit + -> Index Only Scan using int8idx on int8tmp + Order By: (a <-> '464571291354841'::bigint) (7 rows) SELECT a, a <-> '464571291354841' FROM int8tmp ORDER BY a <-> '464571291354841' LIMIT 3; diff --git a/contrib/btree_gist/expected/interval_optimizer.out 
b/contrib/btree_gist/expected/interval_optimizer.out index f5afd17456b..f0a4e850aeb 100644 --- a/contrib/btree_gist/expected/interval_optimizer.out +++ b/contrib/btree_gist/expected/interval_optimizer.out @@ -74,15 +74,15 @@ SELECT count(*) FROM intervaltmp WHERE a > '199 days 21:21:23'::interval; EXPLAIN (COSTS OFF) SELECT a, a <-> '199 days 21:21:23' FROM intervaltmp ORDER BY a <-> '199 days 21:21:23' LIMIT 3; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +--------------------------------------------------------------------------------------- Limit -> Gather Motion 3:1 (slice1; segments: 3) Merge Key: ((a <-> '@ 199 days 21 hours 21 mins 23 secs'::interval)) - -> Sort - Sort Key: ((a <-> '@ 199 days 21 hours 21 mins 23 secs'::interval)) - -> Seq Scan on intervaltmp - Optimizer: GPORCA + -> Limit + -> Index Only Scan using intervalidx on intervaltmp + Order By: (a <-> '@ 199 days 21 hours 21 mins 23 secs'::interval) + Optimizer: Postgres query optimizer (7 rows) SELECT a, a <-> '199 days 21:21:23' FROM intervaltmp ORDER BY a <-> '199 days 21:21:23' LIMIT 3; @@ -96,15 +96,15 @@ SELECT a, a <-> '199 days 21:21:23' FROM intervaltmp ORDER BY a <-> '199 days 21 SET enable_indexonlyscan=off; EXPLAIN (COSTS OFF) SELECT a, a <-> '199 days 21:21:23' FROM intervaltmp ORDER BY a <-> '199 days 21:21:23' LIMIT 3; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +--------------------------------------------------------------------------------------- Limit -> Gather Motion 3:1 (slice1; segments: 3) Merge Key: ((a <-> '@ 199 days 21 hours 21 mins 23 secs'::interval)) - -> Sort - Sort Key: ((a <-> '@ 199 days 21 hours 21 mins 23 secs'::interval)) - -> Seq Scan on intervaltmp - Optimizer: GPORCA + -> Limit + -> Index Scan using intervalidx on intervaltmp + Order By: (a <-> '@ 199 days 21 hours 21 mins 23 secs'::interval) + Optimizer: Postgres query 
optimizer (7 rows) SELECT a, a <-> '199 days 21:21:23' FROM intervaltmp ORDER BY a <-> '199 days 21:21:23' LIMIT 3; diff --git a/contrib/btree_gist/expected/time_optimizer.out b/contrib/btree_gist/expected/time_optimizer.out index 590ada880b9..40d49e79b02 100644 --- a/contrib/btree_gist/expected/time_optimizer.out +++ b/contrib/btree_gist/expected/time_optimizer.out @@ -79,10 +79,9 @@ SELECT a, a <-> '10:57:11' FROM timetmp ORDER BY a <-> '10:57:11' LIMIT 3; Limit -> Gather Motion 3:1 (slice1; segments: 3) Merge Key: ((a <-> '10:57:11'::time without time zone)) - -> Sort - Sort Key: ((a <-> '10:57:11'::time without time zone)) - -> Seq Scan on timetmp - Optimizer: GPORCA + -> Limit + -> Index Only Scan using timeidx on timetmp + Order By: (a <-> '10:57:11'::time without time zone) (7 rows) SELECT a, a <-> '10:57:11' FROM timetmp ORDER BY a <-> '10:57:11' LIMIT 3; diff --git a/contrib/btree_gist/expected/timestamp_optimizer.out b/contrib/btree_gist/expected/timestamp_optimizer.out index 1b8e709fe90..85c3a1a5e5d 100644 --- a/contrib/btree_gist/expected/timestamp_optimizer.out +++ b/contrib/btree_gist/expected/timestamp_optimizer.out @@ -79,10 +79,9 @@ SELECT a, a <-> '2004-10-26 08:55:08' FROM timestamptmp ORDER BY a <-> '2004-10- Limit -> Gather Motion 3:1 (slice1; segments: 3) Merge Key: ((a <-> 'Tue Oct 26 08:55:08 2004'::timestamp without time zone)) - -> Sort - Sort Key: ((a <-> 'Tue Oct 26 08:55:08 2004'::timestamp without time zone)) - -> Seq Scan on timestamptmp - Optimizer: GPORCA + -> Limit + -> Index Only Scan using timestampidx on timestamptmp + Order By: (a <-> 'Tue Oct 26 08:55:08 2004'::timestamp without time zone) (7 rows) SELECT a, a <-> '2004-10-26 08:55:08' FROM timestamptmp ORDER BY a <-> '2004-10-26 08:55:08' LIMIT 3; diff --git a/contrib/btree_gist/expected/timestamptz_optimizer.out b/contrib/btree_gist/expected/timestamptz_optimizer.out index 2173c5dca35..a9e043f98a6 100644 --- a/contrib/btree_gist/expected/timestamptz_optimizer.out +++ 
b/contrib/btree_gist/expected/timestamptz_optimizer.out @@ -199,10 +199,9 @@ SELECT a, a <-> '2018-12-18 10:59:54 GMT+2' FROM timestamptztmp ORDER BY a <-> ' Limit -> Gather Motion 3:1 (slice1; segments: 3) Merge Key: ((a <-> 'Tue Dec 18 04:59:54 2018 PST'::timestamp with time zone)) - -> Sort - Sort Key: ((a <-> 'Tue Dec 18 04:59:54 2018 PST'::timestamp with time zone)) - -> Seq Scan on timestamptztmp - Optimizer: GPORCA + -> Limit + -> Index Only Scan using timestamptzidx on timestamptztmp + Order By: (a <-> 'Tue Dec 18 04:59:54 2018 PST'::timestamp with time zone) (7 rows) SELECT a, a <-> '2018-12-18 10:59:54 GMT+2' FROM timestamptztmp ORDER BY a <-> '2018-12-18 10:59:54 GMT+2' LIMIT 3; diff --git a/contrib/pax_storage/expected/cbdb_parallel.out b/contrib/pax_storage/expected/cbdb_parallel.out index db583090026..ec6ceba7e3c 100644 --- a/contrib/pax_storage/expected/cbdb_parallel.out +++ b/contrib/pax_storage/expected/cbdb_parallel.out @@ -41,13 +41,29 @@ set gp_appendonly_insert_files = 4; begin; set local enable_parallel = on; create table test_131_ao1(x int, y int) using ao_row with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_ao2(x int, y int) using ao_row with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
create table test_131_ao3(x int, y int) using ao_row with(parallel_workers=0); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_ao4(x int, y int) using ao_row with(parallel_workers=0); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_aoco1(x int, y int) using ao_column with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_aoco2(x int, y int) using ao_column with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table test_131_aoco3(x int, y int) using ao_column with(parallel_workers=0); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
create table test_131_aoco4(x int, y int) using ao_column with(parallel_workers=0); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. select relname, reloptions from pg_catalog.pg_class where relname like 'test_131_ao%'; relname | reloptions ----------------+---------------------- @@ -155,8 +171,14 @@ explain(locus, costs off) select count(*) from test_131_aoco3, test_131_aoco4 wh abort; create table ao1(x int, y int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table ao2(x int, y int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table aocs1(x int, y int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'x' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. begin; -- encourage use of parallel plans set local min_parallel_table_scan_size = 0; @@ -367,6 +389,8 @@ abort; begin; set local max_parallel_workers_per_gather = 2; create table t1(a int, b int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. 
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table rt1(a int, b int) with(parallel_workers=2) distributed replicated; create table rt2(a int, b int) distributed replicated; create table rt3(a int, b int) distributed replicated; @@ -599,6 +623,8 @@ select * from rt1 join t1 on rt1.a = t1.b join rt2 on rt2.a = t1.b; 5 | 6 | 4 | 5 | 5 | 6 8 | 9 | 7 | 8 | 8 | 9 9 | 10 | 8 | 9 | 9 | 10 + 1 | 2 | 1 | 1 | 1 | 2 + 2 | 3 | 1 | 2 | 2 | 3 5 | 6 | 5 | 5 | 5 | 6 6 | 7 | 6 | 6 | 6 | 7 9 | 10 | 9 | 9 | 9 | 10 @@ -606,8 +632,6 @@ select * from rt1 join t1 on rt1.a = t1.b join rt2 on rt2.a = t1.b; 6 | 7 | 5 | 6 | 6 | 7 7 | 8 | 6 | 7 | 7 | 8 10 | 11 | 9 | 10 | 10 | 11 - 1 | 2 | 1 | 1 | 1 | 2 - 2 | 3 | 1 | 2 | 2 | 3 (19 rows) -- parallel hash join @@ -650,13 +674,6 @@ explain(locus, costs off) select * from rt1 join t1 on rt1.a = t1.b join rt2 on select * from rt1 join t1 on rt1.a = t1.b join rt2 on rt2.a = t1.b; a | b | a | b | a | b ----+----+----+----+----+---- - 5 | 6 | 5 | 5 | 5 | 6 - 6 | 7 | 5 | 6 | 6 | 7 - 6 | 7 | 6 | 6 | 6 | 7 - 7 | 8 | 6 | 7 | 7 | 8 - 9 | 10 | 9 | 9 | 9 | 10 - 10 | 11 | 9 | 10 | 10 | 11 - 10 | 11 | 10 | 10 | 10 | 11 2 | 3 | 2 | 2 | 2 | 3 3 | 4 | 2 | 3 | 3 | 4 3 | 4 | 3 | 3 | 3 | 4 @@ -669,6 +686,13 @@ select * from rt1 join t1 on rt1.a = t1.b join rt2 on rt2.a = t1.b; 9 | 10 | 8 | 9 | 9 | 10 1 | 2 | 1 | 1 | 1 | 2 2 | 3 | 1 | 2 | 2 | 3 + 5 | 6 | 5 | 5 | 5 | 6 + 6 | 7 | 5 | 6 | 6 | 7 + 6 | 7 | 6 | 6 | 6 | 7 + 7 | 8 | 6 | 7 | 7 | 8 + 9 | 10 | 9 | 9 | 9 | 10 + 10 | 11 | 9 | 10 | 10 | 11 + 10 | 11 | 10 | 10 | 10 | 11 (19 rows) -- @@ -702,6 +726,8 @@ explain(locus, costs off) select * from rt1 join t1 on rt1.a = t1.b join rt3 on select * from rt1 join t1 on rt1.a = t1.b join rt3 on rt3.a = t1.b; a | b | a | b | a | b ----+----+----+----+----+---- + 1 | 2 | 1 | 1 | 1 | 2 + 2 | 3 | 1 | 2 | 2 | 3 2 | 3 | 2 | 2 | 2 | 3 3 | 4 | 3 | 3 | 3 | 4 4 | 5 | 4 | 
4 | 4 | 5 @@ -712,8 +738,6 @@ select * from rt1 join t1 on rt1.a = t1.b join rt3 on rt3.a = t1.b; 5 | 6 | 4 | 5 | 5 | 6 8 | 9 | 7 | 8 | 8 | 9 9 | 10 | 8 | 9 | 9 | 10 - 1 | 2 | 1 | 1 | 1 | 2 - 2 | 3 | 1 | 2 | 2 | 3 5 | 6 | 5 | 5 | 5 | 6 6 | 7 | 6 | 6 | 6 | 7 9 | 10 | 9 | 9 | 9 | 10 @@ -779,6 +803,8 @@ select * from rt1 join t1 on rt1.a = t1.b join rt3 on rt3.a = t1.b; (19 rows) create table t2(a int, b int) with(parallel_workers=0); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table rt4(a int, b int) with(parallel_workers=2) distributed replicated; insert into t2 select i, i+1 from generate_series(1, 10) i; insert into rt4 select i, i+1 from generate_series(1, 10000) i; @@ -788,16 +814,16 @@ set local enable_parallel = off; select * from rt4 join t2 using(b); b | a | a ----+----+---- - 2 | 1 | 1 - 6 | 5 | 5 - 7 | 6 | 6 - 10 | 9 | 9 - 11 | 10 | 10 3 | 2 | 2 4 | 3 | 3 5 | 4 | 4 8 | 7 | 7 9 | 8 | 8 + 2 | 1 | 1 + 6 | 5 | 5 + 7 | 6 | 6 + 10 | 9 | 9 + 11 | 10 | 10 (10 rows) set local enable_parallel = on; @@ -828,19 +854,21 @@ explain(locus, costs off) select * from rt4 join t2 using(b); select * from rt4 join t2 using(b); b | a | a ----+----+---- - 2 | 1 | 1 + 6 | 5 | 5 + 7 | 6 | 6 + 10 | 9 | 9 + 11 | 10 | 10 3 | 2 | 2 4 | 3 | 3 5 | 4 | 4 8 | 7 | 7 9 | 8 | 8 - 6 | 5 | 5 - 7 | 6 | 6 - 10 | 9 | 9 - 11 | 10 | 10 + 2 | 1 | 1 (10 rows) create table t3(a int, b int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
insert into t3 select i, i+1 from generate_series(1, 9000) i; analyze t3; set local enable_parallel = off; @@ -919,10 +947,10 @@ explain(locus, costs off) select * from t_replica_workers_2 join t_random_worker select * from t_replica_workers_2 join t_random_workers_0 using(a); a | b | b ---+---+--- - 2 | 3 | 3 - 3 | 4 | 4 1 | 2 | 2 + 2 | 3 | 3 4 | 5 | 5 + 3 | 4 | 4 5 | 6 | 6 (5 rows) @@ -931,11 +959,11 @@ set local enable_parallel=false; select * from t_replica_workers_2 join t_random_workers_0 using(a); a | b | b ---+---+--- - 2 | 3 | 3 3 | 4 | 4 - 1 | 2 | 2 - 4 | 5 | 5 5 | 6 | 6 + 4 | 5 | 5 + 1 | 2 | 2 + 2 | 3 | 3 (5 rows) abort; @@ -976,11 +1004,11 @@ explain(locus, costs off) select * from t_replica_workers_2 right join t_random_ select * from t_replica_workers_2 right join t_random_workers_2 using(a); a | b | b ---+---+--- - 5 | 6 | 6 1 | 2 | 2 2 | 3 | 3 3 | 4 | 4 4 | 5 | 5 + 5 | 6 | 6 (5 rows) -- non parallel results @@ -1028,14 +1056,14 @@ explain(locus, costs off) select * from t_replica_workers_2 join t_random_worker Locus: Strewn Parallel Workers: 2 Optimizer: Postgres query optimizer -(16 rows) +(15 rows) select * from t_replica_workers_2 join t_random_workers_2 using(a); a | b | b ---+---+--- - 2 | 3 | 3 1 | 2 | 2 3 | 4 | 4 + 2 | 3 | 3 4 | 5 | 5 5 | 6 | 6 (5 rows) @@ -1045,9 +1073,9 @@ set local enable_parallel=false; select * from t_replica_workers_2 join t_random_workers_2 using(a); a | b | b ---+---+--- - 2 | 3 | 3 1 | 2 | 2 3 | 4 | 4 + 2 | 3 | 3 4 | 5 | 5 5 | 6 | 6 (5 rows) @@ -1059,7 +1087,11 @@ abort; -- begin; create table t1(a int, b int) with(parallel_workers=3); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
create table t2(b int, a int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i, i+1 from generate_series(1, 10) i; insert into t2 select i, i+1 from generate_series(1, 5) i; analyze t1; @@ -1071,17 +1103,17 @@ explain(costs off) select * from t1 right join t2 on t1.b = t2.a; QUERY PLAN ------------------------------------------------------------------ Gather Motion 9:1 (slice1; segments: 9) - -> Parallel Hash Left Join - Hash Cond: (t2.a = t1.b) - -> Redistribute Motion 6:9 (slice2; segments: 6) - Hash Key: t2.a + -> Parallel Hash Right Join + Hash Cond: (t1.b = t2.a) + -> Redistribute Motion 9:9 (slice2; segments: 9) + Hash Key: t1.b Hash Module: 3 - -> Parallel Seq Scan on t2 + -> Parallel Seq Scan on t1 -> Parallel Hash - -> Redistribute Motion 9:9 (slice3; segments: 9) - Hash Key: t1.b + -> Redistribute Motion 6:9 (slice3; segments: 6) + Hash Key: t2.a Hash Module: 3 - -> Parallel Seq Scan on t1 + -> Parallel Seq Scan on t2 Optimizer: Postgres query optimizer (13 rows) @@ -1091,7 +1123,11 @@ abort; -- begin; create table t1(a int, b int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2(a int, b int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i%10, i from generate_series(1, 5) i; insert into t1 values (100000); insert into t2 select i%10, i from generate_series(1, 100000) i; @@ -1100,34 +1136,34 @@ analyze t2; set local enable_parallel = on; -- parallel hash join with shared table, SinglQE as outer partial path. explain(locus, costs off) select * from (select count(*) as a from t2) t2 left join t1 on t1.a = t2.a; - QUERY PLAN ------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------ Gather Motion 6:1 (slice1; segments: 6) Locus: Entry - -> Parallel Hash Left Join - Locus: Hashed + -> Parallel Hash Right Join + Locus: HashedWorkers Parallel Workers: 2 - Hash Cond: ((count(*)) = t1.a) - -> Redistribute Motion 1:6 (slice2; segments: 1) - Locus: Hashed + Hash Cond: (t1.a = (count(*))) + -> Parallel Seq Scan on t1 + Locus: HashedWorkers Parallel Workers: 2 - Hash Key: (count(*)) - Hash Module: 3 - -> Finalize Aggregate - Locus: SingleQE - -> Gather Motion 6:1 (slice3; segments: 6) - Locus: SingleQE - -> Partial Aggregate - Locus: HashedWorkers - Parallel Workers: 2 - -> Parallel Seq Scan on t2 - Locus: HashedWorkers - Parallel Workers: 2 -> Parallel Hash Locus: Hashed - -> Parallel Seq Scan on t1 - Locus: HashedWorkers + -> Redistribute Motion 1:6 (slice2; segments: 1) + Locus: Hashed Parallel Workers: 2 + Hash Key: (count(*)) + Hash Module: 3 + -> Finalize Aggregate + Locus: SingleQE + -> Gather Motion 6:1 (slice3; segments: 6) + Locus: SingleQE + -> Partial Aggregate + Locus: HashedWorkers + Parallel Workers: 2 + -> Parallel Seq Scan on t2 + Locus: HashedWorkers + Parallel Workers: 2 Optimizer: Postgres query optimizer (27 rows) @@ -1323,12 +1359,18 @@ begin; create table rt1(a int, b int) distributed replicated; create table rt2(a int, b int) with (parallel_workers = 0) distributed replicated; create table 
t1(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2(a int, b int) with (parallel_workers = 0); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i, i+1 from generate_series(1, 10000) i; insert into t2 select i, i+1 from generate_series(1, 10000) i; insert into rt1 select i, i+1 from generate_series(1, 10000) i; insert into rt2 select i, i+1 from generate_series(1, 10000) i; CREATE TABLE sq1 AS SELECT a, b FROM t1 WHERE gp_segment_id = 0; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
set local optimizer=off; set local enable_parallel=on; set local min_parallel_table_scan_size to 0; @@ -1385,7 +1427,7 @@ explain (locus, costs off) select * from rt1 union all select * from t1; -> Result Locus: Strewn Parallel Workers: 2 - One-Time Filter: (gp_execution_segment() = 1) + One-Time Filter: (gp_execution_segment() = 0) -> Parallel Seq Scan on rt1 Locus: SegmentGeneralWorkers Parallel Workers: 2 @@ -1409,7 +1451,7 @@ explain (locus, costs off) select * from rt1 union all select * from t2; -> Result Locus: Strewn Parallel Workers: 2 - One-Time Filter: (gp_execution_segment() = 1) + One-Time Filter: (gp_execution_segment() = 0) -> Parallel Seq Scan on rt1 Locus: SegmentGeneralWorkers Parallel Workers: 2 @@ -1482,6 +1524,8 @@ abort; -- begin; create table t1(c1 int, c2 int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i, i+1 from generate_series(1, 100000) i; analyze t1; set local optimizer = off; @@ -1549,6 +1593,8 @@ abort; -- begin; create table t1(c1 int, c2 int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i, i+1 from generate_series(1, 100000) i; analyze t1; set local optimizer = off; @@ -1768,6 +1814,8 @@ set local optimizer = off; set local enable_parallel = on; -- ao table create table ao (a INT, b INT) using ao_row; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. 
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into ao select i as a, i as b from generate_series(1, 100) AS i; alter table ao set (parallel_workers = 2); explain(costs off) select count(*) from ao; @@ -1789,6 +1837,8 @@ select count(*) from ao; alter table ao reset (parallel_workers); -- aocs table create table aocs (a INT, b INT) using ao_column; +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into aocs select i as a, i as b from generate_series(1, 100) AS i; alter table aocs set (parallel_workers = 2); explain(costs off) select count(*) from aocs; @@ -1862,9 +1912,14 @@ select * from abort; begin; create table pagg_tab (a int, b int, c text, d int) partition by list(c); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
create table pagg_tab_p1 partition of pagg_tab for values in ('0000', '0001', '0002', '0003', '0004'); +NOTICE: table has parent, setting distribution columns to match parent table create table pagg_tab_p2 partition of pagg_tab for values in ('0005', '0006', '0007', '0008'); +NOTICE: table has parent, setting distribution columns to match parent table create table pagg_tab_p3 partition of pagg_tab for values in ('0009', '0010', '0011'); +NOTICE: table has parent, setting distribution columns to match parent table insert into pagg_tab select i % 20, i % 30, to_char(i % 12, 'FM0000'), i % 30 from generate_series(0, 2999) i; analyze pagg_tab; set local enable_parallel to off; @@ -1939,7 +1994,11 @@ abort; -- begin; create table t1(a int, b int) with(parallel_workers=3); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. create table t2(b int, a int) with(parallel_workers=2); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'b' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 select i, i+1 from generate_series(1, 10) i; insert into t2 select i, i+1 from generate_series(1, 5) i; analyze t1; @@ -2329,6 +2388,8 @@ abort; -- prepare, execute locus is null begin; create table t1(c1 int, c2 int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'c1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
analyze t1; prepare t1_count(integer) as select count(*) from t1; explain(locus, costs off) execute t1_count(1); diff --git a/contrib/pax_storage/src/test/isolation2/expected/pax/copy_to_concurrent_reorganize.out b/contrib/pax_storage/src/test/isolation2/expected/pax/copy_to_concurrent_reorganize.out deleted file mode 100644 index b4beed7d035..00000000000 --- a/contrib/pax_storage/src/test/isolation2/expected/pax/copy_to_concurrent_reorganize.out +++ /dev/null @@ -1,289 +0,0 @@ --- Test: PAX table — relation-based COPY TO concurrent with ALTER TABLE SET WITH (reorganize=true) --- Issue: https://github.com/apache/cloudberry/issues/1545 --- Same as test 2.1 in the main isolation2 suite but for PAX storage. - -CREATE TABLE copy_reorg_pax_test (a INT, b INT) DISTRIBUTED BY (a); -CREATE -INSERT INTO copy_reorg_pax_test SELECT i, i FROM generate_series(1, 1000) i; -INSERT 1000 - --- Record original row count -SELECT count(*) FROM copy_reorg_pax_test; - count -------- - 1000 -(1 row) - --- Session 1: Begin reorganize (holds AccessExclusiveLock) -1: BEGIN; -BEGIN -1: ALTER TABLE copy_reorg_pax_test SET WITH (reorganize=true); -ALTER - --- Session 2: relation-based COPY TO should block on AccessShareLock -2&: COPY copy_reorg_pax_test TO '/tmp/copy_reorg_pax_test.csv'; - --- Confirm Session 2 is waiting for the lock -1: SELECT count(*) > 0 FROM pg_stat_activity WHERE query LIKE 'COPY copy_reorg_pax_test%' AND wait_event_type = 'Lock'; - ?column? ----------- - t -(1 row) - --- Session 1: Commit reorganize, releasing AccessExclusiveLock -1: COMMIT; -COMMIT - --- Session 2: Should return 1000 rows (fixed), not 0 rows (broken) -2<: <... 
completed> -COPY 1000 - --- Verify the output file contains all rows -CREATE TABLE copy_reorg_pax_verify (a INT, b INT) DISTRIBUTED BY (a); -CREATE -COPY copy_reorg_pax_verify FROM '/tmp/copy_reorg_pax_test.csv'; -COPY 1000 -SELECT count(*) FROM copy_reorg_pax_verify; - count -------- - 1000 -(1 row) - --- Cleanup -DROP TABLE copy_reorg_pax_verify; -DROP -DROP TABLE copy_reorg_pax_test; -DROP - --- ============================================================ --- Test 2.2c: PAX — query-based COPY TO + concurrent reorganize --- Fixed: BeginCopy() refreshes snapshot after AcquireRewriteLocks(). --- ============================================================ - -CREATE TABLE copy_query_reorg_pax_test (a INT, b INT) DISTRIBUTED BY (a); -CREATE -INSERT INTO copy_query_reorg_pax_test SELECT i, i FROM generate_series(1, 1000) i; -INSERT 1000 - -SELECT count(*) FROM copy_query_reorg_pax_test; - count -------- - 1000 -(1 row) - -1: BEGIN; -BEGIN -1: ALTER TABLE copy_query_reorg_pax_test SET WITH (reorganize=true); -ALTER - -2&: COPY (SELECT * FROM copy_query_reorg_pax_test) TO '/tmp/copy_query_reorg_pax_test.csv'; - -1: SELECT count(*) > 0 FROM pg_stat_activity WHERE query LIKE 'COPY (SELECT%copy_query_reorg_pax_test%' AND wait_event_type = 'Lock'; - ?column? ----------- - t -(1 row) - -1: COMMIT; -COMMIT -2<: <... completed> -COPY 1000 - -CREATE TABLE copy_query_reorg_pax_verify (a INT, b INT) DISTRIBUTED BY (a); -CREATE -COPY copy_query_reorg_pax_verify FROM '/tmp/copy_query_reorg_pax_test.csv'; -COPY 1000 -SELECT count(*) FROM copy_query_reorg_pax_verify; - count -------- - 1000 -(1 row) - -DROP TABLE copy_query_reorg_pax_verify; -DROP -DROP TABLE copy_query_reorg_pax_test; -DROP - --- ============================================================ --- Test 2.3c: PAX — partitioned table COPY TO + child partition concurrent reorganize --- Fixed: DoCopy() calls find_all_inheritors() to lock all child partitions first. 
--- ============================================================ - -CREATE TABLE copy_part_parent_pax (a INT, b INT) PARTITION BY RANGE (a) DISTRIBUTED BY (a); -CREATE -CREATE TABLE copy_part_child1_pax PARTITION OF copy_part_parent_pax FOR VALUES FROM (1) TO (501); -CREATE -CREATE TABLE copy_part_child2_pax PARTITION OF copy_part_parent_pax FOR VALUES FROM (501) TO (1001); -CREATE -INSERT INTO copy_part_parent_pax SELECT i, i FROM generate_series(1, 1000) i; -INSERT 1000 - -SELECT count(*) FROM copy_part_parent_pax; - count -------- - 1000 -(1 row) - -1: BEGIN; -BEGIN -1: ALTER TABLE copy_part_child1_pax SET WITH (reorganize=true); -ALTER - -2&: COPY copy_part_parent_pax TO '/tmp/copy_part_parent_pax.csv'; - -1: SELECT count(*) > 0 FROM pg_stat_activity WHERE query LIKE 'COPY copy_part_parent_pax%' AND wait_event_type = 'Lock'; - ?column? ----------- - t -(1 row) - -1: COMMIT; -COMMIT -2<: <... completed> -COPY 1000 - -CREATE TABLE copy_part_pax_verify (a INT, b INT) DISTRIBUTED BY (a); -CREATE -COPY copy_part_pax_verify FROM '/tmp/copy_part_parent_pax.csv'; -COPY 1000 -SELECT count(*) FROM copy_part_pax_verify; - count -------- - 1000 -(1 row) - -DROP TABLE copy_part_pax_verify; -DROP -DROP TABLE copy_part_parent_pax; -DROP - --- ============================================================ --- Test 2.4c: PAX — RLS table COPY TO + policy-referenced table concurrent reorganize --- Fixed: same as 2.2c — BeginCopy() refreshes snapshot after AcquireRewriteLocks(). 
--- ============================================================ - -CREATE TABLE copy_rls_pax_lookup (cat INT) DISTRIBUTED BY (cat); -CREATE -INSERT INTO copy_rls_pax_lookup SELECT i FROM generate_series(1, 2) i; -INSERT 2 - -CREATE TABLE copy_rls_pax_main (a INT, category INT) DISTRIBUTED BY (a); -CREATE -INSERT INTO copy_rls_pax_main SELECT i, (i % 5) + 1 FROM generate_series(1, 1000) i; -INSERT 1000 - -ALTER TABLE copy_rls_pax_main ENABLE ROW LEVEL SECURITY; -ALTER -CREATE POLICY p_rls_pax ON copy_rls_pax_main USING (category IN (SELECT cat from copy_rls_pax_lookup)); -CREATE - -CREATE ROLE copy_rls_pax_testuser; -CREATE -GRANT pg_write_server_files TO copy_rls_pax_testuser; -GRANT -GRANT ALL ON copy_rls_pax_main TO copy_rls_pax_testuser; -GRANT -GRANT ALL ON copy_rls_pax_lookup TO copy_rls_pax_testuser; -GRANT - -SELECT count(*) FROM copy_rls_pax_main; - count -------- - 1000 -(1 row) - -2: SET ROLE copy_rls_pax_testuser; COPY copy_rls_pax_main TO '/tmp/copy_rls_pax_main.csv'; -SET 400 - -1: BEGIN; -BEGIN -1: ALTER TABLE copy_rls_pax_lookup SET WITH (reorganize=true); -ALTER - -2&: SET ROLE copy_rls_pax_testuser; COPY copy_rls_pax_main TO '/tmp/copy_rls_pax_main.csv'; - -1: SELECT count(*) > 0 FROM pg_stat_activity WHERE query LIKE '%COPY copy_rls_pax_main%' AND wait_event_type = 'Lock'; - ?column? ----------- - t -(1 row) - -1: COMMIT; -COMMIT -2<: <... 
completed> -SET 400 - --- Reset session 2's role to avoid leaking to subsequent tests -2: RESET ROLE; -RESET - -RESET ROLE; -RESET -CREATE TABLE copy_rls_pax_verify (a INT, category INT) DISTRIBUTED BY (a); -CREATE -COPY copy_rls_pax_verify FROM '/tmp/copy_rls_pax_main.csv'; -COPY 400 -SELECT count(*) FROM copy_rls_pax_verify; - count -------- - 400 -(1 row) - -DROP TABLE copy_rls_pax_verify; -DROP -DROP POLICY p_rls_pax ON copy_rls_pax_main; -DROP -DROP TABLE copy_rls_pax_main; -DROP -DROP TABLE copy_rls_pax_lookup; -DROP -DROP ROLE copy_rls_pax_testuser; -DROP - --- ============================================================ --- Test 2.5c: PAX — CTAS + concurrent reorganize --- Fixed as a side effect via BeginCopy() snapshot refresh. --- ============================================================ - -CREATE TABLE ctas_reorg_pax_src (a INT, b INT) DISTRIBUTED BY (a); -CREATE -INSERT INTO ctas_reorg_pax_src SELECT i, i FROM generate_series(1, 1000) i; -INSERT 1000 - -SELECT count(*) FROM ctas_reorg_pax_src; - count -------- - 1000 -(1 row) - -1: BEGIN; -BEGIN -1: ALTER TABLE ctas_reorg_pax_src SET WITH (reorganize=true); -ALTER - -2&: CREATE TABLE ctas_reorg_pax_dst AS SELECT * FROM ctas_reorg_pax_src DISTRIBUTED BY (a); - -1: SELECT count(*) > 0 FROM pg_stat_activity WHERE query LIKE 'CREATE TABLE ctas_reorg_pax_dst%' AND wait_event_type = 'Lock'; - ?column? ----------- - t -(1 row) - -1: COMMIT; -COMMIT -2<: <... completed> -CREATE 1000 - -SELECT count(*) FROM ctas_reorg_pax_dst; - count -------- - 1000 -(1 row) - -DROP TABLE ctas_reorg_pax_dst; -DROP -DROP TABLE ctas_reorg_pax_src; -DROP - --- NOTE: Test 2.6c (PAX variant of change distribution key + query-based COPY TO) --- removed for the same reason as test 2.6 (server crash, pre-existing bug). 
diff --git a/contrib/pax_storage/src/test/isolation2/isolation2_schedule b/contrib/pax_storage/src/test/isolation2/isolation2_schedule index fa163aa96b6..72fa06f5204 100644 --- a/contrib/pax_storage/src/test/isolation2/isolation2_schedule +++ b/contrib/pax_storage/src/test/isolation2/isolation2_schedule @@ -157,7 +157,6 @@ test: pax/vacuum_while_vacuum # test: uao/bad_buffer_on_temp_ao_row test: reorganize_after_ao_vacuum_skip_drop truncate_after_ao_vacuum_skip_drop mark_all_aoseg_await_drop -test: pax/copy_to_concurrent_reorganize # below test(s) inject faults so each of them need to be in a separate group test: segwalrep/master_wal_switch diff --git a/contrib/pax_storage/src/test/isolation2/sql/pax/copy_to_concurrent_reorganize.sql b/contrib/pax_storage/src/test/isolation2/sql/pax/copy_to_concurrent_reorganize.sql deleted file mode 100644 index 05ef25852e9..00000000000 --- a/contrib/pax_storage/src/test/isolation2/sql/pax/copy_to_concurrent_reorganize.sql +++ /dev/null @@ -1,170 +0,0 @@ --- Test: PAX table — relation-based COPY TO concurrent with ALTER TABLE SET WITH (reorganize=true) --- Issue: https://github.com/apache/cloudberry/issues/1545 --- Same as test 2.1 in the main isolation2 suite but for PAX storage. 
- -CREATE TABLE copy_reorg_pax_test (a INT, b INT) DISTRIBUTED BY (a); -INSERT INTO copy_reorg_pax_test SELECT i, i FROM generate_series(1, 1000) i; - --- Record original row count -SELECT count(*) FROM copy_reorg_pax_test; - --- Session 1: Begin reorganize (holds AccessExclusiveLock) -1: BEGIN; -1: ALTER TABLE copy_reorg_pax_test SET WITH (reorganize=true); - --- Session 2: relation-based COPY TO should block on AccessShareLock -2&: COPY copy_reorg_pax_test TO '/tmp/copy_reorg_pax_test.csv'; - --- Confirm Session 2 is waiting for the lock -1: SELECT count(*) > 0 FROM pg_stat_activity - WHERE query LIKE 'COPY copy_reorg_pax_test%' AND wait_event_type = 'Lock'; - --- Session 1: Commit reorganize, releasing AccessExclusiveLock -1: COMMIT; - --- Session 2: Should return 1000 rows (fixed), not 0 rows (broken) -2<: - --- Verify the output file contains all rows -CREATE TABLE copy_reorg_pax_verify (a INT, b INT) DISTRIBUTED BY (a); -COPY copy_reorg_pax_verify FROM '/tmp/copy_reorg_pax_test.csv'; -SELECT count(*) FROM copy_reorg_pax_verify; - --- Cleanup -DROP TABLE copy_reorg_pax_verify; -DROP TABLE copy_reorg_pax_test; - --- ============================================================ --- Test 2.2c: PAX — query-based COPY TO + concurrent reorganize --- Fixed: BeginCopy() refreshes snapshot after AcquireRewriteLocks(). 
--- ============================================================ - -CREATE TABLE copy_query_reorg_pax_test (a INT, b INT) DISTRIBUTED BY (a); -INSERT INTO copy_query_reorg_pax_test SELECT i, i FROM generate_series(1, 1000) i; - -SELECT count(*) FROM copy_query_reorg_pax_test; - -1: BEGIN; -1: ALTER TABLE copy_query_reorg_pax_test SET WITH (reorganize=true); - -2&: COPY (SELECT * FROM copy_query_reorg_pax_test) TO '/tmp/copy_query_reorg_pax_test.csv'; - -1: SELECT count(*) > 0 FROM pg_stat_activity - WHERE query LIKE 'COPY (SELECT%copy_query_reorg_pax_test%' AND wait_event_type = 'Lock'; - -1: COMMIT; -2<: - -CREATE TABLE copy_query_reorg_pax_verify (a INT, b INT) DISTRIBUTED BY (a); -COPY copy_query_reorg_pax_verify FROM '/tmp/copy_query_reorg_pax_test.csv'; -SELECT count(*) FROM copy_query_reorg_pax_verify; - -DROP TABLE copy_query_reorg_pax_verify; -DROP TABLE copy_query_reorg_pax_test; - --- ============================================================ --- Test 2.3c: PAX — partitioned table COPY TO + child partition concurrent reorganize --- Fixed: DoCopy() calls find_all_inheritors() to lock all child partitions first. 
--- ============================================================ - -CREATE TABLE copy_part_parent_pax (a INT, b INT) PARTITION BY RANGE (a) DISTRIBUTED BY (a); -CREATE TABLE copy_part_child1_pax PARTITION OF copy_part_parent_pax FOR VALUES FROM (1) TO (501); -CREATE TABLE copy_part_child2_pax PARTITION OF copy_part_parent_pax FOR VALUES FROM (501) TO (1001); -INSERT INTO copy_part_parent_pax SELECT i, i FROM generate_series(1, 1000) i; - -SELECT count(*) FROM copy_part_parent_pax; - -1: BEGIN; -1: ALTER TABLE copy_part_child1_pax SET WITH (reorganize=true); - -2&: COPY copy_part_parent_pax TO '/tmp/copy_part_parent_pax.csv'; - -1: SELECT count(*) > 0 FROM pg_stat_activity - WHERE query LIKE 'COPY copy_part_parent_pax%' AND wait_event_type = 'Lock'; - -1: COMMIT; -2<: - -CREATE TABLE copy_part_pax_verify (a INT, b INT) DISTRIBUTED BY (a); -COPY copy_part_pax_verify FROM '/tmp/copy_part_parent_pax.csv'; -SELECT count(*) FROM copy_part_pax_verify; - -DROP TABLE copy_part_pax_verify; -DROP TABLE copy_part_parent_pax; - --- ============================================================ --- Test 2.4c: PAX — RLS table COPY TO + policy-referenced table concurrent reorganize --- Fixed: same as 2.2c — BeginCopy() refreshes snapshot after AcquireRewriteLocks(). 
--- ============================================================ - -CREATE TABLE copy_rls_pax_lookup (cat INT) DISTRIBUTED BY (cat); -INSERT INTO copy_rls_pax_lookup SELECT i FROM generate_series(1, 2) i; - -CREATE TABLE copy_rls_pax_main (a INT, category INT) DISTRIBUTED BY (a); -INSERT INTO copy_rls_pax_main SELECT i, (i % 5) + 1 FROM generate_series(1, 1000) i; - -ALTER TABLE copy_rls_pax_main ENABLE ROW LEVEL SECURITY; -CREATE POLICY p_rls_pax ON copy_rls_pax_main USING (category IN (SELECT cat from copy_rls_pax_lookup)); - -CREATE ROLE copy_rls_pax_testuser; -GRANT pg_write_server_files TO copy_rls_pax_testuser; -GRANT ALL ON copy_rls_pax_main TO copy_rls_pax_testuser; -GRANT ALL ON copy_rls_pax_lookup TO copy_rls_pax_testuser; - -SELECT count(*) FROM copy_rls_pax_main; - -2: SET ROLE copy_rls_pax_testuser; COPY copy_rls_pax_main TO '/tmp/copy_rls_pax_main.csv'; - -1: BEGIN; -1: ALTER TABLE copy_rls_pax_lookup SET WITH (reorganize=true); - -2&: SET ROLE copy_rls_pax_testuser; COPY copy_rls_pax_main TO '/tmp/copy_rls_pax_main.csv'; - -1: SELECT count(*) > 0 FROM pg_stat_activity - WHERE query LIKE '%COPY copy_rls_pax_main%' AND wait_event_type = 'Lock'; - -1: COMMIT; -2<: - --- Reset session 2's role to avoid leaking to subsequent tests -2: RESET ROLE; - -RESET ROLE; -CREATE TABLE copy_rls_pax_verify (a INT, category INT) DISTRIBUTED BY (a); -COPY copy_rls_pax_verify FROM '/tmp/copy_rls_pax_main.csv'; -SELECT count(*) FROM copy_rls_pax_verify; - -DROP TABLE copy_rls_pax_verify; -DROP POLICY p_rls_pax ON copy_rls_pax_main; -DROP TABLE copy_rls_pax_main; -DROP TABLE copy_rls_pax_lookup; -DROP ROLE copy_rls_pax_testuser; - --- ============================================================ --- Test 2.5c: PAX — CTAS + concurrent reorganize --- Fixed as a side effect via BeginCopy() snapshot refresh. 
--- ============================================================ - -CREATE TABLE ctas_reorg_pax_src (a INT, b INT) DISTRIBUTED BY (a); -INSERT INTO ctas_reorg_pax_src SELECT i, i FROM generate_series(1, 1000) i; - -SELECT count(*) FROM ctas_reorg_pax_src; - -1: BEGIN; -1: ALTER TABLE ctas_reorg_pax_src SET WITH (reorganize=true); - -2&: CREATE TABLE ctas_reorg_pax_dst AS SELECT * FROM ctas_reorg_pax_src DISTRIBUTED BY (a); - -1: SELECT count(*) > 0 FROM pg_stat_activity - WHERE query LIKE 'CREATE TABLE ctas_reorg_pax_dst%' AND wait_event_type = 'Lock'; - -1: COMMIT; -2<: - -SELECT count(*) FROM ctas_reorg_pax_dst; - -DROP TABLE ctas_reorg_pax_dst; -DROP TABLE ctas_reorg_pax_src; - --- NOTE: Test 2.6c (PAX variant of change distribution key + query-based COPY TO) --- removed for the same reason as test 2.6 (server crash, pre-existing bug). diff --git a/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out b/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out index b3da68b1f9d..382fb46fdfd 100644 --- a/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out +++ b/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out @@ -949,21 +949,21 @@ select v.c, (select count(*) from gstest2 group by () having v.c) explain (costs off) select v.c, (select count(*) from gstest2 group by () having v.c) from (values (false),(true)) v(c) order by v.c; - QUERY PLAN --------------------------------------------------------------------------- - Sort - Sort Key: "*VALUES*".column1 - -> Values Scan on "*VALUES*" - SubPlan 1 - -> Aggregate - Group Key: () - Filter: "*VALUES*".column1 - -> Result - One-Time Filter: "*VALUES*".column1 - -> Materialize - -> Gather Motion 3:1 (slice1; segments: 3) + QUERY PLAN +-------------------------------------------------------------------- + Result + -> Sort + Sort Key: "Values".column1 + -> Values Scan on "Values" + SubPlan 1 + -> Result + One-Time Filter: "Values".column1 + -> 
Finalize Aggregate + -> Materialize + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate -> Seq Scan on gstest2 - Optimizer: Postgres query optimizer + Optimizer: GPORCA (13 rows) -- HAVING with GROUPING queries diff --git a/contrib/pax_storage/src/test/regress/expected/stats.out b/contrib/pax_storage/src/test/regress/expected/stats.out index d3f407656fc..150dab3ead4 100644 --- a/contrib/pax_storage/src/test/regress/expected/stats.out +++ b/contrib/pax_storage/src/test/regress/expected/stats.out @@ -34,14 +34,13 @@ declare updated3 bool; updated4 bool; updated5 bool; + updated6 bool; begin -- we don't want to wait forever; loop will exit after 30 seconds for i in 1 .. 300 loop - -- With parallel query, the seqscan and indexscan on tenk2 might be done - -- in parallel worker processes, which will send their stats counters - -- asynchronously to what our own session does. So we must check for - -- those counts to be registered separately from the update counts. + -- Segment stats are sent asynchronously to the coordinator, so we must + -- check for each counter independently to avoid false exits. 
-- check to see if seqscan has been sensed SELECT (st.seq_scan >= pr.seq_scan + 1) INTO updated1 @@ -68,7 +67,13 @@ begin FROM pg_stat_user_tables AS st, pg_class AS cl, prevstats AS pr WHERE st.relname='tenk2' AND cl.relname='tenk2'; - exit when updated1 and updated2 and updated3 and updated4 and updated5; + -- check to see if seq_tup_read has been sensed; segment stats may arrive + -- after seq_scan count, so wait for this explicitly + SELECT (st.seq_tup_read >= pr.seq_tup_read + cl.reltuples) INTO updated6 + FROM gp_stat_user_tables_summary AS st, pg_class AS cl, prevstats AS pr + WHERE st.relname='tenk2' AND cl.relname='tenk2'; + + exit when updated1 and updated2 and updated3 and updated4 and updated5 and updated6; -- wait a little perform pg_sleep_for('100 milliseconds'); diff --git a/contrib/pax_storage/src/test/regress/sql/stats.sql b/contrib/pax_storage/src/test/regress/sql/stats.sql index 36878562f87..8f32a97cf18 100644 --- a/contrib/pax_storage/src/test/regress/sql/stats.sql +++ b/contrib/pax_storage/src/test/regress/sql/stats.sql @@ -33,14 +33,13 @@ declare updated3 bool; updated4 bool; updated5 bool; + updated6 bool; begin -- we don't want to wait forever; loop will exit after 30 seconds for i in 1 .. 300 loop - -- With parallel query, the seqscan and indexscan on tenk2 might be done - -- in parallel worker processes, which will send their stats counters - -- asynchronously to what our own session does. So we must check for - -- those counts to be registered separately from the update counts. + -- Segment stats are sent asynchronously to the coordinator, so we must + -- check for each counter independently to avoid false exits. 
-- check to see if seqscan has been sensed SELECT (st.seq_scan >= pr.seq_scan + 1) INTO updated1 @@ -67,7 +66,13 @@ begin FROM pg_stat_user_tables AS st, pg_class AS cl, prevstats AS pr WHERE st.relname='tenk2' AND cl.relname='tenk2'; - exit when updated1 and updated2 and updated3 and updated4 and updated5; + -- check to see if seq_tup_read has been sensed; segment stats may arrive + -- after seq_scan count, so wait for this explicitly + SELECT (st.seq_tup_read >= pr.seq_tup_read + cl.reltuples) INTO updated6 + FROM gp_stat_user_tables_summary AS st, pg_class AS cl, prevstats AS pr + WHERE st.relname='tenk2' AND cl.relname='tenk2'; + + exit when updated1 and updated2 and updated3 and updated4 and updated5 and updated6; -- wait a little perform pg_sleep_for('100 milliseconds'); diff --git a/contrib/pg_trgm/expected/pg_trgm_optimizer.out b/contrib/pg_trgm/expected/pg_trgm_optimizer.out index 4597b8ca047..a1e9b3d299d 100644 --- a/contrib/pg_trgm/expected/pg_trgm_optimizer.out +++ b/contrib/pg_trgm/expected/pg_trgm_optimizer.out @@ -2351,6 +2351,7 @@ select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988 -> Limit -> Index Scan using trgm_idx on test_trgm Order By: (t <-> 'q0987wertyu0988'::text) + Optimizer: Postgres query optimizer (7 rows) select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2; @@ -5003,8 +5004,8 @@ select * from test2 where t ~ '/\d+/-\d'; -- test = operator explain (costs off) select * from test2 where t = 'abcdef'; - QUERY PLAN ------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 1:1 (slice1; segments: 1) -> Bitmap Heap Scan on test2 Recheck Cond: (t = 'abcdef'::text) @@ -5020,8 +5021,8 @@ select * from test2 where t = 'abcdef'; explain (costs off) select * from test2 where t = '%line%'; - QUERY PLAN ------------------------------------------- + QUERY PLAN +------------------------------------------------ Gather Motion 
1:1 (slice1; segments: 1) -> Bitmap Heap Scan on test2 Recheck Cond: (t = '%line%'::text) @@ -5311,14 +5312,15 @@ select * from test2 where t ~ '/\d+/-\d'; -- test = operator explain (costs off) select * from test2 where t = 'abcdef'; - QUERY PLAN ------------------------------------------- + QUERY PLAN +------------------------------------------------- Gather Motion 1:1 (slice1; segments: 1) -> Bitmap Heap Scan on test2 Recheck Cond: (t = 'abcdef'::text) -> Bitmap Index Scan on test2_idx_gist Index Cond: (t = 'abcdef'::text) -(2 rows) + Optimizer: Postgres query optimizer +(6 rows) select * from test2 where t = 'abcdef'; t @@ -5328,13 +5330,14 @@ select * from test2 where t = 'abcdef'; explain (costs off) select * from test2 where t = '%line%'; - QUERY PLAN ------------------------------------------- + QUERY PLAN +------------------------------------------------- Gather Motion 1:1 (slice1; segments: 1) -> Bitmap Heap Scan on test2 Recheck Cond: (t = '%line%'::text) -> Bitmap Index Scan on test2_idx_gist Index Cond: (t = '%line%'::text) + Optimizer: Postgres query optimizer (6 rows) select * from test2 where t = '%line%'; @@ -5423,7 +5426,7 @@ SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit() -> Index Scan using restaurants_city_idx on restaurants Index Cond: (city % 'Warsaw'::text) Filter: (city % 'Warsaw'::text) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (9 rows) SELECT set_limit(0.3); diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 10700d6fd4a..9b70906a4a9 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -707,6 +707,17 @@ EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft_empty ORDER BY c1; Remote SQL: SELECT c1, c2 FROM public.loct_empty ORDER BY c1 ASC NULLS LAST (3 rows) +-- test restriction on non-system foreign tables. 
+SET restrict_nonsystem_relation_kind TO 'foreign-table'; +SELECT * from ft1 where c1 < 1; -- ERROR +ERROR: access to non-system foreign table is restricted +INSERT INTO ft1 (c1) VALUES (1); -- ERROR +ERROR: access to non-system foreign table is restricted +DELETE FROM ft1 WHERE c1 = 1; -- ERROR +ERROR: access to non-system foreign table is restricted +TRUNCATE ft1; -- ERROR +ERROR: access to non-system foreign table is restricted +RESET restrict_nonsystem_relation_kind; -- =================================================================== -- WHERE with remotely-executable conditions -- =================================================================== diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 793dd64811d..ac290d3ba30 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -321,6 +321,14 @@ DELETE FROM loct_empty; ANALYZE ft_empty; EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft_empty ORDER BY c1; +-- test restriction on non-system foreign tables. 
+SET restrict_nonsystem_relation_kind TO 'foreign-table'; +SELECT * from ft1 where c1 < 1; -- ERROR +INSERT INTO ft1 (c1) VALUES (1); -- ERROR +DELETE FROM ft1 WHERE c1 = 1; -- ERROR +TRUNCATE ft1; -- ERROR +RESET restrict_nonsystem_relation_kind; + -- =================================================================== -- WHERE with remotely-executable conditions -- =================================================================== diff --git a/contrib/xml2/xpath.c b/contrib/xml2/xpath.c index 1e5b71d9a02..0555294f234 100644 --- a/contrib/xml2/xpath.c +++ b/contrib/xml2/xpath.c @@ -74,8 +74,6 @@ pgxml_parser_init(PgXmlStrictness strictness) /* Initialize libxml */ xmlInitParser(); - xmlSubstituteEntitiesDefault(1); - xmlLoadExtDtdDefaultValue = 1; return xmlerrcxt; } @@ -425,8 +423,9 @@ pgxml_xpath(text *document, xmlChar *xpath, xpath_workspace *workspace) PG_TRY(); { - workspace->doctree = xmlParseMemory((char *) VARDATA_ANY(document), - docsize); + workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document), + docsize, NULL, NULL, + XML_PARSE_NOENT); if (workspace->doctree != NULL) { workspace->ctxt = xmlXPathNewContext(workspace->doctree); @@ -719,7 +718,9 @@ xpath_table(PG_FUNCTION_ARGS) /* Parse the document */ if (xmldoc) - doctree = xmlParseMemory(xmldoc, strlen(xmldoc)); + doctree = xmlReadMemory(xmldoc, strlen(xmldoc), + NULL, NULL, + XML_PARSE_NOENT); else /* treat NULL as not well-formed */ doctree = NULL; diff --git a/contrib/xml2/xslt_proc.c b/contrib/xml2/xslt_proc.c index 2189bca86ff..f30a3a42c03 100644 --- a/contrib/xml2/xslt_proc.c +++ b/contrib/xml2/xslt_proc.c @@ -85,16 +85,18 @@ xslt_process(PG_FUNCTION_ARGS) bool xslt_sec_prefs_error; /* Parse document */ - doctree = xmlParseMemory((char *) VARDATA_ANY(doct), - VARSIZE_ANY_EXHDR(doct)); + doctree = xmlReadMemory((char *) VARDATA_ANY(doct), + VARSIZE_ANY_EXHDR(doct), NULL, NULL, + XML_PARSE_NOENT); if (doctree == NULL) xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, 
"error parsing XML document"); /* Same for stylesheet */ - ssdoc = xmlParseMemory((char *) VARDATA_ANY(ssheet), - VARSIZE_ANY_EXHDR(ssheet)); + ssdoc = xmlReadMemory((char *) VARDATA_ANY(ssheet), + VARSIZE_ANY_EXHDR(ssheet), NULL, NULL, + XML_PARSE_NOENT); if (ssdoc == NULL) xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, diff --git a/devops/build/automation/cloudberry/scripts/build-cloudberry.sh b/devops/build/automation/cloudberry/scripts/build-cloudberry.sh index efa061a0f83..ca4c73d55cb 100755 --- a/devops/build/automation/cloudberry/scripts/build-cloudberry.sh +++ b/devops/build/automation/cloudberry/scripts/build-cloudberry.sh @@ -71,7 +71,7 @@ init_environment "Cloudberry Build Script" "${BUILD_LOG}" # Set environment log_section "Environment Setup" -export LD_LIBRARY_PATH=${BUILD_DESTINATION}/lib:LD_LIBRARY_PATH +export LD_LIBRARY_PATH=${BUILD_DESTINATION}/lib:${LD_LIBRARY_PATH:-""} log_section_end "Environment Setup" # Build process diff --git a/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh b/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh index 32a9f3d8657..cc9e7376239 100755 --- a/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh +++ b/devops/build/automation/cloudberry/scripts/configure-cloudberry.sh @@ -121,7 +121,7 @@ log_section "Initial Setup" execute_cmd sudo rm -rf ${BUILD_DESTINATION} || exit 2 execute_cmd sudo chmod a+w /usr/local || exit 2 execute_cmd sudo mkdir -p ${BUILD_DESTINATION}/lib || exit 2 -if [[ "$OS_ID" == "rocky" && "$OS_VERSION" =~ ^(8|9) ]]; then +if [[ "$OS_ID" == "rocky" && "$OS_VERSION" =~ ^(8|9|10) ]]; then execute_cmd sudo cp /usr/local/xerces-c/lib/libxerces-c.so \ /usr/local/xerces-c/lib/libxerces-c-3.3.so \ ${BUILD_DESTINATION}/lib || exit 3 @@ -131,7 +131,7 @@ log_section_end "Initial Setup" # Set environment log_section "Environment Setup" -export LD_LIBRARY_PATH=${BUILD_DESTINATION}/lib:LD_LIBRARY_PATH +export 
LD_LIBRARY_PATH=${BUILD_DESTINATION}/lib:${LD_LIBRARY_PATH:-""} log_section_end "Environment Setup" # Add debug options if ENABLE_DEBUG is set to "true" @@ -162,6 +162,8 @@ execute_cmd ./configure --prefix=${BUILD_DESTINATION} \ --disable-pxf \ --enable-tap-tests \ ${CONFIGURE_DEBUG_OPTS} \ + --with-diskquota \ + --with-gp-stats-collector \ --with-gssapi \ --with-ldap \ --with-libxml \ diff --git a/devops/build/automation/cloudberry/scripts/parse-results.pl b/devops/build/automation/cloudberry/scripts/parse-results.pl index d09085d5fb9..2c754bcae9d 100755 --- a/devops/build/automation/cloudberry/scripts/parse-results.pl +++ b/devops/build/automation/cloudberry/scripts/parse-results.pl @@ -110,7 +110,7 @@ my @ignored_test_list = (); while (<$fh>) { - # Match the summary lines + # Match the summary lines (pg_regress format) if (/All (\d+) tests passed\./) { $status = 'passed'; $total_tests = $1; @@ -132,8 +132,22 @@ $status = 'failed'; $failed_tests = $1 - $3; $ignored_tests = $3; - $total_tests = $2; - $passed_tests = $2 - $1; + + # TAP/prove summary format: "Files=N, Tests=N, ..." 
+ } elsif (/^Files=\d+, Tests=(\d+),/) { + $total_tests = $1; + + # TAP/prove result: "Result: PASS" or "Result: FAIL" + } elsif (/^Result: PASS/) { + $status = 'passed'; + $passed_tests = $total_tests; + $failed_tests = 0; + } elsif (/^Result: FAIL/) { + $status = 'failed'; + + # TAP individual test failure: " t/xxx.pl (Wstat: ...)" + } elsif (/^\s+(t\/\S+\.pl)\s+\(Wstat:/) { + push @failed_test_list, $1; } # Capture failed tests @@ -150,8 +164,15 @@ # Close the log file close $fh; -# Validate failed test count matches found test names -if ($status eq 'failed' && scalar(@failed_test_list) != $failed_tests) { +# For TAP format, derive failed/passed counts from collected test names +if ($status eq 'failed' && $failed_tests == 0 && scalar(@failed_test_list) > 0) { + $failed_tests = scalar(@failed_test_list); + $passed_tests = $total_tests - $failed_tests if $total_tests > 0; +} + +# Validate failed test count matches found test names (pg_regress format only) +if ($status eq 'failed' && $failed_tests > 0 && scalar(@failed_test_list) > 0 + && scalar(@failed_test_list) != $failed_tests) { print "Error: Found $failed_tests failed tests in summary but found " . scalar(@failed_test_list) . 
" failed test names\n"; print "Failed test names found:\n"; foreach my $test (@failed_test_list) { diff --git a/devops/build/automation/cloudberry/scripts/unittest-cloudberry.sh b/devops/build/automation/cloudberry/scripts/unittest-cloudberry.sh index 97107ea1a9f..69536f0067f 100755 --- a/devops/build/automation/cloudberry/scripts/unittest-cloudberry.sh +++ b/devops/build/automation/cloudberry/scripts/unittest-cloudberry.sh @@ -56,7 +56,7 @@ init_environment "Cloudberry Unittest Script" "${UNITTEST_LOG}" # Set environment log_section "Environment Setup" -export LD_LIBRARY_PATH=${BUILD_DESTINATION}/lib:LD_LIBRARY_PATH +export LD_LIBRARY_PATH=${BUILD_DESTINATION}/lib:${LD_LIBRARY_PATH:-""} log_section_end "Environment Setup" # Unittest process diff --git a/devops/build/packaging/deb/build-deb.sh b/devops/build/packaging/deb/build-deb.sh index 1f5aef2258a..61a29e50fc9 100755 --- a/devops/build/packaging/deb/build-deb.sh +++ b/devops/build/packaging/deb/build-deb.sh @@ -109,7 +109,7 @@ export CBDB_FULL_VERSION=$VERSION # Set version if not provided if [ -z "${VERSION}" ]; then - export CBDB_FULL_VERSION=$(./getversion | cut -d'-' -f 1 | cut -d'+' -f 1) + export CBDB_FULL_VERSION=$(./getversion 2>/dev/null | cut -d'-' -f 1 | cut -d'+' -f 1 || echo "unknown") fi if [[ ! $CBDB_FULL_VERSION =~ ^[0-9] ]]; then @@ -127,22 +127,48 @@ fi # Detect OS distribution (e.g., ubuntu22.04, debian12) if [ -z ${OS_DISTRO+x} ]; then if [ -f /etc/os-release ]; then + # Temporarily disable unbound variable check for sourcing os-release + set +u . 
/etc/os-release - OS_DISTRO=$(echo "${ID}${VERSION_ID}" | tr '[:upper:]' '[:lower:]') + set -u + # Ensure ID and VERSION_ID are set before using them + OS_DISTRO=$(echo "${ID:-unknown}${VERSION_ID:-}" | tr '[:upper:]' '[:lower:]') else OS_DISTRO="unknown" fi fi +# Ensure OS_DISTRO is exported and not empty +export OS_DISTRO=${OS_DISTRO:-unknown} + export CBDB_PKG_VERSION=${CBDB_FULL_VERSION}-${BUILD_NUMBER}-${OS_DISTRO} # Check if required commands are available check_commands -# Define the control file path -CONTROL_FILE=debian/control +# Find project root (assumed to be four levels up from scripts directory: devops/build/packaging/deb/) +PROJECT_ROOT="$(cd "$(dirname "$0")/../../../../" && pwd)" + +# Define where the debian metadata is located +DEBIAN_SRC_DIR="$(dirname "$0")/${OS_DISTRO}" + +# Prepare the debian directory at the project root (required by dpkg-buildpackage) +if [ -d "$DEBIAN_SRC_DIR" ]; then + echo "Preparing debian directory from $DEBIAN_SRC_DIR..." + mkdir -p "$PROJECT_ROOT/debian" + # Use /. to copy directory contents if target exists instead of nested directories + cp -rf "$DEBIAN_SRC_DIR"/. "$PROJECT_ROOT/debian/" +else + if [ ! -d "$PROJECT_ROOT/debian" ]; then + echo "Error: Debian metadata not found at $DEBIAN_SRC_DIR and no debian/ directory exists at root." + exit 1 + fi +fi + +# Define the control file path (at the project root) +CONTROL_FILE="$PROJECT_ROOT/debian/control" -# Check if the spec file exists +# Check if the control file exists if [ ! -f "$CONTROL_FILE" ]; then echo "Error: Control file not found at $CONTROL_FILE." exit 1 @@ -160,10 +186,15 @@ if [ "${DRY_RUN:-false}" = true ]; then exit 0 fi -# Run debbuild with the provided options -echo "Building DEB with Version $CBDB_FULL_VERSION ..." +# Run debbuild from the project root +echo "Building DEB with Version $CBDB_FULL_VERSION in $PROJECT_ROOT ..." 
+ +print_changelog > "$PROJECT_ROOT/debian/changelog" -print_changelog > debian/changelog +# Only cd if we are not already at the project root +if [ "$(pwd)" != "$PROJECT_ROOT" ]; then + cd "$PROJECT_ROOT" +fi if ! eval "$DEBBUILD_CMD"; then echo "Error: deb build failed." diff --git a/devops/build/packaging/deb/ubuntu22.04/control b/devops/build/packaging/deb/ubuntu22.04/control index 4bc5d90b84d..6b05863b780 100644 --- a/devops/build/packaging/deb/ubuntu22.04/control +++ b/devops/build/packaging/deb/ubuntu22.04/control @@ -46,6 +46,8 @@ Provides: apache-cloudberry-db Architecture: any Depends: curl, cgroup-tools, + debianutils, + hostname, iputils-ping, iproute2, keyutils, diff --git a/devops/build/packaging/deb/ubuntu22.04/rules b/devops/build/packaging/deb/ubuntu22.04/rules index cb387d209e6..463486cf03f 100755 --- a/devops/build/packaging/deb/ubuntu22.04/rules +++ b/devops/build/packaging/deb/ubuntu22.04/rules @@ -19,7 +19,22 @@ include /usr/share/dpkg/default.mk dh $@ --parallel gpinstall: - make install DESTDIR=${DEBIAN_DESTINATION} prefix= + # If the build staging directory is empty, copy from the pre-installed location. + # In CI, BUILD_DESTINATION already points here so it will be populated. + # For local manual packaging, copy from the installed Cloudberry path. 
+ @mkdir -p ${DEBIAN_DESTINATION} + @if [ -z "$$(ls -A ${DEBIAN_DESTINATION} 2>/dev/null)" ]; then \ + echo "Copying pre-built binaries from ${CBDB_BIN_PATH} to ${DEBIAN_DESTINATION}..."; \ + cp -a ${CBDB_BIN_PATH}/* ${DEBIAN_DESTINATION}/; \ + else \ + echo "Build staging directory already populated, skipping copy."; \ + fi + # Copy Apache compliance files into the build staging directory + cp -a LICENSE NOTICE DISCLAIMER ${DEBIAN_DESTINATION}/ + cp -a licenses ${DEBIAN_DESTINATION}/ + # Create debian/copyright for Debian policy compliance + mkdir -p $(shell pwd)/debian + cat LICENSE NOTICE > $(shell pwd)/debian/copyright override_dh_auto_install: gpinstall # the staging directory for creating a debian is NOT the right GPHOME. diff --git a/devops/build/packaging/deb/ubuntu24.04/control b/devops/build/packaging/deb/ubuntu24.04/control index a561d8a4386..9e2c3eab451 100644 --- a/devops/build/packaging/deb/ubuntu24.04/control +++ b/devops/build/packaging/deb/ubuntu24.04/control @@ -46,6 +46,8 @@ Provides: apache-cloudberry-db Architecture: amd64 Depends: curl, cgroup-tools, + debianutils, + hostname, iputils-ping, iproute2, keyutils, diff --git a/devops/build/packaging/rpm/apache-cloudberry-db-incubating.spec b/devops/build/packaging/rpm/apache-cloudberry-db-incubating.spec index 03fa0a34570..e228f8fe76a 100644 --- a/devops/build/packaging/rpm/apache-cloudberry-db-incubating.spec +++ b/devops/build/packaging/rpm/apache-cloudberry-db-incubating.spec @@ -52,12 +52,15 @@ Prefix: %{cloudberry_install_dir} # List runtime dependencies Requires: bash +Requires: hostname Requires: iproute Requires: iputils +Requires: less Requires: openssh Requires: openssh-clients Requires: openssh-server Requires: rsync +Requires: which %if 0%{?rhel} == 8 Requires: apr @@ -152,6 +155,12 @@ mkdir -p %{buildroot}%{cloudberry_install_dir}-%{version} cp -R %{cloudberry_install_dir}/* %{buildroot}%{cloudberry_install_dir}-%{version} +# Copy Apache mandatory compliance files from the SOURCES 
directory into the installation directory +cp %{_sourcedir}/LICENSE %{buildroot}%{cloudberry_install_dir}-%{version}/ +cp %{_sourcedir}/NOTICE %{buildroot}%{cloudberry_install_dir}-%{version}/ +cp %{_sourcedir}/DISCLAIMER %{buildroot}%{cloudberry_install_dir}-%{version}/ +cp -R %{_sourcedir}/licenses %{buildroot}%{cloudberry_install_dir}-%{version}/ + # Create the symbolic link ln -sfn %{cloudberry_install_dir}-%{version} %{buildroot}%{cloudberry_install_dir} @@ -159,8 +168,6 @@ ln -sfn %{cloudberry_install_dir}-%{version} %{buildroot}%{cloudberry_install_di %{prefix}-%{version} %{prefix} -%license %{cloudberry_install_dir}-%{version}/LICENSE - %debug_package %post diff --git a/devops/build/packaging/rpm/build-rpm.sh b/devops/build/packaging/rpm/build-rpm.sh index ceb7d18d392..2c490166f45 100755 --- a/devops/build/packaging/rpm/build-rpm.sh +++ b/devops/build/packaging/rpm/build-rpm.sh @@ -118,10 +118,46 @@ fi # Check if required commands are available check_commands -# Define the spec file path +# Define the source spec file path (assuming it is in the same directory as the script) +SOURCE_SPEC_FILE="$(dirname "$0")/apache-cloudberry-db-incubating.spec" + +# Ensure rpmbuild SPECS and SOURCES directories exist +mkdir -p ~/rpmbuild/SPECS +mkdir -p ~/rpmbuild/SOURCES + +# Find project root (assumed to be four levels up from scripts directory: devops/build/packaging/rpm/) +PROJECT_ROOT="$(cd "$(dirname "$0")/../../../../" && pwd)" + +# Define the target spec file path SPEC_FILE=~/rpmbuild/SPECS/apache-cloudberry-db-incubating.spec -# Check if the spec file exists +# Copy the spec file to rpmbuild/SPECS if the source exists and is different +if [ -f "$SOURCE_SPEC_FILE" ]; then + # Avoid copying if SPEC_FILE is already a symlink/file pointing to SOURCE_SPEC_FILE (common in CI) + if [ ! 
"$SOURCE_SPEC_FILE" -ef "$SPEC_FILE" ]; then + cp -f "$SOURCE_SPEC_FILE" "$SPEC_FILE" + fi +else + echo "Warning: Source spec file not found at $SOURCE_SPEC_FILE, assuming it is already in ~/rpmbuild/SPECS/" +fi + +# Copy Apache mandatory compliance files to rpmbuild/SOURCES +echo "Copying compliance files from $PROJECT_ROOT to ~/rpmbuild/SOURCES..." +for f in LICENSE NOTICE DISCLAIMER; do + if [ -f "$PROJECT_ROOT/$f" ]; then + cp -af "$PROJECT_ROOT/$f" ~/rpmbuild/SOURCES/ + else + echo "Warning: $f not found in $PROJECT_ROOT" + fi +done + +if [ -d "$PROJECT_ROOT/licenses" ]; then + cp -af "$PROJECT_ROOT/licenses" ~/rpmbuild/SOURCES/ +else + echo "Warning: licenses directory not found in $PROJECT_ROOT" +fi + +# Check if the spec file exists at the target location before proceeding if [ ! -f "$SPEC_FILE" ]; then echo "Error: Spec file not found at $SPEC_FILE." exit 1 diff --git a/devops/deploy/docker/build/rocky10/Dockerfile b/devops/deploy/docker/build/rocky10/Dockerfile new file mode 100644 index 00000000000..3ab40229020 --- /dev/null +++ b/devops/deploy/docker/build/rocky10/Dockerfile @@ -0,0 +1,217 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# -------------------------------------------------------------------- +# +# Apache Cloudberry (Incubating) is an effort undergoing incubation at +# the Apache Software Foundation (ASF), sponsored by the Apache +# Incubator PMC. +# +# Incubation is required of all newly accepted projects until a +# further review indicates that the infrastructure, communications, +# and decision making process have stabilized in a manner consistent +# with other successful ASF projects. +# +# While incubation status is not necessarily a reflection of the +# completeness or stability of the code, it does indicate that the +# project has yet to be fully endorsed by the ASF. +# +# -------------------------------------------------------------------- +# Dockerfile for Apache Cloudberry Build Environment +# -------------------------------------------------------------------- +# This Dockerfile sets up a Rocky Linux 10-based container for building +# and developing Apache Cloudberry. It installs necessary system +# utilities, development tools, and configures the environment for SSH +# access and systemd support. +# +# Key Features: +# - Locale setup for en_US.UTF-8 +# - SSH daemon setup for remote access +# - Essential development tools and libraries installation +# - User configuration for 'gpadmin' with sudo privileges +# +# Usage: +# docker build -t cloudberry-db-env . 
+# docker run -h cdw -it cloudberry-db-env +# -------------------------------------------------------------------- + +# Base image: Rocky Linux 10 +FROM rockylinux/rockylinux:10 + +# Argument for configuring the timezone +ARG TIMEZONE_VAR="America/Los_Angeles" + +# Environment variables for locale and user +ENV container=docker +ENV LANG=en_US.UTF-8 +ENV USER=gpadmin + +# -------------------------------------------------------------------- +# Install Development Tools and Utilities +# -------------------------------------------------------------------- +# Install various development tools, system utilities, and libraries +# required for building and running Apache Cloudberry. +# - EPEL repository is enabled for additional packages. +# - Cleanup steps are added to reduce image size after installation. +# -------------------------------------------------------------------- +RUN dnf makecache && \ + dnf install -y \ + epel-release \ + git && \ + dnf makecache && \ + dnf config-manager --disable epel && \ + dnf install -y --enablerepo=epel \ + bat \ + libssh2-devel \ + python3-devel \ + htop && \ + dnf install -y \ + bison \ + cmake3 \ + ed \ + file \ + flex \ + gcc \ + gcc-c++ \ + gdb \ + glibc-langpack-en \ + glibc-locale-source \ + initscripts \ + iproute \ + less \ + lsof \ + m4 \ + net-tools \ + openssh-clients \ + openssh-server \ + perl \ + rpm-build \ + rpmdevtools \ + rsync \ + sudo \ + tar \ + unzip \ + util-linux-ng \ + wget \ + sshpass \ + which && \ + dnf install -y \ + apr-devel \ + bzip2-devel \ + java-21-openjdk \ + java-21-openjdk-devel \ + krb5-devel \ + libcurl-devel \ + libevent-devel \ + libicu-devel \ + libxml2-devel \ + libuuid-devel \ + libzstd-devel \ + lz4 \ + lz4-devel \ + openldap-devel \ + openssl-devel \ + pam-devel \ + perl-ExtUtils-Embed \ + perl-Test-Simple \ + perl-core \ + python3-setuptools \ + readline-devel \ + zlib-devel && \ + dnf install -y --enablerepo=crb \ + liburing-devel \ + libuv-devel \ + libyaml-devel \ + perl-IPC-Run \ 
+ python3-wheel \ + protobuf-devel && \ + dnf clean all && \ + cd && XERCES_LATEST_RELEASE=3.3.0 && \ + wget -nv "https://archive.apache.org/dist/xerces/c/3/sources/xerces-c-${XERCES_LATEST_RELEASE}.tar.gz" && \ + echo "$(curl -sL https://archive.apache.org/dist/xerces/c/3/sources/xerces-c-${XERCES_LATEST_RELEASE}.tar.gz.sha256)" | sha256sum -c - && \ + tar xf "xerces-c-${XERCES_LATEST_RELEASE}.tar.gz"; rm "xerces-c-${XERCES_LATEST_RELEASE}.tar.gz" && \ + cd xerces-c-${XERCES_LATEST_RELEASE} && \ + ./configure --prefix=/usr/local/xerces-c && \ + make -j$(nproc) && \ + make install -C ~/xerces-c-${XERCES_LATEST_RELEASE} && \ + rm -rf ~/xerces-c* && \ + cd && GO_VERSION="go1.25.10" && \ + ARCH=$(uname -m) && \ + if [ "${ARCH}" = "aarch64" ]; then \ + GO_ARCH="arm64" && \ + GO_SHA256="654da1f9b50a5d1c2a85ccf8ed405aa89c06e94d18384628bf186f7712677b08"; \ + elif [ "${ARCH}" = "x86_64" ]; then \ + GO_ARCH="amd64" && \ + GO_SHA256="42d4f7a32316aa66591eca7e89867256057a4264451aca10570a715b3637ba70"; \ + else \ + echo "Unsupported architecture: ${ARCH}" && exit 1; \ + fi && \ + GO_URL="https://go.dev/dl/${GO_VERSION}.linux-${GO_ARCH}.tar.gz" && \ + wget -nv "${GO_URL}" && \ + echo "${GO_SHA256} ${GO_VERSION}.linux-${GO_ARCH}.tar.gz" | sha256sum -c - && \ + tar xf "${GO_VERSION}.linux-${GO_ARCH}.tar.gz" && \ + mv go "/usr/local/${GO_VERSION}" && \ + ln -s "/usr/local/${GO_VERSION}" /usr/local/go && \ + rm -f "${GO_VERSION}.linux-${GO_ARCH}.tar.gz" && \ + echo 'export PATH=$PATH:/usr/local/go/bin' | tee -a /etc/profile.d/go.sh > /dev/null + +# -------------------------------------------------------------------- +# Copy Configuration Files and Setup the Environment +# -------------------------------------------------------------------- +# - Copy custom configuration files from the build context to /tmp/. +# - Apply custom system limits and timezone. +# - Create and configure the 'gpadmin' user with sudo privileges. +# - Set up SSH for password-based authentication. 
+# - Generate locale and set the default locale to en_US.UTF-8. +# -------------------------------------------------------------------- + +# Copy configuration files from their respective locations +COPY ./configs/* /tmp/ + +RUN cp /tmp/90-cbdb-limits /etc/security/limits.d/90-cbdb-limits && \ + sed -i.bak -r 's/^(session\s+required\s+pam_limits.so)/#\1/' /etc/pam.d/* && \ + cat /usr/share/zoneinfo/${TIMEZONE_VAR} > /etc/localtime && \ + chmod 777 /tmp/init_system.sh && \ + /usr/sbin/groupadd gpadmin && \ + /usr/sbin/useradd gpadmin -g gpadmin -G wheel && \ + setcap cap_net_raw+ep /usr/bin/ping && \ + echo 'gpadmin ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/90-gpadmin && \ + echo -e '\n# Add Cloudberry entries\nif [ -f /usr/local/cbdb/cloudberry-env.sh ]; then\n source /usr/local/cbdb/cloudberry-env.sh\nfi' >> /home/gpadmin/.bashrc && \ + ssh-keygen -A && \ + echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config && \ + localedef -i en_US -f UTF-8 en_US.UTF-8 && \ + echo "LANG=en_US.UTF-8" | tee /etc/locale.conf && \ + dnf clean all # Final cleanup to remove unnecessary files + +# Install testinfra via pip +RUN pip3 install pytest-testinfra + +# Copying test files into the container +COPY ./tests /tests + +# -------------------------------------------------------------------- +# Set the Default User and Command +# -------------------------------------------------------------------- +# The default user is set to 'gpadmin', and the container starts by +# running the init_system.sh script. The container also mounts the +# /sys/fs/cgroup volume for systemd compatibility. 
+# -------------------------------------------------------------------- +USER gpadmin + +VOLUME [ "/sys/fs/cgroup" ] +CMD ["bash","-c","/tmp/init_system.sh"] diff --git a/devops/deploy/docker/build/rocky10/configs/90-cbdb-limits b/devops/deploy/docker/build/rocky10/configs/90-cbdb-limits new file mode 100644 index 00000000000..474957c42f6 --- /dev/null +++ b/devops/deploy/docker/build/rocky10/configs/90-cbdb-limits @@ -0,0 +1,32 @@ +# /etc/security/limits.d/90-db-limits +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# -------------------------------------------------------------------- + +# Core dump file size limits for gpadmin +gpadmin soft core unlimited +gpadmin hard core unlimited + +# Open file limits for gpadmin +gpadmin soft nofile 524288 +gpadmin hard nofile 524288 + +# Process limits for gpadmin +gpadmin soft nproc 131072 +gpadmin hard nproc 131072 diff --git a/devops/deploy/docker/build/rocky10/configs/gpinitsystem.conf b/devops/deploy/docker/build/rocky10/configs/gpinitsystem.conf new file mode 100644 index 00000000000..d4d312231c5 --- /dev/null +++ b/devops/deploy/docker/build/rocky10/configs/gpinitsystem.conf @@ -0,0 +1,89 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- + +# -------------------------------------------------------------------- +# gpinitsystem Configuration File for Apache Cloudberry +# -------------------------------------------------------------------- +# This configuration file is used to initialize an Apache Cloudberry +# cluster. It defines the settings for the coordinator, primary segments, +# and mirrors, as well as other important configuration options. 
+# -------------------------------------------------------------------- + +# Segment prefix - This prefix is used for naming the segment directories. +# For example, the primary segment directories will be named gpseg0, gpseg1, etc. +SEG_PREFIX=gpseg + +# Coordinator port - The port number where the coordinator will listen. +# This is the port used by clients to connect to the database. +COORDINATOR_PORT=5432 + +# Coordinator hostname - The hostname of the machine where the coordinator +# will be running. The $(hostname) command will automatically insert the +# hostname of the current machine. +COORDINATOR_HOSTNAME=$(hostname) + +# Coordinator data directory - The directory where the coordinator's data +# will be stored. This directory should have enough space to store metadata +# and system catalogs. +COORDINATOR_DIRECTORY=/data1/coordinator + +# Base port for primary segments - The starting port number for the primary +# segments. Each primary segment will use a unique port number starting from +# this base. +PORT_BASE=6000 + +# Primary segment data directories - An array specifying the directories where +# the primary segment data will be stored. Each directory corresponds to a +# primary segment. In this case, two primary segments will be created in the +# same directory. +declare -a DATA_DIRECTORY=(/data1/primary /data1/primary) + +# Base port for mirror segments - The starting port number for the mirror +# segments. Each mirror segment will use a unique port number starting from +# this base. +MIRROR_PORT_BASE=7000 + +# Mirror segment data directories - An array specifying the directories where +# the mirror segment data will be stored. Each directory corresponds to a +# mirror segment. In this case, two mirror segments will be created in the +# same directory. +declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror) + +# Trusted shell - The shell program used for remote execution. 
Cloudberry uses +# SSH to run commands on other machines in the cluster. 'ssh' is the default. +TRUSTED_SHELL=ssh + +# Database encoding - The character set encoding to be used by the database. +# 'UNICODE' is a common choice, especially for internationalization. +ENCODING=UNICODE + +# Default database name - The name of the default database to be created during +# initialization. This is also the default database that the gpadmin user will +# connect to. +DATABASE_NAME=gpadmin + +# Machine list file - A file containing the list of hostnames where the primary +# segments will be created. Each line in the file represents a different machine. +# This file is critical for setting up the cluster across multiple nodes. +MACHINE_LIST_FILE=/home/gpadmin/hostfile_gpinitsystem + +# -------------------------------------------------------------------- +# End of gpinitsystem Configuration File +# -------------------------------------------------------------------- diff --git a/devops/deploy/docker/build/rocky10/configs/init_system.sh b/devops/deploy/docker/build/rocky10/configs/init_system.sh new file mode 100755 index 00000000000..d8c4a00b035 --- /dev/null +++ b/devops/deploy/docker/build/rocky10/configs/init_system.sh @@ -0,0 +1,192 @@ +#!/bin/bash +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. 
You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +## Container Initialization Script +# -------------------------------------------------------------------- +## This script sets up the environment inside the Docker container for +## the Apache Cloudberry Build Environment. It performs the following +## tasks: +## +## 1. Verifies that the container is running with the expected hostname. +## 2. Starts the SSH daemon to allow SSH access to the container. +## 3. Configures passwordless SSH access for the 'gpadmin' user. +## 4. Displays a welcome banner and system information. +## 5. Starts an interactive bash shell. +## +## This script is intended to be used as an entrypoint or initialization +## script for the Docker container. +# -------------------------------------------------------------------- + +# -------------------------------------------------------------------- +# Check if the hostname is 'cdw' +# -------------------------------------------------------------------- +# The script checks if the container's hostname is set to 'cdw'. This is +# a requirement for this environment, and if the hostname does not match, +# the script will exit with an error message. This ensures consistency +# across different environments. +# -------------------------------------------------------------------- +if [ "$(hostname)" != "cdw" ]; then + echo "Error: This container must be run with the hostname 'cdw'." + echo "Use the following command: docker run -h cdw ..." 
+ exit 1 +fi + +# -------------------------------------------------------------------- +# Start SSH daemon and setup for SSH access +# -------------------------------------------------------------------- +# The SSH daemon is started to allow remote access to the container via +# SSH. This is useful for development and debugging purposes. If the SSH +# daemon fails to start, the script exits with an error. +# -------------------------------------------------------------------- +if ! sudo /usr/sbin/sshd; then + echo "Failed to start SSH daemon" >&2 + exit 1 +fi + +# -------------------------------------------------------------------- +# Remove /run/nologin to allow logins +# -------------------------------------------------------------------- +# The /run/nologin file, if present, prevents users from logging into +# the system. This file is removed to ensure that users can log in via SSH. +# -------------------------------------------------------------------- +sudo rm -rf /run/nologin + +# -------------------------------------------------------------------- +# Configure passwordless SSH access for 'gpadmin' user +# -------------------------------------------------------------------- +# The script sets up SSH key-based authentication for the 'gpadmin' user, +# allowing passwordless SSH access. It generates a new SSH key pair if one +# does not already exist, and configures the necessary permissions. +# -------------------------------------------------------------------- +mkdir -p /home/gpadmin/.ssh +chmod 700 /home/gpadmin/.ssh + +if [ ! 
-f /home/gpadmin/.ssh/id_rsa ]; then + ssh-keygen -t rsa -b 4096 -C gpadmin -f /home/gpadmin/.ssh/id_rsa -P "" > /dev/null 2>&1 +fi + +cat /home/gpadmin/.ssh/id_rsa.pub >> /home/gpadmin/.ssh/authorized_keys +chmod 600 /home/gpadmin/.ssh/authorized_keys + +# Add the container's hostname to the known_hosts file to avoid SSH warnings +ssh-keyscan -t rsa cdw > /home/gpadmin/.ssh/known_hosts 2>/dev/null + +# Change to the home directory of the current user +cd $HOME + +# -------------------------------------------------------------------- +# Display a Welcome Banner +# -------------------------------------------------------------------- +# The following ASCII art and welcome message are displayed when the +# container starts. This banner provides a visual indication that the +# container is running in the Apache Cloudberry Build Environment. +# -------------------------------------------------------------------- +cat <<-'EOF' + +====================================================================== + + ++++++++++ ++++++ + ++++++++++++++ +++++++ + ++++ +++++ ++++ + ++++ +++++++++ + =+==== =============+ + ======== =====+ ===== + ==== ==== ==== ==== + ==== === === ==== + ==== === === ==== + ==== === ==-- === + ===== ===== -- ==== + ===================== ====== + ============================ + =-----= + ____ _ _ _ + / ___|| | ___ _ _ __| || |__ ___ _ __ _ __ _ _ + | | | | / _ \ | | | | / _` || '_ \ / _ \| '__|| '__|| | | | + | |___ | || (_) || |_| || (_| || |_) || __/| | | | | |_| | + \____||_| \____ \__,_| \__,_||_.__/ \___||_| |_| \__, | + |___/ +---------------------------------------------------------------------- + +EOF + +# -------------------------------------------------------------------- +# Display System Information +# -------------------------------------------------------------------- +# The script sources the /etc/os-release file to retrieve the operating +# system name and version. 
It then displays the following information: +# - OS name and version +# - Current user +# - Container hostname +# - IP address +# - CPU model name and number of cores +# - Total memory available +# This information is useful for users to understand the environment they +# are working in. +# -------------------------------------------------------------------- +source /etc/os-release + +# First, create the CPU info detection function +get_cpu_info() { + ARCH=$(uname -m) + if [ "$ARCH" = "x86_64" ]; then + lscpu | grep 'Model name:' | awk '{print substr($0, index($0,$3))}' + elif [ "$ARCH" = "aarch64" ]; then + VENDOR=$(lscpu | grep 'Vendor ID:' | awk '{print $3}') + if [ "$VENDOR" = "Apple" ] || [ "$VENDOR" = "0x61" ]; then + echo "Apple Silicon ($ARCH)" + else + if [ -f /proc/cpuinfo ]; then + IMPL=$(grep "CPU implementer" /proc/cpuinfo | head -1 | awk '{print $3}') + PART=$(grep "CPU part" /proc/cpuinfo | head -1 | awk '{print $3}') + if [ ! -z "$IMPL" ] && [ ! -z "$PART" ]; then + echo "ARM $ARCH (Implementer: $IMPL, Part: $PART)" + else + echo "ARM $ARCH" + fi + else + echo "ARM $ARCH" + fi + fi + else + echo "Unknown architecture: $ARCH" + fi +} + +cat <<-EOF +Welcome to the Apache Cloudberry Build Environment! + +Container OS ........ : $NAME $VERSION +User ................ : $(whoami) +Container hostname .. : $(hostname) +IP Address .......... : $(hostname -I | awk '{print $1}') +CPU Info ............ : $(get_cpu_info) +CPU(s) .............. : $(nproc) +Memory .............. : $(free -h | grep Mem: | awk '{print $2}') total +====================================================================== + +EOF + +# -------------------------------------------------------------------- +# Start an interactive bash shell +# -------------------------------------------------------------------- +# Finally, the script starts an interactive bash shell to keep the +# container running and allow the user to interact with the environment. 
+# -------------------------------------------------------------------- +/bin/bash diff --git a/devops/deploy/docker/build/rocky10/tests/requirements.txt b/devops/deploy/docker/build/rocky10/tests/requirements.txt new file mode 100644 index 00000000000..b9711eddac5 --- /dev/null +++ b/devops/deploy/docker/build/rocky10/tests/requirements.txt @@ -0,0 +1,3 @@ +testinfra +pytest-testinfra +paramiko diff --git a/devops/deploy/docker/build/rocky10/tests/testinfra/test_cloudberry_db_env.py b/devops/deploy/docker/build/rocky10/tests/testinfra/test_cloudberry_db_env.py new file mode 100644 index 00000000000..445318f5335 --- /dev/null +++ b/devops/deploy/docker/build/rocky10/tests/testinfra/test_cloudberry_db_env.py @@ -0,0 +1,127 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- + +import testinfra + +def test_installed_packages(host): + """ + Test if the essential packages are installed. 
+ """ + packages = [ + "epel-release", + "git", + "bat", + "htop", + "bison", + "cmake", + "gcc", + "gcc-c++", + "glibc-langpack-en", + "glibc-locale-source", + "openssh-clients", + "openssh-server", + "sudo", + "rsync", + "wget", + "openssl-devel", + "python3-devel", + "readline-devel", + "zlib-ng-compat-devel", + "libcurl-devel", + "libevent-devel", + "libxml2-devel", + "libuuid-devel", + "libzstd-devel", + "lz4", + "openldap-devel", + "libuv-devel", + "libyaml-devel" + ] + for package in packages: + pkg = host.package(package) + assert pkg.is_installed + + +def test_user_gpadmin_exists(host): + """ + Test if the gpadmin user exists and is configured properly. + """ + user = host.user("gpadmin") + assert user.exists + assert "wheel" in user.groups + + +def test_ssh_service(host): + """ + Test if SSH service is configured correctly. + """ + sshd_config = host.file("/etc/ssh/sshd_config") + assert sshd_config.exists + + +def test_locale_configured(host): + """ + Test if the locale is configured correctly. + """ + locale_conf = host.file("/etc/locale.conf") + assert locale_conf.exists + assert locale_conf.contains("LANG=en_US.UTF-8") + + +def test_timezone(host): + """ + Test if the timezone is configured correctly. + """ + localtime = host.file("/etc/localtime") + assert localtime.exists + + +def test_system_limits_configured(host): + """ + Test if the custom system limits are applied. + """ + limits_file = host.file("/etc/security/limits.d/90-cbdb-limits") + assert limits_file.exists + + +def test_init_system_script(host): + """ + Test if the init_system.sh script is present and executable. + """ + script = host.file("/tmp/init_system.sh") + assert script.exists + assert script.mode == 0o777 + + +def test_custom_configuration_files(host): + """ + Test if custom configuration files are correctly copied. 
+ """ + config_file = host.file("/tmp/90-cbdb-limits") + assert config_file.exists + + +def test_locale_generated(host): + """ + Test if the en_US.UTF-8 locale is correctly generated. + """ + locale = host.run("locale -a | grep en_US.utf8") + assert locale.exit_status == 0 + assert "en_US.utf8" in locale.stdout diff --git a/devops/deploy/docker/build/rocky8/Dockerfile b/devops/deploy/docker/build/rocky8/Dockerfile index 45d6706e593..eed55e4b171 100644 --- a/devops/deploy/docker/build/rocky8/Dockerfile +++ b/devops/deploy/docker/build/rocky8/Dockerfile @@ -99,6 +99,7 @@ RUN dnf makecache && \ krb5-devel \ libcurl-devel \ libevent-devel \ + libicu-devel \ libssh2-devel \ libuuid-devel \ libxml2-devel \ @@ -150,14 +151,14 @@ RUN dnf makecache && \ make -j$(nproc) && \ make install -C ~/xerces-c-${XERCES_LATEST_RELEASE} && \ rm -rf ~/xerces-c* && \ - cd && GO_VERSION="go1.23.4" && \ + cd && GO_VERSION="go1.25.10" && \ ARCH=$(uname -m) && \ if [ "${ARCH}" = "aarch64" ]; then \ GO_ARCH="arm64" && \ - GO_SHA256="16e5017863a7f6071363782b1b8042eb12c6ca4f4cd71528b2123f0a1275b13e"; \ + GO_SHA256="654da1f9b50a5d1c2a85ccf8ed405aa89c06e94d18384628bf186f7712677b08"; \ elif [ "${ARCH}" = "x86_64" ]; then \ GO_ARCH="amd64" && \ - GO_SHA256="6924efde5de86fe277676e929dc9917d466efa02fb934197bc2eba35d5680971"; \ + GO_SHA256="42d4f7a32316aa66591eca7e89867256057a4264451aca10570a715b3637ba70"; \ else \ echo "Unsupported architecture: ${ARCH}" && exit 1; \ fi && \ diff --git a/devops/deploy/docker/build/rocky9/Dockerfile b/devops/deploy/docker/build/rocky9/Dockerfile index 26190109ef0..592d9b2e10a 100644 --- a/devops/deploy/docker/build/rocky9/Dockerfile +++ b/devops/deploy/docker/build/rocky9/Dockerfile @@ -120,6 +120,7 @@ RUN dnf makecache && \ libcurl-devel \ libssh2-devel \ libevent-devel \ + libicu-devel \ libxml2-devel \ libuuid-devel \ libzstd-devel \ @@ -151,14 +152,14 @@ RUN dnf makecache && \ make -j$(nproc) && \ make install -C ~/xerces-c-${XERCES_LATEST_RELEASE} && \ rm -rf 
~/xerces-c* && \ - cd && GO_VERSION="go1.23.4" && \ + cd && GO_VERSION="go1.25.10" && \ ARCH=$(uname -m) && \ if [ "${ARCH}" = "aarch64" ]; then \ GO_ARCH="arm64" && \ - GO_SHA256="16e5017863a7f6071363782b1b8042eb12c6ca4f4cd71528b2123f0a1275b13e"; \ + GO_SHA256="654da1f9b50a5d1c2a85ccf8ed405aa89c06e94d18384628bf186f7712677b08"; \ elif [ "${ARCH}" = "x86_64" ]; then \ GO_ARCH="amd64" && \ - GO_SHA256="6924efde5de86fe277676e929dc9917d466efa02fb934197bc2eba35d5680971"; \ + GO_SHA256="42d4f7a32316aa66591eca7e89867256057a4264451aca10570a715b3637ba70"; \ else \ echo "Unsupported architecture: ${ARCH}" && exit 1; \ fi && \ diff --git a/devops/deploy/docker/build/ubuntu22.04/Dockerfile b/devops/deploy/docker/build/ubuntu22.04/Dockerfile index 3023a9fce67..d32467338bd 100644 --- a/devops/deploy/docker/build/ubuntu22.04/Dockerfile +++ b/devops/deploy/docker/build/ubuntu22.04/Dockerfile @@ -144,14 +144,14 @@ RUN apt-get update && \ quilt \ unzip && \ apt-get clean && rm -rf /var/lib/apt/lists/* && \ - cd && GO_VERSION="go1.23.4" && \ + cd && GO_VERSION="go1.25.10" && \ ARCH=$(uname -m) && \ if [ "${ARCH}" = "aarch64" ]; then \ GO_ARCH="arm64" && \ - GO_SHA256="16e5017863a7f6071363782b1b8042eb12c6ca4f4cd71528b2123f0a1275b13e"; \ + GO_SHA256="654da1f9b50a5d1c2a85ccf8ed405aa89c06e94d18384628bf186f7712677b08"; \ elif [ "${ARCH}" = "x86_64" ]; then \ GO_ARCH="amd64" && \ - GO_SHA256="6924efde5de86fe277676e929dc9917d466efa02fb934197bc2eba35d5680971"; \ + GO_SHA256="42d4f7a32316aa66591eca7e89867256057a4264451aca10570a715b3637ba70"; \ else \ echo "Unsupported architecture: ${ARCH}" && exit 1; \ fi && \ diff --git a/devops/deploy/docker/build/ubuntu24.04/Dockerfile b/devops/deploy/docker/build/ubuntu24.04/Dockerfile index c4f4e646720..83855bcaf68 100644 --- a/devops/deploy/docker/build/ubuntu24.04/Dockerfile +++ b/devops/deploy/docker/build/ubuntu24.04/Dockerfile @@ -144,14 +144,14 @@ RUN apt-get update && \ quilt \ unzip && \ apt-get clean && rm -rf /var/lib/apt/lists/* && \ - cd && 
GO_VERSION="go1.23.4" && \ + cd && GO_VERSION="go1.25.10" && \ ARCH=$(uname -m) && \ if [ "${ARCH}" = "aarch64" ]; then \ GO_ARCH="arm64" && \ - GO_SHA256="16e5017863a7f6071363782b1b8042eb12c6ca4f4cd71528b2123f0a1275b13e"; \ + GO_SHA256="654da1f9b50a5d1c2a85ccf8ed405aa89c06e94d18384628bf186f7712677b08"; \ elif [ "${ARCH}" = "x86_64" ]; then \ GO_ARCH="amd64" && \ - GO_SHA256="6924efde5de86fe277676e929dc9917d466efa02fb934197bc2eba35d5680971"; \ + GO_SHA256="42d4f7a32316aa66591eca7e89867256057a4264451aca10570a715b3637ba70"; \ else \ echo "Unsupported architecture: ${ARCH}" && exit 1; \ fi && \ diff --git a/devops/deploy/docker/test/rocky10/Dockerfile b/devops/deploy/docker/test/rocky10/Dockerfile new file mode 100644 index 00000000000..ec6b268f708 --- /dev/null +++ b/devops/deploy/docker/test/rocky10/Dockerfile @@ -0,0 +1,135 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# +# Apache Cloudberry (Incubating) is an effort undergoing incubation at +# the Apache Software Foundation (ASF), sponsored by the Apache +# Incubator PMC. 
+# +# Incubation is required of all newly accepted projects until a +# further review indicates that the infrastructure, communications, +# and decision making process have stabilized in a manner consistent +# with other successful ASF projects. +# +# While incubation status is not necessarily a reflection of the +# completeness or stability of the code, it does indicate that the +# project has yet to be fully endorsed by the ASF. +# +# -------------------------------------------------------------------- +# Dockerfile for Apache Cloudberry Base Environment +# -------------------------------------------------------------------- +# This Dockerfile sets up a Rocky Linux 10-based container to serve as +# a base environment for evaluating Apache Cloudberry. It installs +# necessary system utilities, configures the environment for SSH access, +# and sets up a 'gpadmin' user with sudo privileges. The Cloudberry +# Database RPM can be installed into this container for testing and +# functional verification. +# +# Key Features: +# - Locale setup for en_US.UTF-8 +# - SSH daemon setup for remote access +# - Essential system utilities installation +# - Separate user creation and configuration steps +# +# Security Considerations: +# - This Dockerfile prioritizes ease of use for functional testing and +# evaluation. It includes configurations such as passwordless sudo access +# for the 'gpadmin' user and SSH access with password authentication. +# - These configurations are suitable for testing and development but +# should NOT be used in a production environment due to potential security +# risks. +# +# Usage: +# docker build -t cloudberry-db-base-env . 
+# docker run -h cdw -it cloudberry-db-base-env +# -------------------------------------------------------------------- + +# Base image: Rocky Linux 10 +FROM rockylinux/rockylinux:10 + +# Argument for configuring the timezone +ARG TIMEZONE_VAR="America/Los_Angeles" + +# Environment variables for locale +ENV LANG=en_US.UTF-8 + +# -------------------------------------------------------------------- +# System Update and Installation +# -------------------------------------------------------------------- +# Update the system and install essential system utilities required for +# running and testing Apache Cloudberry. Cleanup the DNF cache afterward +# to reduce the image size. +# -------------------------------------------------------------------- +RUN dnf install -y \ + file \ + gdb \ + glibc-locale-source \ + make \ + openssh \ + openssh-clients \ + openssh-server \ + procps-ng \ + sudo \ + which \ + && \ + dnf clean all # Clean up DNF cache after package installations + +# -------------------------------------------------------------------- +# User Creation and Configuration +# -------------------------------------------------------------------- +# - Create the 'gpadmin' user and group. +# - Configure the 'gpadmin' user with passwordless sudo privileges. +# - Add Cloudberry-specific entries to the gpadmin's .bashrc. 
+# -------------------------------------------------------------------- +RUN /usr/sbin/groupadd gpadmin && \ + /usr/sbin/useradd gpadmin -g gpadmin -G wheel && \ + echo 'gpadmin ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/90-gpadmin && \ + echo -e '\n# Add Cloudberry entries\nif [ -f /usr/local/cloudberry/cloudberry-env.sh ]; then\n source /usr/local/cloudberry/cloudberry-env.sh\n export COORDINATOR_DATA_DIRECTORY=/data1/coordinator/gpseg-1\nfi' >> /home/gpadmin/.bashrc + +# -------------------------------------------------------------------- +# Copy Configuration Files and Setup the Environment +# -------------------------------------------------------------------- +# - Copy custom configuration files from the build context to /tmp/. +# - Apply custom system limits and timezone. +# - Set up SSH for password-based authentication. +# - Generate locale and set the default locale to en_US.UTF-8. +# -------------------------------------------------------------------- +COPY ./configs/* /tmp/ + +RUN cp /tmp/90-cbdb-limits /etc/security/limits.d/90-cbdb-limits && \ + sed -i.bak -r 's/^(session\s+required\s+pam_limits.so)/#\1/' /etc/pam.d/* && \ + cat /usr/share/zoneinfo/${TIMEZONE_VAR} > /etc/localtime && \ + chmod 777 /tmp/init_system.sh && \ + setcap cap_net_raw+ep /usr/bin/ping && \ + ssh-keygen -A && \ + echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config && \ + localedef -i en_US -f UTF-8 en_US.UTF-8 && \ + echo "LANG=en_US.UTF-8" | tee /etc/locale.conf + +# -------------------------------------------------------------------- +# Set the Default User and Command +# -------------------------------------------------------------------- +# The default user is set to 'gpadmin', and the container starts by +# running the init_system.sh script. This container serves as a base +# environment, and the Apache Cloudberry RPM can be installed for +# testing and functional verification. 
+# -------------------------------------------------------------------- +USER gpadmin + +CMD ["bash","-c","/tmp/init_system.sh"] diff --git a/devops/deploy/docker/test/rocky10/configs/90-cbdb-limits b/devops/deploy/docker/test/rocky10/configs/90-cbdb-limits new file mode 100644 index 00000000000..474957c42f6 --- /dev/null +++ b/devops/deploy/docker/test/rocky10/configs/90-cbdb-limits @@ -0,0 +1,32 @@ +# /etc/security/limits.d/90-cbdb-limits +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# -------------------------------------------------------------------- + +# Core dump file size limits for gpadmin +gpadmin soft core unlimited +gpadmin hard core unlimited + +# Open file limits for gpadmin +gpadmin soft nofile 524288 +gpadmin hard nofile 524288 + +# Process limits for gpadmin +gpadmin soft nproc 131072 +gpadmin hard nproc 131072 diff --git a/devops/deploy/docker/test/rocky10/configs/gpinitsystem.conf b/devops/deploy/docker/test/rocky10/configs/gpinitsystem.conf new file mode 100644 index 00000000000..3dcd5a99365 --- /dev/null +++ b/devops/deploy/docker/test/rocky10/configs/gpinitsystem.conf @@ -0,0 +1,87 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# ---------------------------------------------------------------------- +# gpinitsystem Configuration File for Apache Cloudberry +# ---------------------------------------------------------------------- +# This configuration file is used to initialize an Apache Cloudberry +# cluster. It defines the settings for the coordinator, primary segments, +# and mirrors, as well as other important configuration options. 
+# ---------------------------------------------------------------------- + +# Segment prefix - This prefix is used for naming the segment directories. +# For example, the primary segment directories will be named gpseg0, gpseg1, etc. +SEG_PREFIX=gpseg + +# Coordinator port - The port number where the coordinator will listen. +# This is the port used by clients to connect to the database. +COORDINATOR_PORT=5432 + +# Coordinator hostname - The hostname of the machine where the coordinator +# will be running. The $(hostname) command will automatically insert the +# hostname of the current machine. +COORDINATOR_HOSTNAME=$(hostname) + +# Coordinator data directory - The directory where the coordinator's data +# will be stored. This directory should have enough space to store metadata +# and system catalogs. +COORDINATOR_DIRECTORY=/data1/coordinator + +# Base port for primary segments - The starting port number for the primary +# segments. Each primary segment will use a unique port number starting from +# this base. +PORT_BASE=6000 + +# Primary segment data directories - An array specifying the directories where +# the primary segment data will be stored. Each directory corresponds to a +# primary segment. In this case, two primary segments will be created in the +# same directory. +declare -a DATA_DIRECTORY=(/data1/primary /data1/primary) + +# Base port for mirror segments - The starting port number for the mirror +# segments. Each mirror segment will use a unique port number starting from +# this base. +MIRROR_PORT_BASE=7000 + +# Mirror segment data directories - An array specifying the directories where +# the mirror segment data will be stored. Each directory corresponds to a +# mirror segment. In this case, two mirror segments will be created in the +# same directory. +declare -a MIRROR_DATA_DIRECTORY=(/data1/mirror /data1/mirror) + +# Trusted shell - The shell program used for remote execution. 
Cloudberry uses +# SSH to run commands on other machines in the cluster. 'ssh' is the default. +TRUSTED_SHELL=ssh + +# Database encoding - The character set encoding to be used by the database. +# 'UNICODE' is a common choice, especially for internationalization. +ENCODING=UNICODE + +# Default database name - The name of the default database to be created during +# initialization. This is also the default database that the gpadmin user will +# connect to. +DATABASE_NAME=gpadmin + +# Machine list file - A file containing the list of hostnames where the primary +# segments will be created. Each line in the file represents a different machine. +# This file is critical for setting up the cluster across multiple nodes. +MACHINE_LIST_FILE=/home/gpadmin/hostfile_gpinitsystem + +# ---------------------------------------------------------------------- +# End of gpinitsystem Configuration File +# ---------------------------------------------------------------------- diff --git a/devops/deploy/docker/test/rocky10/configs/init_system.sh b/devops/deploy/docker/test/rocky10/configs/init_system.sh new file mode 100755 index 00000000000..3ea7e34b0ff --- /dev/null +++ b/devops/deploy/docker/test/rocky10/configs/init_system.sh @@ -0,0 +1,221 @@ +#!/bin/bash +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. 
You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# Container Initialization Script +# -------------------------------------------------------------------- +# This script sets up the environment inside the Docker container for +# the Apache Cloudberry Build Environment. It performs the following +# tasks: +# +# 1. Verifies that the container is running with the expected hostname. +# 2. Starts the SSH daemon to allow SSH access to the container. +# 3. Configures passwordless SSH access for the 'gpadmin' user. +# 4. Sets up the necessary directories and configuration files for +# Apache Cloudberry. +# 5. Displays a welcome banner and system information. +# 6. Starts an interactive bash shell. +# +# This script is intended to be used as an entrypoint or initialization +# script for the Docker container. +# -------------------------------------------------------------------- + +# -------------------------------------------------------------------- +# Check if the hostname is 'cdw' +# -------------------------------------------------------------------- +# The script checks if the container's hostname is set to 'cdw'. This is +# a requirement for this environment, and if the hostname does not match, +# the script will exit with an error message. This ensures consistency +# across different environments. +# -------------------------------------------------------------------- +if [ "$(hostname)" != "cdw" ]; then + echo "Error: This container must be run with the hostname 'cdw'." + echo "Use the following command: docker run -h cdw ..." 
+ exit 1 +fi + +# -------------------------------------------------------------------- +# Start SSH daemon and setup for SSH access +# -------------------------------------------------------------------- +# The SSH daemon is started to allow remote access to the container via +# SSH. This is useful for development and debugging purposes. If the SSH +# daemon fails to start, the script exits with an error. +# -------------------------------------------------------------------- +if ! sudo /usr/sbin/sshd; then + echo "Failed to start SSH daemon" >&2 + exit 1 +fi + +# -------------------------------------------------------------------- +# Remove /run/nologin to allow logins +# -------------------------------------------------------------------- +# The /run/nologin file, if present, prevents users from logging into +# the system. This file is removed to ensure that users can log in via SSH. +# -------------------------------------------------------------------- +sudo rm -rf /run/nologin + +# -------------------------------------------------------------------- +# Configure passwordless SSH access for 'gpadmin' user +# -------------------------------------------------------------------- +# The script sets up SSH key-based authentication for the 'gpadmin' user, +# allowing passwordless SSH access. It generates a new SSH key pair if one +# does not already exist, and configures the necessary permissions. +# -------------------------------------------------------------------- +mkdir -p /home/gpadmin/.ssh +chmod 700 /home/gpadmin/.ssh + +if [ ! 
-f /home/gpadmin/.ssh/id_rsa ]; then + ssh-keygen -t rsa -b 4096 -C gpadmin -f /home/gpadmin/.ssh/id_rsa -P "" > /dev/null 2>&1 +fi + +cat /home/gpadmin/.ssh/id_rsa.pub >> /home/gpadmin/.ssh/authorized_keys +chmod 600 /home/gpadmin/.ssh/authorized_keys + +# Add the container's hostname to the known_hosts file to avoid SSH warnings +ssh-keyscan -t rsa cdw > /home/gpadmin/.ssh/known_hosts 2>/dev/null + +# -------------------------------------------------------------------- +# Cloudberry Data Directories Setup +# -------------------------------------------------------------------- +# The script sets up the necessary directories for Apache Cloudberry, +# including directories for the coordinator, standby coordinator, primary +# segments, and mirror segments. It also sets up the configuration files +# required for initializing the database. +# -------------------------------------------------------------------- +sudo rm -rf /data1/* +sudo mkdir -p /data1/coordinator /data1/standby_coordinator /data1/primary /data1/mirror +sudo chown -R gpadmin.gpadmin /data1 + +# Copy the gpinitsystem configuration file to the home directory +cp /tmp/gpinitsystem.conf /home/gpadmin + +# Set up the hostfile for cluster initialization +echo $(hostname) > /home/gpadmin/hostfile_gpinitsystem + +# Change to the home directory of the current user +cd $HOME + +# -------------------------------------------------------------------- +# Display a Welcome Banner +# -------------------------------------------------------------------- +# The following ASCII art and welcome message are displayed when the +# container starts. This banner provides a visual indication that the +# container is running in the Apache Cloudberry Build Environment. 
+# -------------------------------------------------------------------- +cat <<-'EOF' + +====================================================================== + + ++++++++++ ++++++ + ++++++++++++++ +++++++ + ++++ +++++ ++++ + ++++ +++++++++ + =+==== =============+ + ======== =====+ ===== + ==== ==== ==== ==== + ==== === === ==== + ==== === === ==== + ==== === ==-- === + ===== ===== -- ==== + ===================== ====== + ============================ + =-----= + ____ _ _ _ + / ___|| | ___ _ _ __| || |__ ___ _ __ _ __ _ _ + | | | | / _ \ | | | | / _` || '_ \ / _ \| '__|| '__|| | | | + | |___ | || (_) || |_| || (_| || |_) || __/| | | | | |_| | + \____||_| \____ \__,_| \__,_||_.__/ \___||_| |_| \__, | + |___/ +---------------------------------------------------------------------- + +EOF + +# -------------------------------------------------------------------- +# Display System Information +# -------------------------------------------------------------------- +# The script sources the /etc/os-release file to retrieve the operating +# system name and version. It then displays the following information: +# - OS name and version +# - Current user +# - Container hostname +# - IP address +# - CPU model name and number of cores +# - Total memory available +# - Cloudberry version (if installed) +# This information is useful for users to understand the environment they +# are working in. 
+# -------------------------------------------------------------------- +source /etc/os-release + +# First, create the CPU info detection function +get_cpu_info() { + ARCH=$(uname -m) + if [ "$ARCH" = "x86_64" ]; then + lscpu | grep 'Model name:' | awk '{print substr($0, index($0,$3))}' + elif [ "$ARCH" = "aarch64" ]; then + VENDOR=$(lscpu | grep 'Vendor ID:' | awk '{print $3}') + if [ "$VENDOR" = "Apple" ] || [ "$VENDOR" = "0x61" ]; then + echo "Apple Silicon ($ARCH)" + else + if [ -f /proc/cpuinfo ]; then + IMPL=$(grep "CPU implementer" /proc/cpuinfo | head -1 | awk '{print $3}') + PART=$(grep "CPU part" /proc/cpuinfo | head -1 | awk '{print $3}') + if [ ! -z "$IMPL" ] && [ ! -z "$PART" ]; then + echo "ARM $ARCH (Implementer: $IMPL, Part: $PART)" + else + echo "ARM $ARCH" + fi + else + echo "ARM $ARCH" + fi + fi + else + echo "Unknown architecture: $ARCH" + fi +} + +# Check if Apache Cloudberry is installed and display its version +if rpm -q apache-cloudberry-db-incubating > /dev/null 2>&1; then + CBDB_VERSION=$(/usr/local/cbdb/bin/postgres --gp-version) +else + CBDB_VERSION="Not installed" +fi + +cat <<-EOF +Welcome to the Apache Cloudberry Test Environment! + +Cloudberry version .. : $CBDB_VERSION +Container OS ........ : $NAME $VERSION +User ................ : $(whoami) +Container hostname .. : $(hostname) +IP Address .......... : $(hostname -I | awk '{print $1}') +CPU Info ............ : $(get_cpu_info) +CPU(s) .............. : $(nproc) +Memory .............. : $(free -h | grep Mem: | awk '{print $2}') total +====================================================================== + +EOF + +# -------------------------------------------------------------------- +# Start an interactive bash shell +# -------------------------------------------------------------------- +# Finally, the script starts an interactive bash shell to keep the +# container running and allow the user to interact with the environment. 
+# -------------------------------------------------------------------- +/bin/bash diff --git a/devops/release/cloudberry-release.sh b/devops/release/cloudberry-release.sh index 3ab044d5aab..fdc4809f2f8 100755 --- a/devops/release/cloudberry-release.sh +++ b/devops/release/cloudberry-release.sh @@ -565,9 +565,10 @@ section "Staging release: $TAG" # NOTE: For RC tags like "X.Y.Z-incubating-rcN", keep the tag as-is but # generate the tarball name and top-level directory using BASE_VERSION # (without "-rcN"). This allows promoting the voted bits without rebuilding. - # Keep -rcN in the artifact filename for RC voting, but keep the extracted - # top-level directory name as BASE_VERSION (without -rcN). - TAR_NAME="apache-cloudberry-${TAG}-src.tar.gz" + # Use BASE_VERSION for both tarball filename and extracted directory name + # to align with Apache incubator release conventions. This enables direct + # 'svn mv' to release repository after voting without renaming artifacts. + TAR_NAME="apache-cloudberry-${BASE_VERSION}-src.tar.gz" TMP_DIR=$(mktemp -d) trap 'rm -rf "$TMP_DIR"' EXIT diff --git a/devops/sandbox/.env b/devops/sandbox/.env index 233d7c5b1b5..1ceec2e5fb7 100644 --- a/devops/sandbox/.env +++ b/devops/sandbox/.env @@ -17,5 +17,5 @@ # permissions and limitations under the License. 
# # -------------------------------------------------------------------- -CODEBASE_VERSION=2.0.0 +CODEBASE_VERSION=2.1.0 OS_VERSION=rockylinux9 diff --git a/devops/sandbox/Dockerfile.RELEASE.rockylinux9 b/devops/sandbox/Dockerfile.RELEASE.rockylinux9 index ac394c6cb60..215c32f452d 100644 --- a/devops/sandbox/Dockerfile.RELEASE.rockylinux9 +++ b/devops/sandbox/Dockerfile.RELEASE.rockylinux9 @@ -94,6 +94,7 @@ RUN dnf makecache && \ readline-devel \ zlib-devel && \ dnf install -y --enablerepo=crb \ + liburing-devel \ libuv-devel \ libyaml-devel \ perl-IPC-Run \ @@ -120,10 +121,12 @@ USER gpadmin WORKDIR /home/gpadmin # Release version to build (Apache official tarball) -ARG CB_RELEASE_VERSION=2.0.0-incubating +ARG CB_RELEASE_VERSION=2.1.0-incubating # Download and extract the specified release version from Apache -RUN wget -nv "https://downloads.apache.org/incubator/cloudberry/${CB_RELEASE_VERSION}/apache-cloudberry-${CB_RELEASE_VERSION}-src.tar.gz" -O /home/gpadmin/apache-cloudberry-${CB_RELEASE_VERSION}-src.tar.gz && \ +# Using Apache mirror system for better download reliability and speed +RUN curl -L -o /home/gpadmin/apache-cloudberry-${CB_RELEASE_VERSION}-src.tar.gz \ + "https://www.apache.org/dyn/closer.lua/incubator/cloudberry/${CB_RELEASE_VERSION}/apache-cloudberry-${CB_RELEASE_VERSION}-src.tar.gz?action=download" && \ tar -xzf /home/gpadmin/apache-cloudberry-${CB_RELEASE_VERSION}-src.tar.gz -C /home/gpadmin && \ rm -f /home/gpadmin/apache-cloudberry-${CB_RELEASE_VERSION}-src.tar.gz && \ mv /home/gpadmin/apache-cloudberry-${CB_RELEASE_VERSION} /home/gpadmin/cloudberry @@ -131,47 +134,9 @@ RUN wget -nv "https://downloads.apache.org/incubator/cloudberry/${CB_RELEASE_VER # Build Cloudberry using the official build scripts RUN cd /home/gpadmin/cloudberry && \ export SRC_DIR=/home/gpadmin/cloudberry && \ - mkdir -p "${SRC_DIR}/build-logs" && \ - # Ensure Cloudberry lib dir exists and has Xerces libs available - sudo rm -rf /usr/local/cloudberry-db && \ - sudo mkdir 
-p /usr/local/cloudberry-db/lib && \ - sudo cp -v /usr/local/xerces-c/lib/libxerces-c.so \ - /usr/local/xerces-c/lib/libxerces-c-3.*.so \ - /usr/local/cloudberry-db/lib/ && \ - sudo chown -R gpadmin:gpadmin /usr/local/cloudberry-db && \ - # Configure with required features and paths - export LD_LIBRARY_PATH=/usr/local/cloudberry-db/lib:$LD_LIBRARY_PATH && \ - ./configure --prefix=/usr/local/cloudberry-db \ - --disable-external-fts \ - --enable-debug \ - --enable-cassert \ - --enable-debug-extensions \ - --enable-gpcloud \ - --enable-ic-proxy \ - --enable-mapreduce \ - --enable-orafce \ - --enable-orca \ - --enable-pax \ - --disable-pxf \ - --enable-tap-tests \ - --with-gssapi \ - --with-ldap \ - --with-libxml \ - --with-lz4 \ - --with-pam \ - --with-perl \ - --with-pgport=5432 \ - --with-python \ - --with-pythonsrc-ext \ - --with-ssl=openssl \ - --with-uuid=e2fs \ - --with-includes=/usr/local/xerces-c/include \ - --with-libraries=/usr/local/cloudberry-db/lib && \ - # Build and install - make -j$(nproc) --directory ${SRC_DIR} && \ - make -j$(nproc) --directory ${SRC_DIR}/contrib && \ - make install --directory ${SRC_DIR} && \ - make install --directory "${SRC_DIR}/contrib" + mkdir -p ${SRC_DIR}/build-logs && \ + ./devops/build/automation/cloudberry/scripts/configure-cloudberry.sh && \ + ./devops/build/automation/cloudberry/scripts/build-cloudberry.sh # -------------------------------------------------------------------- # Runtime stage: Rocky Linux 9 runtime with required dependencies @@ -192,6 +157,7 @@ RUN dnf -y update && \ krb5-libs \ libevent \ libicu \ + liburing \ libuuid \ libxml2 \ libyaml \ diff --git a/devops/sandbox/README.md b/devops/sandbox/README.md index 9f475977835..fb6a5ef80c3 100644 --- a/devops/sandbox/README.md +++ b/devops/sandbox/README.md @@ -92,14 +92,14 @@ Build and deploy steps: ```shell cd cloudberry/devops/sandbox - ./run.sh -c 2.0.0 + ./run.sh -c 2.1.0 ``` - For latest Apache Cloudberry release running across multiple containers 
```shell cd cloudberry/devops/sandbox - ./run.sh -c 2.0.0 -m + ./run.sh -c 2.1.0 -m ``` - For latest main branch running on a single container diff --git a/devops/sandbox/run.sh b/devops/sandbox/run.sh index 7c266b8f64c..705442d98e1 100755 --- a/devops/sandbox/run.sh +++ b/devops/sandbox/run.sh @@ -38,7 +38,7 @@ PIP_INDEX_URL_VAR="${PIP_INDEX_URL_VAR:-$DEFAULT_PIP_INDEX_URL_VAR}" # Function to display help message function usage() { echo "Usage: $0 [-o ] [-c ] [-b] [-m]" - echo " -c Codebase version (valid values: main, local, or other available version like 2.0.0)" + echo " -c Codebase version (valid values: main, local, or other available version like 2.1.0)" echo " -t Timezone (default: America/Los_Angeles, or set via TIMEZONE_VAR environment variable)" echo " -p Python Package Index (PyPI) (default: https://pypi.org/simple, or set via PIP_INDEX_URL_VAR environment variable)" echo " -b Build only, do not run the container (default: false, or set via BUILD_ONLY environment variable)" diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 23f60cad528..9b03793f74a 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -9027,6 +9027,23 @@ SET XML OPTION { DOCUMENT | CONTENT }; + + restrict_nonsystem_relation_kind (string) + + restrict_nonsystem_relation_kind + configuration parameter + + + + + This variable specifies relation kind to which access is restricted. + It contains a comma-separated list of relation kind. Currently, the + supported relation kinds are view and + foreign-table. + + + + diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 7141f6c277a..682142724c7 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -1527,10 +1527,10 @@ SELCT 1/0; The frontend should also be prepared to handle an ErrorMessage - response to SSLRequest from the server. This would only occur if - the server predates the addition of SSL support - to PostgreSQL. 
(Such servers are now very ancient, - and likely do not exist in the wild anymore.) + response to SSLRequest from the server. The frontend should not display + this error message to the user/application, since the server has not been + authenticated + (CVE-2024-10977). In this case the connection must be closed, but the frontend might choose to open a fresh connection and proceed without requesting SSL. @@ -1604,12 +1604,13 @@ SELCT 1/0; The frontend should also be prepared to handle an ErrorMessage - response to GSSENCRequest from the server. This would only occur if - the server predates the addition of GSSAPI encryption - support to PostgreSQL. In this case the - connection must be closed, but the frontend might choose to open a fresh - connection and proceed without requesting GSSAPI - encryption. + response to GSSENCRequest from the server. The frontend should not display + this error message to the user/application, since the server has not been + authenticated + (CVE-2024-10977). + In this case the connection must be closed, but the frontend might choose + to open a fresh connection and proceed without requesting + GSSAPI encryption. diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index d3113d76a07..c405fef866b 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -831,6 +831,14 @@ PostgreSQL documentation The only exception is that an empty pattern is disallowed. + + + Using wildcards in may result + in access to unexpected foreign servers. Also, to use this option securely, + make sure that the named server must have a trusted owner. 
+ + + When is specified, diff --git a/gpMgmt/bin/Makefile b/gpMgmt/bin/Makefile index c5eb6ccba9c..7092700b784 100644 --- a/gpMgmt/bin/Makefile +++ b/gpMgmt/bin/Makefile @@ -111,8 +111,19 @@ download-python-deps: else \ echo "PyGreSQL-$(PYGRESQL_VERSION).tar.gz already exists, skipping download"; \ fi - # Install wheel and cython for PyYAML building - pip3 install --user wheel "cython<3.0.0" + # Install wheel and cython for PyYAML building (only if not exists) + @if python3 -c "import wheel" >/dev/null 2>&1; then \ + echo "wheel already exists, skipping installation"; \ + else \ + echo "Installing wheel..."; \ + pip3 install --user wheel 2>/dev/null || pip3 install --user --break-system-packages wheel; \ + fi + @if python3 -c "import cython" >/dev/null 2>&1; then \ + echo "cython already exists, skipping installation"; \ + else \ + echo "Installing cython..."; \ + pip3 install --user "cython<3.0.0" 2>/dev/null || pip3 install --user --break-system-packages "cython<3.0.0"; \ + fi # # PyGreSQL diff --git a/gpMgmt/bin/gpcheckcat_modules/orphaned_toast_tables_check.py b/gpMgmt/bin/gpcheckcat_modules/orphaned_toast_tables_check.py index 21ec8d18047..789e1b139d2 100644 --- a/gpMgmt/bin/gpcheckcat_modules/orphaned_toast_tables_check.py +++ b/gpMgmt/bin/gpcheckcat_modules/orphaned_toast_tables_check.py @@ -25,7 +25,7 @@ def __init__(self): # pg_depend back to pg_class, and if the table oids don't match and/or # one is missing, the TOAST table is considered to be an orphan. # Note: Handles toast tables which is created/used by InitTempTableNamespace(). 
- self.orphaned_toast_tables_query = """ + self.orphaned_toast_tables_query = r""" SELECT gp_segment_id AS content_id, toast_table_oid, diff --git a/gpMgmt/bin/gppylib/commands/base.py b/gpMgmt/bin/gppylib/commands/base.py index d455c6e2d13..e09dd40c061 100755 --- a/gpMgmt/bin/gppylib/commands/base.py +++ b/gpMgmt/bin/gppylib/commands/base.py @@ -230,7 +230,7 @@ def __init__(self, numWorkers=16, operations=None): if operations is not None: for operation in operations: self._spoof_operation(operation) - super(OperationWorkerPool, self).__init__(numWorkers, operations) + super(OperationWorkerPool, self).__init__(numWorkers, items=operations) def check_results(self): raise NotImplementedError("OperationWorkerPool has no means of verifying success.") diff --git a/gpMgmt/bin/gppylib/commands/pg.py b/gpMgmt/bin/gppylib/commands/pg.py index a2af133c28f..f430b294500 100644 --- a/gpMgmt/bin/gppylib/commands/pg.py +++ b/gpMgmt/bin/gppylib/commands/pg.py @@ -1,13 +1,15 @@ #!/usr/bin/env python3 # -# Copyright (c) Greenplum Inc 2008. All Rights Reserved. +# Copyright (c) Greenplum Inc 2008. All Rights Reserved. 
# +from contextlib import closing import os import pipes from gppylib.gplog import * from gppylib.gparray import * +from gppylib.db import dbconn from .base import * from .unix import * from gppylib.commands.base import * @@ -17,6 +19,30 @@ GPHOME=os.environ.get('GPHOME') + +def ensure_replication_slot_exists(source_host, source_port, + replication_slot_name): + if not replication_slot_name: + return False + + escaped_slot_name = replication_slot_name.replace("'", "''") + dburl = dbconn.DbURL(hostname=source_host, port=source_port, + dbname='template1') + with closing(dbconn.connect(dburl, utility=True)) as conn: + slot_exists = dbconn.querySingleton( + conn, + "SELECT count(*) FROM pg_catalog.pg_replication_slots " + "WHERE slot_name = '{}'".format(escaped_slot_name)) + if slot_exists > 0: + return False + + dbconn.execSQL( + conn, + "SELECT pg_catalog.pg_create_physical_replication_slot('{}')" + .format(escaped_slot_name)) + + return True + class DbStatus(Command): def __init__(self,name,db,ctxt=LOCAL,remoteHost=None): self.db=db diff --git a/gpMgmt/bin/gppylib/commands/test/unit/test_unit_pg_base_backup.py b/gpMgmt/bin/gppylib/commands/test/unit/test_unit_pg_base_backup.py index 117f62b41ea..66621c39d5c 100644 --- a/gpMgmt/bin/gppylib/commands/test/unit/test_unit_pg_base_backup.py +++ b/gpMgmt/bin/gppylib/commands/test/unit/test_unit_pg_base_backup.py @@ -1,4 +1,5 @@ import unittest +from unittest.mock import Mock, patch from gppylib.commands import pg @@ -44,6 +45,60 @@ def test_base_backup_does_not_pass_conflicting_xlog_method_argument_when_given_r self.assertNotIn("-x", base_backup.command_tokens) self.assertNotIn("--xlog", base_backup.command_tokens) + @patch('gppylib.commands.pg.dbconn.querySingleton', return_value=1) + @patch('gppylib.commands.pg.dbconn.connect') + @patch('gppylib.commands.pg.dbconn.DbURL') + def test_ensure_replication_slot_exists_returns_false_when_slot_exists(self, mock_dburl, + mock_connect, + mock_query_singleton): + mock_conn = 
Mock() + mock_connect.return_value = mock_conn + + created = pg.ensure_replication_slot_exists('source-host', 5432, 'slot_name') + + self.assertFalse(created) + mock_dburl.assert_called_once_with(hostname='source-host', port=5432, dbname='template1') + mock_connect.assert_called_once_with(mock_dburl.return_value, utility=True) + self.assertEqual(1, mock_query_singleton.call_count) + self.assertIn("FROM pg_catalog.pg_replication_slots", mock_query_singleton.call_args[0][1]) + mock_conn.close.assert_called_once_with() + + @patch('gppylib.commands.pg.dbconn.execSQL') + @patch('gppylib.commands.pg.dbconn.querySingleton', return_value=0) + @patch('gppylib.commands.pg.dbconn.connect') + @patch('gppylib.commands.pg.dbconn.DbURL') + def test_ensure_replication_slot_exists_creates_missing_slot(self, mock_dburl, + mock_connect, + mock_query_singleton, + mock_exec_sql): + mock_conn = Mock() + mock_connect.return_value = mock_conn + + created = pg.ensure_replication_slot_exists('source-host', 5432, 'slot_name') + + self.assertTrue(created) + mock_dburl.assert_called_once_with(hostname='source-host', port=5432, dbname='template1') + mock_connect.assert_called_once_with(mock_dburl.return_value, utility=True) + self.assertEqual(1, mock_query_singleton.call_count) + self.assertIn("FROM pg_catalog.pg_replication_slots", mock_query_singleton.call_args[0][1]) + mock_exec_sql.assert_called_once() + self.assertIn("pg_create_physical_replication_slot('slot_name')", + mock_exec_sql.call_args[0][1]) + mock_conn.close.assert_called_once_with() + + @patch('gppylib.commands.pg.dbconn.querySingleton') + @patch('gppylib.commands.pg.dbconn.connect') + @patch('gppylib.commands.pg.dbconn.DbURL') + def test_ensure_replication_slot_exists_skips_empty_slot_name(self, mock_dburl, + mock_connect, + mock_query_singleton): + created = pg.ensure_replication_slot_exists('source-host', 5432, None) + + self.assertFalse(created) + mock_dburl.assert_not_called() + mock_connect.assert_not_called() + 
mock_query_singleton.assert_not_called() + if __name__ == '__main__': unittest.main() diff --git a/gpMgmt/bin/gppylib/logfilter.py b/gpMgmt/bin/gppylib/logfilter.py index c427ac1a6cb..46c0b5e174a 100644 --- a/gpMgmt/bin/gppylib/logfilter.py +++ b/gpMgmt/bin/gppylib/logfilter.py @@ -67,7 +67,7 @@ def FilterLogEntries(iterable, filters=[], ibegin=0, jend=None): - """ + r""" Generator to consume the lines of a GPDB log file from iterable, yield the lines which satisfy the given criteria, and skip the rest. @@ -668,7 +668,7 @@ def MatchInFirstLine(iterable, regex): def NoMatchInFirstLine(iterable, regex): - """ + r""" Generator to filter a stream of groups. Skips those groups whose first line contains a match for the given regex; yields all other groups. diff --git a/gpMgmt/bin/gppylib/mainUtils.py b/gpMgmt/bin/gppylib/mainUtils.py index 553ca9d57c9..e947639591d 100644 --- a/gpMgmt/bin/gppylib/mainUtils.py +++ b/gpMgmt/bin/gppylib/mainUtils.py @@ -488,7 +488,7 @@ def parseStatusLine(line, isStart = False, isStop = False): def check_fts(fts): - fts_check_cmd= "ps -ef | awk '{print \$2, \$8}' | grep gpfts | grep -v grep" + fts_check_cmd= r"ps -ef | awk '{print \$2, \$8}' | grep gpfts | grep -v grep" process_cmd = "gpssh -h %s -e \"%s\" | wc -l" % (fts, fts_check_cmd) fts_process_res=int(subprocess.check_output(process_cmd, shell=True).decode().strip()) return fts_process_res == 2 @@ -500,7 +500,7 @@ def check_etcd(etcd): if etcd_process_res == 2: return True # for demo cluster - etcd_check_cmd = "ps -ef | awk '{print \$2, \$8}' | grep etcd | grep -v grep" + etcd_check_cmd = r"ps -ef | awk '{print \$2, \$8}' | grep etcd | grep -v grep" process_cmd = "gpssh -h %s -e \"%s\"| wc -l" % (etcd, etcd_check_cmd) etcd_process_res = int(subprocess.check_output(process_cmd, shell=True).decode().strip()) return etcd_process_res == 2 diff --git a/gpMgmt/bin/gppylib/test/unit/test_unit_gpsegrecovery.py b/gpMgmt/bin/gppylib/test/unit/test_unit_gpsegrecovery.py index 
c806dbaed90..b525801b07a 100644 --- a/gpMgmt/bin/gppylib/test/unit/test_unit_gpsegrecovery.py +++ b/gpMgmt/bin/gppylib/test/unit/test_unit_gpsegrecovery.py @@ -106,10 +106,12 @@ def setUp(self): self.maxDiff = None self.mock_logger = Mock(spec=['log', 'info', 'debug', 'error', 'warn', 'exception']) self.apply_patches([ + patch('gpsegrecovery.ensure_replication_slot_exists'), patch('gpsegrecovery.start_segment', return_value=Mock()), patch('gpsegrecovery.PgBaseBackup.__init__', return_value=None), patch('gpsegrecovery.PgBaseBackup.run') ]) + self.mock_ensure_slot = self.get_mock_from_apply_patch('ensure_replication_slot_exists') self.mock_pgbasebackup_run = self.get_mock_from_apply_patch('run') self.mock_pgbasebackup_init = self.get_mock_from_apply_patch('__init__') @@ -130,6 +132,7 @@ def tearDown(self): super(FullRecoveryTestCase, self).tearDown() def _assert_basebackup_runs(self, expected_init_args): + self.mock_ensure_slot.assert_called_once_with('sdw1', 40000, 'internal_wal_replication_slot') self.assertEqual(1, self.mock_pgbasebackup_init.call_count) self.assertEqual(expected_init_args, self.mock_pgbasebackup_init.call_args) self.assertEqual(1, self.mock_pgbasebackup_run.call_count) @@ -172,49 +175,37 @@ def test_basebackup_run_no_forceoverwrite_passes(self): self._assert_basebackup_runs(expected_init_args1) self._assert_cmd_passed() - def test_basebackup_run_one_exception(self): - self.mock_pgbasebackup_run.side_effect = [Exception('backup failed once'), Mock()] + def test_basebackup_slot_check_exception(self): + self.mock_ensure_slot.side_effect = [Exception('slot check failed')] self.full_recovery_cmd.run() - expected_init_args1 = call("/data/mirror0", "sdw1", '40000', create_slot=False, - replication_slot_name='internal_wal_replication_slot', - forceoverwrite=True, target_gp_dbid=2, progress_file='/tmp/test_progress_file') - expected_init_args2 = call("/data/mirror0", "sdw1", '40000', create_slot=True, - 
replication_slot_name='internal_wal_replication_slot', - forceoverwrite=True, target_gp_dbid=2, progress_file='/tmp/test_progress_file') - self.assertEqual(2, self.mock_pgbasebackup_init.call_count) - self.assertEqual([expected_init_args1, expected_init_args2] , self.mock_pgbasebackup_init.call_args_list) - self.assertEqual(2, self.mock_pgbasebackup_run.call_count) - self.assertEqual([call(validateAfter=True),call(validateAfter=True)], self.mock_pgbasebackup_run.call_args_list) - gpsegrecovery.start_segment.assert_called_once_with(self.seg_recovery_info, self.mock_logger, self.era) - self._assert_cmd_passed() + self.assertEqual(0, self.mock_pgbasebackup_init.call_count) + self.assertEqual(0, self.mock_pgbasebackup_run.call_count) + self.assertEqual(0, gpsegrecovery.start_segment.call_count) + self._assert_cmd_failed('{"error_type": "full", "error_msg": "slot check failed", "dbid": 2, ' + '"datadir": "/data/mirror0", "port": 50000, ' + '"progress_file": "/tmp/test_progress_file"}') - def test_basebackup_run_two_exceptions(self): - self.mock_pgbasebackup_run.side_effect=[Exception('backup failed once'), - Exception('backup failed twice')] + def test_basebackup_run_exception(self): + self.mock_pgbasebackup_run.side_effect=[Exception('backup failed once')] self.full_recovery_cmd.run() expected_init_args1 = call("/data/mirror0", "sdw1", '40000', create_slot=False, replication_slot_name='internal_wal_replication_slot', forceoverwrite=True, target_gp_dbid=2, progress_file='/tmp/test_progress_file') - expected_init_args2 = call("/data/mirror0", "sdw1", '40000', create_slot=True, - replication_slot_name='internal_wal_replication_slot', - forceoverwrite=True, target_gp_dbid=2, progress_file='/tmp/test_progress_file') - self.assertEqual(2, self.mock_pgbasebackup_init.call_count) - self.assertEqual([expected_init_args1, expected_init_args2], self.mock_pgbasebackup_init.call_args_list) - self.assertEqual(2, self.mock_pgbasebackup_run.call_count) - 
self.assertEqual([call(validateAfter=True),call(validateAfter=True)], self.mock_pgbasebackup_run.call_args_list) - self.mock_logger.info.any_call('Running pg_basebackup failed: backup failed once') - self.mock_logger.info.assert_called_with("Re-running pg_basebackup, creating the slot this time") + self.mock_ensure_slot.assert_called_once_with('sdw1', 40000, 'internal_wal_replication_slot') + self.assertEqual(1, self.mock_pgbasebackup_init.call_count) + self.assertEqual([expected_init_args1], self.mock_pgbasebackup_init.call_args_list) + self.assertEqual(1, self.mock_pgbasebackup_run.call_count) + self.assertEqual([call(validateAfter=True)], self.mock_pgbasebackup_run.call_args_list) self.assertEqual(0, gpsegrecovery.start_segment.call_count) - self._assert_cmd_failed('{"error_type": "full", "error_msg": "backup failed twice", "dbid": 2, ' \ + self._assert_cmd_failed('{"error_type": "full", "error_msg": "backup failed once", "dbid": 2, ' \ '"datadir": "/data/mirror0", "port": 50000, "progress_file": "/tmp/test_progress_file"}') - def test_basebackup_run_no_forceoverwrite_two_exceptions(self): - self.mock_pgbasebackup_run.side_effect = [Exception('backup failed once'), - Exception('backup failed twice')] + def test_basebackup_run_no_forceoverwrite_exception(self): + self.mock_pgbasebackup_run.side_effect = [Exception('backup failed once')] self.full_recovery_cmd.forceoverwrite = False self.full_recovery_cmd.run() @@ -222,16 +213,13 @@ def test_basebackup_run_no_forceoverwrite_two_exceptions(self): expected_init_args1 = call("/data/mirror0", "sdw1", '40000', create_slot=False, replication_slot_name='internal_wal_replication_slot', forceoverwrite=False, target_gp_dbid=2, progress_file='/tmp/test_progress_file') - # regardless of the passed in value, second call to pg_basebackup will always have forceoverwrite=True - expected_init_args2 = call("/data/mirror0", "sdw1", '40000', create_slot=True, - replication_slot_name='internal_wal_replication_slot', - 
forceoverwrite=True, target_gp_dbid=2, progress_file='/tmp/test_progress_file') - self.assertEqual(2, self.mock_pgbasebackup_init.call_count) - self.assertEqual([expected_init_args1, expected_init_args2], self.mock_pgbasebackup_init.call_args_list) - self.assertEqual(2, self.mock_pgbasebackup_run.call_count) - self.assertEqual([call(validateAfter=True),call(validateAfter=True)], self.mock_pgbasebackup_run.call_args_list) + self.mock_ensure_slot.assert_called_once_with('sdw1', 40000, 'internal_wal_replication_slot') + self.assertEqual(1, self.mock_pgbasebackup_init.call_count) + self.assertEqual([expected_init_args1], self.mock_pgbasebackup_init.call_args_list) + self.assertEqual(1, self.mock_pgbasebackup_run.call_count) + self.assertEqual([call(validateAfter=True)], self.mock_pgbasebackup_run.call_args_list) self.assertEqual(0, gpsegrecovery.start_segment.call_count) - self._assert_cmd_failed('{"error_type": "full", "error_msg": "backup failed twice", "dbid": 2, ' \ + self._assert_cmd_failed('{"error_type": "full", "error_msg": "backup failed once", "dbid": 2, ' \ '"datadir": "/data/mirror0", "port": 50000, "progress_file": "/tmp/test_progress_file"}') def test_basebackup_init_exception(self): @@ -287,7 +275,10 @@ def tearDown(self): @patch('gppylib.commands.pg.PgRewind.run') @patch('gpsegrecovery.PgBaseBackup.__init__', return_value=None) @patch('gpsegrecovery.PgBaseBackup.run') - def test_complete_workflow(self, mock_pgbasebackup_run, mock_pgbasebackup_init, mock_pgrewind_run, mock_pgrewind_init): + @patch('gpsegrecovery.ensure_replication_slot_exists') + def test_complete_workflow(self, mock_ensure_slot, mock_pgbasebackup_run, + mock_pgbasebackup_init, mock_pgrewind_run, + mock_pgrewind_init): mix_confinfo = gppylib.recoveryinfo.serialize_list([ self.full_r1, self.incr_r2]) sys.argv = ['gpsegrecovery', '-l', '/tmp/logdir', '--era', '{}'.format(self.era), '-c {}'.format(mix_confinfo)] @@ -301,17 +292,21 @@ def test_complete_workflow(self, mock_pgbasebackup_run, 
mock_pgbasebackup_init, self.assertEqual(1, mock_pgrewind_init.call_count) self.assertEqual(1, mock_pgbasebackup_run.call_count) self.assertEqual(1, mock_pgbasebackup_init.call_count) + mock_ensure_slot.assert_called_once_with('source_hostname1', 6001, 'internal_wal_replication_slot') self.assertRegex(gplog.get_logfile(), '/gpsegrecovery.py_\d+\.log') @patch('gppylib.commands.pg.PgRewind.__init__', return_value=None) @patch('gppylib.commands.pg.PgRewind.run') @patch('gpsegrecovery.PgBaseBackup.__init__', return_value=None) @patch('gpsegrecovery.PgBaseBackup.run') - def test_complete_workflow_exception(self, mock_pgbasebackup_run, mock_pgbasebackup_init, mock_pgrewind_run, + @patch('gpsegrecovery.ensure_replication_slot_exists') + def test_complete_workflow_exception(self, mock_ensure_slot, + mock_pgbasebackup_run, + mock_pgbasebackup_init, + mock_pgrewind_run, mock_pgrewind_init): mock_pgrewind_run.side_effect = [Exception('pg_rewind failed')] - mock_pgbasebackup_run.side_effect = [Exception('pg_basebackup failed once'), - Exception('pg_basebackup failed twice')] + mock_pgbasebackup_run.side_effect = [Exception('pg_basebackup failed once')] mix_confinfo = gppylib.recoveryinfo.serialize_list([ self.full_r1, self.incr_r2]) sys.argv = ['gpsegrecovery', '-l', '/tmp/logdir', '--era={}'.format(self.era), '-c {}'.format(mix_confinfo)] @@ -322,14 +317,15 @@ def test_complete_workflow_exception(self, mock_pgbasebackup_run, mock_pgbasebac self.assertCountEqual('[{"error_type": "incremental", "error_msg": "pg_rewind failed", "dbid": 4, "datadir": "target_data_dir4", ' '"port": 5004, "progress_file": "/tmp/progress_file4"} , ' - '{"error_type": "full", "error_msg": "pg_basebackup failed twice", "dbid": 1,' + '{"error_type": "full", "error_msg": "pg_basebackup failed once", "dbid": 1,' '"datadir": "target_data_dir1", "port": 5001, "progress_file": "/tmp/progress_file1"}]', buf.getvalue().strip()) self.assertEqual(1, ex.exception.code) self.assertEqual(1, 
mock_pgrewind_run.call_count) self.assertEqual(1, mock_pgrewind_init.call_count) - self.assertEqual(2, mock_pgbasebackup_run.call_count) - self.assertEqual(2, mock_pgbasebackup_init.call_count) + self.assertEqual(1, mock_pgbasebackup_run.call_count) + self.assertEqual(1, mock_pgbasebackup_init.call_count) + mock_ensure_slot.assert_called_once_with('source_hostname1', 6001, 'internal_wal_replication_slot') self.assertRegex(gplog.get_logfile(), '/gpsegrecovery.py_\d+\.log') @patch('recovery_base.gplog.setup_tool_logging') diff --git a/gpMgmt/bin/lib/gpconfigurenewsegment b/gpMgmt/bin/lib/gpconfigurenewsegment index c37c70bf1fa..4f71348b9a8 100755 --- a/gpMgmt/bin/lib/gpconfigurenewsegment +++ b/gpMgmt/bin/lib/gpconfigurenewsegment @@ -10,7 +10,7 @@ from optparse import Option, OptionGroup, OptionParser, OptionValueError, SUPPRE from gppylib.gpparseopts import OptParser, OptChecker from gppylib.commands.gp import ModifyConfSetting, SegmentStart, SegmentStop -from gppylib.commands.pg import PgBaseBackup +from gppylib.commands.pg import PgBaseBackup, ensure_replication_slot_exists from gppylib.db import dbconn from gppylib.commands import unix from gppylib.commands.pg import DbStatus @@ -134,6 +134,10 @@ class ConfExpSegCmd(Command): self.progressFile = '%s/pg_basebackup.%s.dbid%s.out' % (gplog.get_logger_dir(), datetime.datetime.today().strftime('%Y%m%d_%H%M%S'), self.dbid) + ensure_replication_slot_exists( + self.syncWithSegmentHostname, + self.syncWithSegmentPort, + self.replicationSlotName) # Create a mirror based on the primary cmd = PgBaseBackup(target_datadir=self.datadir, source_host=self.syncWithSegmentHostname, @@ -149,30 +153,9 @@ class ConfExpSegCmd(Command): self.set_results(CommandResult(0, b'', b'', True, False)) if shouldDeleteProgressFile: os.remove(self.progressFile) - except Exception as e: - # If the cluster never has mirrors, cmd will fail - # quickly because the internal slot doesn't exist. - # Re-run with `create_slot`. 
- # GPDB_12_MERGE_FIXME could we check it before? or let - # pg_basebackup create slot if not exists. - cmd = PgBaseBackup(target_datadir=self.datadir, - source_host=self.syncWithSegmentHostname, - source_port=str(self.syncWithSegmentPort), - create_slot = True, - replication_slot_name=self.replicationSlotName, - forceoverwrite=True, - target_gp_dbid=self.dbid, - logfile=self.progressFile) - try: - logger.info("Re-running pg_basebackup, creating the slot this time") - cmd.run(validateAfter=True) - self.set_results(CommandResult(0, b'', b'', True, False)) - if shouldDeleteProgressFile: - os.remove(self.progressFile) - except Exception as e: - self.set_results(CommandResult(1, b'', str(e).encode(), True, False)) - raise + self.set_results(CommandResult(1, b'', str(e).encode(), True, False)) + raise logger.info("Successfully ran pg_basebackup: %s" % cmd.cmdStr) return diff --git a/gpMgmt/sbin/gpsegrecovery.py b/gpMgmt/sbin/gpsegrecovery.py index 811a73ccbb5..51c72581e47 100644 --- a/gpMgmt/sbin/gpsegrecovery.py +++ b/gpMgmt/sbin/gpsegrecovery.py @@ -4,7 +4,8 @@ import signal from gppylib.recoveryinfo import RecoveryErrorType -from gppylib.commands.pg import PgBaseBackup, PgRewind +from gppylib.commands.pg import (PgBaseBackup, PgRewind, + ensure_replication_slot_exists) from recovery_base import RecoveryBase, set_recovery_cmd_results from gppylib.commands.base import Command from gppylib.commands.gp import SegmentStart @@ -30,6 +31,9 @@ def __init__(self, name, recovery_info, forceoverwrite, logger, era): @set_recovery_cmd_results def run(self): self.error_type = RecoveryErrorType.BASEBACKUP_ERROR + ensure_replication_slot_exists(self.recovery_info.source_hostname, + self.recovery_info.source_port, + self.replicationSlotName) cmd = PgBaseBackup(self.recovery_info.target_datadir, self.recovery_info.source_hostname, str(self.recovery_info.source_port), @@ -39,26 +43,7 @@ def run(self): target_gp_dbid=self.recovery_info.target_segment_dbid, 
progress_file=self.recovery_info.progress_file) self.logger.info("Running pg_basebackup with progress output temporarily in %s" % self.recovery_info.progress_file) - try: - cmd.run(validateAfter=True) - except Exception as e: #TODO should this be ExecutionError? - self.logger.info("Running pg_basebackup failed: {}".format(str(e))) - - # If the cluster never has mirrors, cmd will fail - # quickly because the internal slot doesn't exist. - # Re-run with `create_slot`. - # GPDB_12_MERGE_FIXME could we check it before? or let - # pg_basebackup create slot if not exists. - cmd = PgBaseBackup(self.recovery_info.target_datadir, - self.recovery_info.source_hostname, - str(self.recovery_info.source_port), - create_slot=True, - replication_slot_name=self.replicationSlotName, - forceoverwrite=True, - target_gp_dbid=self.recovery_info.target_segment_dbid, - progress_file=self.recovery_info.progress_file) - self.logger.info("Re-running pg_basebackup, creating the slot this time") - cmd.run(validateAfter=True) + cmd.run(validateAfter=True) self.error_type = RecoveryErrorType.DEFAULT_ERROR self.logger.info("Successfully ran pg_basebackup for dbid: {}".format( diff --git a/gpcontrib/Makefile b/gpcontrib/Makefile index 8d95a14f876..2969194cfac 100644 --- a/gpcontrib/Makefile +++ b/gpcontrib/Makefile @@ -22,8 +22,7 @@ ifeq "$(enable_debug_extensions)" "yes" gp_legacy_string_agg \ gp_replica_check \ gp_toolkit \ - pg_hint_plan \ - diskquota + pg_hint_plan else recurse_targets = gp_sparse_vector \ gp_distribution_policy \ @@ -31,10 +30,16 @@ else gp_legacy_string_agg \ gp_exttable_fdw \ gp_toolkit \ - pg_hint_plan \ - diskquota + pg_hint_plan endif +ifeq "$(with_diskquota)" "yes" + recurse_targets += diskquota +endif + +ifeq "$(with_gp_stats_collector)" "yes" + recurse_targets += gp_stats_collector +endif ifeq "$(with_zstd)" "yes" recurse_targets += zstd endif @@ -99,4 +104,4 @@ installcheck: $(MAKE) -C gp_sparse_vector installcheck $(MAKE) -C gp_toolkit installcheck $(MAKE) -C 
gp_exttable_fdw installcheck - $(MAKE) -C diskquota installcheck + if [ "$(with_diskquota)" = "yes" ]; then $(MAKE) -C diskquota installcheck; fi diff --git a/gpcontrib/diskquota/CMakeLists.txt b/gpcontrib/diskquota/CMakeLists.txt index fad393cb101..face48578a6 100644 --- a/gpcontrib/diskquota/CMakeLists.txt +++ b/gpcontrib/diskquota/CMakeLists.txt @@ -12,10 +12,6 @@ endif() # generate 'compile_commands.json' set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -# Retrieve repository information -include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Git.cmake) -GitHash_Get(DISKQUOTA_GIT_HASH) - include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Gpdb.cmake) @@ -154,19 +150,6 @@ add_custom_target(create_artifact ${CMAKE_COMMAND} -E tar czvf ${artifact_NAME} "${tgz_NAME}.tar.gz") # packing end -# Create build-info -# The diskquota-build-info shouldn't be copied to GPDB release by install_gpdb_component -include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/BuildInfo.cmake) -set(build_info_PATH ${CMAKE_CURRENT_BINARY_DIR}/diskquota-build-info) -BuildInfo_Create(${build_info_PATH} - VARS - DISKQUOTA_GIT_HASH - DISKQUOTA_VERSION - GP_MAJOR_VERSION - GP_VERSION - CMAKE_BUILD_TYPE) -# Create build-info end - # Add installcheck targets add_subdirectory(tests) add_subdirectory(upgrade_test) @@ -175,4 +158,3 @@ add_subdirectory(upgrade_test) install(PROGRAMS "cmake/install_gpdb_component" DESTINATION ".") install(FILES ${diskquota_DDL} DESTINATION "share/postgresql/extension/") install(TARGETS diskquota DESTINATION "lib/postgresql/") -install(FILES ${build_info_PATH} DESTINATION ".") diff --git a/gpcontrib/diskquota/cmake/BuildInfo.cmake b/gpcontrib/diskquota/cmake/BuildInfo.cmake deleted file mode 100644 index 6e256f34502..00000000000 --- a/gpcontrib/diskquota/cmake/BuildInfo.cmake +++ /dev/null @@ -1,32 +0,0 @@ -# Create a build info file based on the given cmake variables -# For example: -# BuildInfo_Create( -# ${CMAKE_CURRENT_BINARY_DIR}/build-info -# VARS -# DISKQUOTA_GIT_HASH -# GP_MAJOR_VERSION) -# ) -# will create 
a build info file: -# ❯ cat build-info -# DISKQUOTA_GIT_HASH = 151ed92 -# GP_MAJOR_VERSION = 6 - -function(BuildInfo_Create path) - cmake_parse_arguments( - arg - "" - "" - "VARS" - ${ARGN}) - - # Set REGRESS test cases - foreach(key IN LISTS arg_VARS) - get_property(val VARIABLE PROPERTY ${key}) - list(APPEND info_list "${key} = ${val}") - endforeach() - file(WRITE ${path} "") - foreach(content IN LISTS info_list) - file(APPEND ${path} "${content}\n") - endforeach() -endfunction() - diff --git a/gpcontrib/diskquota/cmake/Git.cmake b/gpcontrib/diskquota/cmake/Git.cmake deleted file mode 100644 index 81a68b1f1f4..00000000000 --- a/gpcontrib/diskquota/cmake/Git.cmake +++ /dev/null @@ -1,9 +0,0 @@ -# get git hash -macro(GitHash_Get _git_hash) - find_package(Git) - execute_process( - COMMAND ${GIT_EXECUTABLE} log -1 --pretty=format:%h - OUTPUT_VARIABLE ${_git_hash} - OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) -endmacro() diff --git a/gpcontrib/gp_exttable_fdw/input/gp_exttable_fdw.source b/gpcontrib/gp_exttable_fdw/input/gp_exttable_fdw.source index 41012e73c81..1c089e34760 100644 --- a/gpcontrib/gp_exttable_fdw/input/gp_exttable_fdw.source +++ b/gpcontrib/gp_exttable_fdw/input/gp_exttable_fdw.source @@ -53,12 +53,26 @@ OPTIONS (format_type 'c', delimiter ',', location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv', reject_limit_type 'p', reject_limit '120'); --- Error, invalid encoding +-- Error, invalid encoding (negative numeric ID) CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) SERVER gp_exttable_server OPTIONS (format_type 'c', delimiter ',', encoding '-1', location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv'); +-- Error, invalid encoding (unknown name) +CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) +SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', encoding 'bogus', + location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv'); + +-- Error, mixed 
numeric+letters must not be silently truncated to a +-- valid prefix (atoi('6abc') would return 6 = UTF8; strict parsing +-- in parse_fdw_encoding_option() rejects it). +CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) +SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', encoding '6abc', + location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv'); + -- OK, no execute_on | log_errors | encoding | is_writable option CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) SERVER gp_exttable_server @@ -79,3 +93,59 @@ SELECT urilocation FROM pg_exttable WHERE reloid = 'public.ext_special_uri'::reg SELECT ftoptions FROM pg_foreign_table WHERE ftrelid='public.ext_special_uri'::regclass; \a SELECT * FROM ext_special_uri ORDER BY a; + +-- =================================================================== +-- Tests for issue #1726: FDW OPTIONS encoding accepts both numeric IDs +-- and symbolic names (UTF8, utf-8, GBK, ...). Names previously parsed +-- via atoi() and silently degraded to SQL_ASCII. +-- =================================================================== + +-- Numeric form (baseline; worked before the fix as well). +CREATE FOREIGN TABLE ext_enc_num (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding '6'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_num'::regclass; + +-- Symbolic name 'UTF8' — used to be silently SQL_ASCII (the bug). +CREATE FOREIGN TABLE ext_enc_utf8 (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'UTF8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_utf8'::regclass; + +-- Case + dash variant resolved by pg_char_to_encoding(). 
+CREATE FOREIGN TABLE ext_enc_utf8_dash (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'utf-8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_utf8_dash'::regclass; + +-- Non-UTF8 symbolic name. +CREATE FOREIGN TABLE ext_enc_gbk (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'GBK'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_gbk'::regclass; + +-- ALTER FOREIGN TABLE ... OPTIONS (SET encoding 'UTF8') — same code +-- path, this proves the read-side resolution works after an ALTER too. +CREATE FOREIGN TABLE ext_enc_alter (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding '0'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_alter'::regclass; +ALTER FOREIGN TABLE ext_enc_alter OPTIONS (SET encoding 'UTF8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_alter'::regclass; + +DROP FOREIGN TABLE ext_enc_num; +DROP FOREIGN TABLE ext_enc_utf8; +DROP FOREIGN TABLE ext_enc_utf8_dash; +DROP FOREIGN TABLE ext_enc_gbk; +DROP FOREIGN TABLE ext_enc_alter; diff --git a/gpcontrib/gp_exttable_fdw/option.c b/gpcontrib/gp_exttable_fdw/option.c index 04cccfe0e47..59bd6b99014 100644 --- a/gpcontrib/gp_exttable_fdw/option.c +++ b/gpcontrib/gp_exttable_fdw/option.c @@ -135,11 +135,13 @@ gp_exttable_permission_check(PG_FUNCTION_ARGS) } else if(pg_strcasecmp(def->defname, "encoding") == 0) { - char *encoding = (char *) defGetString(def); - if (!PG_VALID_ENCODING(atoi(encoding))) - ereport(ERROR, - (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), - errmsg("%s is not a valid encoding code", encoding))); + /* + * Accept either a symbolic encoding name (e.g. 
'UTF8', 'GBK') + * or a numeric encoding ID. Reject anything else explicitly, + * rather than letting atoi() silently mistranslate non-numeric + * names to SQL_ASCII. + */ + (void) parse_fdw_encoding_option((char *) defGetString(def)); } } diff --git a/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source b/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source index a3191eb0853..8ba1be8ed6c 100644 --- a/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source +++ b/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source @@ -52,12 +52,26 @@ OPTIONS (format_type 'c', delimiter ',', location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv', reject_limit_type 'p', reject_limit '120'); ERROR: segment reject limit in PERCENT must be between 1 and 100 (got 120) (seg1 127.0.0.1:7003 pid=5173) --- Error, invalid encoding +-- Error, invalid encoding (negative numeric ID) CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) SERVER gp_exttable_server OPTIONS (format_type 'c', delimiter ',', encoding '-1', location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv'); -ERROR: -1 is not a valid encoding code (seg0 127.0.0.1:7002 pid=8289) +ERROR: "-1" is not a valid encoding name or code +-- Error, invalid encoding (unknown name) +CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) +SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', encoding 'bogus', + location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv'); +ERROR: "bogus" is not a valid encoding name or code +-- Error, mixed numeric+letters must not be silently truncated to a +-- valid prefix (atoi('6abc') would return 6 = UTF8; strict parsing +-- in parse_fdw_encoding_option() rejects it). 
+CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) +SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', encoding '6abc', + location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv'); +ERROR: "6abc" is not a valid encoding name or code -- OK, no execute_on | log_errors | encoding | is_writable option CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) SERVER gp_exttable_server @@ -96,3 +110,82 @@ SELECT * FROM ext_special_uri ORDER BY a; 3 | 3 (3 rows) +-- =================================================================== +-- Tests for issue #1726: FDW OPTIONS encoding accepts both numeric IDs +-- and symbolic names (UTF8, utf-8, GBK, ...). Names previously parsed +-- via atoi() and silently degraded to SQL_ASCII. +-- =================================================================== +-- Numeric form (baseline; worked before the fix as well). +CREATE FOREIGN TABLE ext_enc_num (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding '6'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_num'::regclass; + pg_encoding_to_char +--------------------- + UTF8 +(1 row) + +-- Symbolic name 'UTF8' — used to be silently SQL_ASCII (the bug). +CREATE FOREIGN TABLE ext_enc_utf8 (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'UTF8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_utf8'::regclass; + pg_encoding_to_char +--------------------- + UTF8 +(1 row) + +-- Case + dash variant resolved by pg_char_to_encoding(). 
+CREATE FOREIGN TABLE ext_enc_utf8_dash (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'utf-8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_utf8_dash'::regclass; + pg_encoding_to_char +--------------------- + UTF8 +(1 row) + +-- Non-UTF8 symbolic name. +CREATE FOREIGN TABLE ext_enc_gbk (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'GBK'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_gbk'::regclass; + pg_encoding_to_char +--------------------- + GBK +(1 row) + +-- ALTER FOREIGN TABLE ... OPTIONS (SET encoding 'UTF8') — same code +-- path, this proves the read-side resolution works after an ALTER too. +CREATE FOREIGN TABLE ext_enc_alter (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding '0'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_alter'::regclass; + pg_encoding_to_char +--------------------- + SQL_ASCII +(1 row) + +ALTER FOREIGN TABLE ext_enc_alter OPTIONS (SET encoding 'UTF8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_alter'::regclass; + pg_encoding_to_char +--------------------- + UTF8 +(1 row) + +DROP FOREIGN TABLE ext_enc_num; +DROP FOREIGN TABLE ext_enc_utf8; +DROP FOREIGN TABLE ext_enc_utf8_dash; +DROP FOREIGN TABLE ext_enc_gbk; +DROP FOREIGN TABLE ext_enc_alter; diff --git a/gpcontrib/gp_internal_tools/Makefile b/gpcontrib/gp_internal_tools/Makefile index 643a13f0118..829645e1268 100755 --- a/gpcontrib/gp_internal_tools/Makefile +++ b/gpcontrib/gp_internal_tools/Makefile @@ -4,6 +4,8 @@ DATA = gp_internal_tools--1.0.0.sql PG_CPPFLAGS = -I$(libpq_srcdir) +REGRESS = gp_session_state_memory + ifdef USE_PGXS PG_CONFIG = pg_config PGXS := $(shell 
$(PG_CONFIG) --pgxs) diff --git a/gpcontrib/gp_stats_collector/.clang-format b/gpcontrib/gp_stats_collector/.clang-format new file mode 100644 index 00000000000..eb90ff33671 --- /dev/null +++ b/gpcontrib/gp_stats_collector/.clang-format @@ -0,0 +1,178 @@ +--- +Language: Cpp +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align +AlignConsecutiveMacros: false +AlignConsecutiveAssignments: false +AlignConsecutiveBitFields: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Left +AlignOperands: Align +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortEnumsOnASingleLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Never +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: All +AlwaysBreakAfterReturnType: AllDefinitions +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: true + AfterClass: true + AfterControlStatement: Always + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: true + AfterExternBlock: false + BeforeCatch: true + BeforeElse: true + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: false +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' 
+CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '^<.*' + Priority: 1 + - Regex: '"protos/.*\.pb\.h"' + Priority: 2 + - Regex: '"postgres\.h"' + Priority: 3 + - Regex: '.*' + Priority: 4 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentCaseLabels: true +IndentCaseBlocks: false +IndentGotoLabels: true +IndentPPDirectives: None +IndentExternBlock: AfterExternBlock +IndentWidth: 4 +IndentWrappedFunctionNames: false +InsertTrailingCommas: None +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 3 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Right +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: google + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + - ParseTestProto + - ParsePartialTestProto + CanonicalDelimiter: '' + BasedOnStyle: google 
+ReflowComments: false +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: true +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +Standard: Auto +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 4 +UseCRLF: false +UseTab: Always +WhitespaceSensitiveMacros: + - STRINGIZE + - PP_STRINGIZE + - BOOST_PP_STRINGIZE +... + + diff --git a/gpcontrib/gp_stats_collector/Makefile b/gpcontrib/gp_stats_collector/Makefile new file mode 100644 index 00000000000..b3228d2c45e --- /dev/null +++ b/gpcontrib/gp_stats_collector/Makefile @@ -0,0 +1,35 @@ +MODULE_big = gp_stats_collector +EXTENSION = gp_stats_collector +DATA = $(wildcard *--*.sql) +REGRESS = gpsc_cursors gpsc_dist gpsc_select gpsc_utf8_trim gpsc_utility gpsc_guc_cache gpsc_uds gpsc_locale + +PROTO_BASES = gpsc_plan gpsc_metrics gpsc_set_service +PROTO_OBJS = $(patsubst %,src/protos/%.pb.o,$(PROTO_BASES)) + +C_OBJS = $(patsubst %.c,%.o,$(wildcard src/*.c src/*/*.c)) +CPP_OBJS = $(patsubst %.cpp,%.o,$(wildcard src/*.cpp src/log/*.cpp src/memory/*.cpp)) +OBJS = $(C_OBJS) $(CPP_OBJS) $(PROTO_OBJS) + +PG_CXXFLAGS += -Werror -Wall -Wno-unused-but-set-variable -std=c++17 -Isrc/protos -Isrc -Iinclude -DGPBUILD +SHLIB_LINK += -lprotobuf -lstdc++ +EXTRA_CLEAN = src/protos + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = gpcontrib/gp_stats_collector +top_builddir = ../.. 
+include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif + +src/protos/.done: $(wildcard protos/*.proto) + @mkdir -p src/protos + protoc -I /usr/include -I /usr/local/include -I . --cpp_out=src $^ + for f in src/protos/*.pb.cc; do mv "$$f" "$${f%.cc}.cpp"; done + touch $@ + +src/protos/%.pb.cpp src/protos/%.pb.h: src/protos/.done ; +$(CPP_OBJS): src/protos/.done diff --git a/gpcontrib/gp_stats_collector/README.md b/gpcontrib/gp_stats_collector/README.md new file mode 100644 index 00000000000..8c2d5c6868e --- /dev/null +++ b/gpcontrib/gp_stats_collector/README.md @@ -0,0 +1,47 @@ + + +## GP Stats Collector + +An extension for collecting query execution metrics and reporting them to an external agent. + +### Collected Statistics + +#### 1. Query Lifecycle +- **What:** Captures query text, normalized query text, timestamps (submit, start, end, done), and user/database info. +- **GUC:** `gpsc.enable`. + +#### 2. `EXPLAIN` data +- **What:** Triggers generation of the `EXPLAIN (TEXT, COSTS, VERBOSE)` and captures it. +- **GUC:** `gpsc.enable`. + +#### 3. `EXPLAIN ANALYZE` data +- **What:** Triggers generation of the `EXPLAIN (TEXT, ANALYZE, BUFFERS, TIMING, VERBOSE)` and captures it. +- **GUCs:** `gpsc.enable`, `gpsc.min_analyze_time`, `gpsc.enable_cdbstats`(ANALYZE), `gpsc.enable_analyze`(BUFFERS, TIMING, VERBOSE). + +#### 4. Other Metrics +- **What:** Captures Instrument, System, Network, Interconnect, Spill metrics. +- **GUC:** `gpsc.enable`. + +### General Configuration +- **Nested Queries:** When `gpsc.report_nested_queries` is `false`, only top-level queries are reported from the coordinator and segments, when `true`, both top-level and nested queries are reported from the coordinator, from segments collected as aggregates. +- **Data Destination:** All collected data is sent to a Unix Domain Socket. Configure the path with `gpsc.uds_path`. 
+- **User Filtering:** To exclude activity from certain roles, add them to the comma-separated list in `gpsc.ignored_users_list`. +- **Trimming plans:** Query texts and execution plans are trimmed based on `gpsc.max_text_size` and `gpsc.max_plan_size` (default: 1024KB). For now, it is not recommended to set these GUCs higher than 1024KB. +- **Analyze collection:** Analyze is sent if execution time exceeds `gpsc.min_analyze_time`, which is 10 seconds by default. Analyze is collected if `gpsc.enable_analyze` is true. diff --git a/gpcontrib/gp_stats_collector/expected/gpsc_cursors.out b/gpcontrib/gp_stats_collector/expected/gpsc_cursors.out new file mode 100644 index 00000000000..282d9ac49e1 --- /dev/null +++ b/gpcontrib/gp_stats_collector/expected/gpsc_cursors.out @@ -0,0 +1,163 @@ +CREATE EXTENSION gp_stats_collector; +CREATE FUNCTION gpsc_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +-- DECLARE +SET gpsc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_0 CURSOR FOR SELECT 0; +CLOSE cursor_stats_0; +COMMIT; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_0 CURSOR FOR SELECT 0; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_0; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_0; | QUERY_STATUS_DONE + -1 | 
COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(10 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- DECLARE WITH HOLD +SET gpsc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; +CLOSE cursor_stats_1; +DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; +CLOSE cursor_stats_2; +COMMIT; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_1; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_1; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_2; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_2; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(14 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- ROLLBACK +SET gpsc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_3 CURSOR FOR SELECT 1; +CLOSE cursor_stats_3; +DECLARE cursor_stats_4 CURSOR FOR SELECT 1; +ROLLBACK; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | 
query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_3 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_3; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_3; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_4 CURSOR FOR SELECT 1; | QUERY_STATUS_DONE + -1 | ROLLBACK; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(12 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- FETCH +SET gpsc.logging_mode to 'TBL'; +BEGIN; +DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; +DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; +FETCH 1 IN cursor_stats_5; + ?column? +---------- + 2 +(1 row) + +FETCH 1 IN cursor_stats_6; + ?column? 
+---------- + 3 +(1 row) + +CLOSE cursor_stats_5; +CLOSE cursor_stats_6; +COMMIT; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; | QUERY_STATUS_DONE + -1 | DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; | QUERY_STATUS_SUBMIT + -1 | DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; | QUERY_STATUS_DONE + -1 | FETCH 1 IN cursor_stats_5; | QUERY_STATUS_SUBMIT + -1 | FETCH 1 IN cursor_stats_5; | QUERY_STATUS_DONE + -1 | FETCH 1 IN cursor_stats_6; | QUERY_STATUS_SUBMIT + -1 | FETCH 1 IN cursor_stats_6; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_5; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_5; | QUERY_STATUS_DONE + -1 | CLOSE cursor_stats_6; | QUERY_STATUS_SUBMIT + -1 | CLOSE cursor_stats_6; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(18 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/expected/gpsc_dist.out b/gpcontrib/gp_stats_collector/expected/gpsc_dist.out new file mode 100644 index 00000000000..92e8678767b --- /dev/null +++ b/gpcontrib/gp_stats_collector/expected/gpsc_dist.out @@ -0,0 +1,175 @@ +CREATE EXTENSION gp_stats_collector; +CREATE OR REPLACE FUNCTION gpsc_status_order(status 
text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +SET gpsc.enable_utility TO FALSE; +-- Hash distributed table +CREATE TABLE test_hash_dist (id int) DISTRIBUTED BY (id); +INSERT INTO test_hash_dist SELECT 1; +SET gpsc.logging_mode to 'TBL'; +SET optimizer_enable_direct_dispatch TO TRUE; +-- Direct dispatch is used here, only one segment is scanned. +select * from test_hash_dist where id = 1; + id +---- + 1 +(1 row) + +RESET optimizer_enable_direct_dispatch; +RESET gpsc.logging_mode; +-- Should see 8 rows. +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------------------------+--------------------- + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_SUBMIT + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_START + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_END + -1 | select * from test_hash_dist where id = 1; | QUERY_STATUS_DONE + 1 | | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE +(8 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +SET gpsc.logging_mode to 'TBL'; +-- Scan all segments. 
+select * from test_hash_dist; + id +---- + 1 +(1 row) + +DROP TABLE test_hash_dist; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------+--------------------- + -1 | select * from test_hash_dist; | QUERY_STATUS_SUBMIT + -1 | select * from test_hash_dist; | QUERY_STATUS_START + -1 | select * from test_hash_dist; | QUERY_STATUS_END + -1 | select * from test_hash_dist; | QUERY_STATUS_DONE + 1 | | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE + 2 | | QUERY_STATUS_SUBMIT + 2 | | QUERY_STATUS_START + 2 | | QUERY_STATUS_END + 2 | | QUERY_STATUS_DONE + | | QUERY_STATUS_SUBMIT + | | QUERY_STATUS_START + | | QUERY_STATUS_END + | | QUERY_STATUS_DONE +(16 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Replicated table +CREATE FUNCTION force_segments() RETURNS SETOF text AS $$ +BEGIN + RETURN NEXT 'seg'; +END; +$$ LANGUAGE plpgsql VOLATILE EXECUTE ON ALL SEGMENTS; +CREATE TABLE test_replicated (id int) DISTRIBUTED REPLICATED; +INSERT INTO test_replicated SELECT 1; +SET gpsc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_replicated, force_segments(); + count +------- + 3 +(1 row) + +DROP TABLE test_replicated; +DROP FUNCTION force_segments(); +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------------------+--------------------- + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_SUBMIT + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_START + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_END + -1 | SELECT COUNT(*) FROM test_replicated, force_segments(); | QUERY_STATUS_DONE + 1 
| | QUERY_STATUS_SUBMIT + 1 | | QUERY_STATUS_START + 1 | | QUERY_STATUS_END + 1 | | QUERY_STATUS_DONE + 2 | | QUERY_STATUS_SUBMIT + 2 | | QUERY_STATUS_START + 2 | | QUERY_STATUS_END + 2 | | QUERY_STATUS_DONE + | | QUERY_STATUS_SUBMIT + | | QUERY_STATUS_START + | | QUERY_STATUS_END + | | QUERY_STATUS_DONE +(16 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Partially distributed table (2 numsegments) +SET allow_system_table_mods = ON; +CREATE TABLE test_partial_dist (id int, data text) DISTRIBUTED BY (id); +UPDATE gp_distribution_policy SET numsegments = 2 WHERE localoid = 'test_partial_dist'::regclass; +INSERT INTO test_partial_dist SELECT * FROM generate_series(1, 100); +SET gpsc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_partial_dist; + count +------- + 100 +(1 row) + +RESET gpsc.logging_mode; +DROP TABLE test_partial_dist; +RESET allow_system_table_mods; +-- Should see 12 rows. +SELECT query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + query_text | query_status +-----------------------------------------+--------------------- + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_SUBMIT + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_START + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_END + SELECT COUNT(*) FROM test_partial_dist; | QUERY_STATUS_DONE + | QUERY_STATUS_SUBMIT + | QUERY_STATUS_START + | QUERY_STATUS_END + | QUERY_STATUS_DONE + | QUERY_STATUS_SUBMIT + | QUERY_STATUS_START + | QUERY_STATUS_END + | QUERY_STATUS_DONE +(12 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/expected/gpsc_guc_cache.out b/gpcontrib/gp_stats_collector/expected/gpsc_guc_cache.out new file mode 100644 index 
00000000000..11a420839db --- /dev/null +++ b/gpcontrib/gp_stats_collector/expected/gpsc_guc_cache.out @@ -0,0 +1,57 @@ +-- +-- Test GUC caching for query lifecycle consistency. +-- +-- The extension logs SUBMIT and DONE events for each query. +-- GUC values that control logging (enable_utility, ignored_users_list, ...) +-- must be cached at SUBMIT time to ensure DONE uses the same filtering +-- criteria. Otherwise, a SET command that modifies these GUCs would +-- have its DONE event rejected, creating orphaned SUBMIT entries. +-- This is due to query being actually executed between SUBMIT and DONE. +-- start_ignore +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; +SELECT gpsc.truncate_log(); +-- end_ignore +CREATE OR REPLACE FUNCTION print_last_query(query text) +RETURNS TABLE(query_status text) AS $$ + SELECT query_status + FROM gpsc.log + WHERE segid = -1 AND query_text = query + ORDER BY ccnt DESC +$$ LANGUAGE sql; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.logging_mode TO 'TBL'; +-- SET below disables utility logging and DONE must still be logged. +SET gpsc.enable_utility TO FALSE; +SELECT * FROM print_last_query('SET gpsc.enable_utility TO FALSE;'); + query_status +--------------------- + QUERY_STATUS_SUBMIT + QUERY_STATUS_DONE +(2 rows) + +-- SELECT below adds current user to ignore list and DONE must still be logged. 
+-- start_ignore +SELECT set_config('gpsc.ignored_users_list', current_user, false); + set_config +------------ + gpadmin +(1 row) + +-- end_ignore +SELECT * FROM print_last_query('SELECT set_config(''gpsc.ignored_users_list'', current_user, false);'); + query_status +--------------------- + QUERY_STATUS_SUBMIT + QUERY_STATUS_START + QUERY_STATUS_END + QUERY_STATUS_DONE +(4 rows) + +DROP FUNCTION print_last_query(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; +RESET gpsc.logging_mode; diff --git a/gpcontrib/gp_stats_collector/expected/gpsc_locale.out b/gpcontrib/gp_stats_collector/expected/gpsc_locale.out new file mode 100644 index 00000000000..a01fe0648b9 --- /dev/null +++ b/gpcontrib/gp_stats_collector/expected/gpsc_locale.out @@ -0,0 +1,23 @@ +-- The extension generates normalized query text and plan using jumbling functions. +-- Those functions may fail when translating to wide character if the current locale +-- cannot handle the character set. This test checks that even when those functions +-- fail, the plan is still generated and executed. This test is partially taken from +-- gp_locale. 
+-- start_ignore +DROP DATABASE IF EXISTS gpsc_test_locale; +-- end_ignore +CREATE DATABASE gpsc_test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; +\c gpsc_test_locale +CREATE EXTENSION gp_stats_collector; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable_utility TO TRUE; +SET gpsc.enable TO TRUE; +CREATE TABLE gpsc_hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); +INSERT INTO gpsc_hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); +-- Should not see error here +UPDATE gpsc_hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; +RESET gpsc.enable; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; +DROP TABLE gpsc_hi_안녕세계; +DROP EXTENSION gp_stats_collector; diff --git a/gpcontrib/gp_stats_collector/expected/gpsc_select.out b/gpcontrib/gp_stats_collector/expected/gpsc_select.out new file mode 100644 index 00000000000..3008c8f6d55 --- /dev/null +++ b/gpcontrib/gp_stats_collector/expected/gpsc_select.out @@ -0,0 +1,136 @@ +CREATE EXTENSION gp_stats_collector; +CREATE OR REPLACE FUNCTION gpsc_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +SET gpsc.enable_utility TO FALSE; +-- Basic SELECT tests +SET gpsc.logging_mode to 'TBL'; +SELECT 1; + ?column? 
+---------- + 1 +(1 row) + +SELECT COUNT(*) FROM generate_series(1,10); + count +------- + 10 +(1 row) + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------------------+--------------------- + -1 | SELECT 1; | QUERY_STATUS_SUBMIT + -1 | SELECT 1; | QUERY_STATUS_START + -1 | SELECT 1; | QUERY_STATUS_END + -1 | SELECT 1; | QUERY_STATUS_DONE + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_SUBMIT + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_START + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_END + -1 | SELECT COUNT(*) FROM generate_series(1,10); | QUERY_STATUS_DONE +(8 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Transaction test +SET gpsc.logging_mode to 'TBL'; +BEGIN; +SELECT 1; + ?column? +---------- + 1 +(1 row) + +COMMIT; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+------------+--------------------- + -1 | SELECT 1; | QUERY_STATUS_SUBMIT + -1 | SELECT 1; | QUERY_STATUS_START + -1 | SELECT 1; | QUERY_STATUS_END + -1 | SELECT 1; | QUERY_STATUS_DONE +(4 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- CTE test +SET gpsc.logging_mode to 'TBL'; +WITH t AS (VALUES (1), (2)) +SELECT * FROM t; + column1 +--------- + 1 + 2 +(2 rows) + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+-----------------------------+--------------------- + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_SUBMIT + | SELECT * FROM t; | + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_START + | SELECT * FROM t; | + -1 | WITH t AS (VALUES (1), 
(2))+| QUERY_STATUS_END + | SELECT * FROM t; | + -1 | WITH t AS (VALUES (1), (2))+| QUERY_STATUS_DONE + | SELECT * FROM t; | +(4 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Prepared statement test +SET gpsc.logging_mode to 'TBL'; +PREPARE test_stmt AS SELECT 1; +EXECUTE test_stmt; + ?column? +---------- + 1 +(1 row) + +DEALLOCATE test_stmt; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------------+--------------------- + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_SUBMIT + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_START + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_END + -1 | PREPARE test_stmt AS SELECT 1; | QUERY_STATUS_DONE +(4 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/expected/gpsc_uds.out b/gpcontrib/gp_stats_collector/expected/gpsc_uds.out new file mode 100644 index 00000000000..e8bca79e669 --- /dev/null +++ b/gpcontrib/gp_stats_collector/expected/gpsc_uds.out @@ -0,0 +1,42 @@ +-- Test UDS socket +-- start_ignore +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; +-- end_ignore +\set UDS_PATH '/tmp/gpsc_test.sock' +-- Configure extension to send via UDS +SET gpsc.uds_path TO :'UDS_PATH'; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.logging_mode TO 'UDS'; +-- Start receiver +SELECT gpsc.__test_uds_start_server(:'UDS_PATH'); + __test_uds_start_server +------------------------- +(0 rows) + +-- Send +SELECT 1; + ?column? 
+---------- + 1 +(1 row) + +-- Receive +SELECT gpsc.__test_uds_receive() > 0 as received; + received +---------- + t +(1 row) + +-- Stop receiver +SELECT gpsc.__test_uds_stop_server(); + __test_uds_stop_server +------------------------ +(0 rows) + +-- Cleanup +DROP EXTENSION gp_stats_collector; +RESET gpsc.uds_path; +RESET gpsc.ignored_users_list; +RESET gpsc.enable; +RESET gpsc.logging_mode; diff --git a/gpcontrib/gp_stats_collector/expected/gpsc_utf8_trim.out b/gpcontrib/gp_stats_collector/expected/gpsc_utf8_trim.out new file mode 100644 index 00000000000..db3949f3152 --- /dev/null +++ b/gpcontrib/gp_stats_collector/expected/gpsc_utf8_trim.out @@ -0,0 +1,68 @@ +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; +CREATE OR REPLACE FUNCTION get_marked_query(marker TEXT) +RETURNS TEXT AS $$ + SELECT query_text + FROM gpsc.log + WHERE query_text LIKE '%' || marker || '%' + ORDER BY datetime DESC + LIMIT 1 +$$ LANGUAGE sql VOLATILE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +-- Test 1: 1 byte chars +SET gpsc.max_text_size to 19; +SET gpsc.logging_mode to 'TBL'; +SELECT /*test1*/ 'HelloWorld'; + ?column? +------------ + HelloWorld +(1 row) + +RESET gpsc.logging_mode; +SELECT octet_length(get_marked_query('test1')) = 19 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Test 2: 2 byte chars +SET gpsc.max_text_size to 19; +SET gpsc.logging_mode to 'TBL'; +SELECT /*test2*/ 'РУССКИЙЯЗЫК'; + ?column? +------------- + РУССКИЙЯЗЫК +(1 row) + +RESET gpsc.logging_mode; +-- Character 'Р' has two bytes and cut in the middle => not included. +SELECT octet_length(get_marked_query('test2')) = 18 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Test 3: 4 byte chars +SET gpsc.max_text_size to 21; +SET gpsc.logging_mode to 'TBL'; +SELECT /*test3*/ '😀'; + ?column? +---------- + 😀 +(1 row) + +RESET gpsc.logging_mode; +-- Emoji has 4 bytes and cut before the last byte => not included. 
+SELECT octet_length(get_marked_query('test3')) = 18 AS correct_length; + correct_length +---------------- + t +(1 row) + +-- Cleanup +DROP FUNCTION get_marked_query(TEXT); +RESET gpsc.max_text_size; +RESET gpsc.logging_mode; +RESET gpsc.enable; +RESET gpsc.ignored_users_list; +DROP EXTENSION gp_stats_collector; diff --git a/gpcontrib/gp_stats_collector/expected/gpsc_utility.out b/gpcontrib/gp_stats_collector/expected/gpsc_utility.out new file mode 100644 index 00000000000..e8e28614370 --- /dev/null +++ b/gpcontrib/gp_stats_collector/expected/gpsc_utility.out @@ -0,0 +1,248 @@ +CREATE EXTENSION gp_stats_collector; +CREATE OR REPLACE FUNCTION gpsc_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +SET gpsc.logging_mode to 'TBL'; +CREATE TABLE test_table (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+CREATE INDEX test_idx ON test_table(a); +ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; +DROP TABLE test_table; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+----------------------------------------------------+--------------------- + -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE test_table (a int, b text); | QUERY_STATUS_DONE + -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_SUBMIT + -1 | CREATE INDEX test_idx ON test_table(a); | QUERY_STATUS_DONE + -1 | ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; | QUERY_STATUS_SUBMIT + -1 | ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; | QUERY_STATUS_DONE + -1 | DROP TABLE test_table; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE test_table; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(10 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Partitioning +SET gpsc.logging_mode to 'TBL'; +CREATE TABLE pt_test (a int, b int) +DISTRIBUTED BY (a) +PARTITION BY RANGE (a) +(START (0) END (100) EVERY (50)); +DROP TABLE pt_test; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------+--------------------- + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_SUBMIT + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | CREATE TABLE pt_test (a int, b int)+| QUERY_STATUS_DONE + | DISTRIBUTED BY (a) +| + | PARTITION BY RANGE (a) +| + | (START (0) END (100) EVERY (50)); | + -1 | DROP TABLE pt_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE pt_test; | 
QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(6 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Views and Functions +SET gpsc.logging_mode to 'TBL'; +CREATE VIEW test_view AS SELECT 1 AS a; +CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; +DROP VIEW test_view; +DROP FUNCTION test_func(int); +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+------------------------------------------------------------------------------------+--------------------- + -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_SUBMIT + -1 | CREATE VIEW test_view AS SELECT 1 AS a; | QUERY_STATUS_DONE + -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_SUBMIT + -1 | CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; | QUERY_STATUS_DONE + -1 | DROP VIEW test_view; | QUERY_STATUS_SUBMIT + -1 | DROP VIEW test_view; | QUERY_STATUS_DONE + -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_SUBMIT + -1 | DROP FUNCTION test_func(int); | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(10 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Transaction Operations +SET gpsc.logging_mode to 'TBL'; +BEGIN; +SAVEPOINT sp1; +ROLLBACK TO sp1; +COMMIT; +BEGIN; +SAVEPOINT sp2; +ABORT; +BEGIN; +ROLLBACK; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+--------------------------+--------------------- + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | 
BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK TO sp1; | QUERY_STATUS_DONE + -1 | COMMIT; | QUERY_STATUS_SUBMIT + -1 | COMMIT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | SAVEPOINT sp2; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_SUBMIT + -1 | ABORT; | QUERY_STATUS_DONE + -1 | BEGIN; | QUERY_STATUS_SUBMIT + -1 | BEGIN; | QUERY_STATUS_DONE + -1 | ROLLBACK; | QUERY_STATUS_SUBMIT + -1 | ROLLBACK; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(18 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- DML Operations +SET gpsc.logging_mode to 'TBL'; +CREATE TABLE dml_test (a int, b text); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+INSERT INTO dml_test VALUES (1, 'test'); +UPDATE dml_test SET b = 'updated' WHERE a = 1; +DELETE FROM dml_test WHERE a = 1; +DROP TABLE dml_test; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+----------------------------------------+--------------------- + -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE dml_test (a int, b text); | QUERY_STATUS_DONE + -1 | DROP TABLE dml_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE dml_test; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(6 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- COPY Operations +SET gpsc.logging_mode to 'TBL'; +CREATE TABLE copy_test (a int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
+COPY (SELECT 1) TO STDOUT; +1 +DROP TABLE copy_test; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+---------------------------------+--------------------- + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_SUBMIT + -1 | CREATE TABLE copy_test (a int); | QUERY_STATUS_DONE + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_SUBMIT + -1 | COPY (SELECT 1) TO STDOUT; | QUERY_STATUS_DONE + -1 | DROP TABLE copy_test; | QUERY_STATUS_SUBMIT + -1 | DROP TABLE copy_test; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(8 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- Prepared Statements and error during execute +SET gpsc.logging_mode to 'TBL'; +PREPARE test_prep(int) AS SELECT $1/0 AS value; +EXECUTE test_prep(0::int); +ERROR: division by zero +DEALLOCATE test_prep; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+-------------------------------------------------+--------------------- + -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_SUBMIT + -1 | PREPARE test_prep(int) AS SELECT $1/0 AS value; | QUERY_STATUS_DONE + -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_SUBMIT + -1 | EXECUTE test_prep(0::int); | QUERY_STATUS_ERROR + -1 | DEALLOCATE test_prep; | QUERY_STATUS_SUBMIT + -1 | DEALLOCATE test_prep; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(8 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +-- GUC Settings +SET gpsc.logging_mode to 'TBL'; +SET gpsc.report_nested_queries TO FALSE; +RESET 
gpsc.report_nested_queries; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; + segid | query_text | query_status +-------+------------------------------------------+--------------------- + -1 | SET gpsc.report_nested_queries TO FALSE; | QUERY_STATUS_SUBMIT + -1 | SET gpsc.report_nested_queries TO FALSE; | QUERY_STATUS_DONE + -1 | RESET gpsc.report_nested_queries; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.report_nested_queries; | QUERY_STATUS_DONE + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_SUBMIT + -1 | RESET gpsc.logging_mode; | QUERY_STATUS_DONE +(6 rows) + +SELECT gpsc.truncate_log() IS NOT NULL AS t; + t +--- +(0 rows) + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/gp_stats_collector--1.0--1.1.sql b/gpcontrib/gp_stats_collector/gp_stats_collector--1.0--1.1.sql new file mode 100644 index 00000000000..398f03b4fa9 --- /dev/null +++ b/gpcontrib/gp_stats_collector/gp_stats_collector--1.0--1.1.sql @@ -0,0 +1,113 @@ +/* gp_stats_collector--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION gp_stats_collector UPDATE TO '1.1'" to load this file. \quit + +CREATE SCHEMA gpsc; + +-- Unlink existing objects from extension. 
+ALTER EXTENSION gp_stats_collector DROP VIEW gpsc_stat_messages; +ALTER EXTENSION gp_stats_collector DROP FUNCTION gpsc_stat_messages_reset(); +ALTER EXTENSION gp_stats_collector DROP FUNCTION __gpsc_stat_messages_f_on_segments(); +ALTER EXTENSION gp_stats_collector DROP FUNCTION __gpsc_stat_messages_f_on_master(); +ALTER EXTENSION gp_stats_collector DROP FUNCTION __gpsc_stat_messages_reset_f_on_segments(); +ALTER EXTENSION gp_stats_collector DROP FUNCTION __gpsc_stat_messages_reset_f_on_master(); + +-- Now drop the objects; the view goes first because it selects from the __gpsc_stat_messages_f_on_* functions. +DROP VIEW gpsc_stat_messages; +DROP FUNCTION gpsc_stat_messages_reset(); +DROP FUNCTION __gpsc_stat_messages_f_on_segments(); +DROP FUNCTION __gpsc_stat_messages_f_on_master(); +DROP FUNCTION __gpsc_stat_messages_reset_f_on_segments(); +DROP FUNCTION __gpsc_stat_messages_reset_f_on_master(); + +-- Recreate functions and view in the new gpsc schema: same C entry points, SQL names lose the gpsc_ prefix. +CREATE FUNCTION gpsc.__stat_messages_reset_f_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' +LANGUAGE C EXECUTE ON COORDINATOR; + +CREATE FUNCTION gpsc.__stat_messages_reset_f_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION gpsc.stat_messages_reset() -- user-facing wrapper: invokes the reset routine on the coordinator, then on all segments +RETURNS SETOF void +AS +$$ + SELECT gpsc.__stat_messages_reset_f_on_master(); + SELECT gpsc.__stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON COORDINATOR; + +CREATE FUNCTION gpsc.__stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gpsc_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON COORDINATOR; + +CREATE FUNCTION gpsc.__stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gpsc_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW gpsc.stat_messages AS -- coordinator and segment rows combined, ordered by segid; the column list types the SETOF record result + SELECT C.* + FROM gpsc.__stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + 
max_message_size int + ) + UNION ALL + SELECT C.* + FROM gpsc.__stat_messages_f_on_segments() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; + +-- Create new objects: log initialisation, the gpsc.log view, and log truncation. +CREATE FUNCTION gpsc.__init_log_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON COORDINATOR; + +CREATE FUNCTION gpsc.__init_log_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +-- Creates log table inside gpsc schema (presumably gpsc.__log, which the gpsc.log view below reads). +SELECT gpsc.__init_log_on_master(); +SELECT gpsc.__init_log_on_segments(); + +CREATE VIEW gpsc.log AS + SELECT * FROM gpsc.__log -- master + UNION ALL + SELECT * FROM gp_dist_random('gpsc.__log') -- segments + ORDER BY tmid, ssid, ccnt; + +CREATE FUNCTION gpsc.__truncate_log_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON COORDINATOR; + +CREATE FUNCTION gpsc.__truncate_log_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION gpsc.truncate_log() -- user-facing wrapper: truncates on the coordinator, then on all segments; has no RETURN NEXT, so it yields zero rows +RETURNS SETOF void AS $$ +BEGIN + PERFORM gpsc.__truncate_log_on_master(); + PERFORM gpsc.__truncate_log_on_segments(); +END; +$$ LANGUAGE plpgsql VOLATILE; diff --git a/gpcontrib/gp_stats_collector/gp_stats_collector--1.0.sql b/gpcontrib/gp_stats_collector/gp_stats_collector--1.0.sql new file mode 100644 index 00000000000..e4a50aa2133 --- /dev/null +++ b/gpcontrib/gp_stats_collector/gp_stats_collector--1.0.sql @@ -0,0 +1,55 @@ +/* gp_stats_collector--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION gp_stats_collector" to load this file. 
\quit + +CREATE FUNCTION __gpsc_stat_messages_reset_f_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' +LANGUAGE C EXECUTE ON COORDINATOR; + +CREATE FUNCTION __gpsc_stat_messages_reset_f_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION gpsc_stat_messages_reset() +RETURNS SETOF void +AS +$$ + SELECT __gpsc_stat_messages_reset_f_on_master(); + SELECT __gpsc_stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON COORDINATOR; + +CREATE FUNCTION __gpsc_stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gpsc_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON COORDINATOR; + +CREATE FUNCTION __gpsc_stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gpsc_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW gpsc_stat_messages AS + SELECT C.* + FROM __gpsc_stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) + UNION ALL + SELECT C.* + FROM __gpsc_stat_messages_f_on_segments() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; diff --git a/gpcontrib/gp_stats_collector/gp_stats_collector--1.1.sql b/gpcontrib/gp_stats_collector/gp_stats_collector--1.1.sql new file mode 100644 index 00000000000..3ebdad14b06 --- /dev/null +++ b/gpcontrib/gp_stats_collector/gp_stats_collector--1.1.sql @@ -0,0 +1,110 @@ +/* gp_stats_collector--1.1.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION gp_stats_collector" to load this file. 
\quit + +CREATE SCHEMA gpsc; + +CREATE FUNCTION gpsc.__stat_messages_reset_f_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' +LANGUAGE C EXECUTE ON COORDINATOR; + +CREATE FUNCTION gpsc.__stat_messages_reset_f_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_stat_messages_reset' +LANGUAGE C EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION gpsc.stat_messages_reset() +RETURNS SETOF void +AS +$$ + SELECT gpsc.__stat_messages_reset_f_on_master(); + SELECT gpsc.__stat_messages_reset_f_on_segments(); +$$ +LANGUAGE SQL EXECUTE ON COORDINATOR; + +CREATE FUNCTION gpsc.__stat_messages_f_on_master() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gpsc_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON COORDINATOR; + +CREATE FUNCTION gpsc.__stat_messages_f_on_segments() +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gpsc_stat_messages' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE VIEW gpsc.stat_messages AS + SELECT C.* + FROM gpsc.__stat_messages_f_on_master() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) + UNION ALL + SELECT C.* + FROM gpsc.__stat_messages_f_on_segments() as C ( + segid int, + total_messages bigint, + send_failures bigint, + connection_failures bigint, + other_errors bigint, + max_message_size int + ) +ORDER BY segid; + +CREATE FUNCTION gpsc.__init_log_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON COORDINATOR; + +CREATE FUNCTION gpsc.__init_log_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_init_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +-- Creates log table inside gpsc schema. 
+SELECT gpsc.__init_log_on_master(); +SELECT gpsc.__init_log_on_segments(); + +CREATE VIEW gpsc.log AS + SELECT * FROM gpsc.__log -- master + UNION ALL + SELECT * FROM gp_dist_random('gpsc.__log') -- segments +ORDER BY tmid, ssid, ccnt; + +CREATE FUNCTION gpsc.__truncate_log_on_master() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON COORDINATOR; + +CREATE FUNCTION gpsc.__truncate_log_on_segments() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_truncate_log' +LANGUAGE C STRICT VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE FUNCTION gpsc.truncate_log() +RETURNS SETOF void AS $$ +BEGIN + PERFORM gpsc.__truncate_log_on_master(); + PERFORM gpsc.__truncate_log_on_segments(); +END; +$$ LANGUAGE plpgsql VOLATILE; + +CREATE FUNCTION gpsc.__test_uds_start_server(path text) +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_test_uds_start_server' +LANGUAGE C STRICT EXECUTE ON COORDINATOR; + +CREATE FUNCTION gpsc.__test_uds_receive(timeout_ms int DEFAULT 2000) +RETURNS SETOF bigint +AS 'MODULE_PATHNAME', 'gpsc_test_uds_receive' +LANGUAGE C STRICT EXECUTE ON COORDINATOR; + +CREATE FUNCTION gpsc.__test_uds_stop_server() +RETURNS SETOF void +AS 'MODULE_PATHNAME', 'gpsc_test_uds_stop_server' +LANGUAGE C EXECUTE ON COORDINATOR; diff --git a/gpcontrib/gp_stats_collector/gp_stats_collector.control b/gpcontrib/gp_stats_collector/gp_stats_collector.control new file mode 100644 index 00000000000..4aea2bd49b8 --- /dev/null +++ b/gpcontrib/gp_stats_collector/gp_stats_collector.control @@ -0,0 +1,5 @@ +# gp_stats_collector extension +comment = 'Intercept query and plan execution hooks and report them to Cloudberry monitor agents' +default_version = '1.1' +module_pathname = '$libdir/gp_stats_collector' +superuser = true diff --git a/gpcontrib/gp_stats_collector/metric.md b/gpcontrib/gp_stats_collector/metric.md new file mode 100644 index 00000000000..6f168d8cd98 --- /dev/null +++ b/gpcontrib/gp_stats_collector/metric.md @@ -0,0 +1,145 @@ + 
+ +## GP Stats Collector Metrics + +### States +A Postgres process goes through 4 executor functions to execute a query: +1) `ExecutorStart()` - resource allocation for the query. +2) `ExecutorRun()` - query execution. +3) `ExecutorFinish()` - post-run cleanup (e.g. firing queued AFTER triggers). +4) `ExecutorEnd()` - final cleanup and resource release. + +gp-stats-collector sends messages with 4 states, from _Dispatcher_ and/or _Execute_ processes: `submit`, `start`, `end`, `done`, in this order: +``` +submit -> ExecutorStart() -> start -> ExecutorRun() -> ExecutorFinish() -> end -> ExecutorEnd() -> done +``` + +### Key Points +- Some queries may skip the _end_ state; in that case the _end_ statistics are sent during _done_. +- If a query finishes with an error (`METRICS_QUERY_ERROR`), or is cancelled (`METRICS_QUERY_CANCELLED`), statistics are sent at _done_. +- Some statistics are calculated as the difference between the current value of a global metric and its earlier snapshot. The initial snapshot is taken at _submit_, and at _end_/_done_ the diff is calculated. +- Nested queries on _Dispatcher_ become top-level on _Execute_. +- Each process (_Dispatcher_/_Execute_) sends its own statistics. + +### Notations +- **S** = Submit event. +- **T** = Start event. +- **E** = End event. +- **D** = Done event. +- **DIFF** = current_value - submit_value (submit event). +- **ABS** = Absolute value: the value as sampled, used where a diff is not applicable. +- **Local*** - Statistics that start counting from zero for each new query. A nested query is also considered new. +- **Node** - PG process, either a `Query Dispatcher` (on master) or an `Execute` (on segment). 
+ +### Statistics Table + +| Proto Field | Type | When | DIFF/ABS | Local* | Scope | Dispatcher | Execute | Units | Notes | +| :--------------------------- | :----- | :------ | :------- | ------ | :------ | :--------: | :-----: | :------ | :-------------------------------------------------- | +| **SystemStat** | | | | | | | | | | +| `runningTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | Wall clock time | +| `userTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | /proc/pid/stat utime | +| `kernelTimeSeconds` | double | E, D | DIFF | - | Node | + | + | seconds | /proc/pid/stat stime | +| `vsize` | uint64 | E, D | ABS | - | Node | + | + | bytes | /proc/pid/stat vsize | +| `rss` | uint64 | E, D | ABS | - | Node | + | + | pages | /proc/pid/stat rss | +| `VmSizeKb` | uint64 | E, D | ABS | - | Node | + | + | KB | /proc/pid/status VmSize | +| `VmPeakKb` | uint64 | E, D | ABS | - | Node | + | + | KB | /proc/pid/status VmPeak | +| `rchar` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io rchar | +| `wchar` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io wchar | +| `syscr` | uint64 | E, D | DIFF | - | Node | + | + | count | /proc/pid/io syscr | +| `syscw` | uint64 | E, D | DIFF | - | Node | + | + | count | /proc/pid/io syscw | +| `read_bytes` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io read_bytes | +| `write_bytes` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io write_bytes | +| `cancelled_write_bytes` | uint64 | E, D | DIFF | - | Node | + | + | bytes | /proc/pid/io cancelled_write_bytes | +| **MetricInstrumentation** | | | | | | | | | | +| `ntuples` | uint64 | E, D | ABS | + | Node | + | + | tuples | Accumulated total tuples | +| `nloops` | uint64 | E, D | ABS | + | Node | + | + | count | Number of cycles | +| `tuplecount` | uint64 | E, D | ABS | + | Node | + | + | tuples | Accumulated tuples per cycle | +| `firsttuple` | double | E, D | ABS | + | Node | + | + | seconds 
| Time for first tuple of this cycle | +| `startup` | double | E, D | ABS | + | Node | + | + | seconds | Start time of current iteration | +| `total` | double | E, D | ABS | + | Node | + | + | seconds | Total time taken | +| `shared_blks_hit` | uint64 | E, D | ABS | + | Node | + | + | blocks | Shared buffer blocks found in cache | +| `shared_blks_read` | uint64 | E, D | ABS | + | Node | + | + | blocks | Shared buffer blocks read from disk | +| `shared_blks_dirtied` | uint64 | E, D | ABS | + | Node | + | + | blocks | Shared blocks dirtied | +| `shared_blks_written` | uint64 | E, D | ABS | + | Node | + | + | blocks | Dirty shared buffer blocks written to disk | +| `local_blks_hit` | uint64 | E, D | ABS | + | Node | + | + | blocks | Local buffer hits | +| `local_blks_read` | uint64 | E, D | ABS | + | Node | + | + | blocks | Disk blocks read | +| `local_blks_dirtied` | uint64 | E, D | ABS | + | Node | + | + | blocks | Local blocks dirtied | +| `local_blks_written` | uint64 | E, D | ABS | + | Node | + | + | blocks | Local blocks written to disk | +| `temp_blks_read` | uint64 | E, D | ABS | + | Node | + | + | blocks | Temp file blocks read | +| `temp_blks_written` | uint64 | E, D | ABS | + | Node | + | + | blocks | Temp file blocks written | +| `blk_read_time` | double | E, D | ABS | + | Node | + | + | seconds | Time reading data blocks | +| `blk_write_time` | double | E, D | ABS | + | Node | + | + | seconds | Time writing data blocks | +| `inherited_calls` | uint64 | E, D | ABS | - | Node | + | + | count | Nested query count (GPSC-specific) | +| `inherited_time` | double | E, D | ABS | - | Node | + | + | seconds | Nested query time (GPSC-specific) | +| **NetworkStat (sent)** | | | | | | | | | | +| `sent.total_bytes` | uint64 | D | ABS | - | Node | + | + | bytes | Bytes sent, including headers | +| `sent.tuple_bytes` | uint64 | D | ABS | - | Node | + | + | bytes | Bytes of pure tuple-data sent | +| `sent.chunks` | uint64 | D | ABS | - | Node | + | + | count | 
Tuple-chunks sent | +| **NetworkStat (received)** | | | | | | | | | | +| `received.total_bytes` | uint64 | D | ABS | - | Node | + | + | bytes | Bytes received, including headers | +| `received.tuple_bytes` | uint64 | D | ABS | - | Node | + | + | bytes | Bytes of pure tuple-data received | +| `received.chunks` | uint64 | D | ABS | - | Node | + | + | count | Tuple-chunks received | +| **InterconnectStat** | | | | | | | | | | +| `total_recv_queue_size` | uint64 | D | DIFF | - | Node | + | + | bytes | Receive queue size sum | +| `recv_queue_size_counting_time` | uint64 | D | DIFF | - | Node | + | + | count | Counting times when computing total_recv_queue_size | +| `total_capacity` | uint64 | D | DIFF | - | Node | + | + | bytes | Capacity sum for sent packets | +| `capacity_counting_time` | uint64 | D | DIFF | - | Node | + | + | count | Counting times used to compute total_capacity | +| `total_buffers` | uint64 | D | DIFF | - | Node | + | + | count | Available buffers | +| `buffer_counting_time` | uint64 | D | DIFF | - | Node | + | + | count | Counting times when computing total_buffers | +| `active_connections_num` | uint64 | D | DIFF | - | Node | + | + | count | Active connections | +| `retransmits` | int64 | D | DIFF | - | Node | + | + | count | Packet retransmits | +| `startup_cached_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Startup cached packets | +| `mismatch_num` | int64 | D | DIFF | - | Node | + | + | count | Mismatched packets received | +| `crc_errors` | int64 | D | DIFF | - | Node | + | + | count | CRC errors | +| `snd_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Packets sent | +| `recv_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Packets received | +| `disordered_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Out-of-order packets | +| `duplicated_pkt_num` | int64 | D | DIFF | - | Node | + | + | count | Duplicate packets | +| `recv_ack_num` | int64 | D | DIFF | - | Node | + | + | count | ACKs received | +| 
`status_query_msg_num` | int64 | D | DIFF | - | Node | + | + | count | Status query messages sent | +| **SpillInfo** | | | | | | | | | | +| `fileCount` | int32 | E, D | DIFF | - | Node | + | + | count | Spill (temp) files created | +| `totalBytes` | int64 | E, D | DIFF | - | Node | + | + | bytes | Spill bytes written | +| **QueryInfo** | | | | | | | | | | +| `generator` | enum | T, E, D | ABS | - | Cluster | + | - | enum | Planner/Optimizer | +| `query_id` | uint64 | T, E, D | ABS | - | Cluster | + | - | id | Query ID | +| `plan_id` | uint64 | T, E, D | ABS | - | Cluster | + | - | id | Hash of normalized plan | +| `query_text` | string | S | ABS | - | Cluster | + | - | text | Query text | +| `plan_text` | string | T | ABS | - | Cluster | + | - | text | EXPLAIN text | +| `template_query_text` | string | S | ABS | - | Cluster | + | - | text | Normalized query text | +| `template_plan_text` | string | T | ABS | - | Cluster | + | - | text | Normalized plan text | +| `userName` | string | All | ABS | - | Cluster | + | - | text | Session user | +| `databaseName` | string | All | ABS | - | Cluster | + | - | text | Database name | +| `rsgname` | string | All | ABS | - | Cluster | + | - | text | Resource group name | +| `analyze_text` | string | D | ABS | - | Cluster | + | - | text | EXPLAIN ANALYZE | +| **AdditionalQueryInfo** | | | | | | | | | | +| `nested_level` | int64 | All | ABS | - | Node | + | + | count | Current nesting level | +| `error_message` | string | D | ABS | - | Node | + | + | text | Error message | +| `slice_id` | int64 | All | ABS | - | Node | + | + | id | Slice ID | +| **QueryKey** | | | | | | | | | | +| `tmid` | int32 | All | ABS | - | Node | + | + | id | Transaction start time | +| `ssid` | int32 | All | ABS | - | Node | + | + | id | Session ID | +| `ccnt` | int32 | All | ABS | - | Node | + | + | count | Command counter | +| **SegmentKey** | | | | | | | | | | +| `dbid` | int32 | All | ABS | - | Node | + | + | id | Database ID | +| `segindex` | 
int32 | All | ABS | - | Node | + | + | id | Segment index (-1=coordinator) | + +--- + diff --git a/gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto b/gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto new file mode 100644 index 00000000000..10991301557 --- /dev/null +++ b/gpcontrib/gp_stats_collector/protos/gpsc_metrics.proto @@ -0,0 +1,201 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +syntax = "proto3"; + +package gpsc; + +enum QueryStatus { + QUERY_STATUS_UNSPECIFIED = 0; + QUERY_STATUS_SUBMIT = 1; + QUERY_STATUS_START = 2; + QUERY_STATUS_DONE = 3; + QUERY_STATUS_QUERY_DONE = 4; + QUERY_STATUS_ERROR = 5; + QUERY_STATUS_CANCELLING = 6; + QUERY_STATUS_CANCELED = 7; + QUERY_STATUS_END = 8; +} + +enum PlanNodeStatus { + PLAN_NODE_STATUS_UNSPECIFIED = 0; + PLAN_NODE_STATUS_INITIALIZED = 1; + PLAN_NODE_STATUS_EXECUTING = 2; + PLAN_NODE_STATUS_FINISHED = 3; +} + +message QueryInfo { + PlanGenerator generator = 1; + uint64 query_id = 2; + uint64 plan_id = 3; + string query_text = 4; + string plan_text = 5; + string template_query_text = 6; + string template_plan_text = 7; + string userName = 8; + string databaseName = 9; + string rsgname = 10; + string analyze_text = 11; +} + +message AdditionalQueryInfo { + int64 nested_level = 1; + string error_message = 2; + int64 slice_id = 3; +} + +message AdditionalQueryStat { + string error_message = 1; + repeated int64 slices = 2; +} + +enum PlanGenerator +{ + PLAN_GENERATOR_UNSPECIFIED = 0; + PLAN_GENERATOR_PLANNER = 1; /* plan produced by the planner*/ + PLAN_GENERATOR_OPTIMIZER = 2; /* plan produced by the optimizer*/ +} + +message GPMetrics { + SystemStat systemStat = 1; + MetricInstrumentation instrumentation = 2; + SpillInfo spill = 3; +} + +message QueryKey { + int32 tmid = 1; /* A time identifier for a particular query. All records associated with the query will have the same tmid. */ + int32 ssid = 2; /* The session id as shown by gp_session_id. All records associated with the query will have the same ssid */ + int32 ccnt = 3; /* The command number within this session as shown by gp_command_count. 
All records associated with the query will have the same ccnt */ +} + +message SegmentKey { + int32 dbid = 1; /* the dbid of this database */ + int32 segindex = 2; /* content indicator: -1 for entry database, + * 0, ..., n-1 for segment database * + * a primary and its mirror have the same segIndex */ +} + +message SystemStat { + /* CPU stat*/ + double runningTimeSeconds = 1; + double userTimeSeconds = 2; + double kernelTimeSeconds = 3; + + /* Memory stat */ + uint64 vsize = 4; + uint64 rss = 5; + uint64 VmSizeKb = 6; + uint64 VmPeakKb = 7; + + /* Storage stat */ + uint64 rchar = 8; + uint64 wchar = 9; + uint64 syscr = 10; + uint64 syscw = 11; + uint64 read_bytes = 12; + uint64 write_bytes = 13; + uint64 cancelled_write_bytes = 14; +} + +message NetworkStat { + uint64 total_bytes = 1; + uint64 tuple_bytes = 2; + uint64 chunks = 3; +} + +message InterconnectStat { + // Receive queue size sum when main thread is trying to get a packet + uint64 total_recv_queue_size = 1; + // Counting times when computing total_recv_queue_size + uint64 recv_queue_size_counting_time = 2; + + // The capacity sum when packets are tried to be sent + uint64 total_capacity = 3; + // Counting times used to compute total_capacity + uint64 capacity_counting_time = 4; + + // Total buffers available when sending packets + uint64 total_buffers = 5; + // Counting times when compute total_buffers + uint64 buffer_counting_time = 6; + + // The number of active connections + uint64 active_connections_num = 7; + + // The number of packet retransmits + int64 retransmits = 8; + + // The number of cached future packets + int64 startup_cached_pkt_num = 9; + + // The number of mismatched packets received + int64 mismatch_num = 10; + + // The number of crc errors + int64 crc_errors = 11; + + // The number of packets sent by sender + int64 snd_pkt_num = 12; + + // The number of packets received by receiver + int64 recv_pkt_num = 13; + + // Disordered packet number + int64 disordered_pkt_num = 14; + + // 
Duplicate packet number + int64 duplicated_pkt_num = 15; + + // The number of Acks received + int64 recv_ack_num = 16; + + // The number of status query messages sent + int64 status_query_msg_num = 17; +} + +message MetricInstrumentation { + uint64 ntuples = 1; /* Total tuples produced */ + uint64 nloops = 2; /* # of run cycles for this node */ + uint64 tuplecount = 3; /* Tuples emitted so far this cycle */ + double firsttuple = 4; /* Time for first tuple of this cycle */ + double startup = 5; /* Total startup time (in seconds) (optimiser's cost estimation) */ + double total = 6; /* Total time taken (in seconds) */ + uint64 shared_blks_hit = 7; /* shared blocks stats */ + uint64 shared_blks_read = 8; + uint64 shared_blks_dirtied = 9; + uint64 shared_blks_written = 10; + uint64 local_blks_hit = 11; /* data read from disks */ + uint64 local_blks_read = 12; + uint64 local_blks_dirtied = 13; + uint64 local_blks_written = 14; + uint64 temp_blks_read = 15; /* temporary tables read stat */ + uint64 temp_blks_written = 16; + double blk_read_time = 17; /* measured read/write time */ + double blk_write_time = 18; + NetworkStat sent = 19; + NetworkStat received = 20; + double startup_time = 21; /* real query startup time (planning + queue time) */ + uint64 inherited_calls = 22; /* the number of executed sub-queries */ + double inherited_time = 23; /* total time spent on inherited execution */ + InterconnectStat interconnect = 24; +} + +message SpillInfo { + int32 fileCount = 1; + int64 totalBytes = 2; +} diff --git a/gpcontrib/gp_stats_collector/protos/gpsc_plan.proto b/gpcontrib/gp_stats_collector/protos/gpsc_plan.proto new file mode 100644 index 00000000000..c1632478464 --- /dev/null +++ b/gpcontrib/gp_stats_collector/protos/gpsc_plan.proto @@ -0,0 +1,586 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto3"; + +package gpsc; + +message MetricPlan { + GpdbNodeType type = 1; + + int32 plan_node_id = 2; + int32 parent_plan_node_id = 3; // Valid only for QueryInfoMetricQuerySubmit + + double startup_cost = 4; /* cost expended before fetching any tuples */ + double total_cost = 5; /* total cost (assuming all tuples fetched) */ + double plan_rows = 6; /* number of rows plan is expected to emit */ + int32 plan_width = 7; /* average row width in bytes */ + + int32 arg1 = 8; // for some nodes this is an additional operand type + int32 arg2 = 9; // for some nodes this is an additional operand type + + MetricMotionInfo motion_info = 10; + MetricRelationInfo relation_info = 11; + + string scan_index_name = 12; + ScanDirection scan_direction = 13; + MetricSliceInfo slice_info = 14; + string statement = 15; +} + +message MetricMotionInfo { + MotionType type = 1; + bool isBroadcast = 2; + CdbLocusType locusType = 3; + + int32 sliceId = 4; + int32 parentSliceId = 5; +} + +message MetricRelationInfo { + int32 oid = 1; + string name = 2; + string schema = 3; + string alias = 4; + int32 dynamicScanId = 5; +} + +message MetricSliceInfo { + int32 slice = 1; + int32 segments = 2; + GangType gangType = 3; + int32 gang = 4; +} + +enum ScanDirection +{ + SCAN_DIRECTION_UNSPECIFIED = 0; + 
SCAN_DIRECTION_BACKWARD = 1; + SCAN_DIRECTION_FORWARD = 2; +} + +/* GangType enumeration is used in several structures related to CDB + * slice plan support. + */ +enum GangType +{ + GANG_TYPE_UNSPECIFIED = 0; + GANG_TYPE_UNALLOCATED = 1; /* a root slice executed by the qDisp */ + GANG_TYPE_ENTRYDB_READER = 2; /* a 1-gang with read access to the entry db */ + GANG_TYPE_SINGLETON_READER = 3; /* a 1-gang to read the segment dbs */ + GANG_TYPE_PRIMARY_READER = 4; /* a 1-gang or N-gang to read the segment dbs */ + GANG_TYPE_PRIMARY_WRITER = 5; /* the N-gang that can update the segment dbs */ +} + + +enum CdbLocusType +{ + CDB_LOCUS_TYPE_UNSPECIFIED = 0; + CDB_LOCUS_TYPE_ENTRY = 1; /* a single backend process on the entry db: + * usually the qDisp itself, but could be a + * qExec started by the entry postmaster. + */ + + CDB_LOCUS_TYPE_SINGLE_QE = 2; /* a single backend process on any db: the + * qDisp itself, or a qExec started by a + * segment postmaster or the entry postmaster. + */ + + CDB_LOCUS_TYPE_GENERAL = 3; /* compatible with any locus (data is + * self-contained in the query plan or + * generally available in any qExec or qDisp) */ + + CDB_LOCUS_TYPE_SEGMENT_GENERAL = 4; /* generally available in any qExec, but not + * available in qDisp */ + + CDB_LOCUS_TYPE_REPLICATED = 5; /* replicated over all qExecs of an N-gang */ + CDB_LOCUS_TYPE_HASHED = 6; /* hash partitioned over all qExecs of N-gang */ + CDB_LOCUS_TYPE_HASHED_OJ = 7; /* result of hash partitioned outer join, NULLs can be anywhere */ + CDB_LOCUS_TYPE_STREWN = 8; /* partitioned on no known function */ + CDB_LOCUS_TYPE_END = 9; /* = last valid CdbLocusType + 1 */ +} + +enum MotionType +{ + MOTION_TYPE_UNSPECIFIED = 0; + MOTION_TYPE_HASH = 1; // Use hashing to select a segindex destination + MOTION_TYPE_FIXED = 2; // Send tuples to a fixed set of segindexes + MOTION_TYPE_EXPLICIT = 3; // Send tuples to the segment explicitly specified in their segid column +} + +enum GpdbNodeType { + 
GPDB_NODE_TYPE_UNSPECIFIED = 0; + INDEX_INFO = 1; + EXPR_CONTEXT = 2; + PROJECTION_INFO = 3; + JUNK_FILTER = 4; + RESULT_REL_INFO = 5; + E_STATE = 6; + TUPLE_TABLE_SLOT = 7; + CDB_PROCESS = 8; + SLICE = 9; + SLICE_TABLE = 10; + CURSOR_POS_INFO = 11; + SHARE_NODE_ENTRY = 12; + PARTITION_STATE = 13; + QUERY_DISPATCH_DESC = 14; + OID_ASSIGNMENT = 15; + PLAN = 16; + SCAN = 17; + JOIN = 18; + RESULT = 19; + MODIFY_TABLE = 20; + APPEND = 21; + MERGE_APPEND = 22; + RECURSIVE_UNION = 23; + SEQUENCE = 24; + BITMAP_AND = 25; + BITMAP_OR = 26; + SEQ_SCAN = 27; + DYNAMIC_SEQ_SCAN = 28; + EXTERNAL_SCAN = 29; + INDEX_SCAN = 30; + DYNAMIC_INDEX_SCAN = 31; + INDEX_ONLY_SCAN = 32; + BITMAP_INDEX_SCAN = 33; + DYNAMIC_BITMAP_INDEX_SCAN = 34; + BITMAP_HEAP_SCAN = 35; + DYNAMIC_BITMAP_HEAP_SCAN = 36; + TID_SCAN = 37; + SUBQUERY_SCAN = 38; + FUNCTION_SCAN = 39; + TABLE_FUNCTION_SCAN = 40; + VALUES_SCAN = 41; + CTE_SCAN = 42; + WORK_TABLE_SCAN = 43; + FOREIGN_SCAN = 44; + NEST_LOOP = 45; + MERGE_JOIN = 46; + HASH_JOIN = 47; + MATERIAL = 48; + SORT = 49; + AGG = 50; + WINDOW_AGG = 51; + UNIQUE = 52; + HASH = 53; + SET_OP = 54; + LOCK_ROWS = 55; + LIMIT = 56; + MOTION = 57; + SHARE_INPUT_SCAN = 58; + REPEAT = 59; + DML = 60; + SPLIT_UPDATE = 61; + ROW_TRIGGER = 62; + ASSERT_OP = 63; + PARTITION_SELECTOR = 64; + PLAN_END = 65; + NEST_LOOP_PARAM = 66; + PLAN_ROW_MARK = 67; + PLAN_INVAL_ITEM = 68; + PLAN_STATE = 69; + SCAN_STATE = 70; + JOIN_STATE = 71; + RESULT_STATE = 72; + MODIFY_TABLE_STATE = 73; + APPEND_STATE = 74; + MERGE_APPEND_STATE = 75; + RECURSIVE_UNION_STATE = 76; + SEQUENCE_STATE = 77; + BITMAP_AND_STATE = 78; + BITMAP_OR_STATE = 79; + SEQ_SCAN_STATE = 80; + DYNAMIC_SEQ_SCAN_STATE = 81; + EXTERNAL_SCAN_STATE = 82; + INDEX_SCAN_STATE = 83; + DYNAMIC_INDEX_SCAN_STATE = 84; + INDEX_ONLY_SCAN_STATE = 85; + BITMAP_INDEX_SCAN_STATE = 86; + DYNAMIC_BITMAP_INDEX_SCAN_STATE = 87; + BITMAP_HEAP_SCAN_STATE = 88; + DYNAMIC_BITMAP_HEAP_SCAN_STATE = 89; + TID_SCAN_STATE = 90; + 
SUBQUERY_SCAN_STATE = 91; + FUNCTION_SCAN_STATE = 92; + TABLE_FUNCTION_STATE = 93; + VALUES_SCAN_STATE = 94; + CTE_SCAN_STATE = 95; + WORK_TABLE_SCAN_STATE = 96; + FOREIGN_SCAN_STATE = 97; + NEST_LOOP_STATE = 98; + MERGE_JOIN_STATE = 99; + HASH_JOIN_STATE = 100; + MATERIAL_STATE = 101; + SORT_STATE = 102; + AGG_STATE = 103; + WINDOW_AGG_STATE = 104; + UNIQUE_STATE = 105; + HASH_STATE = 106; + SET_OP_STATE = 107; + LOCK_ROWS_STATE = 108; + LIMIT_STATE = 109; + MOTION_STATE = 110; + SHARE_INPUT_SCAN_STATE = 111; + REPEAT_STATE = 112; + DML_STATE = 113; + SPLIT_UPDATE_STATE = 114; + ROW_TRIGGER_STATE = 115; + ASSERT_OP_STATE = 116; + PARTITION_SELECTOR_STATE = 117; + TUPLE_DESC_NODE = 118; + SERIALIZED_PARAM_EXTERN_DATA = 119; + ALIAS = 120; + RANGE_VAR = 121; + EXPR = 122; + VAR = 123; + CONST = 124; + PARAM = 125; + AGGREF = 126; + WINDOW_FUNC = 127; + ARRAY_REF = 128; + FUNC_EXPR = 129; + NAMED_ARG_EXPR = 130; + OP_EXPR = 131; + DISTINCT_EXPR = 132; + NULL_IF_EXPR = 133; + SCALAR_ARRAY_OP_EXPR = 134; + BOOL_EXPR = 135; + SUB_LINK = 136; + SUB_PLAN = 137; + ALTERNATIVE_SUB_PLAN = 138; + FIELD_SELECT = 139; + FIELD_STORE = 140; + RELABEL_TYPE = 141; + COERCE_VIA_IO = 142; + ARRAY_COERCE_EXPR = 143; + CONVERT_ROWTYPE_EXPR = 144; + COLLATE_EXPR = 145; + CASE_EXPR = 146; + CASE_WHEN = 147; + CASE_TEST_EXPR = 148; + ARRAY_EXPR = 149; + ROW_EXPR = 150; + ROW_COMPARE_EXPR = 151; + COALESCE_EXPR = 152; + MIN_MAX_EXPR = 153; + XML_EXPR = 154; + NULL_TEST = 155; + BOOLEAN_TEST = 156; + COERCE_TO_DOMAIN = 157; + COERCE_TO_DOMAIN_VALUES = 158; + SET_TO_DEFAULT = 159; + CURRENT_OF_EXPR = 160; + TARGET_ENTRY = 161; + RANGE_TBL_REF = 162; + JOIN_EXPR = 163; + FROM_EXPR = 164; + INTO_CLAUSE = 165; + COPY_INTO_CLAUSE = 166; + REFRESH_CLAUSE = 167; + FLOW = 168; + GROUPING = 169; + GROUP_ID = 170; + DISTRIBUTED_BY = 171; + DML_ACTION_EXPR = 172; + PART_SELECTED_EXPR = 173; + PART_DEFAULT_EXPR = 174; + PART_BOUND_EXPR = 175; + PART_BOUND_INCLUSION_EXPR = 176; + PART_BOUND_OPEN_EXPR = 
177; + PART_LIST_RULE_EXPR = 178; + PART_LIST_NULL_TEST_EXPR = 179; + TABLE_OID_INFO = 180; + EXPR_STATE = 181; + GENERIC_EXPR_STATE = 182; + WHOLE_ROW_VAR_EXPR_STATE = 183; + AGGREF_EXPR_STATE = 184; + WINDOW_FUNC_EXPR_STATE = 185; + ARRAY_REF_EXPR_STATE = 186; + FUNC_EXPR_STATE = 187; + SCALAR_ARRAY_OP_EXPR_STATE = 188; + BOOL_EXPR_STATE = 189; + SUB_PLAN_STATE = 190; + ALTERNATIVE_SUB_PLAN_STATE = 191; + FIELD_SELECT_STATE = 192; + FIELD_STORE_STATE = 193; + COERCE_VIA_IO_STATE = 194; + ARRAY_COERCE_EXPR_STATE = 195; + CONVERT_ROWTYPE_EXPR_STATE = 196; + CASE_EXPR_STATE = 197; + CASE_WHEN_STATE = 198; + ARRAY_EXPR_STATE = 199; + ROW_EXPR_STATE = 200; + ROW_COMPARE_EXPR_STATE = 201; + COALESCE_EXPR_STATE = 202; + MIN_MAX_EXPR_STATE = 203; + XML_EXPR_STATE = 204; + NULL_TEST_STATE = 205; + COERCE_TO_DOMAIN_STATE = 206; + DOMAIN_CONSTRAINT_STATE = 207; + GROUPING_FUNC_EXPR_STATE = 208; + PART_SELECTED_EXPR_STATE = 209; + PART_DEFAULT_EXPR_STATE = 210; + PART_BOUND_EXPR_STATE = 211; + PART_BOUND_INCLUSION_EXPR_STATE = 212; + PART_BOUND_OPEN_EXPR_STATE = 213; + PART_LIST_RULE_EXPR_STATE = 214; + PART_LIST_NULL_TEST_EXPR_STATE = 215; + PLANNER_INFO = 216; + PLANNER_GLOBAL = 217; + REL_OPT_INFO = 218; + INDEX_OPT_INFO = 219; + PARAM_PATH_INFO = 220; + PATH = 221; + APPEND_ONLY_PATH = 222; + AOCS_PATH = 223; + EXTERNAL_PATH = 224; + INDEX_PATH = 225; + BITMAP_HEAP_PATH = 226; + BITMAP_AND_PATH = 227; + BITMAP_OR_PATH = 228; + NEST_PATH = 229; + MERGE_PATH = 230; + HASH_PATH = 231; + TID_PATH = 232; + FOREIGN_PATH = 233; + APPEND_PATH = 234; + MERGE_APPEND_PATH = 235; + RESULT_PATH = 236; + MATERIAL_PATH = 237; + UNIQUE_PATH = 238; + PROJECTION_PATH = 239; + EQUIVALENCE_CLASS = 240; + EQUIVALENCE_MEMBER = 241; + PATH_KEY = 242; + RESTRICT_INFO = 243; + PLACE_HOLDER_VAR = 244; + SPECIAL_JOIN_INFO = 245; + LATERAL_JOIN_INFO = 246; + APPEND_REL_INFO = 247; + PLACE_HOLDER_INFO = 248; + MIN_MAX_AGG_INFO = 249; + PARTITION = 250; + PARTITION_RULE = 251; + PARTITION_NODE = 252; 
+ PG_PART_RULE = 253; + SEGFILE_MAP_NODE = 254; + PLANNER_PARAM_ITEM = 255; + CDB_MOTION_PATH = 256; + PARTITION_SELECTOR_PATH = 257; + CDB_REL_COLUMN_INFO = 258; + DISTRIBUTION_KEY = 259; + MEMORY_CONTEXT = 260; + ALLOC_SET_CONTEXT = 261; + MEMORY_ACCOUNT = 262; + VALUE = 263; + INTEGER = 264; + FLOAT = 265; + STRING = 266; + BIT_STRING = 267; + NULL_VALUE = 268; + LIST = 269; + INT_LIST = 270; + OID_LIST = 271; + QUERY = 272; + PLANNED_STMT = 273; + INSERT_STMT = 274; + DELETE_STMT = 275; + UPDATE_STMT = 276; + SELECT_STMT = 277; + ALTER_TABLE_STMT = 278; + ALTER_TABLE_CMD = 279; + ALTER_DOMAIN_STMT = 280; + SET_OPERATION_STMT = 281; + GRANT_STMT = 282; + GRANT_ROLE_STMT = 283; + ALTER_DEFAULT_PRIVILEGES_STMT = 284; + CLOSE_PORTAL_STMT = 285; + CLUSTER_STMT = 286; + COPY_STMT = 287; + CREATE_STMT = 288; + SINGLE_ROW_ERROR_DESC = 289; + EXT_TABLE_TYPE_DESC = 290; + CREATE_EXTERNAL_STMT = 291; + DEFINE_STMT = 292; + DROP_STMT = 293; + TRUNCATE_STMT = 294; + COMMENT_STMT = 295; + FETCH_STMT = 296; + INDEX_STMT = 297; + CREATE_FUNCTION_STMT = 298; + ALTER_FUNCTION_STMT = 299; + DO_STMT = 300; + RENAME_STMT = 301; + RULE_STMT = 302; + NOTIFY_STMT = 303; + LISTEN_STMT = 304; + UNLISTEN_STMT = 305; + TRANSACTION_STMT = 306; + VIEW_STMT = 307; + LOAD_STMT = 308; + CREATE_DOMAIN_STMT = 309; + CREATEDB_STMT = 310; + DROPDB_STMT = 311; + VACUUM_STMT = 312; + EXPLAIN_STMT = 313; + CREATE_TABLE_AS_STMT = 314; + CREATE_SEQ_STMT = 315; + ALTER_SEQ_STMT = 316; + VARIABLE_SET_STMT = 317; + VARIABLE_SHOW_STMT = 318; + DISCARD_STMT = 319; + CREATE_TRIG_STMT = 320; + CREATE_P_LANG_STMT = 321; + CREATE_ROLE_STMT = 322; + ALTER_ROLE_STMT = 323; + DROP_ROLE_STMT = 324; + CREATE_QUEUE_STMT = 325; + ALTER_QUEUE_STMT = 326; + DROP_QUEUE_STMT = 327; + CREATE_RESOURCE_GROUP_STMT = 328; + DROP_RESOURCE_GROUP_STMT = 329; + ALTER_RESOURCE_GROUP_STMT = 330; + LOCK_STMT = 331; + CONSTRAINTS_SET_STMT = 332; + REINDEX_STMT = 333; + CHECK_POINT_STMT = 334; + CREATE_SCHEMA_STMT = 335; + 
ALTER_DATABASE_STMT = 336; + ALTER_DATABASE_SET_STMT = 337; + ALTER_ROLE_SET_STMT = 338; + CREATE_CONVERSION_STMT = 339; + CREATE_CAST_STMT = 340; + CREATE_OP_CLASS_STMT = 341; + CREATE_OP_FAMILY_STMT = 342; + ALTER_OP_FAMILY_STMT = 343; + PREPARE_STMT = 344; + EXECUTE_STMT = 345; + DEALLOCATE_STMT = 346; + DECLARE_CURSOR_STMT = 347; + CREATE_TABLE_SPACE_STMT = 348; + DROP_TABLE_SPACE_STMT = 349; + ALTER_OBJECT_SCHEMA_STMT = 350; + ALTER_OWNER_STMT = 351; + DROP_OWNED_STMT = 352; + REASSIGN_OWNED_STMT = 353; + COMPOSITE_TYPE_STMT = 354; + CREATE_ENUM_STMT = 355; + CREATE_RANGE_STMT = 356; + ALTER_ENUM_STMT = 357; + ALTER_TS_DICTIONARY_STMT = 358; + ALTER_TS_CONFIGURATION_STMT = 359; + CREATE_FDW_STMT = 360; + ALTER_FDW_STMT = 361; + CREATE_FOREIGN_SERVER_STMT = 362; + ALTER_FOREIGN_SERVER_STMT = 363; + CREATE_USER_MAPPING_STMT = 364; + ALTER_USER_MAPPING_STMT = 365; + DROP_USER_MAPPING_STMT = 366; + ALTER_TABLE_SPACE_OPTIONS_STMT = 367; + ALTER_TABLE_MOVE_ALL_STMT = 368; + SEC_LABEL_STMT = 369; + CREATE_FOREIGN_TABLE_STMT = 370; + CREATE_EXTENSION_STMT = 371; + ALTER_EXTENSION_STMT = 372; + ALTER_EXTENSION_CONTENTS_STMT = 373; + CREATE_EVENT_TRIG_STMT = 374; + ALTER_EVENT_TRIG_STMT = 375; + REFRESH_MAT_VIEW_STMT = 376; + REPLICA_IDENTITY_STMT = 377; + ALTER_SYSTEM_STMT = 378; + PARTITION_BY = 379; + PARTITION_ELEM = 380; + PARTITION_RANGE_ITEM = 381; + PARTITION_BOUND_SPEC = 382; + PARTITION_SPEC = 383; + PARTITION_VALUES_SPEC = 384; + ALTER_PARTITION_ID = 385; + ALTER_PARTITION_CMD = 386; + INHERIT_PARTITION_CMD = 387; + CREATE_FILE_SPACE_STMT = 388; + FILE_SPACE_ENTRY = 389; + DROP_FILE_SPACE_STMT = 390; + TABLE_VALUE_EXPR = 391; + DENY_LOGIN_INTERVAL = 392; + DENY_LOGIN_POINT = 393; + ALTER_TYPE_STMT = 394; + SET_DISTRIBUTION_CMD = 395; + EXPAND_STMT_SPEC = 396; + A_EXPR = 397; + COLUMN_REF = 398; + PARAM_REF = 399; + A_CONST = 400; + FUNC_CALL = 401; + A_STAR = 402; + A_INDICES = 403; + A_INDIRECTION = 404; + A_ARRAY_EXPR = 405; + RES_TARGET = 406; + TYPE_CAST 
= 407; + COLLATE_CLAUSE = 408; + SORT_BY = 409; + WINDOW_DEF = 410; + RANGE_SUBSELECT = 411; + RANGE_FUNCTION = 412; + TYPE_NAME = 413; + COLUMN_DEF = 414; + INDEX_ELEM = 415; + CONSTRAINT = 416; + DEF_ELEM = 417; + RANGE_TBL_ENTRY = 418; + RANGE_TBL_FUNCTION = 419; + WITH_CHECK_OPTION = 420; + GROUPING_CLAUSE = 421; + GROUPING_FUNC = 422; + SORT_GROUP_CLAUSE = 423; + WINDOW_CLAUSE = 424; + PRIV_GRANTEE = 425; + FUNC_WITH_ARGS = 426; + ACCESS_PRIV = 427; + CREATE_OP_CLASS_ITEM = 428; + TABLE_LIKE_CLAUSE = 429; + FUNCTION_PARAMETER = 430; + LOCKING_CLAUSE = 431; + ROW_MARK_CLAUSE = 432; + XML_SERIALIZE = 433; + WITH_CLAUSE = 434; + COMMON_TABLE_EXPR = 435; + COLUMN_REFERENCE_STORAGE_DIRECTIVE = 436; + IDENTIFY_SYSTEM_CMD = 437; + BASE_BACKUP_CMD = 438; + CREATE_REPLICATION_SLOT_CMD = 439; + DROP_REPLICATION_SLOT_CMD = 440; + START_REPLICATION_CMD = 441; + TIME_LINE_HISTORY_CMD = 442; + TRIGGER_DATA = 443; + EVENT_TRIGGER_DATA = 444; + RETURN_SET_INFO = 445; + WINDOW_OBJECT_DATA = 446; + TID_BITMAP = 447; + INLINE_CODE_BLOCK = 448; + FDW_ROUTINE = 449; + STREAM_BITMAP = 450; + FORMATTER_DATA = 451; + EXT_PROTOCOL_DATA = 452; + EXT_PROTOCOL_VALIDATOR_DATA = 453; + SELECTED_PARTS = 454; + COOKED_CONSTRAINT = 455; + CDB_EXPLAIN_STAT_HDR = 456; + GP_POLICY = 457; + RETRIEVE_STMT = 458; +} diff --git a/gpcontrib/gp_stats_collector/protos/gpsc_set_service.proto b/gpcontrib/gp_stats_collector/protos/gpsc_set_service.proto new file mode 100644 index 00000000000..bcf09074ed7 --- /dev/null +++ b/gpcontrib/gp_stats_collector/protos/gpsc_set_service.proto @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +syntax = "proto3"; + +import "google/protobuf/timestamp.proto"; + +import "protos/gpsc_metrics.proto"; +import "protos/gpsc_plan.proto"; + +package gpsc; + +service SetQueryInfo { + rpc SetMetricPlanNode (SetPlanNodeReq) returns (MetricResponse) {} + + rpc SetMetricQuery (SetQueryReq) returns (MetricResponse) {} +} + +message MetricResponse { + MetricResponseStatusCode error_code = 1; + string error_text = 2; +} + +enum MetricResponseStatusCode { + METRIC_RESPONSE_STATUS_CODE_UNSPECIFIED = 0; + METRIC_RESPONSE_STATUS_CODE_SUCCESS = 1; + METRIC_RESPONSE_STATUS_CODE_ERROR = 2; +} + +message SetQueryReq { + QueryStatus query_status = 1; + google.protobuf.Timestamp datetime = 2; + QueryKey query_key = 3; + SegmentKey segment_key = 4; + QueryInfo query_info = 5; + GPMetrics query_metrics = 6; + repeated MetricPlan plan_tree = 7; + google.protobuf.Timestamp submit_time = 8; + google.protobuf.Timestamp start_time = 9; + google.protobuf.Timestamp end_time = 10; + AdditionalQueryInfo add_info = 11; +} + +message SetPlanNodeReq { + PlanNodeStatus node_status = 1; + google.protobuf.Timestamp datetime = 2; + QueryKey query_key = 3; + SegmentKey segment_key = 4; + GPMetrics node_metrics = 5; + MetricPlan plan_node = 6; + google.protobuf.Timestamp submit_time = 7; + google.protobuf.Timestamp start_time = 8; + google.protobuf.Timestamp end_time = 9; +} diff --git a/gpcontrib/gp_stats_collector/sql/gpsc_cursors.sql b/gpcontrib/gp_stats_collector/sql/gpsc_cursors.sql new file mode 100644 index 00000000000..8361f7b678d --- /dev/null +++ 
b/gpcontrib/gp_stats_collector/sql/gpsc_cursors.sql @@ -0,0 +1,85 @@ +CREATE EXTENSION gp_stats_collector; + +CREATE FUNCTION gpsc_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.report_nested_queries TO TRUE; + +-- DECLARE +SET gpsc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_0 CURSOR FOR SELECT 0; +CLOSE cursor_stats_0; +COMMIT; + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- DECLARE WITH HOLD +SET gpsc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_1 CURSOR WITH HOLD FOR SELECT 1; +CLOSE cursor_stats_1; +DECLARE cursor_stats_2 CURSOR WITH HOLD FOR SELECT 2; +CLOSE cursor_stats_2; +COMMIT; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- ROLLBACK +SET gpsc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_3 CURSOR FOR SELECT 1; +CLOSE cursor_stats_3; +DECLARE cursor_stats_4 CURSOR FOR SELECT 1; +ROLLBACK; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- FETCH +SET gpsc.logging_mode to 'TBL'; + +BEGIN; +DECLARE cursor_stats_5 CURSOR WITH HOLD FOR SELECT 2; +DECLARE cursor_stats_6 CURSOR WITH HOLD FOR SELECT 3; +FETCH 1 IN cursor_stats_5; +FETCH 1 IN cursor_stats_6; +CLOSE 
cursor_stats_5; +CLOSE cursor_stats_6; +COMMIT; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/sql/gpsc_dist.sql b/gpcontrib/gp_stats_collector/sql/gpsc_dist.sql new file mode 100644 index 00000000000..46b531a70ca --- /dev/null +++ b/gpcontrib/gp_stats_collector/sql/gpsc_dist.sql @@ -0,0 +1,88 @@ +CREATE EXTENSION gp_stats_collector; + +CREATE OR REPLACE FUNCTION gpsc_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +SET gpsc.enable_utility TO FALSE; + +-- Hash distributed table + +CREATE TABLE test_hash_dist (id int) DISTRIBUTED BY (id); +INSERT INTO test_hash_dist SELECT 1; + +SET gpsc.logging_mode to 'TBL'; +SET optimizer_enable_direct_dispatch TO TRUE; +-- Direct dispatch is used here, only one segment is scanned. +select * from test_hash_dist where id = 1; +RESET optimizer_enable_direct_dispatch; + +RESET gpsc.logging_mode; +-- Should see 8 rows. +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +SET gpsc.logging_mode to 'TBL'; + +-- Scan all segments. 
+select * from test_hash_dist; + +DROP TABLE test_hash_dist; +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- Replicated table +CREATE FUNCTION force_segments() RETURNS SETOF text AS $$ +BEGIN + RETURN NEXT 'seg'; +END; +$$ LANGUAGE plpgsql VOLATILE EXECUTE ON ALL SEGMENTS; + +CREATE TABLE test_replicated (id int) DISTRIBUTED REPLICATED; +INSERT INTO test_replicated SELECT 1; + +SET gpsc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_replicated, force_segments(); +DROP TABLE test_replicated; +DROP FUNCTION force_segments(); + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- Partially distributed table (2 numsegments) +SET allow_system_table_mods = ON; +CREATE TABLE test_partial_dist (id int, data text) DISTRIBUTED BY (id); +UPDATE gp_distribution_policy SET numsegments = 2 WHERE localoid = 'test_partial_dist'::regclass; +INSERT INTO test_partial_dist SELECT * FROM generate_series(1, 100); + +SET gpsc.logging_mode to 'TBL'; +SELECT COUNT(*) FROM test_partial_dist; +RESET gpsc.logging_mode; + +DROP TABLE test_partial_dist; +RESET allow_system_table_mods; +-- Should see 12 rows. 
+SELECT query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/sql/gpsc_guc_cache.sql b/gpcontrib/gp_stats_collector/sql/gpsc_guc_cache.sql new file mode 100644 index 00000000000..6aff2ad5cf6 --- /dev/null +++ b/gpcontrib/gp_stats_collector/sql/gpsc_guc_cache.sql @@ -0,0 +1,43 @@ +-- +-- Test GUC caching for query lifecycle consistency. +-- +-- The extension logs SUBMIT and DONE events for each query. +-- GUC values that control logging (enable_utility, ignored_users_list, ...) +-- must be cached at SUBMIT time to ensure DONE uses the same filtering +-- criteria. Otherwise, a SET command that modifies these GUCs would +-- have its DONE event rejected, creating orphaned SUBMIT entries. +-- This is due to query being actually executed between SUBMIT and DONE. +-- start_ignore +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; +SELECT gpsc.truncate_log(); +-- end_ignore + +CREATE OR REPLACE FUNCTION print_last_query(query text) +RETURNS TABLE(query_status text) AS $$ + SELECT query_status + FROM gpsc.log + WHERE segid = -1 AND query_text = query + ORDER BY ccnt DESC +$$ LANGUAGE sql; + +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.logging_mode TO 'TBL'; + +-- SET below disables utility logging and DONE must still be logged. +SET gpsc.enable_utility TO FALSE; +SELECT * FROM print_last_query('SET gpsc.enable_utility TO FALSE;'); + +-- SELECT below adds current user to ignore list and DONE must still be logged. 
+-- start_ignore +SELECT set_config('gpsc.ignored_users_list', current_user, false); +-- end_ignore +SELECT * FROM print_last_query('SELECT set_config(''gpsc.ignored_users_list'', current_user, false);'); + +DROP FUNCTION print_last_query(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; +RESET gpsc.logging_mode; diff --git a/gpcontrib/gp_stats_collector/sql/gpsc_locale.sql b/gpcontrib/gp_stats_collector/sql/gpsc_locale.sql new file mode 100644 index 00000000000..6321c93f5ab --- /dev/null +++ b/gpcontrib/gp_stats_collector/sql/gpsc_locale.sql @@ -0,0 +1,29 @@ +-- The extension generates normalized query text and plan using jumbling functions. +-- Those functions may fail when translating to wide character if the current locale +-- cannot handle the character set. This test checks that even when those functions +-- fail, the plan is still generated and executed. This test is partially taken from +-- gp_locale. + +-- start_ignore +DROP DATABASE IF EXISTS gpsc_test_locale; +-- end_ignore + +CREATE DATABASE gpsc_test_locale WITH LC_COLLATE='C' LC_CTYPE='C' TEMPLATE=template0; +\c gpsc_test_locale + +CREATE EXTENSION gp_stats_collector; + +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable_utility TO TRUE; +SET gpsc.enable TO TRUE; + +CREATE TABLE gpsc_hi_안녕세계 (a int, 안녕세계1 text, 안녕세계2 text, 안녕세계3 text) DISTRIBUTED BY (a); +INSERT INTO gpsc_hi_안녕세계 VALUES(1, '안녕세계1 first', '안녕세2 first', '안녕세계3 first'); +-- Should not see error here +UPDATE gpsc_hi_안녕세계 SET 안녕세계1='안녕세계1 first UPDATE' WHERE 안녕세계1='안녕세계1 first'; + +RESET gpsc.enable; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; +DROP TABLE gpsc_hi_안녕세계; +DROP EXTENSION gp_stats_collector; diff --git a/gpcontrib/gp_stats_collector/sql/gpsc_select.sql b/gpcontrib/gp_stats_collector/sql/gpsc_select.sql new file mode 100644 index 00000000000..673cbee0c10 --- /dev/null +++ b/gpcontrib/gp_stats_collector/sql/gpsc_select.sql @@ -0,0 +1,69 @@ 
+CREATE EXTENSION gp_stats_collector; + +CREATE OR REPLACE FUNCTION gpsc_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.report_nested_queries TO TRUE; +SET gpsc.enable_utility TO FALSE; + +-- Basic SELECT tests +SET gpsc.logging_mode to 'TBL'; + +SELECT 1; +SELECT COUNT(*) FROM generate_series(1,10); + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- Transaction test +SET gpsc.logging_mode to 'TBL'; + +BEGIN; +SELECT 1; +COMMIT; + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- CTE test +SET gpsc.logging_mode to 'TBL'; + +WITH t AS (VALUES (1), (2)) +SELECT * FROM t; + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- Prepared statement test +SET gpsc.logging_mode to 'TBL'; + +PREPARE test_stmt AS SELECT 1; +EXECUTE test_stmt; +DEALLOCATE test_stmt; + +RESET gpsc.logging_mode; +SELECT segid, query_text, query_status FROM gpsc.log ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/sql/gpsc_uds.sql b/gpcontrib/gp_stats_collector/sql/gpsc_uds.sql new file mode 100644 index 
00000000000..14377b15c8c --- /dev/null +++ b/gpcontrib/gp_stats_collector/sql/gpsc_uds.sql @@ -0,0 +1,31 @@ +-- Test UDS socket +-- start_ignore +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; +-- end_ignore + +\set UDS_PATH '/tmp/gpsc_test.sock' + +-- Configure extension to send via UDS +SET gpsc.uds_path TO :'UDS_PATH'; +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.logging_mode TO 'UDS'; + +-- Start receiver +SELECT gpsc.__test_uds_start_server(:'UDS_PATH'); + +-- Send +SELECT 1; + +-- Receive +SELECT gpsc.__test_uds_receive() > 0 as received; + +-- Stop receiver +SELECT gpsc.__test_uds_stop_server(); + +-- Cleanup +DROP EXTENSION gp_stats_collector; +RESET gpsc.uds_path; +RESET gpsc.ignored_users_list; +RESET gpsc.enable; +RESET gpsc.logging_mode; diff --git a/gpcontrib/gp_stats_collector/sql/gpsc_utf8_trim.sql b/gpcontrib/gp_stats_collector/sql/gpsc_utf8_trim.sql new file mode 100644 index 00000000000..a3f8a376d55 --- /dev/null +++ b/gpcontrib/gp_stats_collector/sql/gpsc_utf8_trim.sql @@ -0,0 +1,45 @@ +CREATE EXTENSION IF NOT EXISTS gp_stats_collector; + +CREATE OR REPLACE FUNCTION get_marked_query(marker TEXT) +RETURNS TEXT AS $$ + SELECT query_text + FROM gpsc.log + WHERE query_text LIKE '%' || marker || '%' + ORDER BY datetime DESC + LIMIT 1 +$$ LANGUAGE sql VOLATILE; + +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; + +-- Test 1: 1 byte chars +SET gpsc.max_text_size to 19; +SET gpsc.logging_mode to 'TBL'; +SELECT /*test1*/ 'HelloWorld'; +RESET gpsc.logging_mode; +SELECT octet_length(get_marked_query('test1')) = 19 AS correct_length; + +-- Test 2: 2 byte chars +SET gpsc.max_text_size to 19; +SET gpsc.logging_mode to 'TBL'; +SELECT /*test2*/ 'РУССКИЙЯЗЫК'; +RESET gpsc.logging_mode; +-- Character 'Р' has two bytes and cut in the middle => not included. 
+SELECT octet_length(get_marked_query('test2')) = 18 AS correct_length; + +-- Test 3: 4 byte chars +SET gpsc.max_text_size to 21; +SET gpsc.logging_mode to 'TBL'; +SELECT /*test3*/ '😀'; +RESET gpsc.logging_mode; +-- Emoji has 4 bytes and cut before the last byte => not included. +SELECT octet_length(get_marked_query('test3')) = 18 AS correct_length; + +-- Cleanup +DROP FUNCTION get_marked_query(TEXT); +RESET gpsc.max_text_size; +RESET gpsc.logging_mode; +RESET gpsc.enable; +RESET gpsc.ignored_users_list; + +DROP EXTENSION gp_stats_collector; diff --git a/gpcontrib/gp_stats_collector/sql/gpsc_utility.sql b/gpcontrib/gp_stats_collector/sql/gpsc_utility.sql new file mode 100644 index 00000000000..9abb965db37 --- /dev/null +++ b/gpcontrib/gp_stats_collector/sql/gpsc_utility.sql @@ -0,0 +1,135 @@ +CREATE EXTENSION gp_stats_collector; + +CREATE OR REPLACE FUNCTION gpsc_status_order(status text) +RETURNS integer +AS $$ +BEGIN + RETURN CASE status + WHEN 'QUERY_STATUS_SUBMIT' THEN 1 + WHEN 'QUERY_STATUS_START' THEN 2 + WHEN 'QUERY_STATUS_END' THEN 3 + WHEN 'QUERY_STATUS_DONE' THEN 4 + ELSE 999 + END; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +SET gpsc.ignored_users_list TO ''; +SET gpsc.enable TO TRUE; +SET gpsc.enable_utility TO TRUE; +SET gpsc.report_nested_queries TO TRUE; + +SET gpsc.logging_mode to 'TBL'; + +CREATE TABLE test_table (a int, b text); +CREATE INDEX test_idx ON test_table(a); +ALTER TABLE test_table ADD COLUMN c int DEFAULT 1; +DROP TABLE test_table; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- Partitioning +SET gpsc.logging_mode to 'TBL'; + +CREATE TABLE pt_test (a int, b int) +DISTRIBUTED BY (a) +PARTITION BY RANGE (a) +(START (0) END (100) EVERY (50)); +DROP TABLE pt_test; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 
AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- Views and Functions +SET gpsc.logging_mode to 'TBL'; + +CREATE VIEW test_view AS SELECT 1 AS a; +CREATE FUNCTION test_func(i int) RETURNS int AS $$ SELECT $1 + 1; $$ LANGUAGE SQL; +DROP VIEW test_view; +DROP FUNCTION test_func(int); + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- Transaction Operations +SET gpsc.logging_mode to 'TBL'; + +BEGIN; +SAVEPOINT sp1; +ROLLBACK TO sp1; +COMMIT; + +BEGIN; +SAVEPOINT sp2; +ABORT; + +BEGIN; +ROLLBACK; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- DML Operations +SET gpsc.logging_mode to 'TBL'; + +CREATE TABLE dml_test (a int, b text); +INSERT INTO dml_test VALUES (1, 'test'); +UPDATE dml_test SET b = 'updated' WHERE a = 1; +DELETE FROM dml_test WHERE a = 1; +DROP TABLE dml_test; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- COPY Operations +SET gpsc.logging_mode to 'TBL'; + +CREATE TABLE copy_test (a int); +COPY (SELECT 1) TO STDOUT; +DROP TABLE copy_test; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- Prepared Statements and error during execute +SET gpsc.logging_mode to 'TBL'; + +PREPARE test_prep(int) AS SELECT $1/0 AS value; +EXECUTE test_prep(0::int); +DEALLOCATE 
test_prep; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +-- GUC Settings +SET gpsc.logging_mode to 'TBL'; + +SET gpsc.report_nested_queries TO FALSE; +RESET gpsc.report_nested_queries; + +RESET gpsc.logging_mode; + +SELECT segid, query_text, query_status FROM gpsc.log WHERE segid = -1 AND utility = true ORDER BY segid, ccnt, gpsc_status_order(query_status) ASC; +SELECT gpsc.truncate_log() IS NOT NULL AS t; + +DROP FUNCTION gpsc_status_order(text); +DROP EXTENSION gp_stats_collector; +RESET gpsc.enable; +RESET gpsc.report_nested_queries; +RESET gpsc.enable_utility; +RESET gpsc.ignored_users_list; diff --git a/gpcontrib/gp_stats_collector/src/Config.cpp b/gpcontrib/gp_stats_collector/src/Config.cpp new file mode 100644 index 00000000000..08a8d8cff86 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/Config.cpp @@ -0,0 +1,191 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * Config.cpp + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/Config.cpp + * + *------------------------------------------------------------------------- + */ + +#include "Config.h" +#include <limits.h> +#include <memory> +#include <string> +#include <unordered_set> +#include "memory/gpdbwrappers.h" + +extern "C" { +#include "postgres.h" +#include "utils/guc.h" +} + +static char *guc_uds_path = nullptr; +static bool guc_enable_analyze = true; +static bool guc_enable_cdbstats = true; +static bool guc_enable_collector = false; +static bool guc_report_nested_queries = true; +static char *guc_ignored_users = nullptr; +static int guc_max_text_size = 1 << 20; // in bytes (1MB) +static int guc_max_plan_size = 1024; // in KB +static int guc_min_analyze_time = 10000; // in ms +static int guc_logging_mode = LOG_MODE_UDS; +static bool guc_enable_utility = false; + +static const struct config_enum_entry logging_mode_options[] = { + {"uds", LOG_MODE_UDS, false /* hidden */}, + {"tbl", LOG_MODE_TBL, false}, + {NULL, 0, false}}; + +static bool ignored_users_guc_dirty = false; // raised by the assign hook, cleared by Config::sync() + +static void +assign_ignored_users_hook(const char *, void *) +{ + ignored_users_guc_dirty = true; // only flag the change; the list is re-parsed in Config::sync() +} + +void +Config::init_gucs() +{ + DefineCustomStringVariable( + "gpsc.uds_path", "Sets filesystem path of the agent socket", 0LL, + &guc_uds_path, "/tmp/gpsc_agent.sock", PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable("gpsc.enable", "Enable metrics collector", 0LL, + &guc_enable_collector, false, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, + 0LL); + + DefineCustomBoolVariable( + "gpsc.enable_analyze", "Collect analyze metrics in gpsc", 0LL, + &guc_enable_analyze, true, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable( + "gpsc.enable_cdbstats", "Collect CDB metrics in gpsc", 0LL, + &guc_enable_cdbstats, true, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomBoolVariable( +
"gpsc.report_nested_queries", "Collect stats on nested queries", 0LL, + &guc_report_nested_queries, true, PGC_USERSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, 0LL, 0LL); + + DefineCustomStringVariable("gpsc.ignored_users_list", + "Make gpsc ignore queries issued by given users", + 0LL, &guc_ignored_users, "", PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, 0LL, + assign_ignored_users_hook, 0LL); + + DefineCustomIntVariable( + "gpsc.max_text_size", + "Make gpsc trim query texts longer than configured size in bytes", NULL, + &guc_max_text_size, 1 << 20 /* 1MB */, 0, INT_MAX, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); + + DefineCustomIntVariable( + "gpsc.max_plan_size", "Make gpsc trim plan longer than configured size", + NULL, &guc_max_plan_size, 1024, 0, INT_MAX / 1024, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_KB, NULL, NULL, NULL); + + DefineCustomIntVariable( + "gpsc.min_analyze_time", + "Sets the minimum execution time above which plans will be logged.", + "Zero prints all plans. 
-1 turns this feature off.", + &guc_min_analyze_time, 10000, -1, INT_MAX, PGC_USERSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_UNIT_MS, NULL, NULL, NULL); + + DefineCustomEnumVariable( + "gpsc.logging_mode", "Logging mode: UDS or PG Table", NULL, + &guc_logging_mode, LOG_MODE_UDS, logging_mode_options, PGC_SUSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC | GUC_SUPERUSER_ONLY, NULL, NULL, + NULL); + + DefineCustomBoolVariable( + "gpsc.enable_utility", "Collect utility statement stats", NULL, + &guc_enable_utility, false, PGC_USERSET, + GUC_NOT_IN_SAMPLE | GUC_GPDB_NEED_SYNC, NULL, NULL, NULL); +} + +void +Config::update_ignored_users(const char *new_guc_ignored_users) +{ + auto new_ignored_users_set = std::make_unique(); + if (new_guc_ignored_users != nullptr && new_guc_ignored_users[0] != '\0') + { + /* Need a modifiable copy of string */ + char *rawstring = gpdb::pstrdup(new_guc_ignored_users); + List *elemlist; + ListCell *l; + + /* Parse string into list of identifiers */ + if (!gpdb::split_identifier_string(rawstring, ',', &elemlist)) + { + /* syntax error in list */ + gpdb::pfree(rawstring); + gpdb::list_free(elemlist); + ereport( + LOG, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg( + "invalid list syntax in parameter gpsc.ignored_users_list"))); + return; + } + foreach (l, elemlist) + { + new_ignored_users_set->insert((char *) lfirst(l)); + } + gpdb::pfree(rawstring); + gpdb::list_free(elemlist); + } + ignored_users_ = std::move(new_ignored_users_set); +} + +bool +Config::filter_user(const std::string &username) const +{ + if (!ignored_users_) + { + return true; + } + return ignored_users_->find(username) != ignored_users_->end(); +} + +void +Config::sync() +{ + if (ignored_users_guc_dirty) + { + update_ignored_users(guc_ignored_users); + ignored_users_guc_dirty = false; + } + uds_path_ = guc_uds_path; + enable_analyze_ = guc_enable_analyze; + enable_cdbstats_ = guc_enable_cdbstats; + enable_collector_ = guc_enable_collector; + enable_utility_ = 
guc_enable_utility; + report_nested_queries_ = guc_report_nested_queries; + max_text_size_ = guc_max_text_size; + max_plan_size_ = guc_max_plan_size; + min_analyze_time_ = guc_min_analyze_time; + logging_mode_ = guc_logging_mode; +} diff --git a/gpcontrib/gp_stats_collector/src/Config.h b/gpcontrib/gp_stats_collector/src/Config.h new file mode 100644 index 00000000000..259799e5135 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/Config.h @@ -0,0 +1,115 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * Config.h + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/Config.h + * + *------------------------------------------------------------------------- + */ + +#ifndef CONFIG_H +#define CONFIG_H + +#include <memory> +#include <string> +#include <unordered_set> + +#define LOG_MODE_UDS 0 +#define LOG_MODE_TBL 1 + +using IgnoredUsers = std::unordered_set<std::string>; // user names parsed from gpsc.ignored_users_list + +class Config // snapshot of the gpsc.* GUC values, refreshed by sync() +{ +public: + static void init_gucs(); + + void sync(); + + const std::string & + uds_path() const + { + return uds_path_; + } + bool + enable_analyze() const + { + return enable_analyze_; + } + bool + enable_cdbstats() const + { + return enable_cdbstats_; + } + bool + enable_collector() const + { + return enable_collector_; + } + bool + enable_utility() const + { + return enable_utility_; + } + bool + report_nested_queries() const + { + return report_nested_queries_; + } + int + max_text_size() const + { + return max_text_size_; + } + int + max_plan_size() const + { + return max_plan_size_ * 1024; // gpsc.max_plan_size is declared with GUC_UNIT_KB; callers get bytes + } + int + min_analyze_time() const + { + return min_analyze_time_; + } + int + logging_mode() const + { + return logging_mode_; // LOG_MODE_UDS or LOG_MODE_TBL + } + bool filter_user(const std::string &username) const; + +private: + void update_ignored_users(const char *new_guc_ignored_users); + + std::unique_ptr<IgnoredUsers> ignored_users_; // null until update_ignored_users() first succeeds + std::string uds_path_; + bool enable_analyze_; + bool enable_cdbstats_; + bool enable_collector_; + bool enable_utility_; + bool report_nested_queries_; + int max_text_size_; + int max_plan_size_; + int min_analyze_time_; + int logging_mode_; +}; + +#endif /* CONFIG_H */ diff --git a/gpcontrib/gp_stats_collector/src/EventSender.cpp b/gpcontrib/gp_stats_collector/src/EventSender.cpp new file mode 100644 index 00000000000..0bc44c1198d --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/EventSender.cpp @@ -0,0 +1,645 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * EventSender.cpp + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/EventSender.cpp + * + *------------------------------------------------------------------------- + */ + +#include "UDSConnector.h" +#include "log/LogOps.h" +#include "memory/gpdbwrappers.h" + +#define typeid __typeid +extern "C" { +#include "postgres.h" + +#include "executor/executor.h" +#include "utils/elog.h" +#include "utils/guc.h" + +#include "cdb/cdbexplain.h" +#include "cdb/cdbvars.h" +#include "cdb/ml_ipc.h" +} +#undef typeid + +#include "EventSender.h" +#include "PgUtils.h" +#include "ProtoUtils.h" + +#define need_collect_analyze() \ + (Gp_role == GP_ROLE_DISPATCH && config.min_analyze_time() >= 0 && \ + config.enable_analyze()) + +bool +EventSender::verify_query(QueryDesc *query_desc, QueryState state, bool utility) +{ + if (!proto_verified) + { + return false; + } + if (Gp_role != GP_ROLE_DISPATCH && Gp_role != GP_ROLE_EXECUTE) + { + return false; + } + + switch (state) + { + case QueryState::SUBMIT: + // Cache GUCs once at SUBMIT. Synced GUCs are visible to all subsequent + // states. Without caching, a query that unsets/sets filtering GUCs would + // see different filter criteria at DONE, because at SUBMIT the query was + // not executed yet, causing DONE to be skipped/added. 
+ config.sync(); + + if (!config.enable_collector()) + { + return false; + } + + if (utility && !config.enable_utility()) + { + return false; + } + + // Register qkey for a nested query we won't report, + // so we can detect nesting_level > 0 and skip reporting at end/done. + if (!need_report_nested_query() && nesting_level > 0) + { + QueryKey::register_qkey(query_desc, nesting_level); + return false; + } + if (is_top_level_query(query_desc, nesting_level)) + { + nested_timing = 0; + nested_calls = 0; + } + break; + case QueryState::START: + if (!qdesc_submitted(query_desc)) + { + collect_query_submit(query_desc, false /* utility */); + } + break; + case QueryState::DONE: + if (utility && !config.enable_utility()) + { + return false; + } + default: + break; + } + + if (filter_query(query_desc)) + { + return false; + } + if (!nesting_is_valid(query_desc, nesting_level)) + { + return false; + } + + return true; +} + +bool +EventSender::log_query_req(const gpsc::SetQueryReq &req, + const std::string &event, bool utility) +{ + bool clear_big_fields = false; + switch (config.logging_mode()) + { + case LOG_MODE_UDS: + clear_big_fields = UDSConnector::report_query(req, event, config); + break; + case LOG_MODE_TBL: + gpdb::insert_log(req, utility); + clear_big_fields = false; + break; + default: + Assert(false); + } + return clear_big_fields; +} + +void +EventSender::query_metrics_collect(QueryMetricsStatus status, void *arg, + bool utility, ErrorData *edata) +{ + auto *query_desc = reinterpret_cast(arg); + switch (status) + { + case METRICS_PLAN_NODE_INITIALIZE: + case METRICS_PLAN_NODE_EXECUTING: + case METRICS_PLAN_NODE_FINISHED: + // TODO + break; + case METRICS_QUERY_SUBMIT: + collect_query_submit(query_desc, utility); + break; + case METRICS_QUERY_START: + // no-op: executor_after_start is enough + break; + case METRICS_QUERY_CANCELING: + // it appears we're only interested in the actual CANCELED event. 
+ // for now we will ignore CANCELING state unless otherwise requested from + // end users + break; + case METRICS_QUERY_DONE: + case METRICS_QUERY_ERROR: + case METRICS_QUERY_CANCELED: + case METRICS_INNER_QUERY_DONE: + collect_query_done(query_desc, utility, status, edata); + break; + default: + ereport(ERROR, (errmsg("Unknown query status: %d", status))); + } +} + +void +EventSender::executor_before_start(QueryDesc *query_desc, int eflags) +{ + if (!verify_query(query_desc, QueryState::START, false /* utility*/)) + { + return; + } + + if (Gp_role == GP_ROLE_DISPATCH && config.enable_analyze() && + (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0) + { + query_desc->instrument_options |= INSTRUMENT_BUFFERS; + query_desc->instrument_options |= INSTRUMENT_ROWS; + query_desc->instrument_options |= INSTRUMENT_TIMER; + if (config.enable_cdbstats()) + { + query_desc->instrument_options |= INSTRUMENT_CDB; + if (!query_desc->showstatctx) + { + instr_time starttime; + INSTR_TIME_SET_CURRENT(starttime); + query_desc->showstatctx = + gpdb::cdbexplain_showExecStatsBegin(query_desc, starttime); + } + } + } +} + +void +EventSender::executor_after_start(QueryDesc *query_desc, int /* eflags*/) +{ + if (!verify_query(query_desc, QueryState::START, false /* utility */)) + { + return; + } + + auto &query = get_query(query_desc); + auto query_msg = query.message.get(); + *query_msg->mutable_start_time() = current_ts(); + update_query_state(query, QueryState::START, false /* utility */); + set_query_plan(query_msg, query_desc, config); + if (need_collect_analyze()) + { + // Set up to track total elapsed time during query run. + // Make sure the space is allocated in the per-query + // context so it will go away at executor_end. 
+ if (query_desc->totaltime == NULL) + { + MemoryContext oldcxt = + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + query_desc->totaltime = gpdb::instr_alloc(1, INSTRUMENT_ALL, false); + gpdb::mem_ctx_switch_to(oldcxt); + } + } + gpsc::GPMetrics stats; + std::swap(stats, *query_msg->mutable_query_metrics()); + if (log_query_req(*query_msg, "started", false /* utility */)) + { + clear_big_fields(query_msg); + } + std::swap(stats, *query_msg->mutable_query_metrics()); +} + +void +EventSender::executor_end(QueryDesc *query_desc) +{ + if (!verify_query(query_desc, QueryState::END, false /* utility */)) + { + return; + } + + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); + *query_msg->mutable_end_time() = current_ts(); + update_query_state(query, QueryState::END, false /* utility */); + if (is_top_level_query(query_desc, nesting_level)) + { + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, + nested_calls, nested_timing); + } + else + { + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); + } + if (log_query_req(*query_msg, "ended", false /* utility */)) + { + clear_big_fields(query_msg); + } +} + +void +EventSender::collect_query_submit(QueryDesc *query_desc, bool utility) +{ + if (!verify_query(query_desc, QueryState::SUBMIT, utility)) + { + return; + } + + submit_query(query_desc); + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); + *query_msg = create_query_req(gpsc::QueryStatus::QUERY_STATUS_SUBMIT); + *query_msg->mutable_submit_time() = current_ts(); + set_query_info(query_msg); + set_qi_nesting_level(query_msg, nesting_level); + set_qi_slice_id(query_msg); + set_query_text(query_msg, query_desc, config); + if (log_query_req(*query_msg, "submit", utility)) + { + clear_big_fields(query_msg); + } + // take initial metrics snapshot so that we can safely take diff afterwards + // in END or DONE events. 
+ set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, 0, 0); +#ifdef IC_TEARDOWN_HOOK + // same for interconnect statistics + ic_metrics_collect(); + set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), + &ic_statistics); +#endif +} + +void +EventSender::report_query_done(QueryDesc *query_desc, QueryItem &query, + QueryMetricsStatus status, bool utility, + ErrorData *edata) +{ + gpsc::QueryStatus query_status; + std::string msg; + switch (status) + { + case METRICS_QUERY_DONE: + case METRICS_INNER_QUERY_DONE: + query_status = gpsc::QueryStatus::QUERY_STATUS_DONE; + msg = "done"; + break; + case METRICS_QUERY_ERROR: + query_status = gpsc::QueryStatus::QUERY_STATUS_ERROR; + msg = "error"; + break; + case METRICS_QUERY_CANCELING: + // at the moment we don't track this event, but I`ll leave this code + // here just in case + Assert(false); + query_status = gpsc::QueryStatus::QUERY_STATUS_CANCELLING; + msg = "cancelling"; + break; + case METRICS_QUERY_CANCELED: + query_status = gpsc::QueryStatus::QUERY_STATUS_CANCELED; + msg = "cancelled"; + break; + default: + ereport(ERROR, + (errmsg("Unexpected query status in query_done hook: %d", + status))); + } + auto prev_state = query.state; + update_query_state(query, QueryState::DONE, utility, + query_status == gpsc::QueryStatus::QUERY_STATUS_DONE); + auto query_msg = query.message.get(); + query_msg->set_query_status(query_status); + if (status == METRICS_QUERY_ERROR) + { + bool error_flushed = elog_message() == NULL; + if (error_flushed && (edata == NULL || edata->message == NULL)) + { + ereport(WARNING, (errmsg("GPSC missing error message"))); + ereport(DEBUG3, (errmsg("GPSC query sourceText: %s", + query_desc->sourceText))); + } + else + { + set_qi_error_message( + query_msg, error_flushed ? edata->message : elog_message(), + config); + } + } + if (prev_state == START) + { + // We've missed ExecutorEnd call due to query cancel or error. 
It's + // fine, but now we need to collect and report execution stats + *query_msg->mutable_end_time() = current_ts(); + set_gp_metrics(query_msg->mutable_query_metrics(), query_desc, + nested_calls, nested_timing); + } +#ifdef IC_TEARDOWN_HOOK + ic_metrics_collect(); + set_ic_stats(query_msg->mutable_query_metrics()->mutable_instrumentation(), + &ic_statistics); +#endif + (void) log_query_req(*query_msg, msg, utility); +} + +void +EventSender::collect_query_done(QueryDesc *query_desc, bool utility, + QueryMetricsStatus status, ErrorData *edata) +{ + if (!verify_query(query_desc, QueryState::DONE, utility)) + { + return; + } + + // Skip sending done message if query errored before submit. + if (!qdesc_submitted(query_desc)) + { + if (status != METRICS_QUERY_ERROR) + { + ereport(WARNING, (errmsg("GPSC trying to process DONE hook for " + "unsubmitted and unerrored query"))); + ereport(DEBUG3, (errmsg("GPSC query sourceText: %s", + query_desc->sourceText))); + } + return; + } + + if (queries.empty()) + { + ereport(WARNING, + (errmsg("GPSC cannot find query to process DONE hook"))); + ereport(DEBUG3, + (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); + return; + } + auto &query = get_query(query_desc); + + report_query_done(query_desc, query, status, utility, edata); + + if (need_report_nested_query()) + update_nested_counters(query_desc); + + queries.erase(QueryKey::from_qdesc(query_desc)); + pfree(query_desc->gpsc_query_key); + query_desc->gpsc_query_key = NULL; +} + +void +EventSender::ic_metrics_collect() +{ +#ifdef IC_TEARDOWN_HOOK + if (Gp_interconnect_type != INTERCONNECT_TYPE_UDPIFC) + { + return; + } + if (!proto_verified || gp_command_count == 0 || + !config.enable_collector() || config.filter_user(get_user_name())) + { + return; + } + // we also would like to know nesting level here and filter queries BUT we + // don't have this kind of information from this callback. 
Will have to + // collect stats anyways and throw it away later, if necessary + auto metrics = UDPIFCGetICStats(); + ic_statistics.totalRecvQueueSize += metrics.totalRecvQueueSize; + ic_statistics.recvQueueSizeCountingTime += + metrics.recvQueueSizeCountingTime; + ic_statistics.totalCapacity += metrics.totalCapacity; + ic_statistics.capacityCountingTime += metrics.capacityCountingTime; + ic_statistics.totalBuffers += metrics.totalBuffers; + ic_statistics.bufferCountingTime += metrics.bufferCountingTime; + ic_statistics.activeConnectionsNum += metrics.activeConnectionsNum; + ic_statistics.retransmits += metrics.retransmits; + ic_statistics.startupCachedPktNum += metrics.startupCachedPktNum; + ic_statistics.mismatchNum += metrics.mismatchNum; + ic_statistics.crcErrors += metrics.crcErrors; + ic_statistics.sndPktNum += metrics.sndPktNum; + ic_statistics.recvPktNum += metrics.recvPktNum; + ic_statistics.disorderedPktNum += metrics.disorderedPktNum; + ic_statistics.duplicatedPktNum += metrics.duplicatedPktNum; + ic_statistics.recvAckNum += metrics.recvAckNum; + ic_statistics.statusQueryMsgNum += metrics.statusQueryMsgNum; +#endif +} + +void +EventSender::analyze_stats_collect(QueryDesc *query_desc) +{ + if (!verify_query(query_desc, QueryState::END, false /* utility */)) + { + return; + } + if (Gp_role != GP_ROLE_DISPATCH) + { + return; + } + if (!query_desc->totaltime || !need_collect_analyze()) + { + return; + } + // Make sure stats accumulation is done. + // (Note: it's okay if several levels of hook all do this.) 
+ gpdb::instr_end_loop(query_desc->totaltime); + + double ms = query_desc->totaltime->total * 1000.0; + if (ms >= config.min_analyze_time()) + { + auto &query = get_query(query_desc); + auto *query_msg = query.message.get(); + set_analyze_plan_text(query_desc, query_msg, config); + } +} + +EventSender::EventSender() +{ + // Perform initial sync to get default GUC values + config.sync(); + + try + { + GOOGLE_PROTOBUF_VERIFY_VERSION; + proto_verified = true; + } + catch (const std::exception &e) + { + ereport(INFO, (errmsg("GPSC protobuf version mismatch is detected %s", + e.what()))); + } +#ifdef IC_TEARDOWN_HOOK + memset(&ic_statistics, 0, sizeof(ICStatistics)); +#endif +} + +EventSender::~EventSender() +{ + for (const auto &[qkey, _] : queries) + { + ereport(LOG, + (errmsg("GPSC query with missing done event: " + "tmid=%d ssid=%d ccnt=%d nlvl=%d", + qkey.tmid, qkey.ssid, qkey.ccnt, qkey.nesting_level))); + } +} + +// That's basically a very simplistic state machine to fix or highlight any bugs +// coming from GP +void +EventSender::update_query_state(QueryItem &query, QueryState new_state, + bool utility, bool success) +{ + switch (new_state) + { + case QueryState::SUBMIT: + Assert(false); + break; + case QueryState::START: + if (query.state == QueryState::SUBMIT) + { + query.message->set_query_status( + gpsc::QueryStatus::QUERY_STATUS_START); + } + else + { + Assert(false); + } + break; + case QueryState::END: + // Example of below assert triggering: CURSOR closes before ever being + // executed Assert(query->state == QueryState::START || + // IsAbortInProgress()); + query.message->set_query_status( + gpsc::QueryStatus::QUERY_STATUS_END); + break; + case QueryState::DONE: + Assert(query.state == QueryState::END || !success || utility); + query.message->set_query_status( + gpsc::QueryStatus::QUERY_STATUS_DONE); + break; + default: + Assert(false); + } + query.state = new_state; +} + +EventSender::QueryItem & +EventSender::get_query(QueryDesc *query_desc) +{ + if 
(!qdesc_submitted(query_desc)) + { + ereport( + WARNING, + (errmsg("GPSC attempting to get query that was not submitted"))); + ereport(DEBUG3, + (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); + throw std::runtime_error( + "Attempting to get query that was not submitted"); + } + return queries.find(QueryKey::from_qdesc(query_desc))->second; +} + +void +EventSender::submit_query(QueryDesc *query_desc) +{ + if (query_desc->gpsc_query_key) + { + ereport(WARNING, + (errmsg("GPSC trying to submit already submitted query"))); + ereport(DEBUG3, + (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); + } + QueryKey::register_qkey(query_desc, nesting_level); + auto key = QueryKey::from_qdesc(query_desc); + auto [_, inserted] = queries.emplace(key, QueryItem(QueryState::SUBMIT)); + if (!inserted) + { + ereport(WARNING, (errmsg("GPSC duplicate query submit detected"))); + ereport(DEBUG3, + (errmsg("GPSC query sourceText: %s", query_desc->sourceText))); + } +} + +void +EventSender::update_nested_counters(QueryDesc *query_desc) +{ + if (!is_top_level_query(query_desc, nesting_level)) + { + auto &query = get_query(query_desc); + nested_calls++; + double end_time = protots_to_double(query.message->end_time()); + double start_time = protots_to_double(query.message->start_time()); + if (end_time >= start_time) + { + nested_timing += end_time - start_time; + } + else + { + ereport(WARNING, + (errmsg("GPSC query start_time > end_time (%f > %f)", + start_time, end_time))); + ereport(DEBUG3, (errmsg("GPSC nested query text %s", + query_desc->sourceText))); + } + } +} + +bool +EventSender::qdesc_submitted(QueryDesc *query_desc) +{ + if (query_desc->gpsc_query_key == NULL) + { + return false; + } + return queries.find(QueryKey::from_qdesc(query_desc)) != queries.end(); +} + +bool +EventSender::nesting_is_valid(QueryDesc *query_desc, int nesting_level) +{ + return need_report_nested_query() || + is_top_level_query(query_desc, nesting_level); +} + +bool 
+EventSender::need_report_nested_query() +{ + return config.report_nested_queries() && Gp_role == GP_ROLE_DISPATCH; +} + +bool +EventSender::filter_query(QueryDesc *query_desc) +{ + return gp_command_count == 0 || query_desc->sourceText == nullptr || + !config.enable_collector() || config.filter_user(get_user_name()); +} + +EventSender::QueryItem::QueryItem(QueryState st) + : message(std::make_unique()), state(st) +{ +} diff --git a/gpcontrib/gp_stats_collector/src/EventSender.h b/gpcontrib/gp_stats_collector/src/EventSender.h new file mode 100644 index 00000000000..2651a020593 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/EventSender.h @@ -0,0 +1,203 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * EventSender.h + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/EventSender.h + * + *------------------------------------------------------------------------- + */ + +#ifndef EVENTSENDER_H +#define EVENTSENDER_H + +#include +#include +#include + +#define typeid __typeid +extern "C" { +#include "utils/metrics_utils.h" +#ifdef IC_TEARDOWN_HOOK +#include "cdb/ic_udpifc.h" +#endif +} +#undef typeid + +#include "Config.h" +#include "memory/gpdbwrappers.h" + +class UDSConnector; +struct QueryDesc; +namespace gpsc +{ +class SetQueryReq; +} + +#include + +extern void gp_gettmid(int32 *); + +struct QueryKey +{ + int tmid; + int ssid; + int ccnt; + int nesting_level; + uintptr_t query_desc_addr; + + bool + operator==(const QueryKey &other) const + { + return std::tie(tmid, ssid, ccnt, nesting_level, query_desc_addr) == + std::tie(other.tmid, other.ssid, other.ccnt, other.nesting_level, + other.query_desc_addr); + } + + static void + register_qkey(QueryDesc *query_desc, size_t nesting_level) + { + query_desc->gpsc_query_key = + (GpscQueryKey *) gpdb::palloc0(sizeof(GpscQueryKey)); + int32 tmid; + gp_gettmid(&tmid); + query_desc->gpsc_query_key->tmid = tmid; + query_desc->gpsc_query_key->ssid = gp_session_id; + query_desc->gpsc_query_key->ccnt = gp_command_count; + query_desc->gpsc_query_key->nesting_level = nesting_level; + query_desc->gpsc_query_key->query_desc_addr = (uintptr_t) query_desc; + } + + static QueryKey + from_qdesc(QueryDesc *query_desc) + { + return { + .tmid = query_desc->gpsc_query_key->tmid, + .ssid = query_desc->gpsc_query_key->ssid, + .ccnt = query_desc->gpsc_query_key->ccnt, + .nesting_level = query_desc->gpsc_query_key->nesting_level, + .query_desc_addr = query_desc->gpsc_query_key->query_desc_addr, + }; + } +}; + +// https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html +template +inline void +hash_combine(std::size_t &seed, const T &v) +{ + std::hash hasher; + seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + 
(seed >> 2); +} + +namespace std +{ +template <> +struct hash +{ + size_t + operator()(const QueryKey &k) const noexcept + { + size_t seed = hash{}(k.tmid); + hash_combine(seed, k.ssid); + hash_combine(seed, k.ccnt); + hash_combine(seed, k.nesting_level); + uintptr_t addr = k.query_desc_addr; + if constexpr (SIZE_MAX < UINTPTR_MAX) + { + addr %= SIZE_MAX; + } + hash_combine(seed, addr); + return seed; + } +}; +} // namespace std + +class EventSender +{ +public: + void executor_before_start(QueryDesc *query_desc, int eflags); + void executor_after_start(QueryDesc *query_desc, int eflags); + void executor_end(QueryDesc *query_desc); + void query_metrics_collect(QueryMetricsStatus status, void *arg, + bool utility, ErrorData *edata = NULL); + void ic_metrics_collect(); + void analyze_stats_collect(QueryDesc *query_desc); + void + incr_depth() + { + nesting_level++; + } + void + decr_depth() + { + nesting_level--; + } + EventSender(); + ~EventSender(); + +private: + enum QueryState + { + SUBMIT, + START, + END, + DONE + }; + + struct QueryItem + { + std::unique_ptr message; + QueryState state; + + explicit QueryItem(QueryState st); + }; + + bool log_query_req(const gpsc::SetQueryReq &req, const std::string &event, + bool utility); + bool verify_query(QueryDesc *query_desc, QueryState state, bool utility); + void update_query_state(QueryItem &query, QueryState new_state, + bool utility, bool success = true); + QueryItem &get_query(QueryDesc *query_desc); + void submit_query(QueryDesc *query_desc); + void collect_query_submit(QueryDesc *query_desc, bool utility); + void report_query_done(QueryDesc *query_desc, QueryItem &query, + QueryMetricsStatus status, bool utility, + ErrorData *edata = NULL); + void collect_query_done(QueryDesc *query_desc, bool utility, + QueryMetricsStatus status, ErrorData *edata = NULL); + void update_nested_counters(QueryDesc *query_desc); + bool qdesc_submitted(QueryDesc *query_desc); + bool nesting_is_valid(QueryDesc *query_desc, int 
nesting_level); + bool need_report_nested_query(); + bool filter_query(QueryDesc *query_desc); + + bool proto_verified = false; + int nesting_level = 0; + int64_t nested_calls = 0; + double nested_timing = 0; +#ifdef IC_TEARDOWN_HOOK + ICStatistics ic_statistics; +#endif + std::unordered_map queries; + + Config config; +}; +#endif /* EVENTSENDER_H */ diff --git a/gpcontrib/gp_stats_collector/src/GpscStat.cpp b/gpcontrib/gp_stats_collector/src/GpscStat.cpp new file mode 100644 index 00000000000..151cfd87c02 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/GpscStat.cpp @@ -0,0 +1,154 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * GpscStat.cpp + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/GpscStat.cpp + * + *------------------------------------------------------------------------- + */ + +#include "GpscStat.h" + +#include + +extern "C" { +#include "postgres.h" +#include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" +#include "storage/shmem.h" +#include "storage/spin.h" +} + +namespace +{ +struct ProtectedData +{ + slock_t mutex; + GpscStat::Data data; +}; +shmem_startup_hook_type prev_shmem_startup_hook = NULL; +ProtectedData *data = nullptr; + +void +gpsc_shmem_startup() +{ + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + bool found; + data = reinterpret_cast( + ShmemInitStruct("gpsc_stat_messages", sizeof(ProtectedData), &found)); + if (!found) + { + SpinLockInit(&data->mutex); + data->data = GpscStat::Data(); + } + LWLockRelease(AddinShmemInitLock); +} + +class LockGuard +{ +public: + LockGuard(slock_t *mutex) : mutex_(mutex) + { + SpinLockAcquire(mutex_); + } + ~LockGuard() + { + SpinLockRelease(mutex_); + } + +private: + slock_t *mutex_; +}; +} // namespace + +void +GpscStat::init() +{ + if (!process_shared_preload_libraries_in_progress) + return; + RequestAddinShmemSpace(sizeof(ProtectedData)); + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = gpsc_shmem_startup; +} + +void +GpscStat::deinit() +{ + shmem_startup_hook = prev_shmem_startup_hook; +} + +void +GpscStat::reset() +{ + LockGuard lg(&data->mutex); + data->data = GpscStat::Data(); +} + +void +GpscStat::report_send(int32_t msg_size) +{ + LockGuard lg(&data->mutex); + data->data.total++; + data->data.max_message_size = + std::max(msg_size, data->data.max_message_size); +} + +void +GpscStat::report_bad_connection() +{ + LockGuard lg(&data->mutex); + data->data.total++; + data->data.failed_connects++; +} + +void +GpscStat::report_bad_send(int32_t msg_size) +{ + LockGuard lg(&data->mutex); + 
data->data.total++; + data->data.failed_sends++; + data->data.max_message_size = + std::max(msg_size, data->data.max_message_size); +} + +void +GpscStat::report_error() +{ + LockGuard lg(&data->mutex); + data->data.total++; + data->data.failed_other++; +} + +GpscStat::Data +GpscStat::get_stats() +{ + LockGuard lg(&data->mutex); + return data->data; +} + +bool +GpscStat::loaded() +{ + return data != nullptr; +} diff --git a/gpcontrib/gp_stats_collector/src/GpscStat.h b/gpcontrib/gp_stats_collector/src/GpscStat.h new file mode 100644 index 00000000000..d82930c7b5b --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/GpscStat.h @@ -0,0 +1,52 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * GpscStat.h + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/GpscStat.h + * + *------------------------------------------------------------------------- + */ + +#ifndef GPSCSTAT_H +#define GPSCSTAT_H + +#include + +class GpscStat +{ +public: + struct Data + { + int64_t total, failed_sends, failed_connects, failed_other; + int32_t max_message_size; + }; + + static void init(); + static void deinit(); + static void reset(); + static void report_send(int32_t msg_size); + static void report_bad_connection(); + static void report_bad_send(int32_t msg_size); + static void report_error(); + static Data get_stats(); + static bool loaded(); +}; +#endif /* GPSCSTAT_H */ diff --git a/gpcontrib/gp_stats_collector/src/PgUtils.cpp b/gpcontrib/gp_stats_collector/src/PgUtils.cpp new file mode 100644 index 00000000000..c473cc383f2 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/PgUtils.cpp @@ -0,0 +1,104 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * PgUtils.cpp + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/PgUtils.cpp + * + *------------------------------------------------------------------------- + */ + +#include "PgUtils.h" +#include "Config.h" +#include "memory/gpdbwrappers.h" + +extern "C" { +#include "cdb/cdbvars.h" +#include "commands/resgroupcmds.h" +} + +std::string +get_user_name() +{ + // username is allocated on stack, we don't need to pfree it. + const char *username = + gpdb::get_config_option("session_authorization", false, false); + return username ? std::string(username) : ""; +} + +std::string +get_db_name() +{ + char *dbname = gpdb::get_database_name(MyDatabaseId); + if (dbname) + { + std::string result(dbname); + gpdb::pfree(dbname); + return result; + } + return ""; +} + +std::string +get_rg_name() +{ + auto groupId = gpdb::get_rg_id_by_session_id(MySessionState->sessionId); + if (!OidIsValid(groupId)) + return ""; + + char *rgname = gpdb::get_rg_name_for_id(groupId); + if (rgname == nullptr) + return ""; + + std::string result(rgname); + gpdb::pfree(rgname); + return result; +} + +/** + * Things get tricky with nested queries. + * a) A nested query on master is a real query optimized and executed from + * master. An example would be `select some_insert_function();`, where + * some_insert_function does something like `insert into tbl values (1)`. Master + * will create two statements. Outer select statement and inner insert statement + * with nesting level 1. + * For segments both statements are top-level statements with nesting level 0. + * b) A nested query on segment is something executed as sub-statement on + * segment. An example would be `select a from tbl where is_good_value(b);`. In + * this case master will issue one top-level statement, but segments will change + * contexts for UDF execution and execute is_good_value(b) once for each tuple + * as a nested query. Creating massive load on external agent. 
+ * + * Hence, here is a decision: + * 1) ignore all queries that are nested on segments + * 2) record (if enabled) all queries that are nested on master + * NOTE: The truth is, we can't really ignore nested master queries, because + * segment sees those as top-level. + */ + +bool +is_top_level_query(QueryDesc *query_desc, int nesting_level) +{ + if (query_desc->gpsc_query_key == NULL) + { + return nesting_level == 0; + } + return query_desc->gpsc_query_key->nesting_level == 0; +} diff --git a/gpcontrib/gp_stats_collector/src/PgUtils.h b/gpcontrib/gp_stats_collector/src/PgUtils.h new file mode 100644 index 00000000000..d9f673e7cbc --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/PgUtils.h @@ -0,0 +1,38 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * PgUtils.h + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/PgUtils.h + * + *------------------------------------------------------------------------- + */ + +extern "C" { +#include "postgres.h" +#include "commands/explain.h" +} + +#include + +std::string get_user_name(); +std::string get_db_name(); +std::string get_rg_name(); +bool is_top_level_query(QueryDesc *query_desc, int nesting_level); diff --git a/gpcontrib/gp_stats_collector/src/ProcStats.cpp b/gpcontrib/gp_stats_collector/src/ProcStats.cpp new file mode 100644 index 00000000000..e308b30dfa5 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/ProcStats.cpp @@ -0,0 +1,144 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * ProcStats.cpp + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/ProcStats.cpp + * + *------------------------------------------------------------------------- + */ + +#include "ProcStats.h" +#include +#include +#include +#include "gpsc_metrics.pb.h" + +extern "C" { +#include "postgres.h" +#include "utils/elog.h" +} + +namespace +{ +#define FILL_IO_STAT(stat_name) \ + uint64_t stat_name; \ + proc_stat >> tmp >> stat_name; \ + stats->set_##stat_name(stat_name - stats->stat_name()); + +void +fill_io_stats(gpsc::SystemStat *stats) +{ + std::ifstream proc_stat("/proc/self/io"); + std::string tmp; + FILL_IO_STAT(rchar); + FILL_IO_STAT(wchar); + FILL_IO_STAT(syscr); + FILL_IO_STAT(syscw); + FILL_IO_STAT(read_bytes); + FILL_IO_STAT(write_bytes); + FILL_IO_STAT(cancelled_write_bytes); +} + +void +fill_cpu_stats(gpsc::SystemStat *stats) +{ + static const int UTIME_ID = 13; + static const int STIME_ID = 14; + static const int VSIZE_ID = 22; + static const int RSS_ID = 23; + static const double tps = sysconf(_SC_CLK_TCK); + + std::ifstream proc_stat("/proc/self/stat"); + std::string trash; + for (int i = 0; i <= RSS_ID; ++i) + { + switch (i) + { + case UTIME_ID: + double utime; + proc_stat >> utime; + stats->set_usertimeseconds(utime / tps - + stats->usertimeseconds()); + break; + case STIME_ID: + double stime; + proc_stat >> stime; + stats->set_kerneltimeseconds(stime / tps - + stats->kerneltimeseconds()); + break; + case VSIZE_ID: + uint64_t vsize; + proc_stat >> vsize; + stats->set_vsize(vsize); + break; + case RSS_ID: + uint64_t rss; + proc_stat >> rss; + // NOTE: this is a double AFAIU, need to double-check + stats->set_rss(rss); + break; + default: + proc_stat >> trash; + } + } +} + +void +fill_status_stats(gpsc::SystemStat *stats) +{ + std::ifstream proc_stat("/proc/self/status"); + std::string key, measure; + while (proc_stat >> key) + { + if (key == "VmPeak:") + { + uint64_t value; + proc_stat >> value; + stats->set_vmpeakkb(value); + proc_stat >> 
measure; + if (measure != "kB") + { + throw std::runtime_error( + "Expected memory sizes in kB, but got in " + measure); + } + } + else if (key == "VmSize:") + { + uint64_t value; + proc_stat >> value; + stats->set_vmsizekb(value); + if (measure != "kB") + { + throw std::runtime_error( + "Expected memory sizes in kB, but got in " + measure); + } + } + } +} +} // namespace + +void +fill_self_stats(gpsc::SystemStat *stats) +{ + fill_io_stats(stats); + fill_cpu_stats(stats); + fill_status_stats(stats); +} \ No newline at end of file diff --git a/gpcontrib/gp_stats_collector/src/ProcStats.h b/gpcontrib/gp_stats_collector/src/ProcStats.h new file mode 100644 index 00000000000..8b83dbfef02 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/ProcStats.h @@ -0,0 +1,37 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * ProcStats.h + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/ProcStats.h + * + *------------------------------------------------------------------------- + */ + +#ifndef PROCSTATS_H +#define PROCSTATS_H + +namespace gpsc +{ +class SystemStat; +} + +void fill_self_stats(gpsc::SystemStat *stats); +#endif /* PROCSTATS_H */ diff --git a/gpcontrib/gp_stats_collector/src/ProtoUtils.cpp b/gpcontrib/gp_stats_collector/src/ProtoUtils.cpp new file mode 100644 index 00000000000..b22f580303e --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/ProtoUtils.cpp @@ -0,0 +1,375 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
namespace
{
// The two most significant bits of a byte; masking with this isolates the
// part that tells a continuation byte apart from every other byte.
constexpr uint8_t UTF8_CONTINUATION_BYTE_MASK = 0xC0;
// Masked value of a continuation byte (bit pattern 10xxxxxx).
constexpr uint8_t UTF8_CONTINUATION_BYTE = 0x80;
// Upper bound, in bytes, used when stepping back over one UTF-8 symbol.
constexpr uint8_t UTF8_MAX_SYMBOL_BYTES = 4;

// Reports whether `byte` can begin a UTF-8 character: false for a
// continuation byte (10xxxxxx), true for anything else.
inline bool
utf8_start_byte(uint8_t byte)
{
	const bool is_continuation =
		(byte & UTF8_CONTINUATION_BYTE_MASK) == UTF8_CONTINUATION_BYTE;
	return !is_continuation;
}
}  // namespace
gpsc::PlanGenerator::PLAN_GENERATOR_OPTIMIZER + : gpsc::PlanGenerator::PLAN_GENERATOR_PLANNER); + MemoryContext oldcxt = + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = gpdb::get_explain_state(query_desc, true); + if (es.str) + { + *qi->mutable_plan_text() = trim_str_shrink_utf8( + es.str->data, es.str->len, config.max_plan_size()); + StringInfo norm_plan = gpdb::gen_normplan(es.str->data); + if (norm_plan) + { + *qi->mutable_template_plan_text() = trim_str_shrink_utf8( + norm_plan->data, norm_plan->len, config.max_plan_size()); + qi->set_plan_id(hash_any((unsigned char *) norm_plan->data, + norm_plan->len)); + gpdb::pfree(norm_plan->data); + } + qi->set_query_id(query_desc->plannedstmt->queryId); + gpdb::pfree(es.str->data); + } + gpdb::mem_ctx_switch_to(oldcxt); + } +} + +void +set_query_text(gpsc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config) +{ + if (Gp_role == GP_ROLE_DISPATCH && query_desc->sourceText) + { + auto qi = req->mutable_query_info(); + *qi->mutable_query_text() = trim_str_shrink_utf8( + query_desc->sourceText, strlen(query_desc->sourceText), + config.max_text_size()); + char *norm_query = gpdb::gen_normquery(query_desc->sourceText); + if (norm_query) + { + *qi->mutable_template_query_text() = trim_str_shrink_utf8( + norm_query, strlen(norm_query), config.max_text_size()); + gpdb::pfree(norm_query); + } + } +} + +void +clear_big_fields(gpsc::SetQueryReq *req) +{ + if (Gp_role == GP_ROLE_DISPATCH) + { + auto qi = req->mutable_query_info(); + qi->clear_plan_text(); + qi->clear_template_plan_text(); + qi->clear_query_text(); + qi->clear_template_query_text(); + qi->clear_analyze_text(); + } +} + +void +set_query_info(gpsc::SetQueryReq *req) +{ + if (Gp_role == GP_ROLE_DISPATCH) + { + auto qi = req->mutable_query_info(); + qi->set_username(get_user_name()); + if (IsTransactionState()) + qi->set_databasename(get_db_name()); + qi->set_rsgname(get_rg_name()); + } +} + +void 
+set_qi_nesting_level(gpsc::SetQueryReq *req, int nesting_level) +{ + auto aqi = req->mutable_add_info(); + aqi->set_nested_level(nesting_level); +} + +void +set_qi_slice_id(gpsc::SetQueryReq *req) +{ + auto aqi = req->mutable_add_info(); + aqi->set_slice_id(currentSliceId); +} + +void +set_qi_error_message(gpsc::SetQueryReq *req, const char *err_msg, + const Config &config) +{ + auto aqi = req->mutable_add_info(); + *aqi->mutable_error_message() = + trim_str_shrink_utf8(err_msg, strlen(err_msg), config.max_text_size()); +} + +void +set_metric_instrumentation(gpsc::MetricInstrumentation *metrics, + QueryDesc *query_desc, int nested_calls, + double nested_time) +{ + auto instrument = query_desc->planstate->instrument; + if (instrument) + { + metrics->set_ntuples(instrument->ntuples); + metrics->set_nloops(instrument->nloops); + metrics->set_tuplecount(instrument->tuplecount); + metrics->set_firsttuple(instrument->firsttuple); + metrics->set_startup(instrument->startup); + metrics->set_total(instrument->total); + auto &buffusage = instrument->bufusage; + metrics->set_shared_blks_hit(buffusage.shared_blks_hit); + metrics->set_shared_blks_read(buffusage.shared_blks_read); + metrics->set_shared_blks_dirtied(buffusage.shared_blks_dirtied); + metrics->set_shared_blks_written(buffusage.shared_blks_written); + metrics->set_local_blks_hit(buffusage.local_blks_hit); + metrics->set_local_blks_read(buffusage.local_blks_read); + metrics->set_local_blks_dirtied(buffusage.local_blks_dirtied); + metrics->set_local_blks_written(buffusage.local_blks_written); + metrics->set_temp_blks_read(buffusage.temp_blks_read); + metrics->set_temp_blks_written(buffusage.temp_blks_written); + metrics->set_blk_read_time( + INSTR_TIME_GET_DOUBLE(buffusage.blk_read_time)); + metrics->set_blk_write_time( + INSTR_TIME_GET_DOUBLE(buffusage.blk_write_time)); + } + if (query_desc->estate && query_desc->estate->motionlayer_context) + { + MotionLayerState *mlstate = + (MotionLayerState *) 
query_desc->estate->motionlayer_context; + metrics->mutable_sent()->set_total_bytes( + mlstate->stat_total_bytes_sent); + metrics->mutable_sent()->set_tuple_bytes( + mlstate->stat_tuple_bytes_sent); + metrics->mutable_sent()->set_chunks(mlstate->stat_total_chunks_sent); + metrics->mutable_received()->set_total_bytes( + mlstate->stat_total_bytes_recvd); + metrics->mutable_received()->set_tuple_bytes( + mlstate->stat_tuple_bytes_recvd); + metrics->mutable_received()->set_chunks( + mlstate->stat_total_chunks_recvd); + } + metrics->set_inherited_calls(nested_calls); + metrics->set_inherited_time(nested_time); +} + +void +set_gp_metrics(gpsc::GPMetrics *metrics, QueryDesc *query_desc, + int nested_calls, double nested_time) +{ + if (query_desc->planstate && query_desc->planstate->instrument) + { + set_metric_instrumentation(metrics->mutable_instrumentation(), + query_desc, nested_calls, nested_time); + } + fill_self_stats(metrics->mutable_systemstat()); + metrics->mutable_systemstat()->set_runningtimeseconds( + time(NULL) - metrics->mutable_systemstat()->runningtimeseconds()); + metrics->mutable_spill()->set_filecount( + WorkfileTotalFilesCreated() - metrics->mutable_spill()->filecount()); + metrics->mutable_spill()->set_totalbytes( + WorkfileTotalBytesWritten() - metrics->mutable_spill()->totalbytes()); +} + +#define UPDATE_IC_STATS(proto_name, stat_name) \ + metrics->mutable_interconnect()->set_##proto_name( \ + ic_statistics->stat_name - \ + metrics->mutable_interconnect()->proto_name()); \ + Assert(metrics->mutable_interconnect()->proto_name() >= 0 && \ + metrics->mutable_interconnect()->proto_name() <= \ + ic_statistics->stat_name) + +void +set_ic_stats(gpsc::MetricInstrumentation *metrics, + const ICStatistics *ic_statistics) +{ +#ifdef IC_TEARDOWN_HOOK + UPDATE_IC_STATS(total_recv_queue_size, totalRecvQueueSize); + UPDATE_IC_STATS(recv_queue_size_counting_time, recvQueueSizeCountingTime); + UPDATE_IC_STATS(total_capacity, totalCapacity); + 
UPDATE_IC_STATS(capacity_counting_time, capacityCountingTime); + UPDATE_IC_STATS(total_buffers, totalBuffers); + UPDATE_IC_STATS(buffer_counting_time, bufferCountingTime); + UPDATE_IC_STATS(active_connections_num, activeConnectionsNum); + UPDATE_IC_STATS(retransmits, retransmits); + UPDATE_IC_STATS(startup_cached_pkt_num, startupCachedPktNum); + UPDATE_IC_STATS(mismatch_num, mismatchNum); + UPDATE_IC_STATS(crc_errors, crcErrors); + UPDATE_IC_STATS(snd_pkt_num, sndPktNum); + UPDATE_IC_STATS(recv_pkt_num, recvPktNum); + UPDATE_IC_STATS(disordered_pkt_num, disorderedPktNum); + UPDATE_IC_STATS(duplicated_pkt_num, duplicatedPktNum); + UPDATE_IC_STATS(recv_ack_num, recvAckNum); + UPDATE_IC_STATS(status_query_msg_num, statusQueryMsgNum); +#endif +} + +gpsc::SetQueryReq +create_query_req(gpsc::QueryStatus status) +{ + gpsc::SetQueryReq req; + req.set_query_status(status); + *req.mutable_datetime() = current_ts(); + set_query_key(req.mutable_query_key()); + set_segment_key(req.mutable_segment_key()); + return req; +} + +double +protots_to_double(const google::protobuf::Timestamp &ts) +{ + return double(ts.seconds()) + double(ts.nanos()) / 1000000000.0; +} + +void +set_analyze_plan_text(QueryDesc *query_desc, gpsc::SetQueryReq *req, + const Config &config) +{ + // Make sure it is a valid txn and it is not an utility + // statement for ExplainPrintPlan() later. + if (!IsTransactionState() || !query_desc->plannedstmt) + { + return; + } + MemoryContext oldcxt = + gpdb::mem_ctx_switch_to(query_desc->estate->es_query_cxt); + ExplainState es = gpdb::get_analyze_state( + query_desc, query_desc->instrument_options && config.enable_analyze()); + gpdb::mem_ctx_switch_to(oldcxt); + if (es.str) + { + // Remove last line break. 
+ if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n') + { + es.str->data[--es.str->len] = '\0'; + } + auto trimmed_analyze = trim_str_shrink_utf8(es.str->data, es.str->len, + config.max_plan_size()); + req->mutable_query_info()->set_analyze_text(trimmed_analyze); + gpdb::pfree(es.str->data); + } +} diff --git a/gpcontrib/gp_stats_collector/src/ProtoUtils.h b/gpcontrib/gp_stats_collector/src/ProtoUtils.h new file mode 100644 index 00000000000..6b38097fbcc --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/ProtoUtils.h @@ -0,0 +1,57 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * ProtoUtils.h + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/ProtoUtils.h + * + *------------------------------------------------------------------------- + */ + +#ifndef PROTOUTILS_H +#define PROTOUTILS_H + +#include "protos/gpsc_set_service.pb.h" + +struct QueryDesc; +struct ICStatistics; +class Config; + +google::protobuf::Timestamp current_ts(); +void set_query_plan(gpsc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config); +void set_query_text(gpsc::SetQueryReq *req, QueryDesc *query_desc, + const Config &config); +void clear_big_fields(gpsc::SetQueryReq *req); +void set_query_info(gpsc::SetQueryReq *req); +void set_qi_nesting_level(gpsc::SetQueryReq *req, int nesting_level); +void set_qi_slice_id(gpsc::SetQueryReq *req); +void set_qi_error_message(gpsc::SetQueryReq *req, const char *err_msg, + const Config &config); +void set_gp_metrics(gpsc::GPMetrics *metrics, QueryDesc *query_desc, + int nested_calls, double nested_time); +void set_ic_stats(gpsc::MetricInstrumentation *metrics, + const ICStatistics *ic_statistics); +gpsc::SetQueryReq create_query_req(gpsc::QueryStatus status); +double protots_to_double(const google::protobuf::Timestamp &ts); +void set_analyze_plan_text(QueryDesc *query_desc, gpsc::SetQueryReq *message, + const Config &config); + +#endif /* PROTOUTILS_H */ diff --git a/gpcontrib/gp_stats_collector/src/UDSConnector.cpp b/gpcontrib/gp_stats_collector/src/UDSConnector.cpp new file mode 100644 index 00000000000..056fa9071a5 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/UDSConnector.cpp @@ -0,0 +1,144 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * UDSConnector.cpp + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/UDSConnector.cpp + * + *------------------------------------------------------------------------- + */ + +#include "UDSConnector.h" +#include "Config.h" +#include "GpscStat.h" +#include "log/LogOps.h" +#include "memory/gpdbwrappers.h" + +#include +#include +#include +#include +#include +#include + +extern "C" { +#include "postgres.h" +} + +static void inline log_tracing_failure(const gpsc::SetQueryReq &req, + const std::string &event) +{ + ereport(LOG, (errmsg("Query {%d-%d-%d} %s tracing failed with error %m", + req.query_key().tmid(), req.query_key().ssid(), + req.query_key().ccnt(), event.c_str()))); +} + +bool +UDSConnector::report_query(const gpsc::SetQueryReq &req, + const std::string &event, const Config &config) +{ + sockaddr_un address{}; + address.sun_family = AF_UNIX; + const auto &uds_path = config.uds_path(); + + if (uds_path.size() >= sizeof(address.sun_path)) + { + ereport(WARNING, (errmsg("UDS path is too long for socket buffer"))); + GpscStat::report_error(); + return false; + } + strcpy(address.sun_path, uds_path.c_str()); + + const auto sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (sockfd == -1) + { + log_tracing_failure(req, event); + GpscStat::report_error(); + return false; + } + + // Close socket automatically on error path. 
+ struct SockGuard + { + int fd; + ~SockGuard() + { + close(fd); + } + } sock_guard{sockfd}; + + if (fcntl(sockfd, F_SETFL, O_NONBLOCK) == -1) + { + // That's a very important error that should never happen, so make it + // visible to an end-user and admins. + ereport(WARNING, + (errmsg("Unable to create non-blocking socket connection %m"))); + GpscStat::report_error(); + return false; + } + + if (connect(sockfd, reinterpret_cast(&address), + sizeof(address)) == -1) + { + log_tracing_failure(req, event); + GpscStat::report_bad_connection(); + return false; + } + + const auto data_size = req.ByteSizeLong(); + const auto total_size = data_size + sizeof(uint32_t); + auto *buf = static_cast(gpdb::palloc(total_size)); + // Free buf automatically on error path. + struct BufGuard + { + void *p; + ~BufGuard() + { + gpdb::pfree(p); + } + } buf_guard{buf}; + + *reinterpret_cast(buf) = data_size; + req.SerializeWithCachedSizesToArray(buf + sizeof(uint32_t)); + + int64_t sent = 0, sent_total = 0; + do + { + sent = send(sockfd, buf + sent_total, total_size - sent_total, + MSG_DONTWAIT); + if (sent > 0) + sent_total += sent; + } while (sent > 0 && size_t(sent_total) != total_size && + // the line below is a small throttling hack: + // if a message does not fit a single packet, we take a nap + // before sending the next one. 
+ // Otherwise, MSG_DONTWAIT send might overflow the UDS + (pg_usleep(1000), true)); + + if (sent < 0) + { + log_tracing_failure(req, event); + GpscStat::report_bad_send(total_size); + return false; + } + + GpscStat::report_send(total_size); + return true; +} diff --git a/gpcontrib/gp_stats_collector/src/UDSConnector.h b/gpcontrib/gp_stats_collector/src/UDSConnector.h new file mode 100644 index 00000000000..ac56dd54f44 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/UDSConnector.h @@ -0,0 +1,42 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * UDSConnector.h + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/UDSConnector.h + * + *------------------------------------------------------------------------- + */ + +#ifndef UDSCONNECTOR_H +#define UDSCONNECTOR_H + +#include "protos/gpsc_set_service.pb.h" + +class Config; + +class UDSConnector +{ +public: + bool static report_query(const gpsc::SetQueryReq &req, + const std::string &event, const Config &config); +}; + +#endif /* UDSCONNECTOR_H */ diff --git a/gpcontrib/gp_stats_collector/src/gp_stats_collector.c b/gpcontrib/gp_stats_collector/src/gp_stats_collector.c new file mode 100644 index 00000000000..d295e37b396 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/gp_stats_collector.c @@ -0,0 +1,175 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * gp_stats_collector.c + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/gp_stats_collector.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "cdb/cdbvars.h" +#include "funcapi.h" +#include "utils/builtins.h" + +#include "hook_wrappers.h" + +PG_MODULE_MAGIC; + +void _PG_init(void); +void _PG_fini(void); +PG_FUNCTION_INFO_V1(gpsc_stat_messages_reset); +PG_FUNCTION_INFO_V1(gpsc_stat_messages); +PG_FUNCTION_INFO_V1(gpsc_init_log); +PG_FUNCTION_INFO_V1(gpsc_truncate_log); + +PG_FUNCTION_INFO_V1(gpsc_test_uds_start_server); +PG_FUNCTION_INFO_V1(gpsc_test_uds_receive); +PG_FUNCTION_INFO_V1(gpsc_test_uds_stop_server); + +void +_PG_init(void) +{ + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) + hooks_init(); +} + +void +_PG_fini(void) +{ + if (Gp_role == GP_ROLE_DISPATCH || Gp_role == GP_ROLE_EXECUTE) + hooks_deinit(); +} + +Datum +gpsc_stat_messages_reset(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) + { + funcctx = SRF_FIRSTCALL_INIT(); + gpsc_functions_reset(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); +} + +Datum +gpsc_stat_messages(PG_FUNCTION_ARGS) +{ + return gpsc_functions_get(fcinfo); +} + +Datum +gpsc_init_log(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) + { + funcctx = SRF_FIRSTCALL_INIT(); + init_log(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); +} + +Datum +gpsc_truncate_log(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) + { + funcctx = SRF_FIRSTCALL_INIT(); + truncate_log(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); +} + +Datum +gpsc_test_uds_start_server(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) + { + funcctx = SRF_FIRSTCALL_INIT(); + char *path = text_to_cstring(PG_GETARG_TEXT_PP(0)); + test_uds_start_server(path); + pfree(path); + } + + funcctx = 
SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); +} + +Datum +gpsc_test_uds_receive(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + int64 *result; + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + result = (int64 *) palloc(sizeof(int64)); + funcctx->user_fctx = result; + funcctx->max_calls = 1; + MemoryContextSwitchTo(oldcontext); + + int timeout_ms = PG_GETARG_INT32(0); + *result = test_uds_receive(timeout_ms); + } + + funcctx = SRF_PERCALL_SETUP(); + + if (funcctx->call_cntr < funcctx->max_calls) + { + result = (int64 *) funcctx->user_fctx; + SRF_RETURN_NEXT(funcctx, Int64GetDatum(*result)); + } + + SRF_RETURN_DONE(funcctx); +} + +Datum +gpsc_test_uds_stop_server(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + + if (SRF_IS_FIRSTCALL()) + { + funcctx = SRF_FIRSTCALL_INIT(); + test_uds_stop_server(); + } + + funcctx = SRF_PERCALL_SETUP(); + SRF_RETURN_DONE(funcctx); +} diff --git a/gpcontrib/gp_stats_collector/src/hook_wrappers.cpp b/gpcontrib/gp_stats_collector/src/hook_wrappers.cpp new file mode 100644 index 00000000000..38ea117bda2 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/hook_wrappers.cpp @@ -0,0 +1,474 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * hook_wrappers.cpp + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/hook_wrappers.cpp + * + *------------------------------------------------------------------------- + */ + +#define typeid __typeid +extern "C" { +#include "postgres.h" +#include "cdb/cdbvars.h" +#include "cdb/ml_ipc.h" +#include "executor/execUtils.h" +#include "executor/executor.h" +#include "funcapi.h" +#include "stat_statements_parser/pg_stat_statements_parser.h" +#include "tcop/utility.h" +#include "utils/builtins.h" +#include "utils/elog.h" +#include "utils/metrics_utils.h" + +#include +#include +#include +#include +#include +} +#undef typeid + +#include "Config.h" +#include "EventSender.h" +#include "GpscStat.h" +#include "hook_wrappers.h" +#include "memory/gpdbwrappers.h" + +static ExecutorStart_hook_type previous_ExecutorStart_hook = nullptr; +static ExecutorRun_hook_type previous_ExecutorRun_hook = nullptr; +static ExecutorFinish_hook_type previous_ExecutorFinish_hook = nullptr; +static ExecutorEnd_hook_type previous_ExecutorEnd_hook = nullptr; +static query_info_collect_hook_type previous_query_info_collect_hook = nullptr; +#ifdef ANALYZE_STATS_COLLECT_HOOK +static analyze_stats_collect_hook_type previous_analyze_stats_collect_hook = + nullptr; +#endif +#ifdef IC_TEARDOWN_HOOK +static ic_teardown_hook_type previous_ic_teardown_hook = nullptr; +#endif +static ProcessUtility_hook_type previous_ProcessUtility_hook = nullptr; + +static void gpsc_ExecutorStart_hook(QueryDesc *query_desc, int eflags); +static void gpsc_ExecutorRun_hook(QueryDesc *query_desc, + 
ScanDirection direction, uint64 count, + bool execute_once); +static void gpsc_ExecutorFinish_hook(QueryDesc *query_desc); +static void gpsc_ExecutorEnd_hook(QueryDesc *query_desc); +static void gpsc_query_info_collect_hook(QueryMetricsStatus status, void *arg); +#ifdef IC_TEARDOWN_HOOK +static void gpsc_ic_teardown_hook(ChunkTransportState *transportStates, + bool hasErrors); +#endif +#ifdef ANALYZE_STATS_COLLECT_HOOK +static void gpsc_analyze_stats_collect_hook(QueryDesc *query_desc); +#endif +static void gpsc_process_utility_hook( + PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, + ProcessUtilityContext context, ParamListInfo params, + QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc); + +#define TEST_MAX_CONNECTIONS 4 +#define TEST_RCV_BUF_SIZE 8192 +#define TEST_POLL_TIMEOUT_MS 200 + +static int test_server_fd = -1; +static char *test_sock_path = NULL; + +static EventSender *sender = nullptr; + +static inline EventSender * +get_sender() +{ + if (!sender) + { + sender = new EventSender(); + } + return sender; +} + +template +R +cpp_call(T *obj, R (T::*func)(Args...), Args... 
args) +{ + try + { + return (obj->*func)(args...); + } + catch (const std::exception &e) + { + ereport(ERROR, (errmsg("Unexpected exception in gpsc %s", e.what()))); + pg_unreachable(); + } +} + +void +hooks_init() +{ + Config::init_gucs(); + GpscStat::init(); + previous_ExecutorStart_hook = ExecutorStart_hook; + ExecutorStart_hook = gpsc_ExecutorStart_hook; + previous_ExecutorRun_hook = ExecutorRun_hook; + ExecutorRun_hook = gpsc_ExecutorRun_hook; + previous_ExecutorFinish_hook = ExecutorFinish_hook; + ExecutorFinish_hook = gpsc_ExecutorFinish_hook; + previous_ExecutorEnd_hook = ExecutorEnd_hook; + ExecutorEnd_hook = gpsc_ExecutorEnd_hook; + previous_query_info_collect_hook = query_info_collect_hook; + query_info_collect_hook = gpsc_query_info_collect_hook; +#ifdef IC_TEARDOWN_HOOK + previous_ic_teardown_hook = ic_teardown_hook; + ic_teardown_hook = gpsc_ic_teardown_hook; +#endif +#ifdef ANALYZE_STATS_COLLECT_HOOK + previous_analyze_stats_collect_hook = analyze_stats_collect_hook; + analyze_stats_collect_hook = gpsc_analyze_stats_collect_hook; +#endif + stat_statements_parser_init(); + previous_ProcessUtility_hook = ProcessUtility_hook; + ProcessUtility_hook = gpsc_process_utility_hook; +} + +void +hooks_deinit() +{ + ExecutorStart_hook = previous_ExecutorStart_hook; + ExecutorEnd_hook = previous_ExecutorEnd_hook; + ExecutorRun_hook = previous_ExecutorRun_hook; + ExecutorFinish_hook = previous_ExecutorFinish_hook; + query_info_collect_hook = previous_query_info_collect_hook; +#ifdef IC_TEARDOWN_HOOK + ic_teardown_hook = previous_ic_teardown_hook; +#endif +#ifdef ANALYZE_STATS_COLLECT_HOOK + analyze_stats_collect_hook = previous_analyze_stats_collect_hook; +#endif + stat_statements_parser_deinit(); + if (sender) + { + delete sender; + } + GpscStat::deinit(); + ProcessUtility_hook = previous_ProcessUtility_hook; +} + +void +gpsc_ExecutorStart_hook(QueryDesc *query_desc, int eflags) +{ + cpp_call(get_sender(), &EventSender::executor_before_start, query_desc, + 
eflags); + if (previous_ExecutorStart_hook) + { + (*previous_ExecutorStart_hook)(query_desc, eflags); + } + else + { + standard_ExecutorStart(query_desc, eflags); + } + cpp_call(get_sender(), &EventSender::executor_after_start, query_desc, + eflags); +} + +void +gpsc_ExecutorRun_hook(QueryDesc *query_desc, ScanDirection direction, + uint64 count, bool execute_once) +{ + get_sender()->incr_depth(); + PG_TRY(); + { + if (previous_ExecutorRun_hook) + previous_ExecutorRun_hook(query_desc, direction, count, + execute_once); + else + standard_ExecutorRun(query_desc, direction, count, execute_once); + get_sender()->decr_depth(); + } + PG_CATCH(); + { + get_sender()->decr_depth(); + PG_RE_THROW(); + } + PG_END_TRY(); +} + +void +gpsc_ExecutorFinish_hook(QueryDesc *query_desc) +{ + get_sender()->incr_depth(); + PG_TRY(); + { + if (previous_ExecutorFinish_hook) + previous_ExecutorFinish_hook(query_desc); + else + standard_ExecutorFinish(query_desc); + get_sender()->decr_depth(); + } + PG_CATCH(); + { + get_sender()->decr_depth(); + PG_RE_THROW(); + } + PG_END_TRY(); +} + +void +gpsc_ExecutorEnd_hook(QueryDesc *query_desc) +{ + cpp_call(get_sender(), &EventSender::executor_end, query_desc); + if (previous_ExecutorEnd_hook) + { + (*previous_ExecutorEnd_hook)(query_desc); + } + else + { + standard_ExecutorEnd(query_desc); + } +} + +void +gpsc_query_info_collect_hook(QueryMetricsStatus status, void *arg) +{ + cpp_call(get_sender(), &EventSender::query_metrics_collect, status, + arg /* queryDesc */, false /* utility */, (ErrorData *) NULL); + if (previous_query_info_collect_hook) + { + (*previous_query_info_collect_hook)(status, arg); + } +} + +#ifdef IC_TEARDOWN_HOOK +void +gpsc_ic_teardown_hook(ChunkTransportState *transportStates, bool hasErrors) +{ + cpp_call(get_sender(), &EventSender::ic_metrics_collect); + if (previous_ic_teardown_hook) + { + (*previous_ic_teardown_hook)(transportStates, hasErrors); + } +} +#endif + +#ifdef ANALYZE_STATS_COLLECT_HOOK +void 
+gpsc_analyze_stats_collect_hook(QueryDesc *query_desc) +{ + cpp_call(get_sender(), &EventSender::analyze_stats_collect, query_desc); + if (previous_analyze_stats_collect_hook) + { + (*previous_analyze_stats_collect_hook)(query_desc); + } +} +#endif + +static void +gpsc_process_utility_hook(PlannedStmt *pstmt, const char *queryString, + bool readOnlyTree, ProcessUtilityContext context, + ParamListInfo params, QueryEnvironment *queryEnv, + DestReceiver *dest, QueryCompletion *qc) +{ + /* Project utility data on QueryDesc to use existing logic */ + QueryDesc *query_desc = (QueryDesc *) palloc0(sizeof(QueryDesc)); + query_desc->sourceText = queryString; + + cpp_call(get_sender(), &EventSender::query_metrics_collect, + METRICS_QUERY_SUBMIT, (void *) query_desc, true /* utility */, + (ErrorData *) NULL); + + get_sender()->incr_depth(); + PG_TRY(); + { + if (previous_ProcessUtility_hook) + { + (*previous_ProcessUtility_hook)(pstmt, queryString, readOnlyTree, + context, params, queryEnv, dest, + qc); + } + else + { + standard_ProcessUtility(pstmt, queryString, readOnlyTree, context, + params, queryEnv, dest, qc); + } + + get_sender()->decr_depth(); + cpp_call(get_sender(), &EventSender::query_metrics_collect, + METRICS_QUERY_DONE, (void *) query_desc, true /* utility */, + (ErrorData *) NULL); + + pfree(query_desc); + } + PG_CATCH(); + { + ErrorData *edata; + MemoryContext oldctx; + + oldctx = MemoryContextSwitchTo(TopMemoryContext); + edata = CopyErrorData(); + FlushErrorState(); + MemoryContextSwitchTo(oldctx); + + get_sender()->decr_depth(); + cpp_call(get_sender(), &EventSender::query_metrics_collect, + METRICS_QUERY_ERROR, (void *) query_desc, true /* utility */, + edata); + + pfree(query_desc); + ReThrowError(edata); + } + PG_END_TRY(); +} + +static void +check_stats_loaded() +{ + if (!GpscStat::loaded()) + { + ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("gp_stats_collector must be loaded via " + "shared_preload_libraries"))); + } +} 
+ +void +gpsc_functions_reset() +{ + check_stats_loaded(); + GpscStat::reset(); +} + +Datum +gpsc_functions_get(FunctionCallInfo fcinfo) +{ + const int ATTNUM = 6; + check_stats_loaded(); + auto stats = GpscStat::get_stats(); + TupleDesc tupdesc = CreateTemplateTupleDesc(ATTNUM); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segid", INT4OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "total_messages", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "send_failures", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "connection_failures", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "other_errors", INT8OID, + -1 /* typmod */, 0 /* attdim */); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "max_message_size", INT4OID, + -1 /* typmod */, 0 /* attdim */); + tupdesc = BlessTupleDesc(tupdesc); + Datum values[ATTNUM]; + bool nulls[ATTNUM]; + MemSet(nulls, 0, sizeof(nulls)); + values[0] = Int32GetDatum(GpIdentity.segindex); + values[1] = Int64GetDatum(stats.total); + values[2] = Int64GetDatum(stats.failed_sends); + values[3] = Int64GetDatum(stats.failed_connects); + values[4] = Int64GetDatum(stats.failed_other); + values[5] = Int32GetDatum(stats.max_message_size); + HeapTuple tuple = gpdb::heap_form_tuple(tupdesc, values, nulls); + Datum result = HeapTupleGetDatum(tuple); + PG_RETURN_DATUM(result); +} + +void +test_uds_stop_server() +{ + if (test_server_fd >= 0) + { + close(test_server_fd); + test_server_fd = -1; + } + if (test_sock_path) + { + unlink(test_sock_path); + pfree(test_sock_path); + test_sock_path = NULL; + } +} + +void +test_uds_start_server(const char *path) +{ + struct sockaddr_un addr = {.sun_family = AF_UNIX}; + + if (strlen(path) >= sizeof(addr.sun_path)) + ereport(ERROR, (errmsg("path too long"))); + + test_uds_stop_server(); + + strlcpy(addr.sun_path, path, sizeof(addr.sun_path)); + 
test_sock_path = MemoryContextStrdup(TopMemoryContext, path); + unlink(path); + + if ((test_server_fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0 || + bind(test_server_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0 || + listen(test_server_fd, TEST_MAX_CONNECTIONS) < 0) + { + test_uds_stop_server(); + ereport(ERROR, (errmsg("socket setup failed: %m"))); + } +} + +int64 +test_uds_receive(int timeout_ms) +{ + char buf[TEST_RCV_BUF_SIZE]; + int rc; + struct pollfd pfd = {.fd = test_server_fd, .events = POLLIN}; + int64 total = 0; + + if (test_server_fd < 0) + ereport(ERROR, (errmsg("server not started"))); + + for (;;) + { + CHECK_FOR_INTERRUPTS(); + rc = poll(&pfd, 1, Min(timeout_ms, TEST_POLL_TIMEOUT_MS)); + if (rc > 0) + break; + if (rc < 0 && errno != EINTR) + ereport(ERROR, (errmsg("poll: %m"))); + timeout_ms -= TEST_POLL_TIMEOUT_MS; + if (timeout_ms <= 0) + return total; + } + + if (pfd.revents & POLLIN) + { + int client = accept(test_server_fd, NULL, NULL); + ssize_t n; + + if (client < 0) + ereport(ERROR, (errmsg("accept: %m"))); + + while ((n = recv(client, buf, sizeof(buf), 0)) != 0) + { + if (n > 0) + total += n; + else if (errno != EINTR) + break; + } + + close(client); + } + + return total; +} \ No newline at end of file diff --git a/gpcontrib/gp_stats_collector/src/hook_wrappers.h b/gpcontrib/gp_stats_collector/src/hook_wrappers.h new file mode 100644 index 00000000000..a04f5a95144 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/hook_wrappers.h @@ -0,0 +1,50 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * hook_wrappers.h + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/hook_wrappers.h + * + *------------------------------------------------------------------------- + */ + +#ifndef HOOK_WRAPPERS_H +#define HOOK_WRAPPERS_H + +#ifdef __cplusplus +extern "C" { +#endif + +extern void hooks_init(); +extern void hooks_deinit(); +extern void gpsc_functions_reset(); +extern Datum gpsc_functions_get(FunctionCallInfo fcinfo); + +extern void init_log(); +extern void truncate_log(); + +extern void test_uds_start_server(const char *path); +extern int64_t test_uds_receive(int timeout_ms); +extern void test_uds_stop_server(); + +#ifdef __cplusplus +} +#endif +#endif /* HOOK_WRAPPERS_H */ diff --git a/gpcontrib/gp_stats_collector/src/log/LogOps.cpp b/gpcontrib/gp_stats_collector/src/log/LogOps.cpp new file mode 100644 index 00000000000..865e0f6ce3f --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/log/LogOps.cpp @@ -0,0 +1,173 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * LogOps.cpp + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/log/LogOps.cpp + * + *------------------------------------------------------------------------- + */ + +#include "protos/gpsc_set_service.pb.h" + +#include "LogOps.h" +#include "LogSchema.h" + +extern "C" { +#include "postgres.h" + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/xact.h" +#include "catalog/dependency.h" +#include "catalog/heap.h" +#include "catalog/namespace.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_type.h" +#include "cdb/cdbvars.h" +#include "commands/tablecmds.h" +#include "fmgr.h" +#include "funcapi.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/timestamp.h" +} + +void +init_log() +{ + Oid namespaceId; + Oid relationId; + ObjectAddress tableAddr; + ObjectAddress schemaAddr; + + namespaceId = get_namespace_oid(schema_name.data(), false /* missing_ok */); + + /* Create table */ + relationId = heap_create_with_catalog( + log_relname.data() /* relname */, namespaceId /* namespace */, + 0 /* tablespace */, InvalidOid /* relid */, + InvalidOid /* reltype oid */, InvalidOid /* reloftypeid */, + GetUserId() /* owner */, HEAP_TABLE_AM_OID, + DescribeTuple() /* rel tuple */, NIL /* cooked_constraints */, + RELKIND_RELATION, RELPERSISTENCE_PERMANENT, false /* shared_relation */, + false /* mapped_relation */, ONCOMMIT_NOOP, NULL /* GP Policy */, + (Datum) 0 /* reloptions */, false /* use_user_acl */, + true /* 
allow_system_table_mods */, true /* is_internal */, + InvalidOid /* relrewrite */, NULL /* typaddress */, + false /* valid_opts */); + + /* Make the table visible */ + CommandCounterIncrement(); + + /* Record dependency of the table on the schema */ + if (OidIsValid(relationId) && OidIsValid(namespaceId)) + { + ObjectAddressSet(tableAddr, RelationRelationId, relationId); + ObjectAddressSet(schemaAddr, NamespaceRelationId, namespaceId); + + /* Table can be dropped only via DROP EXTENSION */ + recordDependencyOn(&tableAddr, &schemaAddr, DEPENDENCY_EXTENSION); + } + else + { + ereport(NOTICE, (errmsg("GPSC failed to create log table or schema"))); + } + + /* Make changes visible */ + CommandCounterIncrement(); +} + +void +insert_log(const gpsc::SetQueryReq &req, bool utility) +{ + Oid namespaceId; + Oid relationId; + Relation rel; + HeapTuple tuple; + + /* Return if xact is not valid (needed for catalog lookups). */ + if (!IsTransactionState()) + { + return; + } + + /* Return if extension was not loaded */ + namespaceId = get_namespace_oid(schema_name.data(), true /* missing_ok */); + if (!OidIsValid(namespaceId)) + { + return; + } + + /* Return if the table was not created yet */ + relationId = get_relname_relid(log_relname.data(), namespaceId); + if (!OidIsValid(relationId)) + { + return; + } + + bool nulls[natts_gpsc_log]; + Datum values[natts_gpsc_log]; + + memset(nulls, true, sizeof(nulls)); + memset(values, 0, sizeof(values)); + + extract_query_req(req, "", values, nulls); + nulls[attnum_gpsc_log_utility] = false; + values[attnum_gpsc_log_utility] = BoolGetDatum(utility); + + rel = heap_open(relationId, RowExclusiveLock); + + /* Insert the tuple as a frozen one to ensure it is logged even if txn rolls + * back or aborts */ + tuple = heap_form_tuple(RelationGetDescr(rel), values, nulls); + frozen_heap_insert(rel, tuple); + + heap_freetuple(tuple); + /* Keep lock on rel until end of xact */ + heap_close(rel, NoLock); + + /* Make changes visible */ + 
CommandCounterIncrement(); +} + +void +truncate_log() +{ + Oid namespaceId; + Oid relationId; + Relation relation; + + namespaceId = get_namespace_oid(schema_name.data(), false /* missing_ok */); + relationId = get_relname_relid(log_relname.data(), namespaceId); + + relation = heap_open(relationId, AccessExclusiveLock); + + /* Truncate the main table */ + heap_truncate_one_rel(relation); + + /* Keep lock on rel until end of xact */ + heap_close(relation, NoLock); + + /* Make changes visible */ + CommandCounterIncrement(); +} \ No newline at end of file diff --git a/gpcontrib/gp_stats_collector/src/log/LogOps.h b/gpcontrib/gp_stats_collector/src/log/LogOps.h new file mode 100644 index 00000000000..45d79cd4560 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/log/LogOps.h @@ -0,0 +1,49 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * LogOps.h + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/log/LogOps.h + * + *------------------------------------------------------------------------- + */ + +#ifndef LOGOPS_H +#define LOGOPS_H + +#include + +extern "C" { +#include "postgres.h" +#include "fmgr.h" +} + +extern "C" { +/* CREATE TABLE gpsc.__log (...); */ +void init_log(); + +/* TRUNCATE gpsc.__log */ +void truncate_log(); +} + +/* INSERT INTO gpsc.__log VALUES (...) */ +void insert_log(const gpsc::SetQueryReq &req, bool utility); + +#endif /* LOGOPS_H */ diff --git a/gpcontrib/gp_stats_collector/src/log/LogSchema.cpp b/gpcontrib/gp_stats_collector/src/log/LogSchema.cpp new file mode 100644 index 00000000000..254b1b04af4 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/log/LogSchema.cpp @@ -0,0 +1,189 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * LogSchema.cpp + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/log/LogSchema.cpp + * + *------------------------------------------------------------------------- + */ + +#include "google/protobuf/descriptor.h" +#include "google/protobuf/reflection.h" +#include "google/protobuf/timestamp.pb.h" + +#include "LogSchema.h" + +const std::unordered_map & +proto_name_to_col_idx() +{ + static const auto name_col_idx = [] { + std::unordered_map map; + map.reserve(log_tbl_desc.size()); + + for (size_t idx = 0; idx < natts_gpsc_log; ++idx) + { + map.emplace(log_tbl_desc[idx].proto_field_name, idx); + } + + return map; + }(); + return name_col_idx; +} + +TupleDesc +DescribeTuple() +{ + TupleDesc tupdesc = CreateTemplateTupleDesc(natts_gpsc_log); + + for (size_t anum = 1; anum <= natts_gpsc_log; ++anum) + { + TupleDescInitEntry( + tupdesc, anum, log_tbl_desc[anum - 1].pg_att_name.data(), + log_tbl_desc[anum - 1].type_oid, -1 /* typmod */, 0 /* attdim */); + } + + return tupdesc; +} + +Datum +protots_to_timestamptz(const google::protobuf::Timestamp &ts) +{ + TimestampTz pgtimestamp = + (TimestampTz) ts.seconds() * USECS_PER_SEC + (ts.nanos() / 1000); + pgtimestamp -= (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * USECS_PER_DAY; + return TimestampTzGetDatum(pgtimestamp); +} + +Datum +field_to_datum(const google::protobuf::FieldDescriptor *field, + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg) +{ + using namespace google::protobuf; + + switch (field->cpp_type()) + { + case FieldDescriptor::CPPTYPE_INT32: + return Int32GetDatum(reflection->GetInt32(msg, field)); + case FieldDescriptor::CPPTYPE_INT64: + return Int64GetDatum(reflection->GetInt64(msg, field)); + case FieldDescriptor::CPPTYPE_UINT32: + return Int64GetDatum(reflection->GetUInt32(msg, field)); + case FieldDescriptor::CPPTYPE_UINT64: + return Int64GetDatum( + static_cast(reflection->GetUInt64(msg, field))); + case FieldDescriptor::CPPTYPE_DOUBLE: + return 
Float8GetDatum(reflection->GetDouble(msg, field)); + case FieldDescriptor::CPPTYPE_FLOAT: + return Float4GetDatum(reflection->GetFloat(msg, field)); + case FieldDescriptor::CPPTYPE_BOOL: + return BoolGetDatum(reflection->GetBool(msg, field)); + case FieldDescriptor::CPPTYPE_ENUM: + return CStringGetTextDatum( + reflection->GetEnum(msg, field)->name().data()); + case FieldDescriptor::CPPTYPE_STRING: + return CStringGetTextDatum( + reflection->GetString(msg, field).c_str()); + default: + return (Datum) 0; + } +} + +void +process_field(const google::protobuf::FieldDescriptor *field, + const google::protobuf::Reflection *reflection, + const google::protobuf::Message &msg, + const std::string &field_name, Datum *values, bool *nulls) +{ + auto proto_idx_map = proto_name_to_col_idx(); + auto it = proto_idx_map.find(field_name); + + if (it == proto_idx_map.end()) + { + ereport(NOTICE, + (errmsg("GPSC protobuf field %s is not registered in log table", + field_name.c_str()))); + return; + } + + int idx = it->second; + + if (!reflection->HasField(msg, field)) + { + nulls[idx] = true; + return; + } + + if (field->cpp_type() == + google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE && + field->message_type()->full_name() == "google.protobuf.Timestamp") + { + const auto &ts = static_cast( + reflection->GetMessage(msg, field)); + values[idx] = protots_to_timestamptz(ts); + } + else + { + values[idx] = field_to_datum(field, reflection, msg); + } + nulls[idx] = false; + + return; +} + +void +extract_query_req(const google::protobuf::Message &msg, + const std::string &prefix, Datum *values, bool *nulls) +{ + using namespace google::protobuf; + + const Descriptor *descriptor = msg.GetDescriptor(); + const Reflection *reflection = msg.GetReflection(); + + for (int i = 0; i < descriptor->field_count(); ++i) + { + const FieldDescriptor *field = descriptor->field(i); + + // For now, we do not log any repeated fields plus they need special + // treatment. 
+ if (field->is_repeated()) + { + continue; + } + + std::string curr_pref = prefix.empty() ? "" : prefix + "."; + std::string field_name = curr_pref + field->name().data(); + + if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE && + field->message_type()->full_name() != "google.protobuf.Timestamp") + { + if (reflection->HasField(msg, field)) + { + const Message &nested = reflection->GetMessage(msg, field); + extract_query_req(nested, field_name, values, nulls); + } + } + else + { + process_field(field, reflection, msg, field_name, values, nulls); + } + } +} diff --git a/gpcontrib/gp_stats_collector/src/log/LogSchema.h b/gpcontrib/gp_stats_collector/src/log/LogSchema.h new file mode 100644 index 00000000000..f6c2247370a --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/log/LogSchema.h @@ -0,0 +1,199 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ * + * LogSchema.h + * + * IDENTIFICATION + * gpcontrib/gp_stats_collector/src/log/LogSchema.h + * + *------------------------------------------------------------------------- + */ + +#ifndef LOGSCHEMA_H +#define LOGSCHEMA_H + +#include +#include +#include +#include + +extern "C" { +#include "postgres.h" +#include "access/htup_details.h" +#include "access/tupdesc.h" +#include "catalog/pg_type.h" +#include "utils/builtins.h" +#include "utils/timestamp.h" +} + +namespace google +{ +namespace protobuf +{ +class FieldDescriptor; +class Message; +class Reflection; +class Timestamp; +} // namespace protobuf +} // namespace google + +inline constexpr std::string_view schema_name = "gpsc"; +inline constexpr std::string_view log_relname = "__log"; + +struct LogDesc +{ + std::string_view pg_att_name; + std::string_view proto_field_name; + Oid type_oid; +}; + +/* + * Definition of the log table structure. + * + * System stats collected as %lu (unsigned) may + * overflow INT8OID (signed), but this is acceptable. 
+ */ +/* clang-format off */ +inline constexpr std::array log_tbl_desc = { + /* 8-byte aligned types first - Query Info */ + LogDesc{"query_id", "query_info.query_id", INT8OID}, + LogDesc{"plan_id", "query_info.plan_id", INT8OID}, + LogDesc{"nested_level", "add_info.nested_level", INT8OID}, + LogDesc{"slice_id", "add_info.slice_id", INT8OID}, + /* 8-byte aligned types - System Stats */ + LogDesc{"systemstat_vsize", "query_metrics.systemStat.vsize", INT8OID}, + LogDesc{"systemstat_rss", "query_metrics.systemStat.rss", INT8OID}, + LogDesc{"systemstat_vmsizekb", "query_metrics.systemStat.VmSizeKb", INT8OID}, + LogDesc{"systemstat_vmpeakkb", "query_metrics.systemStat.VmPeakKb", INT8OID}, + LogDesc{"systemstat_rchar", "query_metrics.systemStat.rchar", INT8OID}, + LogDesc{"systemstat_wchar", "query_metrics.systemStat.wchar", INT8OID}, + LogDesc{"systemstat_syscr", "query_metrics.systemStat.syscr", INT8OID}, + LogDesc{"systemstat_syscw", "query_metrics.systemStat.syscw", INT8OID}, + LogDesc{"systemstat_read_bytes", "query_metrics.systemStat.read_bytes", INT8OID}, + LogDesc{"systemstat_write_bytes", "query_metrics.systemStat.write_bytes", INT8OID}, + LogDesc{"systemstat_cancelled_write_bytes", "query_metrics.systemStat.cancelled_write_bytes", INT8OID}, + /* 8-byte aligned types - Metric Instrumentation */ + LogDesc{"instrumentation_ntuples", "query_metrics.instrumentation.ntuples", INT8OID}, + LogDesc{"instrumentation_nloops", "query_metrics.instrumentation.nloops", INT8OID}, + LogDesc{"instrumentation_tuplecount", "query_metrics.instrumentation.tuplecount", INT8OID}, + LogDesc{"instrumentation_shared_blks_hit", "query_metrics.instrumentation.shared_blks_hit", INT8OID}, + LogDesc{"instrumentation_shared_blks_read", "query_metrics.instrumentation.shared_blks_read", INT8OID}, + LogDesc{"instrumentation_shared_blks_dirtied", "query_metrics.instrumentation.shared_blks_dirtied", INT8OID}, + LogDesc{"instrumentation_shared_blks_written", 
"query_metrics.instrumentation.shared_blks_written", INT8OID}, + LogDesc{"instrumentation_local_blks_hit", "query_metrics.instrumentation.local_blks_hit", INT8OID}, + LogDesc{"instrumentation_local_blks_read", "query_metrics.instrumentation.local_blks_read", INT8OID}, + LogDesc{"instrumentation_local_blks_dirtied", "query_metrics.instrumentation.local_blks_dirtied", INT8OID}, + LogDesc{"instrumentation_local_blks_written", "query_metrics.instrumentation.local_blks_written", INT8OID}, + LogDesc{"instrumentation_temp_blks_read", "query_metrics.instrumentation.temp_blks_read", INT8OID}, + LogDesc{"instrumentation_temp_blks_written", "query_metrics.instrumentation.temp_blks_written", INT8OID}, + LogDesc{"instrumentation_inherited_calls", "query_metrics.instrumentation.inherited_calls", INT8OID}, + /* 8-byte aligned types - Network Stats */ + LogDesc{"instrumentation_sent_total_bytes", "query_metrics.instrumentation.sent.total_bytes", INT8OID}, + LogDesc{"instrumentation_sent_tuple_bytes", "query_metrics.instrumentation.sent.tuple_bytes", INT8OID}, + LogDesc{"instrumentation_sent_chunks", "query_metrics.instrumentation.sent.chunks", INT8OID}, + LogDesc{"instrumentation_received_total_bytes", "query_metrics.instrumentation.received.total_bytes", INT8OID}, + LogDesc{"instrumentation_received_tuple_bytes", "query_metrics.instrumentation.received.tuple_bytes", INT8OID}, + LogDesc{"instrumentation_received_chunks", "query_metrics.instrumentation.received.chunks", INT8OID}, + /* 8-byte aligned types - Interconnect Stats and spilled bytes */ + LogDesc{"interconnect_total_recv_queue_size", "query_metrics.instrumentation.interconnect.total_recv_queue_size", INT8OID}, + LogDesc{"interconnect_recv_queue_size_counting_time", "query_metrics.instrumentation.interconnect.recv_queue_size_counting_time", INT8OID}, + LogDesc{"interconnect_total_capacity", "query_metrics.instrumentation.interconnect.total_capacity", INT8OID}, + LogDesc{"interconnect_capacity_counting_time", 
"query_metrics.instrumentation.interconnect.capacity_counting_time", INT8OID}, + LogDesc{"interconnect_total_buffers", "query_metrics.instrumentation.interconnect.total_buffers", INT8OID}, + LogDesc{"interconnect_buffer_counting_time", "query_metrics.instrumentation.interconnect.buffer_counting_time", INT8OID}, + LogDesc{"interconnect_active_connections_num", "query_metrics.instrumentation.interconnect.active_connections_num", INT8OID}, + LogDesc{"interconnect_retransmits", "query_metrics.instrumentation.interconnect.retransmits", INT8OID}, + LogDesc{"interconnect_startup_cached_pkt_num", "query_metrics.instrumentation.interconnect.startup_cached_pkt_num", INT8OID}, + LogDesc{"interconnect_mismatch_num", "query_metrics.instrumentation.interconnect.mismatch_num", INT8OID}, + LogDesc{"interconnect_crc_errors", "query_metrics.instrumentation.interconnect.crc_errors", INT8OID}, + LogDesc{"interconnect_snd_pkt_num", "query_metrics.instrumentation.interconnect.snd_pkt_num", INT8OID}, + LogDesc{"interconnect_recv_pkt_num", "query_metrics.instrumentation.interconnect.recv_pkt_num", INT8OID}, + LogDesc{"interconnect_disordered_pkt_num", "query_metrics.instrumentation.interconnect.disordered_pkt_num", INT8OID}, + LogDesc{"interconnect_duplicated_pkt_num", "query_metrics.instrumentation.interconnect.duplicated_pkt_num", INT8OID}, + LogDesc{"interconnect_recv_ack_num", "query_metrics.instrumentation.interconnect.recv_ack_num", INT8OID}, + LogDesc{"interconnect_status_query_msg_num", "query_metrics.instrumentation.interconnect.status_query_msg_num", INT8OID}, + LogDesc{"spill_totalbytes", "query_metrics.spill.totalBytes", INT8OID}, + /* 8-byte aligned types - Float and Timestamp */ + LogDesc{"systemstat_runningtimeseconds", "query_metrics.systemStat.runningTimeSeconds", FLOAT8OID}, + LogDesc{"systemstat_usertimeseconds", "query_metrics.systemStat.userTimeSeconds", FLOAT8OID}, + LogDesc{"systemstat_kerneltimeseconds", "query_metrics.systemStat.kernelTimeSeconds", FLOAT8OID}, + 
LogDesc{"instrumentation_firsttuple", "query_metrics.instrumentation.firsttuple", FLOAT8OID}, + LogDesc{"instrumentation_startup", "query_metrics.instrumentation.startup", FLOAT8OID}, + LogDesc{"instrumentation_total", "query_metrics.instrumentation.total", FLOAT8OID}, + LogDesc{"instrumentation_blk_read_time", "query_metrics.instrumentation.blk_read_time", FLOAT8OID}, + LogDesc{"instrumentation_blk_write_time", "query_metrics.instrumentation.blk_write_time", FLOAT8OID}, + LogDesc{"instrumentation_startup_time", "query_metrics.instrumentation.startup_time", FLOAT8OID}, + LogDesc{"instrumentation_inherited_time", "query_metrics.instrumentation.inherited_time", FLOAT8OID}, + LogDesc{"datetime", "datetime", TIMESTAMPTZOID}, + LogDesc{"submit_time", "submit_time", TIMESTAMPTZOID}, + LogDesc{"start_time", "start_time", TIMESTAMPTZOID}, + LogDesc{"end_time", "end_time", TIMESTAMPTZOID}, + /* 4-byte aligned types - Query Key */ + LogDesc{"tmid", "query_key.tmid", INT4OID}, + LogDesc{"ssid", "query_key.ssid", INT4OID}, + LogDesc{"ccnt", "query_key.ccnt", INT4OID}, + /* 4-byte aligned types - Segment Key */ + LogDesc{"dbid", "segment_key.dbid", INT4OID}, + LogDesc{"segid", "segment_key.segindex", INT4OID}, + LogDesc{"spill_filecount", "query_metrics.spill.fileCount", INT4OID}, + /* Variable-length types - Query Info */ + LogDesc{"generator", "query_info.generator", TEXTOID}, + LogDesc{"query_text", "query_info.query_text", TEXTOID}, + LogDesc{"plan_text", "query_info.plan_text", TEXTOID}, + LogDesc{"template_query_text", "query_info.template_query_text", TEXTOID}, + LogDesc{"template_plan_text", "query_info.template_plan_text", TEXTOID}, + LogDesc{"user_name", "query_info.userName", TEXTOID}, + LogDesc{"database_name", "query_info.databaseName", TEXTOID}, + LogDesc{"rsgname", "query_info.rsgname", TEXTOID}, + LogDesc{"analyze_text", "query_info.analyze_text", TEXTOID}, + LogDesc{"error_message", "add_info.error_message", TEXTOID}, + LogDesc{"query_status", "query_status", 
TEXTOID},
+	/* Extra field */
+	LogDesc{"utility", "", BOOLOID},
+};
+/* clang-format on */
+
+/* Column count of log_tbl_desc; the trailing "utility" entry is the last column. */
+inline constexpr size_t natts_gpsc_log = log_tbl_desc.size();
+inline constexpr size_t attnum_gpsc_log_utility = natts_gpsc_log - 1;
+
+const std::unordered_map &proto_name_to_col_idx();
+
+TupleDesc DescribeTuple();
+
+Datum protots_to_timestamptz(const google::protobuf::Timestamp &ts);
+
+Datum field_to_datum(const google::protobuf::FieldDescriptor *field,
+					 const google::protobuf::Reflection *reflection,
+					 const google::protobuf::Message &msg);
+
+/* Process a single proto field and store in values/nulls arrays */
+void process_field(const google::protobuf::FieldDescriptor *field,
+				   const google::protobuf::Reflection *reflection,
+				   const google::protobuf::Message &msg,
+				   const std::string &field_name, Datum *values, bool *nulls);
+
+/*
+ * Extracts values from msg into values/nulls arrays. Caller must
+ * pre-init nulls[] to true (this function does not set nulls
+ * to true for nested messages if parent message is missing).
+ */
+void extract_query_req(const google::protobuf::Message &msg,
+					   const std::string &prefix, Datum *values, bool *nulls);
+
+#endif /* LOGSCHEMA_H */
diff --git a/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.cpp b/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.cpp
new file mode 100644
index 00000000000..de54a716016
--- /dev/null
+++ b/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.cpp
@@ -0,0 +1,316 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * gpdbwrappers.cpp
+ *
+ * IDENTIFICATION
+ * gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.cpp
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "gpdbwrappers.h"
+#include "log/LogOps.h"
+
+extern "C" {
+#include "postgres.h"
+#include "access/htup.h"
+#include "access/tupdesc.h"
+#include "cdb/cdbexplain.h"
+#include "commands/dbcommands.h"
+#include "commands/explain.h"
+#include "commands/resgroupcmds.h"
+#include "executor/instrument.h"
+#include "nodes/pg_list.h"
+#include "stat_statements_parser/pg_stat_statements_parser.h"
+#include "utils/builtins.h"
+#include "utils/elog.h"
+#include "utils/guc.h"
+#include "utils/varlena.h"
+}
+
+namespace
+{
+
+// Runs func(args...) inside PG_TRY/PG_CATCH. When PG_CATCH is entered the
+// error data is copied and the error state flushed; the failure is then
+// either reported as std::runtime_error (when Throws) or swallowed, in which
+// case a value-initialized RetType (or nothing, for void) is returned.
+template
+auto
+wrap(Func &&func, Args &&...args) noexcept(!Throws)
+	-> decltype(func(std::forward(args)...))
+{
+	using RetType = decltype(func(std::forward(args)...));
+
+	// Empty struct for void return type.
+	struct VoidResult
+	{
+	};
+	using ResultHolder = std::conditional_t, VoidResult,
+		std::optional>;
+
+	bool success;
+	ErrorData *edata;
+	ResultHolder result_holder;
+
+	PG_TRY();
+	{
+		if constexpr (!std::is_void_v)
+		{
+			result_holder.emplace(func(std::forward(args)...));
+		}
+		else
+		{
+			func(std::forward(args)...);
+		}
+		edata = NULL;
+		success = true;
+	}
+	PG_CATCH();
+	{
+		// Copy the error data into TopMemoryContext, then clear the
+		// error state so execution can continue after PG_END_TRY.
+		MemoryContext oldctx = MemoryContextSwitchTo(TopMemoryContext);
+		edata = CopyErrorData();
+		MemoryContextSwitchTo(oldctx);
+		FlushErrorState();
+		success = false;
+	}
+	PG_END_TRY();
+
+	if (!success)
+	{
+		// Take the message out of edata before the copy is freed.
+		std::string err;
+		if (edata && edata->message)
+		{
+			err = std::string(edata->message);
+		}
+		else
+		{
+			err = "Unknown error occurred";
+		}
+
+		if (edata)
+		{
+			FreeErrorData(edata);
+		}
+
+		if constexpr (Throws)
+		{
+			throw std::runtime_error(err);
+		}
+
+		// Non-throwing instantiation: the error is dropped and the caller
+		// receives a value-initialized result.
+		if constexpr (!std::is_void_v)
+		{
+			return RetType{};
+		}
+		else
+		{
+			return;
+		}
+	}
+
+	if constexpr (!std::is_void_v)
+	{
+		return *std::move(result_holder);
+	}
+	else
+	{
+		return;
+	}
+}
+
+// Forwards to wrap() without a noexcept specification; presumably the
+// Throws = true instantiation (template arguments are not visible here).
+template
+auto
+wrap_throw(Func &&func, Args &&...args)
+	-> decltype(func(std::forward(args)...))
+{
+	return wrap(std::forward(func), std::forward(args)...);
+}
+
+// Forwards to wrap() and is declared noexcept; presumably the
+// Throws = false instantiation (template arguments are not visible here).
+template
+auto
+wrap_noexcept(Func &&func, Args &&...args) noexcept
+	-> decltype(func(std::forward(args)...))
+{
+	return wrap(std::forward(func), std::forward(args)...);
+}
+} // namespace
+
+void *
+gpdb::palloc(Size size)
+{
+	return wrap_throw(::palloc, size);
+}
+
+void *
+gpdb::palloc0(Size size)
+{
+	return wrap_throw(::palloc0, size);
+}
+
+char *
+gpdb::pstrdup(const char *str)
+{
+	return wrap_throw(::pstrdup, str);
+}
+
+char *
+gpdb::get_database_name(Oid dbid) noexcept
+{
+	return wrap_noexcept(::get_database_name, dbid);
+}
+
+bool
+gpdb::split_identifier_string(char *rawstring, char separator,
+							  List **namelist) noexcept
+{
+	return wrap_noexcept(SplitIdentifierString, rawstring, separator, namelist);
+}
+
+// Produces verbose, text-format EXPLAIN output for query_desc and returns
+// the ExplainState by value (a copy of *es).
+ExplainState
+gpdb::get_explain_state(QueryDesc *query_desc, bool costs) noexcept
+{
+	return wrap_noexcept([&]() {
+		ExplainState *es = NewExplainState();
+		es->costs = costs;
+		es->verbose = true;
+		es->format = EXPLAIN_FORMAT_TEXT;
+		ExplainBeginOutput(es);
+		ExplainPrintPlan(es, query_desc);
+		ExplainEndOutput(es);
+		return *es;
+	});
+}
+
+// Like get_explain_state(), but buffers/timing/summary follow the analyze
+// flag and the plan plus exec stats are printed only when analyze is true.
+ExplainState
+gpdb::get_analyze_state(QueryDesc *query_desc, bool analyze) noexcept
+{
+	return wrap_noexcept([&]() {
+		ExplainState *es = NewExplainState();
+		es->analyze = analyze;
+		es->verbose = true;
+		es->buffers = es->analyze;
+		es->timing = es->analyze;
+		es->summary = es->analyze;
+		es->format = EXPLAIN_FORMAT_TEXT;
+		ExplainBeginOutput(es);
+		if (analyze)
+		{
+			ExplainPrintPlan(es, query_desc);
+			ExplainPrintExecStatsEnd(es, query_desc);
+		}
+		ExplainEndOutput(es);
+		return *es;
+	});
+}
+
+Instrumentation *
+gpdb::instr_alloc(size_t n, int instrument_options, bool async_mode)
+{
+	return wrap_throw(InstrAlloc, n, instrument_options, async_mode);
+}
+
+// Rejects null arguments with std::runtime_error before calling into
+// ::heap_form_tuple.
+HeapTuple
+gpdb::heap_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull)
+{
+	if (!tupleDescriptor || !values || !isnull)
+		throw std::runtime_error(
+			"Invalid input parameters for heap tuple formation");
+
+	return wrap_throw(::heap_form_tuple, tupleDescriptor, values, isnull);
+}
+
+void
+gpdb::pfree(void *pointer) noexcept
+{
+	// Note that ::pfree asserts that pointer != NULL.
+	if (!pointer)
+		return;
+
+	wrap_noexcept(::pfree, pointer);
+}
+
+// Calls MemoryContextSwitchTo() directly, without going through wrap().
+MemoryContext
+gpdb::mem_ctx_switch_to(MemoryContext context) noexcept
+{
+	return MemoryContextSwitchTo(context);
+}
+
+// Returns nullptr for a null name without calling GetConfigOption.
+const char *
+gpdb::get_config_option(const char *name, bool missing_ok,
+						bool restrict_superuser) noexcept
+{
+	if (!name)
+		return nullptr;
+
+	return wrap_noexcept(GetConfigOption, name, missing_ok, restrict_superuser);
+}
+
+// A null list is ignored.
+void
+gpdb::list_free(List *list) noexcept
+{
+	if (!list)
+		return;
+
+	wrap_noexcept(::list_free, list);
+}
+
+CdbExplain_ShowStatCtx *
+gpdb::cdbexplain_showExecStatsBegin(QueryDesc *query_desc, instr_time starttime)
+{
+	if (!query_desc)
+		throw std::runtime_error("Invalid query descriptor");
+
+	return wrap_throw(::cdbexplain_showExecStatsBegin, query_desc, starttime);
+}
+
+void
+gpdb::instr_end_loop(Instrumentation *instr)
+{
+	if (!instr)
+		throw std::runtime_error("Invalid instrumentation pointer");
+
+	wrap_throw(::InstrEndLoop, instr);
+}
+
+char *
+gpdb::gen_normquery(const char *query) noexcept
+{
+	return wrap_noexcept(::gen_normquery, query);
+}
+
+StringInfo
+gpdb::gen_normplan(const char *exec_plan) noexcept
+{
+	return wrap_noexcept(::gen_normplan, exec_plan);
+}
+
+char *
+gpdb::get_rg_name_for_id(Oid group_id)
+{
+	return wrap_throw(GetResGroupNameForId, group_id);
+}
+
+Oid
+gpdb::get_rg_id_by_session_id(int session_id)
+{
+	return wrap_throw(ResGroupGetGroupIdBySessionId, session_id);
+}
+
+void
+gpdb::insert_log(const gpsc::SetQueryReq &req, bool utility)
+{
+	return wrap_throw(::insert_log, req, utility);
+}
diff --git a/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.h b/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.h
new file mode 100644
index 00000000000..5237b6be68a
--- /dev/null
+++ b/gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.h
@@ -0,0 +1,86 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more
contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * gpdbwrappers.h
+ *
+ * IDENTIFICATION
+ * gpcontrib/gp_stats_collector/src/memory/gpdbwrappers.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef GPDBWRAPPERS_H
+#define GPDBWRAPPERS_H
+
+extern "C" {
+#include "postgres.h"
+#include "access/htup.h"
+#include "commands/explain.h"
+#include "executor/instrument.h"
+#include "nodes/pg_list.h"
+#include "utils/elog.h"
+#include "utils/memutils.h"
+}
+
+#include
+#include
+#include
+#include
+#include
+
+namespace gpsc
+{
+class SetQueryReq;
+} // namespace gpsc
+
+namespace gpdb
+{
+
+// Functions that call palloc().
+// Make sure correct memory context is set.
+// Functions declared noexcept do not throw; the others report failures
+// as std::runtime_error (see gpdbwrappers.cpp).
+void *palloc(Size size);
+void *palloc0(Size size);
+char *pstrdup(const char *str);
+char *get_database_name(Oid dbid) noexcept;
+bool split_identifier_string(char *rawstring, char separator,
+							 List **namelist) noexcept;
+ExplainState get_explain_state(QueryDesc *query_desc, bool costs) noexcept;
+ExplainState get_analyze_state(QueryDesc *query_desc, bool analyze) noexcept;
+Instrumentation *instr_alloc(size_t n, int instrument_options, bool async_mode);
+HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, Datum *values,
+						  bool *isnull);
+CdbExplain_ShowStatCtx *cdbexplain_showExecStatsBegin(QueryDesc *query_desc,
+													  instr_time starttime);
+void instr_end_loop(Instrumentation *instr);
+char *gen_normquery(const char *query) noexcept;
+StringInfo gen_normplan(const char *executionPlan) noexcept;
+char *get_rg_name_for_id(Oid group_id);
+void insert_log(const gpsc::SetQueryReq &req, bool utility);
+
+// Palloc-free functions.
+// pfree() and list_free() ignore a null argument; get_config_option()
+// returns nullptr for a null name.
+void pfree(void *pointer) noexcept;
+MemoryContext mem_ctx_switch_to(MemoryContext context) noexcept;
+const char *get_config_option(const char *name, bool missing_ok,
+							  bool restrict_superuser) noexcept;
+void list_free(List *list) noexcept;
+Oid get_rg_id_by_session_id(int session_id);
+
+} // namespace gpdb
+
+#endif /* GPDBWRAPPERS_H */
diff --git a/gpcontrib/gp_stats_collector/src/stat_statements_parser/README.md b/gpcontrib/gp_stats_collector/src/stat_statements_parser/README.md
new file mode 100644
index 00000000000..927189474fe
--- /dev/null
+++ b/gpcontrib/gp_stats_collector/src/stat_statements_parser/README.md
@@ -0,0 +1,20 @@
+
+
+This directory contains a slightly modified subset of pg_stat_statements for PG v9.4 to be used in query and plan ID generation.
diff --git a/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.c b/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.c
new file mode 100644
index 00000000000..8e7bd917541
--- /dev/null
+++ b/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.c
@@ -0,0 +1,378 @@
+/*-------------------------------------------------------------------------
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * pg_stat_statements_parser.c
+ *
+ * IDENTIFICATION
+ * gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+// NOTE: this file is just a bunch of code borrowed from pg_stat_statements for PG 9.4
+// and from our own in-house implementation of pg_stat_statements for managed PG
+
+#include "postgres.h"
+
+#include
+#include
+
+#include "common/hashfn.h"
+#include "lib/stringinfo.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "parser/scanner.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/queryjumble.h"
+
+#include "pg_stat_statements_parser.h"
+
+/*
+ * Fallback codes for the constant token kinds, used only when the included
+ * headers do not already define them.
+ * NOTE(review): these presumably have to match the token numbering of the
+ * core grammar this is built against - confirm.
+ */
+#ifndef FCONST
+#define FCONST 260
+#endif
+#ifndef SCONST
+#define SCONST 261
+#endif
+#ifndef BCONST
+#define BCONST 263
+#endif
+#ifndef XCONST
+#define XCONST 264
+#endif
+#ifndef ICONST
+#define ICONST 266
+#endif
+
+static void fill_in_constant_lengths(JumbleState *jstate, const char *query);
+static int comp_location(const void *a, const void *b);
+StringInfo gen_normplan(const char *execution_plan);
+static bool need_replace(int token);
+static char *generate_normalized_query(JumbleState *jstate, const char *query,
+									   int *query_len_p, int encoding);
+
+/* Initialization hook: the only action is a call to EnableQueryId(). */
+void
+stat_statements_parser_init(void)
+{
+	EnableQueryId();
+}
+
+/* Counterpart of stat_statements_parser_init(); nothing to undo. */
+void
+stat_statements_parser_deinit(void)
+{
+	/* NO-OP */
+}
+
+/*
+ * check if token should be replaced by substitute variable, i.e. whether it
+ * is one of the constant token kinds (FCONST, ICONST, SCONST, BCONST, XCONST)
+ */
+static bool
+need_replace(int token)
+{
+	return (token == FCONST) || (token == ICONST) || (token == SCONST) ||
+		   (token == BCONST) || (token == XCONST);
+}
+
+/*
+ * gen_normplan - parse execution plan using flex and replace all CONST to
+ * substitute variables.
+ */ +StringInfo +gen_normplan(const char *execution_plan) +{ + core_yyscan_t yyscanner; + core_yy_extra_type yyextra; + core_YYSTYPE yylval; + YYLTYPE yylloc; + int tok; + int bind_prefix = 1; + char *tmp_str; + YYLTYPE last_yylloc = 0; + int last_tok = 0; + StringInfo plan_out = makeStringInfo(); + ; + + yyscanner = scanner_init(execution_plan, &yyextra, +#if PG_VERSION_NUM >= 120000 + &ScanKeywords, ScanKeywordTokens +#else + ScanKeywords, NumScanKeywords +#endif + ); + + for (;;) + { + /* get the next lexem */ + tok = core_yylex(&yylval, &yylloc, yyscanner); + + /* now we store end previsous lexem in yylloc - so could prcess it */ + if (need_replace(last_tok)) + { + /* substitute variable instead of CONST */ + int s_len = asprintf(&tmp_str, "$%i", bind_prefix++); + if (s_len > 0) + { + appendStringInfoString(plan_out, tmp_str); + free(tmp_str); + } + else + { + appendStringInfoString(plan_out, "??"); + } + } + else + { + /* do not change - just copy as-is */ + tmp_str = strndup((char *) execution_plan + last_yylloc, + yylloc - last_yylloc); + appendStringInfoString(plan_out, tmp_str); + free(tmp_str); + } + /* check if further parsing not needed */ + if (tok == 0) + break; + last_tok = tok; + last_yylloc = yylloc; + } + + scanner_finish(yyscanner); + + return plan_out; +} + +/* + * comp_location: comparator for qsorting LocationLen structs by location + */ +static int +comp_location(const void *a, const void *b) +{ + int l = ((const LocationLen *) a)->location; + int r = ((const LocationLen *) b)->location; + + if (l < r) + return -1; + else if (l > r) + return +1; + else + return 0; +} + +/* + * Given a valid SQL string and an array of constant-location records, + * fill in the textual lengths of those constants. + * + * The constants may use any allowed constant syntax, such as float literals, + * bit-strings, single-quoted strings and dollar-quoted strings. This is + * accomplished by using the public API for the core scanner. 
+ * + * It is the caller's job to ensure that the string is a valid SQL statement + * with constants at the indicated locations. Since in practice the string + * has already been parsed, and the locations that the caller provides will + * have originated from within the authoritative parser, this should not be + * a problem. + * + * Duplicate constant pointers are possible, and will have their lengths + * marked as '-1', so that they are later ignored. (Actually, we assume the + * lengths were initialized as -1 to start with, and don't change them here.) + * + * N.B. There is an assumption that a '-' character at a Const location begins + * a negative numeric constant. This precludes there ever being another + * reason for a constant to start with a '-'. + */ +static void +fill_in_constant_lengths(JumbleState *jstate, const char *query) +{ + LocationLen *locs; + core_yyscan_t yyscanner; + core_yy_extra_type yyextra; + core_YYSTYPE yylval; + YYLTYPE yylloc; + int last_loc = -1; + int i; + + /* + * Sort the records by location so that we can process them in order while + * scanning the query text. 
+ */ + if (jstate->clocations_count > 1) + qsort(jstate->clocations, jstate->clocations_count, sizeof(LocationLen), + comp_location); + locs = jstate->clocations; + + /* initialize the flex scanner --- should match raw_parser() */ + yyscanner = scanner_init(query, &yyextra, &ScanKeywords, ScanKeywordTokens); + + /* Search for each constant, in sequence */ + for (i = 0; i < jstate->clocations_count; i++) + { + int loc = locs[i].location; + int tok; + + Assert(loc >= 0); + + if (loc <= last_loc) + continue; /* Duplicate constant, ignore */ + + /* Lex tokens until we find the desired constant */ + for (;;) + { + tok = core_yylex(&yylval, &yylloc, yyscanner); + + /* We should not hit end-of-string, but if we do, behave sanely */ + if (tok == 0) + break; /* out of inner for-loop */ + + /* + * We should find the token position exactly, but if we somehow + * run past it, work with that. + */ + if (yylloc >= loc) + { + if (query[loc] == '-') + { + /* + * It's a negative value - this is the one and only case + * where we replace more than a single token. + * + * Do not compensate for the core system's special-case + * adjustment of location to that of the leading '-' + * operator in the event of a negative constant. It is + * also useful for our purposes to start from the minus + * symbol. In this way, queries like "select * from foo + * where bar = 1" and "select * from foo where bar = -2" + * will have identical normalized query strings. + */ + tok = core_yylex(&yylval, &yylloc, yyscanner); + if (tok == 0) + break; /* out of inner for-loop */ + } + + /* + * We now rely on the assumption that flex has placed a zero + * byte after the text of the current token in scanbuf. 
+ */ + locs[i].length = strlen(yyextra.scanbuf + loc); + break; /* out of inner for-loop */ + } + } + + /* If we hit end-of-string, give up, leaving remaining lengths -1 */ + if (tok == 0) + break; + + last_loc = loc; + } + + scanner_finish(yyscanner); +} + +/* + * Generate a normalized version of the query string that will be used to + * represent all similar queries. + * + * Note that the normalized representation may well vary depending on + * just which "equivalent" query is used to create the hashtable entry. + * We assume this is OK. + * + * *query_len_p contains the input string length, and is updated with + * the result string length (which cannot be longer) on exit. + * + * Returns a palloc'd string. + */ +static char * +generate_normalized_query(JumbleState *jstate, const char *query, + int *query_len_p, int encoding) +{ + char *norm_query; + int query_len = *query_len_p; + int i, len_to_wrt, /* Length (in bytes) to write */ + quer_loc = 0, /* Source query byte location */ + n_quer_loc = 0, /* Normalized query byte location */ + last_off = 0, /* Offset from start for previous tok */ + last_tok_len = 0; /* Length (in bytes) of that tok */ + + /* + * Get constants' lengths (core system only gives us locations). Note + * this also ensures the items are sorted by location. + */ + fill_in_constant_lengths(jstate, query); + + /* Allocate result buffer */ + norm_query = palloc(query_len + 1); + + for (i = 0; i < jstate->clocations_count; i++) + { + int off, /* Offset from start for cur tok */ + tok_len; /* Length (in bytes) of that tok */ + + off = jstate->clocations[i].location; + tok_len = jstate->clocations[i].length; + + if (tok_len < 0) + continue; /* ignore any duplicates */ + + /* Copy next chunk (what precedes the next constant) */ + len_to_wrt = off - last_off; + len_to_wrt -= last_tok_len; + + Assert(len_to_wrt >= 0); + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + /* And insert a '?' 
in place of the constant token */ + norm_query[n_quer_loc++] = '?'; + + quer_loc = off + tok_len; + last_off = off; + last_tok_len = tok_len; + } + + /* + * We've copied up until the last ignorable constant. Copy over the + * remaining bytes of the original query string. + */ + len_to_wrt = query_len - quer_loc; + + Assert(len_to_wrt >= 0); + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + Assert(n_quer_loc <= query_len); + norm_query[n_quer_loc] = '\0'; + + *query_len_p = n_quer_loc; + return norm_query; +} + +char * +gen_normquery(const char *query) +{ + if (!query) + { + return NULL; + } + JumbleState jstate; + jstate.jumble = (unsigned char *) palloc(JUMBLE_SIZE); + jstate.jumble_len = 0; + jstate.clocations_buf_size = 32; + jstate.clocations = (LocationLen *) palloc(jstate.clocations_buf_size * + sizeof(LocationLen)); + jstate.clocations_count = 0; + int query_len = strlen(query); + return generate_normalized_query(&jstate, query, &query_len, + GetDatabaseEncoding()); +} \ No newline at end of file diff --git a/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.h b/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.h new file mode 100644 index 00000000000..b6c5dea7b36 --- /dev/null +++ b/gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.h @@ -0,0 +1,45 @@ +/*------------------------------------------------------------------------- + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * pg_stat_statements_parser.h
+ *
+ * IDENTIFICATION
+ * gpcontrib/gp_stats_collector/src/stat_statements_parser/pg_stat_statements_parser.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef PG_STAT_STATEMENTS_PARSER_H
+#define PG_STAT_STATEMENTS_PARSER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Setup / teardown hooks of the parser module. */
+extern void stat_statements_parser_init(void);
+extern void stat_statements_parser_deinit(void);
+
+/* Returns the plan text with constant tokens replaced by $n placeholders. */
+StringInfo gen_normplan(const char *executionPlan);
+/* Returns a palloc'd normalized copy of query, or NULL when query is NULL. */
+char *gen_normquery(const char *query);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PG_STAT_STATEMENTS_PARSER_H */
diff --git a/pom.xml b/pom.xml index 0e000093399..b9915331c0c 100644 --- a/pom.xml +++ b/pom.xml @@ -572,9 +572,6 @@ code or new licensing patterns. src/backend/gporca/**/Makefile src/backend/gporca/cmake/FindXerces.cmake - src/backend/gporca/concourse/build_and_test.py - src/backend/gporca/concourse/xerces-c/build_xerces.py - src/backend/gporca/concourse/xerces-c/xerces-c-3.1.2.tar.gz.sha256 src/backend/gporca/server/fixdxl.sh src/backend/gporca/server/include/unittest/gpopt/operators/CScalarIsDistinctFromTest.h src/backend/gporca/server/dxl.xsd @@ -1050,6 +1047,7 @@ code or new licensing patterns. src/backend/postmaster/test/checkpointer_test.c src/backend/postmaster/README.auto-ANALYZE src/backend/mock.mk + src/backend/catalog/system_views_gp.in src/backend/catalog/storage_tablespace.c src/backend/catalog/test/storage_tablespace_test.c src/backend/catalog/sql_features.txt @@ -1272,6 +1270,13 @@ code or new licensing patterns.
src/include/task/task_states.h src/include/task/job_metadata.h + + gpcontrib/gp_stats_collector/gp_stats_collector.control + gpcontrib/gp_stats_collector/.clang-format + gpcontrib/gp_stats_collector/Makefile + @@ -1744,7 +1749,8 @@ code or new licensing patterns. devops/deploy/docker/build/rocky8/tests/requirements.txt devops/deploy/docker/build/rocky9/tests/requirements.txt - devops/deploy/docker/build/ubuntu22.04/tests/requirements.txt + devops/deploy/docker/build/rocky10/tests/requirements.txt + devops/deploy/docker/build/ubuntu22.04/tests/requirements.txt devops/deploy/docker/build/ubuntu24.04/tests/requirements.txt