From 2d1a3e2e6c06bc25812ea49e193ee56754ec660c Mon Sep 17 00:00:00 2001
From: Kurt Garloff <kurt@garloff.de>
Date: Tue, 16 Jun 2026 12:30:47 +0200
Subject: [PATCH 1/4] Add some workstation and some new GPUs.

This way we keep the tables current and helpful.

Signed-off-by: Kurt Garloff <kurt@garloff.de>
---
 Standards/scs-0100-v3-flavor-naming.md        |   3 +-
 ...w1-flavor-naming-implementation-testing.md | 101 ++++++++++++++++--
 2 files changed, 95 insertions(+), 9 deletions(-)

diff --git a/Standards/scs-0100-v3-flavor-naming.md b/Standards/scs-0100-v3-flavor-naming.md
index 336bc0d14..eaa329423 100644
--- a/Standards/scs-0100-v3-flavor-naming.md
+++ b/Standards/scs-0100-v3-flavor-naming.md
@@ -438,7 +438,8 @@ Note that the vendor letter X is mandatory, generation and processing units are
 | `A`      | AMD    | compute units (CUs)             |
 | `I`      | Intel  | execution units (EUs)           |
 
-For nVidia, the generation N can be f=Fermi, k=Kepler, m=Maxwell, p=Pascal, v=Volta, t=turing, a=Ampere, l=Ada Lovelace, g=Grace Hopper, b=Blackwell, ...,
+For nVidia, the generation N can be f=Fermi, k=Kepler, m=Maxwell, p=Pascal, v=Volta, t=turing, a=Ampere,
+l=Ada Lovelace, g=Grace Hopper, b=Blackwell, ...,
 for AMD GCN-x=0.x, CDNA-x=x, RDNA-x=x.1, RDNA-3.5=3.5, UDNA-x=x
 for Intel Gen9=0.9, Xe(12.1/DG1)=1, Xe(12.2)=2, Arc(12.7/DG2)=3, BattleImage(20.0)=4, ...
 (Note: This may need further work to properly reflect what's out there.)
diff --git a/Standards/scs-0100-w1-flavor-naming-implementation-testing.md b/Standards/scs-0100-w1-flavor-naming-implementation-testing.md
index d043ba28b..2bdc9f6b8 100644
--- a/Standards/scs-0100-w1-flavor-naming-implementation-testing.md
+++ b/Standards/scs-0100-w1-flavor-naming-implementation-testing.md
@@ -48,7 +48,10 @@ possibly recommended flavors can be created, or the user can set a file containi
 ### GPU table
 
 The most commonly used datacenter GPUs are listed here, showing what GPUs (or partitions
-of a GPU) result in what GPU part of the flavor name.
+of a GPU) result in what GPU part of the flavor name. We provide these for convenience; most
+values are taken from data sheets and are not based on our own testing. Providers must look up
+the values (SMs/CUs/EUs and VRAM) actually provided to users and correctly fill these into the
+SCS names. This is in particular true for the MIG configurations.
 
 #### Nvidia (`N`)
 
@@ -95,6 +98,15 @@ No MIG support, 128 Cuda Cores and 4 Tensor Cores per SM.
 | L40G       | 568      | 18176      | 142 | 48G GDDR6 | `GNl-142h-48`  |
 | L40S       | 568      | 18176      | 142 | 48G GDDR6 | `GNl-142hh-48` |
 
+| Nvidia GPU   | Tensor C | Cuda Cores | SMs | VRAM      | SCS name piece |
+|--------------|----------|------------|-----|-----------|----------------|
+| RTX2000  Ada |   88     |  2816      |  22 | 16G GDDR6 | `GNl-22-16`    |
+| RTX4000  Ada |  192     |  6144      |  48 | 20G GDDR6 | `GNl-48-20`    |
+| RTX4500  Ada |  240     |  7680      |  60 | 24G GDDR6 | `GNl-60-24`    |
+| RTX5000  Ada |  400     | 12800      | 100 | 32G GDDR6 | `GNl-100-32`   |
+| RTX5880  Ada |  440     | 14080      | 110 | 48G GDDR6 | `GNl-110-48`   |
+| RTX6000  Ada |  568     | 18176      | 142 | 48G GDDR6 | `GNl-142-48`   |
+
 ##### Grace Hopper (`g`)
 
 These have MIG support and 128 Cuda Cores and 4 Tensor Cores per SM.
@@ -112,6 +124,35 @@ These have MIG support and 128 Cuda Cores and 4 Tensor Cores per SM.
 [+] The precise numbers for the 1/7 MIG configurations are not known by the author of
 this document and need validation.
 
+##### Blackwell (`b`)
+
+These have MIG support and 128 Cuda Cores and 4 Tensor Cores per SM.
+
+| Nvidia GPU | Fraction | Tensor C | Cuda Cores | SMs | VRAM       | SCS GPU name   |
+|------------|----------|----------|------------|-----|------------|----------------|
+| GB200      | 1/1      |  640     | 20480      | 160 | 192G HBM3e | `GNb-160-192h` |
+| GB200      | 1/2      |  320     | 10240      |  80 |  96G HBM3e | `GNb-80-96h`   |
+| GB200      | 2/7      |   88+    |  5632+     |  44+|  45G HBM3e+| `GNb-44-45h`+  |
+| GB200      | 1/7      |   44+    |  2816+     |  22+|  23G HBM3e+| `GNb-22-23h`+  |
+| GB300      | 1/1      |  640     | 20480      | 160 | 288G HBM3e | `GNb-160-288h` |
+| GB300      | 1/2      |  320     | 10240      |  80 | 144G HBM3e | `GNb-80-144h`  |
+| ... |
+
+[+] The precise numbers for the 1/7 MIG configurations are not known by the author of
+this document and need validation.
+
+| Nvidia GPU            | Fraction | Tensor C | Cuda Cores | SMs | VRAM       | SCS GPU name   |
+|-----------------------|----------|----------|------------|-----|------------|----------------|
+| RTX Pro2000 Blackwell |  1/1     |  136     |  4352      |  34 |  16G GDDR7 | `GNb-34-16`    |
+| RTX Pro4000 Blackwell |  1/1     |  280     |  8960      |  70 |  24G GDDR7 | `GNb-70-24`    |
+| RTX Pro4500 Blackwell |  1/1     |  328     | 10496      |  82 |  32G GDDR7 | `GNb-82-32`    |
+| RTX Pro5000 Blackwell |  1/1     |  440     | 14080      | 110 |  72G GDDR7 | `GNb-110-72`   |
+| RTX Pro5000 Blackwell |  1/2     |  220     |  7040      |  55 |  36G GDDR7 | `GNb-55-36`    |
+| RTX Pro6000 Blackwell |  1/1     |  752     | 24064      | 188 |  96G GDDR7 | `GNb-188-96`   |
+| RTX Pro6000 Blackwell |  1/2     |  376     | 13032      |  94 |  48G GDDR7 | `GNb-94-48`    |
+| RTX Pro6000 Blackwell |  1/4     |  188     |  6516      |  47 |  24G GDDR7 | `GNb-47-24`    |
+
+
 #### AMD Radeon (`A`)
 
 ##### CDNA 2 (`2`)
@@ -130,28 +171,72 @@ SRIOV partitioning is possible, resulting in pass-through for
 up to 8 partitions, somewhat similar to Nvidia MIG. 4 Tensor
 Cores and 64 Stream Processors per CU.
 
-| AMD GPU     | Tensor C | Stream Proc | CUs | VRAM       | SCS name piece |
-|-------------|----------|-------------|-----|------------|----------------|
-| Inst MI300X | 1216     | 19456       | 304 | 192G HBM3  | `GA3-304-192h` |
-| Inst MI325X | 1216     | 19456       | 304 | 288G HBM3  | `GA3-304-288h` |
+| AMD GPU     | Tensor C | Stream Proc | CUs | VRAM       | SCS name piece  |
+|-------------|----------|-------------|-----|------------|-----------------|
+| Inst MI300X | 1216     | 19456       | 304 | 192G HBM3  | `GA3-304-192h`  |
+| Inst MI325X | 1216     | 19456       | 304 | 288G HBM3  | `GA3-304-288h`  |
+
+##### CDNA 4 (`4`)
+
+SRIOV partitioning is possible, resulting in pass-through for
+up to 8 partitions, somewhat similar to Nvidia MIG. 4 Tensor
+Cores and 64 Stream Processors per CU.
+
+| AMD GPU     | Tensor C | Stream Proc | CUs | VRAM       | SCS name piece  |
+|-------------|----------|-------------|-----|------------|-----------------|
+| Inst MI350X | 1024     | 16384       | 256 | 288G HBM3e | `GA4-256-288h`  |
+| Inst MI355X | 1024     | 16384       | 256 | 288G HBM3e | `GA4-256h-288h` |
+
+The Instinct MI355X has a higher watttage and thus slightly higher clocks
+than the MI350X but is otherwise identical -- we can thus use the `h` modifier
+to identify the higher performance version.
+
+##### Workstation RDNA 3 (`3.1`) and 4 (`4.1`)
+
+2 Tensor Cores and 64 Stream Processors per CU.
+
+| AMD Radeon   | Tensor C | Stream Proc | CUs | VRAM       | SCS name piece  |
+|--------------|----------|-------------|-----|------------|-----------------|
+|    Pro W7900 |  192     |  6144       |  96 |  48G GDDR6 | `GA3.1-96-48`   |
+| AI Pro R9700 |  128     |  4096       |  64 |  32G GDDR6 | `GA4.1-64-32`   |
 
 Note that we previously assumed more similarity of consumer RDNA-x with
-server CDNA-x that actually is the case; the RDNA-x cards now use `x.1`
+server CDNA-x than actually is the case; the RDNA-x cards now use `x.1`
 (since v3.3 as of Oct 2025) to be able to differentiate them. We will
 tolerate potential rare cases of old installations calling RDNA-x as
-generation `x` for the time being.
+generation `x` for the time being. If AMD follows through on merging both lines
+into UDNA-5, we will be able to avoid this split in the future.
 
 #### intel Xe (`I`)
 
 ##### Xe-HPC (Ponte Vecchio) (`3`)
 
-1 EU corresponds to one Tensor Core and contains 128 Shading Units.
+One EU corresponds to one Tensor Core and contains 128 Shading Units.
 
 | intel DC GPU | Tensor C | Shading U | EUs | VRAM       | SCS name part  |
 |--------------|----------|-----------|-----|------------|----------------|
 | Max 1100     |  56      |  7168     |  56 |  48G HBM2e | `GI3-56-48h`   |
 | Max 1550     | 128      | 16384     | 128 | 128G HBM2e | `GI3-128-128h` |
 
+##### Workstation cards Arc B (`4`)
+
+One EU has one tensor core and 16 shading units.
+
+| intel GPU   | Tensor C | Shading U | EUs | VRAM       | SCS name part  |
+|-------------|----------|-----------|-----|------------|----------------|
+| Arc Pro B50 |   128    |  2048     | 128 | 16G GDDR6  | `GI4-128-16`   |
+| Arc Pro B60 |   160    |  2560     | 160 | 24G GDDR6  | `GI4-160-24`   |
+| Arc Pro B65 |   160    |  2560     | 160 | 32G GDDR6  | `GI4-160-32`   |
+| Arc Pro B70 |   256    |  4096     | 256 | 32G GDDR6  | `GI4-256-32`   |
+
+#### Consumer cards
+
+Note that we don't recommend using consumer cards. 
+That said, the schema allows to specify them and for example do PCI pass-through
+of Nvidia RTX4080S (`GNl-80-16`), RTX4090 (`GNl-128-24`), RTX5080S (`GNb-84-24`), 
+RTX5090 (`GNb-170-32`), or AMD Radeon RX7900XTX (`GA3.1-96-24`).
+
+
 ## Automated tests
 
 The following testcases [are implemented](https://github.com/SovereignCloudStack/standards/tree/main/Tests/iaas/openstack_test.py):

From 768317913dd8dda2091a7642d806cf98d25bbedf Mon Sep 17 00:00:00 2001
From: Kurt Garloff <kurt@garloff.de>
Date: Tue, 16 Jun 2026 12:46:54 +0200
Subject: [PATCH 2/4] New letter u for Blackwell Ultra.

The tensor cores are different enough to justify this.
Main use case for GPUs in clouds is AI and the tensor cores do
make a difference there.

Signed-off-by: Kurt Garloff <kurt@garloff.de>
---
 Standards/scs-0100-v3-flavor-naming.md                |  2 +-
 ...cs-0100-w1-flavor-naming-implementation-testing.md | 11 ++++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/Standards/scs-0100-v3-flavor-naming.md b/Standards/scs-0100-v3-flavor-naming.md
index eaa329423..ca09253bc 100644
--- a/Standards/scs-0100-v3-flavor-naming.md
+++ b/Standards/scs-0100-v3-flavor-naming.md
@@ -439,7 +439,7 @@ Note that the vendor letter X is mandatory, generation and processing units are
 | `I`      | Intel  | execution units (EUs)           |
 
 For nVidia, the generation N can be f=Fermi, k=Kepler, m=Maxwell, p=Pascal, v=Volta, t=turing, a=Ampere,
-l=Ada Lovelace, g=Grace Hopper, b=Blackwell, ...,
+l=Ada Lovelace, g=Grace Hopper, b=Blackwell, u=BlackwellUltra, ...,
 for AMD GCN-x=0.x, CDNA-x=x, RDNA-x=x.1, RDNA-3.5=3.5, UDNA-x=x
 for Intel Gen9=0.9, Xe(12.1/DG1)=1, Xe(12.2)=2, Arc(12.7/DG2)=3, BattleImage(20.0)=4, ...
 (Note: This may need further work to properly reflect what's out there.)
diff --git a/Standards/scs-0100-w1-flavor-naming-implementation-testing.md b/Standards/scs-0100-w1-flavor-naming-implementation-testing.md
index 2bdc9f6b8..7525dc544 100644
--- a/Standards/scs-0100-w1-flavor-naming-implementation-testing.md
+++ b/Standards/scs-0100-w1-flavor-naming-implementation-testing.md
@@ -124,7 +124,7 @@ These have MIG support and 128 Cuda Cores and 4 Tensor Cores per SM.
 [+] The precise numbers for the 1/7 MIG configurations are not known by the author of
 this document and need validation.
 
-##### Blackwell (`b`)
+##### Blackwell (`b`) and Blackwell Ultra (`u`)
 
 These have MIG support and 128 Cuda Cores and 4 Tensor Cores per SM.
 
@@ -134,13 +134,18 @@ These have MIG support and 128 Cuda Cores and 4 Tensor Cores per SM.
 | GB200      | 1/2      |  320     | 10240      |  80 |  96G HBM3e | `GNb-80-96h`   |
 | GB200      | 2/7      |   88+    |  5632+     |  44+|  45G HBM3e+| `GNb-44-45h`+  |
 | GB200      | 1/7      |   44+    |  2816+     |  22+|  23G HBM3e+| `GNb-22-23h`+  |
-| GB300      | 1/1      |  640     | 20480      | 160 | 288G HBM3e | `GNb-160-288h` |
-| GB300      | 1/2      |  320     | 10240      |  80 | 144G HBM3e | `GNb-80-144h`  |
+| GB300      | 1/1      |  640     | 20480      | 160 | 288G HBM3e | `GNu-160-288h` |
+| GB300      | 1/2      |  320     | 10240      |  80 | 144G HBM3e | `GNu-80-144h`  |
 | ... |
 
 [+] The precise numbers for the 1/7 MIG configurations are not known by the author of
 this document and need validation.
 
+Note that the Blackwell Ultra tensor cores differ enough from those of Blackwell that we
+gave the Blackwell Ultra GPUs a new letter `u`. In particular, they deliver over 150% of the
+FP4 tensor performance of standard Blackwell and have more Special Function Units (which
+helps attention), but their INT8 performance has regressed.
+
 | Nvidia GPU            | Fraction | Tensor C | Cuda Cores | SMs | VRAM       | SCS GPU name   |
 |-----------------------|----------|----------|------------|-----|------------|----------------|
 | RTX Pro2000 Blackwell |  1/1     |  136     |  4352      |  34 |  16G GDDR7 | `GNb-34-16`    |

From 20a438af00fcdd98d1fa4c6acbf3d5c4d873f8d9 Mon Sep 17 00:00:00 2001
From: Kurt Garloff <kurt@garloff.de>
Date: Tue, 16 Jun 2026 12:56:59 +0200
Subject: [PATCH 3/4] Appease markdownlint.

Signed-off-by: Kurt Garloff <kurt@garloff.de>
---
 ...w1-flavor-naming-implementation-testing.md | 24 +++++++++----------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/Standards/scs-0100-w1-flavor-naming-implementation-testing.md b/Standards/scs-0100-w1-flavor-naming-implementation-testing.md
index 7525dc544..e52fe1cf7 100644
--- a/Standards/scs-0100-w1-flavor-naming-implementation-testing.md
+++ b/Standards/scs-0100-w1-flavor-naming-implementation-testing.md
@@ -98,14 +98,14 @@ No MIG support, 128 Cuda Cores and 4 Tensor Cores per SM.
 | L40G       | 568      | 18176      | 142 | 48G GDDR6 | `GNl-142h-48`  |
 | L40S       | 568      | 18176      | 142 | 48G GDDR6 | `GNl-142hh-48` |
 
-| Nvidia GPU   | Tensor C | Cuda Cores | SMs | VRAM      | SCS name piece |
-|--------------|----------|------------|-----|-----------|----------------|
-| RTX2000  Ada |   88     |  2816      |  22 | 16G GDDR6 | `GNl-22-16`    |
-| RTX4000  Ada |  192     |  6144      |  48 | 20G GDDR6 | `GNl-48-20`    |
-| RTX4500  Ada |  240     |  7680      |  60 | 24G GDDR6 | `GNl-60-24`    |
-| RTX5000  Ada |  400     | 12800      | 100 | 32G GDDR6 | `GNl-100-32`   |
-| RTX5880  Ada |  440     | 14080      | 110 | 48G GDDR6 | `GNl-110-48`   |
-| RTX6000  Ada |  568     | 18176      | 142 | 48G GDDR6 | `GNl-142-48`   |
+| Nvidia GPU  | Tensor C | Cuda Cores | SMs | VRAM      | SCS name piece |
+|-------------|----------|------------|-----|-----------|----------------|
+| RTX2000 Ada |   88     |  2816      |  22 | 16G GDDR6 | `GNl-22-16`    |
+| RTX4000 Ada |  192     |  6144      |  48 | 20G GDDR6 | `GNl-48-20`    |
+| RTX4500 Ada |  240     |  7680      |  60 | 24G GDDR6 | `GNl-60-24`    |
+| RTX5000 Ada |  400     | 12800      | 100 | 32G GDDR6 | `GNl-100-32`   |
+| RTX5880 Ada |  440     | 14080      | 110 | 48G GDDR6 | `GNl-110-48`   |
+| RTX6000 Ada |  568     | 18176      | 142 | 48G GDDR6 | `GNl-142-48`   |
 
 ##### Grace Hopper (`g`)
 
@@ -157,7 +157,6 @@ regressed INT8 performance.
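-| RTX Pro6000 Blackwell |  1/2     |  376     | 13032      |  94 |  48G GDDR7 | `GNb-94-48`    |
-| RTX Pro6000 Blackwell |  1/4     |  188     |  6516      |  47 |  24G GDDR7 | `GNb-47-24`    |
+| RTX Pro6000 Blackwell |  1/2     |  376     | 12032      |  94 |  48G GDDR7 | `GNb-94-48`    |
+| RTX Pro6000 Blackwell |  1/4     |  188     |  6016      |  47 |  24G GDDR7 | `GNb-47-24`    |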
 
-
 #### AMD Radeon (`A`)
 
 ##### CDNA 2 (`2`)
@@ -193,7 +192,7 @@ Cores and 64 Stream Processors per CU.
 | Inst MI355X | 1024     | 16384       | 256 | 288G HBM3e | `GA4-256h-288h` |
 
-The Instinct MI355X has a higher watttage and thus slightly higher clocks
+The Instinct MI355X has a higher wattage and thus slightly higher clocks
-than the MI350X but is otherwise identical -- we can thus use the `h` modifier
+than the MI350X but is otherwise identical - we can thus use the `h` modifier
 to identify the higher performance version.
 
 ##### Workstation RDNA 3 (`3.1`) and 4 (`4.1`)
@@ -236,12 +235,11 @@ One EU has one tensor core and 16 shading units.
 
 #### Consumer cards
 
-Note that we don't recommend using consumer cards. 
+Note that we don't recommend using consumer cards.
 That said, the schema allows to specify them and for example do PCI pass-through
-of Nvidia RTX4080S (`GNl-80-16`), RTX4090 (`GNl-128-24`), RTX5080S (`GNb-84-24`), 
+of Nvidia RTX4080S (`GNl-80-16`), RTX4090 (`GNl-128-24`), RTX5080S (`GNb-84-24`),
 RTX5090 (`GNb-170-32`), or AMD Radeon RX7900XTX (`GA3.1-96-24`).
 
-
 ## Automated tests
 
 The following testcases [are implemented](https://github.com/SovereignCloudStack/standards/tree/main/Tests/iaas/openstack_test.py):

From 82065b76a433dfc686759fa7986d9a21e4bb93fa Mon Sep 17 00:00:00 2001
From: Kurt Garloff <kurt@garloff.de>
Date: Tue, 16 Jun 2026 19:55:01 +0200
Subject: [PATCH 4/4] Add Blackwell Ultra 'u' to the flavor naming script.

Signed-off-by: Kurt Garloff <kurt@garloff.de>
---
 Tests/iaas/scs_0100_flavor_naming/flavor_names.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Tests/iaas/scs_0100_flavor_naming/flavor_names.py b/Tests/iaas/scs_0100_flavor_naming/flavor_names.py
index 9bb3e3eea..fbd9519fd 100644
--- a/Tests/iaas/scs_0100_flavor_naming/flavor_names.py
+++ b/Tests/iaas/scs_0100_flavor_naming/flavor_names.py
@@ -233,17 +233,17 @@ class GPU:
     brand = TblAttr("Brand", {"N": "Nvidia", "A": "AMD", "I": "Intel"})
     gen = DepTblAttr("Gen", brand, {
         "N": {'': '(unspecified)', "f": "Fermi", "k": "Kepler", "m": "Maxwell", "p": "Pascal", "v": "Volta",
-              "t": "Turing", "a": "Ampere", "l": "AdaLovelace", "g": "GraceHopper", "b": "Blackwell"},
+              "t": "Turing", "a": "Ampere", "l": "AdaLovelace", "g": "GraceHopper", "b": "Blackwell", "u": "Blackwell Ultra"},
         "A": {'': '(unspecified)', "0.4": "GCN4.0/Polaris", "0.5": "GCN5.0/Vega", "1": "CDNA1", "1.1": "RDNA1/Navi1x",
               "2": "CDNA2", "2.1": "RDNA2/Navi2x", "3": "CDNA3", "3.1": "RDNA3/Navi3x", "3.5": "RDNA3.5", "4": "CDNA4",
-              "4.1": "RDNA-4/Navi4x", "5.1": "RDNA-5/Navi5x"},
+              "4.1": "RDNA4/Navi4x", "5.1": "RDNA5/Navi5x"},
         "I": {'': '(unspecified)', "0.9": "Gen9/Skylake", "0.95": "Gen9.5/KabyLake", "1": "Xe1/Gen12.1/DG1", "2": "Xe2/Gen12.2",
               "3": "Arc/Gen12.7/DG2", "4": "BattleImage/Gen20.0"},
     })
     cu = OptIntAttr("#.N:SMs/A:CUs/I:EUs")
     perf = TblAttr("Frequency", {"": "Std Freq", "h": "High Freq", "hh": "Very High Freq"})
     vram = OptIntAttr("#.V:GiB VRAM")
-    vramperf = TblAttr("Bandwidth", {"": "Std BW {<~1GiB/s)", "h": "High BW", "hh": "Very High BW"})
+    vramperf = TblAttr("Bandwidth", {"": "Std BW (GDDR)", "h": "High BW (HBM)", "hh": "Very High BW"})
 
     def __init__(self, gputype="g", brand="N", gen='', cu=None, perf='', vram=None, vramperf=''):
         self.gputype = gputype