From abb0eb7a37baedf1fc1d0ce8faec1db840854e18 Mon Sep 17 00:00:00 2001
From: Shunping Huang
Date: Fri, 8 May 2026 21:49:34 -0400
Subject: [PATCH] Fix flaky BigQuery file loads by safely handling concurrent mkdirs

---
 sdks/python/apache_beam/io/gcp/bigquery_file_loads.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py b/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py
index 738ace67a5f7..4e45d0324ee2 100644
--- a/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py
+++ b/sdks/python/apache_beam/io/gcp/bigquery_file_loads.py
@@ -134,7 +134,13 @@ def _make_new_file_writer(
   directory = fs.FileSystems.join(file_prefix, destination)
 
   if not fs.FileSystems.exists(directory):
-    fs.FileSystems.mkdirs(directory)
+    try:
+      fs.FileSystems.mkdirs(directory)
+    except IOError:
+      # Concurrent workers may race to create the same directory.
+      # Ignore the IOError if another worker successfully created it.
+      if not fs.FileSystems.exists(directory):
+        raise
 
   file_name = str(uuid.uuid4())
   file_path = fs.FileSystems.join(file_prefix, destination, file_name)