Disable parallel plans for RIGHT_SEMI joins

author Richard Guo <rguo@postgresql.org>

Thu, 30 Oct 2025 03:03:15 +0000 (12:03 +0900)

committer Richard Guo <rguo@postgresql.org>

Thu, 30 Oct 2025 03:03:15 +0000 (12:03 +0900)
author Richard Guo <rguo@postgresql.org>
Thu, 30 Oct 2025 03:03:15 +0000 (12:03 +0900)
committer Richard Guo <rguo@postgresql.org>
Thu, 30 Oct 2025 03:03:15 +0000 (12:03 +0900)
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c

index 7aa8f5d799cac8652736004a1b09956b30b05ef0..af5aa8aea845b4ee768b10f527ebe13bfbbb52e9 100644 (file)
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -2399,13 +2399,25 @@ hash_inner_and_outer(PlannerInfo *root,
  
         /*
          * If the joinrel is parallel-safe, we may be able to consider a
-        * partial hash join.  However, we can't handle JOIN_UNIQUE_OUTER,
-        * because the outer path will be partial, and therefore we won't be
-        * able to properly guarantee uniqueness.  Also, the resulting path
-        * must not be parameterized.
+        * partial hash join.
+        *
+        * However, we can't handle JOIN_UNIQUE_OUTER, because the outer path
+        * will be partial, and therefore we won't be able to properly
+        * guarantee uniqueness.
+        *
+        * Similarly, we can't handle JOIN_RIGHT_SEMI, because the hash table
+        * is either a shared hash table or a private hash table per backend.
+        * In the shared case, there is no concurrency protection for the
+        * match flags, so multiple workers could inspect and set the flags
+        * concurrently, potentially producing incorrect results.  In the
+        * private case, each worker has its own copy of the hash table, so no
+        * single process has all the match flags.
+        *
+        * Also, the resulting path must not be parameterized.
          */
         if (joinrel->consider_parallel &&
             save_jointype != JOIN_UNIQUE_OUTER &&
+           save_jointype != JOIN_RIGHT_SEMI &&
             outerrel->partial_pathlist != NIL &&
             bms_is_empty(joinrel->lateral_relids))
         {
@@ -2439,13 +2451,12 @@ hash_inner_and_outer(PlannerInfo *root,
              * total inner path will also be parallel-safe, but if not, we'll
              * have to search for the cheapest safe, unparameterized inner
              * path.  If doing JOIN_UNIQUE_INNER, we can't use any alternative
-            * inner path.  If full, right, right-semi or right-anti join, we
-            * can't use parallelism (building the hash table in each backend)
-            * because no one process has all the match bits.
+            * inner path.  If full, right, or right-anti join, we can't use
+            * parallelism (building the hash table in each backend) because
+            * no one process has all the match bits.
              */
             if (save_jointype == JOIN_FULL ||
                 save_jointype == JOIN_RIGHT ||
-               save_jointype == JOIN_RIGHT_SEMI ||
                 save_jointype == JOIN_RIGHT_ANTI)
                 cheapest_safe_inner = NULL;
             else if (cheapest_total_inner->parallel_safe)
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out

index f3de6971c2eced1eadacf2733bfbd2c9b41667cc..6738ba316e684b4a44c62d80bf2abf9d3fb667ea 100644 (file)
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3075,6 +3075,33 @@ select * from tbl_rs t1 join
   3 | 3 | 4 | 4
  (6 rows)
  
+--
+-- regression test for bug with parallel-hash-right-semi join
+--
+begin;
+-- encourage use of parallel plans
+set local parallel_setup_cost=0;
+set local parallel_tuple_cost=0;
+set local min_parallel_table_scan_size=0;
+set local max_parallel_workers_per_gather=4;
+-- ensure we don't get parallel hash right semi join
+explain (costs off)
+select * from tenk1 t1
+where exists (select 1 from tenk1 t2 where fivethous = t1.fivethous)
+and t1.fivethous < 5;
+                    QUERY PLAN                    
+--------------------------------------------------
+ Gather
+   Workers Planned: 4
+   ->  Parallel Hash Semi Join
+         Hash Cond: (t1.fivethous = t2.fivethous)
+         ->  Parallel Seq Scan on tenk1 t1
+               Filter: (fivethous < 5)
+         ->  Parallel Hash
+               ->  Parallel Seq Scan on tenk1 t2
+(8 rows)
+
+rollback;
  --
  -- regression test for bug #13908 (hash join with skew tuples & nbatch increase)
  --
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql

index 3533fd3600c8aad6f9bb4a0c8560b10bb1cda34d..87865041e34edb08f64465f6b753a2e1a8b969bd 100644 (file)
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -759,6 +759,26 @@ select * from tbl_rs t1 join
              (select t1.a+t3.a from tbl_rs t3) and t2.a < 5)
    on true;
  
+--
+-- regression test for bug with parallel-hash-right-semi join
+--
+
+begin;
+
+-- encourage use of parallel plans
+set local parallel_setup_cost=0;
+set local parallel_tuple_cost=0;
+set local min_parallel_table_scan_size=0;
+set local max_parallel_workers_per_gather=4;
+
+-- ensure we don't get parallel hash right semi join
+explain (costs off)
+select * from tenk1 t1
+where exists (select 1 from tenk1 t2 where fivethous = t1.fivethous)
+and t1.fivethous < 5;
+
+rollback;
+
  --
  -- regression test for bug #13908 (hash join with skew tuples & nbatch increase)
  --
author	Richard Guo <rguo@postgresql.org>
	Thu, 30 Oct 2025 03:03:15 +0000 (12:03 +0900)
committer	Richard Guo <rguo@postgresql.org>
	Thu, 30 Oct 2025 03:03:15 +0000 (12:03 +0900)
src/backend/optimizer/path/joinpath.c		patch | blob | blame | history
src/test/regress/expected/join.out		patch | blob | blame | history
src/test/regress/sql/join.sql		patch | blob | blame | history