Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ REGRESS = security rum rum_validate rum_hash ruminv timestamp \
int2 int4 int8 float4 float8 money oid \
time timetz date interval \
macaddr inet cidr text varchar char bytea bit varbit \
numeric rum_weight expr array
numeric rum_weight expr array rum_vacuum

TAP_TESTS = 1

Expand Down
154 changes: 154 additions & 0 deletions expected/rum_vacuum.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
-- Test RUM index scan correctness after concurrent VACUUM removes all
-- posting tree entry items.
SET enable_seqscan TO off;
SET enable_indexscan TO off;
SET enable_bitmapscan TO on;
CREATE TABLE test_rum_vacuum (id int, body tsvector);
ALTER TABLE test_rum_vacuum SET (autovacuum_enabled = false);
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great ann') FROM generate_series(1, 6) i;
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i;
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great james') FROM generate_series(10001, 10003) i;
CREATE INDEX ON test_rum_vacuum USING rum (body rum_tsvector_ops);
DELETE FROM test_rum_vacuum WHERE body @@ 'ann'::tsquery AND id <= 5;
DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 9999;
-- test normal result
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
id | body
----+-------------------
6 | 'ann':2 'great':1
(1 row)

SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
id | body
-------+--------------------
10000 | 'great':1 'john':2
(1 row)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
6 | 'ann':2 'great':1
10000 | 'great':1 'john':2
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
(5 rows)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
10000 | 'great':1 'john':2
6 | 'ann':2 'great':1
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
(5 rows)

VACUUM test_rum_vacuum;
-- this shouldn't cause a core dump
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
id | body
----+-------------------
6 | 'ann':2 'great':1
(1 row)

SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
id | body
-------+--------------------
10000 | 'great':1 'john':2
(1 row)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
6 | 'ann':2 'great':1
10000 | 'great':1 'john':2
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
(5 rows)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
10000 | 'great':1 'john':2
6 | 'ann':2 'great':1
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
(5 rows)

-- test that data can still be found after reinsertion
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(10004, 20000) i;
SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
count
-------
9998
(1 row)

DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 19999;
VACUUM test_rum_vacuum;
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
id | body
----+-------------------
6 | 'ann':2 'great':1
(1 row)

SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
id | body
-------+--------------------
20000 | 'great':1 'john':2
(1 row)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
6 | 'ann':2 'great':1
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
20000 | 'great':1 'john':2
(5 rows)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
20000 | 'great':1 'john':2
6 | 'ann':2 'great':1
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
(5 rows)

-- test if do while loop works when an entry has no non-empty posting tree pages
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i;
DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery;
VACUUM test_rum_vacuum;
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
id | body
----+-------------------
6 | 'ann':2 'great':1
(1 row)

SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
id | body
----+------
(0 rows)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
6 | 'ann':2 'great':1
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
(4 rows)

SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;
id | body
-------+--------------------
6 | 'ann':2 'great':1
10001 | 'great':1 'jame':2
10002 | 'great':1 'jame':2
10003 | 'great':1 'jame':2
(4 rows)
54 changes: 54 additions & 0 deletions sql/rum_vacuum.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
-- Test RUM index scan correctness after concurrent VACUUM removes all
-- posting tree entry items.

SET enable_seqscan TO off;
SET enable_indexscan TO off;
SET enable_bitmapscan TO on;

CREATE TABLE test_rum_vacuum (id int, body tsvector);
ALTER TABLE test_rum_vacuum SET (autovacuum_enabled = false);

INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great ann') FROM generate_series(1, 6) i;
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i;
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great james') FROM generate_series(10001, 10003) i;

CREATE INDEX ON test_rum_vacuum USING rum (body rum_tsvector_ops);

DELETE FROM test_rum_vacuum WHERE body @@ 'ann'::tsquery AND id <= 5;
DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 9999;

-- test normal result
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;

VACUUM test_rum_vacuum;

-- this shouldn't cause a core dump
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;

-- test that data can still be found after reinsertion
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(10004, 20000) i;
SELECT count(*) FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery AND id <= 19999;

VACUUM test_rum_vacuum;

SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;

-- test if do while loop works when an entry has no non-empty posting tree pages
INSERT INTO test_rum_vacuum SELECT i, to_tsvector('great john') FROM generate_series(7, 10000) i;
DELETE FROM test_rum_vacuum WHERE body @@ 'john'::tsquery;
VACUUM test_rum_vacuum;

SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('ann');
SELECT * FROM test_rum_vacuum WHERE body @@ to_tsquery('john');
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('ann') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('ann')) AS sub ORDER BY distance ASC, id ASC;
SELECT id, body FROM (SELECT id, body, body <=> to_tsquery('john') AS distance FROM test_rum_vacuum ORDER BY body <=> to_tsquery('john')) AS sub ORDER BY distance ASC, id ASC;
97 changes: 90 additions & 7 deletions src/rumget.c
Original file line number Diff line number Diff line change
Expand Up @@ -684,9 +684,23 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot)
}

LockBuffer(entry->buffer, RUM_UNLOCK);
entry->isFinished = setListPositionScanEntry(rumstate, entry);
if (!entry->isFinished)
entry->curItem = entry->list[entry->offset];

/*
* If the current page is empty (nlist == 0), we cannot assume the
* scan is complete, as subsequent pages may exist. Therefore, we
* set isFinished = false and leave entry->nlist = 0 and
* entry->offset = 0 to ensure that entryGetItem advances to the
* next page on the next call. Otherwise, initialize curItem to
* the first valid item.
*/
if (entry->nlist == 0)
entry->isFinished = false;
else
{
entry->isFinished = setListPositionScanEntry(rumstate, entry);
if (!entry->isFinished)
entry->curItem = entry->list[entry->offset];
}
}
else if (RumGetNPosting(itup) > 0)
{
Expand All @@ -699,6 +713,16 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot)
if (!entry->isFinished)
entry->curItem = entry->list[entry->offset];
}
/*
* Else, the posting list for this entry has been entirely vacuumed
* away (nlist == 0 after setListPositionScanEntry). We cannot assume
* the scan is complete, as subsequent pages may exist. Therefore, we
* set isFinished = false and leave entry->nlist = 0 and entry->offset
* = 0 to ensure that entryGetItem advances to the next page on the
* next call.
*/
else
entry->isFinished = false;

if (entry->queryCategory == RUM_CAT_EMPTY_QUERY &&
entry->scanWithAddInfo)
Expand Down Expand Up @@ -1011,6 +1035,13 @@ entryGetNextItem(RumState * rumstate, RumScanEntry entry, Snapshot snapshot)

LockBuffer(entry->buffer, RUM_UNLOCK);

/*
* No valid item if VACUUM removed all items concurrently. Go on
* next page.
*/
if (entry->nlist == 0)
break;

if (entry->offset < 0)
{
if (ScanDirectionIsForward(entry->scanDirection) &&
Expand Down Expand Up @@ -1044,6 +1075,7 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot)
RumItemSetMin(&entry->curItem);
entry->offset = InvalidOffsetNumber;
entry->list = NULL;
entry->nlist = 0;
if (entry->gdi)
{
freeRumBtreeStack(entry->gdi->stack);
Expand Down Expand Up @@ -1151,6 +1183,18 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot)

LockBuffer(entry->buffer, RUM_UNLOCK);
entry->isFinished = false;

/*
* Posting tree's first leaf page is empty due to concurrent VACUUM.
* Advance through empty pages until we find one with items or exhaust
* the tree. entryGetItem does not re-invoke entryGetNextItem after we
* return, so we must do it here to ensure curItem is valid on return.
*/
if (entry->nlist == 0)
{
entryGetNextItem(rumstate, entry, snapshot);
goto entry_done;
}
}
else if (RumGetNPosting(itup) > 0)
{
Expand All @@ -1161,12 +1205,21 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot)
rumReadTuple(rumstate, entry->attnum, itup, entry->list, true);
entry->isFinished = setListPositionScanEntry(rumstate, entry);
}
/* Posting list has been vacuumed. Go to the next entry. */
else
{
ItemPointerSetInvalid(&entry->curItem.iptr);
entry->isFinished = true;
goto entry_done;
}

Assert(entry->nlist > 0 && entry->list);

entry->curItem = entry->list[entry->offset];
entry->offset += entry->scanDirection;

entry_done:

SCAN_ENTRY_GET_KEY(entry, rumstate, itup);

/*
Expand Down Expand Up @@ -1340,8 +1393,24 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps
}
else if (entry->stack)
{
entry->offset++;
if (entryGetNextItemList(rumstate, entry, snapshot) && nextEntryList)
/*
* We are responsible for ensuring that we keep advancing through
* ItemLists until we find one that contains at least one valid
* item. This is necessary because concurrent VACUUM may have
* removed all items from a page, leaving an empty ItemList. In
* such cases, we must continue to the next ItemList.
*/
bool success;

Assert(!entry->isFinished);

do
{
entry->isFinished = false;
success = entryGetNextItemList(rumstate, entry, snapshot);
} while (success && entry->nlist == 0);

if (success && nextEntryList)
*nextEntryList = true;
}
else
Expand All @@ -1361,8 +1430,22 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps
dropItem(entry));
if (entry->stack && entry->isFinished)
{
entry->isFinished = false;
if (entryGetNextItemList(rumstate, entry, snapshot) && nextEntryList)
/*
* We are responsible for ensuring that we keep advancing through
* ItemLists until we find one that contains at least one valid
* item. This is necessary because concurrent VACUUM may have
* removed all items from a page, leaving an empty ItemList. In
* such cases, we must continue to the next ItemList.
*/
bool success;

do
{
entry->isFinished = false;
success = entryGetNextItemList(rumstate, entry, snapshot);
} while (success && entry->nlist == 0);

if (success && nextEntryList)
*nextEntryList = true;
}
}
Expand Down