When using Merge Vertica will pick the best projection available for the merge statement, for this is important that you provide Vertica with optimized projections. When Vertica makes use of good projections the need of additional sort and data transfer operation is not needed with this making the Merge more efficient. Important condition to get an optimized Merge statement.
create table target_tbl (id int, name varchar(10));
insert into target_tbl values(1,'Adi');
insert into target_tbl values(2,'Adi');
insert into target_tbl values(3,'Adi');
create table source_tbl (id2 int , name2 varchar(10));
insert into source_tbl values(4,'Ju');
insert into source_tbl values(5,'Ju');
insert into source_tbl values(6,'Ju');
MERGE INTO target_tbl
USING source_tbl b
ON id = b.id2
WHEN MATCHED THEN
UPDATE SET id = b.id2, name = b.name2
WHEN NOT MATCHED THEN
INSERT (id, name) VALUES (b.id2, b.name2);
Access Path:
+-DML MERGE [Cost: 0, Rows: 0]
| Target Projection: public.target_tbl_super
| Target Prep:
| +--- JOIN MERGEJOIN(inputs presorted) [RightOuter] [Cost: 110, Rows: 6 (NO STATISTICS)] (PATH ID: 1)
| | Join Cond: (target_tbl.id = VAL(2))
| | +-- Outer - STORAGE ACCESS for <No Alias [Cost: 75, Rows: 6 (NO STATISTICS)] (PATH ID: 2)
| | | Projection: public.target_tbl_super
| | | Materialize: target_tbl.id, target_tbl.name, target_tbl.epoch
| | | Runtime Filter: (SIP1(MergeJoin): target_tbl.id)
| | +-- Inner - SELECT [Cost: 34, Rows: 3] (PATH ID: 3)
| | | +--- STORAGE ACCESS for b [Cost: 34, Rows: 3] (PATH ID: 4)
| | | | Projection: public.source_tbl_super
| | | | Materialize: b.id2, b.name2
This will be the table i will use for the demo to demonstrate an optimized Merge statement
- create the table with a PK on the columns that will part of the join and set the order by clause.
create table target_tbl (
id int PRIMARY KEY NOT NULL,
name varchar(10) encoding RLE
)
order by id,name;
insert into target_tbl values(1,'Adi');
insert into target_tbl values(2,'Adi');
insert into target_tbl values(3,'Adi');
create table source_tbl (
id2 int PRIMARY KEY NOT NULL,
name2 varchar(10) encoding RLE
)
order by id2,name2;
insert into source_tbl values(4,'Ju');
insert into source_tbl values(5,'Ju');
insert into source_tbl values(6,'Ju');
select analyze_statistics('target_tbl');
select analyze_statistics('source_tbl');
explain
MERGE INTO target_tbl
USING source_tbl b
ON id = b.id2
WHEN MATCHED THEN
UPDATE SET id = b.id2 + 1, name = b.name2
WHEN NOT MATCHED THEN
INSERT (id, name) VALUES (b.id2 + 1, b.name2);
Access Path:
+-DML DELETE [Cost: 0, Rows: 0]
| Target Projection: public.target_tbl_super (DELETE ON CONTAINER)
| Target Prep:
| +--- JOIN MERGEJOIN(inputs presorted) [Semi] [Cost: 35, Rows: 3] (PATH ID: 1)
| | Join Cond: (target_tbl.id = VAL(2))
| | +-- Outer - STORAGE ACCESS for target_tbl [Cost: 17, Rows: 3] (PATH ID: 2)
| | | Projection: public.target_tbl_super
| | | Materialize: target_tbl.id
| | | Runtime Filter: (SIP1(MergeJoin): target_tbl.id)
| | +-- Inner - SELECT [Cost: 17, Rows: 3] (PATH ID: 3)
| | | +--- STORAGE ACCESS for b [Cost: 17, Rows: 3] (PATH ID: 4)
| | | | Projection: public.source_tbl_super
| | | | Materialize: b.id2