Migration PostgreSQL → Oracle
Create a new default schema in Oracle, customize it and push the data directly from PostgreSQL without intermediate export files by using oracle_fdw (foreign data wrapper).
Current state of this solution
Works as expected, but very slow: one of the biggest tables, asset_feature, has a CLOB column, and the OCI library doesn't allow PostgreSQL to prefetch its rows. For example, transferring a table with 40 million rows takes 3 hours. Several workarounds are described in our internal PostgreSQL mailing list.
Prerequisites
An installed, running and empty Oracle database.
Access as user oracle to the Oracle DB host, as postgres to the PostgreSQL host, and as corpus to the censhare Server host.
Port 1521 on the Oracle host must be open for the PostgreSQL host.
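A quick way to verify the last point from the PostgreSQL host (the hostname is a placeholder):
# check that the Oracle listener port is reachable
nc -vz oracle-host 1521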
Create the default Oracle schema
We use the standard create-user.sql and create-db.sql scripts from the same censhare Server release whose schema we have to migrate.
# get create-db.sql from /opt/corpus/css/database/ on the censhare server
# we have to separate tables from everything else
perl -ne 'print if /^CREATE TABLE/../^\);/ and /^CREATE TABLE/../^ROWDEPENDENCIES;/ ' create-db.sql > create-db-TABLES.sql
perl -ne 'print if not ( /^CREATE TABLE/../^\);/ and /^CREATE TABLE/../^ROWDEPENDENCIES;/ ) ' create-db.sql > create-db-REST.sql
-- create user and tables
sqlplus / as sysdba
drop user corpus cascade;
@create-user.sql
@create-db-TABLES.sql
-- create helper tables for sequences and custom columns
create table seqtmp(sequence_name varchar(100), last_value number(19), start_value number(19), increment_by number(19));
create table columnstmp (
table_name varchar2(100),
column_name varchar2(100),
data_type varchar2(100),
is_nullable varchar2(100),
character_maximum_length number(10),
numeric_precision number(10)
);
Stop the censhare process and prepare the PostgreSQL database
We create a new temporary table to hold sequence values.
# stop the censhare process on the censhare Server host
# create a temporary table to transfer sequence values to Oracle
psql -d corpus -U corpus
create table public.seqtmp(sequence_name varchar(100), last_value bigint, start_value bigint, increment_by bigint);
DO $$
DECLARE
c record;
BEGIN
FOR c IN SELECT * FROM information_schema.sequences where sequence_schema='public'
and sequence_name not like '%rowid%'
LOOP
EXECUTE 'insert into public.seqtmp select '''||c.sequence_name||''', last_value , start_value, increment_by FROM '||c.sequence_name;
END LOOP;
END$$;
commit;
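Note: on PostgreSQL 10 and later, start_value and increment_by can no longer be selected from the sequence relation itself. A variant based on the pg_sequences view (a sketch, adapt it to your release) would be:
insert into public.seqtmp
select sequencename, coalesce(last_value, start_value), start_value, increment_by
FROM pg_sequences where schemaname='public' and sequencename not like '%rowid%';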
# helper function for dealing with a customized database schema
CREATE OR REPLACE FUNCTION get_columns(varchar, varchar) RETURNS varchar AS $$
select string_agg('"'||column_name||'"',',') FROM information_schema.columns
WHERE table_name = $1 and table_schema= $2 AND column_name NOT IN('rowid');
$$ LANGUAGE SQL;
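A quick way to see what the helper returns; asset_feature is one of the migrated tables, and the output is the quoted, comma-separated column list (without rowid) that is used later for the data push:
select get_columns('asset_feature', 'public');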
Set up and test Oracle Instant Client
Follow the instructions in the corresponding section in Migration Oracle → PostgreSQL.
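For orientation only, a typical setup on the PostgreSQL host looks roughly like this; the installation path, version and connect string are placeholders, the linked section is authoritative:
# install the Oracle Instant Client basic and sqlplus packages, then as user postgres:
export PATH=$PATH:/usr/lib/oracle/19.10/client64/bin
export LD_LIBRARY_PATH=/usr/lib/oracle/19.10/client64/lib
sqlplus corpus/corpus@//oracle-host:1521/ORCL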
Set up oracle_fdw and connect PostgreSQL to Oracle
Follow the instructions in the corresponding section in Migration Oracle → PostgreSQL.
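For orientation only, the setup boils down to roughly the following; the connect string and the password are placeholders, the server name oradb is reused further below, and the linked section is authoritative:
# as a PostgreSQL superuser
psql -d corpus -U postgres
CREATE EXTENSION oracle_fdw;
CREATE SERVER oradb FOREIGN DATA WRAPPER oracle_fdw OPTIONS (dbserver '//oracle-host:1521/ORCL');
GRANT USAGE ON FOREIGN SERVER oradb TO corpus;
CREATE USER MAPPING FOR corpus SERVER oradb OPTIONS (user 'corpus', password 'corpus');
# as user corpus: import the Oracle schema as foreign tables into a local schema corpus
psql -d corpus -U corpus
CREATE SCHEMA corpus;
IMPORT FOREIGN SCHEMA "CORPUS" FROM SERVER oradb INTO corpus;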
Check PostgreSQL for custom TABLES and create them in Oracle
We compare the customized PostgreSQL schema with the default Oracle schema. You can exclude further tables in the list:
psql -d corpus -U corpus
SELECT table_name FROM information_schema.tables WHERE table_schema='public' and table_name
not in ('pg_stat_statements')
EXCEPT
SELECT table_name FROM information_schema.tables WHERE table_schema='corpus';
If the query returns any tables, create them manually in Oracle, then run the check again.
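For example, for a hypothetical custom table my_custom_table reported by the query above, the Oracle counterpart has to be created with matching column types:
sqlplus corpus/corpus
create table my_custom_table (id number(19) not null, name varchar2(200));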
Collect custom COLUMNS in PostgreSQL
Execute the following commands:
psql -d corpus -U corpus
drop table public.columnstmp;
create table public.columnstmp as (
SELECT table_name, column_name FROM information_schema.columns WHERE table_schema='public'
and column_name <> 'rowid' and table_name not in ('pg_stat_statements')
EXCEPT
SELECT table_name, column_name FROM information_schema.columns WHERE table_schema='corpus'
order by 1,2
);
# also collect the data types for these columns
alter table public.columnstmp add data_type varchar;
alter table public.columnstmp add is_nullable varchar;
alter table public.columnstmp add character_maximum_length integer;
alter table public.columnstmp add numeric_precision integer;
UPDATE public.columnstmp SET
data_type = c.data_type,
is_nullable=c.is_nullable,
character_maximum_length=c.character_maximum_length,
numeric_precision=c.numeric_precision
FROM information_schema.columns c
WHERE c.table_schema='public' and c.table_name = columnstmp.table_name and c.column_name = columnstmp.column_name;
# transfer the custom columns to Oracle
insert into corpus.columnstmp select * from public.columnstmp;
Create custom COLUMNS in Oracle
Execute the following commands:
# create the missing columns in Oracle
sqlplus corpus/corpus
-- set serveroutput on size 30000;
BEGIN
FOR lst IN (select * from columnstmp order by 1)
LOOP
execute immediate 'alter table '|| lst.table_name ||' ADD ('||
lst.column_name||' '||
CASE lst.data_type
WHEN 'smallint' THEN 'NUMBER(6)'
WHEN 'numeric' THEN 'NUMBER('||COALESCE(lst.numeric_precision,38)||')'
WHEN 'character varying' THEN 'varchar2('||COALESCE(lst.character_maximum_length,4000)||')'
WHEN 'timestamp without time zone' THEN 'TIMESTAMP(6)'
END ||' '||
CASE lst.is_nullable
WHEN 'YES' THEN 'NULL'
WHEN 'NO' THEN 'NOT NULL'
END ||')';
END LOOP;
END;
/
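An optional cross-check on the Oracle side before re-importing: every column recorded in columnstmp should now exist, so this query should return 0.
select count(*) from columnstmp c
where not exists (select 1 from user_tab_cols u
where u.table_name = upper(c.table_name) and u.column_name = upper(c.column_name));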
# recreate and import the schema again
psql -d corpus -U corpus
drop schema corpus cascade;
CREATE SCHEMA corpus;
IMPORT FOREIGN SCHEMA "CORPUS" FROM SERVER oradb INTO CORPUS;
# check again for differences
SELECT table_name, column_name FROM information_schema.columns WHERE table_schema='public'
and column_name <> 'rowid' and table_name not in ('pg_stat_statements')
EXCEPT
SELECT table_name, column_name FROM information_schema.columns WHERE table_schema='corpus'
order by 1,2;
Compare the Oracle database schemas (optional)
Do this only if this database was running on Oracle before the migration to PostgreSQL and if you still have access to this database.
# get the BEFORE-migration-to-Postgres state by executing this on the OLD Oracle DB
sqlplus corpus/corpus
set lines 200 pages 3000
col table_name format a30
col column_name format a30
col nullable format a1
col data_type format a15
spool BEFORE-db.txt
select table_name,column_name, nullable, data_type,data_length,data_precision from user_tab_cols order by 1,2;
spool off
# get the AFTER-migration-to-Postgres state by executing this on the NEW Oracle DB
.. do the same, but change the spool line to
spool AFTER-db.txt
# copy both files to a Linux machine, maximize the terminal window and compare
diff -y -W 230 AFTER-db.txt BEFORE-db.txt | less -S
Push the data from PostgreSQL to Oracle
Execute the following commands:
psql -d corpus -U corpus
-- set the prefetch size for all foreign tables
DO $$
DECLARE
c record;
BEGIN
FOR c IN SELECT table_name FROM information_schema.tables WHERE table_schema='public' and
table_name not in ('pg_stat_statements')
LOOP
EXECUTE 'alter foreign table corpus.' || c.table_name || ' OPTIONS (prefetch ''10240'');';
END LOOP;
END$$;
-- create a helper table with the column list of every table
create table public.columns_list as
select table_name, get_columns(table_name,'public') FROM information_schema.tables WHERE table_schema='public';
DO $$
DECLARE
c record;
v timestamp;
BEGIN
-- skip columns_list (it has no counterpart in Oracle) and columnstmp (already transferred above)
FOR c IN SELECT * FROM columns_list where table_name not in ('pg_stat_statements','columns_list','columnstmp') order by table_name desc
LOOP
select timeofday() into v;
RAISE NOTICE '% BEGIN processing table %', v, c.table_name;
EXECUTE 'insert into corpus.'|| c.table_name ||' (' || c.get_columns || ') select '||
c.get_columns || ' FROM public.'|| c.table_name;
END LOOP;
END$$;
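After the push, a simple sanity check is to compare the row counts of both schemas; a sketch (counting over the foreign tables can take a while on large tables):
DO $$
DECLARE
c record;
pg_rows bigint;
ora_rows bigint;
BEGIN
FOR c IN SELECT table_name FROM information_schema.tables WHERE table_schema='public' and
table_name not in ('pg_stat_statements','columns_list')
LOOP
EXECUTE 'select count(*) from public.'||c.table_name INTO pg_rows;
EXECUTE 'select count(*) from corpus.'||c.table_name INTO ora_rows;
IF pg_rows <> ora_rows THEN
RAISE WARNING 'row count mismatch for %: % in PostgreSQL vs % in Oracle', c.table_name, pg_rows, ora_rows;
END IF;
END LOOP;
END$$;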
sqlplus corpus/corpus
-- collect statistics on Oracle
exec dbms_stats.gather_schema_stats('CORPUS',no_invalidate=>false,cascade=>true,degree=>4);
-- create indexes, constraints etc.
@create-db-REST.sql
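The sequence values transferred via seqtmp are not applied automatically; create-db-REST.sql presumably recreates the sequences with their default start values. One possible way to advance them, assuming the sequence names match (a sketch; dropping and recreating loses any non-default sequence settings such as caching):
BEGIN
FOR s IN (SELECT sequence_name, last_value, increment_by FROM seqtmp)
LOOP
EXECUTE IMMEDIATE 'drop sequence '||s.sequence_name;
EXECUTE IMMEDIATE 'create sequence '||s.sequence_name||
' start with '||(s.last_value + s.increment_by)||
' increment by '||s.increment_by;
END LOOP;
END;
/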