安装:
# tar -zxvf pg_bulkload-3.1.6.tar.gz# cd pg_bulkload-3.1.6# . /home/postgres/.bash_profile# make# make install引入扩展:
$ psql psql (9.3.4)Type "help" for help.postgres=# create database test;CREATE DATABASEpostgres=# \c testYou are now connected to database "test" as user "postgres".test=# create extension pg_bulkload ;CREATE EXTENSIONtest=# \dx List of installed extensions Name | Version | Schema | Description -------------+---------+------------+----------------------------------------------------------------- pg_bulkload | 1.0 | public | pg_bulkload is a high speed data loading utility for PostgreSQL plpgsql | 1.0 | pg_catalog | PL/pgSQL procedural language(2 rows)test=# \df List of functions Schema | Name | Result data type | Argument data types | Type --------+-------------+------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------- public | pg_bulkload | record | options text[], OUT skip bigint, OUT count bigint, OUT parse_errors bigint, OUT duplicate_new bigint, OUT duplicate_old bigint, OUT system_time double precision, OUT user_time double precision, OUT duration double precision | normal(1 row)创造测试数据:
test=# create table t1(id int,name text);CREATE TABLEtest=# insert into t1 select generate_series(1,50000000),'HighGo';INSERT 0 50000000test=# \d+ List of relations Schema | Name | Type | Owner | Size | Description --------+------+-------+----------+---------+------------- public | t1 | table | postgres | 2112 MB | (1 row)test=# copy t1 to '/opt/pg93/data/t1.csv' with(format 'csv');COPY 50000000
$ du -sh /opt/pg93/data/t1.csv 754M /opt/pg93/data/t1.csvcopy方式载入数据:
(with logged):
test=# truncate t1;TRUNCATE TABLEtest=# \timing Timing is on.test=# test=# copy t1 from '/opt/pg93/data/t1.csv' with(format 'csv');COPY 50000000Time: 139038.099 ms
(without logged):
test=# truncate t1;TRUNCATE TABLE修改t1表为unloggedtest=# update pg_class set relpersistence='u' where relname='t1';UPDATE 1test=# copy t1 from '/opt/pg93/data/t1.csv' with(format 'csv');COPY 50000000Time: 110796.480 ms
pg_blukload载入数据:
(without logged):
$ pg_bulkload -i /opt/pg93/data/t1.csv -O t1 -l t1_bulkload.log -o "TYPE=CSV" -o "WRITER=PARALLEL" -d testNOTICE: BULK LOAD STARTNOTICE: BULK LOAD END 0 Rows skipped. 50000000 Rows successfully loaded. 0 Rows not loaded due to parse errors. 0 Rows not loaded due to duplicate errors. 0 Rows replaced with new rows.Run began on 2014-06-16 05:53:45.025377+08Run ended on 2014-06-16 05:55:00.625057+08CPU 2.80s/33.00u sec elapsed 75.60 sec
$ pg_bulkload -i /opt/pg93/data/t1.csv -O t1 -l t1_bulkload.log -o "TYPE=CSV" -o "WRITER=DIRECT" -d testNOTICE: BULK LOAD STARTNOTICE: BULK LOAD END 0 Rows skipped. 50000000 Rows successfully loaded. 0 Rows not loaded due to parse errors. 0 Rows not loaded due to duplicate errors. 0 Rows replaced with new rows.Run began on 2014-06-16 06:05:40.267198+08Run ended on 2014-06-16 06:07:05.08921+08CPU 6.88s/34.25u sec elapsed 84.82 sec(with logged):
test=# truncate t1;TRUNCATE TABLE修改t1表为loggedtest=# update pg_class set relpersistence='p' where relname='t1';UPDATE 1
$ pg_bulkload -i /opt/pg93/data/t1.csv -O t1 -l t1_bulkload.log -o "TYPE=CSV" -o "WRITER=PARALLEL" -d testNOTICE: BULK LOAD STARTNOTICE: BULK LOAD END 0 Rows skipped. 50000000 Rows successfully loaded. 0 Rows not loaded due to parse errors. 0 Rows not loaded due to duplicate errors. 0 Rows replaced with new rows.Run began on 2014-06-16 05:57:05.620751+08Run ended on 2014-06-16 05:58:20.458029+08CPU 2.80s/33.02u sec elapsed 74.84 sec $ pg_bulkload -i /opt/pg93/data/t1.csv -O t1 -l t1_bulkload.log -o "TYPE=CSV" -o "WRITER=DIRECT" -d testNOTICE: BULK LOAD STARTNOTICE: BULK LOAD END 0 Rows skipped. 50000000 Rows successfully loaded. 0 Rows not loaded due to parse errors. 0 Rows not loaded due to duplicate errors. 0 Rows replaced with new rows.Run began on 2014-06-16 06:02:10.33344+08Run ended on 2014-06-16 06:03:36.986382+08CPU 7.15s/34.93u sec elapsed 86.65 sec
pg_bulkload执行过程中查看进程和连接:
(WRITER=PARALLEL)$ ps -ef | grep postpostgres 24044 22690 0 05:53 pts/2 00:00:00 pg_bulkload -i /opt/pg93/data/t1.csv -O t1 -l t1_bulkload.log -o TYPE=CSV -o WRITER=PARALLEL -d testpostgres 24045 2236 48 05:53 ? 00:00:04 postgres: postgres test [local] SELECTpostgres 24046 2236 20 05:53 ? 00:00:02 postgres: postgres test [local] SELECT
postgres=# select datname,application_name,query from pg_stat_activity; datname | application_name | query ----------+------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- postgres | psql | select datname,application_name,query from pg_stat_activity; test | pg_bulkload | SELECT * FROM pg_bulkload($1) test | | SELECT * FROM pg_bulkload(ARRAY['TYPE=TUPLE','INPUT=' || $1,'WRITER=DIRECT','OUTPUT=' || $2,'ON_DUPLICATE_KEEP=' || $3,'DUPLICATE_ERRORS=' || $4,'DUPLICATE_BADFILE=' || $5,'LOGFILE=' || $6,'VERBOSE=' || $7,'TRUNCATE=' || $8])(3 rows)(WRITER=PARALLEL)
$ ps -ef | grep postpostgres 24124 22690 0 06:02 pts/2 00:00:00 pg_bulkload -i /opt/pg93/data/t1.csv -O t1 -l t1_bulkload.log -o TYPE=CSV -o WRITER=DIRECT -d testpostgres 24125 2236 49 06:02 ? 00:00:13 postgres: postgres test [local] SELECT
test=# select datname,application_name,query from pg_stat_activity; datname | application_name | query ---------+------------------+-------------------------------------------------------------- test | pg_bulkload | SELECT * FROM pg_bulkload($1) test | psql | select datname,application_name,query from pg_stat_activity;(2 rows)