Skip to content

环境介绍

版本信息

  • centos7

  • postgres 14.4

  • citus 110-2

安装步骤

  • 安装

  • 配置

sudo pg_conftool 14 main set wal_level logical
sudo pg_conftool 14 main set listen_addresses '*'
sudo pg_conftool 14 main set shared_preload_libraries citus
sudo vi /etc/postgresql/14/main/pg_hba.conf
  • 创建数据库免密码登录

-- Edit .pgpass in the postgres user’s home directory, hostname:port:database:username:password

节点信息

nodenanme IP role
master01 10.10.20.11 coordinator
worker01 10.10.2.12 worker
worker02 10.10.2.14 worker

搭建集群

创建database&extension

在每个worker节点上执行

CREATE DATABASE newbie;
\c newbie
CREATE EXTENSION citus;

在master接节点上执行

CREATE DATABASE newbie;
\c newbie
CREATE EXTENSION citus;
SELECT citus_set_coordinator_host('10.10.2.11', 5432);
#添加worker节点
SELECT * from citus_add_node('10.10.2.12', 5432);
SELECT * from citus_add_node('10.10.2.14', 5432);
-- ... for all of them

查看集群节点

SELECT * FROM citus_get_active_worker_nodes();
 node_name  | node_port 
------------+-----------
 10.10.2.12 |      5432
 10.10.2.14 |      5432
(2 rows)

查看节点分表

select * from pg_dist_node;
 nodeid | groupid |  nodename  | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards 
--------+---------+------------+----------+----------+-------------+----------+----------+-------------+----------------+------------------
      1 |       0 | 10.10.2.11 |     5432 | default  | t           | t        | primary  | default     | t              | f
      3 |       2 | 10.10.2.12 |     5432 | default  | t           | t        | primary  | default     | t              | t
      4 |       3 | 10.10.2.14 |     5432 | default  | t           | t        | primary  | default     | t              | t

表管理

表类型

  • 本地表
  • 参考表
  • 分布表

本地表

与传统表使用方式一致,数据只存放在master节点

参考表

每个节点(master , worker)包含一份表的所有数据,对表的DML采用2pc。适用于存放业务元数据,便于与分布表联合查询使用

分布表

根据分布键(通常为表的指定列),将数据分布到每个worker节点中。每个worker节点包含表的部分数据内容

查看表信息

-- 包括参考表和分布表
select * from citus_tables ;

-- 包括所有类型的表及分布信息
select * from citus_shards ;

--- 查看库分布表size
SELECT logicalrelid AS name,
       pg_size_pretty(citus_table_size(logicalrelid)) AS size
  FROM pg_dist_partition where name = '$tablename';

--- 查看分布表分布在每个node上的size 
select table_name, nodename as node_name,round(sum(shard_size)*100.0/citus_table_size(table_name),2) percent, pg_size_pretty(sum(shard_size)) as table_size_node,pg_size_pretty(citus_table_size(table_name)) AS table_size from citus_shards where citus_table_type = 'distributed' group by nodename , table_name;

          table_name           | node_name  | percent | table_size_node | table_size
-------------------------------+------------+---------+-----------------+------------
mydistable | 10.10.2.12 |   32.60 | 1424 kB         | 4368 kB
mydistable | 10.10.2.14 |   60.81 | 2656 kB         | 4368 kB
(2 rows)

表管理

参考表创建

SELECT create_reference_table('tablename');

分布表创建

```istributed relations cannot have UNIQUE, EXCLUDE, or PRIMARY KEY constraints that do not include the partition column (with an equality operator if EXCLUDE) SELECT create_distributed_table('tablename', 'column');


####   注意事项 

`distributed relations cannot have UNIQUE, EXCLUDE, or PRIMARY KEY constraints that do not include the partition column (with an equality operator if EXCLUDE)`

#### 亲和性

SELECT create_distributed_table('github_events', 'repo_id', colocate_with => 'github_repo');


#### 更新表亲和性

SELECT update_distributed_table_colocation('A', colocate_with => 'B');


#### 分片数量

show citus.shard_count; set citus.shard_count = 64;


#### 查看默认分布策略

````
SELECT * FROM pg_dist_rebalance_strategy;
````

#### 设置分布策略

SELECT citus_set_default_rebalance_strategy('by_disk_size');


#### 进度查看

SELECT * FROM get_rebalance_progress();


#### 删除本地数据

-- 在将普通表转化化为分布表或参考表后,清空本地数据,待测试 SELECT truncate_local_data_after_distributing_table('tablename')


### 恢复表为本地表

select undistribute_table('table_name') select undistribute_table('table_name',cascade_via_foreign_keys=>true); # 危险操作,注意所有表关联关系




## 函数管理

### 自定义函数

CREATE OR REPLACE FUNCTION delete_campaign(company_id int, campaign_id int) RETURNS void LANGUAGE plpgsql AS $fn$ BEGIN DELETE FROM campaigns WHERE id = $2 AND campaigns.company_id = $1; DELETE FROM ads WHERE ads.campaign_id = $2 AND ads.company_id = $1; END; $fn$;


### 自定义函数下推

SELECT create_distributed_function( 'delete_campaign(int, int)', 'company_id', colocate_with := 'campaigns' );


### 查看执行计划,查看全部的task

SET citus.explain_all_tasks = 1;


# 高级特性

## 重新分布

加入删除节点时,不停服数据迁移

rebalance_table_shards() #所有 rebalance_table_shards('tabename') #一个表


## 租户隔离

大租户单独分配,独享worker资源

创建分配

-- 根据租户ID隔离的分片 -- 返回新的shard id。 SELECT isolate_tenant_to_new_shard('table_name', tenant_id); SELECT isolate_tenant_to_new_shard('table_name', tenant_id,'CASCADE'); │ isolate_tenant_to_new_shard │ ├─────────────────────────────┤ │ 102240 │


迁移分片

SELECT nodename, nodeport FROM citus_shards WHERE shardid = 102240;

-- 列出可能持有该分片的可用工作节点 SELECT * FROM master_get_active_worker_nodes();

-- 将分片移动到你选择的WORK节点上 --(它也会移动任何用CASCADE选项创建的分片)。 SELECT citus_move_shard_placement( 102240, 'source_host', source_port, 'dest_host', dest_port);


## 时序数据分表管理

-- 自动创建分区表 SELECT create_time_partitions( table_name := 'github_events', partition_interval := '1 month', end_at := now() + '12 months' );


-- 查看分区表 SELECT partition FROM time_partitions WHERE parent_table = 'github_events'::regclass;




## 归档数据列存

--转化为列存 CALL alter_old_partitions_set_access_method( 'github_columnar_events', '2015-01-01 06:00:00' / older_than /, 'columnar' );


-- 查看表的存储方式 SELECT partition, access_method FROM time_partitions WHERE parent_table = 'github_columnar_events'::regclass;


# 读写分离

-- 加入数据库从节点 select * from citus_add_secondary_node('new-node', 12345, 'primary-node', 12345);




-- 开启读写分离 citus.use_secondary_nodes never: (default) All reads happen on primary nodes. always: Reads run against secondary nodes instead, and insert/update statements are disabled.


# 节点管理

## 节点查看

select * from pg_dist_node; nodeid | groupid | nodename | nodeport | noderack | hasmetadata | isactive | noderole | nodecluster | metadatasynced | shouldhaveshards --------+---------+------------+----------+----------+-------------+----------+----------+-------------+----------------+------------------ 1 | 0 | 10.10.2.11 | 5432 | default | t | t | primary | default | t | f 3 | 2 | 10.10.2.12 | 5432 | default | t | t | primary | default | t | t 4 | 3 | 10.10.2.14 | 5432 | default | t | t | primary | default | t | t


## 删除节点

-- 删除一个节点 SELECT * from citus_drain_node('10.0.0.1', 5432);


-- 删除多个节点

在每个节点上执行

SELECT * FROM citus_set_node_property(node_hostname, node_port, 'shouldhaveshards', false);

SELECT * FROM rebalance_table_shards(drain_only := true);


## 更新节点 

select * from citus_update_node(123, 'new-address', 5432);


## 加入备用节点

select * from citus_add_inactive_node('new-node', 12345);


## 激活备用节点

select * from citus_activate_node('new-node', 12345);


# 集群健康管理

SELECT * FROM citus_check_cluster_node_health();


# 高可用管理

select * from citus_add_secondary_node('new-node', 12345, 'primary-node', 12345);


select * from citus_update_node(123, 'new-address', 5432); ```