关于数据同步:otter数据增量同步

otter介绍

原理形容:

  1. 基于Canal开源产品,获取数据库增量日志数据
  2. 典型管理系统架构,manager(web治理)+node(工作节点)

    manager运行时推送同步配置到node节点

    node节点将同步状态反馈到manager上

  3. 基于zookeeper,解决分布式状态调度的,容许多node节点之间协同工作.

mysql数据库中自带的复制技术可分成三步:

  1. master将扭转记录到二进制日志(binary log)中(这些记录叫做二进制日志事件,binary log events,能够通过show binlog events进行查看)
  2. slave将master的binary log events拷贝到它的中继日志(relay log),这里是I/O thread线程
  3. slave重做中继日志中的事件,将扭转反映它本人的数据,这里是SQL thread线程

基于canal&otter的复制技术和mysql复制相似,具备类比性.

  1. Canal对应于I/O thread,接管Master Binary Log.
  2. Otter对应于SQL thread,通过Canal获取Binary Log数据,执行同步插入数据库.

两者的区别在于:

  1. otter目前嵌入式依赖canal,部署为同一个jvm,目前设计为不产生Relay Log,数据不落地.
  2. otter目前容许自定义同步逻辑,解决各类需要.
    a. ETL转化. 比方Slave上指标表的表名,字段名,字段类型不同,字段个数不同等.
    b. 异构数据库. 比方Slave能够是oracle或者其余类型的存储,nosql等.
    c. M-M部署,解决数据一致性问题
    d. 基于manager部署,不便监控同步状态和治理同步工作.

部署manager

官网下载 manager.deployer-4.2.18.tar.gz,otter-otter-4.2.18.zip

https://github.com/alibaba/ot…

初始化otter库

解压包otter-otter-4.2.18.zip,找到数据库初始化脚本

otter-otter-4.2.18\manager\deployer\src\main\resources\sql\otter-manager-schema.sql

新建数据库otter,执行初始化语句otter-manager-schema.sql

CREATE DATABASE /*!32312 IF NOT EXISTS*/ `otter` /*!40100 DEFAULT CHARACTER SET utf8 COLLATE utf8_bin */;

USE `otter`;

SET sql_mode='ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION';

CREATE TABLE `ALARM_RULE` (
  `ID` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
  `MONITOR_NAME` varchar(1024) DEFAULT NULL,
  `RECEIVER_KEY` varchar(1024) DEFAULT NULL,
  `STATUS` varchar(32) DEFAULT NULL,
  `PIPELINE_ID` bigint(20) NOT NULL,
  `DESCRIPTION` varchar(256) DEFAULT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  `MATCH_VALUE` varchar(1024) DEFAULT NULL,
  `PARAMETERS` text DEFAULT NULL,
  PRIMARY KEY (`ID`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `AUTOKEEPER_CLUSTER` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `CLUSTER_NAME` varchar(200) NOT NULL,
  `SERVER_LIST` varchar(1024) NOT NULL,
  `DESCRIPTION` varchar(200) DEFAULT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `CANAL` (
  `ID` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
  `NAME` varchar(200) DEFAULT NULL,
  `DESCRIPTION` varchar(200) DEFAULT NULL,
  `PARAMETERS` text DEFAULT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  UNIQUE KEY `CANALUNIQUE` (`NAME`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `CHANNEL` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `NAME` varchar(200) NOT NULL,
  `DESCRIPTION` varchar(200) DEFAULT NULL,
  `PARAMETERS` text DEFAULT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  UNIQUE KEY `CHANNELUNIQUE` (`NAME`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `COLUMN_PAIR` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `SOURCE_COLUMN` varchar(200) DEFAULT NULL,
  `TARGET_COLUMN` varchar(200) DEFAULT NULL,
  `DATA_MEDIA_PAIR_ID` bigint(20) NOT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  KEY `idx_DATA_MEDIA_PAIR_ID` (`DATA_MEDIA_PAIR_ID`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `COLUMN_PAIR_GROUP` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `DATA_MEDIA_PAIR_ID` bigint(20) NOT NULL,
  `COLUMN_PAIR_CONTENT` text DEFAULT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  KEY `idx_DATA_MEDIA_PAIR_ID` (`DATA_MEDIA_PAIR_ID`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `DATA_MEDIA` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `NAME` varchar(200) NOT NULL,
  `NAMESPACE` varchar(200) NOT NULL,
  `PROPERTIES` varchar(1000) NOT NULL,
  `DATA_MEDIA_SOURCE_ID` bigint(20) NOT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  UNIQUE KEY `DATAMEDIAUNIQUE` (`NAME`,`NAMESPACE`,`DATA_MEDIA_SOURCE_ID`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `DATA_MEDIA_PAIR` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `PULLWEIGHT` bigint(20) DEFAULT NULL,
  `PUSHWEIGHT` bigint(20) DEFAULT NULL,
  `RESOLVER` text DEFAULT NULL,
  `FILTER` text DEFAULT NULL,
  `SOURCE_DATA_MEDIA_ID` bigint(20) DEFAULT NULL,
  `TARGET_DATA_MEDIA_ID` bigint(20) DEFAULT NULL,
  `PIPELINE_ID` bigint(20) NOT NULL,
  `COLUMN_PAIR_MODE` varchar(20) DEFAULT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  KEY `idx_PipelineID` (`PIPELINE_ID`,`ID`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `DATA_MEDIA_SOURCE` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `NAME` varchar(200) NOT NULL,
  `TYPE` varchar(20) NOT NULL,
  `PROPERTIES` varchar(1000) NOT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  UNIQUE KEY `DATAMEDIASOURCEUNIQUE` (`NAME`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `DELAY_STAT` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `DELAY_TIME` bigint(20) NOT NULL,
  `DELAY_NUMBER` bigint(20) NOT NULL,
  `PIPELINE_ID` bigint(20) NOT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  KEY `idx_PipelineID_GmtModified_ID` (`PIPELINE_ID`,`GMT_MODIFIED`,`ID`),
  KEY `idx_Pipeline_GmtCreate` (`PIPELINE_ID`,`GMT_CREATE`),
  KEY `idx_GmtCreate_id` (`GMT_CREATE`,`ID`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `LOG_RECORD` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `NID` varchar(200) DEFAULT NULL,
  `CHANNEL_ID` varchar(200) NOT NULL,
  `PIPELINE_ID` varchar(200) NOT NULL,
  `TITLE` varchar(1000) DEFAULT NULL,
  `MESSAGE` text DEFAULT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  KEY `logRecord_pipelineId` (`PIPELINE_ID`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `NODE` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `NAME` varchar(200) NOT NULL,
  `IP` varchar(200) NOT NULL,
  `PORT` bigint(20) NOT NULL,
  `DESCRIPTION` varchar(200) DEFAULT NULL,
  `PARAMETERS` text DEFAULT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  UNIQUE KEY `NODEUNIQUE` (`NAME`,`IP`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `PIPELINE` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `NAME` varchar(200) NOT NULL,
  `DESCRIPTION` varchar(200) DEFAULT NULL,
  `PARAMETERS` text DEFAULT NULL,
  `CHANNEL_ID` bigint(20) NOT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  UNIQUE KEY `PIPELINEUNIQUE` (`NAME`,`CHANNEL_ID`),
  KEY `idx_ChannelID` (`CHANNEL_ID`,`ID`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `PIPELINE_NODE_RELATION` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `NODE_ID` bigint(20) NOT NULL,
  `PIPELINE_ID` bigint(20) NOT NULL,
  `LOCATION` varchar(20) NOT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  KEY `idx_PipelineID` (`PIPELINE_ID`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `SYSTEM_PARAMETER` (
  `ID` bigint(20) unsigned NOT NULL,
  `VALUE` text DEFAULT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

CREATE TABLE `TABLE_HISTORY_STAT` (
  `ID` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
  `FILE_SIZE` bigint(20) DEFAULT NULL,
  `FILE_COUNT` bigint(20) DEFAULT NULL,
  `INSERT_COUNT` bigint(20) DEFAULT NULL,
  `UPDATE_COUNT` bigint(20) DEFAULT NULL,
  `DELETE_COUNT` bigint(20) DEFAULT NULL,
  `DATA_MEDIA_PAIR_ID` bigint(20) DEFAULT NULL,
  `PIPELINE_ID` bigint(20) DEFAULT NULL,
  `START_TIME` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `END_TIME` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  KEY `idx_DATA_MEDIA_PAIR_ID_END_TIME` (`DATA_MEDIA_PAIR_ID`,`END_TIME`),
  KEY `idx_GmtCreate_id` (`GMT_CREATE`,`ID`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `TABLE_STAT` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `FILE_SIZE` bigint(20) NOT NULL,
  `FILE_COUNT` bigint(20) NOT NULL,
  `INSERT_COUNT` bigint(20) NOT NULL,
  `UPDATE_COUNT` bigint(20) NOT NULL,
  `DELETE_COUNT` bigint(20) NOT NULL,
  `DATA_MEDIA_PAIR_ID` bigint(20) NOT NULL,
  `PIPELINE_ID` bigint(20) NOT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  KEY `idx_PipelineID_DataMediaPairID` (`PIPELINE_ID`,`DATA_MEDIA_PAIR_ID`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `THROUGHPUT_STAT` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `TYPE` varchar(20) NOT NULL,
  `NUMBER` bigint(20) NOT NULL,
  `SIZE` bigint(20) NOT NULL,
  `PIPELINE_ID` bigint(20) NOT NULL,
  `START_TIME` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `END_TIME` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  KEY `idx_PipelineID_Type_GmtCreate_ID` (`PIPELINE_ID`,`TYPE`,`GMT_CREATE`,`ID`),
  KEY `idx_PipelineID_Type_EndTime_ID` (`PIPELINE_ID`,`TYPE`,`END_TIME`,`ID`),
  KEY `idx_GmtCreate_id` (`GMT_CREATE`,`ID`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE `USER` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `USERNAME` varchar(20) NOT NULL,
  `PASSWORD` varchar(20) NOT NULL,
  `AUTHORIZETYPE` varchar(20) NOT NULL,
  `DEPARTMENT` varchar(20) NOT NULL,
  `REALNAME` varchar(20) NOT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  UNIQUE KEY `USERUNIQUE` (`USERNAME`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE  `DATA_MATRIX` (
  `ID` bigint(20) NOT NULL AUTO_INCREMENT,
  `GROUP_KEY` varchar(200) DEFAULT NULL,
  `MASTER` varchar(200) DEFAULT NULL,
  `SLAVE` varchar(200) DEFAULT NULL,
  `DESCRIPTION` varchar(200) DEFAULT NULL,
  `GMT_CREATE` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `GMT_MODIFIED` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`ID`),
  KEY `GROUPKEY` (`GROUP_KEY`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE IF NOT EXISTS `meta_history` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT '主键',
  `gmt_create` datetime NOT NULL COMMENT '创立工夫',
  `gmt_modified` datetime NOT NULL COMMENT '批改工夫',
  `destination` varchar(128) DEFAULT NULL COMMENT '通道名称',
  `binlog_file` varchar(64) DEFAULT NULL COMMENT 'binlog文件名',
  `binlog_offest` bigint(20) DEFAULT NULL COMMENT 'binlog偏移量',
  `binlog_master_id` varchar(64) DEFAULT NULL COMMENT 'binlog节点id',
  `binlog_timestamp` bigint(20) DEFAULT NULL COMMENT 'binlog利用的工夫戳',
  `use_schema` varchar(1024) DEFAULT NULL COMMENT '执行sql时对应的schema',
  `sql_schema` varchar(1024) DEFAULT NULL COMMENT '对应的schema',
  `sql_table` varchar(1024) DEFAULT NULL COMMENT '对应的table',
  `sql_text` longtext DEFAULT NULL COMMENT '执行的sql',
  `sql_type` varchar(256) DEFAULT NULL COMMENT 'sql类型',
  `extra` text DEFAULT NULL COMMENT '额定的扩大信息',
  PRIMARY KEY (`id`),
  UNIQUE KEY binlog_file_offest(`destination`,`binlog_master_id`,`binlog_file`,`binlog_offest`),
  KEY `destination` (`destination`),
  KEY `destination_timestamp` (`destination`,`binlog_timestamp`),
  KEY `gmt_modified` (`gmt_modified`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT='表构造变动明细表';

CREATE TABLE IF NOT EXISTS `meta_snapshot` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT COMMENT '主键',
  `gmt_create` datetime NOT NULL COMMENT '创立工夫',
  `gmt_modified` datetime NOT NULL COMMENT '批改工夫',
  `destination` varchar(128) DEFAULT NULL COMMENT '通道名称',
  `binlog_file` varchar(64) DEFAULT NULL COMMENT 'binlog文件名',
  `binlog_offest` bigint(20) DEFAULT NULL COMMENT 'binlog偏移量',
  `binlog_master_id` varchar(64) DEFAULT NULL COMMENT 'binlog节点id',
  `binlog_timestamp` bigint(20) DEFAULT NULL COMMENT 'binlog利用的工夫戳',
  `data` longtext DEFAULT NULL COMMENT '表构造数据',
  `extra` text DEFAULT NULL COMMENT '额定的扩大信息',
  PRIMARY KEY (`id`),
  UNIQUE KEY binlog_file_offest(`destination`,`binlog_master_id`,`binlog_file`,`binlog_offest`),
  KEY `destination` (`destination`),
  KEY `destination_timestamp` (`destination`,`binlog_timestamp`),
  KEY `gmt_modified` (`gmt_modified`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT='表构造记录表快照表';


insert into USER(ID,USERNAME,PASSWORD,AUTHORIZETYPE,DEPARTMENT,REALNAME,GMT_CREATE,GMT_MODIFIED) values(null,'admin','801fc357a5a74743894a','ADMIN','admin','admin',now(),now());
insert into USER(ID,USERNAME,PASSWORD,AUTHORIZETYPE,DEPARTMENT,REALNAME,GMT_CREATE,GMT_MODIFIED) values(null,'guest','471e02a154a2121dc577','OPERATOR','guest','guest',now(),now());

linux服务器新建文件夹/data/program/otter/manager,解压manager.deployer-4.2.18.tar.gz

批改配置conf/otter.properties

## otter manager domain name
otter.domainName = 192.168.8.122
## otter manager http port
otter.port = 8080
## jetty web config xml
otter.jetty = jetty.xml

## otter manager database config
otter.database.driver.class.name = com.mysql.jdbc.Driver
otter.database.driver.url = jdbc:mysql://192.168.8.122:3306/otter
otter.database.driver.username = xgjk_w
otter.database.driver.password = xgjk@2019

## otter communication port
otter.communication.manager.port = 1099

otter.zookeeper.cluster.default = 192.168.8.122:2181

启动manager服务

./bin/startup.sh

启动日志门路 logs/manager.log

拜访http://192.168.8.122:8080呈现otter的页面,即代表启动胜利

初始密码为:admin/admin,即可实现登录. 目前:匿名用户只有只读查看的权限,登录为管理员才能够有操作权限

部署node

实现manager装置后,须要在manager页面为node定义配置信息,并生一个惟一id.

配置zk信息

新增node信息

机器增加实现后,跳转到机器列表页面,获取对应的机器序号nid

通过这两部操作,获取到了node节点对应的惟一标示,称之为node id,简称:nid. 记录该nid,后续启动nid时会应用

开始部署node,新建文件夹/data/program/otter/manager,在文文件夹中解压node.deployer-4.2.18.tar.gz

#nid配置 (将环境筹备中增加机器后获取到的序号,保留到conf目录下的nid文件,比方我增加的机器对应序号为1)
echo 1 > conf/nid

otter.properties配置批改

## otter manager服务地址
otter.manager.address = 192.168.8.122:1099

启动 ./bin/startup.sh,可通过manager控制台查问node状态

启动日志门路 logs/node/node.log

otter表同步配置

mysql1 192.168.8.122:3307

mysql2 192.168.8.122:3308

把mysql1中的chenj321库中的user,stu表同步到 mysql2库中user,stu表

操作步骤:
1.  增加数据库
    a.  源库 jdbc:mysql://192.168.8.122:3307
    b.  指标库 jdbc:mysql://192.168.8.122:3308
2.  增加canal
    a.  提供数据库ip信息 
3.  增加同步表信息
    a.  源数据表 mysql1库中user,stu
    b.  指标数据表 mysql2库中user,stu
4.  增加channel
5.  增加pipeline
    a.  抉择node节点
    b.  抉择canal
6.  增加同步映射规定
    a.  定义源表和指标表的同步关系
7.  启动
8.  测试数据 

增加数据源

配置canal(获取数据库二进制日志)

参数信息

canal序号:    1
canal名称:    canal122
运行状态:    
运行模式:    EMBEDDED
Zookeeper集群:    zk122 [ 192.168.8.122:2181 ]
数据源类型:    MYSQL
数据库地址:    192.168.8.122:3307;
数据库帐号:    canal
connectionCharset:    UTF-8
是否启用gtid位点:    false
位点信息:    {"journalName":"mysql-bin.000011","position":4,"timestamp":0};
是否开启表构造TSDB:    false
rds accesskey:    
rds secretkey:    
rds instanceId:    
存储机制:    MEMORY
内存存储batch获取模式:    MEMSIZE
内存存储buffer记录数:    32768
内存存储buffer记录单元大小:    1024
HA机制:    HEARTBEAT
是否开启心跳:    false
meta机制:    MIXED
索引机制:    MEMORY_META_FAILBACK
服务端口:    11111
默认连贯超时:    
接管BufferSize:    16384
发送BufferSize:    16384
切换回退工夫:    60
过滤表达式:    
形容信息:    

阐明

connectionCharset ==> 获取binlog时指定的编码
位点自定义设置 ==> 格局:{"journalName":"","position":0,"timestamp":0};
指定地位:{"journalName":"","position":0};
指定工夫:{"timestamp":0};

otter中应用canal,不容许pipeline共享一个canal. otter中配置的canal即为一个instance,而otter就是为其一个client,目前canal不反对一个instance多个client的模式,会导致数据失落,谨慎

配置表信息

增加channel

进入该channel,新建一个Pipeline

在pipeline的高级设置中,能够敞开DDL同步,则表构造的批改不会同步

参数信息

Pipeline序号:    1
Pipeline名字:    docker122-mysql1-pipeline
Select机器:    node122;
Load机器:    node122;
并行度:    5
数据反查线程数:    10
数据载入线程数:    15
文件载入线程数:    15
主站点:    false
同步数据起源:    Canal
Canal名字:    canal122
主道生产批次大小:    6000
获取批次数据超时工夫:    -1
形容信息:    
是否显示高级设置:    
应用batch:    true
跳过Select异样:    敞开
跳过Load异样:    敞开
仲裁器调度模式:    主动抉择
负载平衡算法:    Stick
传输模式:    主动抉择
记录selector日志:    开启
记录selector具体日志:    敞开
记录load日志:    敞开
dryRun模式:    敞开
反对ddl同步:    敞开
跳过ddl异样:    敞开
文件反复同步比照:    敞开
文件传输加密:    敞开
启用公网同步:    敞开
跳过自在门数据:    敞开
跳过反查无记录数据:    敞开
启用数据表类型转化:    敞开
兼容字段新增同步:    开启
自定义同步标记:    

增加同步映射规定

参数信息

映射关系序号:    6
源端数据表:    Schema Name:chenj321 Table Name:stu Source Name:docker122-mysql1
指标数据表:    Schema Name:chenj321 Table Name:stu Source Name:docker122-mysql2
权重:    5
视图模式:    INCLUDE
Event Processor:    
File Resolver:    
字段同步:    [ id - id ] [ name - name ]
组合同步:    


映射关系序号:    5
源端数据表:    Schema Name:chenj321Table Name:userSource Name:docker122-mysql1
指标数据表:    Schema Name:chenj321Table Name:userSource Name:docker122-mysql2
权重:    5
视图模式:    INCLUDE
Event Processor:    
File Resolver:    
字段同步:    [ id - id ] [ name - name ] [ code - code ]
组合同步:    

映射规定配置阐明 https://github.com/alibaba/ot…

全副配置好之后,启动channel即可

表同步日志门路:node零碎中的 logs/1/1.log

otter同步出错后从新同步过程

首先须要了解一下canal的地位治理,次要有两个位点信息:起始地位 和 运行地位(记录最初一次失常生产的地位). 优先加载运行地位,第一次启动无运行地位,就会应用起始地位进行初始化,第一次客户端反馈了ack信号后,就会记录运行地位.

所以重置地位的几步操作:

1.删除运行地位. (pipeline同步进度页面配置)

2.配置canal从新同步的点位信息

{"journalName":"mysql-bin.000011","position":4,"timestamp":0};

otter目前存在的同步限度

暂不反对无主键表同步. (同步的表必须要有主键)

反对局部ddl同步 (反对create table / drop table / alter table / truncate table / rename table / create index / drop index,其余类型的暂不反对,比方grant,create user,trigger等等),同时ddl语句不反对幂等性操作,所以呈现反复同步时,会导致同步挂起,可通过配置高级参数:跳过ddl异样,来解决这个问题.

不反对带外键的记录同步. (数据载入算法会打散事务,进行并行处理,会导致外键束缚无奈满足)

数据库上trigger配置谨慎. (比方源库,有一张A表配置了trigger,将A表上的变动记录到B表中,而B表也须要同步。如果指标库也有这trigger,在同步时会插入一次A表,2次B表,因为A表的同步插入也会触发trigger插入一次B表,所以有2次B表同步)

otter全量数据同步

首先须要在源库中,须要减少一个retl库和一个retl用户,明码retl

CREATE DATABASE retl;
-- 创立用户
CREATE USER retl@'%' IDENTIFIED BY 'retl';

-- 用户受权 给同步用户受权
GRANT USAGE ON *.* TO `retl`@'%';
GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO `retl`@'%';
GRANT SELECT, INSERT, UPDATE, DELETE, EXECUTE ON `retl`.* TO `retl`@'%';
-- 业务表受权,这里能够限定只受权同步业务的表
GRANT SELECT, INSERT, UPDATE, DELETE ON *.* TO `retl`@'%'; 

在retl库中创立零碎表

CREATE TABLE retl_buffer 
   (    
    ID BIGINT AUTO_INCREMENT,   ## 无意义,自增即可
    TABLE_ID INT(11) NOT NULL,   ## tableId, 可通过该链接查问:http://otter.alibaba-inc.com/data_media_list.htm,即序号这一列,如果配置的是正则,须要指定full_name,以后table_id设置为0. 
    FULL_NAME varchar(512),  ## schemaName + '.' +  tableName  (如果明确指定了table_id,能够不必指定full_name)
    TYPE CHAR(1) NOT NULL,   ## I/U/D ,别离对应于insert/update/delete
    PK_DATA VARCHAR(256) NOT NULL, ## 多个pk之间应用char(1)进行分隔
    GMT_CREATE datetime NOT NULL DEFAULT CURRENT_TIMESTAMP, ## 无意义,零碎工夫即可
    GMT_MODIFIED datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,  ## 无意义,零碎工夫即可
    CONSTRAINT RETL_BUFFER_ID PRIMARY KEY (ID) 
   )  ENGINE=InnoDB DEFAULT CHARSET=utf8;

CREATE TABLE retl_mark
(    
    ID BIGINT AUTO_INCREMENT,
    CHANNEL_ID INT(11),
    CHANNEL_INFO varchar(128),
    CONSTRAINT RETL_MARK_ID PRIMARY KEY (ID) 
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

CREATE TABLE xdual (
  ID BIGINT(20) NOT NULL AUTO_INCREMENT,
  X timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (ID)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

-- 插入初始化数据
INSERT INTO retl.xdual(id, x) VALUES (1,now()) ON DUPLICATE KEY UPDATE x = now();

把你想要同步表的表名及记录ID插入到retl_buffer表即可

-- 如把chenj321.user表 触发全量同步, 待数据同步实现后,该表能够清空掉
insert into retl_buffer(TABLE_ID, FULL_NAME,TYPE,PK_DATA) (select 0,'chenj321.user','I',id from chenj321.user); 

评论

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注

这个站点使用 Akismet 来减少垃圾评论。了解你的评论数据如何被处理