Spring Batch是一个优秀的离线批处理框架。它可以批量处理的数据格式包括文本文件、XML文件以及持久化的数据。此外,Spring Batch还提供了丰富的扩展接口,用于处理其他格式的数据。它适用于中小规模的数据处理(对于海量的数据可以考虑Hadoop)。典型的使用场景包括账单对账、数据迁移、定时批量更新数据等。
层次架构如上图,分为三层:应用层、核心层、基础设施层。应用层包括所有的batch作业和用户开发的代码。核心层包括在运行期运行一个作业所需要的类,例如JobLauncher、Job和Step的实现。应用层和核心层都构建在基础设施层之上,基础设施层包括通用的读写器(readers and writers)以及RetryTemplate等服务。
下面演示如何将customer表中的数据批量导入到customer2表。customer表结构如下:
-- Source table for the batch-import demo (customer -> customer2).
-- The default charset is utf8mb4 rather than latin1 so that non-Latin text in
-- name/address can be stored, and so the table agrees with connections that
-- issue "set names utf8mb4".
CREATE TABLE `customer` (
  `id` int(11) NOT NULL,
  `name` varchar(45) NOT NULL,
  `age` int(11) NOT NULL,
  `address` varchar(45) NOT NULL DEFAULT '',
  `code` varchar(45) NOT NULL DEFAULT '',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
作业元数据保存在mysql的时候配置
<!-- connect to database -->
<!-- Data source for the spring_batch_job schema; referenced below by the
     transaction manager and the job repository. -->
<bean id="jobMetaDataSource" class="org.springframework.jdbc.datasource.DriverManagerDataSource">
    <property name="driverClassName" value="com.mysql.jdbc.Driver"/>
    <property name="url" value="jdbc:mysql://localhost:3306/spring_batch_job"/>
    <property name="username" value="root"/>
    <property name="password" value="123456"/>
</bean>

<!-- Use only when creating the job-meta tables for the first time -->
<!-- Create job-meta tables automatically -->
<!-- Runs the drop script first, then the create script, against jobMetaDataSource. -->
<jdbc:initialize-database data-source="jobMetaDataSource">
    <jdbc:script location="org/springframework/batch/core/schema-drop-mysql.sql"/>
    <jdbc:script location="org/springframework/batch/core/schema-mysql.sql"/>
</jdbc:initialize-database>

<!-- Druid data source for the "example" schema. -->
<bean id="myDataSource" class="com.alibaba.druid.pool.DruidDataSource" init-method="init" destroy-method="close">
    <property name="driverClassName" value="com.mysql.jdbc.Driver"/>
    <property name="url" value="jdbc:mysql://localhost:3306/example"/>
    <property name="username" value="root"/>
    <property name="password" value="123456"/>
    <property name="maxActive" value="100"/>
    <property name="maxWait" value="1000"/>
    <property name="defaultAutoCommit" value="true"/>
    <property name="validationQuery" value="SELECT 1"/>
    <property name="initialSize" value="30"/>
    <property name="minIdle" value="30"/>
    <property name="testWhileIdle" value="true"/>
    <!--<property name="testOnBorrow" value="true"/>-->
    <!--<property name="testOnReturn" value="true"/>-->
    <!--<property name="removeAbandoned" value="true"/>-->
    <!--<property name="removeAbandonedTimeout" value="1800"/>-->
    <property name="timeBetweenEvictionRunsMillis" value="7200000"/>
    <property name="connectionInitSqls" value="set names utf8mb4"/>
</bean>

<!-- Transaction manager bound to the job-metadata data source. -->
<bean id="transactionManager" class="org.springframework.jdbc.datasource.DataSourceTransactionManager">
    <property name="dataSource" ref="jobMetaDataSource"/>
</bean>

<!-- stored job-metadata in database -->
<bean id="jobRepository" class="org.springframework.batch.core.repository.support.JobRepositoryFactoryBean">
    <property name="dataSource" ref="jobMetaDataSource"/>
    <property name="transactionManager" ref="transactionManager"/>
    <property name="databaseType" value="mysql"/>
</bean>
也可以将作业元数据保存到内存中
<!-- stored job-metadata in memory -->
<!-- Alternative jobRepository definition using the map-based factory bean;
     it takes only a transaction manager and no data source. -->
<bean id="jobRepository"
      class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean">
    <property name="transactionManager" ref="transactionManager"/>
</bean>
CustomerRowMapper类源码
/**
 * Row mapper that converts the current row of a result set into a
 * {@link Customer}.
 */
public class CustomerRowMapper implements RowMapper<Customer> {

    /**
     * Builds a {@link Customer} from the current row of {@code rs}.
     *
     * @param rs     result set positioned on the row to map
     * @param rowNum number of the current row (not used by this mapper)
     * @return a new Customer populated with id, age, name, address and code
     * @throws SQLException if reading any column from {@code rs} fails
     */
    @Override
    public Customer mapRow(ResultSet rs, int rowNum) throws SQLException {
        Customer customer = new Customer();
        customer.setId(rs.getLong(Customer.ID));
        customer.setAge(rs.getInt(Customer.AGE));
        customer.setName(rs.getString(Customer.NAME));
        // Read the address value from the row; passing Customer.ADDRESS directly
        // would store the constant itself instead of the row's data.
        customer.setAddress(rs.getString(Customer.ADDRESS));
        customer.setCode(rs.getString(Customer.CODE));
        return customer;
    }
}