Java操作HBase API
添加依赖
<dependencies>
    <!-- Spring Boot web starter (no version given here; presumably managed by a parent POM/BOM - confirm) -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>

    <!-- Hadoop 3.1.3 artifacts -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>3.1.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.1.3</version>
    </dependency>

    <!-- HBase 2.2.5 server artifact -->
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>2.2.5</version>
    </dependency>

    <!--
        NOTE(review): the original author recorded the following error next to hadoop-auth;
        presumably this explicit declaration is what resolves it (confirm):
        java.lang.NoSuchMethodError: 'void org.apache.hadoop.security.HadoopKerberosName.setRuleMechanism(java.lang.String)'
    -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-auth</artifactId>
        <version>3.1.3</version>
    </dependency>

    <!-- HBase 2.2.5 client artifact -->
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>2.2.5</version>
    </dependency>

    <!-- JUnit 4 (the @Before/@After/@Test annotations used by the examples) -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.13</version>
    </dependency>
</dependencies>
添加配置文件
添加Hadoop和HBase的配置文件到项目Resources目录
core-site.xml、hbase-site.xml、hdfs-site.xml、mapred-site.xml、yarn-site.xml
初始化与资源释放
import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL { /** * 获取HBase管理员类 */ private Admin admin; /** * 获取数据库连接 */ private Connection connection; /** * 初始化 * * @throws IOException */ @Before public void init() throws IOException { Configuration configuration = HBaseConfiguration.create(); this.connection = ConnectionFactory.createConnection(configuration); this.admin = connection.getAdmin(); } /** * 资源释放 */ @After public void destory() throws IOException { if (admin != null) { admin.close(); } if (connection != null) { connection.close(); } }}
创建命名空间
/**
 * Creates the namespace {@code hbaseNamespace} through the admin client.
 *
 * @throws IOException if the admin call fails
 */
@Test
public void createNameSpace() throws IOException {
    this.admin.createNamespace(NamespaceDescriptor.create("hbaseNamespace").build());
}
指定名称空间创建多列族的表
/**
 * Creates the table {@code hbaseNamespace:user} with two column families,
 * {@code info} and {@code score}.
 *
 * @throws IOException if the admin call fails
 */
@Test
public void createMultiPartColumnFamilyTable() throws IOException {
    TableDescriptorBuilder tableBuilder =
            TableDescriptorBuilder.newBuilder(TableName.valueOf("hbaseNamespace:user"));

    // Build both column-family descriptors up front and register them in one call.
    List<ColumnFamilyDescriptor> families = new ArrayList<>();
    families.add(ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("info")).build());
    families.add(ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("score")).build());
    tableBuilder.setColumnFamilies(families);

    TableDescriptor descriptor = tableBuilder.build();
    admin.createTable(descriptor);
}
默认名称空间创建单列族的表
/**
 * Creates the table {@code user} (no namespace prefix) with a single column
 * family named {@code info}.
 *
 * @throws IOException if the admin call fails
 */
@Test
public void createOneColumnFamilyTable() throws IOException {
    TableDescriptorBuilder tableBuilder = TableDescriptorBuilder.newBuilder(TableName.valueOf("user"));
    ColumnFamilyDescriptor infoFamily =
            ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("info")).build();
    tableBuilder.setColumnFamily(infoFamily);

    TableDescriptor descriptor = tableBuilder.build();
    admin.createTable(descriptor);
}
查询所有表信息
/**
 * Prints the name of every table returned by {@code admin.listTableNames()},
 * one per line, prefixed with {@code tableName:}.
 *
 * @throws IOException if the admin call fails
 */
@Test
public void listTables() throws IOException {
    TableName[] names = admin.listTableNames();
    for (int i = 0; i < names.length; i++) {
        System.out.println("tableName:" + names[i]);
    }
}
tableName:user
tableName:hbaseNamespace:user
列出指定命名空间的表信息
/**
 * Prints the name of every table in the namespace {@code hbaseNamespace},
 * one per line, prefixed with {@code tableName:}.
 *
 * @throws IOException if the admin call fails
 */
@Test
public void listTablesByNameSpace() throws IOException {
    TableName[] names = admin.listTableNamesByNamespace("hbaseNamespace");
    for (int i = 0; i < names.length; i++) {
        System.out.println("tableName:" + names[i]);
    }
}
tableName:hbaseNamespace:user
添加数据
core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml0
core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml1
批量添加数据
core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml2
core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml3
删除数据
core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml4
core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml5
根据rowKey获取一行数据及获取列信息
core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml6
core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml7
获取多行数据
core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml8
core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml7core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml7
扫描整个表空间
import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL { /** * 获取HBase管理员类 */ private Admin admin; /** * 获取数据库连接 */ private Connection connection; /** * 初始化 * * @throws IOException */ @Before public void init() throws IOException { Configuration configuration = HBaseConfiguration.create(); this.connection = ConnectionFactory.createConnection(configuration); this.admin = connection.getAdmin(); } /** * 资源释放 */ @After public void destory() throws IOException { if (admin != null) { admin.close(); } if (connection != null) { connection.close(); } }}0
rowKey:rowKey003core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml7rowKey:rowKey004core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml7rowKey:rowKey005core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml7
HBase和MapReduce整合
添加依赖
在Java操作HBase API的依赖基础上添加额外依赖
import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL { /** * 获取HBase管理员类 */ private Admin admin; /** * 获取数据库连接 */ private Connection connection; /** * 初始化 * * @throws IOException */ @Before public void init() throws IOException { Configuration configuration = HBaseConfiguration.create(); this.connection = ConnectionFactory.createConnection(configuration); this.admin = connection.getAdmin(); } /** * 资源释放 */ @After public void destory() throws IOException { if (admin != null) { admin.close(); } if (connection != null) { connection.close(); } }}2
从HDFS数据写入HBase
map类
import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL { /** * 获取HBase管理员类 */ private Admin admin; /** * 获取数据库连接 */ private Connection connection; /** * 初始化 * * @throws IOException */ @Before public void init() throws IOException { Configuration configuration = HBaseConfiguration.create(); this.connection = ConnectionFactory.createConnection(configuration); this.admin = connection.getAdmin(); } /** * 资源释放 */ @After public void destory() throws IOException { if (admin != null) { admin.close(); } if (connection != null) { connection.close(); } }}3
reduce类
import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL { /** * 获取HBase管理员类 */ private Admin admin; /** * 获取数据库连接 */ private Connection connection; /** * 初始化 * * @throws IOException */ @Before public void init() throws IOException { Configuration configuration = HBaseConfiguration.create(); this.connection = ConnectionFactory.createConnection(configuration); this.admin = connection.getAdmin(); } /** * 资源释放 */ @After public void destory() throws IOException { if (admin != null) { admin.close(); } if (connection != null) { connection.close(); } }}4
创建表
import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL { /** * 获取HBase管理员类 */ private Admin admin; /** * 获取数据库连接 */ private Connection connection; /** * 初始化 * * @throws IOException */ @Before public void init() throws IOException { Configuration configuration = HBaseConfiguration.create(); this.connection = ConnectionFactory.createConnection(configuration); this.admin = connection.getAdmin(); } /** * 资源释放 */ @After public void destory() throws IOException { if (admin != null) { admin.close(); } if (connection != null) { connection.close(); } }}5
job类
import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL { /** * 获取HBase管理员类 */ private Admin admin; /** * 获取数据库连接 */ private Connection connection; /** * 初始化 * * @throws IOException */ @Before public void init() throws IOException { Configuration configuration = HBaseConfiguration.create(); this.connection = ConnectionFactory.createConnection(configuration); this.admin = connection.getAdmin(); } /** * 资源释放 */ @After public void destory() throws IOException { if (admin != null) { admin.close(); } if (connection != null) { connection.close(); } }}6
测试
Apache HBase™ is the Hadoop database, a distributed, scalable, big data store. Use Apache HBase™ when you need random, realtime read/write access to your Big Data. This project's goal is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware. Apache HBase is an open-source, distributed, versioned, non-relational database modeled after Google's Bigtable: A Distributed Storage System for Structured Data by Chang et al. Just as Bigtable leverages the distributed data storage provided by the Google File System, Apache HBase provides Bigtable-like capabilities on top of Hadoop and HDFS.
import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL { /** * 获取HBase管理员类 */ private Admin admin; /** * 获取数据库连接 */ private Connection connection; /** * 初始化 * * @throws IOException */ @Before public void init() throws IOException { Configuration configuration = HBaseConfiguration.create(); this.connection = ConnectionFactory.createConnection(configuration); this.admin = connection.getAdmin(); } /** * 资源释放 */ @After public void destory() throws IOException { if (admin != null) { admin.close(); } if (connection != null) { connection.close(); } }}7
从HBase导出到HDFS
import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL { /** * 获取HBase管理员类 */ private Admin admin; /** * 获取数据库连接 */ private Connection connection; /** * 初始化 * * @throws IOException */ @Before public void init() throws IOException { Configuration configuration = HBaseConfiguration.create(); this.connection = ConnectionFactory.createConnection(configuration); this.admin = connection.getAdmin(); } /** * 资源释放 */ @After public void destory() throws IOException { if (admin != null) { admin.close(); } if (connection != null) { connection.close(); } }}8
import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL { /** * 获取HBase管理员类 */ private Admin admin; /** * 获取数据库连接 */ private Connection connection; /** * 初始化 * * @throws IOException */ @Before public void init() throws IOException { Configuration configuration = HBaseConfiguration.create(); this.connection = ConnectionFactory.createConnection(configuration); this.admin = connection.getAdmin(); } /** * 资源释放 */ @After public void destory() throws IOException { if (admin != null) { admin.close(); } if (connection != null) { connection.close(); } }}9
@Test public void createNameSpace() throws IOException { NamespaceDescriptor mkNameSpace = NamespaceDescriptor.create("hbaseNamespace").build(); this.admin.createNamespace(mkNameSpace); }0