首页>>互联网>>大数据->Java操作Apache HBase API以及HBase和MapReduce整合

Java操作Apache HBase API以及HBase和MapReduce整合

时间:2023-11-29 本站 点击:0

Java操作HBase API

添加依赖

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>3.1.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.1.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>2.2.5</version>
        </dependency>
        <!--java.lang.NoSuchMethodError: 'void org.apache.hadoop.security.HadoopKerberosName.setRuleMechanism(java.lang.String)'-->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-auth</artifactId>
            <version>3.1.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>2.2.5</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13</version>
        </dependency>
    </dependencies>

添加配置文件

添加Hadoop和HBase的配置文件到项目Resources目录

core-site.xml、hbase-site.xml、hdfs-site.xml、mapred-site.xml、yarn-site.xml

初始化与资源释放

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL {    /**     * 获取HBase管理员类     */    private Admin admin;    /**     * 获取数据库连接     */    private Connection connection;    /**     * 初始化     *     * @throws IOException     */    @Before    public void init() throws IOException {        Configuration configuration = HBaseConfiguration.create();        this.connection = ConnectionFactory.createConnection(configuration);        this.admin = connection.getAdmin();    }    /**     * 资源释放     */    @After    public void destory() throws IOException {        if (admin != null) {            admin.close();        }        if (connection != null) {            connection.close();        }    }}

创建命名空间

    /**
     * Creates the namespace {@code hbaseNamespace} through the admin handle.
     *
     * @throws IOException if the admin call fails
     */
    @Test
    public void createNameSpace() throws IOException {
        admin.createNamespace(NamespaceDescriptor.create("hbaseNamespace").build());
    }

指定名称空间创建多列族的表

    /**
     * Creates the table {@code hbaseNamespace:user} with two column families,
     * {@code info} and {@code score}.
     *
     * @throws IOException if the admin call fails
     */
    @Test
    public void createMultiPartColumnFamilyTable() throws IOException {
        // Build the column family descriptors first, in the order info, score.
        List<ColumnFamilyDescriptor> families = new ArrayList<>();
        families.add(ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("info")).build());
        families.add(ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("score")).build());

        TableName userTable = TableName.valueOf("hbaseNamespace:user");
        TableDescriptor descriptor = TableDescriptorBuilder.newBuilder(userTable)
                .setColumnFamilies(families)
                .build();
        admin.createTable(descriptor);
    }

默认名称空间创建单列族的表

    /**
     * Creates the table {@code user} (no namespace prefix) with a single
     * column family {@code info}.
     *
     * @throws IOException if the admin call fails
     */
    @Test
    public void createOneColumnFamilyTable() throws IOException {
        ColumnFamilyDescriptor infoFamily =
                ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("info")).build();
        TableDescriptor descriptor = TableDescriptorBuilder.newBuilder(TableName.valueOf("user"))
                .setColumnFamily(infoFamily)
                .build();
        admin.createTable(descriptor);
    }

查询所有表信息

    /**
     * Prints every table name returned by {@code admin.listTableNames()},
     * one per line, prefixed with {@code tableName:}.
     *
     * @throws IOException if the admin call fails
     */
    @Test
    public void listTables() throws IOException {
        TableName[] names = admin.listTableNames();
        for (int i = 0; i < names.length; i++) {
            System.out.println("tableName:" + names[i]);
        }
    }

tableName:user
tableName:hbaseNamespace:user

列出指定命名空间的表信息

    /**
     * Prints every table name in the namespace {@code hbaseNamespace},
     * one per line, prefixed with {@code tableName:}.
     *
     * @throws IOException if the admin call fails
     */
    @Test
    public void listTablesByNameSpace() throws IOException {
        TableName[] names = admin.listTableNamesByNamespace("hbaseNamespace");
        for (int i = 0; i < names.length; i++) {
            System.out.println("tableName:" + names[i]);
        }
    }

tableName:hbaseNamespace:user

添加数据

core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml0

core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml1

批量添加数据

core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml2

core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml3

删除数据

core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml4

core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml5

根据rowKey获取一行数据及获取列信息

core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml6

core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml7

获取多行数据

core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml8

core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml7core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml7

扫描整个表空间

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL {    /**     * 获取HBase管理员类     */    private Admin admin;    /**     * 获取数据库连接     */    private Connection connection;    /**     * 初始化     *     * @throws IOException     */    @Before    public void init() throws IOException {        Configuration configuration = HBaseConfiguration.create();        this.connection = ConnectionFactory.createConnection(configuration);        this.admin = connection.getAdmin();    }    /**     * 资源释放     */    @After    public void destory() throws IOException {        if (admin != null) {            admin.close();        }        if (connection != null) {            connection.close();        }    }}0

rowKey:rowKey003core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml7rowKey:rowKey004core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml7rowKey:rowKey005core-site.xmlhbase-site.xmlhdfs-site.xmlmapred-site.xmlyarn-site.xml7

HBase和MapReduce整合

添加依赖

在Java操作HBase API的依赖基础上添加额外依赖

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL {    /**     * 获取HBase管理员类     */    private Admin admin;    /**     * 获取数据库连接     */    private Connection connection;    /**     * 初始化     *     * @throws IOException     */    @Before    public void init() throws IOException {        Configuration configuration = HBaseConfiguration.create();        this.connection = ConnectionFactory.createConnection(configuration);        this.admin = connection.getAdmin();    }    /**     * 资源释放     */    @After    public void destory() throws IOException {        if (admin != null) {            admin.close();        }        if (connection != null) {            connection.close();        }    }}2

从HDFS数据写入HBase

map类

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL {    /**     * 获取HBase管理员类     */    private Admin admin;    /**     * 获取数据库连接     */    private Connection connection;    /**     * 初始化     *     * @throws IOException     */    @Before    public void init() throws IOException {        Configuration configuration = HBaseConfiguration.create();        this.connection = ConnectionFactory.createConnection(configuration);        this.admin = connection.getAdmin();    }    /**     * 资源释放     */    @After    public void destory() throws IOException {        if (admin != null) {            admin.close();        }        if (connection != null) {            connection.close();        }    }}3

reduce类

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL {    /**     * 获取HBase管理员类     */    private Admin admin;    /**     * 获取数据库连接     */    private Connection connection;    /**     * 初始化     *     * @throws IOException     */    @Before    public void init() throws IOException {        Configuration configuration = HBaseConfiguration.create();        this.connection = ConnectionFactory.createConnection(configuration);        this.admin = connection.getAdmin();    }    /**     * 资源释放     */    @After    public void destory() throws IOException {        if (admin != null) {            admin.close();        }        if (connection != null) {            connection.close();        }    }}4

创建表

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL {    /**     * 获取HBase管理员类     */    private Admin admin;    /**     * 获取数据库连接     */    private Connection connection;    /**     * 初始化     *     * @throws IOException     */    @Before    public void init() throws IOException {        Configuration configuration = HBaseConfiguration.create();        this.connection = ConnectionFactory.createConnection(configuration);        this.admin = connection.getAdmin();    }    /**     * 资源释放     */    @After    public void destory() throws IOException {        if (admin != null) {            admin.close();        }        if (connection != null) {            connection.close();        }    }}5

job类

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL {    /**     * 获取HBase管理员类     */    private Admin admin;    /**     * 获取数据库连接     */    private Connection connection;    /**     * 初始化     *     * @throws IOException     */    @Before    public void init() throws IOException {        Configuration configuration = HBaseConfiguration.create();        this.connection = ConnectionFactory.createConnection(configuration);        this.admin = connection.getAdmin();    }    /**     * 资源释放     */    @After    public void destory() throws IOException {        if (admin != null) {            admin.close();        }        if (connection != null) {            connection.close();        }    }}6

测试

Apache HBase™ is the Hadoop database, a distributed, scalable, big data store. Use Apache HBase™ when you need random, realtime read/write access to your Big Data. This project's goal is the hosting of very large tables -- billions of rows X millions of columns -- atop clusters of commodity hardware. Apache HBase is an open-source, distributed, versioned, non-relational database modeled after Google's Bigtable: A Distributed Storage System for Structured Data by Chang et al. Just as Bigtable leverages the distributed data storage provided by the Google File System, Apache HBase provides Bigtable-like capabilities on top of Hadoop and HDFS.

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL {    /**     * 获取HBase管理员类     */    private Admin admin;    /**     * 获取数据库连接     */    private Connection connection;    /**     * 初始化     *     * @throws IOException     */    @Before    public void init() throws IOException {        Configuration configuration = HBaseConfiguration.create();        this.connection = ConnectionFactory.createConnection(configuration);        this.admin = connection.getAdmin();    }    /**     * 资源释放     */    @After    public void destory() throws IOException {        if (admin != null) {            admin.close();        }        if (connection != null) {            connection.close();        }    }}7

从HBase导出到HDFS

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL {    /**     * 获取HBase管理员类     */    private Admin admin;    /**     * 获取数据库连接     */    private Connection connection;    /**     * 初始化     *     * @throws IOException     */    @Before    public void init() throws IOException {        Configuration configuration = HBaseConfiguration.create();        this.connection = ConnectionFactory.createConnection(configuration);        this.admin = connection.getAdmin();    }    /**     * 资源释放     */    @After    public void destory() throws IOException {        if (admin != null) {            admin.close();        }        if (connection != null) {            connection.close();        }    }}8

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.NamespaceDescriptor;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.util.Bytes;public class HelloHBaseDDL {    /**     * 获取HBase管理员类     */    private Admin admin;    /**     * 获取数据库连接     */    private Connection connection;    /**     * 初始化     *     * @throws IOException     */    @Before    public void init() throws IOException {        Configuration configuration = HBaseConfiguration.create();        this.connection = ConnectionFactory.createConnection(configuration);        this.admin = connection.getAdmin();    }    /**     * 资源释放     */    @After    public void destory() throws IOException {        if (admin != null) {            admin.close();        }        if (connection != null) {            connection.close();        }    }}9

    @Test    public void createNameSpace() throws IOException {        NamespaceDescriptor mkNameSpace = NamespaceDescriptor.create("hbaseNamespace").build();        this.admin.createNamespace(mkNameSpace);    }0


本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。
如若转载,请注明出处:/BigData/1284.html