Read and Write Tables From Hive With Java/Scala

How to read and write tables from Hive with Java/Scala.

Without a Kerberized Cluster

  1. Install the following Maven dependency:

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
       <groupId>org.apache.hive</groupId>
       <artifactId>hive-jdbc</artifactId>
       <version>${hive-jdbc.version}</version>
    </dependency>
  2. Initialize the connection to Hive by running the following lines of code:

    private static final String HOST_HIVE = System.getenv("HOST_HIVE");
    private static final String PORT_HIVE = System.getenv("PORT_HIVE");
    private static final String LOGIN = System.getenv("LOGIN");
    private static final String PASSWORD = System.getenv("PASSWORD");
    
    private static final String JDBC_DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver";
    // To set the JDBC Hive driver.
    Class.forName(JDBC_DRIVER_NAME);
    
    // To connect to Hive.
    // Choose a user who has write access to /user/hive/warehouse/
    // (e.g. hdfs)
    con = DriverManager.getConnection("jdbc:hive2://" + HOST_HIVE + ":" + PORT_HIVE + "/;ssl=false", LOGIN, PASSWORD);
    
    // To initialize the statement.
    Statement stmt = con.createStatement();
    The Hive connection URL format must be jdbc:hive2://<hiveserver>:10000/;ssl=false, where 10000 is the default port.
  3. You can now create Hive tables, and insert and select data in tables by running the following lines of code:

    • Create Tables in Hive

    • Insert Data Into Hive Tables

    • Select Data From Hive Tables

    String sqlStatementDrop = "DROP TABLE IF EXISTS helloworld";
    String sqlStatementCreate = "CREATE TABLE helloworld (message String) STORED AS PARQUET";
    
    // To execute the DROP TABLE query.
    stmt.execute(sqlStatementDrop);
    
    // To execute the CREATE query.
    stmt.execute(sqlStatementCreate);
    String sqlStatementInsert = "INSERT INTO helloworld VALUES (\"helloworld\")";
    
    // To execute the INSERT query.
    stmt.execute(sqlStatementInsert);
    String sqlStatementSelect = "SELECT * from helloworld";
    
    // To execute the SELECT query.
    ResultSet rs = stmt.executeQuery(sqlStatementSelect);
    
    // To process results.
    while(rs.next()) {
        logger.info(rs.getString(1));
    }

With a Kerberized Cluster

  1. Install the following Maven dependencies:

    <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-hdfs</artifactId>
         <version>2.6.0-cdh5.16.1.1</version>
    </dependency>
    <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-common</artifactId>
         <version>2.6.0-cdh5.16.1.1</version>
    </dependency>
    <dependency>
         <groupId>org.apache.hive</groupId>
         <artifactId>hive-jdbc</artifactId>
         <version>1.1.1</version>
    </dependency>
  2. Add the jaas.conf file under src/main/resources by running the following lines of code:

    Main {
     com.sun.security.auth.module.Krb5LoginModule required client=TRUE;
    };
  3. Initialize your connection by creating a login context function with the following lines of code:

    private static final String JDBC_DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver";
    private static String HADOOP_CONF_DIR = System.getenv("HADOOP_CONF_DIR");
    
    private static LoginContext kinit(String username, String password) throws LoginException {
        LoginContext lc = new LoginContext(Main.class.getSimpleName(), callbacks -> {
            for (Callback c : callbacks) {
                if (c instanceof NameCallback)
                    ((NameCallback) c).setName(username);
                if (c instanceof PasswordCallback)
                    ((PasswordCallback) c).setPassword(password.toCharArray());
            }
        });
        lc.login();
        return lc;
    }
  4. Initialize the connection to Hive by running the following lines of code:

    private static final String HOST_HIVE = System.getenv("HOST_HIVE");
    private static final String PORT_HIVE = System.getenv("PORT_HIVE");
    private static final String LOGIN = System.getenv("LOGIN");
    private static final String PASSWORD = System.getenv("PASSWORD");
    
    URL url = Main.class.getClassLoader().getResource("jaas.conf");
    System.setProperty("java.security.auth.login.config", url.toExternalForm());
    
    Configuration conf = new Configuration();
    conf.addResource(new Path("file:///" + HADOOP_CONF_DIR + "/core-site.xml"));
    conf.addResource(new Path("file:///" + HADOOP_CONF_DIR + "/hdfs-site.xml"));
    
    UserGroupInformation.setConfiguration(conf);
    
    LoginContext lc = kinit(LOGIN, PASSWORD);
    UserGroupInformation.loginUserFromSubject(lc.getSubject());
    String kerberosRealm = UserGroupInformation.getLoginUser().getUserName().split("@")[1];
    
    Class.forName(JDBC_DRIVER_NAME);
    
    // Hive
    Connection hiveConnection = DriverManager.getConnection("jdbc:hive2://" + HIVE_HOSTNAME + ":" + PORT_HIVE + "/;principal=hive/" + HIVE_HOSTNAME + "@" + kerberosRealm);
  5. You can now create Hive tables, and insert and select data in tables by running the following lines of code:

    • Create Tables in Hive

    • Insert Data Into Hive Tables

    • Select Data From Hive Tables

    String sqlStatementDrop = "DROP TABLE IF EXISTS hivetest";
    String sqlStatementCreate = "CREATE TABLE hivetest (message String) STORED AS PARQUET";
    Statement stmt = hiveConnection.createStatement();
    
    // To execute the DROP TABLE query.
    stmt.execute(sqlStatementDrop);
    
    // To execute the CREATE query.
    stmt.execute(sqlStatementCreate);
    String sqlStatementInsert = "INSERT INTO helloworld VALUES (\"helloworld\")";
    
    // To execute the INSERT query.
    stmt.execute(sqlStatementInsert);
    String sqlStatementSelect = "SELECT * from helloworld";
    
    // To execute the SELECT query.
    ResultSet rs = stmt.executeQuery(sqlStatementSelect);
    
    // To process results.
    while(rs.next()) {
       logger.info(rs.getString(1));
    }