Read and Write Tables From Impala With Java/Scala

How to read and write tables from Impala with Java/Scala.

Without a Kerberized Cluster

  1. Install the following Maven dependency:

    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-jdbc</artifactId>
        <version>1.2.1</version>
    </dependency>
  2. Initialize the connection to Impala by running the following lines of code:

    private static final String HOST_IMPALA = System.getenv("HOST_IMPALA");
    private static final String PORT_IMPALA = System.getenv("PORT_IMPALA");
    private static final String LOGIN = System.getenv("LOGIN");
    private static final String PASSWORD = System.getenv("PASSWORD");
    
    private static final String JDBC_DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver";
    // To set the JDBC Impala driver.
    Class.forName(JDBC_DRIVER_NAME);
    
    // To connect to Impala.
    impalaConnection = DriverManager.getConnection("jdbc:hive2://" + HOST_IMPALA + ":" + PORT_IMPALA + "/;ssl=false", LOGIN, PASSWORD);
    
    // To initialize the statement.
    Statement stmt = con.createStatement();
    The Impala connection URL format must be jdbc:hive2://impalahost:21050/database, where 21050 is the default port.
  3. You can now create Impala tables, and insert and select data in tables by running the following lines of code:

    • Create Tables in Impala

    • Insert Data Into Impala Tables

    • Select Data From Impala Tables

    String sqlStatementInvalidate = "INVALIDATE METADATA";
    String sqlStatementDrop = "DROP TABLE IF EXISTS helloworld";
    String sqlStatementCreate = "CREATE TABLE helloworld (message String) STORED AS PARQUET";
    
    // To invalidate metadata to update changes.
    stmt.execute(sqlStatementInvalidate);
    
    // To execute the DROP TABLE query.
    stmt.execute(sqlStatementDrop);
    
    // To execute the CREATE query.
    stmt.execute(sqlStatementCreate);
    String sqlStatementInsert = "INSERT INTO helloworld VALUES (\"helloworld\")";
    
    // To execute the INSERT query.
    stmt.execute(sqlStatementInsert);
    String sqlStatementSelect = "SELECT * from helloworld";
    
    // To execute the SELECT query.
    ResultSet rs = stmt.executeQuery(sqlStatementSelect);
    
    // To process results.
    while(rs.next()) {
       logger.info(rs.getString(1));
    }
    
    // To invalidate metadata to update changes.
    stmt.execute(sqlStatementInvalidate);

With a Kerberized Cluster

  1. Install the following Maven dependencies:

    <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-hdfs</artifactId>
         <version>2.6.0-cdh5.16.1.1</version>
    </dependency>
    <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-common</artifactId>
         <version>2.6.0-cdh5.16.1.1</version>
    </dependency>
    <dependency>
         <groupId>org.apache.hive</groupId>
         <artifactId>hive-jdbc</artifactId>
         <version>1.1.1</version>
    </dependency>
  2. Add the jaas.conf file under src/main/resources by running the following lines of code:

    Main {
     com.sun.security.auth.module.Krb5LoginModule required client=TRUE;
    };
  3. Initialize your connection by creating a login context function with the following lines of code:

    private static final String JDBC_DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver";
    private static String HADOOP_CONF_DIR = System.getenv("HADOOP_CONF_DIR");
    
    private static LoginContext kinit(String username, String password) throws LoginException {
        LoginContext lc = new LoginContext(Main.class.getSimpleName(), callbacks -> {
            for (Callback c : callbacks) {
                if (c instanceof NameCallback)
                    ((NameCallback) c).setName(username);
                if (c instanceof PasswordCallback)
                    ((PasswordCallback) c).setPassword(password.toCharArray());
            }
        });
        lc.login();
        return lc;
    }
  4. Initialize the connection to Impala by running the following lines of code:

    private static final String HOST_IMPALA = System.getenv("HOST_IMPALA");
    private static final String PORT_IMPALA = System.getenv("PORT_IMPALA");
    private static final String LOGIN = System.getenv("LOGIN");
    private static final String PASSWORD = System.getenv("PASSWORD");
    
    URL url = Main.class.getClassLoader().getResource("jaas.conf");
    System.setProperty("java.security.auth.login.config", url.toExternalForm());
    
    Configuration conf = new Configuration();
    conf.addResource(new Path("file:///" + HADOOP_CONF_DIR + "/core-site.xml"));
    conf.addResource(new Path("file:///" + HADOOP_CONF_DIR + "/hdfs-site.xml"));
    
    UserGroupInformation.setConfiguration(conf);
    
    LoginContext lc = kinit(LOGIN, PASSWORD);
    UserGroupInformation.loginUserFromSubject(lc.getSubject());
    String kerberosRealm = UserGroupInformation.getLoginUser().getUserName().split("@")[1];
    
    Class.forName(JDBC_DRIVER_NAME);
    // Impala
    Connection impalaConnection = DriverManager.getConnection("jdbc:hive2://" + IMPALA_HOSTNAME + ":" + PORT_IMPALA + "/;principal=impala/" + IMPALA_HOSTNAME + "@" + kerberosRealm);
  5. You can now create Impala tables, and insert and select data in tables by running the following lines of code:

    • Create Impala Tables

    • Insert Data Into Impala Tables

    • Select Data From Impala Tables

    String sqlStatementDrop = "DROP TABLE IF EXISTS impalatest";
    String sqlStatementCreate = "CREATE TABLE impalatest (message String) STORED AS PARQUET";
    Statement stmt =impalaConnection.createStatement();
    
    // To execute the DROP TABLE query.
    stmt.execute(sqlStatementDrop);
    
    // To execute the CREATE query.
    stmt.execute(sqlStatementCreate);
    String sqlStatementInsert = "INSERT INTO helloworld VALUES (\"helloworld\")";
    
    // To execute the INSERT query.
    stmt.execute(sqlStatementInsert);
    String sqlStatementSelect = "SELECT * from helloworld";
    
    // To execute the SELECT query.
    ResultSet rs = stmt.executeQuery(sqlStatementSelect);
    
    // To process results.
    while(rs.next()) {
       logger.info(rs.getString(1));
    }