Read and Write Tables From Impala With Java/Scala
Without a Kerberized Cluster
- Install the following Maven dependencies:
```xml
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>${hadoop.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>1.2.1</version>
</dependency>
```
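The `${hadoop.version}` placeholder is assumed to be defined as a Maven property in the same `pom.xml`; a minimal sketch, with a hypothetical version that you should replace with the one matching your cluster:

```xml
<properties>
    <!-- Hypothetical version: use the Hadoop version of your cluster. -->
    <hadoop.version>2.7.3</hadoop.version>
</properties>
```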
- Initialize the connection to Impala by running the following lines of code:
```java
private static final String HOST_IMPALA = System.getenv("HOST_IMPALA");
private static final String PORT_IMPALA = System.getenv("PORT_IMPALA");
private static final String LOGIN = System.getenv("LOGIN");
private static final String PASSWORD = System.getenv("PASSWORD");
private static final String JDBC_DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver";

// To set the JDBC Impala driver.
Class.forName(JDBC_DRIVER_NAME);

// To connect to Impala.
Connection impalaConnection = DriverManager.getConnection(
        "jdbc:hive2://" + HOST_IMPALA + ":" + PORT_IMPALA + "/;ssl=false", LOGIN, PASSWORD);

// To initialize the statement.
Statement stmt = impalaConnection.createStatement();
```
The Impala connection URL format must be `jdbc:hive2://impalahost:21050/database`, where `21050` is the default port.

- You can now create Impala tables and insert and select data into them by running the following lines of code:
```java
String sqlStatementInvalidate = "INVALIDATE METADATA";
String sqlStatementDrop = "DROP TABLE IF EXISTS helloworld";
String sqlStatementCreate = "CREATE TABLE helloworld (message String) STORED AS PARQUET";

// To invalidate metadata to update changes.
stmt.execute(sqlStatementInvalidate);

// To execute the DROP TABLE query.
stmt.execute(sqlStatementDrop);

// To execute the CREATE query.
stmt.execute(sqlStatementCreate);
```
```java
String sqlStatementInsert = "INSERT INTO helloworld VALUES (\"helloworld\")";

// To execute the INSERT query.
stmt.execute(sqlStatementInsert);
```
```java
String sqlStatementSelect = "SELECT * FROM helloworld";

// To execute the SELECT query.
ResultSet rs = stmt.executeQuery(sqlStatementSelect);

// To process results ("logger" is assumed to be a logger instance declared elsewhere in the class).
while (rs.next()) {
    logger.info(rs.getString(1));
}

// To invalidate metadata to update changes.
stmt.execute(sqlStatementInvalidate);
```
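Once the queries have run, it is good practice to release the JDBC resources. A minimal sketch, reusing the `rs`, `stmt`, and `impalaConnection` variables from the snippets above:

```java
// Close the result set, the statement, and the connection, in that order.
rs.close();
stmt.close();
impalaConnection.close();
```

Alternatively, a try-with-resources block around the `Connection` and `Statement` achieves the same cleanup automatically.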
With a Kerberized Cluster
- Install the following Maven dependencies:
```xml
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.6.0-cdh5.16.1.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.6.0-cdh5.16.1.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>1.1.1</version>
</dependency>
```
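Note that the `-cdh` artifact versions above are published in the Cloudera repository rather than in Maven Central, so you will likely also need to declare that repository in your `pom.xml`; a minimal sketch:

```xml
<repositories>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
    </repository>
</repositories>
```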
- Add the `jaas.conf` file under `src/main/resources`, with the following content:

```
Main {
    com.sun.security.auth.module.Krb5LoginModule required client=TRUE;
};
```

The `Main` entry name must match the name passed to the `LoginContext` constructor in the next step (here obtained with `Main.class.getSimpleName()`).
- Initialize your connection by creating a login context function with the following lines of code:
```java
import javax.security.auth.callback.Callback;
import javax.security.auth.callback.NameCallback;
import javax.security.auth.callback.PasswordCallback;
import javax.security.auth.login.LoginContext;
import javax.security.auth.login.LoginException;

private static final String JDBC_DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver";
private static String HADOOP_CONF_DIR = System.getenv("HADOOP_CONF_DIR");

// To authenticate against Kerberos with a username and password,
// answering the login module's callbacks programmatically.
private static LoginContext kinit(String username, String password) throws LoginException {
    LoginContext lc = new LoginContext(Main.class.getSimpleName(), callbacks -> {
        for (Callback c : callbacks) {
            if (c instanceof NameCallback) {
                ((NameCallback) c).setName(username);
            }
            if (c instanceof PasswordCallback) {
                ((PasswordCallback) c).setPassword(password.toCharArray());
            }
        }
    });
    lc.login();
    return lc;
}
```
- Initialize the connection to Impala by running the following lines of code:
```java
private static final String HOST_IMPALA = System.getenv("HOST_IMPALA");
private static final String PORT_IMPALA = System.getenv("PORT_IMPALA");
private static final String LOGIN = System.getenv("LOGIN");
private static final String PASSWORD = System.getenv("PASSWORD");

// To load the JAAS configuration.
URL url = Main.class.getClassLoader().getResource("jaas.conf");
System.setProperty("java.security.auth.login.config", url.toExternalForm());

// To load the Hadoop configuration of the cluster.
Configuration conf = new Configuration();
conf.addResource(new Path("file:///" + HADOOP_CONF_DIR + "/core-site.xml"));
conf.addResource(new Path("file:///" + HADOOP_CONF_DIR + "/hdfs-site.xml"));
UserGroupInformation.setConfiguration(conf);

// To authenticate with Kerberos and extract the realm from the logged-in principal.
LoginContext lc = kinit(LOGIN, PASSWORD);
UserGroupInformation.loginUserFromSubject(lc.getSubject());
String kerberosRealm = UserGroupInformation.getLoginUser().getUserName().split("@")[1];

// To set the JDBC Impala driver.
Class.forName(JDBC_DRIVER_NAME);

// To connect to Impala.
Connection impalaConnection = DriverManager.getConnection(
        "jdbc:hive2://" + HOST_IMPALA + ":" + PORT_IMPALA
        + "/;principal=impala/" + HOST_IMPALA + "@" + kerberosRealm);
```

The `principal` parameter in the JDBC URL is the Impala service principal, whose format is typically `impala/<impala-host>@<KERBEROS_REALM>`.
- You can now create Impala tables and insert and select data into them by running the following lines of code:
```java
String sqlStatementDrop = "DROP TABLE IF EXISTS impalatest";
String sqlStatementCreate = "CREATE TABLE impalatest (message String) STORED AS PARQUET";

Statement stmt = impalaConnection.createStatement();

// To execute the DROP TABLE query.
stmt.execute(sqlStatementDrop);

// To execute the CREATE query.
stmt.execute(sqlStatementCreate);
```
```java
String sqlStatementInsert = "INSERT INTO impalatest VALUES (\"helloworld\")";

// To execute the INSERT query.
stmt.execute(sqlStatementInsert);
```
```java
String sqlStatementSelect = "SELECT * FROM impalatest";

// To execute the SELECT query.
ResultSet rs = stmt.executeQuery(sqlStatementSelect);

// To process results.
while (rs.next()) {
    logger.info(rs.getString(1));
}
```
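As in the non-Kerberized case, release the resources once you are done; a minimal sketch, reusing the `rs`, `stmt`, `impalaConnection`, and `lc` variables from the snippets above:

```java
// Close the JDBC resources, then end the Kerberos session.
rs.close();
stmt.close();
impalaConnection.close();
lc.logout();
```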