Read and Write Tables From Hive With Java/Scala
Without a Kerberized Cluster
-
Install the following Maven dependency:
<dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> <version>${hadoop.version}</version> </dependency> <dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-jdbc</artifactId> <version>${hive-jdbc.version}</version> </dependency>
-
Initialize the connection to Hive by running the following lines of code:
private static final String HOST_HIVE = System.getenv("HOST_HIVE"); private static final String PORT_HIVE = System.getenv("PORT_HIVE"); private static final String LOGIN = System.getenv("LOGIN"); private static final String PASSWORD = System.getenv("PASSWORD"); private static final String JDBC_DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver"; // To set the JDBC Hive driver. Class.forName(JDBC_DRIVER_NAME); // To connect to Hive. // Choose a user who has write access to /user/hive/warehouse/ // (e.g. hdfs) con = DriverManager.getConnection("jdbc:hive2://" + HOST_HIVE + ":" + PORT_HIVE + "/;ssl=false", LOGIN, PASSWORD); // To initialize the statement. Statement stmt = con.createStatement();
The Hive connection URL format must be jdbc:hive2://<hiveserver>:10000/;ssl=false
, where10000
is the default port. -
You can now create Hive tables, and insert and select data in tables by running the following lines of code:
String sqlStatementDrop = "DROP TABLE IF EXISTS helloworld"; String sqlStatementCreate = "CREATE TABLE helloworld (message String) STORED AS PARQUET"; // To execute the DROP TABLE query. stmt.execute(sqlStatementDrop); // To execute the CREATE query. stmt.execute(sqlStatementCreate);
String sqlStatementInsert = "INSERT INTO helloworld VALUES (\"helloworld\")"; // To execute the INSERT query. stmt.execute(sqlStatementInsert);
String sqlStatementSelect = "SELECT * from helloworld"; // To execute the SELECT query. ResultSet rs = stmt.executeQuery(sqlStatementSelect); // To process results. while(rs.next()) { logger.info(rs.getString(1)); }
With a Kerberized Cluster
-
Install the following Maven dependencies:
<dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-hdfs</artifactId> <version>2.6.0-cdh5.16.1.1</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <version>2.6.0-cdh5.16.1.1</version> </dependency> <dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-jdbc</artifactId> <version>1.1.1</version> </dependency>
-
Add the
jaas.conf
file undersrc/main/resources
by running the following lines of code:Main { com.sun.security.auth.module.Krb5LoginModule required client=TRUE; };
-
Initialize your connection by creating a login context function with the following lines of code:
private static final String JDBC_DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver"; private static String HADOOP_CONF_DIR = System.getenv("HADOOP_CONF_DIR"); private static LoginContext kinit(String username, String password) throws LoginException { LoginContext lc = new LoginContext(Main.class.getSimpleName(), callbacks -> { for (Callback c : callbacks) { if (c instanceof NameCallback) ((NameCallback) c).setName(username); if (c instanceof PasswordCallback) ((PasswordCallback) c).setPassword(password.toCharArray()); } }); lc.login(); return lc; }
-
Initialize the connection to Hive by running the following lines of code:
private static final String HOST_HIVE = System.getenv("HOST_HIVE"); private static final String PORT_HIVE = System.getenv("PORT_HIVE"); private static final String LOGIN = System.getenv("LOGIN"); private static final String PASSWORD = System.getenv("PASSWORD"); URL url = Main.class.getClassLoader().getResource("jaas.conf"); System.setProperty("java.security.auth.login.config", url.toExternalForm()); Configuration conf = new Configuration(); conf.addResource(new Path("file:///" + HADOOP_CONF_DIR + "/core-site.xml")); conf.addResource(new Path("file:///" + HADOOP_CONF_DIR + "/hdfs-site.xml")); UserGroupInformation.setConfiguration(conf); LoginContext lc = kinit(LOGIN, PASSWORD); UserGroupInformation.loginUserFromSubject(lc.getSubject()); String kerberosRealm = UserGroupInformation.getLoginUser().getUserName().split("@")[1]; Class.forName(JDBC_DRIVER_NAME); // Hive Connection hiveConnection = DriverManager.getConnection("jdbc:hive2://" + HIVE_HOSTNAME + ":" + PORT_HIVE + "/;principal=hive/" + HIVE_HOSTNAME + "@" + kerberosRealm);
-
You can now create Hive tables, and insert and select data in tables by running the following lines of code:
String sqlStatementDrop = "DROP TABLE IF EXISTS hivetest"; String sqlStatementCreate = "CREATE TABLE hivetest (message String) STORED AS PARQUET"; Statement stmt = hiveConnection.createStatement(); // To execute the DROP TABLE query. stmt.execute(sqlStatementDrop); // To execute the CREATE query. stmt.execute(sqlStatementCreate);
String sqlStatementInsert = "INSERT INTO helloworld VALUES (\"helloworld\")"; // To execute the INSERT query. stmt.execute(sqlStatementInsert);
String sqlStatementSelect = "SELECT * from helloworld"; // To execute the SELECT query. ResultSet rs = stmt.executeQuery(sqlStatementSelect); // To process results. while(rs.next()) { logger.info(rs.getString(1)); }