
gitee 案例代码: https://gitee.com/xionggd/aiot-learning.git
场景: java调用sqoop
工具版本: hadoop3.0.0 + sqoop1.4.7
本来是打算编写远程执行客户端的,但是考虑到实际需求,需要覆盖 hive 表:sqoop 添加 hive 相关指令后,远程执行时数据过去了、表也会自动创建;再考虑到后面可能遇到的更多环境问题,故本次打算把执行器开发完成后,部署在带有 hadoop 和 sqoop 环境的机器上;下面就是核心执行的以下代码:
更多测试样例可以参考gitee上的代码和 下面比较好的博客文章。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.sqoop.Sqoop;
import org.apache.sqoop.hive.HiveConfig;
import org.apache.sqoop.tool.SqoopTool;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import java.io.File;
import java.io.IOException;
import java.util.Date;
public String sqoopExe() {
System.out.println("====== 执行开始 ==========");
try {
// 设置hadoop classpath 环境依赖:根据自己的maven路径调整
StringBuilder curClasspath = new StringBuilder();
curClasspath.append(System.getProperty ("java.class.path"))
.append(File.pathSeparator)
.append("D:\Tools\apache-maven-3.6.3\Repository\org\apache\hadoop\hadoop-common\2.10.0\hadoop-common-2.10.0.jar")
.append(File.pathSeparator)
.append("D:\Tools\apache-maven-3.6.3\Repository\org\apache\sqoop\sqoop\1.4.7\sqoop-1.4.7-hadoop260.jar")
.append(File.pathSeparator)
.append("D:\Tools\apache-maven-3.6.3\Repository\org\apache\hadoop\hadoop-common\2.10.0\hadoop-mapreduce-client-core-2.10.0.jar");
System.setProperty ("java.class.path", curClasspath.toString());
long startTime = new Date().getTime();
String[] argument = new String[]{
"--connect", "jdbc:mysql://192.168.3.103:3306/xgd_test?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true&useSSL=false&zeroDateTimeBehavior=convertToNull&serverTimezone=Asia/Shanghai&allowMultiQueries=true",
"--username", "root",
"--password", "Lyhdata@mysql105",
"--table", "test6_reader100w",
"-m", "1",
"--fields-terminated-by", "\t",
// "--delete-target-dir",
// 指定hdfs存放数据目录
"--target-dir", "/data/hive/warehouse/xgd_test.db/test6_reader100w",
// 指向依赖的各种jar包的位置
// "--hadoop-mapred-home", "D:\Tools\apache-maven-3.6.3\Repository",
// "--hadoop-mapred-home", "/home/maven/Repository",
// hdfs增量追加
"--incremental", "append",
"--check-column","uid", // 增量字段
};
SqoopTool sqoopTool = SqoopTool.getTool("import"); //关系型数据库导hive export
//SqoopTool sqoopTool = SqoopTool.getTool("export"); //hive导关系型数据库
Configuration conf = new Configuration();
conf.set("fs.default.name", "hdfs://192.168.0.166:8020");
// Configuration hive = HiveConfig.getHiveConf(conf);
Sqoop sqoop = new Sqoop((com.cloudera.sqoop.tool.SqoopTool)sqoopTool, SqoopTool.loadPlugins(conf));
int res = Sqoop.runSqoop(sqoop, argument);
System.out.println(res);
System.out.println("====== 执行sqoop结束 ==========");
long endTime = new Date().getTime();
System.out.println("执行时常:" + (endTime - startTime) + "");
return "执行完毕!!!!";
} catch (Exception e) {
// e.printStackTrace();
System.out.println("异常:" + e.getMessage());
return "执行完毕!!!!";
}
}
远程调用hive 需要添加 hive-site.xml 配置文件到项目根目录
我参考过的多篇文章,也许对你们也有帮助:
https://blog.csdn.net/qq_39035773/article/details/107961561
https://blog.csdn.net/ashzdw/article/details/108979077
https://blog.csdn.net/ashzdw/article/details/108979077