MySQL 中的数据:
mysql> select * from lxw_tbls;
+---------------------+----------------+
| TBL_NAME            | TBL_TYPE       |
+---------------------+----------------+
| lxw_test_table      | EXTERNAL_TABLE |
| lxw_t               | MANAGED_TABLE  |
| lxw_t1              | MANAGED_TABLE  |
| tt                  | MANAGED_TABLE  |
| tab_partition       | MANAGED_TABLE  |
| lxw_hbase_table_1   | MANAGED_TABLE  |
| lxw_hbase_user_info | MANAGED_TABLE  |
| t                   | EXTERNAL_TABLE |
| lxw_jobid           | MANAGED_TABLE  |
+---------------------+----------------+
9 rows in set (0.01 sec)

mysql> select * from lxw_tbls where TBL_NAME like 'lxw%' order by TBL_NAME;
+---------------------+----------------+
| TBL_NAME            | TBL_TYPE       |
+---------------------+----------------+
| lxw_hbase_table_1   | MANAGED_TABLE  |
| lxw_hbase_user_info | MANAGED_TABLE  |
| lxw_jobid           | MANAGED_TABLE  |
| lxw_t               | MANAGED_TABLE  |
| lxw_t1              | MANAGED_TABLE  |
| lxw_test_table      | EXTERNAL_TABLE |
+---------------------+----------------+
6 rows in set (0.00 sec)
MapReduce程序代码,ConnMysql.java:
package com.lxw.study; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.net.URI; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.Iterator; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.filecache.DistributedCache; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.db.DBConfiguration; import org.apache.hadoop.mapreduce.lib.db.DBInputFormat; import org.apache.hadoop.mapreduce.lib.db.DBWritable; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class ConnMysql { private static Configuration conf = new Configuration(); static { conf.addResource(new Path("F:/lxw-hadoop/hdfs-site.xml")); conf.addResource(new Path("F:/lxw-hadoop/mapred-site.xml")); conf.addResource(new Path("F:/lxw-hadoop/core-site.xml")); conf.set("mapred.job.tracker", "10.133.103.21:50021"); } public static class TblsRecord implements Writable, DBWritable { String tbl_name; String tbl_type; public TblsRecord() { } @Override public void write(PreparedStatement statement) throws SQLException { // TODO Auto-generated method stub statement.setString(1, this.tbl_name); statement.setString(2, this.tbl_type); } @Override public void readFields(ResultSet resultSet) throws SQLException { // TODO Auto-generated method stub this.tbl_name = resultSet.getString(1); this.tbl_type = resultSet.getString(2); } @Override public void write(DataOutput out) throws IOException { // TODO Auto-generated method stub Text.writeString(out, this.tbl_name); Text.writeString(out, this.tbl_type); } @Override public void readFields(DataInput in) throws IOException { // TODO Auto-generated 
method stub this.tbl_name = Text.readString(in); this.tbl_type = Text.readString(in); } public String toString() { return new String(this.tbl_name + " " + this.tbl_type); } } public static class ConnMysqlMapper extends Mapper<LongWritable,TblsRecord,Text,Text> { public void map(LongWritable key,TblsRecord values,Context context) throws IOException,InterruptedException { context.write(new Text(values.tbl_name), new Text(values.tbl_type)); } } public static class ConnMysqlReducer extends Reducer<Text,Text,Text,Text> { public void reduce(Text key,Iterable<Text> values,Context context) throws IOException,InterruptedException { for(Iterator<Text> itr = values.iterator();itr.hasNext();) { context.write(key, itr.next()); } } } public static void main(String[] args) throws Exception { Path output = new Path("/user/lxw/output/"); FileSystem fs = FileSystem.get(URI.create(output.toString()), conf); if (fs.exists(output)) { fs.delete(output); } //mysql的jdbc驱动 DistributedCache.addFileToClassPath(new Path( "hdfs://hd022-test.nh.sdo.com/user/liuxiaowen/mysql-connector-java-5.1.13-bin.jar"), conf); DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver", "jdbc:mysql://10.133.103.22:3306/hive", "hive", "hive"); Job job = new Job(conf,"test mysql connection"); job.setJarByClass(ConnMysql.class); job.setMapperClass(ConnMysqlMapper.class); job.setReducerClass(ConnMysqlReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(DBInputFormat.class); FileOutputFormat.setOutputPath(job, output); //列名 String[] fields = { "TBL_NAME", "TBL_TYPE" }; //六个参数分别为: //1.Job;2.Class<? extends DBWritable> //3.表名;4.where条件 //5.order by语句;6.列名 DBInputFormat.setInput(job, TblsRecord.class, "lxw_tbls", "TBL_NAME like 'lxw%'", "TBL_NAME", fields); System.exit(job.waitForCompletion(true) ? 0 : 1); } }
运行结果:
[lxw@hd025-test ~]$ hadoop fs -cat /user/lxw/output/part-r-00000
lxw_hbase_table_1	MANAGED_TABLE
lxw_hbase_user_info	MANAGED_TABLE
lxw_jobid	MANAGED_TABLE
lxw_t	MANAGED_TABLE
lxw_t1	MANAGED_TABLE
lxw_test_table	EXTERNAL_TABLE
相关推荐
简单的在MapReduce中实现两个表的join连接简单的在MapReduce中实现两个表的join连接简单的在MapReduce中实现两个表的join连接
mapreduce基本数据读取,通俗易懂。 此项目情景为,遗传关系族谱。 (爷爷 、父母 、孩子) 经典案例
【MapReduce篇07】MapReduce之数据清洗ETL1
MapReduce操作实例-数据去重.pdf 学习资料 复习资料 教学资源
大数据分析技术基础PPT课件(共9单元)4-MapReduce 编程.pdf大数据分析技术基础PPT课件(共9单元)4-MapReduce 编程.pdf大数据分析技术基础PPT课件(共9单元)4-MapReduce 编程.pdf大数据分析技术基础PPT课件(共9单元)4-...
hadoop连接数据库查询数据,并添加到hdfs;从hdfs进行mapreduce数据导入到数据库 hadoop连接数据库查询数据,并添加到hdfs;从hdfs进行mapreduce数据导入到数据库 hadoop连接数据库查询数据,并添加到hdfs;从hdfs...
为了更加有效和简洁的处理此类问题,Google 提出了 MapReduce 编程模型,它可以隐藏并行化、容错、数据分布、负载均衡等细节,把这些公共的细节抽象到一个库中,由一个运行时系统来负责。而将对数据的操作抽象为 map...
为MapReduce框架对电话号码的上行流量和下行流量及总流量进行统计的模板数据
介绍mapreduce的开发架构及相关资源
使用hadoop的eclipse插件开发的mapreduce程序,实现对数据的简单统计处理,包括可视化结果
mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce mapreduce ...
基于Hadoop MapReduce的短视频主播数据分析项目代码+数据集.rar
基于Hadoop MapReduce的电影点评网站数据分析项目代码+数据集.rar
mapreduce案例测试数据
mapreduce案例测试数据
MapReduce框架下已存在很多相似性连接算法,但仍然存在一些不足,如大量的索引加大时间、空间的开销;现有算法不能有效地完成增量式数据集的相似性连接等。针对海量增量式数据集进行了研究,采用抽样技术得到有效...
基于Hadoop MapReduce的电商网站商品数据分析.rar