HadoopMapRduce重写DBOutputFormat更新mysql数据库_MySQL_动视

HadoopMapRduce重写DBOutputFormat更新mysql数据库_MySQL

2020-11-09 19:26:23 责编:小采

在http://blog.csdn.net/sunflower_cao/article/details/28266939 写过可以通过继承 Writable, DBWritable实现在reduce过程中讲结果写入到mysql数据库里边，但是一直有一个问题就是只能实现insert 没法去更新已经存在的数据，这就导致不同的mapreduce程序获得的数据只能插入到不同的数据库中在使用的时候需要建立view或者使用复杂的sql语句去查询，今天调查了下，发现可以通过重写DBOutputFormat

上代码：

TblsWritable.java

import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import java.sql.PreparedStatement;import java.sql.ResultSet;import java.sql.SQLException;import org.apache.hadoop.io.Writable;import org.apache.hadoop.mapreduce.lib.db.DBWritable;/** * 重写DBWritable * * @author caozw TblsWritable需要向mysql中写入数据 */public class TblsWritable implements Writable, DBWritable {	String initTime;	String new_android_user;	String new_ios_user;	String new_total_user;	String iosUserTotal;	String androidUserTotal;	String userTotal;	public TblsWritable() {	}	public TblsWritable(String initTime, String new_android_user,	String new_ios_user, String new_total_user,	String iosUserTotal, String androidUserTotal, String userTotal) {	this.initTime = initTime;	this.new_android_user = new_android_user;	this.new_ios_user = new_ios_user;	this.new_total_user = new_total_user;	this.iosUserTotal = iosUserTotal;	this.androidUserTotal = androidUserTotal;	this.userTotal = userTotal;	}	@Override	public void write(PreparedStatement statement) throws SQLException {	statement.setString(1, this.new_android_user);	statement.setString(2, this.new_ios_user);	statement.setString(3, this.new_total_user);	statement.setString(4, this.androidUserTotal);	statement.setString(5, this.iosUserTotal);	statement.setString(6, this.userTotal);	statement.setString(7, this.initTime);	}	@Override	public void readFields(ResultSet resultSet) throws SQLException {	this.new_android_user = resultSet.getString(1);	this.new_ios_user = resultSet.getString(2);	this.new_total_user = resultSet.getString(3);	this.androidUserTotal = resultSet.getString(4);	this.iosUserTotal = resultSet.getString(5);	this.userTotal = resultSet.getString(6);	this.initTime = resultSet.getString(7);	}	@Override	public void write(DataOutput out) throws IOException {	out.writeUTF(this.new_android_user);	out.writeUTF(this.new_ios_user);	out.writeUTF(this.new_total_user);	out.writeUTF(this.androidUserTotal);	out.writeUTF(this.iosUserTotal);	out.writeUTF(this.userTotal);	out.writeUTF(this.initTime);	}	@Override	public void readFields(DataInput in) throws IOException {	this.new_android_user = in.readUTF();	this.new_ios_user = in.readUTF();	this.new_total_user = in.readUTF();	this.androidUserTotal = in.readUTF();	this.iosUserTotal = in.readUTF();	this.userTotal = in.readUTF();	this.initTime = in.readUTF();	}	public String toString() {	return new String(this.initTime + " " + this.new_android_user + " "	+ this.new_ios_user + " " + this.new_total_user + " "	+ this.androidUserTotal + " " + this.iosUserTotal + " "	+ this.userTotal);	}}

WriteDataToMysql.java

import java.io.IOException;import java.text.ParseException;import java.text.SimpleDateFormat;import java.util.Date;import java.util.Iterator;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;/** * 将mapreduce的结果数据写入mysql中 * * @author caozw */public class WriteDataToMysql {	public static String[] fieldNames = { "INITTIME", "NEW_ANDROID_USER",	"NEW_IOS_USER", "NEW_USER_TOTAL", "TOTAL_ANDROID_USER",	"TOTAL_IOS_USER", "TOTAL_USER" };	public static String table = "USER_INFO_STATIC";	public static class ConnMysqlMapper extends	Mapper {	enum Counter {	LINESKIP,	}	private final static IntWritable one = new IntWritable(1);	private final static IntWritable zero = new IntWritable(0);	public void map(LongWritable key, Text value, Context context)	throws IOException, InterruptedException {	try {	String line = value.toString();	String[] strings = line.split("/t");	String initTime = strings[1];	String devType = strings[4];	if (initTime.length() == 19) {	SimpleDateFormat sdf = new SimpleDateFormat(	"yyyy-MM-dd HH:mm:ss");	Date date = sdf.parse(initTime);	if ("1".equals(devType)) {	context.write(new Text(initTime.substring(0, 10)), one);	context.write(new Text(initTime.substring(0, 10)), zero);	}	} else {	// System.err.println(initTime);	context.getCounter(Counter.LINESKIP).increment(1);	}	// } catch (ArrayIndexOutOfBoundsException e) {	} catch (ArrayIndexOutOfBoundsException e) {	context.getCounter(Counter.LINESKIP).increment(1);	return;	} catch (ParseException e) {	context.getCounter(Counter.LINESKIP).increment(1);	return;	}	}	}	public static class ConnMysqlReducer extends	Reducer {	static int iosUserTotal = 0;	static int androidUserTotal = 0;	public void reduce(Text key, Iterable values,	Context context) throws IOException, InterruptedException {	int android = 0;	int ios = 0;	int total = 0;	for (Iterator itr = values.iterator(); itr.hasNext();) {	total++;	if (0 == itr.next().get()) {	android++;	} else {	ios++;	}	}	iosUserTotal += ios;	androidUserTotal += android;	/*	 * System.err.println(key.toString() + ":" + String.valueOf(android)	 * + ":" + String.valueOf(ios) + ":" + String.valueOf(total));	 */	context.write(	new TblsWritable(key.toString(), String.valueOf(android),	String.valueOf(ios), String.valueOf(total), String	.valueOf(androidUserTotal), String	.valueOf(iosUserTotal), String	.valueOf(androidUserTotal + iosUserTotal)),	null);	}	}	public static void main(String args[]) throws IOException,	InterruptedException, ClassNotFoundException {	Configuration conf = new Configuration();	DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",	"jdbc:mysql://127.0.0.1:3306/XINGXUNTONG", "hadoop", "123456");	Job job = new Job(conf, "test mysql connection");	job.setJarByClass(WriteDataToMysql.class);	job.setMapperClass(ConnMysqlMapper.class);	job.setReducerClass(ConnMysqlReducer.class);	job.setOutputKeyClass(Text.class);	job.setOutputValueClass(IntWritable.class);	job.setSpeculativeExecution(false);	job.setInputFormatClass(TextInputFormat.class);	job.setOutputFormatClass(MysqlDBOutputFormat.class);	//job.setOutputFormatClass(DBOutputFormat.class);	FileInputFormat.addInputPath(job, new Path(args[0]));	// DBOutputFormat.setOutput(job, "test", "initTime", "new_user_total");	//DBOutputFormat.setOutput(job, table, fieldNames);	MysqlDBOutputFormat.setOutput(job, table, fieldNames);	System.exit(job.waitForCompletion(true) ? 0 : 1);	}}

MysqlDBOutputFormat.java

/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import java.io.IOException;import java.sql.Connection;import java.sql.PreparedStatement;import java.sql.SQLException;import org.apache.commons.logging.Log;import org.apache.commons.logging.LogFactory;import org.apache.hadoop.classification.InterfaceAudience;import org.apache.hadoop.classification.InterfaceStability;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.JobContext;import org.apache.hadoop.mapreduce.OutputCommitter;import org.apache.hadoop.mapreduce.OutputFormat;import org.apache.hadoop.mapreduce.RecordWriter;import org.apache.hadoop.mapreduce.TaskAttemptContext;import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;import org.apache.hadoop.mapreduce.lib.db.DBWritable;import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.StringUtils;/** * A OutputFormat that sends the reduce output to a SQL table. *  * {@link MysqlDBOutputFormat} accepts  pairs, where * key has a type extending DBWritable. Returned {@link RecordWriter} * writes only the key to the database with a batch SQL query. * */@InterfaceAudience.Public@InterfaceStability.Stablepublic class MysqlDBOutputFormat extends OutputFormat { private static final Log LOG = LogFactory.getLog(MysqlDBOutputFormat.class); public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {} public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { return new FileOutputCommitter(FileOutputFormat.getOutputPath(context), context); } /** * A RecordWriter that writes the reduce output to a SQL table */ @InterfaceStability.Evolving public class DBRecordWriter extends RecordWriter { private Connection connection; private PreparedStatement statement; public DBRecordWriter() throws SQLException { } public DBRecordWriter(Connection connection , PreparedStatement statement) throws SQLException { this.connection = connection; this.statement = statement; this.connection.setAutoCommit(false); } public Connection getConnection() { return connection; } public PreparedStatement getStatement() { return statement; } /** {@inheritDoc} */ public void close(TaskAttemptContext context) throws IOException { try { statement.executeBatch(); connection.commit(); } catch (SQLException e) { try { connection.rollback(); } catch (SQLException ex) { LOG.warn(StringUtils.stringifyException(ex)); } throw new IOException(e.getMessage()); } finally { try { statement.close(); connection.close(); } catch (SQLException ex) { throw new IOException(ex.getMessage()); } } } /** {@inheritDoc} */ public void write(K key, V value) throws IOException { try { key.write(statement); statement.addBatch(); } catch (SQLException e) { e.printStackTrace(); } } } /** * Constructs the query used as the prepared statement to insert data. * * @param table * the table to insert into * @param fieldNames * the fields to insert into. If field names are unknown, supply an * array of nulls. */ public String constructQuery(String table, String[] fieldNames) {	if (fieldNames == null) {	throw new IllegalArgumentException(	"Field names may not be null");	}	StringBuilder query = new StringBuilder();	query.append("UPDATE ").append(table);	System.err.println("fieldNames.length:" + fieldNames.length);	if (fieldNames.length > 0) {	query.append(" SET ");	query.append(fieldNames[1] + " = ?");	query.append("," + fieldNames[2] + " = ?");	query.append("," + fieldNames[3] + " = ?");	query.append("," + fieldNames[4] + " = ?");	query.append("," + fieldNames[5] + " = ?");	query.append("," + fieldNames[6] + " = ?");	query.append(" WHERE ");	query.append(fieldNames[0] + " = ?");	System.err.println(query.toString());	return query.toString();	} else {	return null;	} } /** {@inheritDoc} */ public RecordWriter getRecordWriter(TaskAttemptContext context) throws IOException { DBConfiguration dbConf = new DBConfiguration(context.getConfiguration()); String tableName = dbConf.getOutputTableName(); String[] fieldNames = dbConf.getOutputFieldNames(); if(fieldNames == null) { fieldNames = new String[dbConf.getOutputFieldCount()]; } try { Connection connection = dbConf.getConnection(); PreparedStatement statement = null; statement = connection.prepareStatement( constructQuery(tableName, fieldNames)); return new DBRecordWriter(connection, statement); } catch (Exception ex) { throw new IOException(ex.getMessage()); } } /** * Initializes the reduce-part of the job with * the appropriate output settings * * @param job The job * @param tableName The table to insert data into * @param fieldNames The field names in the table. */ public static void setOutput(Job job, String tableName, String... fieldNames) throws IOException { if(fieldNames.length > 0 && fieldNames[0] != null) { DBConfiguration dbConf = setOutput(job, tableName); dbConf.setOutputFieldNames(fieldNames); } else { if (fieldNames.length > 0) { setOutput(job, tableName, fieldNames.length); } else { throw new IllegalArgumentException( "Field names must be greater than 0"); } } } /** * Initializes the reduce-part of the job * with the appropriate output settings * * @param job The job * @param tableName The table to insert data into * @param fieldCount the number of fields in the table. */ public static void setOutput(Job job, String tableName, int fieldCount) throws IOException { DBConfiguration dbConf = setOutput(job, tableName); dbConf.setOutputFieldCount(fieldCount); } private static DBConfiguration setOutput(Job job, String tableName) throws IOException { job.setOutputFormatClass(MysqlDBOutputFormat.class); job.setReduceSpeculativeExecution(false); DBConfiguration dbConf = new DBConfiguration(job.getConfiguration()); dbConf.setOutputTableName(tableName); return dbConf; }}

MysqlDBOutputFormat.java是将hadoop源码里边的DBOutputFormat.java拿过来重写了里边的constructQuery的方法，在生成sql语句的时候产生update的sql语句来实现的

DBOutputFormat.java里边的constructQuery

public String constructQuery(String table, String[] fieldNames) { if(fieldNames == null) { throw new IllegalArgumentException("Field names may not be null"); } StringBuilder query = new StringBuilder(); query.append("INSERT INTO ").append(table); if (fieldNames.length > 0 && fieldNames[0] != null) { query.append(" ("); for (int i = 0; i < fieldNames.length; i++) { query.append(fieldNames[i]); if (i != fieldNames.length - 1) { query.append(","); } } query.append(")"); } query.append(" VALUES ("); for (int i = 0; i < fieldNames.length; i++) { query.append("?"); if(i != fieldNames.length - 1) { query.append(","); } } query.append(");"); return query.toString(); }

重写后：

public String constructQuery(String table, String[] fieldNames) {	if (fieldNames == null) {	throw new IllegalArgumentException(	"Field names may not be null");	}	StringBuilder query = new StringBuilder();	query.append("UPDATE ").append(table);	System.err.println("fieldNames.length:" + fieldNames.length);	if (fieldNames.length > 0) {	query.append(" SET ");	query.append(fieldNames[1] + " = ?");	query.append("," + fieldNames[2] + " = ?");	query.append("," + fieldNames[3] + " = ?");	query.append("," + fieldNames[4] + " = ?");	query.append("," + fieldNames[5] + " = ?");	query.append("," + fieldNames[6] + " = ?");	query.append(" WHERE ");	query.append(fieldNames[0] + " = ?");	System.err.println(query.toString());	return query.toString();	} else {	return null;	} }

按照java的思想我这里其实可以通过extends DBOutputFomat来实现update mysql的功能，但是我试了死活不行，暂且记下有时间再来仔细揣摩

重载方式代码（测试未通过）有知道为什么的还请不吝赐教

import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;import org.apache.hadoop.mapreduce.lib.db.DBWritable;public class MysqlOutputFormat1 extends DBOutputFormat {	public String constructQuery(String table, String[] fieldNames) {	if (fieldNames == null) {	throw new IllegalArgumentException(	"Field names may not be null");	}	StringBuilder query = new StringBuilder();	query.append("UPDATE ").append(table);	System.err.println("fieldNames.length:"+fieldNames.length);	if (fieldNames.length > 0) {	query.append(" SET ");	query.append(fieldNames[1] + " = ?");	query.append("," + fieldNames[2] + " = ?");	query.append("," + fieldNames[3] + " = ?");	query.append("," + fieldNames[4] + " = ?");	query.append("," + fieldNames[5] + " = ?");	query.append("," + fieldNames[6] + " = ?");	query.append(" WHERE ");	query.append(fieldNames[0] + " = ?");	System.err.println(query.toString());	return query.toString();	} else {	return null;	}	}}

另外可以通过我感觉可以通过sql语句的merge方式生成merge语句从而实现有则更新没有则插入参考网页http://en.wikipedia.org/wiki/Merge_%28SQL%29

--http://en.wikipedia.org/wiki/Merge_%28SQL%29 MERGE INTO tablename USING table_reference ON (condition) WHEN MATCHED THEN UPDATE SET column1 = value1 [, column2 = value2 ...] WHEN NOT MATCHED THEN INSERT (column1 [, column2 ...]) VALUES (value1 [, value2 ...

下载本文

显示全文

全部频道