hive中怎么使用udaf函数求中位数
发表于:2025-12-03 作者:千家信息网编辑
千家信息网最后更新 2025年12月03日,hive中怎么使用udaf函数求中位数,针对这个问题,这篇文章详细介绍了相对应的分析和解答,希望可以帮助更多想解决这个问题的小伙伴找到更简单易行的方法。看下中位数定义:MEDIAN 中位数(一组数据按
千家信息网最后更新 2025年12月03日hive中怎么使用udaf函数求中位数
hive中怎么使用udaf函数求中位数,针对这个问题,这篇文章详细介绍了相对应的分析和解答,希望可以帮助更多想解决这个问题的小伙伴找到更简单易行的方法。
看下中位数定义:
MEDIAN 中位数(一组数据按从小到大的顺序依次排列,处在中间位置的一个数或最中间两个数据的平均数)
写成genericUDAF的形式
例如:1 2 3 4 的中位数为 (2+3)/2 = 2.5;1 2 3 的中位数为 2
代码如下
package org.apache.hadoop.hive.ql.udf.generic;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.util.StringUtils;

/**
 * Hive generic UDAF that returns the median of a numeric column as a double.
 *
 * <p>The median is the middle value of the sorted non-null inputs, or the mean of the two
 * middle values when the count is even. Every non-null value of a group is kept in memory
 * until the group is finished, so the memory used grows with the group size.
 *
 * <p>The resolver picks one evaluator per input type: byte/short/int, long, and float/double.
 */
@Description(name = "median",
    value = "_FUNC_(x) return the median number of a number array. \neg: median(x)")
public class GenericUDAFMedian extends AbstractGenericUDAFResolver {

  static final Log LOG = LogFactory.getLog(GenericUDAFMedian.class.getName());

  /**
   * Selects the evaluator matching the type of the single argument.
   *
   * @param parameters type information of the call arguments; exactly one is accepted
   * @return an evaluator for int-like, long or floating-point input
   * @throws SemanticException (as {@link UDFArgumentTypeException}) when the argument count
   *     is not 1 or the argument is not a supported numeric primitive
   */
  @Override
  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
    if (parameters.length != 1) {
      throw new UDFArgumentTypeException(parameters.length - 1,
          "Only 1 parameter is accepted!");
    }

    ObjectInspector objectInspector =
        TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(parameters[0]);
    if (!ObjectInspectorUtils.compareSupported(objectInspector)) {
      throw new UDFArgumentTypeException(parameters.length - 1,
          "Cannot support comparison of map<> type or complex type containing map<>.");
    }

    // Guard the cast below: list/struct/union arguments are not PrimitiveTypeInfo and would
    // otherwise fail with a ClassCastException instead of a readable semantic error.
    if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
      throw new UDFArgumentTypeException(0,
          "Only numeric type(int long double) arguments are accepted but "
              + parameters[0].getTypeName() + " was passed as parameter of index->1.");
    }

    switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
      case BYTE:
      case SHORT:
      case INT:
        return new GenericUDAFMedianEvaluatorInt();
      case LONG:
        return new GenericUDAFMedianEvaluatorLong();
      case FLOAT:
      case DOUBLE:
        return new GenericUDAFMedianEvaluatorDouble();
      case STRING:
      case BOOLEAN:
      default:
        throw new UDFArgumentTypeException(0,
            "Only numeric type(int long double) arguments are accepted but "
                + parameters[0].getTypeName() + " was passed as parameter of index->1.");
    }
  }

  /** Median evaluator for byte/short/int input; values are buffered as {@link IntWritable}. */
  public static class GenericUDAFMedianEvaluatorInt extends GenericUDAFEvaluator {

    private final DoubleWritable result = new DoubleWritable();
    PrimitiveObjectInspector inputOI;
    StructObjectInspector structOI;
    StandardListObjectInspector listOI;
    StructField listField;
    Object[] partialResult;
    ListObjectInspector listFieldOI;
    /** Inspector for the elements of the partial list; set only when merging partials. */
    PrimitiveObjectInspector listElementOI;
    boolean warned = false;

    /**
     * Sets up input and output inspectors for the given mode.
     *
     * <p>PARTIAL1/COMPLETE read raw column values; PARTIAL2/FINAL read a struct with a single
     * field named "list". PARTIAL1/PARTIAL2 emit that struct; COMPLETE/FINAL emit a double.
     */
    @Override
    public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
      assert (parameters.length == 1);
      super.init(m, parameters);

      listOI = ObjectInspectorFactory.getStandardListObjectInspector(
          PrimitiveObjectInspectorFactory.writableIntObjectInspector);

      // init input
      if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
        inputOI = (PrimitiveObjectInspector) parameters[0];
      } else {
        structOI = (StructObjectInspector) parameters[0];
        listField = structOI.getStructFieldRef("list");
        listFieldOI = (ListObjectInspector) listField.getFieldObjectInspector();
        listElementOI = (PrimitiveObjectInspector) listFieldOI.getListElementObjectInspector();
      }

      // init output
      if (m == Mode.PARTIAL1 || m == Mode.PARTIAL2) {
        ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
        foi.add(listOI);
        ArrayList<String> fname = new ArrayList<String>();
        fname.add("list");
        partialResult = new Object[1];
        partialResult[0] = new ArrayList<IntWritable>();
        return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
      } else {
        return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
      }
    }

    /** Aggregation state: every non-null value seen so far for the current group. */
    static class MedianNumberAgg implements AggregationBuffer {
      List<IntWritable> aggIntegerList;
    }

    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      MedianNumberAgg resultAgg = new MedianNumberAgg();
      reset(resultAgg);
      return resultAgg;
    }

    @Override
    public void reset(AggregationBuffer agg) throws HiveException {
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      medianNumberAgg.aggIntegerList = new ArrayList<IntWritable>();
    }

    /** Adds one input row to the buffer; null and unconvertible values are skipped. */
    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
      assert (parameters.length == 1);
      if (parameters[0] == null) {
        return;
      }
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      try {
        int val = PrimitiveObjectInspectorUtils.getInt(parameters[0], inputOI);
        // Added only after a successful conversion, so a failed row cannot inject a bogus 0.
        medianNumberAgg.aggIntegerList.add(new IntWritable(val));
      } catch (NullPointerException e) {
        LOG.warn("got a null value, skip it");
      } catch (NumberFormatException e) {
        if (!warned) {
          warned = true;
          LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
          LOG.warn("ignore similar exceptions.");
        }
      }
    }

    /**
     * Computes the median of the buffered values.
     *
     * @return the median as a {@link DoubleWritable}, or {@code null} when no value was seen
     */
    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      List<IntWritable> values = medianNumberAgg.aggIntegerList;
      int size = values.size();
      if (size == 0) {
        // No non-null input: report NULL instead of indexing an empty list.
        return null;
      }
      Collections.sort(values);
      int midIndex = size / 2;
      double rs;
      if (size % 2 == 1) {
        rs = (double) values.get(midIndex).get();
      } else {
        // Widen to long before adding so two large ints cannot overflow.
        long lower = values.get(midIndex - 1).get();
        long upper = values.get(midIndex).get();
        rs = (lower + upper) / 2.0;
      }
      result.set(rs);
      return result;
    }

    /** Emits a copy of the buffered values as the single "list" field of the partial struct. */
    @Override
    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      partialResult[0] = new ArrayList<IntWritable>(medianNumberAgg.aggIntegerList);
      return partialResult;
    }

    /** Appends the values of one partial result to the buffer. */
    @Override
    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
      if (partial == null) {
        return;
      }
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      Object partialObject = structOI.getStructFieldData(partial, listField);
      int length = listFieldOI.getListLength(partialObject);
      for (int i = 0; i < length; i++) {
        Object element = listFieldOI.getListElement(partialObject, i);
        if (element != null) {
          // Read through the element inspector and copy into a fresh writable, so the buffer
          // does not depend on the concrete class or the lifetime of the incoming objects.
          medianNumberAgg.aggIntegerList.add(
              new IntWritable(PrimitiveObjectInspectorUtils.getInt(element, listElementOI)));
        }
      }
    }
  }

  /** Median evaluator for float/double input; values are buffered as {@link DoubleWritable}. */
  public static class GenericUDAFMedianEvaluatorDouble extends GenericUDAFEvaluator {

    private final DoubleWritable result = new DoubleWritable();
    PrimitiveObjectInspector inputOI;
    StructObjectInspector structOI;
    StandardListObjectInspector listOI;
    StructField listField;
    Object[] partialResult;
    ListObjectInspector listFieldOI;
    /** Inspector for the elements of the partial list; set only when merging partials. */
    PrimitiveObjectInspector listElementOI;
    boolean warned = false;

    /**
     * Sets up input and output inspectors for the given mode.
     *
     * <p>PARTIAL1/COMPLETE read raw column values; PARTIAL2/FINAL read a struct with a single
     * field named "list". PARTIAL1/PARTIAL2 emit that struct; COMPLETE/FINAL emit a double.
     */
    @Override
    public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
      assert (parameters.length == 1);
      super.init(m, parameters);

      listOI = ObjectInspectorFactory.getStandardListObjectInspector(
          PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);

      // init input
      if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
        inputOI = (PrimitiveObjectInspector) parameters[0];
      } else {
        structOI = (StructObjectInspector) parameters[0];
        listField = structOI.getStructFieldRef("list");
        listFieldOI = (ListObjectInspector) listField.getFieldObjectInspector();
        listElementOI = (PrimitiveObjectInspector) listFieldOI.getListElementObjectInspector();
      }

      // init output
      if (m == Mode.PARTIAL1 || m == Mode.PARTIAL2) {
        ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
        foi.add(listOI);
        ArrayList<String> fname = new ArrayList<String>();
        fname.add("list");
        partialResult = new Object[1];
        partialResult[0] = new ArrayList<DoubleWritable>();
        return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
      } else {
        return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
      }
    }

    /** Aggregation state: every non-null value seen so far for the current group. */
    static class MedianNumberAgg implements AggregationBuffer {
      List<DoubleWritable> aggIntegerList;
    }

    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      MedianNumberAgg resultAgg = new MedianNumberAgg();
      reset(resultAgg);
      return resultAgg;
    }

    @Override
    public void reset(AggregationBuffer agg) throws HiveException {
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      medianNumberAgg.aggIntegerList = new ArrayList<DoubleWritable>();
    }

    /** Adds one input row to the buffer; null and unconvertible values are skipped. */
    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
      assert (parameters.length == 1);
      if (parameters[0] == null) {
        return;
      }
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      try {
        double val = PrimitiveObjectInspectorUtils.getDouble(parameters[0], inputOI);
        // Added only after a successful conversion, so a failed row cannot inject a bogus 0.
        medianNumberAgg.aggIntegerList.add(new DoubleWritable(val));
      } catch (NullPointerException e) {
        LOG.warn("got a null value, skip it");
      } catch (NumberFormatException e) {
        if (!warned) {
          warned = true;
          LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
          LOG.warn("ignore similar exceptions.");
        }
      }
    }

    /**
     * Computes the median of the buffered values.
     *
     * @return the median as a {@link DoubleWritable}, or {@code null} when no value was seen
     */
    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      List<DoubleWritable> values = medianNumberAgg.aggIntegerList;
      int size = values.size();
      if (size == 0) {
        // No non-null input: report NULL instead of indexing an empty list.
        return null;
      }
      Collections.sort(values);
      int midIndex = size / 2;
      double rs;
      if (size % 2 == 1) {
        rs = values.get(midIndex).get();
      } else {
        rs = (values.get(midIndex - 1).get() + values.get(midIndex).get()) / 2.0;
      }
      result.set(rs);
      return result;
    }

    /** Emits a copy of the buffered values as the single "list" field of the partial struct. */
    @Override
    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      partialResult[0] = new ArrayList<DoubleWritable>(medianNumberAgg.aggIntegerList);
      return partialResult;
    }

    /** Appends the values of one partial result to the buffer. */
    @Override
    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
      if (partial == null) {
        return;
      }
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      Object partialObject = structOI.getStructFieldData(partial, listField);
      int length = listFieldOI.getListLength(partialObject);
      for (int i = 0; i < length; i++) {
        Object element = listFieldOI.getListElement(partialObject, i);
        if (element != null) {
          // Read through the element inspector and copy into a fresh writable, so the buffer
          // does not depend on the concrete class or the lifetime of the incoming objects.
          medianNumberAgg.aggIntegerList.add(
              new DoubleWritable(PrimitiveObjectInspectorUtils.getDouble(element, listElementOI)));
        }
      }
    }
  }

  /** Median evaluator for long input; values are buffered as {@link LongWritable}. */
  public static class GenericUDAFMedianEvaluatorLong extends GenericUDAFEvaluator {

    private final DoubleWritable result = new DoubleWritable();
    PrimitiveObjectInspector inputOI;
    StructObjectInspector structOI;
    StandardListObjectInspector listOI;
    StructField listField;
    Object[] partialResult;
    ListObjectInspector listFieldOI;
    /** Inspector for the elements of the partial list; set only when merging partials. */
    PrimitiveObjectInspector listElementOI;
    boolean warned = false;

    /**
     * Sets up input and output inspectors for the given mode.
     *
     * <p>PARTIAL1/COMPLETE read raw column values; PARTIAL2/FINAL read a struct with a single
     * field named "list". PARTIAL1/PARTIAL2 emit that struct; COMPLETE/FINAL emit a double.
     */
    @Override
    public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
      assert (parameters.length == 1);
      super.init(m, parameters);

      listOI = ObjectInspectorFactory.getStandardListObjectInspector(
          PrimitiveObjectInspectorFactory.writableLongObjectInspector);

      // init input
      if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
        inputOI = (PrimitiveObjectInspector) parameters[0];
      } else {
        structOI = (StructObjectInspector) parameters[0];
        listField = structOI.getStructFieldRef("list");
        listFieldOI = (ListObjectInspector) listField.getFieldObjectInspector();
        listElementOI = (PrimitiveObjectInspector) listFieldOI.getListElementObjectInspector();
      }

      // init output
      if (m == Mode.PARTIAL1 || m == Mode.PARTIAL2) {
        ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
        foi.add(listOI);
        ArrayList<String> fname = new ArrayList<String>();
        fname.add("list");
        partialResult = new Object[1];
        partialResult[0] = new ArrayList<LongWritable>();
        return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
      } else {
        return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
      }
    }

    /** Aggregation state: every non-null value seen so far for the current group. */
    static class MedianNumberAgg implements AggregationBuffer {
      List<LongWritable> aggIntegerList;
    }

    @Override
    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
      MedianNumberAgg resultAgg = new MedianNumberAgg();
      reset(resultAgg);
      return resultAgg;
    }

    @Override
    public void reset(AggregationBuffer agg) throws HiveException {
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      medianNumberAgg.aggIntegerList = new ArrayList<LongWritable>();
    }

    /** Adds one input row to the buffer; null and unconvertible values are skipped. */
    @Override
    public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
      assert (parameters.length == 1);
      if (parameters[0] == null) {
        return;
      }
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      try {
        long val = PrimitiveObjectInspectorUtils.getLong(parameters[0], inputOI);
        // Added only after a successful conversion, so a failed row cannot inject a bogus 0.
        medianNumberAgg.aggIntegerList.add(new LongWritable(val));
      } catch (NullPointerException e) {
        LOG.warn("got a null value, skip it");
      } catch (NumberFormatException e) {
        if (!warned) {
          warned = true;
          LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
          LOG.warn("ignore similar exceptions.");
        }
      }
    }

    /**
     * Computes the median of the buffered values.
     *
     * @return the median as a {@link DoubleWritable}, or {@code null} when no value was seen
     */
    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      List<LongWritable> values = medianNumberAgg.aggIntegerList;
      int size = values.size();
      if (size == 0) {
        // No non-null input: report NULL instead of indexing an empty list.
        return null;
      }
      Collections.sort(values);
      int midIndex = size / 2;
      double rs;
      if (size % 2 == 1) {
        rs = (double) values.get(midIndex).get();
      } else {
        // Halve each operand in floating point first; adding two longs could overflow.
        rs = values.get(midIndex - 1).get() / 2.0 + values.get(midIndex).get() / 2.0;
      }
      result.set(rs);
      return result;
    }

    /** Emits a copy of the buffered values as the single "list" field of the partial struct. */
    @Override
    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      partialResult[0] = new ArrayList<LongWritable>(medianNumberAgg.aggIntegerList);
      return partialResult;
    }

    /** Appends the values of one partial result to the buffer. */
    @Override
    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
      if (partial == null) {
        return;
      }
      MedianNumberAgg medianNumberAgg = (MedianNumberAgg) agg;
      Object partialObject = structOI.getStructFieldData(partial, listField);
      int length = listFieldOI.getListLength(partialObject);
      for (int i = 0; i < length; i++) {
        Object element = listFieldOI.getListElement(partialObject, i);
        if (element != null) {
          // Read through the element inspector and copy into a fresh writable, so the buffer
          // does not depend on the concrete class or the lifetime of the incoming objects.
          medianNumberAgg.aggIntegerList.add(
              new LongWritable(PrimitiveObjectInspectorUtils.getLong(element, listElementOI)));
        }
      }
    }
  }
}

测试:
-- Register the UDAF for this session.
use datawarehouse;
add jar /home/hadoop/shengli/median.jar;
create temporary function median as 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMedian';

-- Three int values (odd count).
select median(id) from (
  select 7 id from dual
  union all
  select 8 id from dual
  union all
  select 1 id from dual
) a;

-- Two bigint values (even count).
select median(id) from (
  select cast(1 as bigint) id from dual
  union all
  select cast(2 as bigint) id from dual
) a;

-- Two fractional values (even count).
select median(id) from (
  select 1.0 id from dual
  union all
  select 2.3 id from dual
) a;

-- Three int values (odd count).
select median(id) from (
  select 1 id from dual
  union all
  select 2 id from dual
  union all
  select 3 id from dual
) a;

-- A single NULL input row.
select median(id) from (
  select null id from dual
) a;

---------------------------------
-- Median per group.
select type, median(id) from (
  select 'a' type, 3 id from dual
  union all
  select 'a' type, -2 id from dual
  union all
  select 'a' type, 1 id from dual
  union all
  select 'a' type, 4 id from dual
  union all
  select 'b' type, 6 id from dual
  union all
  select 'b' type, 5 id from dual
  union all
  select 'b' type, 4 id from dual
) a
group by type;
关于hive中怎么使用udaf函数求中位数问题的解答就分享到这里了,希望以上内容可以对大家有一定的帮助,如果你还有很多疑惑没有解开,可以关注行业资讯频道了解更多相关知识。
中位数
问题
函数
数据
更多
帮助
解答
易行
从小到大
简单易行
两个
个数
从小
代码
位置
内容
处在
小伙
小伙伴
平均数
数据库的安全要保护哪些东西
数据库安全各自的含义是什么
生产安全数据库录入
数据库的安全性及管理
数据库安全策略包含哪些
海淀数据库安全审计系统
建立农村房屋安全信息数据库
易用的数据库客户端支持安全管理
连接数据库失败ssl安全错误
数据库的锁怎样保障安全
软件开发款会计分录
加密的数据库怎么打开吗
杭州冠基网络技术有限公司
log4j 日志服务器
数据库的意义是什么
程序员是用什么软件开发的
根据网络安全法规定什么
宁德时代软件开发工资
软件开发的沟通属于哪个部门
以太坊数据库在哪个国家
网络安全导向在哪里
定制网络技术咨询套餐
三级数据库技术z怎么准备
mapgis软件连接数据库
怀旧服能开新服务器吗
网络技术支撑工作报告
书画网书法家石俊海数据库
bsc北京网络安全大会主持人
fm2021数据库怎么切换
数据库限制输出
网络安全linux代码
网络安全信息安全保研夏令营
互联网科技和老人议论文
数据库恢复的实际技术有
软件开发工程师工作任务
电信有线电视服务器连接方式
中外标准数据库网址
网络安全知识网上答题答案
静安区上门软件开发价格实惠
小型服务器搭建步骤