本文主要介绍如何使用 Canopy 生成和 k-means 聚类对新闻进行聚类,希望能为大家解决编程问题提供一定的参考,需要的开发者可以跟着小编一起学习!
/**
 * @author YangXin
 * @info 使用 Canopy 生成和 k-means 聚类对新闻进行聚类
 */
package unitNine;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.lucene.analysis.Analyzer;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.vectorizer.DictionaryVectorizer;
import org.apache.mahout.vectorizer.DocumentProcessor;
import org.apache.mahout.vectorizer.tfidf.TFIDFConverter;
public class ReutersToSparseVectors {public static void main(String args[]) throws Exception {int minSupport = 5;int minDf = 5;int maxDFPercent = 95;int maxNGramSize = 1;float minLLRValue = 50;int reduceTasks = 1;int chunkSize &#
这篇关于使用 Canopy 生成和 k-means 聚类对新闻进行聚类的文章就介绍到这儿,希望我们推荐的文章对开发者们有所帮助!