本文主要介绍预测产品的分词结果、相似度降序排序,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
CommonCount1.java 将预测产品的分词结果与每一行逐个计算相似度(分词重合的个数除以预测产品的分词个数),将一行中各产品相似度的最高值保存为该行的相似度值,然后根据相似度降序排序。
将相似度大于0.8的前100条搭配套餐的行号记录下来。(输出文件:line_0.8)
将相似度大于0.6的前100条搭配套餐的行号记录下来。(输出文件:line_0.6)
将相似度大于0.6的前100条搭配套餐的行号记录下来。如果该行最高的相似度达不到0.6,那只取第一条。(输出文件:line0.6_100)
将相似度大于0.6的前10条搭配套餐的行号记录下来。如果该行最高的相似度达不到0.6,那只取第一条。(输出文件:line0.6_10)
在line0.6_10中补上line_0.8中出现不止10条的记录。(输出文件:line0.6_10_0.8)
package test;import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.Map;

/**
 * Ranks the lines of a candidate file by term-overlap similarity to each line
 * of a query file and appends the best-matching candidate line numbers to an
 * output file, one output row per query line.
 *
 * <p>Similarity between a query and one comma-separated product on a candidate
 * line is (number of matching space-separated terms) / (number of query terms).
 * A candidate line's score is the highest similarity among its products.
 */
public class CommonCount1 {

    /** Number of candidate-line slots allocated up front; grown on demand. */
    private static final int INITIAL_CAPACITY = 23105;

    /** Number of leading positions that {@link #bubbleSort} puts in final order. */
    private static final int SORTED_PREFIX = 21;

    /** Number of line numbers written per query line. */
    private static final int TOP_N = 20;

    private static final String QUERY_FILE =
            "/public/home/dsj/Public/sundujing/fpgrowth/di.txt";
    private static final String CANDIDATE_FILE =
            "/public/home/dsj/Public/sundujing/fpgrowth/ToTerms1.txt";
    private static final String OUTPUT_FILE =
            "/public/home/dsj/Public/sundujing/fpgrowth/line1.txt";

    /**
     * Counts the pairs (x, y) with x taken from {@code s2}, y taken from
     * {@code s1} and {@code x.equals(y)}. Repeated terms are counted once per
     * matching pair.
     *
     * @param s1 terms to match against
     * @param s2 terms to look up
     * @return the number of equal pairs
     */
    public static int count(String[] s1, String[] s2) {
        int matches = 0;
        for (String candidate : s2) {
            for (String reference : s1) {
                if (candidate.equals(reference)) {
                    matches++;
                }
            }
        }
        return matches;
    }

    /**
     * Appends {@code content} to the file {@code fileName}. An I/O failure is
     * reported with a stack trace and otherwise ignored.
     *
     * @param fileName path of the file to append to
     * @param content  text to append
     */
    public static void appendMethod(String fileName, String content) {
        // The second constructor argument (true) opens the file in append mode.
        // try-with-resources closes the writer even when write() fails.
        try (FileWriter writer = new FileWriter(fileName, true)) {
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Partially sorts {@code a} in descending order, in place, applying every
     * swap to {@code b} as well so the two arrays stay paired. Only the first
     * 21 positions are guaranteed to end up in their final order; the rest of
     * the array is left in whatever order the swaps produce.
     *
     * @param a scores to order
     * @param b values paired with {@code a}; must be at least as long as {@code a}
     * @return {@code a}, the same array instance that was passed in
     */
    public static double[] bubbleSort(double[] a, int[] b) {
        for (int i = 0; i < SORTED_PREFIX && i < a.length; i++) {
            for (int j = i + 1; j < a.length; j++) {
                if (a[i] < a[j]) {
                    double score = a[j];
                    a[j] = a[i];
                    a[i] = score;

                    int paired = b[j];
                    b[j] = b[i];
                    b[i] = paired;
                }
            }
        }
        return a;
    }

    /**
     * Computes the score of one candidate line: the highest similarity between
     * the query and any of the line's comma-separated products.
     *
     * @param queryTerms    terms of the query product
     * @param candidateLine one candidate line; products are separated by ","
     *                      and the terms of a product by " "
     * @return the best similarity found, or 0 when nothing matches
     */
    static double lineSimilarity(String[] queryTerms, String candidateLine) {
        double best = 0;
        for (String product : candidateLine.split(",")) {
            double similarity =
                    (double) count(queryTerms, product.split(" ")) / queryTerms.length;
            // Compare against the running maximum itself. The previous code
            // divided the maximum by the term count first, which let a weaker
            // product overwrite a stronger one.
            if (similarity > best) {
                best = similarity;
            }
        }
        return best;
    }

    /** Opens {@code path} for buffered reading as UTF-8 text. */
    private static BufferedReader openUtf8(String path) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"));
    }

    /**
     * Scores every line of the candidate file against the query. Index i of the
     * result holds the score of candidate line i + 1; slots past the end of the
     * file stay 0. The result has at least {@code INITIAL_CAPACITY} slots.
     */
    private static double[] scoreCandidates(String[] queryTerms) throws IOException {
        double[] scores = new double[INITIAL_CAPACITY];
        int line = 0;
        try (BufferedReader candidates = openUtf8(CANDIDATE_FILE)) {
            String candidateLine;
            while ((candidateLine = candidates.readLine()) != null) {
                if (line == scores.length) {
                    // More candidate lines than expected: grow instead of overflowing.
                    double[] grown = new double[scores.length * 2];
                    System.arraycopy(scores, 0, grown, 0, scores.length);
                    scores = grown;
                }
                scores[line++] = lineSimilarity(queryTerms, candidateLine);
            }
        }
        return scores;
    }

    /**
     * For every line of the query file, scores all candidate lines, orders them
     * by descending score and appends the line numbers of the first 20 to the
     * output file, each followed by a space, then a newline.
     *
     * <p>NOTE(review): the original source also carried disabled code that
     * filtered the output by a 0.4 similarity threshold; it was inactive and is
     * not reproduced here.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        try (BufferedReader queries = openUtf8(QUERY_FILE)) {
            String queryLine;
            while ((queryLine = queries.readLine()) != null) {
                String[] queryTerms = queryLine.split(" ");

                double[] scores = scoreCandidates(queryTerms);
                int[] lineNumbers = new int[scores.length];
                for (int i = 0; i < lineNumbers.length; i++) {
                    lineNumbers[i] = i + 1; // line numbers are 1-based
                }

                bubbleSort(scores, lineNumbers);

                // Build the whole row first so the output file is opened once
                // per query line rather than once per number.
                StringBuilder row = new StringBuilder();
                for (int j = 0; j < TOP_N; j++) {
                    row.append(lineNumbers[j]).append(" ");
                }
                row.append("\n");
                appendMethod(OUTPUT_FILE, row.toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
这篇关于预测产品的分词结果、相似度降序排序的文章就介绍到这儿,希望我们推荐的文章对程序员们有所帮助!