本文记录(just record for myself)如何统计大模型 SFT 的生成结果与 version2 中 test 集标注之间的 bad case,希望能为大家解决类似的编程问题提供一定的参考,需要的开发者可以一起学习。
问题描述:
利用 Qwen 大模型进行 SFT(LoRA),并将模型生成的结果与 version2 版本下的中文 test.txt 标注逐条比对,进行 bad case 分析。
代码实现:
from tqdm import tqdm, trange
import os
import re
from typing import List
import json
from pdb import set_trace as stop

# Gold test set (version2 format) and the LLM generations that are compared
# against it.
pipeline_data_path = "/public/home/hongy/qtxu/Qwen-main/data/version2/Ele-COQE/test.txt"
llm_generated_path = "/public/home/hongy/qtxu/Qwen-main/results/Ele_lora/pred_20231230_model2.jsonl"  # where the LLM generations were saved

# Polarity id -> label text used inside the quintuples.
# English variant, kept for reference:
# dic_en = {-1: 'worse', 0: 'equal', 1: 'better', 2: 'different'}
dic_zh = {-1: '更差', 0: '等同', 1: '更好', 2: '不同'}

# Label block that marks a sentence without any comparative quintuple.
_EMPTY_LABEL = '[[];[];[];[];[]]'

# One quintuple: [[subject];[object];[aspect];[opinion];[polarity]]
_QUINTUPLE_RE = re.compile(
    r'\[\[(.*?)\];\[(.*?)\];\[(.*?)\];\[(.*?)\];\[(.*?)\]\]'
)


def str_to_span(input_str):
    """Convert an ``index&char`` token string into its indices and text.

    Example: ``'3&高 4&端 5&机'`` -> ``('3:4:5', '高端机')``.

    Args:
        input_str: Whitespace-separated ``index&char`` tokens. A
            discontinuous span may contain `` , `` between its parts, e.g.
            ``'21&没 22&有 , 25&细 26&致'``; the separator is dropped.

    Returns:
        A ``(indexs_str, span_str)`` tuple: the indices joined with ``:`` and
        the characters concatenated. Both are ``''`` for empty input.
    """
    if not input_str:
        return '', ''
    tokens = input_str.replace(' , ', ' ').split()
    if not tokens:
        # Whitespace-only input carries no span either.
        return '', ''
    # Split on the first '&' only, so a token whose character is itself '&'
    # (e.g. '3&&') still yields exactly one index and one character.
    indexs, span = zip(*[token.split('&', 1) for token in tokens])
    return ':'.join(indexs), ''.join(span)


def process_line(text_line, label_line, kind, i):
    """Build the gold quintuple string for one sample.

    Args:
        text_line: ``sentence<TAB>flag`` line; flag ``1`` marks a comparative
            sentence, anything else a non-comparative one.
        label_line: Text holding the sample's ``[[..];[..];[..];[..];[..]]``
            label blocks.
        kind: Unused; kept so existing callers keep working.
        i: Unused; kept so existing callers keep working.

    Returns:
        ``(sentence, quintuples)``. Each quintuple is rendered as
        ``(sub,obj,asp,op,polarity)`` and they are joined with ``;`` in
        reverse order of appearance. ``quintuples`` is ``''`` for a
        non-comparative sentence.
    """
    fields = text_line.split('\t')
    text = fields[0].strip()
    have_triples = int(fields[1])
    if have_triples != 1:
        return text, ''

    quintuples = []
    for sub, obj, asp, op, polarity in _QUINTUPLE_RE.findall(label_line):
        spans = [str_to_span(part)[1] for part in (sub, obj, asp, op)]
        spans.append(dic_zh[int(polarity)])
        quintuples.append("(" + ",".join(spans) + ")")
    # Later quintuples go first; obtain_llms_predicted_labels orders the
    # predictions the same way, so both sides stay comparable as strings.
    return text, ';'.join(reversed(quintuples))


def load_data(path, kind):
    """Read the gold file and return the comparative sentences and labels.

    A line that splits into exactly two tab-separated fields starts a new
    sample; every other line is collected as label text of the current
    sample. Samples whose label text is only the empty label block are
    skipped.

    Args:
        path: Path of the gold ``test.txt`` file.
        kind: Forwarded to ``process_line``.

    Returns:
        ``(all_test_sents, all_test_labels)``, two parallel lists.
    """
    with open(path, 'r', encoding='utf-8') as f:
        raw_data = f.readlines()

    all_test_sents = []
    all_test_labels = []
    text_line, label_line = '', ''
    i = 0

    def keep_current_sample():
        # Compare the stripped label so that a missing final newline or
        # trailing blank lines do not turn an empty label into a "real" one.
        return text_line != '' and label_line.strip() != _EMPTY_LABEL

    for cur_line in tqdm(raw_data, desc='processing data for mode'):
        if len(cur_line.split('\t')) != 2:
            label_line += '\n' + cur_line
            continue
        if keep_current_sample():
            test_sent, test_label = process_line(text_line, label_line, kind, i)
            all_test_sents.append(test_sent)
            all_test_labels.append(test_label)
            i += 1
        text_line = cur_line
        label_line = ''

    # The last sample has no following sentence line to trigger its flush.
    if keep_current_sample():
        test_sent, test_label = process_line(text_line, label_line, kind, i)
        all_test_sents.append(test_sent)
        all_test_labels.append(test_label)
    return all_test_sents, all_test_labels


def obtain_llms_predicted_labels(path):
    """Read the LLM predictions for comparative sentences from a JSONL file.

    Only records whose ``type`` equals ``1`` are used. Their ``output`` is
    split into lines and every second line (0, 2, 4, ...) is parsed as one
    quintuple.

    Args:
        path: Path of the JSONL file with the generations.

    Returns:
        One string per comparative record, with the quintuples rendered as
        ``(sub,obj,asp,op,polarity)`` and joined with ``;`` in reverse order
        of appearance.
    """
    llms_predicted = []
    with open(path, 'r', encoding='utf-8') as fr:
        for line in fr:
            if not line.strip():
                continue  # tolerate blank lines in the JSONL file
            cur_line = json.loads(line)
            if cur_line['type'] != 1:  # 1 marks a comparative sentence
                continue
            result = cur_line['output'].strip().split('\n')
            quintuples = []
            for row in result[::2]:
                # Drops a fixed 7-character prefix and the surrounding
                # brackets -- presumably fixed by the generation template;
                # TODO confirm against the prompt format.
                # NOTE: elements that contain a comma themselves are not
                # handled by this split.
                parts = row[7:].strip()[1:-1].split(',')
                sub, obj, asp, op, polarity = (
                    parts[0].strip(),
                    parts[1].strip(),
                    parts[2].strip(),
                    parts[3].strip(),
                    parts[-1].strip(),
                )
                quintuples.append(
                    '(' + sub + ',' + obj + ',' + asp + ',' + op + ',' + polarity + ')'
                )
            llms_predicted.append(';'.join(reversed(quintuples)))
    return llms_predicted


def simple_identify_two_list(gold_label, predicted_label):
    """Return True when the gold and predicted labels are equal."""
    return gold_label == predicted_label


def bad_case(gold_sentences, gold_labels, predicted_labels, bad_case_path):
    """Write every sample whose prediction differs from the gold label.

    Labels are compared as strings after replacing ``", "`` with ``","``.
    For each mismatch the sentence, the gold label and the predicted label
    are written; the file ends with the total number of bad cases.

    Args:
        gold_sentences: Sentences, parallel to the two label lists.
        gold_labels: Gold quintuple strings.
        predicted_labels: Predicted quintuple strings.
        bad_case_path: Output file; it is overwritten, and its parent
            directory is created when missing.

    Returns:
        The number of bad cases written.

    Raises:
        AssertionError: If the three inputs differ in length.
    """
    assert len(gold_labels) == len(gold_sentences) == len(predicted_labels), "gold_labels, gold_sentences, predicted_labels not equal to each other!!!"

    parent_dir = os.path.dirname(bad_case_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    is_new_file = not os.path.exists(bad_case_path)

    bad_case_num = 0
    with open(bad_case_path, 'w', encoding='utf-8') as fw:
        if is_new_file:
            print(f"文件{bad_case_path} 创建成功!")
        for sent, gold_label, predicted_label in zip(
            gold_sentences, gold_labels, predicted_labels
        ):
            gold_label = gold_label.replace(", ", ",")
            predicted_label = predicted_label.replace(", ", ",")
            if simple_identify_two_list(gold_label, predicted_label):
                continue
            bad_case_num += 1
            fw.write(sent + '\n')
            fw.write("gold:" + gold_label)
            fw.write("\n")
            fw.write("predicted:" + predicted_label + "\n")
        fw.write("bad case num is " + str(bad_case_num))
    return bad_case_num


if __name__ == "__main__":
    kind = 'en'
    # Comparative sentences and their gold labels from the version2 test.txt.
    all_test_sents, all_test_labels = load_data(pipeline_data_path, kind)
    llms_predicted = obtain_llms_predicted_labels(llm_generated_path)
    bad_case_path = "/public/home/hongy/qtxu/Qwen-main/bad_case/ele/bad_case.txt"
    bad_case(all_test_sents, all_test_labels, llms_predicted, bad_case_path)
这篇关于(just record for myself)统计大模型 SFT 的结果与 version2 中 test 结果之间 bad case 的文章就介绍到这儿,希望对各位程序员有所帮助!