just record for myself：统计大模型SFT的结果与version2中test的结果bad case

本文主要介绍“just record for myself：统计大模型SFT的结果与version2中test的结果bad case”，希望能为大家解决编程问题提供一定的参考价值，需要的开发者可以跟随小编一起学习！

问题描述：

利用Qwen大模型进行SFT（LoRA）微调，将模型生成的结果与version2版本下的中文test.txt中的标注结果逐条对比，进行bad case分析。

代码实现：

from tqdm import tqdm, trange
import os
import re
from typing import List
import json
from pdb import set_trace as stop

# Test file of the version2 Ele-COQE data set (sentences plus quintuple labels).
pipeline_data_path = "/public/home/hongy/qtxu/Qwen-main/data/version2/Ele-COQE/test.txt"
# Path where the results generated by the large model are stored (JSON lines).
llm_generated_path = "/public/home/hongy/qtxu/Qwen-main/results/Ele_lora/pred_20231230_model2.jsonl"

# English polarity names, kept for reference only:
# dic_en = { -1: 'worse', 0: 'equal', 1: 'better', 2: 'different'}
# Maps the numeric polarity stored in the label file to the Chinese word that
# is placed in the quintuple string.
dic_zh = {-1: '更差', 0: '等同', 1: '更好', 2: '不同'}

# One labelled quintuple: [[subject];[object];[aspect];[opinion];[polarity]]
_QUINTUPLE_RE = re.compile(r'\[\[(.*?)\];\[(.*?)\];\[(.*?)\];\[(.*?)\];\[(.*?)\]\]')
# Label text of a sample that carries no quintuple at all.
_EMPTY_LABEL = '[[];[];[];[];[]]'


def str_to_span(input_str):
    """Convert an ``index&char`` token sequence into its indexes and its text.

    Example: ``'3&高 4&端 5&机'`` becomes ``('3:4:5', '高端机')``.  Discontinuous
    spans written with `` , `` between the parts are merged, e.g.
    ``'21&没 22&有 , 25&细 26&致'`` becomes ``('21:22:25:26', '没有细致')``.

    Args:
        input_str: Whitespace separated ``index&char`` tokens; may be empty.

    Returns:
        A tuple ``(indexs_str, span_str)``: the indexes joined with ``':'`` and
        the characters concatenated.  Both are ``''`` for empty or
        whitespace-only input.

    Raises:
        ValueError: If a token does not contain ``'&'``.
    """
    tokens = input_str.replace(' , ', ' ').split()
    if not tokens:
        return '', ''

    indexs, chars = [], []
    for token in tokens:
        # Split on the first '&' only, so a character that is itself '&'
        # (token '7&&') is kept instead of breaking the unpacking.
        index, char = token.split('&', 1)
        indexs.append(index)
        chars.append(char)
    return ':'.join(indexs), ''.join(chars)


def process_line(text_line, label_line, kind, i):
    """Build the gold quintuple string of one sample.

    Args:
        text_line: ``sentence<TAB>flag`` line; flag ``1`` marks a comparative
            sentence, anything else a non-comparative one.
        label_line: All label lines of the sample concatenated; every
            quintuple looks like ``[[sub];[obj];[asp];[op];[polarity]]``.
        kind: Unused; kept so existing callers keep working.
        i: Unused; kept so existing callers keep working.

    Returns:
        A tuple ``(sentence, quintuples)``.  ``quintuples`` holds one
        ``(sub,obj,asp,op,polarity)`` group per label joined with ``';'``, the
        last label first.  It is ``''`` for a non-comparative sentence.

    Raises:
        IndexError: If ``text_line`` has no tab separated flag.
        KeyError: If a polarity is not a key of ``dic_zh``.
    """
    fields = text_line.split('\t')
    sentence = fields[0].strip()
    if int(fields[1]) != 1:
        # Non-comparative sentence: there is nothing to extract.
        return sentence, ''

    quintuples = []
    for sub, obj, asp, op, polarity in _QUINTUPLE_RE.findall(label_line):
        spans = [str_to_span(element)[1] for element in (sub, obj, asp, op)]
        spans.append(dic_zh[int(polarity)])
        quintuples.append('(' + ','.join(spans) + ')')
    # Later quintuples are placed in front of earlier ones; the prediction
    # parser below uses the same order, so the strings stay comparable.
    return sentence, ';'.join(reversed(quintuples))


def load_data(path, kind):
    """Read the gold file and collect every sample that has real labels.

    A line with exactly two tab separated fields starts a new sample; every
    other line is treated as a label line of the current sample.  Samples
    whose only label is the empty quintuple are skipped.

    Args:
        path: Path of the gold ``test.txt`` file (read as UTF-8).
        kind: Passed through to ``process_line``.

    Returns:
        A tuple ``(all_test_sents, all_test_labels)`` of two parallel lists.
    """
    with open(path, 'r', encoding='utf-8') as f:
        raw_data = list(f)

    all_test_sents = []
    all_test_labels = []

    def flush(text_line, label_line, sample_id):
        # Compare the stripped label so that a missing final newline or a
        # CRLF line ending does not defeat the empty-label check.
        if text_line == '' or label_line.strip() == _EMPTY_LABEL:
            return
        test_sent, test_label = process_line(text_line, label_line, kind, sample_id)
        all_test_sents.append(test_sent)
        all_test_labels.append(test_label)

    i = 0
    text_line, label_line = '', ''
    for cur_line in tqdm(raw_data, desc='processing data for mode'):
        if len(cur_line.split('\t')) != 2:
            label_line += '\n' + cur_line
            continue
        # A new sentence line closes the previous sample.
        flush(text_line, label_line, i)
        i += 1
        text_line = cur_line
        label_line = ''
    # The last sample is not followed by another sentence line.
    flush(text_line, label_line, i)
    return all_test_sents, all_test_labels


def obtain_llms_predicted_labels(path):
    """Parse the quintuples predicted by the model from a JSON-lines file.

    Only records whose ``type`` equals ``1`` (comparative sentence) are used.

    Args:
        path: Path of the ``.jsonl`` prediction file (read as UTF-8).

    Returns:
        A list with one quintuple string per comparative record, formatted
        like the gold strings of ``process_line`` (last quintuple first,
        joined with ``';'``).

    Raises:
        IndexError: If an output line has fewer than four comma separated
            fields.
    """
    llms_predicted = []
    with open(path, 'r', encoding='utf-8') as fr:
        for line in fr:
            if not line.strip():
                continue  # tolerate blank lines in the file
            record = json.loads(line)
            if record['type'] != 1:  # not a comparative sentence
                continue
            output_lines = record['output'].strip().split('\n')
            quintuples = []
            # Only every second line is parsed, as in the original script.
            # NOTE(review): presumably the lines in between are blank - confirm.
            for raw in output_lines[::2]:
                # The first 7 characters of the line are dropped, then the
                # surrounding parentheses.
                # NOTE(review): presumably a fixed prefix - confirm.
                fields = [part.strip() for part in raw[7:].strip()[1:-1].split(',')]
                # A few special cases contain extra commas; the polarity is
                # therefore always taken from the last field.
                sub, obj, asp, op, polarity = (
                    fields[0], fields[1], fields[2], fields[3], fields[-1]
                )
                quintuples.append('(' + sub + ',' + obj + ',' + asp + ',' + op + ',' + polarity + ')')
            llms_predicted.append(';'.join(reversed(quintuples)))
    return llms_predicted


def simple_identify_two_list(gold_label, predicted_label):
    """Return True when the gold and the predicted label are equal."""
    return gold_label == predicted_label


def bad_case(gold_sentences, gold_labels, predicted_labels, bad_case_path):
    """Write every sample whose prediction differs from the gold label.

    ``", "`` is normalised to ``","`` in both labels before the comparison.
    For each mismatch the sentence, the gold label and the predicted label are
    written; the file ends with the number of bad cases.

    Args:
        gold_sentences: Sentences, parallel to the two label lists.
        gold_labels: Gold quintuple strings.
        predicted_labels: Predicted quintuple strings.
        bad_case_path: Output file; it is overwritten, and missing parent
            directories are created.

    Raises:
        ValueError: If the three lists differ in length.
    """
    if not len(gold_labels) == len(gold_sentences) == len(predicted_labels):
        raise ValueError("gold_labels, gold_sentences, predicted_labels not equal to each other!!!")

    parent_dir = os.path.dirname(bad_case_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    is_new_file = not os.path.exists(bad_case_path)

    bad_case_num = 0
    with open(bad_case_path, 'w', encoding='utf-8') as fw:
        for sent, gold_label, predicted_label in zip(gold_sentences, gold_labels, predicted_labels):
            gold_label = gold_label.replace(", ", ",")
            predicted_label = predicted_label.replace(", ", ",")
            if simple_identify_two_list(gold_label, predicted_label):
                continue
            bad_case_num += 1
            fw.write(sent + '\n')
            fw.write("gold:" + gold_label + "\n")
            fw.write("predicted:" + predicted_label + "\n")
        fw.write("bad case num is " + str(bad_case_num))

    if is_new_file:
        print(f"文件{bad_case_path} 创建成功！")


# Passed to load_data; process_line does not use it at the moment.
kind = 'en'
# Gold comparative sentences and their labels from the version2 test.txt.
all_test_sents, all_test_labels = load_data(pipeline_data_path, kind)
# Quintuples predicted by the model for the comparative sentences.
llms_predicted = obtain_llms_predicted_labels(llm_generated_path)
# File that receives every sentence whose prediction differs from the gold label.
bad_case_path = "/public/home/hongy/qtxu/Qwen-main/bad_case/ele/bad_case.txt"
bad_case(all_test_sents, all_test_labels, llms_predicted, bad_case_path)

这篇关于“just record for myself：统计大模型SFT的结果与version2中test的结果bad case”的文章就介绍到这儿，希望我们推荐的文章对各位开发者有所帮助！