This article covers Week TR6: Text Classification with a Transformer, Embedding Edition. I hope it offers a useful reference for developers tackling similar problems; follow along if you are interested!
- This post is a learning-record blog from the 🔗365天深度学习训练营
- Original author: K同学啊
Tasks:
● Building on the code from Week TR5: Transformer in Practice: Text Classification, switch the input representation to an Embedding layer
● Understand the code logic and run it successfully
● Tune the code based on your own understanding so that validation accuracy reaches 79%
1. Preparation
- Environment setup
This is a simple hands-on text-classification example implemented in PyTorch.
import torch, torchvision
print(torch.__version__)       # note the double underscores
print(torchvision.__version__)
Output
2.0.0+cpu
0.15.1+cpu
import math, os, PIL, pathlib, warnings
import torch.nn as nn
import numpy as np
from torchvision import transforms, datasets

warnings.filterwarnings("ignore")  # suppress warning messages

# Select the device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device
Output
device(type='cpu')
- Loading the data
import pandas as pd

# Load the custom Chinese dataset
train_data = pd.read_csv('./TR6/train.csv', sep='\t', header=None)
train_data.head()
Output
|   | 0 | 1 |
|---|---|---|
| 0 | 还有双鸭山到淮阴的汽车票吗13号的 | Travel-Query |
| 1 | 从这里怎么回家 | Travel-Query |
| 2 | 随便播放一首专辑阁楼里的佛里的歌 | Music-Play |
| 3 | 给看一下墓王之王嘛 | FilmTele-Play |
| 4 | 我想看挑战两把s686打突变团竞的游戏视频 | Video-Play |
label_name = list(set(train_data[1].values[:]))
print(label_name)
Output
['Radio-Listen', 'TVProgram-Play', 'Video-Play', 'Travel-Query', 'Weather-Query', 'Music-Play', 'HomeAppliance-Control', 'Other', 'Calendar-Query', 'FilmTele-Play', 'Alarm-Update', 'Audio-Play']
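Note that iteration order over a `set` is not stable across Python runs (string hashing is randomized), so `label_name` may come out in a different order each time, and a saved model becomes tied to one particular ordering. A small hedged tweak for reproducibility is to sort the labels; keep in mind this changes the label indices relative to the outputs shown in this post:

# Deterministic label order across runs (a minimal tweak, not in the original)
label_name = sorted(set(train_data[1].values))
print(label_name)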
2. Data preprocessing
- Building the vocabulary
The jieba tokenizer library needs to be installed separately.
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
import jieba

# Chinese word segmentation
tokenizer = jieba.lcut

def yield_tokens(data_iter):
    for text in data_iter:
        yield tokenizer(text)

vocab = build_vocab_from_iterator(yield_tokens(train_data[0].values[:]),
                                  specials=["<unk>"])
vocab.set_default_index(vocab["<unk>"])  # default index returned when a token is not in the vocabulary
Output
Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\xzy\AppData\Local\Temp\jieba.cache
Loading model cost 0.934 seconds.
Prefix dict has been built successfully.
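Before moving on, a couple of quick sanity checks on the vocabulary can be useful (a small sketch, not in the original; the printed numbers depend on your data, and any out-of-vocabulary token simply maps to the <unk> index):

print(len(vocab))               # vocabulary size
print(vocab(['的', '汽车票']))  # token -> index lookup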
text_pipeline  = lambda x: torch.tensor(vocab(tokenizer(x)))
label_pipeline = lambda x: torch.tensor(label_name.index(x))

print(text_pipeline('我想看和平精英上战神必备技巧的游戏视频'))
print(label_pipeline('Video-Play'))
Output
tensor([ 2, 10, 13, 973, 1079, 146, 7724, 7574, 7793, 1, 186, 28])
tensor(2)
- One-hot encoding
from functools import partial

X = [text_pipeline(i) for i in train_data[0].values[:]]
y = [label_pipeline(i) for i in train_data[1].values[:]]

# One-hot encode the labels y
numbers_array = np.array(y)              # convert to a NumPy array
num_classes = np.max(numbers_array) + 1  # number of classes
y = np.eye(num_classes)[numbers_array]   # one-hot encoding
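Strictly speaking, the one-hot step is optional: nn.CrossEntropyLoss accepts integer class indices directly, and in recent PyTorch versions (1.10+) it also accepts class-probability targets, which is how the one-hot floats are consumed here. A sketch of the index-based alternative (the accuracy computation would then compare pred.argmax(1) == y instead of y.argmax(1)):

# Keep integer class indices instead of one-hot vectors (a sketch, not the original approach)
y_indices = torch.tensor([label_name.index(l) for l in train_data[1].values[:]])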
- A custom Dataset class
from torch.utils.data import DataLoader, Dataset

class TextDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.texts[idx], self.labels[idx]
- A padding function
import torch.nn.functional as F

max_len = max(len(i) for i in X)

def collate_batch(batch, max_len):
    texts, labels = zip(*batch)
    # Right-pad every sequence to max_len with index 0
    padded_texts = [F.pad(text, (0, max_len - len(text)), value=0) for text in texts]
    padded_texts = torch.stack(padded_texts)
    labels = torch.tensor(labels, dtype=torch.float)  # .unsqueeze(1)
    return padded_texts.to(device), labels.to(device)

# Use partial to bake max_len into the collate_fn
collate_fn = partial(collate_batch, max_len=max_len)
max_len
Output
30
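As a quick sanity check (a small sketch, not in the original), you can pull one batch through the collate function and inspect the shapes; with max_len = 30 and 12 classes, the padded texts should come out as [batch, 30] and the labels as [batch, 12]:

sample_loader = DataLoader(TextDataset(X, y), batch_size=4, collate_fn=collate_fn)
texts_b, labels_b = next(iter(sample_loader))
print(texts_b.shape)   # expected: torch.Size([4, 30])
print(labels_b.shape)  # expected: torch.Size([4, 12])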
- Building the datasets
from torch.utils.data.dataset import random_split

# Example vocabulary size and embedding dimension (placeholders; the real values are set before training)
vocab_size = 10
embed_dim = 6

# Build the dataset and the data loaders (80/20 train/validation split).
# Note: if len(dataset)*0.8 is not an integer, the two int() casts below may not
# sum to len(dataset); using [n_train, len(dataset) - n_train] is safer.
dataset = TextDataset(X, y)
train_dataset, valid_dataset = random_split(dataset,
                                            [int(len(dataset)*0.8),
                                             int(len(dataset)*0.2)])
train_dataloader = DataLoader(train_dataset, batch_size=128,
                              shuffle=True, collate_fn=collate_fn)
valid_dataloader = DataLoader(valid_dataset, batch_size=128,
                              shuffle=True, collate_fn=collate_fn)
3. Building the model
- Positional encoding
class PositionalEncoding(nn.Module):
    def __init__(self, embed_dim, max_len=100):
        super(PositionalEncoding, self).__init__()
        # A zero tensor of shape [max_len, embed_dim]
        pe = torch.zeros(max_len, embed_dim)
        # Position indices of shape [max_len, 1]
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        # Note: the canonical Transformer uses 10000.0 as the base here; this code uses 100.0
        div_term = torch.exp(torch.arange(0, embed_dim, 2).float() * (-math.log(100.0) / embed_dim))
        pe[:, 0::2] = torch.sin(position * div_term)  # PE(pos, 2i)
        pe[:, 1::2] = torch.cos(position * div_term)  # PE(pos, 2i+1)
        pe = pe.unsqueeze(0)
        # Register as a buffer: not a trainable parameter, but saved with the model
        self.register_buffer('pe', pe)

    def forward(self, x):
        # x has shape [batch_size, seq_len, embedding_dim]
        # Add the positional encoding, sliced to the input's sequence length
        x = x + self.pe[:, :x.size(1)]
        return x
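For reference, the sinusoidal encoding filled in above is, with base $b$:

$$PE_{(pos,\,2i)} = \sin\!\left(\frac{pos}{b^{\,2i/d_{\text{model}}}}\right), \qquad PE_{(pos,\,2i+1)} = \cos\!\left(\frac{pos}{b^{\,2i/d_{\text{model}}}}\right)$$

The original Transformer paper uses $b = 10000$; this code uses $b = 100$, which spreads the wavelengths over a narrower range and is workable for the short (max_len = 30) sequences used here.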
- Defining the Transformer model
from tempfile import TemporaryDirectory
from typing import Tuple
from torch import Tensor
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.utils.data import dataset

class TransformerModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, max_len, num_class,
                 nhead=8, d_hid=256, nlayers=12, dropout=0.1):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size,  # vocabulary size
                                      embed_dim)   # embedding dimension
        self.pos_encoder = PositionalEncoding(embed_dim)
        # Define the encoder layers. Note: batch_first defaults to False, so the
        # encoder formally treats dim 0 as the sequence dimension; the code still
        # runs with [batch, seq_len, embed_dim] input, but passing batch_first=True
        # would make the intended layout explicit.
        encoder_layers = TransformerEncoderLayer(embed_dim, nhead, d_hid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.embed_dim = embed_dim
        self.linear = nn.Linear(embed_dim * max_len, num_class)

    def forward(self, src, src_mask=None):
        src = self.embedding(src)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, src_mask)
        # Flatten [batch, seq_len, embed_dim] for the linear classification head
        output = output.view(output.size(0), -1)
        output = self.linear(output)
        return output
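A quick forward-pass shape check (a minimal sketch with toy sizes, not the training configuration):

toy_model = TransformerModel(vocab_size=100, embed_dim=64, max_len=30, num_class=12)
dummy = torch.randint(0, 100, (2, 30))  # a batch of 2 sequences of length 30
print(toy_model(dummy).shape)           # expected: torch.Size([2, 12])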
- The training function
# Training loop
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)  # size of the training set
    num_batches = len(dataloader)   # number of batches (size/batch_size, rounded up)
    train_loss, train_acc = 0, 0    # running loss and accuracy

    for X, y in dataloader:         # fetch a batch of texts and labels
        X, y = X.to(device), y.to(device)

        # Compute the prediction error
        pred = model(X)             # network output
        loss = loss_fn(pred, y)     # loss between the output and the targets

        # Backpropagation
        optimizer.zero_grad()       # zero the gradients
        loss.backward()             # backpropagate
        optimizer.step()            # update the parameters

        # Accumulate accuracy and loss
        train_acc += (pred.argmax(1) == y.argmax(1)).type(torch.float).sum().item()
        train_loss += loss.item()

    train_acc /= size
    train_loss /= num_batches
    return train_acc, train_loss
- The evaluation function
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)  # size of the validation set
    num_batches = len(dataloader)   # number of batches (size/batch_size, rounded up)
    test_loss, test_acc = 0, 0

    # No gradient tracking during evaluation; this saves memory
    with torch.no_grad():
        for texts, target in dataloader:
            texts, target = texts.to(device), target.to(device)

            # Compute the loss
            target_pred = model(texts)
            loss = loss_fn(target_pred, target)

            test_loss += loss.item()
            test_acc += (target_pred.argmax(1) == target.argmax(1)).type(torch.float).sum().item()

    test_acc /= size
    test_loss /= num_batches
    return test_acc, test_loss
4. Training the model
vocab_size = len(vocab)      # vocabulary size
embed_dim = 64               # embedding dimension
num_class = len(label_name)  # number of classes

# Create the Transformer model and move it to the device
model = TransformerModel(vocab_size, embed_dim, max_len, num_class).to(device)
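Optionally (a small sketch, not in the original), report the model size before training:

num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Trainable parameters: {num_params:,}')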
- Training
import torch.optim as optim

# Hyperparameters
EPOCHS = 50

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

for epoch in range(1, EPOCHS + 1):
    model.train()  # switch to training mode
    train_acc, train_loss = train(train_dataloader, model, criterion, optimizer)

    model.eval()   # switch to evaluation mode
    test_acc, test_loss = test(valid_dataloader, model, criterion)

    # Read the current learning rate from the optimizer
    lr = optimizer.state_dict()['param_groups'][0]['lr']

    print('| epoch {:1d} | train_acc {:4.4f} train_loss {:4.4f} | lr {:4.4f}'.format(epoch, train_acc, train_loss, lr))
    print('| epoch {:1d} | test_acc {:4.4f} test_loss {:4.4f} | lr {:4.4f}'.format(epoch, test_acc, test_loss, lr))
    print('-' * 69)
Output
| epoch 1 | train_acc 0.1150 train_loss 2.6222 | lr 0.0100
| epoch 1 | test_acc 0.1128 test_loss 2.6013 | lr 0.0100
---------------------------------------------------------------------
| epoch 2 | train_acc 0.1405 train_loss 2.5153 | lr 0.0100
| epoch 2 | test_acc 0.1860 test_loss 2.3967 | lr 0.0100
---------------------------------------------------------------------
| epoch 3 | train_acc 0.1987 train_loss 2.2949 | lr 0.0100
| epoch 3 | test_acc 0.1938 test_loss 2.2864 | lr 0.0100
---------------------------------------------------------------------
| epoch 4 | train_acc 0.2620 train_loss 2.1522 | lr 0.0100
| epoch 4 | test_acc 0.3000 test_loss 2.0298 | lr 0.0100
---------------------------------------------------------------------
| epoch 5 | train_acc 0.3127 train_loss 2.0171 | lr 0.0100
| epoch 5 | test_acc 0.3149 test_loss 1.9701 | lr 0.0100
---------------------------------------------------------------------
| epoch 6 | train_acc 0.3714 train_loss 1.8808 | lr 0.0100
| epoch 6 | test_acc 0.3508 test_loss 1.9691 | lr 0.0100
---------------------------------------------------------------------
| epoch 7 | train_acc 0.4099 train_loss 1.7816 | lr 0.0100
| epoch 7 | test_acc 0.4322 test_loss 1.7314 | lr 0.0100
---------------------------------------------------------------------
| epoch 8 | train_acc 0.4507 train_loss 1.6637 | lr 0.0100
| epoch 8 | test_acc 0.4678 test_loss 1.6182 | lr 0.0100
---------------------------------------------------------------------
| epoch 9 | train_acc 0.4877 train_loss 1.5725 | lr 0.0100
| epoch 9 | test_acc 0.5041 test_loss 1.5307 | lr 0.0100
---------------------------------------------------------------------
| epoch 10 | train_acc 0.5080 train_loss 1.4986 | lr 0.0100
| epoch 10 | test_acc 0.4579 test_loss 1.6603 | lr 0.0100
---------------------------------------------------------------------
| epoch 11 | train_acc 0.5290 train_loss 1.4443 | lr 0.0100
| epoch 11 | test_acc 0.5182 test_loss 1.4684 | lr 0.0100
---------------------------------------------------------------------
| epoch 12 | train_acc 0.5518 train_loss 1.3683 | lr 0.0100
| epoch 12 | test_acc 0.5145 test_loss 1.4765 | lr 0.0100
---------------------------------------------------------------------
| epoch 13 | train_acc 0.5759 train_loss 1.3115 | lr 0.0100
| epoch 13 | test_acc 0.5512 test_loss 1.3966 | lr 0.0100
---------------------------------------------------------------------
| epoch 14 | train_acc 0.5948 train_loss 1.2573 | lr 0.0100
| epoch 14 | test_acc 0.5153 test_loss 1.4676 | lr 0.0100
---------------------------------------------------------------------
| epoch 15 | train_acc 0.6159 train_loss 1.1923 | lr 0.0100
| epoch 15 | test_acc 0.6103 test_loss 1.2591 | lr 0.0100
---------------------------------------------------------------------
| epoch 16 | train_acc 0.6320 train_loss 1.1504 | lr 0.0100
| epoch 16 | test_acc 0.5942 test_loss 1.2556 | lr 0.0100
---------------------------------------------------------------------
| epoch 17 | train_acc 0.6514 train_loss 1.1079 | lr 0.0100
| epoch 17 | test_acc 0.5876 test_loss 1.2930 | lr 0.0100
---------------------------------------------------------------------
| epoch 18 | train_acc 0.6612 train_loss 1.0774 | lr 0.0100
| epoch 18 | test_acc 0.6202 test_loss 1.1998 | lr 0.0100
---------------------------------------------------------------------
| epoch 19 | train_acc 0.6725 train_loss 1.0297 | lr 0.0100
| epoch 19 | test_acc 0.6492 test_loss 1.1037 | lr 0.0100
---------------------------------------------------------------------
| epoch 20 | train_acc 0.6890 train_loss 0.9942 | lr 0.0100
| epoch 20 | test_acc 0.6727 test_loss 1.0470 | lr 0.0100
---------------------------------------------------------------------
| epoch 21 | train_acc 0.6955 train_loss 0.9585 | lr 0.0100
| epoch 21 | test_acc 0.6620 test_loss 1.0686 | lr 0.0100
---------------------------------------------------------------------
| epoch 22 | train_acc 0.7079 train_loss 0.9375 | lr 0.0100
| epoch 22 | test_acc 0.6835 test_loss 1.0248 | lr 0.0100
---------------------------------------------------------------------
| epoch 23 | train_acc 0.7182 train_loss 0.8986 | lr 0.0100
| epoch 23 | test_acc 0.6645 test_loss 1.0549 | lr 0.0100
---------------------------------------------------------------------
| epoch 24 | train_acc 0.7319 train_loss 0.8636 | lr 0.0100
| epoch 24 | test_acc 0.7079 test_loss 0.9836 | lr 0.0100
---------------------------------------------------------------------
| epoch 25 | train_acc 0.7354 train_loss 0.8479 | lr 0.0100
| epoch 25 | test_acc 0.6818 test_loss 1.0112 | lr 0.0100
---------------------------------------------------------------------
| epoch 26 | train_acc 0.7474 train_loss 0.8160 | lr 0.0100
| epoch 26 | test_acc 0.7033 test_loss 0.9536 | lr 0.0100
---------------------------------------------------------------------
| epoch 27 | train_acc 0.7545 train_loss 0.7959 | lr 0.0100
| epoch 27 | test_acc 0.7256 test_loss 0.9125 | lr 0.0100
---------------------------------------------------------------------
| epoch 28 | train_acc 0.7575 train_loss 0.7768 | lr 0.0100
| epoch 28 | test_acc 0.7273 test_loss 0.9175 | lr 0.0100
---------------------------------------------------------------------
| epoch 29 | train_acc 0.7687 train_loss 0.7512 | lr 0.0100
| epoch 29 | test_acc 0.7260 test_loss 0.8718 | lr 0.0100
---------------------------------------------------------------------
| epoch 30 | train_acc 0.7731 train_loss 0.7341 | lr 0.0100
| epoch 30 | test_acc 0.7343 test_loss 0.8861 | lr 0.0100
---------------------------------------------------------------------
| epoch 31 | train_acc 0.7822 train_loss 0.7141 | lr 0.0100
| epoch 31 | test_acc 0.7492 test_loss 0.8450 | lr 0.0100
---------------------------------------------------------------------
| epoch 32 | train_acc 0.7870 train_loss 0.6887 | lr 0.0100
| epoch 32 | test_acc 0.7517 test_loss 0.8145 | lr 0.0100
---------------------------------------------------------------------
| epoch 33 | train_acc 0.7909 train_loss 0.6792 | lr 0.0100
| epoch 33 | test_acc 0.7450 test_loss 0.8440 | lr 0.0100
---------------------------------------------------------------------
| epoch 34 | train_acc 0.7959 train_loss 0.6637 | lr 0.0100
| epoch 34 | test_acc 0.7607 test_loss 0.8006 | lr 0.0100
---------------------------------------------------------------------
| epoch 35 | train_acc 0.8019 train_loss 0.6419 | lr 0.0100
| epoch 35 | test_acc 0.7612 test_loss 0.7860 | lr 0.0100
---------------------------------------------------------------------
| epoch 36 | train_acc 0.8079 train_loss 0.6362 | lr 0.0100
| epoch 36 | test_acc 0.7603 test_loss 0.8057 | lr 0.0100
---------------------------------------------------------------------
| epoch 37 | train_acc 0.8098 train_loss 0.6221 | lr 0.0100
| epoch 37 | test_acc 0.7653 test_loss 0.7906 | lr 0.0100
---------------------------------------------------------------------
| epoch 38 | train_acc 0.8177 train_loss 0.6031 | lr 0.0100
| epoch 38 | test_acc 0.7616 test_loss 0.7955 | lr 0.0100
---------------------------------------------------------------------
| epoch 39 | train_acc 0.8175 train_loss 0.5919 | lr 0.0100
| epoch 39 | test_acc 0.7694 test_loss 0.7917 | lr 0.0100
---------------------------------------------------------------------
| epoch 40 | train_acc 0.8215 train_loss 0.5807 | lr 0.0100
| epoch 40 | test_acc 0.7719 test_loss 0.7851 | lr 0.0100
---------------------------------------------------------------------
| epoch 41 | train_acc 0.8254 train_loss 0.5681 | lr 0.0100
| epoch 41 | test_acc 0.7835 test_loss 0.7447 | lr 0.0100
---------------------------------------------------------------------
| epoch 42 | train_acc 0.8294 train_loss 0.5581 | lr 0.0100
| epoch 42 | test_acc 0.7872 test_loss 0.7360 | lr 0.0100
---------------------------------------------------------------------
| epoch 43 | train_acc 0.8365 train_loss 0.5316 | lr 0.0100
| epoch 43 | test_acc 0.7818 test_loss 0.7307 | lr 0.0100
---------------------------------------------------------------------
| epoch 44 | train_acc 0.8353 train_loss 0.5370 | lr 0.0100
| epoch 44 | test_acc 0.7893 test_loss 0.7321 | lr 0.0100
---------------------------------------------------------------------
| epoch 45 | train_acc 0.8385 train_loss 0.5235 | lr 0.0100
| epoch 45 | test_acc 0.7872 test_loss 0.7364 | lr 0.0100
---------------------------------------------------------------------
| epoch 46 | train_acc 0.8419 train_loss 0.5157 | lr 0.0100
| epoch 46 | test_acc 0.7595 test_loss 0.7891 | lr 0.0100
---------------------------------------------------------------------
| epoch 47 | train_acc 0.8457 train_loss 0.5024 | lr 0.0100
| epoch 47 | test_acc 0.7897 test_loss 0.7117 | lr 0.0100
---------------------------------------------------------------------
| epoch 48 | train_acc 0.8458 train_loss 0.4942 | lr 0.0100
| epoch 48 | test_acc 0.7901 test_loss 0.7154 | lr 0.0100
---------------------------------------------------------------------
| epoch 49 | train_acc 0.8523 train_loss 0.4861 | lr 0.0100
| epoch 49 | test_acc 0.7860 test_loss 0.7141 | lr 0.0100
---------------------------------------------------------------------
| epoch 50 | train_acc 0.8543 train_loss 0.4706 | lr 0.0100
| epoch 50 | test_acc 0.8008 test_loss 0.6949 | lr 0.0100
---------------------------------------------------------------------
- Model evaluation
model.eval()  # switch to evaluation mode
test_acc, test_loss = test(valid_dataloader, model, criterion)
print('Model accuracy: {:5.4f}'.format(test_acc))
Output
Model accuracy: 0.7926
5. Tuning the code to improve validation accuracy
To raise the validation accuracy, you can try the following approaches:
- Use a learning-rate scheduler
A fixed learning rate works well early in training, but later on a smaller rate often helps the model converge. A learning-rate scheduler can adjust the rate automatically.
from torch.optim.lr_scheduler import StepLR

# Initialize the optimizer and the learning-rate scheduler
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

for epoch in range(1, EPOCHS + 1):
    model.train()
    train_acc, train_loss = train(train_dataloader, model, criterion, optimizer)

    model.eval()
    test_acc, test_loss = test(valid_dataloader, model, criterion)

    # Update the learning rate
    scheduler.step()

    print('| epoch {:1d} | train_acc {:4.4f} train_loss {:4.4f} | lr {:4.4f}'.format(epoch, train_acc, train_loss, scheduler.get_last_lr()[0]))
    print('| epoch {:1d} | test_acc {:4.4f} test_loss {:4.4f} | lr {:4.4f}'.format(epoch, test_acc, test_loss, scheduler.get_last_lr()[0]))
    print('-' * 69)
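An alternative worth trying (a sketch, not from the original) is to cut the learning rate only when the validation loss stops improving:

from torch.optim.lr_scheduler import ReduceLROnPlateau

# Halve the LR after 3 epochs without improvement (values are assumptions, not tuned)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
# Inside the epoch loop, step with the validation loss:
# scheduler.step(test_loss)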
- Add regularization
Dropout: adding dropout to the model helps prevent overfitting to the training set. Dropout randomly zeroes a fraction of units, so each forward pass sees a slightly different network, which improves generalization (a dropout sketch follows the optimizer line below).
Weight decay: adding weight decay (L2 regularization) to the optimizer constrains the magnitude of the model parameters and thereby reduces overfitting.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, weight_decay=1e-4)
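For the dropout suggestion, the model constructor already exposes a dropout rate, so one way to apply it is simply to rebuild the model with a higher rate (a sketch; 0.3 is an assumed value, not tuned):

# Rebuild the model with more dropout in the encoder layers
model = TransformerModel(vocab_size, embed_dim, max_len, num_class, dropout=0.3).to(device)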
- Use a more complex model architecture
If the current model is close to its ceiling, you can try a more complex architecture: more encoder layers, a wider feed-forward dimension, larger embeddings, and so on (a sketch follows below).
Keep in mind that complex models overfit more easily, so pair them with regularization and data-augmentation strategies.
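A heavier configuration using the existing constructor (a sketch; the values are assumptions, not tuned):

# Wider embeddings and a larger feed-forward dimension
model = TransformerModel(vocab_size, embed_dim=128, max_len=max_len,
                         num_class=num_class, nhead=8, d_hid=512,
                         dropout=0.2).to(device)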
- Cross-validation
Split the dataset into several folds and train on different train/validation combinations to obtain a more robust estimate of validation performance (a sketch follows below).
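A minimal k-fold sketch, assuming scikit-learn is installed (a fresh model should be trained per fold):

from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True, random_state=42)
for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
    train_ds = TextDataset([X[i] for i in train_idx], y[train_idx])
    val_ds   = TextDataset([X[i] for i in val_idx], y[val_idx])
    train_dl = DataLoader(train_ds, batch_size=128, shuffle=True, collate_fn=collate_fn)
    val_dl   = DataLoader(val_ds, batch_size=128, collate_fn=collate_fn)
    # ...build a fresh model and run the train/test loops for this fold...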
- Early stopping
Monitor the validation loss or accuracy during training; if it does not improve for a number of epochs, stop training. This guards against overfitting.
early_stopping_patience = 5
best_test_acc = 0
epochs_no_improve = 0

for epoch in range(1, EPOCHS + 1):
    model.train()
    train_acc, train_loss = train(train_dataloader, model, criterion, optimizer)

    model.eval()
    test_acc, test_loss = test(valid_dataloader, model, criterion)

    if test_acc > best_test_acc:
        best_test_acc = test_acc
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1

    if epochs_no_improve >= early_stopping_patience:
        print("Early stopping!")
        break

    print(f'| epoch {epoch:1d} | train_acc {train_acc:4.4f} train_loss {train_loss:4.4f}')
    print(f'| epoch {epoch:1d} | test_acc {test_acc:4.4f} test_loss {test_loss:4.4f}')
    print('-' * 69)
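A common companion to early stopping (a sketch, not in the original) is to checkpoint the best weights, so evaluation uses the best epoch rather than the last one; 'best_model.pth' is an assumed filename:

def save_if_best(model, test_acc, best_acc, path='best_model.pth'):
    # Call once per epoch inside the loop above; saves whenever validation accuracy improves
    if test_acc > best_acc:
        torch.save(model.state_dict(), path)
        return test_acc
    return best_acc

# After training stops, restore the best checkpoint:
# model.load_state_dict(torch.load('best_model.pth'))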
- Train for more epochs
Increasing the number of epochs gives the model more time to learn and adjust its parameters. Train longer, but watch for overfitting.
Here we only increase the number of training epochs, raising EPOCHS from 50 to 100.
# Hyperparameters
EPOCHS = 100
Everything else stays the same; the training log is shown below:
| epoch 1 | train_acc 0.1100 train_loss 2.6860 | lr 0.0100
| epoch 1 | test_acc 0.1025 test_loss 2.5499 | lr 0.0100
---------------------------------------------------------------------
| epoch 2 | train_acc 0.1191 train_loss 2.5679 | lr 0.0100
| epoch 2 | test_acc 0.1079 test_loss 2.5401 | lr 0.0100
---------------------------------------------------------------------
| epoch 3 | train_acc 0.1339 train_loss 2.4612 | lr 0.0100
| epoch 3 | test_acc 0.1727 test_loss 2.5991 | lr 0.0100
---------------------------------------------------------------------
| epoch 4 | train_acc 0.1904 train_loss 2.3322 | lr 0.0100
| epoch 4 | test_acc 0.1587 test_loss 2.2755 | lr 0.0100
---------------------------------------------------------------------
| epoch 5 | train_acc 0.2350 train_loss 2.2031 | lr 0.0100
| epoch 5 | test_acc 0.2653 test_loss 2.1075 | lr 0.0100
---------------------------------------------------------------------
| epoch 6 | train_acc 0.3068 train_loss 2.0392 | lr 0.0100
| epoch 6 | test_acc 0.2955 test_loss 2.0239 | lr 0.0100
---------------------------------------------------------------------
| epoch 7 | train_acc 0.3441 train_loss 1.9375 | lr 0.0100
| epoch 7 | test_acc 0.3603 test_loss 1.9546 | lr 0.0100
---------------------------------------------------------------------
| epoch 8 | train_acc 0.3837 train_loss 1.8525 | lr 0.0100
| epoch 8 | test_acc 0.3921 test_loss 1.8370 | lr 0.0100
---------------------------------------------------------------------
| epoch 9 | train_acc 0.4190 train_loss 1.7458 | lr 0.0100
| epoch 9 | test_acc 0.3992 test_loss 1.7703 | lr 0.0100
---------------------------------------------------------------------
| epoch 10 | train_acc 0.4540 train_loss 1.6658 | lr 0.0100
| epoch 10 | test_acc 0.4153 test_loss 1.7673 | lr 0.0100
---------------------------------------------------------------------
| epoch 11 | train_acc 0.4804 train_loss 1.5843 | lr 0.0100
| epoch 11 | test_acc 0.3950 test_loss 1.8209 | lr 0.0100
---------------------------------------------------------------------
| epoch 12 | train_acc 0.5040 train_loss 1.5307 | lr 0.0100
| epoch 12 | test_acc 0.4579 test_loss 1.6927 | lr 0.0100
---------------------------------------------------------------------
| epoch 13 | train_acc 0.5271 train_loss 1.4553 | lr 0.0100
| epoch 13 | test_acc 0.4942 test_loss 1.5702 | lr 0.0100
---------------------------------------------------------------------
| epoch 14 | train_acc 0.5620 train_loss 1.3798 | lr 0.0100
| epoch 14 | test_acc 0.5368 test_loss 1.4292 | lr 0.0100
---------------------------------------------------------------------
| epoch 15 | train_acc 0.5806 train_loss 1.3181 | lr 0.0100
| epoch 15 | test_acc 0.5607 test_loss 1.3745 | lr 0.0100
---------------------------------------------------------------------
| epoch 16 | train_acc 0.5943 train_loss 1.2770 | lr 0.0100
| epoch 16 | test_acc 0.5413 test_loss 1.4253 | lr 0.0100
---------------------------------------------------------------------
| epoch 17 | train_acc 0.6144 train_loss 1.2165 | lr 0.0100
| epoch 17 | test_acc 0.5963 test_loss 1.2720 | lr 0.0100
---------------------------------------------------------------------
| epoch 18 | train_acc 0.6276 train_loss 1.1874 | lr 0.0100
| epoch 18 | test_acc 0.5727 test_loss 1.3354 | lr 0.0100
---------------------------------------------------------------------
| epoch 19 | train_acc 0.6441 train_loss 1.1350 | lr 0.0100
| epoch 19 | test_acc 0.5938 test_loss 1.2605 | lr 0.0100
---------------------------------------------------------------------
| epoch 20 | train_acc 0.6520 train_loss 1.0976 | lr 0.0100
| epoch 20 | test_acc 0.6302 test_loss 1.1466 | lr 0.0100
---------------------------------------------------------------------
| epoch 21 | train_acc 0.6690 train_loss 1.0629 | lr 0.0100
| epoch 21 | test_acc 0.6174 test_loss 1.1901 | lr 0.0100
---------------------------------------------------------------------
| epoch 22 | train_acc 0.6833 train_loss 1.0311 | lr 0.0100
| epoch 22 | test_acc 0.6306 test_loss 1.1429 | lr 0.0100
---------------------------------------------------------------------
| epoch 23 | train_acc 0.6856 train_loss 0.9998 | lr 0.0100
| epoch 23 | test_acc 0.6508 test_loss 1.0855 | lr 0.0100
---------------------------------------------------------------------
| epoch 24 | train_acc 0.7022 train_loss 0.9647 | lr 0.0100
| epoch 24 | test_acc 0.6851 test_loss 1.0307 | lr 0.0100
---------------------------------------------------------------------
| epoch 25 | train_acc 0.7095 train_loss 0.9341 | lr 0.0100
| epoch 25 | test_acc 0.6847 test_loss 1.0116 | lr 0.0100
---------------------------------------------------------------------
| epoch 26 | train_acc 0.7164 train_loss 0.9098 | lr 0.0100
| epoch 26 | test_acc 0.6893 test_loss 1.0023 | lr 0.0100
---------------------------------------------------------------------
| epoch 27 | train_acc 0.7263 train_loss 0.8844 | lr 0.0100
| epoch 27 | test_acc 0.6909 test_loss 0.9758 | lr 0.0100
---------------------------------------------------------------------
| epoch 28 | train_acc 0.7319 train_loss 0.8612 | lr 0.0100
| epoch 28 | test_acc 0.7054 test_loss 0.9510 | lr 0.0100
---------------------------------------------------------------------
| epoch 29 | train_acc 0.7398 train_loss 0.8410 | lr 0.0100
| epoch 29 | test_acc 0.6963 test_loss 0.9592 | lr 0.0100
---------------------------------------------------------------------
| epoch 30 | train_acc 0.7447 train_loss 0.8146 | lr 0.0100
| epoch 30 | test_acc 0.7074 test_loss 0.9212 | lr 0.0100
---------------------------------------------------------------------
| epoch 31 | train_acc 0.7527 train_loss 0.7987 | lr 0.0100
| epoch 31 | test_acc 0.7244 test_loss 0.8965 | lr 0.0100
---------------------------------------------------------------------
| epoch 32 | train_acc 0.7588 train_loss 0.7785 | lr 0.0100
| epoch 32 | test_acc 0.6909 test_loss 0.9812 | lr 0.0100
---------------------------------------------------------------------
| epoch 33 | train_acc 0.7652 train_loss 0.7639 | lr 0.0100
| epoch 33 | test_acc 0.7029 test_loss 0.9290 | lr 0.0100
---------------------------------------------------------------------
| epoch 34 | train_acc 0.7743 train_loss 0.7354 | lr 0.0100
| epoch 34 | test_acc 0.7136 test_loss 0.8977 | lr 0.0100
---------------------------------------------------------------------
| epoch 35 | train_acc 0.7774 train_loss 0.7161 | lr 0.0100
| epoch 35 | test_acc 0.7269 test_loss 0.8735 | lr 0.0100
---------------------------------------------------------------------
| epoch 36 | train_acc 0.7880 train_loss 0.6959 | lr 0.0100
| epoch 36 | test_acc 0.7467 test_loss 0.8207 | lr 0.0100
---------------------------------------------------------------------
| epoch 37 | train_acc 0.7886 train_loss 0.6838 | lr 0.0100
| epoch 37 | test_acc 0.7360 test_loss 0.8512 | lr 0.0100
---------------------------------------------------------------------
| epoch 38 | train_acc 0.7910 train_loss 0.6707 | lr 0.0100
| epoch 38 | test_acc 0.7298 test_loss 0.8624 | lr 0.0100
---------------------------------------------------------------------
| epoch 39 | train_acc 0.8020 train_loss 0.6531 | lr 0.0100
| epoch 39 | test_acc 0.7393 test_loss 0.8207 | lr 0.0100
---------------------------------------------------------------------
| epoch 40 | train_acc 0.7981 train_loss 0.6479 | lr 0.0100
| epoch 40 | test_acc 0.7463 test_loss 0.8105 | lr 0.0100
---------------------------------------------------------------------
| epoch 41 | train_acc 0.8081 train_loss 0.6322 | lr 0.0100
| epoch 41 | test_acc 0.7603 test_loss 0.7817 | lr 0.0100
---------------------------------------------------------------------
| epoch 42 | train_acc 0.8083 train_loss 0.6245 | lr 0.0100
| epoch 42 | test_acc 0.7612 test_loss 0.7648 | lr 0.0100
---------------------------------------------------------------------
| epoch 43 | train_acc 0.8164 train_loss 0.6028 | lr 0.0100
| epoch 43 | test_acc 0.7570 test_loss 0.7884 | lr 0.0100
---------------------------------------------------------------------
| epoch 44 | train_acc 0.8200 train_loss 0.5906 | lr 0.0100
| epoch 44 | test_acc 0.7628 test_loss 0.7643 | lr 0.0100
---------------------------------------------------------------------
| epoch 45 | train_acc 0.8220 train_loss 0.5814 | lr 0.0100
| epoch 45 | test_acc 0.7599 test_loss 0.7597 | lr 0.0100
---------------------------------------------------------------------
| epoch 46 | train_acc 0.8271 train_loss 0.5710 | lr 0.0100
| epoch 46 | test_acc 0.7649 test_loss 0.7450 | lr 0.0100
---------------------------------------------------------------------
| epoch 47 | train_acc 0.8240 train_loss 0.5696 | lr 0.0100
| epoch 47 | test_acc 0.7707 test_loss 0.7441 | lr 0.0100
---------------------------------------------------------------------
| epoch 48 | train_acc 0.8361 train_loss 0.5423 | lr 0.0100
| epoch 48 | test_acc 0.7764 test_loss 0.7465 | lr 0.0100
---------------------------------------------------------------------
| epoch 49 | train_acc 0.8362 train_loss 0.5341 | lr 0.0100
| epoch 49 | test_acc 0.7764 test_loss 0.7185 | lr 0.0100
---------------------------------------------------------------------
| epoch 50 | train_acc 0.8366 train_loss 0.5239 | lr 0.0100
| epoch 50 | test_acc 0.7781 test_loss 0.7407 | lr 0.0100
---------------------------------------------------------------------
| epoch 51 | train_acc 0.8400 train_loss 0.5241 | lr 0.0100
| epoch 51 | test_acc 0.7690 test_loss 0.7480 | lr 0.0100
---------------------------------------------------------------------
| epoch 52 | train_acc 0.8428 train_loss 0.5145 | lr 0.0100
| epoch 52 | test_acc 0.7785 test_loss 0.7178 | lr 0.0100
---------------------------------------------------------------------
| epoch 53 | train_acc 0.8459 train_loss 0.5084 | lr 0.0100
| epoch 53 | test_acc 0.7847 test_loss 0.7062 | lr 0.0100
---------------------------------------------------------------------
| epoch 54 | train_acc 0.8494 train_loss 0.4929 | lr 0.0100
| epoch 54 | test_acc 0.7855 test_loss 0.6915 | lr 0.0100
---------------------------------------------------------------------
| epoch 55 | train_acc 0.8520 train_loss 0.4837 | lr 0.0100
| epoch 55 | test_acc 0.7901 test_loss 0.6986 | lr 0.0100
---------------------------------------------------------------------
| epoch 56 | train_acc 0.8536 train_loss 0.4714 | lr 0.0100
| epoch 56 | test_acc 0.7905 test_loss 0.6908 | lr 0.0100
---------------------------------------------------------------------
| epoch 57 | train_acc 0.8552 train_loss 0.4691 | lr 0.0100
| epoch 57 | test_acc 0.7802 test_loss 0.7152 | lr 0.0100
---------------------------------------------------------------------
| epoch 58 | train_acc 0.8580 train_loss 0.4606 | lr 0.0100
| epoch 58 | test_acc 0.7888 test_loss 0.6932 | lr 0.0100
---------------------------------------------------------------------
| epoch 59 | train_acc 0.8627 train_loss 0.4457 | lr 0.0100
| epoch 59 | test_acc 0.7959 test_loss 0.6943 | lr 0.0100
---------------------------------------------------------------------
| epoch 60 | train_acc 0.8639 train_loss 0.4419 | lr 0.0100
| epoch 60 | test_acc 0.7926 test_loss 0.6772 | lr 0.0100
---------------------------------------------------------------------
| epoch 61 | train_acc 0.8676 train_loss 0.4364 | lr 0.0100
| epoch 61 | test_acc 0.7934 test_loss 0.6725 | lr 0.0100
---------------------------------------------------------------------
| epoch 62 | train_acc 0.8654 train_loss 0.4291 | lr 0.0100
| epoch 62 | test_acc 0.7806 test_loss 0.7024 | lr 0.0100
---------------------------------------------------------------------
| epoch 63 | train_acc 0.8704 train_loss 0.4224 | lr 0.0100
| epoch 63 | test_acc 0.7983 test_loss 0.6707 | lr 0.0100
---------------------------------------------------------------------
| epoch 64 | train_acc 0.8725 train_loss 0.4165 | lr 0.0100
| epoch 64 | test_acc 0.7983 test_loss 0.6757 | lr 0.0100
---------------------------------------------------------------------
| epoch 65 | train_acc 0.8715 train_loss 0.4137 | lr 0.0100
| epoch 65 | test_acc 0.8017 test_loss 0.6568 | lr 0.0100
---------------------------------------------------------------------
| epoch 66 | train_acc 0.8767 train_loss 0.4060 | lr 0.0100
| epoch 66 | test_acc 0.8004 test_loss 0.6595 | lr 0.0100
---------------------------------------------------------------------
| epoch 67 | train_acc 0.8814 train_loss 0.3886 | lr 0.0100
| epoch 67 | test_acc 0.7946 test_loss 0.6955 | lr 0.0100
---------------------------------------------------------------------
| epoch 68 | train_acc 0.8821 train_loss 0.3856 | lr 0.0100
| epoch 68 | test_acc 0.7959 test_loss 0.6693 | lr 0.0100
---------------------------------------------------------------------
| epoch 69 | train_acc 0.8839 train_loss 0.3832 | lr 0.0100
| epoch 69 | test_acc 0.8083 test_loss 0.6565 | lr 0.0100
---------------------------------------------------------------------
| epoch 70 | train_acc 0.8832 train_loss 0.3805 | lr 0.0100
| epoch 70 | test_acc 0.8074 test_loss 0.6497 | lr 0.0100
---------------------------------------------------------------------
| epoch 71 | train_acc 0.8862 train_loss 0.3706 | lr 0.0100
| epoch 71 | test_acc 0.8116 test_loss 0.6288 | lr 0.0100
---------------------------------------------------------------------
| epoch 72 | train_acc 0.8856 train_loss 0.3694 | lr 0.0100
| epoch 72 | test_acc 0.8087 test_loss 0.6400 | lr 0.0100
---------------------------------------------------------------------
| epoch 73 | train_acc 0.8920 train_loss 0.3560 | lr 0.0100
| epoch 73 | test_acc 0.8062 test_loss 0.6314 | lr 0.0100
---------------------------------------------------------------------
| epoch 74 | train_acc 0.8910 train_loss 0.3549 | lr 0.0100
| epoch 74 | test_acc 0.8087 test_loss 0.6251 | lr 0.0100
---------------------------------------------------------------------
| epoch 75 | train_acc 0.8929 train_loss 0.3463 | lr 0.0100
| epoch 75 | test_acc 0.8074 test_loss 0.6462 | lr 0.0100
---------------------------------------------------------------------
| epoch 76 | train_acc 0.8964 train_loss 0.3415 | lr 0.0100
| epoch 76 | test_acc 0.8091 test_loss 0.6330 | lr 0.0100
---------------------------------------------------------------------
| epoch 77 | train_acc 0.8954 train_loss 0.3419 | lr 0.0100
| epoch 77 | test_acc 0.8107 test_loss 0.6410 | lr 0.0100
---------------------------------------------------------------------
| epoch 78 | train_acc 0.8998 train_loss 0.3287 | lr 0.0100
| epoch 78 | test_acc 0.8157 test_loss 0.6170 | lr 0.0100
---------------------------------------------------------------------
| epoch 79 | train_acc 0.9002 train_loss 0.3264 | lr 0.0100
| epoch 79 | test_acc 0.8124 test_loss 0.6240 | lr 0.0100
---------------------------------------------------------------------
| epoch 80 | train_acc 0.8993 train_loss 0.3249 | lr 0.0100
| epoch 80 | test_acc 0.8145 test_loss 0.6394 | lr 0.0100
---------------------------------------------------------------------
| epoch 81 | train_acc 0.9054 train_loss 0.3138 | lr 0.0100
| epoch 81 | test_acc 0.8120 test_loss 0.6194 | lr 0.0100
---------------------------------------------------------------------
| epoch 82 | train_acc 0.9001 train_loss 0.3185 | lr 0.0100
| epoch 82 | test_acc 0.8145 test_loss 0.6423 | lr 0.0100
---------------------------------------------------------------------
| epoch 83 | train_acc 0.9060 train_loss 0.3104 | lr 0.0100
| epoch 83 | test_acc 0.8083 test_loss 0.6386 | lr 0.0100
---------------------------------------------------------------------
| epoch 84 | train_acc 0.9077 train_loss 0.2967 | lr 0.0100
| epoch 84 | test_acc 0.8227 test_loss 0.6292 | lr 0.0100
---------------------------------------------------------------------
| epoch 85 | train_acc 0.9094 train_loss 0.2975 | lr 0.0100
| epoch 85 | test_acc 0.8140 test_loss 0.6235 | lr 0.0100
---------------------------------------------------------------------
| epoch 86 | train_acc 0.9110 train_loss 0.2937 | lr 0.0100
| epoch 86 | test_acc 0.8157 test_loss 0.6236 | lr 0.0100
---------------------------------------------------------------------
| epoch 87 | train_acc 0.9105 train_loss 0.2895 | lr 0.0100
| epoch 87 | test_acc 0.8186 test_loss 0.6264 | lr 0.0100
---------------------------------------------------------------------
| epoch 88 | train_acc 0.9123 train_loss 0.2863 | lr 0.0100
| epoch 88 | test_acc 0.8178 test_loss 0.6175 | lr 0.0100
---------------------------------------------------------------------
| epoch 89 | train_acc 0.9137 train_loss 0.2817 | lr 0.0100
| epoch 89 | test_acc 0.8140 test_loss 0.6343 | lr 0.0100
---------------------------------------------------------------------
| epoch 90 | train_acc 0.9122 train_loss 0.2823 | lr 0.0100
| epoch 90 | test_acc 0.8165 test_loss 0.6248 | lr 0.0100
---------------------------------------------------------------------
| epoch 91 | train_acc 0.9157 train_loss 0.2743 | lr 0.0100
| epoch 91 | test_acc 0.8207 test_loss 0.6083 | lr 0.0100
---------------------------------------------------------------------
| epoch 92 | train_acc 0.9163 train_loss 0.2702 | lr 0.0100
| epoch 92 | test_acc 0.8207 test_loss 0.6032 | lr 0.0100
---------------------------------------------------------------------
| epoch 93 | train_acc 0.9175 train_loss 0.2722 | lr 0.0100
| epoch 93 | test_acc 0.8207 test_loss 0.6216 | lr 0.0100
---------------------------------------------------------------------
| epoch 94 | train_acc 0.9195 train_loss 0.2605 | lr 0.0100
| epoch 94 | test_acc 0.8186 test_loss 0.6282 | lr 0.0100
---------------------------------------------------------------------
| epoch 95 | train_acc 0.9209 train_loss 0.2630 | lr 0.0100
| epoch 95 | test_acc 0.8149 test_loss 0.6254 | lr 0.0100
---------------------------------------------------------------------
| epoch 96 | train_acc 0.9224 train_loss 0.2527 | lr 0.0100
| epoch 96 | test_acc 0.8231 test_loss 0.6092 | lr 0.0100
---------------------------------------------------------------------
| epoch 97 | train_acc 0.9218 train_loss 0.2570 | lr 0.0100
| epoch 97 | test_acc 0.8190 test_loss 0.6195 | lr 0.0100
---------------------------------------------------------------------
| epoch 98 | train_acc 0.9281 train_loss 0.2497 | lr 0.0100
| epoch 98 | test_acc 0.8231 test_loss 0.6062 | lr 0.0100
---------------------------------------------------------------------
| epoch 99 | train_acc 0.9268 train_loss 0.2455 | lr 0.0100
| epoch 99 | test_acc 0.8165 test_loss 0.6241 | lr 0.0100
---------------------------------------------------------------------
| epoch 100 | train_acc 0.9269 train_loss 0.2423 | lr 0.0100
| epoch 100 | test_acc 0.8223 test_loss 0.6115 | lr 0.0100
---------------------------------------------------------------------
Model evaluation code:
model.eval()  # switch to evaluation mode
test_acc, test_loss = test(valid_dataloader, model, criterion)
print('Model accuracy: {:5.4f}'.format(test_acc))
The output is:
Model accuracy: 0.8207
That wraps up this post on Week TR6: Text Classification with a Transformer, Embedding Edition. I hope it proves helpful!