本文主要介绍如何使用 chardet 检测文件编码并处理乱码,希望能为大家解决编程问题提供一定的参考价值,有需要的开发者可以跟着小编一起学习!
def save_data(line):
    """Append ``line`` to the output file ``new微博评论.csv``.

    The file is opened anew in append mode on every call and written as
    UTF-8. ``newline=""`` disables newline translation, so any line ending
    already contained in ``line`` is written unchanged.
    """
    with open(
        "new微博评论.csv",
        mode="a+",
        encoding="utf-8",
        newline="",
    ) as out_file:
        out_file.write(line)
# Re-encode the source CSV line by line. The file is read as raw bytes so a
# single badly encoded line can be reported and skipped instead of aborting
# the whole run. The ``with`` block closes the input file when the loop ends.
with open("微博评论.csv", "rb") as src:  # read the file in binary mode
    for line_no, line in enumerate(src, start=1):
        try:
            # Only the decode is guarded; failures while writing propagate.
            n_line = line.decode("utf8")
        except UnicodeDecodeError as e:
            # Show the error, a separator, then the 1-based line number and
            # the raw bytes of the offending line; continue with the next.
            print(type(e), e)
            print("=========================")
            print(line_no, line)
        else:
            save_data(n_line)
使用 chardet 检测编码
import sys

import chardet


def judge(data):
    """Return the encoding name chardet guesses for the bytes ``data``.

    The value is whatever ``chardet.detect`` reports under the
    ``"encoding"`` key; it can be ``None`` when no encoding was detected.
    """
    return chardet.detect(data)["encoding"]


def error(e, q=1):
    """Show ``e`` as an input prompt, wait for Enter, then optionally exit.

    Args:
        e: Message displayed as the prompt.
        q: When truthy (the default), terminate the program after the
            prompt is acknowledged.

    Raises:
        SystemExit: If ``q`` is truthy.
    """
    input(e)
    if q:
        # ``sys.exit`` instead of the site-provided ``exit`` helper, which is
        # intended for the interactive shell and may be absent.
        # NOTE: exit status 0 is kept for backward compatibility even though
        # this is an error path.
        sys.exit(0)


def trans(path):
    """Read the file at ``path`` and return its lines as a list of strings.

    The encoding is detected with ``judge``. The decoded text is split on
    ``"\\n"`` and each piece is right-stripped; if that yields a single
    piece, it is split on ``"\\r"`` instead.

    Args:
        path: Path of the file to read.

    Returns:
        The list of lines. If decoding fails, the error is printed and
        ``error`` is called, which exits the program by default.
    """
    # Context manager so the file handle is closed right after reading.
    with open(path, "rb") as fh:
        data = fh.read()

    coding = judge(data)
    if coding is None:
        # Detection failed (e.g. empty file); try UTF-8 rather than passing
        # ``None`` to ``bytes.decode``, which would raise a TypeError.
        coding = "utf-8"
    elif coding == "GB2312":
        # GBK is a superset of GB2312, so it decodes more characters.
        coding = "GBK"

    try:
        text = data.decode(coding)
    except (UnicodeError, LookupError) as e:
        # UnicodeError: bytes invalid for the codec; LookupError: the
        # detected encoding name is unknown to Python.
        print(e)
        error("[!] 无法使用此文本,请使用utf8编码的文本")
        return None  # only reached if error() did not exit

    arr = [i.rstrip() for i in text.split("\n")]
    if len(arr) == 1:
        # No "\n" separators found: fall back to splitting on "\r".
        return arr[0].split("\r")
    return arr


print(trans("123.txt"))
这篇关于文件编码检测chardet及乱码处理的文章就介绍到这儿,希望我们推荐的文章对程序员们有所帮助!