transformers调用llama的方式

本文主要是介绍transformers调用llama的方式，希望对大家解决编程问题提供一定的参考价值，需要的开发者们随着小编来一起学习吧！

transformers调用llama的使用方式

不同版本llama对应的transformers库版本
llama2
llama3
- Meta-Llama-3-8B-Instruct
- Meta-Llama-3-8B
llama3.1
- Meta-Llama-3.1-8B-Instruct

不同版本llama对应的transformers库版本

# llama2
pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
pip install transformers==4.32.1
pip install accelerate==0.22.0
# llama3
pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
pip install transformers==4.35.0
pip install accelerate==0.22.0
# llama3.1
pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118
pip install transformers==4.43.1
pip install accelerate==0.22.0

llama2

待补充

llama3

Meta-Llama-3-8B-Instruct

可用于QA，summarize，示例代码

from transformers import AutoTokenizer, AutoModelForCausalLM
import torchmodel_id = "meta-llama/Meta-Llama-3-8B-Instruct"tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id,torch_dtype=torch.bfloat16,device_map='cuda',
)messages = [{"role": "system", "content": "You are an assistant who provides precise and direct answers."},{"role": "user", "content": "In the sentence 'A boy is playing football', what is the exact action activity described? Provide only the exact phrase."},
]
input_ids = tokenizer.apply_chat_template(messages,add_generation_prompt=True,return_tensors="pt"
).to(model.device)terminators = [tokenizer.eos_token_id,tokenizer.convert_tokens_to_ids("<|eot_id|>")
]outputs = model.generate(input_ids,max_new_tokens=20,eos_token_id=terminators,do_sample=False,temperature=0.0,top_p=1.0,
)
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True)) # 输出 "playing football"

Meta-Llama-3-8B

可用于文本生成，使用体验一般

import transformers
import torch
from transformers import AutoTokenizer
model_id = "/home/mayunchuan/.cache/huggingface/transformers/meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipeline = transformers.pipeline("text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="cuda",max_length=40,num_return_sequences=1,eos_token_id=tokenizer.eos_token_id
)
result = pipeline("Hey how are you doing today?")
print(result) # 输出 [{'generated_text': 'Hey how are you doing today? I am doing well. I am a little bit tired because I have been working a lot. I am a little bit tired because I have been working a lot.'}]

llama3.1

Meta-Llama-3.1-8B-Instruct

可用于QA，summarize，可使用llama3-chat同样的示例代码

from transformers import AutoTokenizer, AutoModelForCausalLM
import torchmodel_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id,torch_dtype=torch.bfloat16,device_map='cuda',
)messages = [{"role": "system", "content": "You are an assistant who provides precise and direct answers."},{"role": "user", "content": "In the sentence 'A boy is playing football', what is the exact action activity described? Provide only the exact phrase."},
]
input_ids = tokenizer.apply_chat_template(messages,add_generation_prompt=True,return_tensors="pt"
).to(model.device)terminators = [tokenizer.eos_token_id,tokenizer.convert_tokens_to_ids("<|eot_id|>")
]outputs = model.generate(input_ids,max_new_tokens=20,eos_token_id=terminators,do_sample=False,temperature=0.0,top_p=1.0,
)
response = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True)) # 输出 Playing football.

也可以使用另一个demo

import transformers
import torch
from transformers import AutoTokenizer
model_id = "meta-llama/Meta-Llama-3.1-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipeline = transformers.pipeline("text-generation", model=model_id, model_kwargs={"torch_dtype": torch.bfloat16}, device_map="cuda",max_length=35,num_return_sequences=1,eos_token_id=tokenizer.eos_token_id
)
result = pipeline("who are you?")
print(result)import transformers
import torchmodel_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"pipeline = transformers.pipeline("text-generation",model=model_id,model_kwargs={"torch_dtype": torch.bfloat16},device_map="auto",
)messages = [{"role": "system", "content": "You are an assistant who provides precise and direct answers."},{"role": "user", "content": "In the sentence 'A boy is playing football', what is the exact action activity described? Provide only the exact phrase."},
]
outputs = pipeline(messages,max_new_tokens=256,
)
print(outputs[0]["generated_text"][-1]) # 输出 {'role': 'assistant', 'content': 'Playing football.'}