Instructions to use deepseek-ai/DeepSeek-R1 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use deepseek-ai/DeepSeek-R1 with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="deepseek-ai/DeepSeek-R1", trust_remote_code=True) messages = [ {"role": "user", "content": "Who are you?"}, ] pipe(messages)# Load model directly from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1", trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-R1", trust_remote_code=True) messages = [ {"role": "user", "content": "Who are you?"}, ] inputs = tokenizer.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Inference
- HuggingChat
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use deepseek-ai/DeepSeek-R1 with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "deepseek-ai/DeepSeek-R1" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "deepseek-ai/DeepSeek-R1", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker
docker model run hf.co/deepseek-ai/DeepSeek-R1
- SGLang
How to use deepseek-ai/DeepSeek-R1 with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "deepseek-ai/DeepSeek-R1" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "deepseek-ai/DeepSeek-R1", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "deepseek-ai/DeepSeek-R1" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "deepseek-ai/DeepSeek-R1", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }' - Docker Model Runner
How to use deepseek-ai/DeepSeek-R1 with Docker Model Runner:
docker model run hf.co/deepseek-ai/DeepSeek-R1
输出乱码
async def event_generator():
input_ids = model_manager.tokenizer.encode(input_text, return_tensors="pt")
if model_manager.model.device.type == 'cuda':
input_ids = input_ids.cuda()
generated_ids = []
for _ in range(max_length):
if await request.is_disconnected():
break
output = model_manager.model.generate(input_ids, max_new_tokens=1, return_dict_in_generate=True,output_scores=True)
next_token_id = output.sequences[0, -1].item()
generated_ids.append(next_token_id)
input_ids = output.sequences
# 解码并输出当前生成的词
# 检查词元 ID 是否有效
if next_token_id < len(model_manager.tokenizer):
# 解码并输出当前生成的词
print(f"解码并输出当前生成的词 : {model_manager.tokenizer}")
print(f" 解码并输出当前生成的词 next_token_id: {next_token_id}")
current_word = model_manager.tokenizer.decode([next_token_id], skip_special_tokens=True)
print(f"")
print(current_word, end="", flush=True)
print(f"")
else:
print(f"Invalid token ID: {next_token_id}", end="", flush=True)
event_data = {
"event": "message",
"data": current_word
}
# 打印要发送的事件数据
print(f"Sending SSE data: {event_data}")
yield event_data
await asyncio.sleep(0.1)
print(f"model_manager.tokenizer.eos_token_id: {model_manager.tokenizer.eos_token_id}")
print(f"next_token_id: {next_token_id}")
# 如果生成了结束标记,停止生成
if next_token_id == model_manager.tokenizer.eos_token_id:
break
输出结果
解码并输出当前生成的词 : LlamaTokenizerFast(name_or_path='../DeepSeek-R1-Distill-Qwen-7B', vocab_size=151643, model_max_length=16384, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<|begin▁of▁sentence|>', 'eos_token': '<|end▁of▁sentence|>', 'pad_token': '<|end▁of▁sentence|>'}, clean_up_tokenization_spaces=False), added_tokens_decoder={
151643: AddedToken("<|end▁of▁sentence|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151644: AddedToken("<|User|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151645: AddedToken("<|Assistant|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151646: AddedToken("<|begin▁of▁sentence|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151647: AddedToken("<|EOT|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151648: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151649: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151650: AddedToken("<|quad_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151651: AddedToken("<|quad_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151652: AddedToken("<|vision_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151653: AddedToken("<|vision_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151654: AddedToken("<|vision_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151655: AddedToken("<|image_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151656: AddedToken("<|video_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151657: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151658: AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151659: AddedToken("<|fim_prefix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151660: AddedToken("<|fim_middle|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151661: AddedToken("<|fim_suffix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151662: AddedToken("<|fim_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151663: AddedToken("<|repo_name|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151664: AddedToken("<|file_sep|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
}
解码并输出当前生成的词 next_token_id: 242
�
Sending SSE data: {'event': 'message', 'data': '�'}
model_manager.tokenizer.eos_token_id: 151643
next_token_id: 242
请问 � 这个乱码什么问题导致
我也遇到的乱码问题
</אּí(
δŸㅿ�mericfsr」。
November │
》-->
!」____
,
’
[
0™*__襀
200Ð[
409ꯉ0۹!
OK