./bin/mindieservice_daemon 启动成功
接上文《MindIE大模型测试及报错 Fatal Python error: PyThreadState_Get: the function must be called with the GIL held》(CSDN博客)。经过调整如下配置中的参数(原文中标红的部分),昇腾 310P3 成功跑起来了 7B 模型:
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/home/apulis-dev/teamdata/Qwen2.5-Coder-7B-Instruct# grep save q.log
2024-11-07 14:13:21 (295 MB/s) - ‘.gitattributes’ saved [1795/1795]
2024-11-07 14:13:21 (144 MB/s) - ‘config.json’ saved [663/663]
2024-11-07 14:13:21 (578 KB/s) - ‘configuration.json’ saved [2/2]
2024-11-07 14:13:22 (69.9 MB/s) - ‘generation_config.json’ saved [242/242]
2024-11-07 14:13:22 (81.9 MB/s) - ‘LICENSE’ saved [11343/11343]
2024-11-07 14:13:23 (4.76 MB/s) - ‘merges.txt’ saved [1671839/1671839]
2024-11-07 14:33:40 (3.83 MB/s) - ‘model-00001-of-00004.safetensors’ saved [4877660776/4877660776]
2024-11-07 14:56:19 (3.46 MB/s) - ‘model-00002-of-00004.safetensors’ saved [4932751008/4932751008]
2024-11-07 15:18:14 (3.15 MB/s) - ‘model-00003-of-00004.safetensors’ saved [4330865200/4330865200]
2024-11-07 15:21:53 (4.76 MB/s) - ‘model-00004-of-00004.safetensors’ saved [1089994880/1089994880]
2024-11-07 15:21:53 (796 KB/s) - ‘model.safetensors.index.json’ saved [27752/27752]
2024-11-07 15:21:53 (103 MB/s) - ‘README.md’ saved [5837/5837]
2024-11-07 15:21:55 (6.53 MB/s) - ‘tokenizer.json’ saved [7031645/7031645]
2024-11-07 15:21:55 (71.1 MB/s) - ‘tokenizer_config.json’ saved [7305/7305]
2024-11-07 15:21:56 (5.61 MB/s) - ‘vocab.json’ saved [2776833/2776833]
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service# more conf/config.json
{
"OtherParam" :
{
"ResourceParam" :
{
"cacheBlockSize" : 128
},
"LogParam" :
{
"logLevel" : "Info",
"logPath" : "logs/mindservice.log"
},
"ServeParam" :
{
"ipAddress" : "127.0.0.1",
"managementIpAddress" : "127.0.0.2",
"port" : 1025,
"managementPort" : 1026,
"maxLinkNum" : 1000,
"httpsEnabled" : false,
"tlsCaPath" : "security/ca/",
"tlsCaFile" : ["ca.pem"],
"tlsCert" : "security/certs/server.pem",
"tlsPk" : "security/keys/server.key.pem",
"tlsPkPwd" : "security/pass/mindie_server_key_pwd.txt",
"tlsCrl" : "security/certs/server_crl.pem",
"managementTlsCaFile" : ["management_ca.pem"],
"managementTlsCert" : "security/certs/management_server.pem",
"managementTlsPk" : "security/keys/management_server.key.pem",
"managementTlsPkPwd" : "security/pass/management_mindie_server_key_pwd.txt",
"managementTlsCrl" : "security/certs/management_server_crl.pem",
"kmcKsfMaster" : "tools/pmt/master/ksfa",
"kmcKsfStandby" : "tools/pmt/standby/ksfb",
"multiNodesInferPort" : 1120,
"interNodeTLSEnabled" : true,
"interNodeTlsCaFile" : "security/ca/ca.pem",
"interNodeTlsCert" : "security/certs/server.pem",
"interNodeTlsPk" : "security/keys/server.key.pem",
"interNodeTlsPkPwd" : "security/pass/mindie_server_key_pwd.txt",
"interNodeKmcKsfMaster" : "tools/pmt/master/ksfa",
"interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb"
}
},
"WorkFlowParam" :
{
"TemplateParam" :
{
"templateType" : "Standard",
"templateName" : "Standard_llama"
}
},
"ModelDeployParam" :
{
"engineName" : "mindieservice_llm_engine",
"modelInstanceNumber" : 1,
"tokenizerProcessNumber" : 8,
"maxSeqLen" : 512,
"npuDeviceIds" : [[0]],
"multiNodesInferEnabled" : false,
"ModelParam" : [
{
"modelInstanceType" : "Standard",
"modelName" : "qwen",
"modelWeightPath" : "/home/apulis-dev/teamdata/Qwen2.5-Coder-7B-Instruct",
"worldSize" : 1,
"cpuMemSize" : 1,
"npuMemSize" : 2,
"backendType" : "atb",
"pluginParams" : ""
}
]
},
"ScheduleParam" :
{
"maxPrefillBatchSize" : 4,
"maxPrefillTokens" : 5120,
"prefillTimeMsPerReq" : 150,
"prefillPolicyType" : 0,
"decodeTimeMsPerReq" : 50,
"decodePolicyType" : 0,
"maxBatchSize" : 4,
"maxIterTimes" : 256,
"maxPreemptCount" : 0,
"supportSelectBatch" : false,
"maxQueueDelayMicroseconds" : 5000
}
}
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/home/apulis-dev/teamdata/Qwen2.5-Coder-7B-Instruct# pwd
/home/apulis-dev/teamdata/Qwen2.5-Coder-7B-Instruct
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/home/apulis-dev/teamdata/Qw
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/home/apulis-dev/teamdata/Qwen2.5-Coder-7B-Instruct# cd ../qwen2.5-72B-Instruct/
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/home/apulis-dev/teamdata/qwen2.5-72B-Instruct# pwd
/home/apulis-dev/teamdata/qwen2.5-72B-Instruct
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/home/apulis-dev/teamdata/qwen2.5-72B-Instruct# cd /usr/local/Ascend/
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend# ls
add-ons ascend-toolkit driver llm_model mindie nnal
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend# cd mindie/
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie# ls
1.0.RC2 latest set_env.sh
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie# cd latest
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest# ls
mindie-llm mindie-rt mindie-service mindie-torch scripts version.info
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest# cd mindie-service/
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service# nohup ./bin/mindieservice_daemon > code7b-02.log 2>&1 &
[1] 230525
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service#
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service#
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service#
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service#
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service#
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service# tail -f code7b-02.log
nohup: ignoring input
2024-11-07 20:06:56,726 [INFO] [pid: 230549] env.py-55: {'use_ascend': True, 'max_memory_gb': None, 'reserved_memory_gb': 3, 'skip_warmup': False, 'visible_devices': None, 'use_host_chooser': True, 'bind_cpu': True}
2024-11-07 20:06:58,611 [INFO] [pid: 230549] cpu_binding.py-206: rank_id: 0, device_id: 0, numa_id: 2, shard_devices: [0], cpus: [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71]
2024-11-07 20:06:58,613 [INFO] [pid: 230549] cpu_binding.py-231: process 230549, new_affinity is [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71], cpu count 24
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
2024-11-07 20:06:59,422 [INFO] [pid: 230549] logging.py-53: model_runner.quantize: None
, model_runner.kv_quant: None
, model_runner.dytpe: torch.float16
2024-11-07 20:06:59,422 [INFO] [pid: 230549] logging.py-53: Rank table file location:
[W compiler_depend.ts:623] Warning: expandable_segments currently defaults to false. You can enable this feature by `export PYTORCH_NPU_ALLOC_CONF = expandable_segments:True`. (function operator())
2024-11-07 20:07:05,193 [INFO] [pid: 230549] dist.py-94: initialize_distributed has been Set
2024-11-07 20:07:05,195 [INFO] [pid: 230549] logging.py-53: init tokenizer done: Qwen2TokenizerFast(name_or_path='/home/apulis-dev/teamdata/Qwen2.5-Coder-7B-Instruct', vocab_size=151643, model_max_length=131072, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'eos_token': '<|im_end|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>', '<|object_ref_end|>', '<|box_start|>', '<|box_end|>', '<|quad_start|>', '<|quad_end|>', '<|vision_start|>', '<|vision_end|>', '<|vision_pad|>', '<|image_pad|>', '<|video_pad|>']}, clean_up_tokenization_spaces=False), added_tokens_decoder={
151643: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151644: AddedToken("<|im_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151645: AddedToken("<|im_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151646: AddedToken("<|object_ref_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151647: AddedToken("<|object_ref_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151648: AddedToken("<|box_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151649: AddedToken("<|box_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151650: AddedToken("<|quad_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151651: AddedToken("<|quad_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151652: AddedToken("<|vision_start|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151653: AddedToken("<|vision_end|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151654: AddedToken("<|vision_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151655: AddedToken("<|image_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151656: AddedToken("<|video_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
151657: AddedToken("<tool_call>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151658: AddedToken("</tool_call>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151659: AddedToken("<|fim_prefix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151660: AddedToken("<|fim_middle|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151661: AddedToken("<|fim_suffix|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151662: AddedToken("<|fim_pad|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151663: AddedToken("<|repo_name|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
151664: AddedToken("<|file_sep|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
}
2024-11-07 20:07:05,312 [INFO] [pid: 230549] logging.py-53: NPUSocInfo(soc_name='', soc_version=202, need_nz=True)
2024-11-07 20:07:05,339 [INFO] [pid: 230549] flash_causal_qwen2.py-52: >>>> qwen_DecoderModel is called.
2024-11-07 20:07:34,132 [INFO] [pid: 230549] logging.py-53: model:
FlashQwen2ForCausalLM(
(rotary_embedding): PositionRotaryEmbedding()
(attn_mask): AttentionMask()
(transformer): FlashQwenModel(
(wte): TensorParallelEmbedding()
(h): ModuleList(
(0-27): 28 x FlashQwenLayer(
(attn): FlashQwenAttention(
(rotary_emb): PositionRotaryEmbedding()
(c_attn): TensorParallelColumnLinear(
(linear): FastLinear()
)
(c_proj): TensorParallelRowLinear(
(linear): FastLinear()
)
)
(mlp): QwenMLP(
(act): SiLU()
(w2_w1): TensorParallelColumnLinear(
(linear): FastLinear()
)
(c_proj): TensorParallelRowLinear(
(linear): FastLinear()
)
)
(ln_1): QwenRMSNorm()
(ln_2): QwenRMSNorm()
)
)
(ln_f): QwenRMSNorm()
)
(lm_head): TensorParallelHead(
(linear): FastLinear()
)
)
2024-11-07 20:07:35,402 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,403 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,403 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,403 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,404 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,406 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,407 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,408 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,408 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,409 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,409 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,409 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,410 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,410 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,411 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,411 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,411 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,412 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,412 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,412 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,413 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,413 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,414 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,414 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,414 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,414 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,415 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,415 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,415 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,415 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,416 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,416 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,416 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,416 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,417 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,417 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,417 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,417 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,418 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,418 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,418 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,418 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,419 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,419 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,419 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,420 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,420 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,420 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,420 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,421 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,421 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,421 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,421 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,422 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,422 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,422 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,422 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,423 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,423 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,423 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,423 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,424 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,424 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,424 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,424 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,425 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,425 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,425 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,425 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,426 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,426 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,426 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,426 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,427 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,427 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,427 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,427 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,428 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,428 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,428 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,429 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,429 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,429 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,429 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,430 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,430 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,430 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,430 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,431 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,431 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,431 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,432 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,432 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,432 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,432 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,433 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,433 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,433 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,433 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,434 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,434 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,434 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,434 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,435 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,435 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,435 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,436 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,436 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,436 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,436 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,436 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,437 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,437 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,437 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,438 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,438 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,438 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,438 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,439 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,439 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,439 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,439 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,440 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,440 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,440 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,440 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,441 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,441 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,441 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,442 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,442 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,442 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,442 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,443 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,443 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,443 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,443 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,444 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,444 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,444 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,444 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,445 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,445 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,445 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,445 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,446 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,446 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,446 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,446 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,447 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,447 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,447 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,447 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,448 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,448 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,448 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,448 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,449 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,449 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,449 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,449 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,450 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,450 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,450 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,451 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,451 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,451 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,451 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,452 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,452 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,452 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,452 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,453 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,453 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,453 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,453 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,454 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,454 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,454 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,454 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,455 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,455 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,455 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,455 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,456 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,456 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,456 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,456 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,457 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,457 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,457 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,457 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,458 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,458 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,458 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,458 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,459 [INFO] [pid: 230549] logging.py-53: trans to 2
2024-11-07 20:07:35,466 [INFO] [pid: 230549] logging.py-53: trans to 29
2024-11-07 20:07:35,525 [INFO] [pid: 230549] logging.py-53: <<<<<<< ori k_caches[0].shape=torch.Size([292, 32, 128, 16])
2024-11-07 20:07:35,527 [INFO] [pid: 230549] flash_causal_lm.py-127: <<<<<<<after transdata k_caches[0].shape=torch.Size([292, 32, 128, 16])
2024-11-07 20:07:35,527 [INFO] [pid: 230549] logging.py-53: >>>>>>id of kcache is 281473057754064 id of vcache is 281473057754144
2024-11-07 20:07:43,860 [INFO] [pid: 231914] env.py-55: {'use_ascend': True, 'max_memory_gb': None, 'reserved_memory_gb': 3, 'skip_warmup': False, 'visible_devices': None, 'use_host_chooser': True, 'bind_cpu': True}
2024-11-07 20:07:43,876 [INFO] [pid: 231908] env.py-55: {'use_ascend': True, 'max_memory_gb': None, 'reserved_memory_gb': 3, 'skip_warmup': False, 'visible_devices': None, 'use_host_chooser': True, 'bind_cpu': True}
2024-11-07 20:07:43,925 [INFO] [pid: 231911] env.py-55: {'use_ascend': True, 'max_memory_gb': None, 'reserved_memory_gb': 3, 'skip_warmup': False, 'visible_devices': None, 'use_host_chooser': True, 'bind_cpu': True}
2024-11-07 20:07:43,994 [INFO] [pid: 231910] env.py-55: {'use_ascend': True, 'max_memory_gb': None, 'reserved_memory_gb': 3, 'skip_warmup': False, 'visible_devices': None, 'use_host_chooser': True, 'bind_cpu': True}
2024-11-07 20:07:44,052 [INFO] [pid: 231912] env.py-55: {'use_ascend': True, 'max_memory_gb': None, 'reserved_memory_gb': 3, 'skip_warmup': False, 'visible_devices': None, 'use_host_chooser': True, 'bind_cpu': True}
2024-11-07 20:07:44,051 [INFO] [pid: 231913] env.py-55: {'use_ascend': True, 'max_memory_gb': None, 'reserved_memory_gb': 3, 'skip_warmup': False, 'visible_devices': None, 'use_host_chooser': True, 'bind_cpu': True}
2024-11-07 20:07:44,051 [INFO] [pid: 231909] env.py-55: {'use_ascend': True, 'max_memory_gb': None, 'reserved_memory_gb': 3, 'skip_warmup': False, 'visible_devices': None, 'use_host_chooser': True, 'bind_cpu': True}
2024-11-07 20:07:44,064 [INFO] [pid: 231915] env.py-55: {'use_ascend': True, 'max_memory_gb': None, 'reserved_memory_gb': 3, 'skip_warmup': False, 'visible_devices': None, 'use_host_chooser': True, 'bind_cpu': True}
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Daemon start success!
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service# npu-smi info
+--------------------------------------------------------------------------------------------------------+
| npu-smi 24.1.rc2.b030 Version: 24.1.rc2.b030 |
+-------------------------------+-----------------+------------------------------------------------------+
| NPU Name | Health | Power(W) Temp(C) Hugepages-Usage(page) |
| Chip Device | Bus-Id | AICore(%) Memory-Usage(MB) |
+===============================+=================+======================================================+
| 32896 310P3 | OK | NA 60 9610 / 9610 |
| 0 0 | 0000:85:00.0 | 0 21064/ 21527 |
+===============================+=================+======================================================+
+-------------------------------+-----------------+------------------------------------------------------+
| NPU Chip | Process id | Process name | Process memory(MB) |
+===============================+=================+======================================================+
| 32896 0 | 230549 | mindieservice_b | 19294 |
+===============================+=================+======================================================+
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service# free -g
total used free shared buff/cache available
Mem: 1005 25 341 0 638 974
Swap: 0 0 0
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service#
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service# curl -w "\ntime_total=%{time_total}\n" -H "Accept: application/json" -H "Content-type: application/json" -X POST -d '{"inputs":"你是什么名字?你的强项是什么?会写sql语句吗","stream": false}' http://127.0.0.1:1025/health
time_total=0.001019
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service# curl -H "Accept: application/json" -H "Content-type: application/json" --cacert /home/runs/static_conf/ca/ca.pem --cert /home/runs/static_conf/cert/client.pem --key /home/runs/static_conf/cert/client.key.pem -X POST -d '{
"prompt": "My name is Olivier and I",
"stream": true,
"repetition_penalty": 1.0,
"top_p": 1.0,
"top_k": 10,
"max_tokens": 16,
"temperature": 1.0
}' http://127.0.0.1:1025/generate | cat
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 446 0 298 100 148 207 103 0:00:01 0:00:01 --:--:-- 310
{"text":[" am"]}{"text":[" here"]}{"text":[" to"]}{"text":[" learn"]}{"text":[" from"]}{"text":[" you"]}{"text":["."]}{"text":[" I"]}{"text":[" am"]}{"text":[" a"]}{"text":[" software"]}{"text":[" developer"]}{"text":[" working"]}{"text":[" in"]}{"text":[" the"]}{"text":[" field"]}root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service#
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service#
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service#
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service#
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service#
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service# curl -w "\ntime_total=%{time_total}\n" -H "Accept: application/json" -H "Content-type: application/json" -X POST -d '{"inputs": "I love Beijing, because","stream": false}' http://127.0.0.1:1025/v1/models
time_total=0.000755
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service# curl -w "\ntime_total=%{time_total}\n" -H "Accept: application/json" -H "Content-type: application/json" -X POST -d '{"inputs": "I love Beijing, because","stream": false}' http://127.0.0.1:1025
[{"generated_text":" it is a very beautiful city. I like to visit the Forbidden City, the Summer Palace, and"}]
time_total=1.781473
root@dev-8242526b-01f2-4a54-b89d-f6d9c57c692d-qjhpf:/usr/local/Ascend/mindie/latest/mindie-service#