{"id":64292,"date":"2023-11-20T11:07:11","date_gmt":"2023-11-20T02:07:11","guid":{"rendered":"https:\/\/smilegate.ai\/?p=64292"},"modified":"2023-11-20T11:07:14","modified_gmt":"2023-11-20T02:07:14","slug":"llm%ec%9d%84-%eb%b9%a0%eb%a5%b4%ea%b2%8c-%ec%84%9c%eb%b9%99%ed%95%98%eb%8a%94-%eb%b0%a9%eb%b2%95","status":"publish","type":"post","link":"https:\/\/smilegate.ai\/en\/2023\/11\/20\/llm%ec%9d%84-%eb%b9%a0%eb%a5%b4%ea%b2%8c-%ec%84%9c%eb%b9%99%ed%95%98%eb%8a%94-%eb%b0%a9%eb%b2%95\/","title":{"rendered":"LLM\uc744 \ube60\ub974\uac8c \uc11c\ube59\ud558\ub294 \ubc29\ubc95"},"content":{"rendered":"
[\ubd84\uc11dAI\uc11c\ube44\uc2a4\ud300 \ubc15\ud6a8\uc8fc]<\/p>\n\n\n\n
\ucd5c\uadfc \uc5b8\uc5b4 \ubaa8\ub378\uc758 \ubc1c\uc804\uc740 \ud070 \uc784\ud329\ud2b8\ub97c \uc8fc\uc5c8\uace0, \uadf8\ub9cc\ud07c \uc5f0\uad6c \ub610\ud55c \ud65c\ubc1c\ud788 \uc774\ub8e8\uc5b4\uc9c0\uace0 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n
\uadf8\ub7ec\ub098 \uc774\ub7ec\ud55c \ubaa8\ub378\uc758 \uc11c\ube59 \uacfc\uc815\uc5d0\uc11c \ubc1c\uc0dd\ud558\ub294 \uace0\ub3c4\uc758 \uacc4\uc0b0 \uc791\uc5c5\uacfc \uba54\ubaa8\ub9ac \uc694\uad6c\ub7c9\uc740 \uc0c8\ub85c\uc6b4 \ub3c4\uc804 \uacfc\uc81c\ub97c \uc81c\uc2dc\ud558\uace0 \uc788\uc2b5\ub2c8\ub2e4. \ucd5c\uc801\ud654\ub41c \uc11c\ube59\uc740 \uc774\ub7ec\ud55c LLM\uc744 \ud6a8\uc728\uc801\uc73c\ub85c \uc6b4\uc601\ud558\uae30 \uc704\ud55c \uc911\uc694\ud55c \uc5ed\ud560\uc744 \uc218\ud589\ud569\ub2c8\ub2e4. \ubcf8\ubb38\uc5d0\uc11c\ub294 \uc774\ub7ec\ud55c \uc911\uc694\ud55c \uc5ed\ud560\uc744 \ud558\ub294 LLM \ucd5c\uc801\ud654 \ub77c\uc774\ube0c\ub7ec\ub9ac 3\uac1c\ub97c \uc18c\uac1c\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n
FasterTransformer <\/strong>(GitHub<\/a>)<\/p>\n\n\n\n \uac00\uc7a5 \uc720\uba85\ud55c \ubaa8\ub378 \ucd5c\uc801\ud654 \ub77c\uc774\ube0c\ub7ec\ub9ac\ub85c LLM\uc774 \ub098\uc624\uae30 \uc804\ubd80\ud130 \uc0ac\uc6a9\ub418\ub2e4 LLM\uc774 \ub4f1\uc7a5\ud558\uba74\uc11c \ub354 \ub9ce\uc774 \uc0ac\uc6a9\ub418\uace0 \uc788\uc2b5\ub2c8\ub2e4. \ud070 \ud2b9\uc9d5\uc740 C++, CUDA Native\ub85c \ub3d9\uc791\ud558\uace0 Model Parallel(Tensor Parallel\/Pipeline Parallel) \uae30\ubc18\uc758 \ucd94\ub860 \uac00\uc18d \uc5d4\uc9c4\uc744 \uad6c\ud604\ud588\ub2e4\ub294 \uac83\uc785\ub2c8\ub2e4.<\/p>\n\n\n\n \ub2e4\ub9cc 2023\ub144 \ucd08\uae4c\uc9c0 \ubc84\uc804\uc744 Release\ud558\ub2e4 \ud604\uc7ac\ub294 \uac1c\ubc1c\uc774 \uc911\ub2e8\ub418\uc5c8\uace0, \uc774\ud6c4 \uac1c\ubc1c \uac74\uc774 TensorRT-LLM\uc73c\ub85c \uc774\uad00\ub418\uc5c8\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n FasterTransformer\uc5d0\uc11c \uc9c0\uc6d0\ub418\ub294 \ubaa8\ub378\uc758 \uc885\ub958\ub294 \uc5ec\uae30<\/a>\uc11c \ud655\uc778\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n vLLM<\/strong> (GitHub<\/a>)<\/p>\n\n\n\n 2023\ub144 6\uc6d4\uc5d0 \ub4f1\uc7a5\ud574\uc11c \ud604\uc7ac \uc778\uae30 \uc788\ub294 \ubaa8\ub378 \ucd5c\uc801\ud654 \ub77c\uc774\ube0c\ub7ec\ub9ac \uc911 \ud558\ub098\uc785\ub2c8\ub2e4. \ucc98\uc74c \ub4f1\uc7a5\ud588\uc744 \ub54c \uc9c0\uc6d0\ub418\ub294 \ubaa8\ub378\uc774 \ub9ce\uc9c0 \uc54a\uc558\ub358 FasterTransformer\uc5d0 \ube44\ud574 \ub2e4\uc218\uc758 \ucd5c\uc2e0 \ubaa8\ub378\uc744 \uc9c0\uc6d0\ud558\uba74\uc11c \ucd5c\uc2e0 \uc5b8\uc5b4 \ubaa8\ub378\ub4e4\uc744 \ud6a8\uc728\uc801\uc73c\ub85c \ub2e4\ub8f0 \uc218 \uc788\uac8c \ub418\uc5c8\uc2b5\ub2c8\ub2e4. 
\ud070 \ud2b9\uc9d5\uc740 Paged Attention \uae30\ubc95\uc744 \ud65c\uc6a9\ud574\uc11c latency\ub97c \uc720\uc9c0\ud558\uba74\uc11c throughput\uc740 2-4X \ud5a5\uc0c1\ud588\ub2e4\ub294 \uac83\uc785\ub2c8\ub2e4.<\/p>\n\n\n\n vLLM\uc5d0\uc11c \uc9c0\uc6d0\ub418\ub294 \ubaa8\ub378\uc758 \uc885\ub958\ub294 \uc5ec\uae30<\/a>\uc11c \ud655\uc778\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n TensorRT-LLM<\/strong> (GitHub<\/a>)<\/p>\n\n\n\n \uac00\uc7a5 \ucd5c\uadfc\uc5d0 \uacf5\uac1c(2023\ub144 10\uc6d4)\ud55c \ub77c\uc774\ube0c\ub7ec\ub9ac\ub85c, FasterTransformer\uc758 \ud6c4\uc18d \ubc84\uc804\uc785\ub2c8\ub2e4. \uc55e\uc5d0\uc11c \uc124\uba85\ub4dc\ub9b0 \uac83\ucc98\ub7fc FasterTransformer\uc758 \uac1c\ubc1c\uc774 \uc911\ub2e8\ub418\uace0 \uc774\ud6c4 \uae30\ub2a5\uc774 \ubaa8\ub450 TensorRT-LLM\uc5d0 \uc774\uad00 \uac1c\ubc1c\ub418\uc5c8\uc2b5\ub2c8\ub2e4. \ud070 \ud2b9\uc9d5\uc740 TensorRT \ub525\ub7ec\ub2dd \ucef4\ud30c\uc77c\ub7ec\uc640 \ucd5c\uc801\ud654\ub41c \ucee4\ub110, \uc804\/\ud6c4\ucc98\ub9ac \ub2e8\uacc4, Multi-GPU\/Multi-Node \ud1b5\uc2e0 \uae30\ubcf8\uc694\uc18c\uc5d0 In-flight Batching\uc774\ub77c \ubd88\ub9ac\ub294 \ucd5c\uc801\ud654\ub41c \uc2a4\ucf00\uc904\ub9c1 \uae30\uc220\uc774 \ud3ec\ud568\ub410\ub2e4\ub294 \uac83\uc785\ub2c8\ub2e4.<\/p>\n\n\n\n TensorRT-LLM\uc5d0\uc11c \uc9c0\uc6d0\ub418\ub294 \ubaa8\ub378\uc758 \uc885\ub958\ub294 \uc5ec\uae30<\/a>\uc11c \ud655\uc778\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n \ud65c\uc131\ud654 \ucd94\uc774 \ube44\uad50<\/strong><\/p>\n\n\n\n \uc544\ub798 \uadf8\ub798\ud504\ub294 \uac01 \ub77c\uc774\ube0c\ub7ec\ub9ac\ubcc4 GitHub stars \ub204\uc801 \ucd94\uc774\uc785\ub2c8\ub2e4.<\/p>\n\n\n \uadf8\ub798\ud504\ub97c \ubcf4\uba74 FasterTransformer\uac00 \ucd9c\uc2dc\ub41c \uc9c0 \uc624\ub798\ub41c \ub9cc\ud07c \uafb8\uc900\ud55c \uc0c1\uc2b9\uc138\ub97c \ubcf4\uc774\uace0 \uc788\uace0, vLLM\uacfc TensorRT-LLM\uc774 \ub4f1\uc7a5\uacfc \ub3d9\uc2dc\uc5d0 \uae09\ub4f1\ud558\ub294 \uac83\uc744 
\ud655\uc778\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4. \uc774\ub97c \ubd10\ub3c4 vLLM\uacfc TensorRT-LLM\uc774 \uc5bc\ub9c8\ub098 \ud544\uc694\ud55c \ub77c\uc774\ube0c\ub7ec\ub9ac\uc778\uc9c0 \ud655\uc778\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n \uc9c0\uc6d0 \ubaa8\ub378 \ube44\uad50<\/strong><\/p>\n\n\n\n \uac01 \ub77c\uc774\ube0c\ub7ec\ub9ac\uc5d0\uc11c \uc9c0\uc6d0\ud558\ub294 \ubaa8\ub378\uc744 \ube44\uad50\ud574\ubcf4\uba74 \uc544\ub798 \ud45c\uc640 \uac19\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n FasterTransformer<\/strong>\ub294 \uc62c \ucd08\uc5d0 \uac1c\ubc1c\uc744 \uc911\ub2e8\ud588\uae30 \ub54c\ubb38\uc5d0 \ucd5c\uc2e0 \ubaa8\ub378\uc740 \uc9c0\uc6d0\ub418\uc9c0 \uc54a\uc2b5\ub2c8\ub2e4. vLLM<\/strong>\uc740 \ub300\ubd80\ubd84\uc758 \ubaa8\ub378\uc744 \uc9c0\uc6d0\ud558\uba70 GLM \ubaa8\ub378\ub3c4 \uace7 \uc9c0\uc6d0\ub420 \uc608\uc815(v0.2.2<\/a>)\uc785\ub2c8\ub2e4. TensorRT-LLM<\/strong>\uc740 \uc9c0\uc6d0\uc774 \ub9ce\uc774 \uc548 \ub418\ub294 \uac83\ucc98\ub7fc \ubcf4\uc774\uc9c0\ub9cc \ub300\ubd80\ubd84\uc758 \uae30\ubc18 \ubaa8\ub378\uc744 \uc9c0\uc6d0\ud558\uae30 \ub54c\ubb38\uc5d0 \ub300\ubd80\ubd84\uc758 \ubaa8\ub378\uc774 \uc9c0\uc6d0\ub41c\ub2e4\uace0 \ubcfc \uc218 \uc788\uc2b5\ub2c8\ub2e4. \ub2e4\ub9cc \uc774 \ub54c\ubb38\uc5d0 \ubaa8\ub378\uc5d0 \ub300\ud574 \uc870\uae08\uc740 \uc774\ud574\ud558\uace0 \uc0ac\uc6a9\ud574\uc57c \ud560 \ud544\uc694\uac00 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n \ucc98\ub9ac\ub7c9 \uc131\ub2a5 \ube44\uad50<\/strong><\/p>\n\n\n\n \uac1c\ubc1c\uc774 \uc911\ub2e8\ub41c FasterTransformer\ub97c \uc81c\uc678\ud558\uace0 TensorRT-LLM\uacfc vLLM\uc758 \uc131\ub2a5\uc744 \ube44\uad50\ud55c \uacb0\uacfc\ub294 \uc544\ub798\uc640 \uac19\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\ub77c\uc774\ube0c\ub7ec\ub9ac \ube44\uad50<\/h3>\n\n\n\n
<\/th> FasterTransformer<\/strong><\/th> vLLM<\/strong><\/th> TensorRT-LLM<\/strong><\/th><\/tr><\/thead> BLOOM<\/td> O<\/td> O<\/td> O<\/td><\/tr> GPT-J<\/td> O<\/td> O<\/td> O<\/td><\/tr> GPT-NeoX<\/td> O<\/td> O<\/td> O<\/td><\/tr> OPT<\/td> O<\/td> O<\/td> O<\/td><\/tr> GPT<\/td> O<\/td> <\/td> O<\/td><\/tr> BERT<\/td> O<\/td> <\/td> O<\/td><\/tr> T5<\/td> O<\/td> <\/td> <\/td><\/tr> ViT<\/td> O<\/td> <\/td> <\/td><\/tr> BART<\/td> O<\/td> <\/td> <\/td><\/tr> DeBERTa<\/td> O<\/td> <\/td> <\/td><\/tr> Aquila<\/td> <\/td> O<\/td> <\/td><\/tr> Baichuan<\/td> <\/td> O<\/td> O<\/td><\/tr> Falcon<\/td> <\/td> O<\/td> O<\/td><\/tr> GPT-2<\/td> <\/td> O<\/td> <\/td><\/tr> StarCoder<\/td> <\/td> O<\/td> O<\/td><\/tr> SantaCoder<\/td> <\/td> O<\/td> O<\/td><\/tr> WizardCoder<\/td> <\/td> O<\/td> <\/td><\/tr> Dolly v2<\/td> <\/td> O<\/td> <\/td><\/tr> StableLM<\/td> <\/td> O<\/td> <\/td><\/tr> InternLM<\/td> <\/td> O<\/td> <\/td><\/tr> LLaMA<\/td> <\/td> O<\/td> O<\/td><\/tr> LLaMA-2<\/td> <\/td> O<\/td> O<\/td><\/tr> Vicuna<\/td> <\/td> O<\/td> <\/td><\/tr> Alpaca<\/td> <\/td> O<\/td> <\/td><\/tr> Koala<\/td> <\/td> O<\/td> <\/td><\/tr> Guanaco<\/td> <\/td> O<\/td> <\/td><\/tr> Mistral<\/td> <\/td> O<\/td> <\/td><\/tr> MPT<\/td> <\/td> O<\/td> O<\/td><\/tr> Qwen<\/td> <\/td> O<\/td> <\/td><\/tr> Blip2<\/td> <\/td> <\/td> O<\/td><\/tr> ChatGLM-6B<\/td> <\/td> \uc608\uc815<\/td> O<\/td><\/tr> ChatGLM2-6B<\/td> <\/td> \uc608\uc815<\/td> O<\/td><\/tr><\/tbody><\/table><\/figure>\n\n\n\n