{"id":63701,"date":"2023-06-15T15:55:41","date_gmt":"2023-06-15T06:55:41","guid":{"rendered":"https:\/\/smilegate.ai\/?p=63701"},"modified":"2023-06-15T15:55:43","modified_gmt":"2023-06-15T06:55:43","slug":"triton-inference-server%eb%a1%9c-%eb%aa%a8%eb%8d%b8-%ec%84%9c%eb%b9%99-%ec%84%b1%eb%8a%a5-%eb%81%8c%ec%96%b4%ec%98%ac%eb%a6%ac%ea%b8%b0","status":"publish","type":"post","link":"https:\/\/smilegate.ai\/cn\/2023\/06\/15\/triton-inference-server%eb%a1%9c-%eb%aa%a8%eb%8d%b8-%ec%84%9c%eb%b9%99-%ec%84%b1%eb%8a%a5-%eb%81%8c%ec%96%b4%ec%98%ac%eb%a6%ac%ea%b8%b0\/","title":{"rendered":"Triton Inference Server\ub85c \ubaa8\ub378 \uc11c\ube59 \uc131\ub2a5 \ub04c\uc5b4\uc62c\ub9ac\uae30"},"content":{"rendered":"

[\ubd84\uc11d\uc9c0\ub2a5\uac1c\ubc1c\ud300 \ubc15\ud6a8\uc8fc]<\/p>\n\n\n\n

\ub525\ub7ec\ub2dd \uae30\uc220\uc758 \ubc1c\uc804\uc73c\ub85c AI \ubaa8\ub378\uc758 \uc131\ub2a5\uc740 \uc810\uc810 \ud5a5\uc0c1\ub418\uace0 \uc788\uc2b5\ub2c8\ub2e4. \ud558\uc9c0\ub9cc \uadf8\ub9cc\ud07c \ubaa8\ub378\uc758 \ud06c\uae30\ub294 \uc810\uc810 \uac70\ub300\ud574\uc9c0\uace0 \ucd94\ub860 \uc18d\ub3c4\ub294 \ub290\ub824\uc9c0\uace0 \uc788\uc2b5\ub2c8\ub2e4. \ub354 \uc88b\uc740 GPU\ub97c \uc0ac\uc6a9\ud558\uba74 \ud6a8\uacfc\ub97c \ubcfc \uc218 \uc788\uc9c0\ub9cc \uadf8 \ube44\uc6a9\uc774 \ub9e4\uc6b0 \ud06c\uae30 \ub54c\ubb38\uc5d0, AI \uc11c\ube44\uc2a4\ub97c \ud6a8\uc728\uc801\uc73c\ub85c \uc6b4\uc601\ud558\uae30 \uc704\ud574\uc11c \ubaa8\ub378\uc758 \uacbd\ub7c9\ud654\ub294 \uc120\ud0dd\uc774 \uc544\ub2cc \ud544\uc218\uac00 \ub418\uc5c8\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n

<\/div>\n\n\n
\n
\"\"
< \uc2dc\uac04\uc5d0 \ub530\ub978 \ubaa8\ub378 \ud06c\uae30\uc758 \ubcc0\ud654 ><\/figcaption><\/figure><\/div>\n\n\n
<\/div>\n\n\n\n

\ubaa8\ub378\uc744 \uacbd\ub7c9\ud654\ud558\uae30 \uc704\ud574\uc11c\ub294 PyTorch \ud639\uc740 TensorFlow \ub4f1\uc73c\ub85c \ud559\uc2b5\ud574\uc11c \ub098\uc628 \ubaa8\ub378\uc744 ONNX \ub610\ub294 TensorRT \ud3ec\ub9f7\uc73c\ub85c \ubcc0\ud658\ud574\uc57c \ud569\ub2c8\ub2e4. \ud558\uc9c0\ub9cc \uc774\ub807\uac8c \ubcc0\ud658\ub41c \ubaa8\ub378\uc744 \uc0ac\uc6a9\ud558\ub824\uba74 \ubcc4\ub3c4\uc758 \ucd94\ub860 \ucf54\ub4dc\ub97c \uc791\uc131\ud574\uc57c \ud558\uace0, \uc5ec\uae30\uc11c \uc131\ub2a5\uc744 \ub354 \uc62c\ub9ac\uae30 \uc704\ud574\uc11c\ub294 \ucd94\ub860 \ucf54\ub4dc\ub97c C++\ub85c \uc791\uc131\ud574\uc57c \ud558\ub294\ub370 \uc774 \uacfc\uc815\uc774 \uc27d\uc9c0 \uc54a\uc2b5\ub2c8\ub2e4. \uc774\ub7f0 \uc5b4\ub824\uc6b4 \ubd80\ubd84\uc744 Triton Inference Server<\/strong>\ub97c \uc774\uc6a9\ud558\uba74 \uc27d\uac8c \ud574\uacb0\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n

<\/div>\n\n\n
\n
\"\"
< Triton Inference Server Architecture ><\/figcaption><\/figure><\/div>\n\n\n
<\/div>\n\n\n\n

Triton Inference Server<\/h3>\n\n\n\n

Triton Inference Server\ub294 \uace0\uc131\ub2a5 \ucd94\ub860\uc5d0 \ucd5c\uc801\ud654\ub41c \uc624\ud508\uc18c\uc2a4 \uc18c\ud504\ud2b8\uc6e8\uc5b4\uc785\ub2c8\ub2e4. \ub2e4\uc591\ud55c \ubaa8\ub378 \ud3ec\ub9f7, \ud2b9\ud788 TensorRT\uc758 \ucd94\ub860 \uae30\ub2a5\uc744 \uc81c\uacf5\ud558\uae30 \ub54c\ubb38\uc5d0 \ubcc4\ub3c4\uc758 \ucd94\ub860 \ucf54\ub4dc\ub97c \uc791\uc131\ud560 \ud544\uc694\uac00 \uc5c6\uc73c\uba70, C++ \uae30\ubc18\uc73c\ub85c \ub9cc\ub4e4\uc5c8\uae30 \ub54c\ubb38\uc5d0 Python\ubcf4\ub2e4 \ube60\ub978 \uc18d\ub3c4\ub85c \ucd94\ub860\uc774 \uac00\ub2a5\ud569\ub2c8\ub2e4. \uc2e4\uc81c\ub85c \uc0ac\uc6a9\ud560 \ub54c\ub294 \uc544\ub798 config.pbtxt\ucc98\ub7fc \ubaa8\ub378\uc758 \ubc30\ud3ec \uc2a4\ud399\ub9cc \uc815\uc758\ud558\uba74 Triton Inference Server\ub85c \ubc14\ub85c \ubc30\ud3ec\uac00 \uac00\ub2a5\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n

name: \"cifar10_vgg16_pt\"\r\nplatform: \"pytorch_libtorch\"\r\nmax_batch_size: 8\r\ndynamic_batching {\r\n  max_queue_delay_microseconds: 100\r\n}\r\ninstance_group [\r\n  {\r\n    count: 1\r\n    kind: KIND_GPU\r\n  }\r\n]\r\ninput [\r\n  {\r\n    name: \"input__0\"\r\n    data_type: TYPE_FP32\r\n    dims: [ 3, 32, 32 ]\r\n  }\r\n]\r\noutput [\r\n  {\r\n    name: \"output__0\"\r\n    data_type: TYPE_FP32\r\n    dims: [ 10 ]\r\n  }\r\n]\r<\/code><\/pre>\n\n\n\n

< config.pbtxt \uc608\uc2dc ><\/p>\n\n\n\n

<\/div>\n\n\n\n

Triton Inference Server\ub97c \uc0ac\uc6a9\ud588\uc744 \ub54c\uc758 \uc8fc\uc694 \uc7a5\uc810\uc744 \uc815\ub9ac\ud558\uba74 \uc544\ub798\uc640 \uac19\uc73c\uba70, \uc774\uc678\uc5d0\ub3c4 \ub2e4\uc591\ud55c \uae30\ub2a5\ub4e4\uc744 \uc81c\uacf5\ud569\ub2c8\ub2e4.<\/p>\n\n\n\n

\n
\n
\n
<\/div>\n\n\n\n
\n
    \n
  1. \ub2e4\uc591\ud55c \ubaa8\ub378 \ud504\ub808\uc784\uc6cc\ud06c \uc9c0\uc6d0 \ubc0f \ucd94\ub860 \uae30\ub2a5 \uc81c\uacf5<\/li>\n\n\n\n
  2. C++ level\uc758 \uace0\uc131\ub2a5 \ucd94\ub860<\/li>\n\n\n\n
  3. Kubernetes\ub97c \uc774\uc6a9\ud55c \ud655\uc7a5 \ubc0f \ubaa8\ub2c8\ud130\ub9c1 \uc9c0\uc6d0<\/li>\n\n\n\n
  4. \ubaa8\ub378 \uc559\uc0c1\ube14<\/li>\n\n\n\n
  5. Single GPU, Multiple Model<\/li>\n\n\n\n
  6. \ubaa8\ub378 \ubc84\uc804 \uad00\ub9ac<\/li>\n<\/ol>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n\n\n\n
    <\/div>\n\n\n\n

    \uc131\ub2a5 \ube44\uad50<\/h3>\n\n\n\n

    \uc2e4\uc81c\ub85c Triton Inference Server\ub97c \uc0ac\uc6a9\ud588\uc744 \ub54c \uc5b4\ub290 \uc815\ub3c4 \ud6a8\uacfc\uac00 \uc788\ub294\uc9c0 \ud655\uc778\ud574\ubcf4\uaca0\uc2b5\ub2c8\ub2e4. \ud655\uc778\uc744 \uc704\ud574 CIFAR-10 \uc774\ubbf8\uc9c0\ub97c \uc774\uc6a9\ud574\uc11c PyTorch<\/strong>\ub85c VGG16<\/strong> \ubaa8\ub378\uc744 \ud559\uc2b5\ud588\uace0, ONNX<\/strong>\uc640 TensorRT<\/strong>\ub85c \uac01\uac01 \ubcc0\ud658\ud55c \ub4a4 \ubaa8\ub378\uc758 \uba54\ubaa8\ub9ac \uc0ac\uc6a9\ub7c9<\/u>\uacfc \ucc98\ub9ac\ub7c9<\/u>\uc744 \ube44\uad50\ud588\uc2b5\ub2c8\ub2e4. TensorRT\uc758 \uacbd\uc6b0 FP32\uc5d0\uc11c FP16\uc73c\ub85c \uc591\uc790\ud654(Quantization)\ud574\uc11c \ube44\uad50\ud588\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n

    <\/div>\n\n\n\n

    \uba54\ubaa8\ub9ac \uc0ac\uc6a9\ub7c9<\/strong><\/p>\n\n\n

    \n
    \"\"<\/figure><\/div>\n\n\n

    \ucc28\ud2b8\ub294 \uac01 \ubaa8\ub378 \ud3ec\ub9f7\ubcc4\ub85c \ubc30\ud3ec \uc2dc GPU \uba54\ubaa8\ub9ac \uc0ac\uc6a9\ub7c9\uc744 \uce21\uc815\ud55c \uacb0\uacfc\uc785\ub2c8\ub2e4. \uacb0\uacfc\ub97c \ubcf4\uba74 \uae30\uc874 PyTorch \ubaa8\ub378\uacfc \ube44\uad50\ud588\uc744 \ub54c ONNX\uc758 \uacbd\uc6b0 \uba54\ubaa8\ub9ac\ub97c \ub354 \ub9ce\uc774 \uc0ac\uc6a9\ud558\uace0 \uc788\uc9c0\ub9cc, TensorRT\uc758 \uacbd\uc6b0 \uc808\ubc18\ub3c4 \uc548 \ub418\ub294 \uba54\ubaa8\ub9ac\ub9cc \uc0ac\uc6a9\ub418\ub294 \uac83\uc744 \ubcfc \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n

    <\/div>\n\n\n\n

    \ucc98\ub9ac\ub7c9<\/strong><\/p>\n\n\n\n

    \ucc98\ub9ac\ub7c9 \uce21\uc815\uc740 NVIDIA\uc5d0\uc11c \uc81c\uacf5\ub418\ub294 Performance Analyzer<\/a><\/strong>\ub97c \uc774\uc6a9\ud588\uc2b5\ub2c8\ub2e4. Concurrency\ub97c 1\uc5d0\uc11c 10\uae4c\uc9c0 \ub192\uc5ec\uac00\uba74\uc11c \uac01 \ubaa8\ub378 \ud3ec\ub9f7\uc758 \ucc98\ub9ac\ub7c9\uc744 \ube44\uad50\ud588\uc73c\uba70, \uac01\uac01\uc758 \ubaa8\ub378\uc744 \ubc30\ud3ec\ud560 \ub54c PyTorch\uc640 ONNX\ub294 max-batch\ub97c 8\ub85c, TensorRT\ub294 1\ub85c \uc124\uc815\ud558\uace0 \uc9c4\ud589\ud588\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n

    \n
    \"\"<\/figure><\/div>\n\n\n

    \uacb0\uacfc\ub97c \ubcf4\uba74 ONNX\uc758 \uacbd\uc6b0 Concurrency\uac00 \ub192\uc544\uc9c8\uc218\ub85d PyTorch\ubcf4\ub2e4 \ucc98\ub9ac\ub7c9\uc774 \uc57d\uac04 \ub192\uc544\uc9c0\uc9c0\ub9cc, TensorRT\uc758 \uacbd\uc6b0 \ucc98\ub9ac\ub7c9\uc774 \uc57d 2\ubc30 \uc774\uc0c1 \ub192\uc740 \uac83\uc744 \ud655\uc778\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4. TensorRT\uac00 batch \ucc98\ub9ac\ub418\uc9c0 \uc54a\uae30 \ub54c\ubb38\uc5d0 Concurrency 3\ubd80\ud130 \ucc98\ub9ac\ub7c9\uc774 \uc62c\ub77c\uac00\uc9c0 \uc54a\uc9c0\ub9cc, \uadf8\uac78 \uac10\uc548\ud558\ub354\ub77c\ub3c4 TensorRT\uc758 \ucc98\ub9ac\ub7c9\uc774 \uc555\ub3c4\uc801\uc73c\ub85c \ub192\uc740 \uac83\uc744 \ubcfc \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n

    <\/div>\n\n\n\n

    \ub9c8\uce68<\/h3>\n\n\n\n

    GPU \uc790\uc6d0\uc758 \uc0ac\uc6a9\ub8cc\uac00 \ube44\uc2fc \ub9cc\ud07c \ubaa8\ub378\uc758 \uc790\uc6d0 \uc0ac\uc6a9\ub7c9\uacfc \ucc98\ub9ac\ub7c9\uc5d0 \ub300\ud55c \ucd5c\uc801\ud654\ub294 AI \uc11c\ube44\uc2a4 \uc131\uacf5 \uc5ec\ubd80\uc5d0 \ud070 \ubd80\ubd84\uc744 \ucc28\uc9c0\ud558\uace0 \uc788\uc2b5\ub2c8\ub2e4. \ucd5c\uadfc \uac00\uc7a5 \ud06c\uac8c \ud654\uc81c\uac00 \ub418\uace0 \uc788\ub294 \uc0dd\uc131\ubaa8\ub378\uc758 \uacbd\uc6b0\ub3c4 \uc790\uc6d0 \uc0ac\uc6a9\ub7c9\uacfc \ucc98\ub9ac\ub7c9 \uce21\uba74\uc5d0\uc11c \ub9e4\uc6b0 \ud6a8\uc728\uc774 \ub0ae\uae30 \ub54c\ubb38\uc5d0, \uc774\ub7ec\ud55c \ucd5c\uc801\ud654\ub97c \uc798 \uc801\uc6a9\ud574\uc11c \ud6a8\uc728\uc801\uc73c\ub85c \ubc30\ud3ec\ud560 \ud544\uc694\uac00 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n\n\n\n

    <\/div>\n\n\n\n

    \ucc38\uace0<\/strong><\/p>\n\n\n\n