{"id":9907,"date":"2020-07-03T09:02:36","date_gmt":"2020-07-03T00:02:36","guid":{"rendered":"http:\/\/www.gisdeveloper.co.kr\/?p=9907"},"modified":"2020-07-05T07:39:31","modified_gmt":"2020-07-04T22:39:31","slug":"onehot-%ec%9d%b8%ec%bd%94%eb%94%a9encoding-%eb%b0%8f-%ec%8a%a4%ec%bc%80%ec%9d%bc%eb%a7%81scaling","status":"publish","type":"post","link":"http:\/\/www.gisdeveloper.co.kr\/?p=9907","title":{"rendered":"OneHot \uc778\ucf54\ub529(Encoding) \ubc0f \uc2a4\ucf00\uc77c\ub9c1(Scaling)"},"content":{"rendered":"<p>\ud559\uc2b5 \ub370\uc774\ud130\uc758 \ud2b9\uc131\ub4e4\uc740 \uc218\uce58\uac12 \ubfd0\ub9cc \uc544\ub2c8\ub77c &#8216;\ud06c\ub2e4&#8217;, &#8216;\uc911\uac04&#8217;, &#8216;\uc791\ub2e4&#8217; \ub610\ub294 &#8216;\uc5ec\uc790&#8217;, &#8216;\ub0a8\uc790&#8217;\uc640 \uac19\uc740 \ubc94\uc8fc\uac12\ub3c4 \uc874\uc7ac\ud569\ub2c8\ub2e4. \uba3c\uc800 \ubc94\uc8fc\ud615 \uac12\uc744 \ucc98\ub9ac\ud558\uae30 \uc704\ud574\uc11c\ub294 \uc774 \ubc94\uc8fc\ud615 \uac12\uc744 \uc218\uce58\uac12\uc73c\ub85c \ubcc0\ud658\ud574\uc57c \ud569\ub2c8\ub2e4. \ub9cc\uc57d \ubc94\uc8fc\ud615 \uac12\uc774 &#8216;A\ub4f1\uae09&#8217;, &#8216;B\ub4f1\uae09&#8217;, &#8216;C\ub4f1\uae09&#8217;\ucc98\ub7fc \uadf8 \uc758\ubbf8\uc5d0 \uc21c\uc704\uc801 \uc5f0\uc18d\uc131\uc774 \uc874\uc7ac\ud55c\ub2e4\uba74 \uadf8\ub0e5 3, 2, 1\uacfc \uac19\uc774 \uc218\uce58\uac12\uc73c\ub85c \ub4f1\uae09\uc744 \ub9e4\uce6d\ud558\uba74 \ub429\ub2c8\ub2e4. \ud558\uc9c0\ub9cc &#8216;\uc5ec\uc790&#8217;, &#8216;\ub0a8\uc790&#8217;\ucc98\ub7fc \uc21c\uc704\ub3c4 \uc5f0\uc18d\uc131\ub3c4 \uc5c6\ub2e4\uba74 \ubc18\ub4dc\uc2dc \ub2e4\ub978 \uc758\ubbf8\ub85c\uc758 \uc218\uce58\uac12\uc73c\ub85c \ubcc0\ud658\ud574\uc57c \ud558\ub294\ub370, \uadf8 \ubcc0\ud658\uc740 OneHot \uc778\ucf54\ub529\uc774\ub77c\uace0 \ud569\ub2c8\ub2e4. 
\uacb0\ub860\uc744 \ubbf8\ub9ac \ub9d0\ud558\uba74 &#8216;\uc5ec\uc790&#8217;\ub77c\uba74 \ubca1\ud130 (1,0)\uc73c\ub85c, \ub0a8\uc790\ub77c\uba74 (0,1)\uc73c\ub85c \ubcc0\uacbd\ud574\uc57c \ud569\ub2c8\ub2e4.<\/p>\n<p>\uc0d8\ud50c \ub370\uc774\ud130\ub97c \ud1b5\ud574 \uc774 OneHot \uc778\ucf54\ub529\uc744 \ud558\ub294 \ubc29\ubc95\uc5d0 \ub300\ud574 \uc5b8\uae09\ud558\uaca0\uc2b5\ub2c8\ub2e4. \uc0d8\ud50c \ub370\uc774\ud130\ub294 \uc544\ub798\uc758 \uae00\uc5d0\uc11c \uc18c\uac1c\ud55c \ub370\uc774\ud130\ub97c \uc0ac\uc6a9\ud569\ub2c8\ub2e4.<\/p>\n<blockquote class=\"wp-embedded-content\" data-secret=\"hSmCURzekp\"><p><a href=\"http:\/\/www.gisdeveloper.co.kr\/?p=9871\">\ubd84\uc11d\uac00 \uad00\uc810\uc5d0\uc11c \ub370\uc774\ud130\ub97c \uac1c\ub7b5\uc801\uc73c\ub85c \uc0b4\ud3b4\ubcf4\uae30<\/a><\/p><\/blockquote>\n<p><iframe loading=\"lazy\" class=\"wp-embedded-content\" sandbox=\"allow-scripts\" security=\"restricted\" style=\"position: absolute; clip: rect(1px, 1px, 1px, 1px);\" title=\"&#8220;\ubd84\uc11d\uac00 \uad00\uc810\uc5d0\uc11c \ub370\uc774\ud130\ub97c \uac1c\ub7b5\uc801\uc73c\ub85c \uc0b4\ud3b4\ubcf4\uae30&#8221; &#8212; GIS Developer\" src=\"http:\/\/www.gisdeveloper.co.kr\/?p=9871&#038;embed=true#?secret=noXhIYJLZ5#?secret=hSmCURzekp\" data-secret=\"hSmCURzekp\" width=\"525\" height=\"296\" frameborder=\"0\" marginwidth=\"0\" marginheight=\"0\" scrolling=\"no\"><\/iframe><\/p>\n<p>\uba3c\uc800 \uc0d8\ud50c \ub370\uc774\ud130\ub97c \ubd88\ub7ec\uc635\ub2c8\ub2e4.<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\">\r\nimport pandas as pd\r\n\r\nraw_data = pd.read_csv('.\/datasets\/datasets_1495_2672_abalone.data.csv', \r\n        names=['sex', 'tall', 'radius', 'height', 'weg1', 'weg2', 'weg3', 'weg4', 'ring_cnt'])\r\n    #names=['\uc131\ubcc4', '\ud0a4', '\uc9c0\ub984', '\ub192\uc774', '\uc804\uccb4\ubb34\uac8c', '\ubab8\ud1b5\ubb34\uac8c', '\ub0b4\uc7a5\ubb34\uac8c', '\uaecd\uc9c8\ubb34\uac8c', 
'\uaecd\uc9c8\uc758\uace0\ub9ac\uc218']\r\n<\/pre>\n<p>\uc774 \uc911 sex \uceec\ub7fc\uc740 \uc131\ubcc4\uc778\ub370, \uc774 \uceec\ub7fc\uc758 \uac12\uc744 \uc544\ub798\uc758 \ucf54\ub4dc\ub97c \ud1b5\ud574 \ucd9c\ub825\ud574 \ubd05\ub2c8\ub2e4.<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\">\r\nprint(raw_data[\"sex\"][:10])\r\n<\/pre>\n<pre class='code'>\r\n0    M\r\n1    M\r\n2    F\r\n3    M\r\n4    I\r\n5    I\r\n6    F\r\n7    F\r\n8    M\r\n9    F\r\n<\/pre>\n<p>\ubc94\uc8fc\ud615 \uac12\uc774\ub77c\ub294 \uac83\uc744 \uc54c \uc218 \uc788\ub294\ub370, M\uc740 \uc218\ucef7, F\ub294 \uc554\ucef7, I\ub294 \uc720\ucda9\uc785\ub2c8\ub2e4. \uc774 sex \uceec\ub7fc\uc5d0 \ub300\ud574 OneHot \uc778\ucf54\ub529 \ucc98\ub9ac\ub97c \uc704\ud574 \uba3c\uc800 \ubb38\uc790\ud615\uc744 \uc22b\uc790\ud615\uc73c\ub85c \ubcc0\ud658\ud574\uc8fc\ub294 OrdinalEncoder \ud074\ub798\uc2a4\ub97c \ud1b5\ud574 \ucc98\ub9ac\ud569\ub2c8\ub2e4.<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\">\r\nraw_data_labels = raw_data[\"ring_cnt\"].copy()\r\nraw_data = raw_data.drop(\"ring_cnt\", axis=1)\r\n\r\nraw_data_cat = raw_data[[\"sex\"]]\r\n\r\nfrom sklearn.preprocessing import OrdinalEncoder\r\n\r\nordinal_encoder = OrdinalEncoder()\r\nraw_data_encoded = ordinal_encoder.fit_transform(raw_data_cat)\r\n\r\nprint(raw_data_encoded[:10])\r\nprint(ordinal_encoder.categories_)\r\n<\/pre>\n<pre class='code'>\r\n[[2.]\r\n [2.]\r\n [0.]\r\n [2.]\r\n [1.]\r\n [1.]\r\n [0.]\r\n [0.]\r\n [2.]\r\n [0.]]\r\n[array(['F', 'I', 'M'], dtype=object)]\r\n<\/pre>\n<p>OrdinalEncoder\ub294 \ubc94\uc8fc\ud615 \ub370\uc774\ud130\ub97c \uc218\uce58\uac12\uc73c\ub85c \ubcc0\ud658\ud55c \uacb0\uacfc\ub97c \ubc18\ud658\ud569\ub2c8\ub2e4. 
\ub2e4\uc2dc sex \uceec\ub7fc\uc758 \uac12\uc744 OneHot \uc778\ucf54\ub529\uc744 \uc2dc\ud0a4\uae30 \uc704\ud574 \uc544\ub798\uc758 \ucf54\ub4dc\ub97c \uc218\ud589\ud569\ub2c8\ub2e4. OneHotEncoder\ub294 \ud76c\uc18c\ud589\ub82c(Sparse Matrix)\ub85c \uadf8 \uacb0\uacfc\ub97c \ubc18\ud658\ud569\ub2c8\ub2e4.<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\">\r\nfrom sklearn.preprocessing import OneHotEncoder\r\nonehot_encoder = OneHotEncoder()\r\nraw_data_cat_onehot = onehot_encoder.fit_transform(raw_data_cat)\r\nprint(raw_data_cat_onehot.toarray()[:10])\r\nprint(onehot_encoder.categories_)\r\n<\/pre>\n<pre class='code'>\r\n[[0. 0. 1.]\r\n [0. 0. 1.]\r\n [1. 0. 0.]\r\n [0. 0. 1.]\r\n [0. 1. 0.]\r\n [0. 1. 0.]\r\n [1. 0. 0.]\r\n [1. 0. 0.]\r\n [0. 0. 1.]\r\n [1. 0. 0.]]\r\n[array(['F', 'I', 'M'], dtype=object)]\r\n<\/pre>\n<p>\uc774\uc81c \ubc94\uc8fc\ud615 \uceec\ub7fc\uc778 sex \ub300\uc2e0 OneHot \uc778\ucf54\ub529\ub41c \uac12\uc744 \ub370\uc774\ud130\uc5d0 \ucd94\uac00\ud558\ub3c4\ub85d \ud569\ub2c8\ub2e4.<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\">\r\nimport numpy as np\r\n\r\nraw_data = raw_data.drop(\"sex\", axis=1)\r\nraw_data = np.c_[raw_data_cat_onehot.toarray(), raw_data]\r\nprint(raw_data[:10])\r\n<\/pre>\n<pre class='code'>\r\n[[0.0 0.0 1.0 0.455 0.365  0.095 0.514  0.2245 0.100999 0.15  4]\r\n [0.0 0.0 1.0 0.35  0.265  0.09  0.2255 0.0995 0.0485   0.07  2]\r\n [1.0 0.0 0.0 0.53  0.42   0.135 0.677  0.2565 0.1415   0.21  4]\r\n [0.0 0.0 1.0 0.44  0.365  0.125 0.516  0.2155 0.114    0.155 4]\r\n [0.0 1.0 0.0 0.33  0.255  0.08  0.205  0.0895 0.0395   0.055 2]\r\n [0.0 1.0 0.0 0.425 0.3    0.095 0.3515 0.141  0.0775   0.12  3]\r\n [1.0 0.0 0.0 0.53  0.415  0.15  0.7775 0.237  0.1415   0.33  4]\r\n [1.0 0.0 0.0 0.545 0.425  0.125 0.768  0.294  0.1495   0.26  4]\r\n [0.0 0.0 1.0 0.475 0.37   0.125 0.5095 0.2165 0.1125   0.165 4]\r\n [1.0 0.0 0.0 0.55  0.44   0.15  0.8945 0.3145 0.151    0.32  4]]\r\n<\/pre>\n<p>\ubc94\uc8fc\ud615 \ud0c0\uc785\uc778 sex\uac00 \uc81c\uac70\ub418\uace0 \uc774 sex\uc5d0 \ub300\ud55c \ucd94\uac00\uc801\uc778 3\uac1c\uc758 
\uceec\ub7fc\uc774 \ucd94\uac00\ub418\uc5c8\uc2b5\ub2c8\ub2e4. \ubc14\ub85c \uc774 3\uac1c\uc758 \uceec\ub7fc\uc774 OneHot \uc778\ucf54\ub529\ub41c \uac12\uc785\ub2c8\ub2e4.<\/p>\n<p>\uc774\uc81c \uc218\uce58\ud615 \ub370\uc774\ud130\uc5d0 \ub300\ud55c \uc2a4\ucf00\uc77c\ub9c1\uc785\ub2c8\ub2e4. \uc5ec\uae30\uc11c \uc2a4\ucf00\uc77c\ub9c1\uc774\ub780 \uc11c\ub85c \ub2e4\ub978 \ud2b9\uc131\ub4e4\uc744 \uc77c\uc815\ud55c \uac12\uc758 \ubc94\uc704\ub85c \ub9de\ucdb0\uc8fc\ub294 \uac83\uc785\ub2c8\ub2e4. \ud754\ud788 \uc0ac\uc6a9\ud558\ub294 \ubc29\uc2dd\uc740 Min-Max \uc2a4\ucf00\uc77c\ub9c1\uacfc \ud45c\uc900\ud654(Standardization)\uc774 \uc788\uc2b5\ub2c8\ub2e4. \uba3c\uc800 Min-Max \uc2a4\ucf00\uc77c\ub9c1\uc740 \ud2b9\uc9d5\uc758 \ucd5c\uc18c\uac12\uacfc \ucd5c\ub300\uac12\uc744 \uba3c\uc800 \uacc4\uc0b0\ud558\uace0 \uc774 \uac12\uc744 \uc774\uc6a9\ud558\uc5ec \uc804\uccb4 \ud2b9\uc9d5\uac12\ub4e4\uc744 0~1 \uc0ac\uc774\uc758 \uac12\uc73c\ub85c \ubcc0\uacbd\uc2dc\ud0b5\ub2c8\ub2e4. \ud45c\uc900\ud654\ub294 \uba3c\uc800 \ud3c9\uade0\uacfc \ud45c\uc900\ud3b8\ucc28\ub97c \uad6c\ud558\uace0 \uc804\uccb4 \ub370\uc774\ud130 \uac01\uac01\uc5d0 \ub300\ud574 \ud3c9\uade0\uc744 \ube80 \ud6c4 \ud45c\uc900\ud3b8\ucc28\ub85c \ub098\ub220 \ubd84\uc0b0\uc774 1\uc774 \ub418\ub3c4\ub85d \ub370\uc774\ud130\ub97c \uc870\uc815\ud569\ub2c8\ub2e4. \uac01\uac01 sklearn\uc5d0\uc11c \uc81c\uacf5\ud558\ub294 MinMaxScaler\uc640 StandardScaler \ud074\ub798\uc2a4\ub97c \ud1b5\ud574 \uc218\ud589\uc774 \uac00\ub2a5\ud569\ub2c8\ub2e4. \uc544\ub798\uc758 \ucf54\ub4dc\ub294 Min-Max \uc2a4\ucf00\uc77c\ub9c1\uc744 \uc218\ud589\ud558\ub294 \ucf54\ub4dc\uc785\ub2c8\ub2e4.<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\">\r\nfrom sklearn.preprocessing import MinMaxScaler\r\n\r\nminmax_scaler = MinMaxScaler()\r\nraw_data = minmax_scaler.fit_transform(raw_data)\r\n\r\nprint(raw_data[:10])\r\n<\/pre>\n<pre class='code'>\r\n[[0. 0.  1.   
0.51351351 0.5210084  0.0840708   0.18133522 0.15030262 0.1323239  0.14798206 0.75 ]\r\n [0. 0.  1.   0.37162162 0.35294118 0.07964602  0.07915707 0.06624075 0.06319947 0.06826109 0.25 ]\r\n [1. 0.  0.   0.61486486 0.61344538 0.11946903  0.23906499 0.17182246 0.18564845 0.2077728  0.75 ]\r\n [0. 0.  1.   0.49324324 0.5210084  0.11061947  0.18204356 0.14425017 0.14944042 0.15296462 0.75 ]\r\n [0. 1.  0.   0.34459459 0.33613445 0.07079646  0.07189658 0.0595158  0.05134957 0.0533134  0.25 ]\r\n [0. 1.  0.   0.47297297 0.41176471 0.0840708   0.12378254 0.09414929 0.10138249 0.1180867  0.5  ]\r\n [1. 0.  0.   0.61486486 0.60504202 0.13274336  0.27465911 0.15870881 0.18564845 0.32735426 0.75 ]\r\n [1. 0.  0.   0.63513514 0.62184874 0.11061947  0.27129449 0.19704102 0.1961817  0.25759841 0.75 ]\r\n [0. 0.  1.   0.54054054 0.52941176 0.11061947  0.17974146 0.14492266 0.14746544 0.16292975 0.75 ]\r\n [1. 0.  0.   0.64189189 0.64705882 0.13274336  0.31609704 0.21082717 0.19815668 0.31738914 0.75 ]]\r\n<\/pre>\n<p>\uacb0\uacfc\ub97c \ubcf4\uba74 \uc804\uccb4 \ub370\uc774\ud130\uac00 \ubaa8\ub450 0~1 \uc0ac\uc774\uc758 \uac12\uc73c\ub85c \ubcc0\ud658\ub41c \uac83\uc744 \uc54c \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\ud559\uc2b5 \ub370\uc774\ud130\uc758 \ud2b9\uc131\ub4e4\uc740 \uc218\uce58\uac12 \ubfd0\ub9cc \uc544\ub2c8\ub77c &#8216;\ud06c\ub2e4&#8217;, &#8216;\uc911\uac04&#8217;, &#8216;\uc791\ub2e4&#8217; \ub610\ub294 &#8216;\uc5ec\uc790&#8217;, &#8216;\ub0a8\uc790&#8217;\uc640 \uac19\uc740 \ubc94\uc8fc\uac12\ub3c4 \uc874\uc7ac\ud569\ub2c8\ub2e4. \uba3c\uc800 \ubc94\uc8fc\ud615 \uac12\uc744 \ucc98\ub9ac\ud558\uae30 \uc704\ud574\uc11c\ub294 \uc774 \ubc94\uc8fc\ud615 \uac12\uc744 \uc218\uce58\uac12\uc73c\ub85c \ubcc0\ud658\ud574\uc57c \ud569\ub2c8\ub2e4. 
\ub9cc\uc57d \ubc94\uc8fc\ud615 \uac12\uc774 &#8216;A\ub4f1\uae09&#8217;, &#8216;B\ub4f1\uae09&#8217;, &#8216;C\ub4f1\uae09&#8217;\ucc98\ub7fc \uadf8 \uc758\ubbf8\uc5d0 \uc21c\uc704\uc801 \uc5f0\uc18d\uc131\uc774 \uc874\uc7ac\ud55c\ub2e4\uba74 \uadf8\ub0e5 3, 2, 1\uacfc \uac19\uc774 \uc218\uce58\uac12\uc73c\ub85c \ub4f1\uae09\uc744 \ub9e4\uce6d\ud558\uba74 \ub429\ub2c8\ub2e4. \ud558\uc9c0\ub9cc &#8216;\uc5ec\uc790&#8217;, &#8216;\ub0a8\uc790&#8217;\ucc98\ub7fc \uc21c\uc704\ub3c4 \uc5f0\uc18d\uc131\ub3c4 \uc5c6\ub2e4\uba74 \ubc18\ub4dc\uc2dc \ub2e4\ub978 \uc758\ubbf8\ub85c\uc758 &hellip; <\/p>\n<p class=\"link-more\"><a href=\"http:\/\/www.gisdeveloper.co.kr\/?p=9907\" class=\"more-link\">\ub354 \ubcf4\uae30<span class=\"screen-reader-text\"> &#8220;OneHot \uc778\ucf54\ub529(Encoding) \ubc0f \uc2a4\ucf00\uc77c\ub9c1(Scaling)&#8221;<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[131,133,132],"tags":[],"class_list":["post-9907","post","type-post","status-publish","format-standard","hentry","category-python","category-tensorflow","category-deep-machine-learning"],"_links":{"self":[{"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=\/wp\/v2\/posts\/9907","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=9907"}],"version-history":[{"count":9,"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=\/wp\/v2\/posts\/9907\/revisions"}],"predecessor-version":[{"id":9921,"href":"http:\/\/www.gisdeveloper.co
.kr\/index.php?rest_route=\/wp\/v2\/posts\/9907\/revisions\/9921"}],"wp:attachment":[{"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=9907"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=9907"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=9907"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}