{"id":9891,"date":"2020-07-02T08:35:07","date_gmt":"2020-07-01T23:35:07","guid":{"rendered":"http:\/\/www.gisdeveloper.co.kr\/?p=9891"},"modified":"2020-07-02T09:07:52","modified_gmt":"2020-07-02T00:07:52","slug":"%ea%b3%84%ec%b8%b5%ec%a0%81-%ec%83%98%ed%94%8c%eb%a7%81stratified-sampling","status":"publish","type":"post","link":"http:\/\/www.gisdeveloper.co.kr\/?p=9891","title":{"rendered":"\uacc4\uce35\uc801 \uc0d8\ud50c\ub9c1(Stratified Sampling)"},"content":{"rendered":"<p>\uacc4\uce35\uc801 \uc0d8\ud50c\ub9c1\uc774\ub780 \ubaa8\uc9d1\ub2e8\uc758 \ub370\uc774\ud130 \ubd84\ud3ec \ube44\uc728\uc744 \uc720\uc9c0\ud558\uba74\uc11c \ub370\uc774\ud130\ub97c \uc0d8\ud50c\ub9c1(\ucde8\ub4dd)\ud558\ub294 \uac83\uc744 \ub9d0\ud569\ub2c8\ub2e4. \uc608\ub97c\ub4e4\uc5b4, \ubaa8\uc9d1\ub2e8\uc758 \ub0a8\ub140 \uc131\ube44\uac00 \uac01\uac01 54%, 46%\ub77c\uace0 \ud55c\ub2e4\uba74 \uc774 \ubaa8\uc9d1\ub2e8\uc5d0\uc11c \ucde8\ub4dd\ud55c \uc0d8\ud50c \ub370\uc774\ud130 \uc5ed\uc2dc \ub0a8\ub140 \uc131\ube44\uac00 \uac01\uac01 54%, 46%\uac00 \ub418\ub3c4\ub85d \ud558\ub294 \uac83\uc785\ub2c8\ub2e4.<\/p>\n<p>\uacc4\uce35\uc801 \uc0d8\ud50c\ub9c1\uc758 \uc2e4\uc81c \ud65c\uc6a9\uc740 \ud559\uc2b5 \ub370\uc774\ud130\uc640 \ud14c\uc2a4\ud2b8 \ub370\uc774\ud130 \ub610\ub294 \uac80\uc99d \ub370\uc774\ud130\ub97c \uc77c\uc815\ud55c \ube44\uc728\ub85c \ub098\ub220 \uad6c\ubd84\ud560\ub54c \ubc18\ub4dc\uc2dc \uc801\uc6a9\ub418\uc5b4\uc57c \ud569\ub2c8\ub2e4. 
\uacc4\uce35\uc801 \uc0d8\ud50c\ub9c1\uc744 \uc801\uc6a9\ud558\uc9c0 \uc54a\uace0 \ubd84\ud560\ud55c\ub2e4\uace0 \ud574\ub3c4 \ud655\ub960\uc801\uc73c\ub85c \ube44\uc728\uc774 \uc720\uc9c0\ub420 \uc218 \uc788\ub2e4\uace0 \uae30\ub300\ud558\uaca0\uc9c0\ub9cc \uc774\ub294 \uc0c1\ud669\uc5d0 \ub530\ub77c \uc801\uc808\ud55c \uc548\uc815\uc7a5\uce58\uac00 \ub418\uc9c0 \ubabb\ud569\ub2c8\ub2e4.<\/p>\n<p>\uac04\ub2e8\ud55c \ub370\uc774\ud130\uc14b\uc744 \ud1b5\ud574 \uc774 \uacc4\uce35\uc801 \uc0d8\ud50c\ub9c1\uc744 \uc801\uc6a9\ud558\ub294 \ub0b4\uc6a9\uc744 \uc815\ub9ac\ud558\uaca0\uc2b5\ub2c8\ub2e4. \ub370\uc774\ud130\uc14b\uc740 \uc544\ub798\uc758 \uae00\uc5d0\uc11c \uc18c\uac1c\ud55c \uc804\ubcf5 \ub370\uc774\ud130\uc785\ub2c8\ub2e4.<\/p>\n<blockquote class=\"wp-embedded-content\" data-secret=\"CZVFYgL4TK\"><p><a href=\"http:\/\/www.gisdeveloper.co.kr\/?p=9871\">\ubd84\uc11d\uac00 \uad00\uc810\uc5d0\uc11c \ub370\uc774\ud130\ub97c \uac1c\ub7b5\uc801\uc73c\ub85c \uc0b4\ud3b4\ubcf4\uae30<\/a><\/p><\/blockquote>\n<p><iframe loading=\"lazy\" class=\"wp-embedded-content\" sandbox=\"allow-scripts\" security=\"restricted\" style=\"position: absolute; clip: rect(1px, 1px, 1px, 1px);\" title=\"&#8220;\ubd84\uc11d\uac00 \uad00\uc810\uc5d0\uc11c \ub370\uc774\ud130\ub97c \uac1c\ub7b5\uc801\uc73c\ub85c \uc0b4\ud3b4\ubcf4\uae30&#8221; &#8212; GIS Developer\" src=\"http:\/\/www.gisdeveloper.co.kr\/?p=9871&#038;embed=true#?secret=hDQJC9p50L#?secret=CZVFYgL4TK\" data-secret=\"CZVFYgL4TK\" width=\"525\" height=\"296\" frameborder=\"0\" marginwidth=\"0\" marginheight=\"0\" scrolling=\"no\"><\/iframe><\/p>\n<p>\uc704\uc758 \uae00\uc5d0\uc11c \ud30c\uc545\ud55c \uc804\ubcf5 \ub370\uc774\ud130\ub97c \uac00\uc838\uc624\ub294 \ucf54\ub4dc\ub294 \ub2e4\uc74c\uacfc \uac19\uc2b5\ub2c8\ub2e4.<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\">\r\nimport pandas as pd\r\n\r\nraw_data = pd.read_csv('.\/datasets\/datasets_1495_2672_abalone.data.csv', \r\n        names=['sex', 
'tall', 'radius', 'height', 'weg1', 'weg2', 'weg3', 'weg4', 'ring_cnt'])\r\n<\/pre>\n<p>\uc774\uc81c \uc774 \ub370\uc774\ud130\uc14b\uc5d0\uc11c \uc9c0\ub984(radius)\ub97c \ucd1d 5\uac1c\uc758 \uacc4\uce35\uc73c\ub85c \ub098\ub204\uace0, \ubd84\ud3ec\ub97c \uc2dc\uac01\ud654\ud574\ubd05\ub2c8\ub2e4. \uc9c0\ub984\uc744 \uacc4\uce35\uc801 \uc0d8\ud50c\ub9c1\uc758 \uae30\uc900\uc73c\ub85c \uc0bc\uc740 \uc774\uc720\ub294 \uc774 \uc9c0\ub984\uc774 \ubd84\uc11d\ud558\uace0\uc790 \ud558\ub294 \uacb0\uacfc\uc5d0 \uac00\uc7a5 \uc911\uc694\ud55c \uc758\ubbf8\ub97c \uac00\uc9c4\ub2e4\ub294 \uc5b4\ub5a4 \ud310\ub2e8(\ub300\ud45c\uc801\uc73c\ub85c \ud45c\uc900\uc0c1\uad00\uacc4\uc218;Standard Correlation Coefficient \ubd84\uc11d\uc744 \ud1b5\ud568)\uc5d0 \uc758\ud568\uc785\ub2c8\ub2e4.<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\">\r\nimport numpy as np\r\nfrom sklearn.model_selection import train_test_split\r\nimport matplotlib.pyplot as plt\r\n\r\nraw_data[\"radius_cat\"] = pd.cut(raw_data[\"radius\"], bins=[0., 0.13, 0.28, 0.35, 0.56, np.inf], labels=[1,2,3,4,5])\r\nraw_data[\"radius_cat\"].hist()\r\nplt.show()\r\n<\/pre>\n<p>(0,0.13]\uc744 1\ub85c, (0.13,0.28]\uc744 2\ub85c, (0.28,0.35]\ub97c 3\uc73c\ub85c, (0.35,0.56]\uc744 4\ub85c, (0.56,inf)\ub97c 5\ub85c \uacc4\uce35\ud654\uc2dc\ud0a8 \uac12\uc744 radius_cat \uceec\ub7fc\uc5d0 \ucd94\uac00\ud558\uace0, \uac01 \uacc4\uce35\ubcc4 \ubd84\ud3ec \ud30c\uc545\uc744 \uc704\ud55c \ud788\uc2a4\ud1a0\uadf8\ub7a8\uc740 \uc704 \ucf54\ub4dc\uc758 \uacb0\uacfc\ub85c\uc368 \ub2e4\uc74c\uacfc \uac19\uc2b5\ub2c8\ub2e4.<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.gisdeveloper.co.kr\/wp-content\/uploads\/2020\/07\/ssampling_1.png\" alt=\"\" width=\"1596\" height=\"925\" class=\"aligncenter size-full wp-image-9892\" \/><\/p>\n<p>\uc774\uc81c \uc774 \ub370\uc774\ud130\uc14b\uc744 \ud559\uc2b5 \ub370\uc774\ud130\uc14b\uacfc \ud14c\uc2a4\ud2b8 \ub370\uc774\ud130\uc14b\uc73c\ub85c 
\ub098\ub204\ub294 \ucf54\ub4dc\ub294 \ub2e4\uc74c\uacfc \uac19\uc2b5\ub2c8\ub2e4.<\/p>\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"python\">\r\nfrom sklearn.model_selection import StratifiedShuffleSplit\r\n\r\nsplit = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)\r\nfor train_index, test_index in split.split(raw_data, raw_data[\"radius_cat\"]):\r\n    strat_train_set = raw_data.loc[train_index]\r\n    strat_test_set = raw_data.loc[test_index]\r\n\r\nstrat_train_set[\"radius_cat\"].hist()\r\nplt.show()\r\nstrat_test_set[\"radius_cat\"].hist()\r\nplt.show()\r\n<\/pre>\n<p>\uacc4\uce35\uc801 \uc0d8\ud50c\ub9c1\ub41c \ud559\uc2b5 \ub370\uc774\ud130\uc14b\uacfc \ud14c\uc2a4\ud2b8 \ub370\uc774\ud130\uc14b\uc740 \uac01\uac01 strat_train_set, strat_test_set \uc778\ub370\uc694. \uc774 \ub450 \ub370\uc774\ud130\uc14b\uc5d0 \ub300\ud55c \ubd84\ud3ec\ub97c \ud788\uc2a4\ud1a0\uadf8\ub7a8\uc73c\ub85c \ud45c\uc2dc\ud574 \ubcf4\uba74 \ub2e4\uc74c\uacfc \uac19\uc2b5\ub2c8\ub2e4.<\/p>\n<p><img loading=\"lazy\" decoding=\"async\" src=\"http:\/\/www.gisdeveloper.co.kr\/wp-content\/uploads\/2020\/07\/ssampling_2.png\" alt=\"\" width=\"1277\" height=\"479\" class=\"aligncenter size-full wp-image-9893\" \/><\/p>\n<p>\uc704\uc758 \uacb0\uacfc\ub97c \ubcf4\uba74 \uc2dc\uac01\uc801\uc73c\ub85c\ub3c4 \ud559\uc2b5 \ub370\uc774\ud130\uc14b\uacfc \ud14c\uc2a4\ud2b8 \ub370\uc774\ud130\uc14b\uc5d0\uc11c \uc9c0\ub984\uc5d0 \ub300\ud55c \uceec\ub7fc\uc5d0 \ub300\ud574 \uc6d0\ubcf8 \ub370\uc774\ud130\uc14b\uacfc \ub3d9\uc77c \ube44\uc728\ub85c \uad6c\uc131\ub418\uace0 \uc788\ub2e4\ub294 \uac83\uc744 \uc54c \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n<p>\uc55e\uc11c \uacc4\uce35\uc801 \uc0d8\ud50c\ub9c1\uc744 \uc704\ud574 \ucd94\uac00\ud55c radius_cat \ud544\ub4dc\ub294 \ub354 \uc774\uc0c1 \ud544\uc694\uce58 \uc54a\uc73c\ubbc0\ub85c \ub2e4\uc74c \ucf54\ub4dc\ub97c \ud1b5\ud574 \uc81c\uac70\ud560 \uc218 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n<pre class=\"EnlighterJSRAW\" 
data-enlighter-language=\"python\">\r\nfor d in (strat_train_set, strat_test_set):\r\n    d.drop(\"radius_cat\", axis=1, inplace=True)\r\n<\/pre>\n<p>\ub05d\uc73c\ub85c \ud2b9\uc131\uac04\uc758 \uc0c1\uad00\uad00\uacc4\ub97c \uc870\uc0ac\ud558\uae30 \uc704\ud55c \ubc29\ubc95\uc740 \uc544\ub798 \uae00\uc744 \ucc38\uace0 \ud558\uae30 \ubc14\ub78d\ub2c8\ub2e4.<\/p>\n<blockquote class=\"wp-embedded-content\" data-secret=\"oeQMF02lOm\"><p><a href=\"http:\/\/www.gisdeveloper.co.kr\/?p=9898\">\uc0c1\uad00\uad00\uacc4 \uc870\uc0ac(Correlation Surveying)<\/a><\/p><\/blockquote>\n<p><iframe loading=\"lazy\" class=\"wp-embedded-content\" sandbox=\"allow-scripts\" security=\"restricted\" style=\"position: absolute; clip: rect(1px, 1px, 1px, 1px);\" title=\"&#8220;\uc0c1\uad00\uad00\uacc4 \uc870\uc0ac(Correlation Surveying)&#8221; &#8212; GIS Developer\" src=\"http:\/\/www.gisdeveloper.co.kr\/?p=9898&#038;embed=true#?secret=gP0uodDM1M#?secret=oeQMF02lOm\" data-secret=\"oeQMF02lOm\" width=\"525\" height=\"296\" frameborder=\"0\" marginwidth=\"0\" marginheight=\"0\" scrolling=\"no\"><\/iframe><\/p>\n","protected":false},"excerpt":{"rendered":"<p>\uacc4\uce35\uc801 \uc0d8\ud50c\ub9c1\uc774\ub780 \ubaa8\uc9d1\ub2e8\uc758 \ub370\uc774\ud130 \ubd84\ud3ec \ube44\uc728\uc744 \uc720\uc9c0\ud558\uba74\uc11c \ub370\uc774\ud130\ub97c \uc0d8\ud50c\ub9c1(\ucde8\ub4dd)\ud558\ub294 \uac83\uc744 \ub9d0\ud569\ub2c8\ub2e4. \uc608\ub97c\ub4e4\uc5b4, \ubaa8\uc9d1\ub2e8\uc758 \ub0a8\ub140 \uc131\ube44\uac00 \uac01\uac01 54%, 46%\ub77c\uace0 \ud55c\ub2e4\uba74 \uc774 \ubaa8\uc9d1\ub2e8\uc5d0\uc11c \ucde8\ub4dd\ud55c \uc0d8\ud50c \ub370\uc774\ud130 \uc5ed\uc2dc \ub0a8\ub140 \uc131\ube44\uac00 \uac01\uac01 54%, 46%\uac00 \ub418\ub3c4\ub85d \ud558\ub294 \uac83\uc785\ub2c8\ub2e4. 
\uacc4\uce35\uc801 \uc0d8\ud50c\ub9c1\uc758 \uc2e4\uc81c \ud65c\uc6a9\uc740 \ud559\uc2b5 \ub370\uc774\ud130\uc640 \ud14c\uc2a4\ud2b8 \ub370\uc774\ud130 \ub610\ub294 \uac80\uc99d \ub370\uc774\ud130\ub97c \uc77c\uc815\ud55c \ube44\uc728\ub85c \ub098\ub220 \uad6c\ubd84\ud560\ub54c \ubc18\ub4dc\uc2dc \uc801\uc6a9\ub418\uc5b4\uc57c \ud569\ub2c8\ub2e4. \uacc4\uce35\uc801 \uc0d8\ud50c\ub9c1\uc744 \uc801\uc6a9\ud558\uc9c0 \uc54a\uace0 &hellip; <\/p>\n<p class=\"link-more\"><a href=\"http:\/\/www.gisdeveloper.co.kr\/?p=9891\" class=\"more-link\">\ub354 \ubcf4\uae30<span class=\"screen-reader-text\"> &#8220;\uacc4\uce35\uc801 \uc0d8\ud50c\ub9c1(Stratified Sampling)&#8221;<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[131,132],"tags":[],"class_list":["post-9891","post","type-post","status-publish","format-standard","hentry","category-python","category-deep-machine-learning"],"_links":{"self":[{"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=\/wp\/v2\/posts\/9891","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=9891"}],"version-history":[{"count":3,"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=\/wp\/v2\/posts\/9891\/revisions"}],"predecessor-version":[{"id":9906,"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=\/wp\/v2\/posts\/9891\/revisions\/9906"}],"wp:attachment":[{"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=9891"}],"wp:term":[{"tax
onomy":"category","embeddable":true,"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=9891"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/www.gisdeveloper.co.kr\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=9891"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}