class: title-slide, left, bottom

# Introduction to Deep Learning
----
## **Neural Net training with Torch**
### Issac
### May 27, 2021

---
class: inverse, middle, center

# Review of the Last Session

The code for building a neural net

---
# Building a neural network

.left-column[
## nn_module
]
.right-column[

Three basic components are required:

* classname

* initialize

  The function that runs first, when the network object is created.
  
  This is where the pieces that go into the network (layers, activation functions) are defined.

* forward

  Defines the actual network architecture.
  
  This is where the flow of data, from input to output, is defined.
]

---
# Example: building a network

.left-column[
## One hidden layer
]
.right-column[

```r
library(torch)

TwoLayerNet <- nn_module(
    classname = "TwoLayerNet",
    initialize = function(data_in, hidden, data_out){
        # layers and activation used by the network
        self$hidden1 <- nn_linear(data_in, hidden)
        self$output_layer <- nn_linear(hidden, data_out)
        self$tanh <- nn_tanh()
    },
    forward = function(X) {
        # input -> hidden (tanh) -> output
        x <- self$tanh(self$hidden1(X))
        y_hat <- self$output_layer(x)
        return(y_hat)
    }
)
```
]

---
class: inverse, middle, center

# Training a Neural Network

Neural Net training with `torch`

---
# What you need to train a network

.left-column[
]
.right-column[

* A network

```r
my_net <- TwoLayerNet(8, 10, 3)
```

* A loss function

  Pick an appropriate loss function from the `torch` library.

```r
mse_loss <- nn_mse_loss(reduction = "mean")
```

* A gradient descent implementation (optimizer)

```r
optimizer <- optim_sgd(my_net$parameters, lr = 1e-5)
```
]

---
# Structure of the training loop

.panelset[
.panel[.panel-name[Loss calculation]
.left-column[
]
.right-column[

```r
store_loss <- rep(0, 50000)

for (epoch in 1:50000){
    
    optimizer$zero_grad()
*   output <- my_net(x_tensor)
*   loss <- mse_loss(output, y_tensor)
    loss$backward()
    optimizer$step()
    
    store_loss[epoch] <- as.numeric(loss$item())
    
    if (epoch %% 5000 == 0){
        cat(sprintf("Loss at epoch %d: %.2f\n", epoch, store_loss[epoch]))
    }
}
```
]
]
.panel[.panel-name[Gradient descent]
.left-column[
]
.right-column[

```r
store_loss <- rep(0, 50000)

for (epoch in 1:50000){
    
*   optimizer$zero_grad()
    output <- my_net(x_tensor)
    loss <- mse_loss(output, y_tensor)
*   loss$backward()
*   optimizer$step()
    
    store_loss[epoch] <- as.numeric(loss$item())
    
    if (epoch %% 5000 == 0){
        cat(sprintf("Loss at epoch %d: %.2f\n", epoch, store_loss[epoch]))
    }
}
```
]
]
]

---
class: inverse, middle, center

# Preparing the Data

Dataset & Dataloader in `torch`

---
# `torch` datasets

* A fact not to forget: `torch` and `R` are clearly different worlds, so plain `R` data has to be wrapped before `torch` can use it.

.left-column[
## The Dataset class constructor
]
.right-column[

A dataset object is built with almost the same structure as a neural network:

* name

* initialize

* .getitem

* .length

A minimal sketch of these four pieces follows on the next slide.
]
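---
# A minimal Dataset sketch

.left-column[
## Toy example
]
.right-column[

Before the penguin example, here is a minimal sketch of the four pieces on toy data. The `toy_dataset` name and the random matrix are purely illustrative.

```r
library(torch)

# a toy dataset wrapping an R matrix: 10 rows, 2 features, 1 numeric target
toy_dataset <- dataset(
    name = "toy_data",
    initialize = function(x, y) {
        self$x <- torch_tensor(as.matrix(x))
        self$y <- torch_tensor(y)
    },
    .getitem = function(index) {
        # return a list(features, target) for the requested rows
        list(self$x[index, ], self$y[index])
    },
    .length = function() {
        self$x$size()[[1]]
    }
)

toy <- toy_dataset(matrix(rnorm(20), ncol = 2), rnorm(10))
toy$.length()    # 10
toy$.getitem(1)  # the first observation as a (features, target) pair
```
]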
---
# `torch` dataset example

.panelset[
.panel[.panel-name[Penguin data]
.left-column[
]
.right-column[

```r
library(palmerpenguins)
library(dplyr)   # for %>%, select(), everything()

penguins %>% head(5)
```

```
## # A tibble: 5 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <fct>   <fct>           <dbl>         <dbl>            <int>       <int> <fct>
## 1 Adelie  Torge…           39.1          18.7              181        3750 male 
## 2 Adelie  Torge…           39.5          17.4              186        3800 fema…
## 3 Adelie  Torge…           40.3          18                195        3250 fema…
## 4 Adelie  Torge…           NA            NA                 NA          NA <NA> 
## 5 Adelie  Torge…           36.7          19.3              193        3450 fema…
## # … with 1 more variable: year <int>
```
]
]
.panel[.panel-name[Preprocessing]
.left-column[
]
.right-column[

```r
library(recipes)

penguin_data <- penguins %>% 
    recipe(species ~ .) %>% 
    step_impute_mode(all_nominal()) %>%              # impute missing factors with the mode
    step_impute_mean(all_numeric()) %>%              # impute missing numerics with the mean
    step_dummy(all_nominal(), -all_outcomes()) %>%   # dummy-code island and sex
    step_integer(all_nominal()) %>%                  # code species as 1, 2, 3
    step_normalize(all_predictors(), -all_outcomes()) %>% 
    prep() %>% 
    juice()

penguin_data %>% dim()
```

```
## [1] 344   9
```
]
]
.panel[.panel-name[Input features]
.left-column[
]
.right-column[

```r
penguin_data %>% head()
```

```
## # A tibble: 6 x 9
##   bill_length_mm bill_depth_mm flipper_length_mm body_mass_g   year species
##            <dbl>         <dbl>             <dbl>       <dbl>  <dbl>   <dbl>
## 1         -0.886         0.787          -1.42      -0.565     -1.26       1
## 2         -0.812         0.126          -1.06      -0.502     -1.26       1
## 3         -0.665         0.431          -0.422     -1.19      -1.26       1
## 4          0             0               0.00601    0.000305  -1.26       1
## 5         -1.33          1.09           -0.565     -0.940     -1.26       1
## 6         -0.849         1.75           -0.779     -0.690     -1.26       1
## # … with 3 more variables: island_Dream <dbl>, island_Torgersen <dbl>,
## #   sex_male <dbl>
```
]
]
.panel[.panel-name[Dataset class]
.left-column[
]
.right-column[

```r
penguin_dataset <- dataset(
    name = "penguin_data",
    initialize = function() {
        # species in column 1, the 8 predictors in columns 2:9
        self$data <- torch_tensor(as.matrix(select(penguin_data, species, everything())))
    },
    .getitem = function(index) {
        x <- self$data[index, 2:9]
        y <- self$data[index, 1]
        list(x, y)
    },
    .length = function() {
        self$data$size()[[1]]
    }
)
```
]
]
]

---
# The penguin dataset

.panelset[
.panel[.panel-name[Creating the object]
.left-column[
Creating the data object
]
.right-column[

```r
torch_penguin_data <- penguin_dataset()
torch_penguin_data
```

```
## <penguin_data>
##   Inherits from: <dataset>
##   Public:
##     .getitem: function (index) 
##     .length: function () 
##     clone: function (deep = FALSE) 
##     data: torch_tensor, R7
##     initialize: function ()
```
]
]
.panel[.panel-name[A data sample]
.left-column[
]
.right-column[

```r
torch_penguin_data$.getitem(1:6)
```

```
## [[1]]
## torch_tensor
## -0.8858  0.7866 -1.4205 -0.5650 -1.2575 -0.7497  2.3662  0.9587
## -0.8123  0.1264 -1.0638 -0.5024 -1.2575 -0.7497  2.3662 -1.0400
## -0.6654  0.4311 -0.4219 -1.1903 -1.2575 -0.7497  2.3662 -1.0400
##  0.0000  0.0000  0.0060  0.0003 -1.2575 -0.7497  2.3662  0.9587
## -1.3267  1.0913 -0.5646 -0.9401 -1.2575 -0.7497  2.3662 -1.0400
## -0.8491  1.7515 -0.7785 -0.6900 -1.2575 -0.7497  2.3662  0.9587
## [ CPUFloatType{6,8} ]
## 
## [[2]]
## torch_tensor
##  1
##  1
##  1
##  1
##  1
##  1
## [ CPUFloatType{6} ]
```
]
]
.panel[.panel-name[Dataloader]
.left-column[
]
.right-column[

```r
penguin_dl <- dataloader(torch_penguin_data, batch_size = 8)
penguin_dl$.length()
```

```
## [1] 43
```
]
]
.panel[.panel-name[Batch structure]
.left-column[
]
.right-column[

```r
b <- penguin_dl$.iter()$.next()
length(b)
```

```
## [1] 2
```

```r
b[[1]]
```

```
## torch_tensor
## -0.8858  0.7866 -1.4205 -0.5650 -1.2575 -0.7497  2.3662  0.9587
## -0.8123  0.1264 -1.0638 -0.5024 -1.2575 -0.7497  2.3662 -1.0400
## -0.6654  0.4311 -0.4219 -1.1903 -1.2575 -0.7497  2.3662 -1.0400
##  0.0000  0.0000  0.0060  0.0003 -1.2575 -0.7497  2.3662  0.9587
## -1.3267  1.0913 -0.5646 -0.9401 -1.2575 -0.7497  2.3662 -1.0400
## -0.8491  1.7515 -0.7785 -0.6900 -1.2575 -0.7497  2.3662  0.9587
## -0.9225  0.3295 -1.4205 -0.7213 -1.2575 -0.7497  2.3662 -1.0400
## -0.8674  1.2437 -0.4219  0.5918 -1.2575 -0.7497  2.3662  0.9587
## [ CPUFloatType{8,8} ]
```

An alternative way to pull a single batch is sketched on the next slide.
]
]
]
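---
# Pulling batches with iterator helpers

.left-column[
## Dataloader iteration
]
.right-column[

A small sketch, assuming the `torch_penguin_data` object from the previous slide. The shuffled dataloader and the `penguin_dl_shuffled` name are only illustrative; `dataloader_make_iter()` and `dataloader_next()` are an alternative to calling `$.iter()$.next()` directly.

```r
library(torch)

# shuffle so each epoch sees the batches in a different order
penguin_dl_shuffled <- dataloader(torch_penguin_data,
                                  batch_size = 8, shuffle = TRUE)

# pull one batch through the helper functions
it <- dataloader_make_iter(penguin_dl_shuffled)
b  <- dataloader_next(it)

b[[1]]$size()  # features: 8 x 8
b[[2]]$size()  # targets:  8
```
]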
---
# Training

.panelset[
.panel[.panel-name[Setup]
.left-column[
]
.right-column[

```r
device <- if (cuda_is_available()) torch_device("cuda:0") else "cpu"

my_net <- my_net$to(device = device)
criterion <- nn_cross_entropy_loss()
optimizer <- optim_sgd(my_net$parameters, lr = 0.1, momentum = 0.9)
num_epochs <- 5

my_net$train()
```
]
]
.panel[.panel-name[Training loop]
.left-column[
]
.right-column[

```r
for (epoch in 1:num_epochs) {
    
    train_losses <- c()
    
    coro::loop(for (b in penguin_dl) {
        optimizer$zero_grad()
        output <- my_net(b[[1]]$to(device = device))
        loss <- criterion(output, b[[2]]$to(device = device, dtype = torch_long()))
        loss$backward()
        optimizer$step()
        
        train_losses <- c(train_losses, loss$item())
    })
    
    cat(sprintf("\nLoss at epoch %d: training: %3f\n", epoch, mean(train_losses)))
}
```

```
## 
## Loss at epoch 1: training: 0.852929
## 
## Loss at epoch 2: training: 2.296094
## 
## Loss at epoch 3: training: 3.181354
## 
## Loss at epoch 4: training: 4.416221
## 
## Loss at epoch 5: training: 1.522098
```
]
]
.panel[.panel-name[Results]
.left-column[
]
.right-column[

```
## 
## Loss at epoch 1: training: 0.664985
## 
## Loss at epoch 2: training: 0.045079
## 
## Loss at epoch 3: training: 0.042830
## 
## Loss at epoch 4: training: 0.022387
## 
## Loss at epoch 5: training: 0.016322
```

A sketch of checking accuracy after training follows on the last slide.
]
]
]
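---
# After training: a quick accuracy check

.left-column[
## Evaluation
]
.right-column[

A sketch of how training accuracy could be checked on the same data, assuming the `my_net`, `torch_penguin_data`, and `device` objects from the previous slides. There is no dropout or batch norm here, but switching to `eval()` mode before predicting is still good practice.

```r
library(torch)

my_net$eval()

# all 344 rows: predictors in columns 2:9, species code (1, 2, 3) in column 1
x_all <- torch_penguin_data$data[ , 2:9]$to(device = device)
y_all <- torch_penguin_data$data[ , 1]

with_no_grad({
    logits <- my_net(x_all)
    pred   <- torch_argmax(logits, dim = 2)  # predicted class per row
})

# share of rows where the predicted species matches the coded species
mean(as_array(pred$to(device = "cpu")) == as_array(y_all))
```
]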