PyTorch for DL


Overview

  • What is ML? What is human intelligence?
    • Input: information
    • Output: inference
    • ML needs lots of training data
  • Rule-based systems vs. representation learning
  • PyTorch
    • A Python package that provides two high-level features:
      • Tensor computation (like NumPy) with strong GPU acceleration
      • Deep neural networks built on a tape-based autograd system
    • More Pythonic (imperative)
      • Flexible, intuitive, cleaner code, easy to debug
    • More "neural-network-like"
      • Write code the way the network works
      • forward/backward passes (see the sketch below)
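A minimal sketch of these two features working together (written with the current `torch.tensor` API; the lecture examples below use the older `Variable` wrapper):

```python
import torch

# Tensor computation: NumPy-like, executed imperatively line by line
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = (x * x).sum()  # forward pass: y = x1^2 + x2^2 + x3^2

# Tape-based autograd: the recorded forward ops are replayed in
# reverse to compute gradients
y.backward()       # backward pass: dy/dx = 2x
print(x.grad)      # tensor([2., 4., 6.])
```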

Linear Model

```python
w = 1.0  # a random guess: any value

# our model for the forward pass
def forward(x):
    return x * w
```

```python
# Loss function
def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) * (y_pred - y)
```

```python
import numpy as np
import matplotlib.pyplot as plt

x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

w_list = []
mse_list = []
for w in np.arange(0.0, 4.1, 0.1):
    print("w=", w)
    l_sum = 0
    for x_val, y_val in zip(x_data, y_data):
        y_pred_val = forward(x_val)
        l = loss(x_val, y_val)
        l_sum += l
        print("\t", x_val, y_val, y_pred_val, l)
    print("MSE=", l_sum / 3)
    w_list.append(w)
    mse_list.append(l_sum / 3)

plt.plot(w_list, mse_list)
plt.ylabel('Loss')
plt.xlabel('w')
plt.show()
```

# Gradient Descent

- Data, Model, Loss, and Gradient

```python
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

w = 1.0  # a random guess: any value

# our model: forward pass
def forward(x):
    return x * w

# loss function
def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) * (y_pred - y)

# compute gradient: d_loss/d_w = 2x(xw - y)
def gradient(x, y):
    return 2 * x * (x * w - y)
```

- Training: updating the weight

```python
# Before training
print("predict (before training)", 4, forward(4))

# Training loop
for epoch in range(100):
    for x_val, y_val in zip(x_data, y_data):
        grad = gradient(x_val, y_val)
        w = w - 0.01 * grad
        print("\tgrad: ", x_val, y_val, grad)
        l = loss(x_val, y_val)
    print("progress:", epoch, "w=", w, "loss=", l)

# After training
print("predict (after training)", "4 hours", forward(4))
```

# Back-Propagation

```python
import torch
from torch.autograd import Variable

x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

w = Variable(torch.Tensor([1.0]), requires_grad=True)  # any random value
```

```python
from torch.autograd import Variable

x = Variable(torch.randn(1, 10))
prev_h = Variable(torch.randn(1, 20))
W_h = Variable(torch.randn(20, 20))
W_x = Variable(torch.randn(20, 10))

i2h = torch.mm(W_x, x.t())       # (20, 1)
h2h = torch.mm(W_h, prev_h.t())  # (20, 1)
next_h = i2h + h2h
next_h = next_h.tanh()

next_h.backward(torch.ones(20, 1))  # gradient argument must match next_h's shape
```

```python
# our model forward pass
def forward(x):
    return x * w

# Loss function
def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) * (y_pred - y)

# Before training
print("predict (before training)", 4, forward(4).data[0])
```

```python
# Training loop
for epoch in range(10):
    for x_val, y_val in zip(x_data, y_data):
        l = loss(x_val, y_val)
        l.backward()
        print("\tgrad: ", x_val, y_val, w.grad.data[0])
        w.data = w.data - 0.01 * w.grad.data

        # Manually zero the gradients after updating weights
        w.grad.data.zero_()

    print("progress:", epoch, l.data[0])

# After training
print("predict (after training)", 4, forward(4).data[0])
```
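A quick sanity check (added here for illustration, not part of the lecture code): the gradient autograd computes for this loss should match the analytic formula `2x(xw - y)` from the Gradient Descent section.

```python
# Compare autograd's gradient with the hand-derived one for a single sample.
import torch
from torch.autograd import Variable

w = Variable(torch.Tensor([1.0]), requires_grad=True)
x_val, y_val = 2.0, 4.0

l = (x_val * w - y_val) ** 2  # same squared-error loss as above
l.backward()

print("autograd:", w.grad.data[0])                           # -8.0
print("analytic:", 2 * x_val * (x_val * w.data[0] - y_val))  # -8.0
```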
# Linear Regression in Torch Way

```python
w = Variable(torch.Tensor([1.0]), requires_grad=True)  # Any random value

# our model forward pass
def forward(x):
    return x * w

# Loss function
def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) * (y_pred - y)

# Training loop
for epoch in range(10):
    for x_val, y_val in zip(x_data, y_data):
        l = loss(x_val, y_val)
        l.backward()
        print("\tgrad: ", x_val, y_val, w.grad.data[0])
        w.data = w.data - 0.01 * w.grad.data

        # Manually zero the gradients after updating weights
        w.grad.data.zero_()

    print("progress:", epoch, l.data[0])
```

#### PyTorch Rhythm

- Design your model as a class built from Variables
- Construct the loss and optimizer (select from the PyTorch API)
- Run the training cycle (forward, backward, update)

```python
import torch
from torch.autograd import Variable

x_data = Variable(torch.Tensor([[1.0], [2.0], [3.0]]))
y_data = Variable(torch.Tensor([[2.0], [4.0], [6.0]]))

class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.linear = torch.nn.Linear(1, 1)  # one input, one output

    def forward(self, x):
        y_pred = self.linear(x)
        return y_pred

model = Model()
```

```python
criterion = torch.nn.MSELoss(size_average=False)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(500):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print(epoch, loss.data[0])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

hour_var = Variable(torch.Tensor([[4.0]]))
print("predict (after training)", 4, model(hour_var).data[0][0])
```

# CIFAR10 Classifier Example

```python
import torch
import torchvision
import torchvision.transforms as transforms
```

```python
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')
```

```python
import matplotlib.pyplot as plt
import numpy as np

# function to show an image
def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# get some random training images
dataiter = iter(trainloader)
images, labels = next(dataiter)

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
```

```python
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
```

```python
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
```

```python
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')
```

```python
PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)
```

```python
dataiter = iter(testloader)
images, labels = next(dataiter)

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))
```

```python
net = Net()
net.load_state_dict(torch.load(PATH))
```

```python
outputs = net(images)
```

```python
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))
```

```python
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
```

```python
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))
```
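Training this network on a GPU follows the same rhythm. A minimal sketch (the optional GPU step from the official tutorial, reusing `net`, `criterion`, `optimizer`, and `trainloader` from above):

```python
# Move the network and every batch to the CUDA device, if one is available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)

for epoch in range(2):
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(net(inputs), labels)
        loss.backward()
        optimizer.step()
```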
# Logistic Regression with Torch

```python
import torch
import torch.nn.functional as F
from torch.autograd import Variable

x_data = Variable(torch.Tensor([[1.0], [2.0], [3.0], [4.0]]))
y_data = Variable(torch.Tensor([[0.], [0.], [1.], [1.]]))

class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.linear = torch.nn.Linear(1, 1)  # One in and one out

    def forward(self, x):
        y_pred = F.sigmoid(self.linear(x))
        return y_pred

# our model
model = Model()

criterion = torch.nn.BCELoss(size_average=True)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Training loop
for epoch in range(1000):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x_data)

    # Compute and print loss
    loss = criterion(y_pred, y_data)
    print(epoch, loss.data[0])

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# After training
hour_var = Variable(torch.Tensor([[1.0]]))
print("predict 1 hour ", 1.0, model(hour_var).data[0][0] > 0.5)
hour_var = Variable(torch.Tensor([[7.0]]))
print("predict 7 hours", 7.0, model(hour_var).data[0][0] > 0.5)
```

# Wide & Deep NN

```python
class Model(torch.nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate three nn.Linear modules.
        """
        super(Model, self).__init__()
        self.l1 = torch.nn.Linear(8, 6)
        self.l2 = torch.nn.Linear(6, 4)
        self.l3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """
        In the forward function we accept a Variable of input data and
        we must return a Variable of output data. We can use Modules
        defined in the constructor as well as arbitrary operators
        on Variables.
        """
        out1 = self.sigmoid(self.l1(x))
        out2 = self.sigmoid(self.l2(out1))
        y_pred = self.sigmoid(self.l3(out2))
        return y_pred
```
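A quick way to verify the 8 → 6 → 4 → 1 layer sizes (an illustrative check, not part of the lecture code) is to push a random batch through the model:

```python
# Sanity check: a batch of 5 samples with 8 features each should come
# out as 5 sigmoid outputs of size 1. Reuses the Model class above.
import torch

model = Model()
dummy = torch.randn(5, 8)  # random fake batch, modern tensor API
print(model(dummy).shape)  # torch.Size([5, 1])
```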
```python
import numpy as np
import torch
from torch.autograd import Variable

xy = np.loadtxt('data-diabetes.csv', delimiter=',', dtype=np.float32)
x_data = Variable(torch.from_numpy(xy[:, 0:-1]))
y_data = Variable(torch.from_numpy(xy[:, [-1]]))

class Model(torch.nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate three nn.Linear modules.
        """
        super(Model, self).__init__()
        self.l1 = torch.nn.Linear(8, 6)
        self.l2 = torch.nn.Linear(6, 4)
        self.l3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """
        In the forward function we accept a Variable of input data and
        we must return a Variable of output data. We can use Modules
        defined in the constructor as well as arbitrary operators
        on Variables.
        """
        out1 = self.sigmoid(self.l1(x))
        out2 = self.sigmoid(self.l2(out1))
        y_pred = self.sigmoid(self.l3(out2))
        return y_pred

# our model
model = Model()

# Construct our loss function and an optimizer. The call to
# model.parameters() in the SGD constructor will contain the learnable
# parameters of the three nn.Linear modules which are members of the model.
criterion = torch.nn.BCELoss(size_average=True)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# Training loop
for epoch in range(100):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x_data)

    # Compute and print loss
    loss = criterion(y_pred, y_data)
    print(epoch, loss.data[0])

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
```

# DataLoader

#### Available datasets through DataLoader

- MNIST and FashionMNIST
- COCO (Captioning and Detection)
- LSUN (Classification)
- ImageFolder
- Imagenet-12
- CIFAR10 and CIFAR100
- STL10
- SVHN
- PhotoTour

```python
import torch
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms

# dataset = Dataset()
# train_loader = DataLoader(dataset=dataset, batch_size=32,
#                           shuffle=True, num_workers=2)

batch_size = 32

# MNIST Dataset
train_dataset = datasets.MNIST(root='./data/',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='./data/',
                              train=False,
                              transform=transforms.ToTensor())

# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

for batch_idx, (data, target) in enumerate(train_loader):
    data, target = Variable(data), Variable(target)
```

```python
dataset = datasets.MNIST(root='./data/', transform=transforms.ToTensor(),
                         download=True)
train_loader = DataLoader(dataset=dataset, batch_size=32,
                          shuffle=True, num_workers=2)
```
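The commented-out `Dataset()` line above hints at the custom-dataset pattern: subclass `Dataset` and implement `__len__` and `__getitem__`, then hand the instance to a `DataLoader`. A minimal sketch, assuming the same `data-diabetes.csv` layout as in the Wide & Deep section:

```python
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

class DiabetesDataset(Dataset):
    """Illustrative custom dataset: the last CSV column is the label."""

    def __init__(self):
        xy = np.loadtxt('data-diabetes.csv', delimiter=',', dtype=np.float32)
        self.len = xy.shape[0]
        self.x_data = torch.from_numpy(xy[:, 0:-1])
        self.y_data = torch.from_numpy(xy[:, [-1]])

    def __getitem__(self, index):
        # return one (features, label) pair
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len

dataset = DiabetesDataset()
train_loader = DataLoader(dataset=dataset, batch_size=32,
                          shuffle=True, num_workers=2)
```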
# Softmax Classifier Cost Function

```python
# Cross entropy example
import numpy as np

# One hot
# 0: 1 0 0
# 1: 0 1 0
# 2: 0 0 1
Y = np.array([1, 0, 0])

Y_pred1 = np.array([0.7, 0.2, 0.1])
Y_pred2 = np.array([0.1, 0.3, 0.6])
print("loss1 = ", np.sum(-Y * np.log(Y_pred1)))
print("loss2 = ", np.sum(-Y * np.log(Y_pred2)))
```

```python
import torch
import torch.nn as nn
from torch.autograd import Variable

# Softmax + CrossEntropy (logSoftmax + NLLLoss)
loss = nn.CrossEntropyLoss()

# target is of size nBatch
# each element in target has to have 0 <= value < nClasses (0-2)
# Input is the class index, not one-hot
Y = Variable(torch.LongTensor([0]), requires_grad=False)

# input is of size nBatch x nClasses = 1 x 3
# Y_pred are logits (not softmax outputs)
Y_pred1 = Variable(torch.Tensor([[2.0, 1.0, 0.1]]))
Y_pred2 = Variable(torch.Tensor([[0.5, 2.0, 0.3]]))

l1 = loss(Y_pred1, Y)
l2 = loss(Y_pred2, Y)
print("PyTorch Loss1 = ", l1.data, "\nPyTorch Loss2 = ", l2.data)
```

```python
# Softmax + CrossEntropy (logSoftmax + NLLLoss)
loss = nn.CrossEntropyLoss()

# target is of size nBatch
# each element in target has to have 0 <= value < nClasses (0-2)
# Input is the class index, not one-hot
Y = Variable(torch.LongTensor([2, 0, 1]), requires_grad=False)

# input is of size nBatch x nClasses = 3 x 3
# Y_pred are logits (not softmax outputs)
Y_pred1 = Variable(torch.Tensor([[0.1, 0.2, 0.9],
                                 [1.1, 0.1, 0.2],
                                 [0.2, 2.1, 0.1]]))
Y_pred2 = Variable(torch.Tensor([[0.8, 0.2, 0.3],
                                 [0.2, 0.3, 0.5],
                                 [0.2, 0.2, 0.5]]))

l1 = loss(Y_pred1, Y)
l2 = loss(Y_pred2, Y)
print("Batch Loss1 = ", l1.data, "\nBatch Loss2 = ", l2.data)
```

```python
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.l1 = nn.Linear(784, 520)
        self.l2 = nn.Linear(520, 320)
        self.l3 = nn.Linear(320, 240)
        self.l4 = nn.Linear(240, 120)
        self.l5 = nn.Linear(120, 10)

    def forward(self, x):
        x = x.view(-1, 784)  # Flatten the data (n, 1, 28, 28) -> (n, 784)
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = F.relu(self.l4(x))
        return self.l5(x)  # No activation needed: CrossEntropyLoss applies logSoftmax
```

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms

# Training settings
batch_size = 64

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))])),
    batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))])),
    batch_size=batch_size, shuffle=True)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.l1 = nn.Linear(784, 520)
        self.l2 = nn.Linear(520, 320)
        self.l3 = nn.Linear(320, 240)
        self.l4 = nn.Linear(240, 120)
        self.l5 = nn.Linear(120, 10)

    def forward(self, x):
        x = x.view(-1, 784)  # Flatten the data (n, 1, 28, 28) -> (n, 784)
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = F.relu(self.l4(x))
        return self.l5(x)

model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data[0]))

def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        # sum up batch loss
        test_loss += F.cross_entropy(output, target, size_average=False).data[0]
        # get the index of the max log-probability
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

for epoch in range(1, 10):
    train(epoch)
    test()
```
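After training, a quick inference check (an illustrative addition, in the same Variable-era style as the script above, reusing `model` and `test_loader`) takes one test batch and compares the argmax predictions with the labels:

```python
# The predicted class is the argmax over the 10 output logits.
data, target = next(iter(test_loader))
output = model(Variable(data, volatile=True))
pred = output.data.max(1)[1]  # index of the max logit per sample
print("predicted:", pred[:10].tolist())
print("actual:   ", target[:10].tolist())
```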

If this post was helpful, please click the recommend button :)






© 2020 modified by Tae You Kim
