Здесь я постараюсь поделиться своим опытом изучения свёрточных нейронных сетей (CNN). Я подобрал небольшие простые примеры кода, чтобы в теме можно было быстро разобраться. Используются Python (≥3.6) и Keras (≥2) с TensorFlow в качестве бэкенда. Для этих примеров лучше всего подходит Jupyter Notebook. Что ещё? Запускайте код и получайте удовольствие…

1. Распознавание почерка

Здесь загружается набор данных MNIST. После обучения и валидации модели её качество оценивается на тестовых данных. Для запуска кода требуется графический процессор или большой объём оперативной памяти. Также требуется подключение к Интернету.

#importing libraries
import numpy
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
# Use the TensorFlow tensor layout (batch, height, width, channels).
# The original call, set_image_dim_ordering('tf'), is the pre-Keras-2 name and
# was removed in later Keras 2 releases; set_image_data_format is the Keras 2
# equivalent. Plain ASCII quotes are required: typographic quotes are a
# SyntaxError in Python.
K.set_image_data_format('channels_last')
# Seed NumPy's global random generator.
seed = 2018
numpy.random.seed(seed)
# Load MNIST and add a trailing channel axis: (samples, 28, 28, 1), float32.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32')
# Pre-processing: divide pixel values by 255 and one-hot encode the labels.
X_train = X_train / 255
X_test = X_test / 255
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
# Number of classes = width of the one-hot encoded label matrix.
num_classes = y_test.shape[1]
#function for creating deep network model
def create_model():
    """Build and compile the small CNN used for the 28x28x1 input images.

    Layers, in order: a 5x5 convolution with 32 filters (ReLU), 2x2 max
    pooling, dropout of 0.2, flatten, a 128-unit ReLU dense layer and a
    softmax output layer with ``num_classes`` units. ``num_classes`` is read
    from module scope.

    Returns:
        The ``Sequential`` model, compiled with the Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    layers = [
        Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    network = Sequential(layers)
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network
# Build the network and print its layer summary.
model = create_model()
model.summary()
# Train for 10 epochs in mini-batches of 200 samples; (X_test, y_test) is
# passed as the validation data for fit().
model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=200,
    verbose=1,
)
# scores[1] is the accuracy metric the model was compiled with; the error
# rate is reported as 100% minus that accuracy.
scores = model.evaluate(X_test, y_test, verbose=1)
error_percent = 100 - scores[1] * 100
print("CNN Error: %.2f%%" % error_percent)

2. Распознавание объектов

Здесь сеть VGG16, предварительно обученная на наборе данных ImageNet, используется для распознавания объекта (обычного предмета из повседневной жизни). Графический процессор не требуется. Требуется подключение к Интернету.

#importing libraries
import numpy as np
from IPython.display import Image, display
from keras.applications import VGG16, imagenet_utils
from keras.preprocessing.image import img_to_array, load_img
# Size the test image is resized to, and the ImageNet input pre-processing
# function applied before prediction.
inputShape = (224, 224)
preprocess = imagenet_utils.preprocess_input
# Load VGG16 with its 'imagenet' pre-trained weights.
model = VGG16(weights="imagenet")
# Show the test image in the notebook (replace the path with your own image).
display(Image('./test.jpg'))
# Load the image at the target size, convert it to an array, add a leading
# batch axis and run the pre-processing function.
pil_image = load_img("./test.jpg", target_size=inputShape)
batch = np.expand_dims(img_to_array(pil_image), axis=0)
image = preprocess(batch)
# Predict, decode the scores into (id, label, probability) tuples and print
# them as a numbered list with the probability as a percentage.
preds = model.predict(image)
P = imagenet_utils.decode_predictions(preds)
for rank, (_class_id, label, prob) in enumerate(P[0], start=1):
    print("{}. {}: {:.2f}%".format(rank, label, prob * 100))

3. Обнаружение одиночного объекта (с ограничивающей рамкой)

Здесь набор данных создаётся программно. Каждое изображение содержит один объект — прямоугольник. Используется простая нейронная сеть. Ни графический процессор, ни подключение к Интернету не требуются.

#importing libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# Create the dataset: each image is an img_size x img_size array of zeros
# holding one axis-aligned rectangle of ones. Its bounding box is stored as
# [x, y, w, h], where x indexes the first image axis and y the second.
num_imgs = 1000
img_size = 8
min_object_size = 1
max_object_size = 4
num_objects = 1
bboxes = np.zeros((num_imgs, num_objects, 4))
imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
for i_img in range(num_imgs):
    for i_object in range(num_objects):
        # np.random.randint excludes its upper bound, so add 1 to make
        # max_object_size an attainable width/height.
        w, h = np.random.randint(min_object_size, max_object_size + 1, size=2)
        # Same reason for the + 1 here: allow the rectangle to touch the far
        # edge (x + w == img_size) while still fitting inside the image.
        x = np.random.randint(0, img_size - w + 1)
        y = np.random.randint(0, img_size - h + 1)
        imgs[i_img, x:x+w, y:y+h] = 1.  # set rectangle to 1
        bboxes[i_img, i_object] = [x, y, w, h]

# Notebook display of the resulting array shapes.
imgs.shape, bboxes.shape
# Plot one sample image with its bounding box outlined in red. The image is
# transposed so the first array axis runs along the horizontal plot axis, and
# origin='lower' places (0, 0) at the bottom-left corner.
i = 0
plt.imshow(
    imgs[i].T,
    cmap='Greys',
    interpolation='none',
    origin='lower',
    extent=[0, img_size, 0, img_size],
)
axes = plt.gca()
for box_x, box_y, box_w, box_h in bboxes[i]:
    outline = matplotlib.patches.Rectangle((box_x, box_y), box_w, box_h, ec='r', fc='none')
    axes.add_patch(outline)
# Inputs: flatten every image to one row and standardise with the mean and
# standard deviation taken over the whole image array.
flat_imgs = imgs.reshape(num_imgs, -1)
X = (flat_imgs - imgs.mean()) / imgs.std()
X.shape, X.mean(), X.std()
# Targets: flatten the boxes to one row per image and divide by img_size.
y = bboxes.reshape(num_imgs, -1) / img_size
y.shape, y.mean(), y.std()
# First 80% of the samples for training, the remainder for testing. The raw
# images and boxes of the test part are kept for plotting.
n_train = int(0.8 * num_imgs)
train_X, test_X = X[:n_train], X[n_train:]
train_y, test_y = y[:n_train], y[n_train:]
test_imgs = imgs[n_train:]
test_bboxes = bboxes[n_train:]
#creating deep network model
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Convolution2D, MaxPooling2D 
from keras.optimizers import SGD
# Fully connected network: flattened image in, one bounding-box vector out.
# The last layer has no activation, so the outputs are unbounded values.
model = Sequential([
    Dense(500, input_dim=X.shape[-1]),
    Activation('relu'),
    Dense(300),
    Activation('relu'),
    Dense(100),
    Activation('relu'),
    Dropout(0.2),
    Dense(y.shape[-1]),
])
# Adadelta optimizer, mean squared error loss.
model.compile('adadelta', 'mse')
# Train and validate. Keras 2 names this argument `epochs`; `nb_epoch` is the
# Keras 1 spelling and is rejected by newer releases.
model.fit(train_X, train_y, epochs=50, validation_data=(test_X, test_y), verbose=2)
# Predict on the test inputs, undo the division by img_size applied to the
# targets, and regroup the values as one box row per object.
pred_y = model.predict(test_X)
pred_bboxes = (pred_y * img_size).reshape(len(pred_y), num_objects, -1)
pred_bboxes.shape
# Show five randomly chosen test images side by side, each with its predicted
# bounding box outlined in red.
plt.figure(figsize=(12, 3))
for i_subplot in range(1, 6):
    plt.subplot(1, 5, i_subplot)
    i = np.random.randint(len(test_imgs))
    plt.imshow(
        test_imgs[i].T,
        cmap='Greys',
        interpolation='none',
        origin='lower',
        extent=[0, img_size, 0, img_size],
    )
    axes = plt.gca()
    for box_x, box_y, box_w, box_h in pred_bboxes[i]:
        outline = matplotlib.patches.Rectangle((box_x, box_y), box_w, box_h, ec='r', fc='none')
        axes.add_patch(outline)

4. Обнаружение нескольких объектов (с формами)

Здесь набор данных также создаётся программно. Внимательно прочтите комментарии в коде. Ни графический процессор, ни подключение к Интернету не требуются.

# by Sujoy Kumar Goswami
# 
# USE JUPYTER NOTEBOOK ONLY
#
# Python(>=3.6) & Keras(>=2.0)
# importing libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# creating dataset
# here 0-4 black objects (different shapes with random sizes) are placed in a noisy image (24 x 24). 
# the image is divided into 4 quadrants (w.r.t. image center) & each quadrant contains 0-1 object randomly.
# 4000 such images are taken.
# the objects with rectangular & lower-triangular shapes are of our interest.
# the upper-triangular shapes are dummy objects (distractors).
# due to randomness few images may be blank or with upper-triangular shape (dummy object) only.
# bounding boxes of the interested objects are also saved.
num_imgs = 4000
img_size = 24
min_rect_size = 3
max_rect_size = 9
max_num_objects = 5
half = img_size // 2  # coordinate of the image centre, i.e. the quadrant boundary
bboxes = np.zeros((num_imgs, max_num_objects, 4))
imgs = np.random.rand(num_imgs, img_size, img_size)  # uniform noise background
shapes = np.zeros((num_imgs, max_num_objects, 1))
for i_img in range(num_imgs):
    # Index of the next free label slot for this image.
    n_labelled = 0

    # Low-x / low-y quadrant: rectangle, shape label 0.
    if np.random.choice([True, False]):
        rect_w, rect_h = np.random.randint(min_rect_size, max_rect_size, size=2)
        x0 = np.random.randint(0, half - rect_w)
        y0 = np.random.randint(0, half - rect_h)
        imgs[i_img, x0:x0 + rect_w, y0:y0 + rect_h] = 1.
        bboxes[i_img, n_labelled] = [x0, y0, rect_w, rect_h]
        shapes[i_img, n_labelled] = [0]
        n_labelled += 1

    # High-x / high-y quadrant: lower-triangular mask, shape label 1.
    if np.random.choice([True, False]):
        side = np.random.randint(min_rect_size, max_rect_size)
        x0, y0 = np.random.randint(half, img_size - side, size=2)
        tri_rows, tri_cols = np.tril_indices(side)
        imgs[i_img, x0 + tri_rows, y0 + tri_cols] = 1.
        bboxes[i_img, n_labelled] = [x0, y0, side, side]
        shapes[i_img, n_labelled] = [1]
        n_labelled += 1

    # High-x / low-y quadrant: rectangle, shape label 0.
    if np.random.choice([True, False]):
        rect_w, rect_h = np.random.randint(min_rect_size, max_rect_size, size=2)
        x0 = np.random.randint(half, img_size - rect_w)
        y0 = np.random.randint(0, half - rect_h)
        imgs[i_img, x0:x0 + rect_w, y0:y0 + rect_h] = 1.
        bboxes[i_img, n_labelled] = [x0, y0, rect_w, rect_h]
        shapes[i_img, n_labelled] = [0]
        n_labelled += 1

    # Low-x / high-y quadrant: upper-triangular mask. This is the dummy
    # object: it is drawn into the image but gets no bounding box or label.
    if np.random.choice([True, False]):
        side = np.random.randint(min_rect_size, max_rect_size)
        x0 = np.random.randint(0, half - side)
        y0 = np.random.randint(half, img_size - side)
        tri_rows, tri_cols = np.triu_indices(side)
        imgs[i_img, x0 + tri_rows, y0 + tri_cols] = 1.

    # Mark every unused label slot with -1.
    bboxes[i_img, n_labelled:] = -1
    shapes[i_img, n_labelled:] = -1

imgs.shape, bboxes.shape
# Plot five randomly chosen input images side by side and outline every
# bounding-box slot in red. Unused slots hold -1 values and are passed to
# Rectangle as well.
plt.figure(figsize=(24, 8))
for i_subplot in range(1, 6):
    plt.subplot(1, 5, i_subplot)
    i = np.random.randint(num_imgs)
    plt.imshow(
        imgs[i].T,
        cmap='Greys',
        interpolation='none',
        origin='lower',
        extent=[0, img_size, 0, img_size],
    )
    axes = plt.gca()
    for box_x, box_y, box_w, box_h in bboxes[i]:
        outline = matplotlib.patches.Rectangle((box_x, box_y), box_w, box_h, ec='r', fc='none')
        axes.add_patch(outline)
# Inputs: add a trailing channel axis and standardise with the mean and
# standard deviation taken over the whole image array.
imgs_4d = imgs.reshape(num_imgs, img_size, img_size, 1)
X = (imgs_4d - imgs.mean()) / imgs.std()
# Targets: per object slot, the four box values divided by img_size followed
# by the shape label, flattened to one row per image.
targets = np.concatenate([bboxes / img_size, shapes], axis=-1)
y = targets.reshape(num_imgs, -1)
X.shape, y.shape
# First 80% of the samples for training, the remainder for testing. The raw
# images and boxes of the test part are kept for plotting.
n_train = int(0.8 * num_imgs)
train_X, test_X = X[:n_train], X[n_train:]
train_y, test_y = y[:n_train], y[n_train:]
test_imgs = imgs[n_train:]
test_bboxes = bboxes[n_train:]
# creating deep network model
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Convolution2D, MaxPooling2D, Flatten
from keras.optimizers import SGD
# Convolutional feature extractor followed by a dense regression head. The
# output has one value per target column (box values plus shape label for
# each object slot) and no activation.
model = Sequential([
    # Input shape comes from the data (img_size, img_size, 1) rather than
    # being hard-coded, so it follows any change to img_size.
    Convolution2D(8, (3, 3), activation='relu', input_shape=X.shape[1:]),
    Convolution2D(8, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Convolution2D(8, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(3000),
    Activation('relu'),
    Dropout(0.3),
    Dense(1500),
    Activation('relu'),
    Dense(500),
    Activation('relu'),
    Dropout(0.3),
    Dense(50),
    Activation('relu'),
    Dense(y.shape[-1]),
])
# Adadelta optimizer, mean squared error loss.
model.compile('adadelta', 'mse')
# Train and validate. Keras 2 names this argument `epochs`; `nb_epoch` is the
# Keras 1 spelling and is rejected by newer releases.
model.fit(train_X, train_y, epochs=100, validation_data=(test_X, test_y), verbose=2)
# Predict on the test inputs and regroup the flat output as one row per
# object slot: the first four columns are the box (scaled back by img_size),
# the fifth is the shape score.
raw_pred = model.predict(test_X)
pred_y = raw_pred.reshape(len(raw_pred), max_num_objects, -1)
pred_shapes = pred_y[..., 4:5]
pred_bboxes = pred_y[..., :4] * img_size
pred_bboxes.shape, pred_shapes.shape
# Plot the predictions for five randomly chosen test images.
# Predicted rectangles are drawn in blue and predicted triangles in green.
# Note that no upper-triangular (dummy) shape is predicted.
# Accuracy could be improved with other deep models and/or by tuning the
# associated parameters.
plt.figure(figsize=(24, 8))
for i_subplot in range(1, 6):
    plt.subplot(1, 5, i_subplot)
    i = np.random.randint(len(test_X))
    plt.imshow(test_imgs[i].T, cmap='Greys', interpolation='none', origin='lower', extent=[0, img_size, 0, img_size])
    for pred_bbox, pred_shape in zip(pred_bboxes[i], pred_shapes[i]):
        shape_score = pred_shape[0]
        # Training labels are -1 (empty slot), 0 (rectangle) and 1 (triangle).
        # A score nearer to -1 than to 0 means "no object": draw nothing
        # instead of drawing the slot as a rectangle.
        if shape_score < -0.5:
            continue
        if shape_score <= 0.5:
            plt.gca().add_patch(matplotlib.patches.Rectangle((pred_bbox[0], pred_bbox[1]), pred_bbox[2], pred_bbox[3], fc='b', alpha=0.5))
        else:
            # Triangle corners: (x + w, y + h), (x + w, y) and (x, y).
            xy = ([[pred_bbox[0]+pred_bbox[2], pred_bbox[1]+pred_bbox[3]],
                    [pred_bbox[0]+pred_bbox[2], pred_bbox[1]],
                    [pred_bbox[0], pred_bbox[1]]])
            # `closed` is passed by keyword: newer Matplotlib releases no
            # longer accept it positionally.
            plt.gca().add_patch(matplotlib.patches.Polygon(xy, closed=True, fc='g', alpha=0.5))

References:
- https://towardsdatascience.com/object-detection-with-neural-networks-a4e2c46b4491

Пожалуйста, отметьте публикацию, если она вам понравилась, и поделитесь ею. Оставайтесь на связи: скоро я добавлю ещё примеры кода…