# cnn-hga-basic-v01.py
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.utils import to_categorical
from keras import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten, BatchNormalization
from keras.regularizers import l1_l2
# Load Human Genome Annotation (HGA) data set
# GRCh38.92
# From https://www.kaggle.com/alfrandom/human-genome-annotation
# The CSV path was left unfilled here; the rest of the script trains on MNIST.
# hga_csv =
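# A minimal sketch of how the annotation CSV could be read with pandas; the
# filename is hypothetical, since no path is given in this script:
# import pandas as pd
# hga_df = pd.read_csv("Homo_sapiens.GRCh38.92.csv")  # hypothetical filename
# print(hga_df.head())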
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Scale pixel intensities to [0, 1] (float32 is what Keras expects)
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
# Add a trailing channel axis: (samples, 28, 28) -> (samples, 28, 28, 1)
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], x_train.shape[2], 1))
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], x_test.shape[2], 1))
# One-hot encode labels, e.g. 3 -> [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
print("Training set:")
print(x_train.shape)
print(y_train.shape)
print("Test set:")
print(x_test.shape)
print(y_test.shape)
# Split train into train and validation
validation_rate = 0.2
n_val_samples = round(validation_rate * len(x_train))
print("Taking {} validation samples".format(n_val_samples))
x_val = x_train[:n_val_samples]
y_val = y_train[:n_val_samples]
x_train = x_train[n_val_samples:]
y_train = y_train[n_val_samples:]
print("Training set:")
print(x_train.shape)
print(y_train.shape)
print("Validation set:")
print(x_val.shape)
print(y_val.shape)
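# The slice above takes the first 20% of samples as validation. If the data
# were ordered by class this would bias both sets; a shuffled split is safer
# in general -- a sketch (apply before slicing):
# rng = np.random.default_rng(seed=0)
# perm = rng.permutation(len(x_train))
# x_train, y_train = x_train[perm], y_train[perm]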
# Build model architecture (layers with activations)
_, n_rows, n_cols, n_chans = x_train.shape # Shape of input data
model = Sequential()
# Feature learning
## Conv block 1
model.add(Conv2D(input_shape=(n_rows, n_cols, n_chans), filters=16, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
## Conv block 2
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
## Conv block 3
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=(2, 2)))
# Classification
model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(64, activation='relu', kernel_regularizer=l1_l2(l1=9e-4)))  # only L1 set; L2 keeps its default
model.add(Dense(units=y_train.shape[1], activation='softmax'))
model.summary()
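# Shape trace for the 28x28x1 input: 28x28x16 -> 26x26x32 -> 13x13x32
# -> 11x11x32 -> 5x5x32 -> 5x5x64 -> 2x2x64, so Flatten yields a
# 256-dimensional vector feeding the dense head.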
# Compile model (define optimizer and loss function)
model.compile(optimizer='adadelta', loss='categorical_crossentropy', metrics=['accuracy'])
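# The 'adadelta' string uses the library default learning rate, which differs
# across Keras versions (1.0 in standalone Keras, 0.001 in tf.keras). An
# explicit optimizer object makes the choice visible -- a sketch:
# from keras.optimizers import Adadelta
# model.compile(optimizer=Adadelta(learning_rate=1.0),
#               loss='categorical_crossentropy', metrics=['accuracy'])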
# Train your model
num_epochs = 15
losses = np.zeros((num_epochs, 2))
accura = np.zeros((num_epochs, 2))
print("Training on {x_train.shape[0]} samples - validating on {x_val.shape[0]} samples.")
for epoch in range(num_epochs):
print("Epoch: {epoch+1:3d} -- ", end="")
model.fit(x_train, y_train, epochs=1, batch_size=256, verbose=False)
losses[epoch, 0], accura[epoch, 0] = model.evaluate(x_train, y_train, verbose=False)
losses[epoch, 1], accura[epoch, 1] = model.evaluate(x_val, y_val, verbose=False)
print("Train loss: {losses[epoch, 0]:6.4f}, acc: {accura[epoch, 0]:6.4f} -- Val loss: {losses[epoch, 1]:6.4f}, acc: {accura[epoch, 1]:6.4f}")
# Plot training history
plt.figure(figsize=(15, 10))
plt.plot(losses[:, 0], label='Loss: Training', linewidth=2)
plt.plot(losses[:, 1], label='Loss: Validation', linewidth=2)
plt.plot(accura[:, 0], label='Accu: Training', linewidth=2)
plt.plot(accura[:, 1], label='Accu: Validation', linewidth=2)
plt.legend(fontsize=18)
plt.xlabel("Epoch", fontsize=18)
plt.ylabel("Loss / Accuracy", fontsize=18)
plt.xticks(np.arange(num_epochs), np.arange(1, num_epochs + 1))  # ticks at each epoch, labelled from 1
plt.tick_params(labelsize=18)
plt.grid()
plt.show()
# Final evaluation on the held-out test set
y_hat = model.predict(x_test)
test_loss, test_acc = model.evaluate(x_test, y_test)
print("Test loss: {:6.4f}, acc: {:6.4f}".format(test_loss, test_acc))