Showing 4 changed files with 141 additions and 1 deletion
cnn/cnn-Larisa.py
0 → 100644
+import numpy as np
+import matplotlib.pyplot as plt
+from keras.datasets import mnist
+from keras.utils import np_utils
+from keras import Sequential
+from keras.layers import Dense, Dropout, Conv2D, MaxPool1D, MaxPool2D, Flatten, BatchNormalization
+from keras.regularizers import l1_l2
+
+# Load MNIST data set
+
+(x_train, y_train), (x_test, y_test) = mnist.load_data()
+
+# Scale pixel values to [0, 1]
+x_train = x_train.astype('float64') / 255.0
+x_test = x_test.astype('float64') / 255.0
+
+# Add a trailing channel dimension: (samples, rows, cols, 1)
+x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], x_train.shape[2], 1))
+x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], x_test.shape[2], 1))
+
+# One-hot encode the labels
+y_train = np_utils.to_categorical(y_train)
+y_test = np_utils.to_categorical(y_test)
+
+print("Training set:")
+print(x_train.shape)
+print(y_train.shape)
+
+print("Test set:")
+print(x_test.shape)
+print(y_test.shape)
+
+# Split train into train and validation
+
+validation_rate = 0.2
+n_val_samples = round(validation_rate * len(x_train))
+print("Taking {} validation samples".format(n_val_samples))
+
+x_val = x_train[:n_val_samples]
+y_val = y_train[:n_val_samples]
+
+x_train = x_train[n_val_samples:]
+y_train = y_train[n_val_samples:]
+
+print("Training set:")
+print(x_train.shape)
+print(y_train.shape)
+
+print("Validation set:")
+print(x_val.shape)
+print(y_val.shape)
+
+# Build model architecture (layers with activations)
+
+_, n_rows, n_cols, n_chans = x_train.shape  # Shape of input data
+model = Sequential()
+
+# Feature learning
+
+## Conv Layer 1
+model.add(Conv2D(input_shape=(n_rows, n_cols, n_chans), filters=16, kernel_size=(3, 3), activation='relu', padding='same'))
+model.add(Conv2D(32, (3, 3), activation='relu'))
+model.add(MaxPool2D(pool_size=(2, 2)))
+## Conv Layer 2
+model.add(Conv2D(32, (3, 3), activation='relu'))
+model.add(MaxPool2D(pool_size=(2, 2)))
+## Conv Layer 3
+model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
+model.add(MaxPool2D(pool_size=(2, 2)))
+
+# Classification
+
+model.add(Flatten())
+model.add(BatchNormalization())
+model.add(Dense(64, activation='relu', kernel_regularizer=l1_l2(9e-4)))
+model.add(Dense(units=y_train.shape[1], activation='softmax'))
+model.summary()
+
+# Compile model (define optimizer and loss function)
+model.compile(optimizer='adadelta', loss='categorical_crossentropy', metrics=['accuracy'])
+
+# Train the model, recording loss and accuracy after every epoch
+num_epochs = 15
+losses = np.zeros((num_epochs, 2))
+accura = np.zeros((num_epochs, 2))
+print(f"Training on {x_train.shape[0]} samples - validating on {x_val.shape[0]} samples.")
+for epoch in range(num_epochs):
+    print(f"Epoch: {epoch+1:3d} -- ", end="")
+    model.fit(x_train, y_train, epochs=1, batch_size=256, verbose=False)
+    losses[epoch, 0], accura[epoch, 0] = model.evaluate(x_train, y_train, verbose=False)
+    losses[epoch, 1], accura[epoch, 1] = model.evaluate(x_val, y_val, verbose=False)
+    print(f"Train loss: {losses[epoch, 0]:6.4f}, acc: {accura[epoch, 0]:6.4f} -- Val loss: {losses[epoch, 1]:6.4f}, acc: {accura[epoch, 1]:6.4f}")
+
+# Plot training history
+plt.figure(figsize=(15, 10))
+
+plt.plot(losses[:, 0], label='Loss: Training', linewidth=2)
+plt.plot(losses[:, 1], label='Loss: Validation', linewidth=2)
+plt.plot(accura[:, 0], label='Accu: Training', linewidth=2)
+plt.plot(accura[:, 1], label='Accu: Validation', linewidth=2)
+
+plt.legend(fontsize=18)
+plt.xlabel("Epoch", fontsize=18)
+plt.ylabel("Loss / Accuracy", fontsize=18)
+
+plt.xticks(np.arange(num_epochs))
+plt.tick_params(labelsize=18)
+plt.grid()
+plt.show()
+
+# Evaluate on the held-out test set
+y_hat = model.predict(x_test)
+test_loss, test_acc = model.evaluate(x_test, y_test)
+print("Test loss: {:6.4f}, acc: {:6.4f}".format(test_loss, test_acc))
\ No newline at end of file
@@ -6,7 +6,13 @@ from keras import Sequential
 from keras.layers import Dense, Dropout, Conv2D, MaxPool1D, MaxPool2D, Flatten, BatchNormalization
 from keras.regularizers import l1_l2
 
-# Load MNIST data set
+# Load Human Genome Annotation (HGA) data set
+# GRCh38.92
+# From https://www.kaggle.com/alfrandom/human-genome-annotation
+
+hga_csv =
+
+
 
 (x_train, y_train), (x_test, y_test) = mnist.load_data()
 
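The hunk above leaves `hga_csv =` without a right-hand side. As a minimal sketch only, not part of the commit, the load might look like the following, assuming pandas and a locally downloaded copy of the Kaggle CSV; the file name and columns are assumptions based on GTF conventions:

import pandas as pd

# Hypothetical file name -- the commit does not say where the CSV lives.
hga_csv = pd.read_csv("data-sets/human-genome-annotation/Homo_sapiens.GRCh38.92.csv")

# Columns assumed from GTF conventions: seqname, source, feature, start, end, ...
print(hga_csv.shape)
print(hga_csv.head())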
data-sets/human-genome-annotation/README.txt
0 → 100644
+# Get source data set by combining the Human Genome Annotation data set (csv)
+# with FASTA files to obtain the sequences corresponding to objects in the
+# human genome, using the "start" and "end" columns from human-genome-annotation.
+
+# Input files:
+# FASTA all chromosomes: /home/cmendezc/data-FASTA-Homo_sapiens.GRCh38.dna
+
+# Output tab-separated format:
+# Start End Sequence Feature
+
+import argparse
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Get source data set for Human Genome Annotation.')
+    parser.add_argument('--fastaPath', dest='fastaPath',
+                        help='path to the FASTA files with chromosome sequences')
+
+    args = parser.parse_args()
+    print(args.fastaPath)
+
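The README above describes the combination step, but the script so far only parses its arguments. A minimal sketch of that step, assuming pandas and Biopython, GTF-style 1-based end-inclusive start/end coordinates, and placeholder file and column names (none of this is in the commit):

import pandas as pd
from Bio import SeqIO

annotations = pd.read_csv("human-genome-annotation.csv")  # hypothetical name

# Index chromosome records by id (e.g. "1", "2", ..., "X", "Y", "MT")
chromosomes = SeqIO.to_dict(SeqIO.parse("Homo_sapiens.GRCh38.dna.fa", "fasta"))

with open("sequences.tsv", "w") as out:
    out.write("Start\tEnd\tSequence\tFeature\n")
    for row in annotations.itertuples():
        chrom = chromosomes[str(row.seqname)].seq
        # GTF start/end are 1-based and end-inclusive; Python slices are
        # 0-based and end-exclusive, hence start - 1.
        seq = str(chrom[row.start - 1:row.end])
        out.write(f"{row.start}\t{row.end}\t{seq}\t{row.feature}\n")

For the full genome, an indexed reader such as Bio.SeqIO.index or pyfaidx would avoid holding every chromosome in memory at once.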