Deep Learning Workshop

Carlos-Francisco Méndez-Cruz
Commit fe15902c4208cf89f08b1dd4e4962ae5d4533b02 fe15902c 1 parent f61a4234
Showing 1 changed file with 66 additions and 3 deletions
data-sets/get-hga-training-test.py
--- a/data-sets/get-hga-training-test.py
View file @fe15902
+++ b/data-sets/get-hga-training-test.py
View file @fe15902
@@ -21,6 +21,11 @@ import os
 from sklearn.preprocessing import LabelEncoder, OneHotEncoder
 import numpy as np
 from sklearn.model_selection import train_test_split
+from tensorflow.keras.layers import Conv1D, Dense, MaxPooling1D, Flatten
+from tensorflow.keras.models import Sequential
+import matplotlib.pyplot as plt
+from sklearn.metrics import confusion_matrix
+import itertools
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Get training and test data sets for Human Genome Annotation.')
@@ -79,10 +84,68 @@ if __name__ == "__main__":
     train_features, test_features, train_labels, test_labels = train_test_split(
         input_features, input_labels, test_size=0.25, random_state=42)
+    # Model definition
+    model = Sequential()
+    model.add(Conv1D(filters=32, kernel_size=12,
+                     input_shape=(train_features.shape[1], 4)))
+    model.add(MaxPooling1D(pool_size=4))
+    model.add(Flatten())
+    model.add(Dense(16, activation='relu'))
+    model.add(Dense(2, activation='softmax'))
+
+    model.compile(loss='binary_crossentropy', optimizer='adam',
+                  metrics=['binary_accuracy'])
+    model.summary()
+
+    # Model training and validation
+    history = model.fit(train_features, train_labels,
+                        epochs=50, verbose=0, validation_split=0.25)
+
+    # Plot training-validation loss
+    plt.figure()
+    plt.plot(history.history['loss'])
+    plt.plot(history.history['val_loss'])
+    plt.title('model loss')
+    plt.ylabel('loss')
+    plt.xlabel('epoch')
+    plt.legend(['train', 'validation'])
+    # plt.show()
+    plt.savefig('training-validation-loss.png')
+
+    # Plot training-validation accuracy
+    plt.figure()
+    plt.plot(history.history['binary_accuracy'])
+    plt.plot(history.history['val_binary_accuracy'])
+    plt.title('model accuracy')
+    plt.ylabel('accuracy')
+    plt.xlabel('epoch')
+    plt.legend(['train', 'validation'])
+    # plt.show()
+    plt.savefig('training-validation-binary-accuracy.png')
+
+    # Predict with rest data set
+    predicted_labels = model.predict(np.stack(test_features))
+    # Print confusion matrix
+    cm = confusion_matrix(np.argmax(test_labels, axis=1),
+                          np.argmax(predicted_labels, axis=1))
+    print('Confusion matrix:\n', cm)
+    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
+
+    # Plot confusion matrix
+    plt.imshow(cm, cmap=plt.cm.Blues)
+    plt.title('Normalized confusion matrix')
+    plt.colorbar()
+    plt.xlabel('True label')
+    plt.ylabel('Predicted label')
+    plt.xticks([0, 1]);
+    plt.yticks([0, 1])
+    plt.grid('off')
+    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
+        plt.text(j, i, format(cm[i, j], '.2f'),
+                 horizontalalignment='center',
+                 color='white' if cm[i, j] > 0.5 else 'black')
+
-    with open(os.path.join(args.outputPath, args.outputFile), mode="w") as oFile:
-        for elem in list_rows:
-            oFile.write(elem)