Carlos-Francisco Méndez-Cruz

Deep Learning Workshop

import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.utils import np_utils
from keras import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool1D, MaxPool2D, Flatten, BatchNormalization
from keras.regularizers import l1_l2
# Load MNIST data set
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float64') / 255.0
x_test = x_test.astype('float64') / 255.0
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], x_train.shape[2], 1))
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], x_test.shape[2], 1))
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
print("Training set:")
print(x_train.shape)
print(y_train.shape)
print("Test set:")
print(x_test.shape)
print(y_test.shape)
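# Quick sanity check on the loaded data (a minimal sketch, assuming an
# interactive matplotlib backend): show a few digits with labels decoded
# from the one-hot vectors via argmax.
fig, axes = plt.subplots(1, 5, figsize=(10, 2))
for i, ax in enumerate(axes):
    ax.imshow(x_train[i, :, :, 0], cmap='gray')
    ax.set_title(int(np.argmax(y_train[i])))
    ax.axis('off')
plt.show()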
# Split train into train and validation
validation_rate = 0.2
n_val_samples = round(validation_rate * len(x_train))
print("Taking {} validation samples".format(n_val_samples))
x_val = x_train[:n_val_samples]
y_val = y_train[:n_val_samples]
x_train = x_train[n_val_samples:]
y_train = y_train[n_val_samples:]
print("Training set:")
print(x_train.shape)
print(y_train.shape)
print("Validation set:")
print(x_val.shape)
print(y_val.shape)
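# Note: this split takes the first 20% of samples as validation, which assumes
# the data is not ordered by class (MNIST as shipped is effectively shuffled).
# A safer sketch would permute with a fixed seed before slicing, e.g.:
#   perm = np.random.RandomState(42).permutation(len(x_train))
#   x_train, y_train = x_train[perm], y_train[perm]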
# Build model architecture (layers with activations)
_, n_rows, n_cols, n_chans = x_train.shape # Shape of input data
model = Sequential()
# Feature learning
## Conv Layer 1
model.add(Conv2D(input_shape=(n_rows, n_cols, n_chans), filters=16, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
## Conv Layer 2
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
## Conv Layer 3
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=(2, 2)))
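# Shape trace for this feature-learning stack (computed from the layer
# definitions above, input 28x28x1): 28x28x16 -> 26x26x32 -> pool 13x13x32
# -> 11x11x32 -> pool 5x5x32 -> 5x5x64 -> pool 2x2x64, so Flatten yields 256 units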
# Classification
model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(64, activation='relu', kernel_regularizer=l1_l2(9e-4)))
model.add(Dense(units=y_train.shape[1], activation='softmax'))
model.summary()
# Compile model (define optimizer and loss function)
model.compile(optimizer='adadelta', loss='categorical_crossentropy', metrics=['accuracy'])
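# Note: with older standalone Keras the 'adadelta' string uses a learning rate
# of 1.0, while recent tf.keras defaults it to 0.001, which can make this model
# look like it is not learning. If training stalls, an explicit optimizer is a
# reasonable alternative (a sketch, assuming a recent Keras API):
#   from keras.optimizers import Adadelta
#   model.compile(optimizer=Adadelta(learning_rate=1.0),
#                 loss='categorical_crossentropy', metrics=['accuracy'])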
# Train your model
num_epochs = 15
losses = np.zeros((num_epochs, 2))
accura = np.zeros((num_epochs, 2))
print("Training on {x_train.shape[0]} samples - validating on {x_val.shape[0]} samples.")
for epoch in range(num_epochs):
    print(f"Epoch: {epoch+1:3d} -- ", end="")
    model.fit(x_train, y_train, epochs=1, batch_size=256, verbose=False)
    losses[epoch, 0], accura[epoch, 0] = model.evaluate(x_train, y_train, verbose=False)
    losses[epoch, 1], accura[epoch, 1] = model.evaluate(x_val, y_val, verbose=False)
    print(f"Train loss: {losses[epoch, 0]:6.4f}, acc: {accura[epoch, 0]:6.4f} -- Val loss: {losses[epoch, 1]:6.4f}, acc: {accura[epoch, 1]:6.4f}")
# Plot training history
plt.figure(figsize=(15, 10))
plt.plot(losses[:, 0], label='Loss: Training', linewidth=2)
plt.plot(losses[:, 1], label='Loss: Validation', linewidth=2)
plt.plot(accura[:, 0], label='Accu: Training', linewidth=2)
plt.plot(accura[:, 1], label='Accu: Validation', linewidth=2)
plt.legend(fontsize=18)
plt.xlabel("Epoch", fontsize=18)
plt.ylabel("Loss", fontsize=18)
plt.xticks(np.arange(1, len(losses)))
plt.tick_params(labelsize=18)
plt.grid()
y_hat = model.predict(x_test)
test_loss, test_acc = model.evaluate(x_test, y_test)
print("Test loss: {:6.4f}, acc: {:6.4f}".format(test_loss, test_acc))
# Load Human Genome Annotation (HGA) data set
# GRCh38.92
# From https://www.kaggle.com/alfrandom/human-genome-annotation
hga_csv = "Homo_sapiens.GRCh38.92.csv"  # hypothetical file name for the downloaded Kaggle CSV
# FASTA data from ftp://ftp.ensembl.org/pub/release-96/fasta/homo_sapiens/dna/
# Human Genome Annotation data from https://www.kaggle.com/alfrandom/human-genome-annotation
# Get source data set by combining the Human Genome Annotation data set (csv)
# with FASTA files to obtain the sequences corresponding to objects in the
# human genome, using the "start" and "end" columns from human-genome-annotation
# Input files:
# FASTA all chromosomes: /home/cmendezc/data-FASTA-Homo_sapiens.GRCh38.dna
# Output tab-separated format:
# Start End Sequence Feature
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Get source data set for Human Genome Annotation.')
    parser.add_argument('--fastaPath', dest='fastaPath',
                        default='/home/cmendezc/data-FASTA-Homo_sapiens.GRCh38.dna',
                        help='path to the directory with FASTA files for all chromosomes')
    args = parser.parse_args()
    print("Reading FASTA files from: {}".format(args.fastaPath))
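# A hedged sketch of the extraction step described in the header comments.
# The annotation column names ('start', 'end', 'feature') follow the Kaggle
# CSV, and the one-FASTA-file-per-chromosome layout is an assumption.
import pandas as pd

def read_fasta(path):
    # Concatenate the sequence lines of a FASTA file, skipping '>' headers
    with open(path) as fh:
        return ''.join(line.strip() for line in fh if not line.startswith('>'))

def extract_sequences(annotation_csv, chromosome_fasta, output_tsv):
    # Slice the chromosome sequence for each annotated object and write TSV rows
    ann = pd.read_csv(annotation_csv)
    chrom = read_fasta(chromosome_fasta)
    with open(output_tsv, 'w') as out:
        out.write("Start\tEnd\tSequence\tFeature\n")
        for _, row in ann.iterrows():
            # Annotation coordinates are 1-based and end-inclusive (GTF convention)
            seq = chrom[int(row['start']) - 1:int(row['end'])]
            out.write("{}\t{}\t{}\t{}\n".format(
                row['start'], row['end'], seq, row['feature']))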