Carlos-Francisco Méndez-Cruz / deep-learning-workshop
Authored by Carlos-Francisco Méndez-Cruz, 2019-04-24 16:29:06 -0500
Commit e6cd562a4a4ff3ab3e2485c0810511fdf8b8fe35
1 parent 67a4474b

Deep Learning Workshop
Showing 4 changed files with 141 additions and 1 deletion
cnn/cnn-Larisa.py
cnn/cnn-hga-basic-v01.py
data-sets/human-genome-annotation/README.txt
data-sets/human-genome-annotation/get-hga-data-set.py
cnn/cnn-Larisa.py
0 → 100644
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.utils import np_utils
from keras import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool1D, MaxPool2D, Flatten, BatchNormalization
from keras.regularizers import l1_l2
# Load MNIST data set
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale pixel values to [0, 1]
x_train = x_train.astype('float64') / 255.0
x_test = x_test.astype('float64') / 255.0

# Append a channel axis: (n, 28, 28) -> (n, 28, 28, 1)
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], x_train.shape[2], 1))
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], x_test.shape[2], 1))

# One-hot encode the labels
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)

print("Training set:")
print(x_train.shape)
print(y_train.shape)
print("Test set:")
print(x_test.shape)
print(y_test.shape)
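
# Note: an equivalent way to append the channel axis (a sketch, not part of
# this commit):
#   x_train = np.expand_dims(x_train, -1)
#   x_test = np.expand_dims(x_test, -1)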
# Split train into train and validation
validation_rate = 0.2
n_val_samples = round(validation_rate * len(x_train))
print("Taking {} validation samples".format(n_val_samples))
x_val = x_train[:n_val_samples]
y_val = y_train[:n_val_samples]
x_train = x_train[n_val_samples:]
y_train = y_train[n_val_samples:]
print("Training set:")
print(x_train.shape)
print(y_train.shape)
print("Validation set:")
print(x_val.shape)
print(y_val.shape)
# Build model architecture (layers with activations)
_, n_rows, n_cols, n_chans = x_train.shape  # Shape of input data
model = Sequential()

# Feature learning
## Conv Layer 1
model.add(Conv2D(input_shape=(n_rows, n_cols, n_chans), filters=16,
                 kernel_size=(3, 3), activation='relu', padding='same'))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
## Conv Layer 2
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
## Conv Layer 3
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=(2, 2)))

# Classification
model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(64, activation='relu', kernel_regularizer=l1_l2(9e-4)))
model.add(Dense(units=y_train.shape[1], activation='softmax'))
model.summary()
# Compile model (define optimizer and loss function)
model.compile(optimizer='adadelta', loss='categorical_crossentropy', metrics=['accuracy'])
# Train your model
num_epochs = 15
losses = np.zeros((num_epochs, 2))  # column 0: training, column 1: validation
accura = np.zeros((num_epochs, 2))
print(f"Training on {x_train.shape[0]} samples - validating on {x_val.shape[0]} samples.")
for epoch in range(num_epochs):
    print(f"Epoch: {epoch+1:3d} -- ", end="")
    model.fit(x_train, y_train, epochs=1, batch_size=256, verbose=False)
    losses[epoch, 0], accura[epoch, 0] = model.evaluate(x_train, y_train, verbose=False)
    losses[epoch, 1], accura[epoch, 1] = model.evaluate(x_val, y_val, verbose=False)
    print(f"Train loss: {losses[epoch, 0]:6.4f}, acc: {accura[epoch, 0]:6.4f} -- Val loss: {losses[epoch, 1]:6.4f}, acc: {accura[epoch, 1]:6.4f}")
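
# Alternative (a sketch, not part of this commit): Keras can track both curves
# itself when fit() is given validation_data; the metric keys are 'acc'/'val_acc'
# or 'accuracy'/'val_accuracy' depending on the Keras version:
#   history = model.fit(x_train, y_train, epochs=num_epochs, batch_size=256,
#                       validation_data=(x_val, y_val), verbose=False)
#   losses = np.column_stack((history.history['loss'], history.history['val_loss']))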
# Plot training history
plt.figure(figsize=(15, 10))
plt.plot(losses[:, 0], label='Loss: Training', linewidth=2)
plt.plot(losses[:, 1], label='Loss: Validation', linewidth=2)
plt.plot(accura[:, 0], label='Accu: Training', linewidth=2)
plt.plot(accura[:, 1], label='Accu: Validation', linewidth=2)
plt.legend(fontsize=18)
plt.xlabel("Epoch", fontsize=18)
plt.ylabel("Loss / Accuracy", fontsize=18)
plt.xticks(np.arange(1, len(losses)))
plt.tick_params(labelsize=18)
plt.grid()
plt.show()  # needed to display the figure when running as a script
# Evaluate on the test set
y_hat = model.predict(x_test)
test_loss, test_acc = model.evaluate(x_test, y_test)
print("Test loss: {:6.4f}, acc: {:6.4f}".format(test_loss, test_acc))
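# The y_hat predictions above are never used; a follow-up sketch (not part of
# this commit) that turns the softmax outputs into class labels and counts errors:
#   predicted = np.argmax(y_hat, axis=1)
#   actual = np.argmax(y_test, axis=1)
#   print("Misclassified {} of {} test images".format((predicted != actual).sum(), len(actual)))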
cnn/cnn-hga-basic-v01.py
@@ -6,7 +6,13 @@ from keras import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPool1D, MaxPool2D, Flatten, BatchNormalization
from keras.regularizers import l1_l2
# Load MNIST data set
# Load Human Genome Annotation (HGA) data set
# GRCh38.92
# From https://www.kaggle.com/alfrandom/human-genome-annotation
hga_csv = (x_train, y_train), (x_test, y_test) = mnist.load_data()
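
# Note: hga_csv above still receives the MNIST tuples as a placeholder.
# A sketch of the intended csv load (file name and pandas usage are
# assumptions, not part of this commit):
#   import pandas as pd
#   hga_csv = pd.read_csv('data-sets/human-genome-annotation/gtf.csv')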
data-sets/human-genome-annotation/README.txt
0 → 100644
FASTA data from ftp://ftp.ensembl.org/pub/release-96/fasta/homo_sapiens/dna/
Human Genome Annotation data from https://www.kaggle.com/alfrandom/human-genome-annotation
data-sets/human-genome-annotation/get-hga-data-set.py
0 → 100644
# Get source data set by combining the Human Genome Annotation data set (csv)
# with FASTA files to obtain the sequences corresponding to objects in the
# human genome, using the "start" and "end" columns from human-genome-annotation.
# Input files:
# FASTA, all chromosomes: /home/cmendezc/data-FASTA-Homo_sapiens.GRCh38.dna
# Output tab-separated format:
# Start End Sequence Feature
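# Example output row (hypothetical values, for illustration only):
# 11869	14409	GTTAACTTGCC...	transcript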
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Get source data set for Human Genome Annotation.')
    parser.add_argument('--fastaPath', dest='fastaPath',
                        default='/home/cmendezc/data-FASTA-Homo_sapiens.GRCh38.dna',
                        help='Path to the directory with FASTA files for all chromosomes.')
    args = parser.parse_args()
    print("FASTA path: {}".format(args.fastaPath))
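
The script stops at argument parsing; the combining step described in its header might look like the sketch below (the GTF column names, the Ensembl per-chromosome FASTA naming, and the pandas/Biopython calls are assumptions, not the commit's code):

import pandas as pd
from Bio import SeqIO

def build_data_set(csv_path, fasta_dir, out_path):
    # Assumed: the csv keeps the standard GTF columns (seqname, feature, start, end, ...)
    ann = pd.read_csv(csv_path)
    with open(out_path, 'w') as out:
        out.write('Start\tEnd\tSequence\tFeature\n')
        for seqname, group in ann.groupby('seqname'):
            # Assumed Ensembl file naming, e.g. Homo_sapiens.GRCh38.dna.chromosome.1.fa
            fasta = '{}/Homo_sapiens.GRCh38.dna.chromosome.{}.fa'.format(fasta_dir, seqname)
            record = next(SeqIO.parse(fasta, 'fasta'))
            for row in group.itertuples():
                # GTF coordinates are 1-based with an inclusive end
                seq = str(record.seq[row.start - 1:row.end])
                out.write('{}\t{}\t{}\t{}\n'.format(row.start, row.end, seq, row.feature))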