diff --git a/Digit Recognizer.py b/Digit Recognizer.py new file mode 100644 index 0000000..abe0df4 --- /dev/null +++ b/Digit Recognizer.py @@ -0,0 +1,133 @@ +import pandas as pd +import matplotlib.pyplot as plt +from sklearn.model_selection import train_test_split +from keras.utils.np_utils import to_categorical +from keras.models import Sequential +from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D +from keras.optimizers import RMSprop +from keras.preprocessing.image import ImageDataGenerator +from keras.callbacks import ReduceLROnPlateau, CSVLogger + +TRAINING_LOGS_FILE = "training_logs11.csv" +MODEL_SUMMARY_FILE = "model_summary11.txt" +MODEL_FILE = "model11.h5" +TRAINING_PLOT_FILE = "training11.png" +VALIDATION_PLOT_FILE = "validation11.png" +KAGGLE_SUBMISSION_FILE = "kaggle_submission_final2.csv" + + + +VERBOSITY = 1 +EPOCHS = 100 +BATCH_SIZE = 512 +CLASSES = 10 +CHANNELS = 1 +IMAGE_SIZE = 28 +IMAGE_WIDTH, IMAGE_HEIGHT = IMAGE_SIZE, IMAGE_SIZE +VALIDATION_RATIO = 0.1 + +# Load the data (https://www.kaggle.com/c/digit-recognizer/data) + +train = pd.read_csv("train.csv") +test = pd.read_csv("test.csv") + +y = train["label"] +x = train.drop(labels = ["label"], axis = 1) + +# Reshape data +x = x.values.reshape(-1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS) +test = test.values.reshape(-1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS) + +# One-Hot encoding +y = to_categorical(y, num_classes=CLASSES) + +# Prepare training/validation sets +x_training, x_validation, y_training, y_validation = train_test_split(x, + y, + test_size=VALIDATION_RATIO, + shuffle = True) + +# Model (0.99471) +model = Sequential() + +model.add(Conv2D(filters=32, + kernel_size=(5,5), + padding='Same', + activation='relu', + input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS))) +model.add(Conv2D(filters=32, + kernel_size=(5,5), + padding='Same', + activation='relu')) +model.add(MaxPool2D(pool_size=(2,2))) +model.add(Dropout(0.3)) + +model.add(Conv2D(filters=64, kernel_size=(3,3),padding='Same', + activation='relu')) +model.add(Conv2D(filters=64, kernel_size=(3,3),padding='Same', + activation='relu')) +model.add(MaxPool2D(pool_size=(2,2), strides=(2,2))) +model.add(Dropout(0.3)) + +model.add(Flatten()) +model.add(Dense(8192, activation='relu')) +model.add(Dropout(0.3)) + +model.add(Dense(2048, activation='relu')) +model.add(Dropout(0.5)) + +model.add(Dense(CLASSES, activation="softmax")) + +model.compile(optimizer=RMSprop(lr=0.0001, + rho=0.9, + epsilon=1e-08, + decay=0.00001), + loss="categorical_crossentropy", + metrics=["accuracy"]) + +with open(MODEL_SUMMARY_FILE,"w") as fh: + model.summary(print_fn=lambda line: fh.write(line + "\n")) + +# Data augmentation +data_generator = ImageDataGenerator(rescale=1./255, + rotation_range=1, + zoom_range=0.1, + width_shift_range=0.05, + height_shift_range=0.05) +data_generator.fit(x_training) + +# Training +history = model.fit_generator(data_generator.flow(x_training, + y_training, + batch_size=BATCH_SIZE), + epochs=EPOCHS, + validation_data=(x_validation, y_validation), + verbose=VERBOSITY, + steps_per_epoch=x_training.shape[0] // BATCH_SIZE) +model.save_weights(MODEL_FILE) + +# Testing +predictions = model.predict_classes(test, verbose=1) +pd.DataFrame({"ImageId":list(range(1,len(predictions)+1)), + "Label":predictions}).to_csv(KAGGLE_SUBMISSION_FILE, + index=False, + header=True) + +# Drawing plots +epochs = [i for i in range(1, len(history.history['loss'])+1)] + +plt.plot(epochs, history.history['loss'], color='blue', label="training_loss") +plt.plot(epochs, history.history['val_loss'], color='red', label="validation_loss") +plt.legend(loc='best') +plt.title('training') +plt.xlabel('epoch') +plt.savefig(TRAINING_PLOT_FILE, bbox_inches='tight') +plt.close() + +plt.plot(epochs, history.history['acc'], color='blue', label="training_accuracy") +plt.plot(epochs, history.history['val_acc'], color='red',label="validation_accuracy") +plt.legend(loc='best') +plt.title('validation') +plt.xlabel('epoch') +plt.savefig(VALIDATION_PLOT_FILE, bbox_inches='tight') +plt.close()