4 changes: 4 additions & 0 deletions .gitignore
@@ -0,0 +1,4 @@
+/__pycache__/
+log*
+*.ipynb
+*.pyc
3 changes: 0 additions & 3 deletions .vscode/settings.json

This file was deleted.

Binary file removed __pycache__/basemodel.cpython-38.pyc
Binary file removed __pycache__/layers.cpython-38.pyc
Binary file removed __pycache__/model.cpython-38.pyc
Binary file removed __pycache__/utils.cpython-38.pyc
54 changes: 11 additions & 43 deletions basemodel.py
@@ -1,16 +1,16 @@
"""
Author:
Weichen Shen,weichenswc@163.com
+Bowen Sun,550165764@qq.com
"""
from __future__ import print_function

import time
import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.utils.data as Data
-from torch.utils.data import DataLoader
+import oneflow as torch
+import oneflow.nn as nn
+import oneflow.nn.functional as F
+import oneflow.utils.data as Data
+from oneflow.utils.data import DataLoader
from sklearn.metrics import *
from tqdm import tqdm
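
Editor's note: the port leans on OneFlow's PyTorch-compatible API surface. By aliasing oneflow under the name torch, the rest of the file runs unchanged. A minimal sketch of the pattern (a hedged illustration, not code from this PR):

import oneflow as torch
import oneflow.nn as nn

x = torch.randn(2, 3)   # oneflow.randn, reached through the alias
fc = nn.Linear(3, 1)    # oneflow.nn.Linear, same call pattern as PyTorch's
print(fc(x).shape)      # -> oneflow.Size([2, 1])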

@@ -33,11 +33,7 @@ def __init__(self, feature_columns, feature_index, init_std=0.0001, device='cpu'
self.embedding_dict = create_embedding_matrix(feature_columns, init_std, linear=True, sparse=False,
device=device)

-# nn.ModuleDict(
-#     {feat.embedding_name: nn.Embedding(feat.dimension, 1, sparse=True) for feat in
-#     self.sparse_feature_columns}
-# )
-# .to("cuda:1")

for tensor in self.embedding_dict.values():
nn.init.normal_(tensor.weight, mean=0, std=init_std)

@@ -66,7 +62,6 @@ def forward(self, X, sparse_feat_refine_weight=None):
if len(sparse_embedding_list) > 0:
sparse_embedding_cat = torch.cat(sparse_embedding_list, dim=-1)
if sparse_feat_refine_weight is not None:
-# w_{x,i}=m_{x,i} * w_i (in IFM and DIFM)
sparse_embedding_cat = sparse_embedding_cat * sparse_feat_refine_weight.unsqueeze(1)
sparse_feat_logit = torch.sum(sparse_embedding_cat, dim=-1, keepdim=False)
linear_logit += sparse_feat_logit
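
Editor's note: the deleted comment documented the refine-weight semantics from IFM/DIFM, w_{x,i} = m_{x,i} * w_i, i.e. each field's linear weight is rescaled per sample before summation. A minimal shape sketch (field layout assumed, not taken from this PR):

import oneflow as flow

batch, n_fields = 4, 3
sparse_embedding_cat = flow.ones(batch, 1, n_fields)     # concatenated per-field linear terms
refine_w = flow.rand(batch, n_fields)                    # m_{x,i} produced by IFM/DIFM
rescaled = sparse_embedding_cat * refine_w.unsqueeze(1)  # w_{x,i} = m_{x,i} * w_i
logit = flow.sum(rescaled, dim=-1, keepdim=False)        # (batch, 1), added to linear_logit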
@@ -100,10 +95,6 @@ def __init__(self, linear_feature_columns, dnn_feature_columns, l2_reg_linear=1e
self.dnn_feature_columns = dnn_feature_columns

self.embedding_dict = create_embedding_matrix(dnn_feature_columns, init_std, sparse=False, device=device)
-# nn.ModuleDict(
-#     {feat.embedding_name: nn.Embedding(feat.dimension, embedding_size, sparse=True) for feat in
-#     self.dnn_feature_columns}
-# )

self.linear_model = Linear(
linear_feature_columns, self.feature_index, device=device)
@@ -116,11 +107,6 @@ def __init__(self, linear_feature_columns, dnn_feature_columns, l2_reg_linear=1e
self.out = PredictionLayer(task, )
self.to(device)

-# parameters for callbacks
-self._is_graph_network = True  # used for ModelCheckpoint in tf2
-self._ckpt_saved_epoch = False  # used for EarlyStopping in tf1.14
-# self.history = History()


def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, initial_epoch=0, validation_split=0.,
validation_data=None, shuffle=True, callbacks=None):
@@ -202,21 +188,11 @@ def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, initial_epoc
sample_num = len(train_tensor_data)
steps_per_epoch = (sample_num - 1) // batch_size + 1

-# configure callbacks
-# callbacks = (callbacks or []) + [self.history]  # add history callback
-# callbacks = CallbackList(callbacks)
-# callbacks.set_model(self)
-# callbacks.on_train_begin()
-# callbacks.set_model(self)
-# if not hasattr(callbacks, 'model'):  # for tf1.4
-#     callbacks.__setattr__('model', self)
-# callbacks.model.stop_training = False

# Train
print("Train on {0} samples, validate on {1} samples, {2} steps per epoch".format(
len(train_tensor_data), len(val_y), steps_per_epoch))
for epoch in range(initial_epoch, epochs):
-# callbacks.on_epoch_begin(epoch)
epoch_logs = {}
start_time = time.time()
loss_epoch = 0
@@ -231,7 +207,7 @@ def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, initial_epoc
y_pred = model(x).squeeze()

optim.zero_grad()
-loss = loss_func(y_pred, y.squeeze(), reduction='sum')
+loss = loss_func(y_pred, y.squeeze())
reg_loss = self.get_regularization_loss()

total_loss = loss + reg_loss + self.aux_loss
@@ -280,13 +256,6 @@ def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1, initial_epoc
eval_str += " - " + "val_" + name + \
": {0: .4f}".format(epoch_logs["val_" + name])
print(eval_str)
-# callbacks.on_epoch_end(epoch, epoch_logs)
-# if self.stop_training:
-#     break
-
-# callbacks.on_train_end()
-
-# return self.history


def evaluate(self, x, y, batch_size=256):
@@ -442,11 +411,11 @@ def _get_optim(self, optimizer):
def _get_loss_func(self, loss):
if isinstance(loss, str):
if loss == "binary_crossentropy":
-loss_func = F.binary_cross_entropy
+loss_func = nn.BCELoss(reduction="sum")
elif loss == "mse":
-loss_func = F.mse_loss
+loss_func = nn.MSELoss(reduction="sum")
elif loss == "mae":
-loss_func = F.l1_loss
+loss_func = nn.L1Loss(reduction="sum")
else:
raise NotImplementedError
else:
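
Editor's note: the functional losses become loss modules constructed once with reduction="sum", which is why the call site in fit() above drops its reduction='sum' argument. A hypothetical equivalence check, assuming OneFlow's functional and module losses agree as they do in PyTorch:

import oneflow as flow
import oneflow.nn as nn
import oneflow.nn.functional as F

y_pred = flow.rand(8)                       # probabilities in [0, 1]
y_true = flow.randint(0, 2, (8,)).float()   # binary labels

old_style = F.binary_cross_entropy(y_pred, y_true, reduction="sum")
new_style = nn.BCELoss(reduction="sum")(y_pred, y_true)
assert flow.allclose(old_style, new_style)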
@@ -482,7 +451,6 @@ def _get_metrics(self, metrics, set_eps=False):
return metrics_

def _in_multi_worker_mode(self):
-# used for EarlyStopping in tf1.15
return None

@property
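Editor's note: with the Keras-style callback plumbing (History, EarlyStopping and ModelCheckpoint hooks) stripped out of fit(), stopping logic now belongs to the caller. A hypothetical usage sketch, assuming evaluate() returns a metric dict keyed by metric name as in DeepCTR-Torch; model and the train/val arrays are placeholders:

best_auc, patience, bad_epochs = 0.0, 3, 0
for _ in range(50):
    model.fit(train_x, train_y, batch_size=256, epochs=1, verbose=1)
    auc = model.evaluate(val_x, val_y, batch_size=256)["auc"]
    if auc > best_auc:
        best_auc, bad_epochs = auc, 0
    else:
        bad_epochs += 1
        if bad_epochs >= patience:
            break
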
11 changes: 3 additions & 8 deletions layers.py
@@ -1,8 +1,8 @@
import itertools

-import torch
-import torch.nn as nn
-import torch.nn.functional as F
+import oneflow as torch
+import oneflow.nn as nn
+import oneflow.nn.functional as F
import numpy as np

class DNN(nn.Module):
@@ -260,13 +260,8 @@ def activation_layer(act_name, hidden_size=None, dice_dim=2):
if isinstance(act_name, str):
if act_name.lower() == 'sigmoid':
act_layer = nn.Sigmoid()
-# elif act_name.lower() == 'linear':
-#     act_layer = Identity()
elif act_name.lower() == 'relu':
act_layer = nn.ReLU(inplace=True)
-# elif act_name.lower() == 'dice':
-#     assert dice_dim
-#     act_layer = Dice(hidden_size, dice_dim)
elif act_name.lower() == 'prelu':
act_layer = nn.PReLU()
elif issubclass(act_name, nn.Module):
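Editor's note: the 'linear' and 'dice' branches are gone, so those strings now fall through to issubclass(act_name, nn.Module), which raises TypeError for a str. A hypothetical caller-side shim (safe_activation is not part of this PR):

import oneflow.nn as nn
from layers import activation_layer

def safe_activation(name):
    if isinstance(name, str) and name.lower() == "linear":
        return nn.Identity()        # what the removed 'linear' branch returned
    return activation_layer(name)   # 'sigmoid', 'relu', 'prelu', or an nn.Module subclass
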
2 changes: 1 addition & 1 deletion main.py
@@ -1,5 +1,5 @@
import pandas as pd
-import torch
+import oneflow as torch
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
10 changes: 5 additions & 5 deletions model.py
@@ -1,9 +1,9 @@
import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.utils.data as Data
-from torch.utils.data import DataLoader
+import oneflow as torch
+import oneflow.nn as nn
+import oneflow.nn.functional as F
+import oneflow.utils.data as Data
+from oneflow.utils.data import DataLoader
from sklearn.metrics import *
from tqdm import tqdm

17 changes: 4 additions & 13 deletions utils.py
@@ -1,8 +1,8 @@
from collections import OrderedDict, namedtuple, defaultdict


-import torch
-import torch.nn as nn
+import oneflow as torch
+import oneflow.nn as nn
from layers import SequencePoolingLayer


@@ -150,8 +150,6 @@ def get_varlen_pooling_list(embedding_dict, features, feature_index, varlen_spar


def create_embedding_matrix(feature_columns, init_std=0.0001, linear=False, sparse=False, device='cpu'):
-# Return nn.ModuleDict: for sparse features, {embedding_name: nn.Embedding}
-# for varlen sparse features, {embedding_name: nn.EmbeddingBag}
sparse_feature_columns = list(
filter(lambda x: isinstance(x, SparseFeat), feature_columns)) if len(feature_columns) else []

@@ -164,9 +162,7 @@ def create_embedding_matrix(feature_columns, init_std=0.0001, linear=False, spar
sparse_feature_columns + varlen_sparse_feature_columns}
)

-# for feat in varlen_sparse_feature_columns:
-#     embedding_dict[feat.embedding_name] = nn.EmbeddingBag(
-#         feat.dimension, embedding_size, sparse=sparse, mode=feat.combiner)


for tensor in embedding_dict.values():
nn.init.normal_(tensor.weight, mean=0, std=init_std)
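
Editor's note: for reference, a minimal sketch of the ModuleDict-plus-normal-init pattern used by create_embedding_matrix (feature names and sizes here are hypothetical):

import oneflow.nn as nn

vocab_sizes = {"user_id": 1000, "item_id": 500}   # embedding_name -> vocabulary size
embedding_dict = nn.ModuleDict(
    {name: nn.Embedding(vocab, 8) for name, vocab in vocab_sizes.items()}
)
for emb in embedding_dict.values():
    nn.init.normal_(emb.weight, mean=0, std=0.0001)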
@@ -178,12 +174,7 @@ def varlen_embedding_lookup(X, embedding_dict, sequence_input_dict, varlen_spars
for fc in varlen_sparse_feature_columns:
feature_name = fc.name
embedding_name = fc.embedding_name
-if fc.use_hash:
-    # lookup_idx = Hash(fc.vocabulary_size, mask_zero=True)(sequence_input_dict[feature_name])
-    # TODO: add hash function
-    lookup_idx = sequence_input_dict[feature_name]
-else:
-    lookup_idx = sequence_input_dict[feature_name]
+lookup_idx = sequence_input_dict[feature_name]
varlen_embedding_vec_dict[feature_name] = embedding_dict[embedding_name](
X[:, lookup_idx[0]:lookup_idx[1]].long()) # (lookup_idx)
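
Editor's note: both arms of the removed use_hash block assigned the same lookup_idx (the hash path was an unimplemented TODO), so collapsing them is behavior-preserving. lookup_idx is a (start, end) column span into the flat input, roughly as in this sketch (layout hypothetical):

import oneflow as flow

X = flow.randint(0, 100, (4, 10)).float()   # batch of 4, 10 packed feature columns
lookup_idx = (2, 7)                         # hypothetical span for one varlen feature
ids = X[:, lookup_idx[0]:lookup_idx[1]].long()
print(ids.shape)                            # (4, 5): 5 id columns for this feature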
