Commit 25e6341b authored by Jana Schor
Browse files

Switch to 2-neuron output (FNN)

one-hot encode y
parent 95c4aecf
......@@ -36,7 +36,8 @@ test_train_opts = options.Options(
compressFeatures=True,
activationFunction="selu",
lossFunction='bce',
optimizer='Adam'
optimizer='Adam',
fnnType='FNN'
)
logging.basicConfig(level=logging.INFO)
......@@ -174,7 +175,7 @@ def main():
predict_opts,
inputFile=makePathAbsolute(predict_opts.inputFile),
outputDir=makePathAbsolute(predict_opts.outputDir),
outputFile=makePathAbsolute(path.join(predict_opts.outputDir,predict_opts.outputFile)),
outputFile=makePathAbsolute(path.join(predict_opts.outputDir, predict_opts.outputFile)),
ecModelDir=makePathAbsolute(predict_opts.ecModelDir),
fnnModelDir=makePathAbsolute(predict_opts.fnnModelDir),
trainAC=False,
......
......@@ -18,16 +18,16 @@ def autoencoder_callback(checkpoint_path: str, opts: options.Options) -> list:
# enable this checkpoint to restore the weights of the best performing model
checkpoint = ModelCheckpoint(checkpoint_path,
monitor="val_auc",
mode='max',
monitor="val_loss",
mode='min',
verbose=1,
period=settings.ac_train_check_period,
save_best_only=True,
save_weights_only=True)
# enable early stopping if val_loss is not improving anymore
early_stop = EarlyStopping(monitor="val_auc",
mode='max',
early_stop = EarlyStopping(monitor="val_loss",
mode='min',
patience=settings.ac_train_patience,
min_delta=settings.ac_train_min_delta,
verbose=1,
......
......@@ -137,8 +137,9 @@ conversion_rules = {
# "S_dataset.csv": importSmilesCSV,
# "S_dataset_extended.csv": importSmilesCSV,
# "D_dataset.tsv": importDstoxTSV,
"train_data.csv": importSmilesCSV,
"predict_data.csv": importDstoxTSV
# "train_data.csv": importSmilesCSV,
# "predict_data.csv": importDstoxTSV,
"B_data_ER.csv": importDstoxTSV
}
......
......@@ -21,6 +21,7 @@ from tensorflow.keras.layers import Dense, Dropout, AlphaDropout
from tensorflow.keras.models import Model
# for NN model functions
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from dfpl import callbacks as cb
from dfpl import options
......@@ -92,6 +93,7 @@ def prepare_nn_training_data(df: pd.DataFrame, target: str, opts: options.Option
dtype=settings.nn_target_numpy_type,
copy=settings.numpy_copy_values
)
y_one_hot = to_categorical(y)
else:
logging.info("Using uncompressed fingerprints")
df_fp = df[df[target].notna() & df["fp"].notnull()]
......@@ -107,6 +109,7 @@ def prepare_nn_training_data(df: pd.DataFrame, target: str, opts: options.Option
)
x = np.array(dfX["fp"].to_list(), dtype=settings.ac_fp_numpy_type, copy=settings.numpy_copy_values)
y = np.array(dfX[target].to_list(), copy=settings.numpy_copy_values)
y_one_hot = to_categorical(y)
else:
logging.info("Fraction sampling is OFF")
# how many ones, how many zeros
......@@ -115,7 +118,8 @@ def prepare_nn_training_data(df: pd.DataFrame, target: str, opts: options.Option
x = np.array(df_fp["fp"].to_list(), dtype=settings.ac_fp_numpy_type, copy=settings.numpy_copy_values)
y = np.array(df_fp[target].to_list(), copy=settings.numpy_copy_values)
return x, y, opts
y_one_hot = to_categorical(y)
return x, y_one_hot, opts
def build_fnn_network(input_size: int, opts: options.Options) -> Model:
......@@ -164,8 +168,8 @@ def build_fnn_network(input_size: int, opts: options.Options) -> Model:
sys.exit(-1)
# output layer
model.add(Dense(units=1,
activation='sigmoid'))
model.add(Dense(units=2,
activation='softmax'))
return model
......@@ -177,7 +181,7 @@ def build_snn_network(input_size: int, opts: options.Options) -> Model:
for i in range(7):
model.add(Dense(units=50, activation="selu", kernel_initializer="lecun_normal"))
model.add(AlphaDropout(opts.dropout))
model.add(Dense(units=1, activation="sigmoid"))
model.add(Dense(units=2, activation="softmax"))
return model
......@@ -360,12 +364,12 @@ def train_single_label_models(df: pd.DataFrame, opts: options.Options) -> None:
if opts.wabTracking:
x_train, x_test, y_train, y_test = train_test_split(x, y,
test_size=opts.testSize,
stratify=True,
stratify=y,
random_state=1)
else:
x_train, x_test, y_train, y_test = train_test_split(x, y,
test_size=opts.testSize,
stratify=True)
stratify=y)
performance = fit_and_evaluate_model(x_train=x_train, x_test=x_test, y_train=y_train, y_test=y_test,
fold=0, target=target, opts=opts)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment