
Commit 157c6685 authored by halirutan

Implement and test running from json input

parent 34093012
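For context, running from JSON input relies on jsonpickle's type-tagged round-trip, which the fromCmdArgs changes below make use of. A minimal, self-contained sketch of the mechanism (DemoOptions is a hypothetical stand-in for dfpl's TrainOptions, not part of this commit):

import dataclasses
import jsonpickle

@dataclasses.dataclass
class DemoOptions:  # hypothetical stand-in for options.TrainOptions
    inputFile: str = ""
    outputDir: str = ""

# encode() embeds a "py/object" type tag (visible in the JSON file at the end
# of this commit), so decode() reconstructs an instance of the same class.
encoded = jsonpickle.encode(DemoOptions(inputFile="data.csv", outputDir="out"))
decoded = jsonpickle.decode(encoded)
assert isinstance(decoded, DemoOptions)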
from argparse import Namespace
import logging
import pandas as pd
import pathlib
import dataclasses
from os import path
import options
from utils import makePathAbsolute
import fingerprint as fp
import autoencoder as ac
import feedforwardNN as fNN
# import feedforwardNN as fNN
project_directory = pathlib.Path(__file__).parent.parent.absolute()
test_train_args = options.TrainOptions(
@@ -36,22 +37,21 @@ def train(opts: options.TrainOptions):
if opts.trainAC:
# train an autoencoder on the full feature matrix
encoder = ac.trainfullac(df, opts)
encoder.save_weights(opts.acFile)
encoder.save_weights(path.join(opts.outputDir, opts.acFile))
# encoder = dfpl.trainfullac(X=xmatrix, y=ymatrix, epochs=args.e, encdim=args.d,
# useweights=args.a, verbose=args.v)
else:
# load trained model for autoencoder
(_, encoder) = ac.autoencoderModel(input_size=opts.fpSize, encoding_dim=opts.encFPSize)
encoder.load_weights(opts.acFile)
encoder.load_weights(path.join(opts.outputDir, opts.acFile))
# compress the fingerprints using the autoencoder
df = ac.compressfingerprints(df, encoder)
# train FNNs with compressed features
fNN.trainNNmodels(df=df, opts=opts, usecompressed=True)
# fNN.trainNNmodels(df=df, opts=opts, usecompressed=True)
# train FNNs with uncompressed features
fNN.trainNNmodels(df=df, opts=opts, usecompressed=False)
# fNN.trainNNmodels(df=df, opts=opts, usecompressed=False)
# train multi-label models
# with compressed features
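Worth noting: opts.acFile is now treated as a bare file name that is joined onto opts.outputDir for both saving and loading the encoder weights. A trivial sketch of that convention, using the values from the test options further down (paths are illustrative):

from os import path

# save_weights and load_weights above both resolve this same joined path
weights_file = path.join("modeltraining", "Sun_etal_encoder.weights.hdf5")
# -> "modeltraining/Sun_etal_encoder.weights.hdf5"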
@@ -80,20 +80,29 @@ def predict(args: Namespace) -> None:
return None
def makePathAbsolute(p: str) -> str:
path = pathlib.Path(p)
if path.is_absolute():
return p
else:
return str(path.absolute())
def createLogger(filename: str) -> None:
# get root logger and set its level
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# create file handler which logs info messages
fh = logging.FileHandler(filename)
fh.setLevel(logging.INFO)
# create console handler
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
# create formatter and add it to the handlers
formatterFile = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
formatterConsole = logging.Formatter('%(levelname)-8s %(message)s')
fh.setFormatter(formatterFile)
ch.setFormatter(formatterConsole)
# add the handlers to the logger
logger.addHandler(fh)
logger.addHandler(ch)
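A short usage sketch for createLogger as defined above (the file name train.log mirrors the call added in __main__ below; note that every call attaches a fresh pair of handlers to the root logger):

createLogger("train.log")  # hypothetical call; file + console handlers attached
logging.info("shows on the console and is appended to train.log")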
if __name__ == '__main__':
FORMAT = '[%(levelname)s] %(asctime)-15s %(message)s'
logging.basicConfig(format=FORMAT)
parser = options.createCommandlineParser()
prog_args: Namespace = parser.parse_args()
logging.info(f"The following arguments are received or filled with default values:\n{prog_args}")
try:
if prog_args.method == "train":
@@ -103,6 +112,8 @@ if __name__ == '__main__':
inputFile=makePathAbsolute(train_opts.inputFile),
outputDir=makePathAbsolute(train_opts.outputDir)
)
createLogger(path.join(fixed_opts.outputDir, "train.log"))
logging.info(f"The following arguments are received or filled with default values:\n{prog_args}")
train(fixed_opts)
exit(0)
elif prog_args.method == "predict":
......
@@ -11,7 +11,7 @@ from keras import optimizers
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint, EarlyStopping
from dfpl import options
import options
def autoencoderModel(
......
@@ -4,6 +4,8 @@ import jsonpickle
import argparse
from pathlib import Path
from utils import makePathAbsolute
@dataclass
class TrainOptions:
@@ -39,11 +41,14 @@ class TrainOptions:
@classmethod
def fromCmdArgs(cls, args: argparse.Namespace) -> TrainOptions:
"""Creates TrainOptions instance from cmdline arguments"""
jsonFile = Path(args.f)
if jsonFile.exists() and jsonFile.is_file():
with jsonFile.open() as f:
content = f.read()
return jsonpickle.decode(content)
if args.f != "":
jsonFile = Path(makePathAbsolute(args.f))
if jsonFile.exists() and jsonFile.is_file():
with jsonFile.open() as f:
content = f.read()
return jsonpickle.decode(content)
else:
raise ValueError("Could not find JSON input file")
else:
return cls(
inputFile=args.i,
@@ -95,7 +100,7 @@ def parseInputTrain(parser: argparse.ArgumentParser) -> None:
"form of a fingerprint or a SMILES (see -t option). "
"The remaining columns contain the outcome(s) (Y matrix). "
"A header is expected and respective column names are used "
"to refer to outcome(s) (target(s)).", required=True)
"to refer to outcome(s) (target(s)).", required=False)
parser.add_argument('-o', metavar='FILE', type=str,
help='Prefix of output file name. Trained model(s) and '
'respective stats will be returned in 2 output files '
@@ -179,11 +184,14 @@ class PredictOptions:
@classmethod
def fromCmdArgs(cls, args: argparse.Namespace) -> PredictOptions:
"""Creates TrainOptions instance from cmdline arguments"""
jsonFile = Path(args.f)
if jsonFile.exists() and jsonFile.is_file():
with jsonFile.open() as f:
content = f.read()
return jsonpickle.decode(content)
if args.f != "":
jsonFile = Path(makePathAbsolute(args.f))
if jsonFile.exists() and jsonFile.is_file():
with jsonFile.open() as f:
content = f.read()
return jsonpickle.decode(content)
else:
raise ValueError("Could not find JSON input file")
else:
return cls(
inputFile=args.i,
......
import pathlib
def makePathAbsolute(p: str) -> str:
path = pathlib.Path(p)
if path.is_absolute():
return p
else:
return str(path.absolute())
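Hypothetical usage of makePathAbsolute: relative paths are resolved against the current working directory, absolute paths pass through unchanged:

makePathAbsolute("data/Sun_etal_dataset.csv")  # -> "<cwd>/data/Sun_etal_dataset.csv"
makePathAbsolute("/tmp/out")                   # -> "/tmp/out" (already absolute)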
@@ -9,7 +9,7 @@ project_directory = pathlib.Path(__file__).parent.parent.absolute()
test_train_args = opt.TrainOptions(
inputFile=f"{project_directory}/data/Sun_etal_dataset.csv",
outputDir=f"{project_directory}/modeltraining",
acFile="",
acFile="Sun_etal_encoder.weights.hdf5",
type='smiles',
fpType='topological',
epochs=512,
@@ -22,4 +22,11 @@ test_train_args = opt.TrainOptions(
)
def runAutoencoder(opts: opt.TrainOptions) -> None:
logging.basicConfig(format="DFPL-%(levelname)s: %(message)s", level=logging.INFO)
logging.info("Adding fingerprint to dataset")
df = fp.processInParallel(opts.inputFile, import_function=fp.importSmilesCSV, fp_size=opts.fpSize)
logging.info("Training autoencoder")
ac.trainfullac(df, opts)
logging.info("Done")
@@ -10,7 +10,7 @@ project_directory = pathlib.Path(__file__).parent.parent.absolute()
test_train_args = opt.TrainOptions(
inputFile=f"{project_directory}/data/Sun_etal_dataset.csv",
outputDir=f"{project_directory}/modeltraining/",
acFile=f"{project_directory}/modeltraining/Sun_etal_encoder.weights.hdf5",
acFile="Sun_etal_encoder.weights.hdf5",
type='smiles',
fpType='topological',
epochs=10,
......
import pytest
import pandas as pd
import dfpl.fingerprint as fp
correct_smiles = [
"CC1(C)OC2CC3C4CC(F)C5=CC(=O)CCC5(C)C4C(O)CC3(C)C2(O1)C(=O)CO",
"CC1(C)OC2CC3C4CCC5=CC(=O)C=CC5(C)C4(F)C(O)CC3(C)C2(O1)C(=O)CO",
......
{
"py/object": "dfpl.options.TrainOptions",
"inputFile": "data/Sun_etal_dataset.csv",
"outputDir": "modeltraining",
"outputDir": "validation/case_01/results",
"acFile": "Sun_etal_dataset.AC.encoder.weights.hdf5",
"type": "smiles",
"fpType": "topological",
"epochs": 512,
"epochs": 10,
"fpSize": 2048,
"encFPSize": 256,
"kFolds": 5,
......
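Such a file can then drive training through the command-line entry point. A hedged sketch, assuming the -f flag is what populates args.f in fromCmdArgs above and that the file lives at validation/case_01/train.json (both assumptions):

from dfpl import options

# parse "train -f <json>" the way __main__ does, then build the options object
args = options.createCommandlineParser().parse_args(
    ["train", "-f", "validation/case_01/train.json"]  # hypothetical location
)
train_opts = options.TrainOptions.fromCmdArgs(args)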