Commit c7c09add authored by mdolling-gfz
Browse files

Resolve "Follow-up from "add example script to init; redo internal structure to a more oo style""

parent b1bb6a84
Pipeline #77299 passed with stage
in 44 seconds
<!--
hifis-surveyval
Framework to help developing analysis scripts for the HIFIS Software survey.
SPDX-FileCopyrightText: 2021 HIFIS Software <support@hifis.net>
SPDX-License-Identifier: GPL-3.0-or-later
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
-->
# License Hint
Copyright © 2021 HIFIS Software <support@hifis.net>
......
......@@ -42,13 +42,16 @@ It can be used as a handy facility for running the task from a command line.
"""
import logging
import pathlib
from csv import reader
import click
import pkg_resources
import yaml
from hifis_surveyval.core import util
from hifis_surveyval.core.dispatch import Dispatcher
from hifis_surveyval.core.settings import Settings
from hifis_surveyval.data_container import DataContainer
from hifis_surveyval.hifis_surveyval import HIFISSurveyval
settings: Settings = Settings()
......@@ -62,7 +65,7 @@ settings: Settings = Settings()
default=0,
show_default=True,
help="Enable verbose output. "
"Increase verbosity by setting this option up to 3 times.",
"Increase verbosity by setting this option up to 3 times.",
)
def cli(verbose: int) -> None:
"""
......@@ -100,7 +103,7 @@ def version() -> None:
is_flag=True,
show_default=True,
help="Create a default config as file. "
"Overwrites any existing configuration file.",
"Overwrites any existing configuration file.",
)
@click.option(
"--script",
......@@ -108,7 +111,7 @@ def version() -> None:
is_flag=True,
show_default=True,
help="Create an example script in the given script folder. "
"Overwrites any existing example script file.",
"Overwrites any existing example script file.",
)
def init(config: bool, script: bool) -> None:
"""
......@@ -131,12 +134,10 @@ def init(config: bool, script: bool) -> None:
util.create_example_script(settings)
@click.argument("survey_data",
type=click.Path(
exists=True,
dir_okay=False,
path_type=pathlib.Path)
)
@click.argument(
"survey_data",
type=click.Path(exists=True, dir_okay=False, path_type=pathlib.Path),
)
@cli.command()
def analyze(survey_data: click.Path) -> None:
"""
......@@ -150,10 +151,21 @@ def analyze(survey_data: click.Path) -> None:
settings.load_config_file()
surveyval: HIFISSurveyval = HIFISSurveyval(settings=settings)
surveyval.prepare_environment()
data = DataContainer
logging.info(f"Analyzing file {survey_data.name}")
surveyval.load_all_data(data_file=survey_data)
dispatcher: Dispatcher = Dispatcher(surveyval=surveyval)
# Load the metadata
logging.info(f"Attempt to load metadata from {settings.METADATA}")
with settings.METADATA.open(mode="r") as metadata_io_stream:
metadata_yaml = yaml.safe_load(metadata_io_stream)
data.load_metadata(metadata_yaml)
# Load the actual survey data
with survey_data.open(mode="r") as data_io_stream:
csv_reader = reader(data_io_stream)
data.load_survey_data(csv_data=list(csv_reader))
dispatcher: Dispatcher = Dispatcher(surveyval=surveyval, data=data)
dispatcher.discover()
dispatcher.load_all_modules()
......@@ -19,12 +19,14 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""This module allows discovery and dispatch of analysis functions."""
import copy
import importlib.util
import logging
import traceback
from pathlib import Path
from typing import List
from hifis_surveyval.data_container import DataContainer
from hifis_surveyval.hifis_surveyval import HIFISSurveyval
......@@ -36,7 +38,7 @@ class Dispatcher(object):
module names to be given at initialization.
"""
def __init__(self, surveyval: HIFISSurveyval) -> None:
def __init__(self, surveyval: HIFISSurveyval, data: DataContainer) -> None:
"""
Initialize the Dispatcher.
......@@ -46,6 +48,7 @@ class Dispatcher(object):
particular analysis scripts.
"""
self.surveyval: HIFISSurveyval = surveyval
self.data: DataContainer = data
self.module_folder: Path = self.surveyval.settings.SCRIPT_FOLDER
self.module_names: List[str] = self.surveyval.settings.SCRIPT_NAMES
self.module_name_paths: List[Path] = []
......@@ -145,7 +148,10 @@ class Dispatcher(object):
logging.error(f"Failed to load module {module_name}." f"{error}")
try:
module.run(hifis_surveyval=self.surveyval)
module.run(
hifis_surveyval=copy.deepcopy(self.surveyval),
data=copy.deepcopy(self.data),
)
except AttributeError as error:
traceback.print_exc()
logging.error(
......
......@@ -30,8 +30,9 @@ from hifis_surveyval.core.settings import Settings
def dataframe_value_counts(
dataframe: DataFrame, relative_values: bool = False,
drop_nans: bool = True,
dataframe: DataFrame,
relative_values: bool = False,
drop_nans: bool = True,
) -> DataFrame:
"""
Count how often a unique value appears in each column of a data frame.
......@@ -124,10 +125,10 @@ def cross_reference_sum(data: DataFrame, grouping: Series) -> DataFrame:
# TODO Remove filter and group for Questions?
def filter_and_group_series(
base_data: Series,
group_by: Series,
min_value: Optional[float] = None,
max_value: Optional[float] = None,
base_data: Series,
group_by: Series,
min_value: Optional[float] = None,
max_value: Optional[float] = None,
) -> DataFrame:
"""
Filter a series and group its values according to another series.
......
......@@ -29,7 +29,7 @@ functions.
"""
from logging import warning
from typing import List, Union, Dict
from typing import Dict, List, Union
from hifis_surveyval.models.mixins.identifiable import Identifiable
from hifis_surveyval.models.mixins.yaml_constructable import YamlDict, YamlList
......@@ -95,10 +95,12 @@ class DataContainer(object):
A YAML mapping containing the data for one question collection.
"""
new_collection = QuestionCollection.from_yaml_dictionary(
new_collection_yaml)
new_collection_yaml
)
if new_collection.full_id in self._survey_questions:
raise ValueError("Attempt to add QuestionCollection "
"with duplicate ID")
raise ValueError(
"Attempt to add QuestionCollection " "with duplicate ID"
)
self._survey_questions[new_collection.full_id] = new_collection
def load_survey_data(self, csv_data: List[List[str]]) -> None:
......
......@@ -24,10 +24,11 @@ This is an example script for an analysis.
It is a file payload of the package `hifis_surveyval`.
"""
from hifis_surveyval.data_container import DataContainer
from hifis_surveyval.hifis_surveyval import HIFISSurveyval
def run(hifis_surveyval: HIFISSurveyval):
def run(hifis_surveyval: HIFISSurveyval, data: DataContainer):
"""Execute example script."""
for question in hifis_surveyval.survey_questions:
print(question)
frame = data.get_by_id()
hifis_surveyval.printer.print_dataframe(frame)
......@@ -23,15 +23,9 @@
# -*- coding: utf-8 -*-
"""This project is used to develop analysis scripts for surveys."""
import logging
import sys
from csv import reader
from pathlib import Path
import yaml
from hifis_surveyval.core.settings import Settings
from hifis_surveyval.data_container import DataContainer
from hifis_surveyval.plotting.matplotlib_plotter import MatplotlibPlotter
from hifis_surveyval.printing.printer import Printer
......@@ -53,10 +47,6 @@ class HIFISSurveyval:
in. It will be populated with the related settings during the
initialization of the HIFISSurveyval object.
"""
#: A global copy-on-read container for providing the survey data
#: to the analysis functions
self.dataContainer: DataContainer = DataContainer()
#: The settings storage
self.settings: Settings = settings
......@@ -68,8 +58,9 @@ class HIFISSurveyval:
# register printer
self.printer: Printer = Printer()
self._prepare_environment()
def prepare_environment(self) -> None:
def _prepare_environment(self) -> None:
"""
Prepare the runtime environment.
......@@ -83,27 +74,3 @@ class HIFISSurveyval:
if self.settings.ANALYSIS_OUTPUT_PATH is not None:
if not self.settings.ANALYSIS_OUTPUT_PATH.exists():
self.settings.ANALYSIS_OUTPUT_PATH.mkdir(parents=True)
def load_all_data(self, data_file: Path) -> None:
"""
Populate the data container with the survey results and metadata.
Args:
data_file (click.File): File that contains the data for the
analysis.
Raises:
IOError: Exception thrown if data could not be parsed.
IOError: Exception thrown if metadata could not be parsed.
"""
# Load the metadata
logging.info(f"Attempt to load metadata from {self.settings.METADATA}")
with self.settings.METADATA.open(mode="r") as metadata_io_stream:
metadata_yaml = yaml.safe_load(metadata_io_stream)
self.dataContainer.load_metadata(metadata_yaml)
# Load the actual survey data
with data_file.open(mode="r") as data_io_stream:
csv_reader = reader(data_io_stream)
self.dataContainer.load_survey_data(csv_data=list(csv_reader))
......@@ -25,7 +25,7 @@ from schema import Schema
from hifis_surveyval.models.mixins.identifiable import Identifiable
from hifis_surveyval.models.mixins.yaml_constructable import (
YamlConstructable,
YamlDict
YamlDict,
)
from hifis_surveyval.models.translated import Translated
......@@ -37,18 +37,10 @@ class AnswerOption(YamlConstructable, Identifiable):
token_LABEL = "label"
token_TEXT = "text"
schema = Schema({
token_ID: str,
token_LABEL: str,
token_TEXT: dict
})
schema = Schema({token_ID: str, token_LABEL: str, token_TEXT: dict})
def __init__(
self,
parent_id: str,
option_id: str,
text: Translated,
label: str
self, parent_id: str, option_id: str, text: Translated, label: str
) -> None:
"""
Create an answer option from the metadata.
......@@ -118,5 +110,7 @@ class AnswerOption(YamlConstructable, Identifiable):
parent_id=parent_id,
option_id=yaml[AnswerOption.token_ID],
label=yaml[AnswerOption.token_LABEL],
text=Translated.from_yaml_dictionary(yaml[AnswerOption.token_TEXT])
text=Translated.from_yaml_dictionary(
yaml[AnswerOption.token_TEXT]
),
)
......@@ -25,7 +25,7 @@ This module contains the base class for all objects that carry a unique ID.
IDs are composed of multiple parts interjected by a hierarchy separator.
"""
from typing import Set, Optional
from typing import Optional, Set
class Identifiable(object):
......@@ -44,9 +44,7 @@ class Identifiable(object):
HIERARCHY_SEPARATOR: str = "/"
known_ids: Set[str] = set()
def __init__(self,
object_id: str,
parent_id: Optional[str] = None):
def __init__(self, object_id: str, parent_id: Optional[str] = None):
"""
Create a new identifiable object with a given ID.
......@@ -72,13 +70,12 @@ class Identifiable(object):
)
if object_id in Identifiable.known_ids:
raise ValueError(
f"Attempted to assign duplicate ID {object_id}"
)
raise ValueError(f"Attempted to assign duplicate ID {object_id}")
self._full_id: str = (
f"{parent_id}{Identifiable.HIERARCHY_SEPARATOR}{object_id}"
if parent_id else object_id
if parent_id
else object_id
)
Identifiable.known_ids.add(self._full_id)
......@@ -101,6 +98,7 @@ class Identifiable(object):
The string identifying this object with respect to its siblings
"""
return self._full_id.split(Identifiable.HIERARCHY_SEPARATOR)[-1]
# TODO: Decide whether to cache the short id
@property
......
......@@ -26,8 +26,9 @@ structure of the YAML to be parsed.
"""
from abc import ABC, abstractmethod
from typing import Dict, Union, List
from schema import Schema, Or
from typing import Dict, List, Union
from schema import Or, Schema
# A shorthand type for the kind of lists and dictionaries that can be
# encountered when parsing YAML data
......@@ -66,9 +67,9 @@ class YamlConstructable(ABC):
pass
@classmethod
def from_yaml_dictionary(cls,
yaml: YamlDict,
**kwargs) -> "YamlConstructable":
def from_yaml_dictionary(
cls, yaml: YamlDict, **kwargs
) -> "YamlConstructable":
"""
Instantiate an object of this class from a given YamlDict.
......
......@@ -24,9 +24,8 @@ This module contains classes to represent survey questions.
These can be constructed from YAML through the YamlConstructable abstract
class.
"""
from typing import Dict, List
# alias name to avoid clash with schema.Optional
from typing import Dict, List
from typing import Optional as typing_Optional
from schema import Optional, Schema
......@@ -36,7 +35,7 @@ from hifis_surveyval.models.answer_types import VALID_ANSWER_TYPES
from hifis_surveyval.models.mixins.identifiable import Identifiable
from hifis_surveyval.models.mixins.yaml_constructable import (
YamlConstructable,
YamlDict
YamlDict,
)
from hifis_surveyval.models.translated import Translated
......@@ -58,27 +57,29 @@ class Question(YamlConstructable, Identifiable):
token_DATA_TYPE = "datatype"
token_MANDATORY = "mandatory"
schema = Schema({
token_ID: str,
token_LABEL: str,
token_TEXT: dict,
token_DATA_TYPE: lambda t: t in VALID_ANSWER_TYPES,
token_MANDATORY: bool,
Optional(token_ANSWER_OPTIONS, default=[]): list,
Optional(str): object # Catchall for unsupported yaml data
})
schema = Schema(
{
token_ID: str,
token_LABEL: str,
token_TEXT: dict,
token_DATA_TYPE: lambda t: t in VALID_ANSWER_TYPES,
token_MANDATORY: bool,
Optional(token_ANSWER_OPTIONS, default=[]): list,
Optional(str): object, # Catchall for unsupported yaml data
}
)
# TODO: log unsupported elements in YAML?
def __init__(
self,
parent_id: str,
question_id: str,
text: Translated,
label: str,
answer_type: type,
mandatory: bool,
answer_options: List[AnswerOption],
self,
parent_id: str,
question_id: str,
text: Translated,
label: str,
answer_type: type,
mandatory: bool,
answer_options: List[AnswerOption],
):
"""
Initialize a question object with metadata.
......@@ -194,9 +195,9 @@ class Question(YamlConstructable, Identifiable):
answer_options = [
AnswerOption.from_yaml_dictionary(
yaml=answer_yaml,
parent_id=question_id
) for answer_yaml in yaml[Question.token_ANSWER_OPTIONS]
yaml=answer_yaml, parent_id=question_id
)
for answer_yaml in yaml[Question.token_ANSWER_OPTIONS]
]
return Question(
......@@ -206,5 +207,5 @@ class Question(YamlConstructable, Identifiable):
text=Translated(yaml[Question.token_TEXT]),
answer_type=answer_type,
answer_options=answer_options,
mandatory=yaml[Question.token_MANDATORY]
mandatory=yaml[Question.token_MANDATORY],
)
......@@ -24,14 +24,14 @@ This module contains classes to represent groups of survey questions.
These can be constructed from YAML through the YamlConstructable abstract
class.
"""
from typing import List, Dict
from typing import Dict, List
from schema import Schema, Optional
from schema import Optional, Schema
from hifis_surveyval.models.mixins.identifiable import Identifiable
from hifis_surveyval.models.mixins.yaml_constructable import (
YamlConstructable,
YamlDict
YamlDict,
)
from hifis_surveyval.models.question import Question
from hifis_surveyval.models.translated import Translated
......@@ -50,20 +50,22 @@ class QuestionCollection(YamlConstructable, Identifiable):
token_TEXT = "text"
token_QUESTIONS = "questions"
schema = Schema({
token_ID: str,
token_LABEL: str,
token_TEXT: dict,
Optional(token_QUESTIONS, default=[]): list,
Optional(str): object # catchall
})
schema = Schema(
{
token_ID: str,
token_LABEL: str,
token_TEXT: dict,
Optional(token_QUESTIONS, default=[]): list,
Optional(str): object, # catchall
}
)
def __init__(
self,
collection_id: str,
text: Translated,
label: str,
questions: List[Question],
self,
collection_id: str,
text: Translated,
label: str,
questions: List[Question],
) -> None:
"""
Initialize an empty question collection.
......@@ -88,7 +90,8 @@ class QuestionCollection(YamlConstructable, Identifiable):
self._text: Translated = text
self._label: str = label
self._questions: Dict[str, Question] = {
question.short_id: question for question in questions}
question.short_id: question for question in questions
}
def question_for_id(self, question_short_id: str) -> Question:
"""
......@@ -105,8 +108,9 @@ class QuestionCollection(YamlConstructable, Identifiable):
return self._questions[question_short_id]
@staticmethod
def _from_yaml_dictionary(yaml: YamlDict,
**kwargs) -> "QuestionCollection":
def _from_yaml_dictionary(
yaml: YamlDict, **kwargs
) -> "QuestionCollection":
"""
Generate a new QuestionCollection-instance from YAML data.
......@@ -122,8 +126,8 @@ class QuestionCollection(YamlConstructable, Identifiable):
questions = [
Question.from_yaml_dictionary(
yaml=question_yaml,
parent_id=collection_id)
yaml=question_yaml, parent_id=collection_id
)
for question_yaml in yaml[QuestionCollection.token_QUESTIONS]
]
......@@ -135,5 +139,5 @@ class QuestionCollection(YamlConstructable, Identifiable):
collection_id=collection_id,
text=text,
label=yaml[QuestionCollection.token_LABEL],
questions=questions
questions=questions,
)
......@@ -22,11 +22,12 @@
from typing import Dict, List
from schema import Schema, And, Regex
from schema import And, Regex, Schema
from hifis_surveyval.models.mixins.yaml_constructable import (
YamlConstructable,
YamlDict)
YamlDict,
)
class Translated(YamlConstructable):
......@@ -36,11 +37,15 @@ class Translated(YamlConstructable):
Languages are identified by their ISO 639-1 two-letter codes.
"""
schema = Schema({
And(str, Regex("^[a-z]{2}$")):
And(str, lambda s: s,
error="Translation must neither be empty nor None")
})
schema = Schema(
{
And(str, Regex("^[a-z]{2}$")): And(
str,
lambda s: s,
error="Translation must neither be empty nor None",
)
}
)
"""
The validation schema used for translation dictionaries.
* The dictionary may not be empty
......
This diff is collapsed.
......@@ -20,7 +20,7 @@
[tool.poetry]
name = "hifis-surveyval"
version = "0.4.0"
version = "0.5.0"
description = "This project is used to develop analysis scripts for the HIFIS Software survey."