Commit ed492ac3 authored by Erxleben, Fredo's avatar Erxleben, Fredo Committed by Hueser, Christian (FWCC) - 138593
Browse files

Offer Getters for _text and _label in QuestionCollections, Questions and AnswerOptions

* Offer Mixins for properties _text and _label as well as for settings.
* Adapt models accordingly.
* Adapt unit test cases.
* Move HIERARCHY_SEPARATOR to class Settings.
parent 3f99e96b
Pipeline #103870 passed with stages
in 5 minutes and 37 seconds
......@@ -196,6 +196,10 @@ class FileSettings(BaseSettings):
# question collection by LimeSurvey
ANONYMOUS_QUESTION_ID: str = "_"
# Separator used to distinguish the elements in the hierarchical IDs of
# model elements.
HIERARCHY_SEPARATOR: str = "/"
# Separator used in header of CSV data to separate QuestionCollection ID
# and Question ID.
DATA_ID_SEPARATOR: str = "_"
......
......@@ -35,7 +35,6 @@ import pandas
from pandas import DataFrame
from hifis_surveyval.core.settings import Settings
from hifis_surveyval.models.mixins.identifiable import Identifiable
from hifis_surveyval.models.mixins.yaml_constructable import YamlDict, YamlList
from hifis_surveyval.models.question import Question
from hifis_surveyval.models.question_collection import QuestionCollection
......@@ -145,23 +144,18 @@ class DataContainer(object):
body: List[List[str]] = csv_data[1:]
question_cache: Dict[int, Question] = {}
# The question cache associates column indices with questions.
# It is here to avoid having to constantly look up the questions all
# over again. This expects that in each row the indices for the
# questions are identical, which, given the input is CSV data,
# should be the case.
# Step 0: Check if all questions are present in the header
for question_collection in self._survey_questions.values():
for question in question_collection.questions:
if question.full_id not in header:
logging.warning(f"Question {question.full_id} was in "
f"metadata but not in the CSV file")
"""
The question cache associates column indices with questions.
It is here to avoid having to constantly look up the questions all
over again. This expects that in each row the indices for the
questions are identical, which, given the input is CSV data,
should be the case.
"""
# Step 1: Find the column for the participant IDs
# Step 0: Find the column for the participant IDs
id_column_index = header.index(self._settings.ID_COLUMN_NAME)
# Step 2: Find the Question for each of the headings
# Step 1: Find the Question for each of the headings
for index in range(0, len(header)):
if index == id_column_index:
# no need to check this, it will not be a question
......@@ -176,7 +170,11 @@ class DataContainer(object):
if self._settings.DATA_ID_SEPARATOR in potential_question_id:
potential_question_id = potential_question_id.replace(
self._settings.DATA_ID_SEPARATOR,
Identifiable.HIERARCHY_SEPARATOR)
self._settings.HIERARCHY_SEPARATOR
)
header[index] = potential_question_id
# Update the cached header, for later cross referencing.
# (This does not touch the actual CSV file header in any way)
# Limesurvey has that thing where questions may be at the top
# level (i.e. not within a collection) but still named as if
......@@ -187,9 +185,10 @@ class DataContainer(object):
# indicator for this special case because there won't be any
# clashes with the question IDs allowed by Limesurvey and hence
# there won't be any naming confusion introduced here.
if Identifiable.HIERARCHY_SEPARATOR not in potential_question_id:
potential_question_id += Identifiable.HIERARCHY_SEPARATOR
if self._settings.HIERARCHY_SEPARATOR not in potential_question_id:
potential_question_id += self._settings.HIERARCHY_SEPARATOR
potential_question_id += self._settings.ANONYMOUS_QUESTION_ID
header[index] = potential_question_id
# Handle the regular case
try:
......@@ -204,6 +203,13 @@ class DataContainer(object):
assert id_column_index not in question_cache
# Step 2: Check if all questions are present in the header
for question_collection in self._survey_questions.values():
for question in question_collection.questions:
if question.full_id not in header:
logging.warning(f"Question {question.full_id} was in "
f"metadata but not in the CSV file")
# Step 3: Iterate through each row and insert the values for answer
for row in body:
participant_id = row[id_column_index]
......@@ -245,7 +251,7 @@ class DataContainer(object):
If either the collection or the question for the given ID
could not be found.
"""
parts: List[str] = full_id.split(Identifiable.HIERARCHY_SEPARATOR)
parts: List[str] = full_id.split(self._settings.HIERARCHY_SEPARATOR)
collection_id = parts[0]
question_id = parts[1]
collection = self.collection_for_id(collection_id)
......
......@@ -22,7 +22,8 @@
from schema import Schema
from hifis_surveyval.models.mixins.identifiable import Identifiable
from hifis_surveyval.core.settings import Settings
from hifis_surveyval.models.mixins.mixins import HasLabel, HasText, HasID
from hifis_surveyval.models.mixins.yaml_constructable import (
YamlConstructable,
YamlDict,
......@@ -30,17 +31,22 @@ from hifis_surveyval.models.mixins.yaml_constructable import (
from hifis_surveyval.models.translated import Translated
class AnswerOption(YamlConstructable, Identifiable):
class AnswerOption(YamlConstructable, HasID, HasLabel, HasText):
"""The AnswerOption models allowed answers for a specific Question."""
token_ID = "id"
token_LABEL = "label"
token_TEXT = "text"
schema = Schema({token_ID: str, token_LABEL: str, token_TEXT: dict})
schema = Schema(
{token_ID: str, HasLabel.YAML_TOKEN: str, HasText.YAML_TOKEN: dict}
)
def __init__(
self, parent_id: str, option_id: str, text: Translated, label: str
self,
parent_id: str,
option_id: str,
text: Translated,
label: str,
settings: Settings
) -> None:
"""
Create an answer option from the metadata.
......@@ -55,10 +61,16 @@ class AnswerOption(YamlConstructable, Identifiable):
answer option across various languages.
label:
A short string used to represent the answer option in plotting.
settings:
An object reflecting the application settings.
"""
super().__init__(option_id, parent_id)
self._text = text
self._label = label
super().__init__(
object_id=option_id,
parent_id=parent_id,
label=label,
translations=text,
settings=settings
)
def __str__(self) -> str:
"""
......@@ -69,27 +81,6 @@ class AnswerOption(YamlConstructable, Identifiable):
"""
return f"{self.full_id}: {self._label}"
@property
def text(self) -> Translated:
"""
Obtain the full text that was associated with this answer.
Returns:
An object containing all the translations for text associated with
this answer option.
"""
return self._text
@property
def label(self) -> str:
"""
Get the label of this answer option.
Returns:
A label serving as a short description of this option.
"""
return self._label
@staticmethod
def _from_yaml_dictionary(yaml: YamlDict, **kwargs) -> "AnswerOption":
"""
......@@ -105,12 +96,14 @@ class AnswerOption(YamlConstructable, Identifiable):
A new AnswerOption containing the provided data
"""
parent_id = kwargs["parent_id"]
settings: Settings = kwargs["settings"]
return AnswerOption(
parent_id=parent_id,
option_id=yaml[AnswerOption.token_ID],
label=yaml[AnswerOption.token_LABEL],
label=yaml[HasLabel.YAML_TOKEN],
text=Translated.from_yaml_dictionary(
yaml[AnswerOption.token_TEXT]
yaml[HasText.YAML_TOKEN]
),
settings=settings
)
......@@ -18,17 +18,120 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
This module contains the base class for all objects that carry a unique ID.
This module provides mixins for model classes with certain properties.
IDs are composed of multiple parts interjected by a hierarchy separator.
They are designed to co-operate with other mixins and forward unused
initialization arguments down to other mixins in the inheritance order.
"""
from abc import ABC
from typing import Set, Optional
from hifis_surveyval.models.mixins.uses_settings import UsesSettings
from hifis_surveyval.models.translated import Translated
from typing import Optional, Set
class HasLabel(ABC):
    """
    This mixin provides a label property.

    This is used as a shorthand for objects with more complex descriptions
    that do not fit nicely in some places (e.g. as labels for graph axis).
    """

    YAML_TOKEN = "label"
    """The token used in metadata YAML files to identify labels."""

    def __init__(self, label: str, *args, **kwargs) -> None:
        """
        Initialize a labelled object.

        Args:
            label:
                The label to be given to the object.
            *args:
                Will be forwarded to other mixins in the initialization order.
            **kwargs:
                Will be forwarded to other mixins in the initialization order.
        """
        # Co-operative initialization: everything that is not consumed here
        # is handed on to the next class in the method resolution order.
        super().__init__(*args, **kwargs)
        self._label = label

    @property
    def label(self) -> str:
        """
        Get the current label of the object.

        Returns:
            The current object label.
        """
        return self._label

    def relabel(self, new_label: str) -> None:
        """
        Set a new label for this object.

        If the string representation of the new label is empty, nothing will
        be changed.

        Args:
            new_label:
                The new label to be used for the object. If required,
                the input will be cast to string before processing.
        """
        if not isinstance(new_label, str):
            new_label = str(new_label)

        # An empty label is silently ignored, the previous label is kept.
        if new_label:
            self._label = new_label
class HasText(ABC):
    """
    This mixin provides a text property.

    This is used as a more detailed description of the object, e.g. a
    verbatim question text. These texts may be translated, so when accessing
    them, providing a language is often required.
    """

    YAML_TOKEN = "text"
    """The token used in metadata YAML files to identify texts."""

    def __init__(self, translations: Translated, *args, **kwargs) -> None:
        """
        Initialize an object with a translatable description.

        Args:
            translations:
                The possible translations of the description.
            *args:
                Will be forwarded to other mixins in the initialization order.
            **kwargs:
                Will be forwarded to other mixins in the initialization order.
        """
        # Co-operative initialization: everything that is not consumed here
        # is handed on to the next class in the method resolution order.
        super().__init__(*args, **kwargs)
        self._text: Translated = translations

    def text(self, language_code: str) -> str:
        """
        Get the description text in a specific language.

        The lookup is delegated to the stored translations object.

        Args:
            language_code:
                The IETF code for the language.

        Returns:
            The translated description, if available.

        Raises:
            KeyError:
                If no translation for the requested language (with or
                without region code) can be found. The error originates
                from the translation lookup and is not handled here.
        """
        return self._text.get_translation(language_code=language_code)
class HasID(UsesSettings):
"""
This is the abstract superclass for all objects that carry an ID.
......@@ -41,10 +144,15 @@ class Identifiable(object):
same.
"""
HIERARCHY_SEPARATOR: str = "/"
known_ids: Set[str] = set()
def __init__(self, object_id: str, parent_id: Optional[str] = None):
def __init__(
self,
object_id: str,
parent_id: Optional[str] = None,
*args,
**kwargs,
) -> None:
"""
Create a new identifiable object with a given ID.
......@@ -60,24 +168,30 @@ class Identifiable(object):
(Optional, Default=None) The full ID of another identifiable
object that forms the hierarchical parent of this one. Used
to generate the full ID.
*args:
Will be forwarded to other mixins in the initialization order.
**kwargs:
Will be forwarded to other mixins in the initialization order.
Raises:
ValueError:
Signals either a duplicate or invalid object_id
"""
super(HasID, self).__init__(*args, **kwargs)
if not object_id:
raise ValueError(
"ID of an identifiable object may neither be empty nor None"
)
if object_id in Identifiable.known_ids:
if object_id in HasID.known_ids:
raise ValueError(f"Attempted to assign duplicate ID {object_id}")
self._full_id: str = (
f"{parent_id}{Identifiable.HIERARCHY_SEPARATOR}{object_id}"
f"{parent_id}{self._settings.HIERARCHY_SEPARATOR}{object_id}"
if parent_id
else object_id
)
Identifiable.known_ids.add(self._full_id)
HasID.known_ids.add(self._full_id)
def __del__(self) -> None:
"""
......@@ -86,7 +200,7 @@ class Identifiable(object):
The used ID will be removed from the known IDs and can be re-used.
"""
try:
Identifiable.known_ids.remove(self._full_id)
HasID.known_ids.remove(self._full_id)
# FIXME For some reason removing the full ID from the list of
# known IDs fails due to them already being removed. But why?
# This has been put into this little exception-catch box to not
......@@ -103,7 +217,7 @@ class Identifiable(object):
Returns:
The string identifying this object with respect to its siblings
"""
return self._full_id.split(Identifiable.HIERARCHY_SEPARATOR)[-1]
return self._full_id.split(self._settings.HIERARCHY_SEPARATOR)[-1]
# TODO: Decide whether to cache the short id
......@@ -114,7 +228,7 @@ class Identifiable(object):
Returns:
The string identifying the object with respect to any other
Identifiable
HasID
"""
return self._full_id
......
# hifis-surveyval
# Framework to help developing analysis scripts for the HIFIS Software survey.
#
# SPDX-FileCopyrightText: 2021 HIFIS Software <support@hifis.net>
#
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
This module contains a mixin for objects that need access to the settings.
The mixin caches a reference to the application settings instance.
"""
from abc import ABC
from hifis_surveyval.core.settings import Settings
class UsesSettings(ABC):
    """
    This is a mixin for objects that need access to the settings to function.

    It caches a reference to the settings instance.
    """

    def __init__(self, settings: Settings, *args, **kwargs) -> None:
        """
        Initialize an object that uses the application settings.

        Args:
            settings:
                The applications settings object.
            *args:
                Will be forwarded to other mixins in the initialization order.
            **kwargs:
                Will be forwarded to other mixins in the initialization order.

        Raises:
            ValueError:
                If the given settings object is None or otherwise falsy.
        """
        # Co-operative initialization: everything that is not consumed here
        # is handed on to the next class in the method resolution order.
        super().__init__(*args, **kwargs)

        if not settings:
            raise ValueError(
                "This object requires access to the application settings"
            )
        self._settings: Settings = settings
......@@ -34,13 +34,13 @@ from pandas import Series
from hifis_surveyval.core.settings import Settings
from hifis_surveyval.models.answer_option import AnswerOption
from hifis_surveyval.models.answer_types import VALID_ANSWER_TYPES
from hifis_surveyval.models.mixins.identifiable import Identifiable
from hifis_surveyval.models.mixins.mixins import HasLabel, HasText, HasID
from hifis_surveyval.models.mixins.yaml_constructable import (
YamlConstructable, YamlDict)
from hifis_surveyval.models.translated import Translated
class Question(YamlConstructable, Identifiable):
class Question(YamlConstructable, HasID, HasLabel, HasText):
"""
Questions model concrete questions that could be answered in the survey.
......@@ -51,8 +51,6 @@ class Question(YamlConstructable, Identifiable):
"""
token_ID = "id"
token_LABEL = "label"
token_TEXT = "text"
token_ANSWER_OPTIONS = "answers"
token_DATA_TYPE = "datatype"
token_MANDATORY = "mandatory"
......@@ -60,8 +58,8 @@ class Question(YamlConstructable, Identifiable):
schema = schema.Schema(
{
token_ID: str,
token_LABEL: str,
token_TEXT: dict,
HasLabel.YAML_TOKEN: str,
HasText.YAML_TOKEN: dict,
token_DATA_TYPE: lambda t: t in VALID_ANSWER_TYPES,
token_MANDATORY: bool,
schema.Optional(token_ANSWER_OPTIONS, default=[]): list,
......@@ -114,10 +112,13 @@ class Question(YamlConstructable, Identifiable):
settings:
An object reflecting the application settings.
"""
super().__init__(question_id, parent_id)
self._settings = settings
self._text = text
self._label = label
super().__init__(
object_id=question_id,
parent_id=parent_id,
label=label,
translations=text,
settings=settings
)
self._answer_type = answer_type
self._mandatory = mandatory
......@@ -154,9 +155,11 @@ class Question(YamlConstructable, Identifiable):
If answer options were present, but none of the answer options
had an ID that matched the given value
"""
# TODO this check should be performed when marking invalid answers,
# but must not prevent answers from being included in the first place
# Mandatory questions must have an answer
if self._mandatory and not value:
raise ValueError("No answer was given, but it was mandatory")
# if self._mandatory and not value:
# raise ValueError("No answer was given, but it was mandatory")
if not value:
# Convert empty strings to None to properly indicate that no
......@@ -263,23 +266,24 @@ class Question(YamlConstructable, Identifiable):
"""
question_id = yaml[Question.token_ID]
parent_id = kwargs["parent_id"]
settings: Settings = kwargs["settings"]
answer_type: type = VALID_ANSWER_TYPES[yaml[Question.token_DATA_TYPE]]
answer_options = [
AnswerOption.from_yaml_dictionary(
yaml=answer_yaml, parent_id=question_id
yaml=answer_yaml,
parent_id=question_id,
settings=settings
)
for answer_yaml in yaml[Question.token_ANSWER_OPTIONS]
]
settings: Settings = kwargs["settings"]
return Question(
question_id=question_id,
parent_id=parent_id,
label=yaml[Question.token_LABEL],
text=Translated(yaml[Question.token_TEXT]),
label=yaml[HasLabel.YAML_TOKEN],
text=Translated(yaml[HasText.YAML_TOKEN]),
answer_type=answer_type,
answer_options=answer_options,
mandatory=yaml[Question.token_MANDATORY],
......
......@@ -31,7 +31,8 @@ from typing import Union
from pandas import DataFrame, Series, concat
from schema import Optional, Schema
from hifis_surveyval.models.mixins.identifiable import Identifiable
from hifis_surveyval.core.settings import Settings
from hifis_surveyval.models.mixins.mixins import HasLabel, HasText, HasID
from hifis_surveyval.models.mixins.yaml_constructable import (
YamlConstructable,
YamlDict,
......@@ -40,7 +41,7 @@ from hifis_surveyval.models.question import Question
from hifis_surveyval.models.translated import Translated
class QuestionCollection(YamlConstructable, Identifiable):
class QuestionCollection(YamlConstructable, HasID, HasLabel, HasText):
"""
QuestionCollections group a set of questions into a common context.
......@@ -49,15 +50,13 @@ class QuestionCollection(YamlConstructable, Identifiable):
"""
token_ID = "id"
token_LABEL = "label"
token_TEXT = "text"
token_QUESTIONS = "questions"
schema = Schema(
{
token_ID: str,
token_LABEL: str,
token_TEXT: dict,
HasLabel.YAML_TOKEN: str,
HasText.YAML_TOKEN: dict,
Optional(token_QUESTIONS, default=[]): list,
Optional(str): object, # catchall
}
......@@ -69,6 +68,7 @@ class QuestionCollection(YamlConstructable, Identifiable):
text: Translated,
label: str,
questions: List[Question],
settings: Settings,
) -> None:
"""
Initialize an empty question collection.
......@@ -79,6 +79,8 @@ class QuestionCollection(YamlConstructable, Identifiable):
Args:
collection_id:
The unique ID that is to be assigned to the collection.
Since QuestionCollections have no parent the collection ID
serves as the full ID as well as the short ID.
text:
A Translated object representing the text that describes the