Verified Commit 378beb3d authored by Erxleben, Fredo's avatar Erxleben, Fredo
Browse files

Merge branch 'main' into...

Merge branch 'main' into 173-allow-mandatory-flag-in-questioncollections-for-multiple-choice-questioncollections
parents 8e3172e7 2e14edd8
Pipeline #107194 failed with stages
in 1 minute and 32 seconds
......@@ -31,6 +31,25 @@ Group your changes into these categories:
`Added`, `Changed`, `Deprecated`, `Removed`, `Fixed`, `Security`.
## [1.3.0](https://gitlab.hzdr.de/hifis/overall/surveys/hifis-surveyval/-/releases/v1.3.0) - 2021-10-26
[List of commits](https://gitlab.hzdr.de/hifis/overall/surveys/hifis-surveyval/-/compare/v1.2.0...v1.3.0)
### Fixed
- Fix full IDs of AnswerOptions to be a concatenation of QuestionCollection ID, Question ID and AnswerOption ID
([!146](https://gitlab.hzdr.de/hifis/overall/surveys/hifis-surveyval/-/merge_requests/146)
by [erxleb87](https://gitlab.hzdr.de/erxleb87)).
### Changed
- Use Python's type hinting in example script 01 about accessing data and metadata
([!144](https://gitlab.hzdr.de/hifis/overall/surveys/hifis-surveyval/-/merge_requests/144)
by [hueser93](https://gitlab.hzdr.de/hueser93)).
### Added
- Allow AnswerOptions to also have value representations aside from labels
([!147](https://gitlab.hzdr.de/hifis/overall/surveys/hifis-surveyval/-/merge_requests/147)
by [erxleb87](https://gitlab.hzdr.de/erxleb87)).
## [1.2.0](https://gitlab.hzdr.de/hifis/overall/surveys/hifis-surveyval/-/releases/v1.2.0) - 2021-10-07
[List of commits](https://gitlab.hzdr.de/hifis/overall/surveys/hifis-surveyval/-/compare/v1.2.0-preview...v1.2.0)
......
......@@ -38,7 +38,7 @@ Setting up the Environment
Because the scripts use `hifis-surveyval` as a dependency, it is wise to set
up a dedicated environment for your analysis. We show an example workflow
using `Poetry <https://python-poetry.org/>`_ to achieve that, but other
solutions like `Pipenv <https://pipenv.pypa.io/en/latest/>`_ also work.
solutions like `Pipenv` also work.
First, we need to install poetry on our system.
......
......@@ -217,9 +217,15 @@ class DataContainer(object):
for (question_index, question) in question_cache.items():
answer: str = row[question_index]
try:
question.add_answer(participant_id, answer)
question.add_answer(
participant_id=participant_id,
value_text=answer
)
except (KeyError, ValueError) as error:
warning(f"When loading CSV data: {error}")
warning(
f"When loading CSV data for {question.full_id}:"
f" {error}"
)
def collection_for_id(self, full_id: str) -> QuestionCollection:
"""
......
......@@ -19,10 +19,13 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""This module contains a class to represent survey answers."""
import logging
from typing import Optional as typing_Optional, Generic
from schema import Schema
from schema import Schema, Optional
from hifis_surveyval.core.settings import Settings
from hifis_surveyval.models.answer_types import AnswerType
from hifis_surveyval.models.mixins.mixins import HasLabel, HasText, HasID
from hifis_surveyval.models.mixins.yaml_constructable import (
YamlConstructable,
......@@ -31,13 +34,25 @@ from hifis_surveyval.models.mixins.yaml_constructable import (
from hifis_surveyval.models.translated import Translated
class AnswerOption(YamlConstructable, HasID, HasLabel, HasText):
class AnswerOption(
Generic[AnswerType],
YamlConstructable,
HasID,
HasLabel,
HasText
):
"""The AnswerOption models allowed answers for a specific Question."""
token_ID = "id"
token_VALUE = "value"
schema = Schema(
{token_ID: str, HasLabel.YAML_TOKEN: str, HasText.YAML_TOKEN: dict}
{
token_ID: str,
HasLabel.YAML_TOKEN: str,
HasText.YAML_TOKEN: dict,
Optional(token_VALUE, default=None): object
}
)
def __init__(
......@@ -46,7 +61,8 @@ class AnswerOption(YamlConstructable, HasID, HasLabel, HasText):
option_id: str,
text: Translated,
label: str,
settings: Settings
settings: Settings,
value: typing_Optional[AnswerType],
) -> None:
"""
Create an answer option from the metadata.
......@@ -63,6 +79,10 @@ class AnswerOption(YamlConstructable, HasID, HasLabel, HasText):
A short string used to represent the answer option in plotting.
settings:
An object reflecting the application settings.
value:
(Optional) A value to represent the AnswerOption, e.g. for
sorting, calculations or to represent an underlying value if
the label cannot easily be cast to the intended AnswerType.
"""
super(AnswerOption, self).__init__(
object_id=option_id,
......@@ -71,6 +91,7 @@ class AnswerOption(YamlConstructable, HasID, HasLabel, HasText):
translations=text,
settings=settings
)
self._value: typing_Optional[AnswerType] = value
def __str__(self) -> str:
"""
......@@ -81,6 +102,16 @@ class AnswerOption(YamlConstructable, HasID, HasLabel, HasText):
"""
return f"{self.full_id}: {self._label}"
@property
def value(self) -> typing_Optional[AnswerType]:
"""
Access the underlying value, if present.
Returns:
The underlying value if it is defined, None otherwise.
"""
return self._value
@staticmethod
def _from_yaml_dictionary(yaml: YamlDict, **kwargs) -> "AnswerOption":
"""
......@@ -90,20 +121,45 @@ class AnswerOption(YamlConstructable, HasID, HasLabel, HasText):
yaml:
A YAML dictionary describing the AnswerOption
**kwargs:
Must contain the ID of the Question-instance to which the newly
generated AnswerOption belongs as the parameter "parent_id".
parent_id:
The ID of the Question-instance to which the newly
generated AnswerOption belongs.
settings:
The used settings instance
answer_type:
The data type that the answer value should have.
Returns:
A new AnswerOption containing the provided data
"""
parent_id = kwargs["parent_id"]
settings: Settings = kwargs["settings"]
answer_type: type = kwargs["answer_type"]
option_id = yaml[AnswerOption.token_ID]
label = yaml[HasLabel.YAML_TOKEN]
yaml_value = yaml[AnswerOption.token_VALUE]
value = None
if yaml_value is not None:
value = yaml_value
else:
# If no explicit value was given, try to infer it from the label
try:
value = answer_type(label)
except ValueError:
logging.error(
f"Could not infer answer option value from label for "
f"{parent_id}/{option_id}: \"{label}\" (wanted to cast to"
f" {answer_type.__name__})"
)
return AnswerOption(
return AnswerOption[answer_type](
parent_id=parent_id,
option_id=yaml[AnswerOption.token_ID],
label=yaml[HasLabel.YAML_TOKEN],
option_id=option_id,
label=label,
text=Translated.from_yaml_dictionary(
yaml[HasText.YAML_TOKEN]
),
settings=settings
settings=settings,
value=value
)
......@@ -18,11 +18,27 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""This contains information about the data types supported by answers."""
from typing import Dict
"""
This contains information about the data types supported by answers.
The currently supported types are bool, int, float and str. The type names
are the only valid identifiers in YAML metadata for the data type of questions.
"""
from typing import Dict, TypeVar
VALID_ANSWER_TYPES: Dict[str, type] = {
valid_type.__name__: valid_type for valid_type in (bool, int, float, str)
}
"""
A mapping from the names of accepted types to the actual types.
This allows inferring the actual types from the string representations present
in the YAML metadata.
"""
AnswerType = TypeVar("AnswerType")
"""
Type variable for the generic type used in AnswerOption.
# FIXME: How to properly document this?
It may be a placeholder for any of the accepted answer types.
"""
......@@ -26,14 +26,14 @@ class.
"""
# alias name to avoid clash with schema.Optional
import logging
from typing import Dict, Optional, Set
from typing import Dict, Optional, Set, Generic, get_args
import schema
from pandas import Series
from hifis_surveyval.core.settings import Settings
from hifis_surveyval.models.answer_option import AnswerOption
from hifis_surveyval.models.answer_types import VALID_ANSWER_TYPES
from hifis_surveyval.models.answer_types import VALID_ANSWER_TYPES, AnswerType
from hifis_surveyval.models.mixins.mixins import (
HasLabel, HasText, HasID, HasMandatory,
)
......@@ -44,6 +44,7 @@ from hifis_surveyval.models.translated import Translated
class Question(
Generic[AnswerType],
YamlConstructable,
HasID,
HasLabel,
......@@ -88,7 +89,6 @@ class Question(
question_id: str,
text: Translated,
label: str,
answer_type: type,
mandatory: bool,
settings: Settings,
):
......@@ -96,6 +96,9 @@ class Question(
Initialize a question object with metadata.
The answers have to be added separately via add_answer().
The data type of the answers is given by the generic AnswerType.
It must be one of the supported data types. See also:
hifis_surveyval.models.answer_types.VALID_ANSWER_TYPES
Args:
parent_id:
......@@ -109,10 +112,6 @@ class Question(
label:
A short label that can be used in plotting to represent the
question collection.
answer_type:
The data type of the answers. Must be one of the supported
data types. See also
hifis_surveyval.models.answer_types.VALID_ANSWER_TYPES
mandatory:
Whether there is an answer to this question expected from each
participant in order to consider the participant's answer data
......@@ -128,16 +127,34 @@ class Question(
is_mandatory=mandatory,
settings=settings
)
self._answer_type = answer_type
# Answer options are stored with their short ID as keys for easy
# lookup when associating answers, since answers contain these as
# values when selected.
self._answer_options: Dict[str, AnswerOption] = dict()
self._answer_options: Dict[str, AnswerOption[AnswerType]] = dict()
# The actual answers are not part of the metadata but have to be read
# from other sources in a separate step
self._answers: Dict[str, Optional[answer_type]] = {}
self._answers: Dict[str, Optional[AnswerType]] = {}
@property
def _answer_type(self) -> type:
"""
Get the underlying answer type of the question.
Returns:
The underlying type that answers to this question are supposed
to have
"""
return get_args(self.__orig_class__)[0]
# NOTE: This involves some trickery from the typing library. The
# initial idea comes from
# https://stackoverflow.com/questions/48572831/how-to-access-the
# -type-arguments-of-typing-generic
# and https://www.py4u.net/discuss/144134
# The approach does not work during __init__ though since then the
# instantiation has not yet completed, so caching the type is
# probably not an option.
def _add_answer_option(self, new_answer_option: AnswerOption) -> None:
"""
......@@ -160,7 +177,7 @@ class Question(
self._answer_options[new_answer_option.short_id] = new_answer_option
def add_answer(self, participant_id: str, value: str):
def add_answer(self, participant_id: str, value_text: str) -> None:
"""
Store a given answer to this question.
......@@ -169,50 +186,50 @@ class Question(
Args:
participant_id:
The ID of the participant who gave the answer
value:
value_text:
The text-version of the answer as stored in the CSV.
If the question is mandatory, the value must not be empty.
If answer options are defined the value must match the short id
of the selected answer option.
Raises:
ValueError:
if the question was marked as mandatory but the given value was
an empty string
KeyError:
If answer options were present, but none of the answer options
had an ID that matched the given value
"""
if not value:
if not value_text:
# Convert empty strings to None to properly indicate that no
# data was provided
value = None
elif self._answer_options:
self._answers[participant_id] = None
return
if self._answer_options:
# If answer options are defined, the answer value is expected to
# be the short id of the corresponding answer option
# The label of the option then will be casted to the desired
# data type
option = self._answer_options[value]
value = self._answer_type(option.label)
# FIXME change: answer option values become a separate field,
# no longer derived from labels
elif self._answer_type == bool:
# be the short id of the corresponding answer option to be
# looked up. The actual value is taken from there.
option = self._answer_options[value_text]
self._answers[participant_id] = option.value
return
if self._answer_type == bool:
# When casting to boolean values, Python casts any non-empty string
# to True and only empty strings to False. Consequently, values
# are transformed according to a set of valid true and false
# values to allow for different truth values.
if value in self._settings.TRUE_VALUES:
value = True
elif value in self._settings.FALSE_VALUES:
value = False
if value_text in self._settings.TRUE_VALUES:
bool_value = True
elif value_text in self._settings.FALSE_VALUES:
bool_value = False
else:
logging.error(f"Boolean data is an invalid truth value "
f"in question {self.full_id}: {value}.")
value = None
else:
# try to cast the answer value to the expected type
value = self._answer_type(value)
f"in question {self.full_id}: {value_text}.")
bool_value = None
self._answers[participant_id] = bool_value
return
self._answers[participant_id] = value
# try to cast the answer value to the expected type
self._answers[participant_id] = self._answer_type(value_text)
# FIXME catch if conversion fails
def remove_answers(self, participant_ids: Set[str]) -> None:
"""
......@@ -228,7 +245,7 @@ class Question(
del self._answers[participant_id]
@property
def answers(self) -> Dict[str, Optional[object]]: # NOTE (0) below
def answers(self) -> Dict[str, Optional[AnswerType]]:
"""
Obtain the given answers as read from the survey data.
......@@ -245,10 +262,6 @@ class Question(
"""
return self._answers
# (0) Sadly I found no better way to narrow down the type since I could
# not refer to self._answer_type when specifying the return type.
# Suggestions for improvement are welcome.
def as_series(self) -> Series:
"""
Obtain the answers to this question as a pandas.Series.
......@@ -291,12 +304,11 @@ class Question(
answer_type: type = VALID_ANSWER_TYPES[yaml[Question.token_DATA_TYPE]]
new_question: Question = Question(
new_question: Question = Question[answer_type](
question_id=question_id,
parent_id=parent_id,
label=yaml[HasLabel.YAML_TOKEN],
text=Translated(yaml[HasText.YAML_TOKEN]),
answer_type=answer_type,
mandatory=yaml[HasMandatory.YAML_TOKEN],
settings=settings
)
......@@ -305,7 +317,8 @@ class Question(
new_answer_option = AnswerOption.from_yaml_dictionary(
yaml=answer_yaml,
parent_id=new_question.full_id,
settings=settings
settings=settings,
answer_type=answer_type
)
new_question._add_answer_option(new_answer_option)
......
......@@ -20,7 +20,7 @@
[tool.poetry]
name = "hifis-surveyval"
version = "1.2.0"
version = "1.3.0"
description = "This project is used to develop analysis scripts for the HIFIS Software survey."
license = "GPL-3.0-or-later"
authors = ["HIFIS Software <software@hifis.net>"]
......
......@@ -62,7 +62,8 @@ class TestAnswerOption(object):
answer_option: AnswerOption = AnswerOption.from_yaml_dictionary(
yaml=metadata_fixture[0],
parent_id=TestAnswerOption.question_id,
settings=settings_fixture
settings=settings_fixture,
answer_type=str
)
# Make sure that object retrieved from metadata YAML file given is of
# type AnswerOption.
......@@ -89,7 +90,8 @@ class TestAnswerOption(object):
answer_option: AnswerOption = AnswerOption.from_yaml_dictionary(
yaml=metadata_fixture[0],
parent_id=TestAnswerOption.question_id,
settings=settings_fixture
settings=settings_fixture,
answer_type=str
)
actual_answer_option_id: str = answer_option.short_id
# Make sure that AnswerOption object retrieved from metadata YAML file
......@@ -117,7 +119,8 @@ class TestAnswerOption(object):
answer_option: AnswerOption = AnswerOption.from_yaml_dictionary(
yaml=metadata_fixture[0],
parent_id=TestAnswerOption.question_id,
settings=settings_fixture
settings=settings_fixture,
answer_type=str
)
actual_translated_answer_option_text: str = (
answer_option.text(TestAnswerOption.language_code)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment