# hifis-surveyval
# Framework to help developing analysis scripts for the HIFIS Software survey.
#
# SPDX-FileCopyrightText: 2021 HIFIS Software <support@hifis.net>
#
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""
This module contains classes to represent survey questions.

These can be constructed from YAML through the YamlConstructable abstract
class.
"""
# typing.Optional is imported by name, while the schema package is imported
# as a module and accessed as schema.Optional, so the two do not clash
28
from typing import Dict, List, Optional, Set
29

30
31
import schema
from pandas import Series
32

33
from hifis_surveyval.core.settings import Settings
34
35
36
37
38
from hifis_surveyval.models.answer_option import AnswerOption
from hifis_surveyval.models.answer_types import VALID_ANSWER_TYPES
from hifis_surveyval.models.mixins.identifiable import Identifiable
from hifis_surveyval.models.mixins.yaml_constructable import (
    YamlConstructable,
39
    YamlDict,
40
41
)
from hifis_surveyval.models.translated import Translated
42
43


44
class Question(YamlConstructable, Identifiable):
    """
    Questions model concrete questions that could be answered in the survey.

    They can be constructed from YAML metadata via from_yaml_dictionary(). For
    this to be successful the YAML data has to adhere to Question.schema which
    describes the required fields and their data types.
    Answers then have to be added separately via add_answer().
    """

    # Keys expected in the YAML metadata describing a question
    token_ID = "id"
    token_LABEL = "label"
    token_TEXT = "text"
    token_ANSWER_OPTIONS = "answers"
    token_DATA_TYPE = "datatype"
    token_MANDATORY = "mandatory"

    # NOTE: While the right-hand side is evaluated, the name "schema" still
    # resolves to the imported module. Only after the assignment does the
    # class attribute shadow the module name within the class body.
    schema = schema.Schema(
        {
            token_ID: str,
            token_LABEL: str,
            token_TEXT: dict,
            token_DATA_TYPE: lambda t: t in VALID_ANSWER_TYPES,
            token_MANDATORY: bool,
            schema.Optional(token_ANSWER_OPTIONS, default=[]): list,
            schema.Optional(str): object,  # Catchall for unsupported yaml data
        }
    )

    # TODO: log unsupported elements in YAML?

    def __init__(
        self,
        parent_id: str,
        question_id: str,
        text: Translated,
        label: str,
        answer_type: type,
        mandatory: bool,
        answer_options: List[AnswerOption],
        settings: Settings,
    ):
        """
        Initialize a question object with metadata.

        The answers have to be added separately via add_answer().

        Args:
            parent_id:
                The ID of the question collection this question is embedded in.
            question_id:
                An identifier assigned to the question. Must be unique within
                the question collection.
            text:
                A Translated object representing the text that describes the
                question.
            label:
                A short label that can be used in plotting to represent the
                question.
            answer_type:
                The data type of the answers. Must be one of the supported
                data types. See also
                hifis_surveyval.models.answer_types.VALID_ANSWER_TYPES
            mandatory:
                Whether there is an answer to this question expected from each
                participant in order to consider the participant's answer data
                complete.
            answer_options:
                An optional list of predefined answers. If there are none
                given, the question can have any answer, otherwise the answer
                must be the short ID of the selected answer option.
            settings:
                An object reflecting the application settings.
        """
        super().__init__(question_id, parent_id)
        self._settings = settings
        self._text = text
        self._label = label
        self._answer_type = answer_type
        self._mandatory = mandatory

        # Answer options are stored with their short ID as keys for easy
        # lookup when associating answers, since answers contain these as
        # values when selected.
        self._answer_options: Dict[str, AnswerOption] = {
            option.short_id: option for option in answer_options
        }

        # The actual answers are not part of the metadata but have to be read
        # from other sources in a separate step.
        # The value type can not be narrowed down to answer_type statically,
        # since a runtime variable is not a valid type argument.
        self._answers: Dict[str, Optional[object]] = {}

    def add_answer(self, participant_id: str, value: str) -> None:
        """
        Store a given answer to this question.

        The answer value will be cast to the expected answer type.
        Any exception raised by the answer type during this conversion is
        propagated unchanged.

        Args:
            participant_id:
                The ID of the participant who gave the answer
            value:
                The text-version of the answer as stored in the CSV.
                If the question is mandatory, the value must not be empty.
                If answer options are defined the value must match the short id
                of the selected answer option.
        Raises:
            ValueError:
                If the question was marked as mandatory but the given value
                was an empty string
            KeyError:
                If answer options were present, but none of the answer options
                had an ID that matched the given value
        """
        # Mandatory questions must have an answer
        if self._mandatory and not value:
            raise ValueError("No answer was given, but it was mandatory")

        if not value:
            # Convert empty strings to None to properly indicate that no
            # data was provided
            value = None
        elif self._answer_options:
            # If answer options are defined, the answer value is expected to
            # be the short id of the corresponding answer option.
            # The label of the option then will be cast to the desired
            # data type.
            try:
                option = self._answer_options[value]
            except KeyError as error:
                # Same exception type as a plain lookup, but tell the caller
                # which question rejected which value.
                raise KeyError(
                    f"Question {self.full_id} has no answer option "
                    f"with ID '{value}'"
                ) from error
            value = self._answer_type(option.label)
            # FIXME change: answer option values become a separate field,
            #  no longer derived from labels
        else:
            # try to cast the answer value to the expected type
            value = self._answer_type(value)

        self._answers[participant_id] = value

    def remove_answers(self, participant_ids: Set[str]) -> None:
        """
        Remove the answers by the specified participants.

        Args:
            participant_ids:
                The IDs of the participants whose answers are to be removed.
                Invalid IDs are ignored.
        """
        for participant_id in participant_ids:
            # pop() with a default silently skips IDs without a stored answer
            self._answers.pop(participant_id, None)

    @property
    def answers(self) -> Dict[str, Optional[object]]:  # NOTE (0) below
        """
        Obtain the given answers as read from the survey data.

        The answers are given as a mapping:
        participant ID -> participant answer

        The participant ID will be a string, while the answers may be
        assumed to be of the answer_type of the Question.
        If the Question is not mandatory, answers may also be None.

        Returns:
            The mapping from participant ID to the participant's answer for
            this question. This is the internally stored dictionary itself,
            not a copy.
        """
        return self._answers

    # (0) Sadly I found no better way to narrow down the type since I could
    # not refer to self._answer_type when specifying the return type.
    # Suggestions for improvement are welcome.

    def as_series(self) -> Series:
        """
        Obtain the answers to this question as a pandas.Series.

        The series' index are the participant IDs, while data for the
        indices are the respective answers.

        The series will be named with the question's full ID and its index
        will be named after the ID column name given in the settings.

        Returns:
            A pandas.Series representing the answers for each participant
        """
        series = Series(self._answers)
        series.name = self.full_id
        series.index.name = self._settings.ID_COLUMN_NAME
        return series

    @staticmethod
    def _from_yaml_dictionary(yaml: YamlDict, **kwargs) -> "Question":
        """
        Generate a new Question-instance from YAML data.

        Args:
            yaml:
                A YAML dictionary describing the Question

        Keyword Args:
            parent_id:
                (Required) The full ID of the QuestionCollection this Question
                belongs to.
            settings:
                (Required) An object reflecting the applications settings.

        Returns:
            A new Question containing the provided data

        Raises:
            KeyError:
                If a required keyword argument or a required YAML key is
                missing.
        """
        question_id = yaml[Question.token_ID]
        parent_id = kwargs["parent_id"]
        settings: Settings = kwargs["settings"]

        answer_type: type = VALID_ANSWER_TYPES[yaml[Question.token_DATA_TYPE]]

        # The schema declares the answer options as optional with an empty
        # list as default. Fall back to the same default here so that a
        # dictionary which did not receive the schema default still works.
        answer_options = [
            AnswerOption.from_yaml_dictionary(
                yaml=answer_yaml, parent_id=question_id
            )
            for answer_yaml in yaml.get(Question.token_ANSWER_OPTIONS, [])
        ]

        return Question(
            question_id=question_id,
            parent_id=parent_id,
            label=yaml[Question.token_LABEL],
            text=Translated(yaml[Question.token_TEXT]),
            answer_type=answer_type,
            answer_options=answer_options,
            mandatory=yaml[Question.token_MANDATORY],
            settings=settings,
        )