Verified Commit 6c118fa1 authored by Hueser, Christian (FWCC) - 138593
Browse files

Introduce type hinting into script and rename variables

* Introduce type hinting so that variables can be renamed and shortened.
parent 758e2a8c
Pipeline #105063 passed with stages
in 2 minutes and 24 seconds
......@@ -20,10 +20,14 @@
"""This example script demonstrates filtering survey data."""
from pathlib import Path
from typing import List, Dict
from pandas import DataFrame, Series
from hifis_surveyval.core import util
from hifis_surveyval.data_container import DataContainer
from hifis_surveyval.hifis_surveyval import HIFISSurveyval
from hifis_surveyval.models.question_collection import QuestionCollection
def run(hifis_surveyval: HIFISSurveyval, data: DataContainer):
......@@ -51,11 +55,11 @@ def run(hifis_surveyval: HIFISSurveyval, data: DataContainer):
# the data set in particular if the resulting data is normalized and given
# in relative frequencies.
###
dataframe_years = data.collection_for_id("Q002").as_data_frame()
years_freq_abs = util.dataframe_value_counts(dataframe_years)
years_freq_rel = util.dataframe_value_counts(dataframe_years,
relative_values=True)
years_order_list = [
data_years: DataFrame = data.collection_for_id("Q002").as_data_frame()
years_freq_abs: DataFrame = util.dataframe_value_counts(data_years)
years_freq_rel: DataFrame = \
util.dataframe_value_counts(data_years, relative_values=True)
years_order_list: List[str] = [
"0 years",
"Up to one year",
"1 - 3 years",
......@@ -63,29 +67,32 @@ def run(hifis_surveyval: HIFISSurveyval, data: DataContainer):
"6 - 10 years",
"More than 10 years"
]
years_freq_abs = years_freq_abs.reindex(years_order_list)
years_freq_rel = years_freq_rel.reindex(years_order_list)
years_freq_abs: DataFrame = years_freq_abs.reindex(years_order_list)
years_freq_rel: DataFrame = years_freq_rel.reindex(years_order_list)
print("1) ===== Calculate absolute and relative frequencies =====")
hifis_surveyval.printer.print_dataframe(years_freq_abs)
hifis_surveyval.printer.print_dataframe(years_freq_rel)
###
# Grouping is also quite important to calculate frequencies of answers
# given grouped by answer options of another question. First step is to
# group the answers based on answers participants gave regarding another
# question. The second step is then to calculate the frequencies of
# given answers based on another question's answer options.
# given grouped by answer options of another question. The first step is
# to group the answers based on answers participants gave regarding
# another question. Be aware that it is recommended to drop _NaN_ values
# in the _group-by_ Series before passing it as an argument to the
# respective function. The second step is then to calculate the
# frequencies of given answers based on another question's answer options.
###
series_years = data.question_for_id("Q002/_").as_series()
series_center = data.question_for_id("Q001/_").as_series()
dataframe_years_per_center = \
util.filter_and_group_series(series_years, series_center)
dataframe_frequencies = \
util.dataframe_value_counts(dataframe_years_per_center)
dataframe_frequencies = dataframe_frequencies.reindex(years_order_list)
data_years: Series = data.question_for_id("Q002/_").as_series()
data_centers: Series = data.question_for_id("Q001/_").as_series()
years_per_center: DataFrame = \
util.filter_and_group_series(data_years, data_centers.dropna())
years_per_center_freq_abs: DataFrame = \
util.dataframe_value_counts(years_per_center)
years_per_center_freq_abs = \
years_per_center_freq_abs.reindex(years_order_list)
print("2) == Group answers of a question by another question's answers ==")
hifis_surveyval.printer.print_dataframe(dataframe_years_per_center)
hifis_surveyval.printer.print_dataframe(dataframe_frequencies)
hifis_surveyval.printer.print_dataframe(years_per_center)
hifis_surveyval.printer.print_dataframe(years_per_center_freq_abs)
###
# Another way to calculate frequencies of given answers grouped by another
......@@ -99,23 +106,25 @@ def run(hifis_surveyval: HIFISSurveyval, data: DataContainer):
# translated into numeric data will then not just be counted / summed
# but also grouped by another question's answer options. The resulting
# table is then made up of frequencies of one question's answer options
# grouped by another question's answer options.
# grouped by another question's answer options. Here again, it is
# recommended to drop _NaN_ values from the _grouping_ Series before
# passing it to the respective function.
###
collection_vcs = data.collection_for_id("Q015")
dataframe_vcs = collection_vcs.as_data_frame()
vcs_recode_dict = {"Yes": 1,
"Don’t know it": 0,
"Not relevant": 0,
"Don’t know how": 0,
"Doesn’t fit my needs": 0,
"Not available": 0}
dataframe_vcs = dataframe_vcs.replace(vcs_recode_dict)
series_center = data.question_for_id("Q001/_").as_series()
cross_ref_sum = util.cross_reference_sum(dataframe_vcs,
series_center)
vcs_name_map_dict = {
vcs: QuestionCollection = data.collection_for_id("Q015")
data_vcs: DataFrame = vcs.as_data_frame()
vcs_recode_dict: Dict[str, int] = {"Yes": 1,
"Don’t know it": 0,
"Not relevant": 0,
"Don’t know how": 0,
"Doesn’t fit my needs": 0,
"Not available": 0}
data_vcs_recoded: DataFrame = data_vcs.replace(vcs_recode_dict)
data_centers: Series = data.question_for_id("Q001/_").as_series()
cross_ref_sum: DataFrame = \
util.cross_reference_sum(data_vcs_recoded, data_centers.dropna())
vcs_name_map_dict: Dict[str, str] = {
question.full_id: question.label
for question in collection_vcs.questions
for question in vcs.questions
}
cross_ref_sum = cross_ref_sum.rename(vcs_name_map_dict)
print("3) Calculate frequencies of answers grouped by another question")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment