Verified Commit 1a66ed40 authored by Hueser, Christian (FWCC) - 138593
Browse files

Introduce type hinting into script and rename variables

* Introduce type hinting so that variables can be renamed and shortened.
parent 1e9d629a
Pipeline #105067 passed with stages
in 2 minutes and 41 seconds
......@@ -20,12 +20,14 @@
"""This example script demonstrates plotting survey data."""
from pathlib import Path
from typing import List
from pandas import DataFrame
from pandas import DataFrame, Series
from hifis_surveyval.core import util
from hifis_surveyval.data_container import DataContainer
from hifis_surveyval.hifis_surveyval import HIFISSurveyval
from hifis_surveyval.models.question import Question
def run(hifis_surveyval: HIFISSurveyval, data: DataContainer):
......@@ -50,17 +52,17 @@ def run(hifis_surveyval: HIFISSurveyval, data: DataContainer):
# Plotting bar charts can be done after preparing a DataFrame of data of a
# specific shape, e.g. frequencies of answers given in absolute numbers.
###
question_id = "Q001/_"
question_center = data.question_for_id(question_id)
dataframe_center = DataFrame(question_center.as_series())
center_frequencies = util.dataframe_value_counts(dataframe_center)
center_frequencies = center_frequencies.rename({
question_id: question_center.label
question_id: str = "Q001/_"
centers: Question = data.question_for_id(question_id)
data_centers: DataFrame = DataFrame(centers.as_series())
centers_freq_abs: DataFrame = util.dataframe_value_counts(data_centers)
centers_freq_abs = centers_freq_abs.rename({
question_id: centers.label
}, axis="columns")
print("1) ===== Plot bar chart of frequencies in absolute numbers =====")
hifis_surveyval.printer.print_dataframe(center_frequencies)
hifis_surveyval.printer.print_dataframe(centers_freq_abs)
hifis_surveyval.plotter.plot_bar_chart(
center_frequencies,
centers_freq_abs,
plot_title="Survey Participants per Helmholtz Centre",
x_axis_label="Helmholtz Centre",
y_axis_label="Frequencies",
......@@ -71,20 +73,25 @@ def run(hifis_surveyval: HIFISSurveyval, data: DataContainer):
# of a specific shape, e.g. frequencies of answers given to a question in
# absolute numbers grouped by answer options of a second question.
###
series_years = data.question_for_id("Q002/_").as_series()
series_center = data.question_for_id("Q001/_").as_series()
dataframe_years_per_center = \
util.filter_and_group_series(series_years, series_center.dropna())
dataframe_frequencies = \
util.dataframe_value_counts(dataframe_years_per_center)
years_order_list = ["0 years", "Up to one year", "1 - 3 years",
"3 - 6 years", "6 - 10 years", "More than 10 years"]
dataframe_frequencies = dataframe_frequencies.reindex(years_order_list)
data_years: Series = data.question_for_id("Q002/_").as_series()
data_centers: Series = data.question_for_id("Q001/_").as_series()
years_per_center: DataFrame = \
util.filter_and_group_series(data_years, data_centers.dropna())
years_per_center_freq_abs: DataFrame = \
util.dataframe_value_counts(years_per_center)
years_order_list: List[str] = ["0 years",
"Up to one year",
"1 - 3 years",
"3 - 6 years",
"6 - 10 years",
"More than 10 years"]
years_per_center_freq_abs = \
years_per_center_freq_abs.reindex(years_order_list)
print("2) ==== Plot matrix chart of frequencies in absolute numbers ====")
hifis_surveyval.printer.print_dataframe(dataframe_years_per_center)
hifis_surveyval.printer.print_dataframe(dataframe_frequencies)
hifis_surveyval.printer.print_dataframe(years_per_center)
hifis_surveyval.printer.print_dataframe(years_per_center_freq_abs)
hifis_surveyval.plotter.plot_matrix_chart(
dataframe_frequencies,
years_per_center_freq_abs,
plot_title="Years of Center Affiliation per Helmholtz Centre",
x_axis_label="Helmholtz Centre",
y_axis_label="Years of Center Affiliation",
......@@ -95,17 +102,16 @@ def run(hifis_surveyval: HIFISSurveyval, data: DataContainer):
# specific shape, e.g. numeric data of a question grouped by answer options
# of another question.
###
question_coding_time = data.question_for_id("Q007/_")
question_center = data.question_for_id("Q001/_")
series_coding_time = question_coding_time.as_series()
series_center = question_center.as_series()
dataframe_time_center = \
util.filter_and_group_series(series_coding_time,
series_center.dropna())
times: Question = data.question_for_id("Q007/_")
centers: Question = data.question_for_id("Q001/_")
data_times: Series = times.as_series()
data_centers: Series = centers.as_series()
data_times_per_center: DataFrame = \
util.filter_and_group_series(data_times, data_centers.dropna())
print("3) ===== Plot box chart of statistics of numeric data given =====")
hifis_surveyval.printer.print_dataframe(dataframe_time_center)
hifis_surveyval.printer.print_dataframe(data_times_per_center)
hifis_surveyval.plotter.plot_box_chart(
data_frames=[dataframe_time_center],
data_frames=[data_times_per_center],
plot_title="Box plot of Coding Time Spent per Helmholtz Centre",
x_axis_label="Helmholtz centre",
y_axis_label="Coding Time Spent",
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment