Commit 2c33e349 authored by Timm Schoening's avatar Timm Schoening
Browse files

removed notebooks and moved them to separate repo

parent 4b530b2b
# Project configuration. Values of the form "ADD ... HERE" are placeholders
# that still have to be filled in.

# Locations of the data to curate.
data:
  base_paths: [/volumes/project/]
  base_paths_remote: [/volumes/project/]
  use_gear_folders: false

# Equipment lists per group (CAM, PFM).
equipment:
  CAM:
    - {eqid: ADD CAM EQUIPMENT HERE}
  PFM:
    - {eqid: ADD PFM EQUIPMENT HERE}

# Image metadata. The ___GROUP:FIELD___ tokens in the description look like
# template placeholders - NOTE(review): confirm where they are substituted.
images:
  artist: Holothurian Impact team
  copyright: '(c) International Ocean Research. Contact: press@foobar.de'
  credit: Holothurian Impact & Dr. Jane Doe
  description: >-
    Acquired by camera ___DEPLOYMENT:CAMERAID___ mounted on platform ___DEPLOYMENT:PLATFORM___
    during cruise ___CRUISE:NUMBER___ (station: ___DEPLOYMENT:STATION___). Navigation
    data were automatically edited by the MarIQT software (removal of outliers, smoothed
    and splined to fill time gaps) and linked to the image data by timestamp.
  editor: John Doe
  license: CC-BY
  pfdo:
    acquisition: photo
    deployment: survey
    illumination: artificial light
    image-quality: raw
    navigation: beacon
    resolution: mm
    scale-reference: laser marker
    spectral-resolution: rgb
    zone: seafloor

navigation_data:
  # Navigation processing parameters: one DEFAULT list plus shorter lists for
  # MUC and ROV - presumably per-gear overrides of DEFAULT; verify against the
  # consuming code.
  processing_parameters:
    DEFAULT:
      - {name: source, value: DSHIP}
      - {name: beacon_id, value: 2}
      - {name: max_vertical_speed, unit: m/s, value: 3.0}
      - {name: max_lateral_speed, unit: m/s, value: 2.0}
      - {name: max_time_gap, unit: s, value: 300}
      - {name: smoothing_gauss_half_width, unit: s, value: 60}
      - {name: outlier_check_min_neighbors, unit: number, value: 5}
      - {name: max_allowed_outlier_lateral_dist, unit: m, value: 10}
      - {name: max_allowed_outlier_vertical_dist, unit: m, value: 10}
      - {name: outlier_check_time_window_size, unit: s, value: 60}
    MUC:
      - {name: processing_type, value: station}
      - {name: beacon_id, value: 1}
    ROV:
      - {name: processing_type, value: transect}
      - {name: beacon_id, value: 4}
  sources:
    DSHIP:
      # The three DSHIP locations below are absolute paths on one developer
      # machine; the recorded notebook run fails with "Could not find" for the
      # first of them. Point them at your own copies.
      dship_all_device_operations_file: /Users/tschoening/dev/repos/mariqt-test/files/PRJ23_all-device-operations.dat
      dship_all_underwater_navigation_file: /Users/tschoening/dev/repos/mariqt-test/files/PRJ23_all-underwater-navigation.dat
      data_frequency_seconds: 5
      date_format: '%Y/%m/%d %H:%M:%S'
      # NOTE(review): the curation notebooks look this folder up as
      # 'dship_event_navigation_data_folder' - confirm which key name is intended.
      dship_event_navigation_folder: /Users/tschoening/dev/repos/mariqt-test/files/dship_zips/
      dship_user_mail: jdoe1@foobar.de
      dship_user_name: JohnDoe
      max_depth: 6000
      satellite_navigation: {sensor_equipment_id: ADD_EQUIPMENT_ID_HERE}
      underwater_navigation: {sensor_equipment_id: ADD_EQUIPMENT_ID_HERE}
    FIXED: {latitude: 0.0, longitude: 0.0}

project:
  acronym: Holothurian Impact
  copyright: '(c) International Ocean Research. Contact: press@foobar.de'
  data-pi:
    affiliation: International Ocean Research
    email: jdoe1@foobar.de
    name: John Doe
    orcid: '9876-5432-1000-0000'
  end: '2020-05-27 08:00:00'
  funding: >-
    Funding for this project was provided by the International Funding Agency
    (1234ABCD987)
  info:
    de: Ein deutscher Text mit ca. 1000 Zeichen der das Projekt beschreibt.
    en: 'An english text of ca. 1000 characters length, describing the project.'
  license: CC-BY
  # The notebooks use this value to select DSHIP events and to name the
  # curation cache file.
  number: PRJ23
  pi:
    affiliation: International Ocean Research
    email: jdoe@foobar.de
    name: Dr. Jane Doe
    orcid: '0000-0001-2345-6789'
  start: '2019-02-15 06:00:00'
  title: Assessing the impacts of holothurian harvesting.
{
"cells": [
{
"cell_type": "markdown",
"id": "demonstrated-experiment",
"metadata": {},
"source": [
"# Curation Overview\n",
"This notebook gives an overview of the curation status of your data folders. It stores its results in the cache file `../files/<project>_curation-cache.yaml` (where `<project>` is the project number from the configuration) so that subsequent runs of this notebook are faster. To ignore the cache and rescan everything, set the `rescan` variable in the next cell to `True` and run the notebook again."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "designed-sandwich",
"metadata": {},
"outputs": [],
"source": [
"rescan = False"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "sustained-fluid",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Issue: Base path /volumes/project/ not found\n"
]
},
{
"ename": "NameError",
"evalue": "Could not find: /Users/tschoening/dev/repos/mariqt-test/files/PRJ23_all-device-operations.dat",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-2-3a63bfd7ab9c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;31m# Check curation paths\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"DSHIP\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcfg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'navigation_data'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sources'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m \u001b[0mmiqtc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massertExists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmiqtpf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcfgValue\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'navigation_data:sources:DSHIP:dship_all_device_operations_file'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 28\u001b[0m \u001b[0mmiqtc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massertExists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmiqtpf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcfgValue\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'navigation_data:sources:DSHIP:dship_all_underwater_navigation_file'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0mmiqtc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massertExists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmiqtpf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcfgValue\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'navigation_data:sources:DSHIP:dship_event_navigation_data_folder'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/mariqt/core.py\u001b[0m in \u001b[0;36massertExists\u001b[0;34m(path)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0massertExists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNameError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Could not find: \"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;31m### Asserts that a path string to a directory ends with a slash\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: Could not find: /Users/tschoening/dev/repos/mariqt-test/files/PRJ23_all-device-operations.dat"
]
}
],
"source": [
"#################################################################################################################\n",
"### You should not see - and not modify (!) - this cell, unless you are sure what you are doing! Just run it. ###\n",
"#################################################################################################################\n",
"import mariqt.processing.files as miqtpf\n",
"cfg = miqtpf.cfgFileLoadProjectDefault()\n",
"\n",
"import os\n",
"import mariqt.core as miqtc\n",
"\n",
"# Check base_paths where data resides: each one must exist, be a directory and contain at least one entry\n",
"all_good = True\n",
"for bp in cfg['data']['base_paths']:\n",
"    if not os.path.exists(bp):\n",
"        all_good = False\n",
"        print(\"Issue: Base path\",bp,\"not found\")\n",
"    elif not os.path.isdir(bp):\n",
"        all_good = False\n",
"        print(\"Issue: Base path\",bp,\"points to a file but we require a directory.\")\n",
"    elif len(os.listdir(bp)) == 0:\n",
"        all_good = False\n",
"        print(\"Issue: No data available in base path\",bp)\n",
"if all_good:\n",
"    print(\"It looks like your base path settings are good. There is data. Lets continue to start curating.\")\n",
"\n",
"# Check curation paths (assertExists raises a NameError for a path that does not exist)\n",
"if \"DSHIP\" in cfg['navigation_data']['sources']:\n",
"    miqtc.assertExists(miqtpf.cfgValue(cfg,'navigation_data:sources:DSHIP:dship_all_device_operations_file'))\n",
"    miqtc.assertExists(miqtpf.cfgValue(cfg,'navigation_data:sources:DSHIP:dship_all_underwater_navigation_file'))\n",
"    # NOTE(review): the project configuration names this key 'dship_event_navigation_folder' (without '_data') - confirm which spelling is intended\n",
"    miqtc.assertExists(miqtpf.cfgValue(cfg,'navigation_data:sources:DSHIP:dship_event_navigation_data_folder'))\n",
"\n",
"# Only report success when no base path issue was printed above\n",
"if all_good:\n",
"    print(\"All is good.\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "classified-welcome",
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'marqit'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-8-39fd363a0933>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;31m# events[device_operation] = {'code':<device acronym>,'actions':[{'action':<action>,'lat':<latitude>,'lon':<longitude>,'dep':<depth>,'utc':<timestamp>,...],'start':<start timestamp>}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"DSHIP\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcfg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'navigation_data'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sources'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mmariqt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msources\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdship\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mmiqtsd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mevents\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmiqtsd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparseDSHIPDeviceOperationsOrEventsFile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmiqtpf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcfgValue\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'navigation_data:sources:DSHIP:dship_all_device_operations_file'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/mariqt/sources/dship.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmariqt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgeo\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mmiqtg\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mmarqit\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdship_settings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0maddEndToDSHIPEventsByLastActionBeforeNextEvent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdship_events\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'marqit'"
]
}
],
"source": [
"#################################################################################################################\n",
"### You should not see - and not modify (!) - this cell, unless you are sure what you are doing! Just run it. ###\n",
"#################################################################################################################\n",
"\n",
"import copy\n",
"import os\n",
"import yaml\n",
"import datetime\n",
"\n",
"# Get list of events, expected to be formatted like so:\n",
"# events[device_operation] = {'code':<device acronym>,'actions':[{'action':<action>,'lat':<latitude>,'lon':<longitude>,'dep':<depth>,'utc':<timestamp>,...],'start':<start timestamp>}\n",
"if \"DSHIP\" in cfg['navigation_data']['sources']:\n",
"    import mariqt.sources.dship as miqtsd\n",
"\n",
"    events = miqtsd.parseDSHIPDeviceOperationsOrEventsFile(miqtpf.cfgValue(cfg,'navigation_data:sources:DSHIP:dship_all_device_operations_file'))\n",
"    miqtsd.removeEventsByOtherCruises(events,cfg['project']['number'])\n",
"    miqtsd.renameEvents(events)\n",
"else:\n",
"    raise Exception(\"Can only process DSHIP events by now. Sorry.\")\n",
"\n",
"\n",
"# These are the status information fields that will be collected for each event in the following\n",
"one_event_status = {\"changed\":False,\"dir_exists\":False,\"event_exists\":False,\"doi\":\"\",\"num_actions\":0,\"num_sensors\":0,\"has_gps_nav_raw\":False,\"has_gps_nav_cur\":False,\"has_usbl_nav_raw\":False,\"has_usbl_nav_cur\":False,\"has_protocol\":False,\n",
"                    \"raw_data_vol\":0,\"raw_data_num\":0,\"cur_data_vol\":0,\"cur_data_num\":0,\"prt_data_vol\":0,\"prt_data_num\":0,\"prd_data_vol\":0,\"prd_data_num\":0,\"ext_data_vol\":0,\"ext_data_num\":0,\n",
"                    \"has_images\":False}\n",
"\n",
"# How to map the path names of the folder convention to the short names used here in the script\n",
"path_to_key = {\"external\":\"ext\",\"raw\":\"raw\",\"protocol\":\"prt\",\"products\":\"prd\",\"processed\":\"cur\"}\n",
"\n",
"# Format of the cache creation time; it is written and read back as a plain UTC string\n",
"cache_date_format = \"%Y-%m-%d %H:%M:%S.%f\"\n",
"cache_file = \"../files/\" + cfg['project']['number']+\"_curation-cache.yaml\"\n",
"\n",
"# Check whether a cache file exists and shall be loaded\n",
"all_event_status = {}\n",
"cache_unix = None  # no reference time unless a cache is loaded\n",
"if os.path.exists(cache_file) and not rescan:\n",
"    with open(cache_file,\"r\") as yaml_file:\n",
"        cache = yaml.safe_load(yaml_file)\n",
"    all_event_status = cache['events']\n",
"    print(\"Showing cached status from \",cache['date_created'])\n",
"    cache_unix = datetime.datetime.strptime(cache['date_created']+\"+0000\",cache_date_format+\"%z\").timestamp()\n",
"else:\n",
"    rescan = True\n",
"\n",
"# Find all events (statuses loaded from the cache are kept, not reset)\n",
"for event in events:\n",
"    if event not in all_event_status:\n",
"        all_event_status[event] = copy.deepcopy(one_event_status)\n",
"    all_event_status[event]['event_exists'] = True\n",
"    all_event_status[event]['num_actions'] = len(events[event]['actions'])\n",
"\n",
"\n",
"# Browse all the data base_paths folders and look for event subfolders\n",
"event_folders = {}\n",
"for path in cfg['data']['base_paths']:\n",
"    for tmp_event in os.listdir(path):\n",
"        tmp_event_dir = os.path.join(path,tmp_event)\n",
"        if tmp_event.startswith('.') or not os.path.isdir(tmp_event_dir):\n",
"            continue\n",
"\n",
"        # Remember every base path in which this event folder occurs\n",
"        event_folders.setdefault(tmp_event,[]).append(path)\n",
"\n",
"        # Did we find events that are not known in the event files we opened earlier?\n",
"        if tmp_event not in all_event_status:\n",
"            all_event_status[tmp_event] = copy.deepcopy(one_event_status)\n",
"            all_event_status[tmp_event]['changed'] = True\n",
"        elif cache_unix is not None and os.path.getmtime(tmp_event_dir) > cache_unix:\n",
"            # Folder was modified after the loaded cache was written\n",
"            all_event_status[tmp_event]['changed'] = True\n",
"\n",
"        # The folder was found on disk, whether or not the event is listed in the event file\n",
"        all_event_status[tmp_event]['dir_exists'] = True\n",
"\n",
"\n",
"if rescan:\n",
"\n",
"    import mariqt.definitions as miqtd\n",
"\n",
"    # Same colon-separated key path (rooted at navigation_data) as the other cfgValue lookups\n",
"    satellite_navigation_sensor = miqtpf.cfgValue(cfg,'navigation_data:sources:DSHIP:satellite_navigation:sensor_equipment_id')\n",
"    underwater_navigation_sensor = miqtpf.cfgValue(cfg,'navigation_data:sources:DSHIP:underwater_navigation:sensor_equipment_id')\n",
"\n",
"    for event in event_folders:\n",
"        status = all_event_status[event]\n",
"\n",
"        # Find sensors for event\n",
"        event_sensors = []\n",
"        for base_folder in event_folders[event]:\n",
"            event_dir = os.path.join(base_folder,event)\n",
"            for tmp_sensor in os.listdir(event_dir):\n",
"                if tmp_sensor[0] != \".\" and tmp_sensor not in event_sensors:\n",
"                    if tmp_sensor == \"protocol\":\n",
"                        # Keep the flag a boolean: True once any non-hidden entry exists in a protocol folder\n",
"                        has_files = any(not f.startswith('.') for f in os.listdir(os.path.join(event_dir,\"protocol\")))\n",
"                        status['has_protocol'] = status['has_protocol'] or has_files\n",
"                    else:\n",
"                        event_sensors.append(tmp_sensor)\n",
"        status['num_sensors'] += len(event_sensors)\n",
"\n",
"        # Iterate through all sensors and fetch file information\n",
"        for sensor in event_sensors:\n",
"\n",
"            # Per-sensor totals, needed for the navigation flags below\n",
"            data_volume = {}\n",
"            for sub in path_to_key:\n",
"                data_volume[path_to_key[sub]+\"_data_num\"] = 0\n",
"                data_volume[path_to_key[sub]+\"_data_vol\"] = 0\n",
"\n",
"            for base_folder in event_folders[event]:\n",
"                sensor_dir = os.path.join(base_folder,event,sensor)\n",
"                for sub in path_to_key:\n",
"                    # The trailing \"\" keeps the trailing slash on the folder path\n",
"                    tmp = miqtpf.recursiveFileStat(os.path.join(sensor_dir,sub,\"\"))\n",
"                    data_volume[path_to_key[sub]+\"_data_num\"] += tmp['num']\n",
"                    data_volume[path_to_key[sub]+\"_data_vol\"] += tmp['size']\n",
"\n",
"                    status[path_to_key[sub]+\"_data_num\"] += tmp['num']\n",
"                    status[path_to_key[sub]+\"_data_vol\"] += tmp['size']\n",
"\n",
"                tmp = miqtpf.recursiveFileStat(os.path.join(sensor_dir,\"raw\",\"\"),miqtd.image_types)\n",
"                if tmp['num'] > 0:\n",
"                    status['has_images'] = True\n",
"\n",
"            if sensor == satellite_navigation_sensor:\n",
"                status['has_gps_nav_raw'] = data_volume[\"raw_data_num\"] > 0 or status['has_gps_nav_raw']\n",
"                status['has_gps_nav_cur'] = data_volume[\"cur_data_num\"] > 0 or status['has_gps_nav_cur']\n",
"            elif sensor == underwater_navigation_sensor:\n",
"                status['has_usbl_nav_raw'] = data_volume[\"raw_data_num\"] > 0 or status['has_usbl_nav_raw']\n",
"                status['has_usbl_nav_cur'] = data_volume[\"cur_data_num\"] > 0 or status['has_usbl_nav_cur']\n",
"\n",
"    # Store the status of ALL events together with the creation time as a UTC string\n",
"    date_created = datetime.datetime.now(datetime.timezone.utc).strftime(cache_date_format)\n",
"    with open(cache_file,\"w\") as yaml_file:\n",
"        yaml.dump({'date_created':date_created,'events':all_event_status},yaml_file)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "polar-circus",
"metadata": {},
"outputs": [],
"source": [
"#################################################################################################################\n",
"### You should not see - and not modify (!) - this cell, unless you are sure what you are doing! Just run it. ###\n",
"#################################################################################################################\n",
"\n",
"import copy\n",
"import pandas as pd\n",
"pd.set_option('display.max_rows', None)\n",
"\n",
"def color_false_red(val):\n",
"    # Red for False, empty strings and \"0\" (0 == False in Python, so zero counts are red too), black otherwise\n",
"    color = 'red' if val == False or val == \"\" or val == \"0\" else 'black'\n",
"    return 'color: %s' % color\n",
"\n",
"def human_readable(size):\n",
"    # Local replacement for the former gmrcc.helper.humanReadable call; gmrcc is not imported anywhere in this notebook.\n",
"    # Assumes size is a byte count - TODO confirm against miqtpf.recursiveFileStat.\n",
"    if size == 0:\n",
"        return \"0\"  # plain \"0\" so that color_false_red still marks empty volumes\n",
"    value = float(size)\n",
"    for unit in (\"B\",\"KiB\",\"MiB\",\"GiB\",\"TiB\"):\n",
"        if value < 1024:\n",
"            return \"%.1f %s\" % (value,unit)\n",
"        value /= 1024\n",
"    return \"%.1f PiB\" % value\n",
"\n",
"# Work on a copy so that the numeric volumes in all_event_status stay untouched\n",
"print_copy = copy.deepcopy(all_event_status)\n",
"for event in print_copy:\n",
"    for key in path_to_key:\n",
"        print_copy[event][path_to_key[key]+\"_data_vol\"] = human_readable(print_copy[event][path_to_key[key]+\"_data_vol\"])\n",
"\n",
"# One row per event, one column per status field\n",
"df = pd.DataFrame(print_copy).T\n",
"df.style.applymap(color_false_red)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "uniform-married",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Basepath: /Volumes/ Path: {<dp.PRJ: 0>: 'SO268', <dp.GEAR: 1>: '', <dp.EVENT: 2>: 'SO268-1_21-1_OFOS', <dp.SENSOR: 3>: 'SO_CAM-1_Photo_OFOS', <dp.TYPE: 4>: 'raw'}\n",
"/Volumes/SO268/SO268-1_21-1_OFOS/SO_CAM-1_Photo_OFOS/raw/\n",
"True\n",
"/Volumes/SO268/SO268-1_21-1_OFOS/foobar/\n",
"\n",
"False\n"
]
}
],
"source": [
"import mariqt.paths as miqtp\n",
"\n",
"# Build a Path from a base path and a data path below it, then inspect it\n",
"p = miqtp.Path(\"/Volumes/\",\"/SO268/SO268-1_21-1_OFOS/SO_CAM-1_Photo_OFOS/raw/\")\n",
"p.dump()\n",
"for describe in (p.str, p.validDataPath):\n",
"    print(describe())\n",
"\n",
"# Replace the SENSOR component and inspect the resulting path the same way\n",
"swapped = p.replaceCreatePath('SENSOR','foobar')\n",
"for describe in (swapped.str, swapped.type, swapped.validDataPath):\n",
"    print(describe())"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "protected-editor",
"metadata": {},
"outputs": [],
"source": [
"import mariqt.provenance as miqto\n",
"\n",
"# Arguments for the provenance file; p is the Path object created in the previous cell\n",
"name = \"SO268-1_21-1_OFOS_SO_CAM-1\"\n",
"executable = \"pounding\"\n",
"version = \"1.1\"\n",
"# NOTE(review): the previous cell uses p.replaceCreatePath(...) - confirm that Path.replace exists and returns what createProvenanceFile expects\n",
"path = p.replace(\"TYPE\",\"intermediate\")\n",
"params = {}\n",
"\n",
"# The module is imported under the alias miqto, so the function has to be called through that alias\n",
"miqto.createProvenanceFile(path,name,executable,version,params)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "looking-tooth",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
This diff is collapsed.
{
"cells": [
{
"cell_type": "markdown",
"id": "compound-landing",
"metadata": {},
"source": [
"# Curation Overview\n",
"This notebook gives an overview of the curation status of your data folders. It stores its results in the cache file `../files/<project>_curation-cache.yaml` (where `<project>` is the project number from the configuration) so that subsequent runs of this notebook are faster. To ignore the cache and rescan everything, set the `rescan` variable in the next cell to `True` and run the notebook again."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "everyday-insured",
"metadata": {},
"outputs": [],
"source": [
"rescan = False"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "foreign-reserve",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Issue: Base path /volumes/project/ not found\n"
]
},
{
"ename": "NameError",
"evalue": "Could not find: /Users/tschoening/dev/repos/mariqt-test/files/PRJ23_all-device-operations.dat",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-2-3a63bfd7ab9c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;31m# Check curation paths\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"DSHIP\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcfg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'navigation_data'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sources'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m \u001b[0mmiqtc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massertExists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmiqtpf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcfgValue\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'navigation_data:sources:DSHIP:dship_all_device_operations_file'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 28\u001b[0m \u001b[0mmiqtc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massertExists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmiqtpf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcfgValue\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'navigation_data:sources:DSHIP:dship_all_underwater_navigation_file'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[0mmiqtc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massertExists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmiqtpf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcfgValue\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'navigation_data:sources:DSHIP:dship_event_navigation_data_folder'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/mariqt/core.py\u001b[0m in \u001b[0;36massertExists\u001b[0;34m(path)\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0massertExists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexists\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mNameError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Could not find: \"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;31m### Asserts that a path string to a directory ends with a slash\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: Could not find: /Users/tschoening/dev/repos/mariqt-test/files/PRJ23_all-device-operations.dat"
]
}
],
"source": [
"#################################################################################################################\n",
"### You should not see - and not modify (!) - this cell, unless you are sure what you are doing! Just run it. ###\n",
"#################################################################################################################\n",
"import mariqt.processing.files as miqtpf\n",
"cfg = miqtpf.cfgFileLoadProjectDefault()\n",
"\n",
"import os\n",
"import mariqt.core as miqtc\n",
"\n",
"# Check base_paths where data resides: each one must exist, be a directory and contain at least one entry\n",
"all_good = True\n",
"for bp in cfg['data']['base_paths']:\n",
"    if not os.path.exists(bp):\n",
"        all_good = False\n",
"        print(\"Issue: Base path\",bp,\"not found\")\n",
"    elif not os.path.isdir(bp):\n",
"        all_good = False\n",
"        print(\"Issue: Base path\",bp,\"points to a file but we require a directory.\")\n",
"    elif len(os.listdir(bp)) == 0:\n",
"        all_good = False\n",
"        print(\"Issue: No data available in base path\",bp)\n",
"if all_good:\n",
"    print(\"It looks like your base path settings are good. There is data. Lets continue to start curating.\")\n",
"\n",
"# Check curation paths (assertExists raises a NameError for a path that does not exist)\n",
"if \"DSHIP\" in cfg['navigation_data']['sources']:\n",
"    miqtc.assertExists(miqtpf.cfgValue(cfg,'navigation_data:sources:DSHIP:dship_all_device_operations_file'))\n",
"    miqtc.assertExists(miqtpf.cfgValue(cfg,'navigation_data:sources:DSHIP:dship_all_underwater_navigation_file'))\n",
"    # NOTE(review): the project configuration names this key 'dship_event_navigation_folder' (without '_data') - confirm which spelling is intended\n",
"    miqtc.assertExists(miqtpf.cfgValue(cfg,'navigation_data:sources:DSHIP:dship_event_navigation_data_folder'))\n",
"\n",
"# Only report success when no base path issue was printed above\n",
"if all_good:\n",
"    print(\"All is good.\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "dangerous-thousand",
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'marqit'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-8-39fd363a0933>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;31m# events[device_operation] = {'code':<device acronym>,'actions':[{'action':<action>,'lat':<latitude>,'lon':<longitude>,'dep':<depth>,'utc':<timestamp>,...],'start':<start timestamp>}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"DSHIP\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcfg\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'navigation_data'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'sources'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mmariqt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msources\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdship\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mmiqtsd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mevents\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmiqtsd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparseDSHIPDeviceOperationsOrEventsFile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmiqtpf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcfgValue\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcfg\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'navigation_data:sources:DSHIP:dship_all_device_operations_file'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/mariqt/sources/dship.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmariqt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgeo\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mmiqtg\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mmarqit\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msource\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdship_settings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0maddEndToDSHIPEventsByLastActionBeforeNextEvent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdship_events\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'marqit'"
]
}
],
"source": [
"#################################################################################################################\n",
"### You should not see - and not modify (!) - this cell, unless you are sure what you are doing! Just run it. ###\n",
"#################################################################################################################\n",
"\n",
"import copy\n",
"import yaml\n",
"import datetime\n",
"\n",
"# Get list of events, expected to be formatted like so:\n",
"# events[device_operation] = {'code':<device acronym>,'actions':[{'action':<action>,'lat':<latitude>,'lon':<longitude>,'dep':<depth>,'utc':<timestamp>,...],'start':<start timestamp>}\n",
"if \"DSHIP\" in cfg['navigation_data']['sources']:\n",
" import mariqt.sources.dship as miqtsd\n",
" \n",
" events = miqtsd.parseDSHIPDeviceOperationsOrEventsFile(miqtpf.cfgValue(cfg,'navigation_data:sources:DSHIP:dship_all_device_operations_file'))\n",
" miqtsd.removeEventsByOtherCruises(events,cfg['project']['number'])\n",
" miqtsd.renameEvents(events)\n",
"else:\n",
" raise Exception(\"Can only process DSHIP events by now. Sorry.\")\n",
"\n",
"\n",
"# These are the status information fields that will be collected for each event in the following\n",
"one_event_status = {\"changed\":False,\"dir_exists\":False,\"event_exists\":False,\"doi\":\"\",\"num_actions\":0,\"num_sensors\":0,\"has_gps_nav_raw\":False,\"has_gps_nav_cur\":False,\"has_usbl_nav_raw\":False,\"has_usbl_nav_cur\":False,\"has_protocol\":False,\n",
" \"raw_data_vol\":0,\"raw_data_num\":0,\"cur_data_vol\":0,\"cur_data_num\":0,\"prt_data_vol\":0,\"prt_data_num\":0,\"prd_data_vol\":0,\"prd_data_num\":0,\"ext_data_vol\":0,\"ext_data_num\":0,\n",
" \"has_images\":False}\n",
"\n",
"# How to map the path names of the folder convention to the short names used here in the script\n",
"path_to_key = {\"external\":\"ext\",\"raw\":\"raw\",\"protocol\":\"prt\",\"products\":\"prd\",\"processed\":\"cur\"}\n",
"\n",
"# Check whether a cache file exists and shall be loaded\n",
"if os.path.exists(\"../files/\" + cfg['project']['number']+\"_curation-cache.yaml\") and not rescan:\n",
" with open(\"../files/\" + cfg['project']['number']+\"_curation-cache.yaml\",\"r\") as yaml_file:\n",
" cache = yaml.safe_load(yaml_file)\n",
" all_event_status = cache['events']\n",
" print(\"Showing cached status from \",cache['date_created'])\n",
" cache_unix = datetime.datetime.strptime(cache['date_created']+\"+0000\",\"%Y-%m-%d %H:%M:%S.%f%z\").timestamp()\n",
"else:\n",
" rescan = True\n",
" \n",
"# Find all events\n",
"all_event_status = {}\n",
"for event in events:\n",
" if event not in all_event_status:\n",
" all_event_status[event] = copy.deepcopy(one_event_status)\n",
" all_event_status[event]['event_exists'] = True\n",
" all_event_status[event]['num_actions'] = len(events[event]['actions'])\n",
"\n",
" \n",
"# Browse all the data base_paths folders and look for event subfolders\n",
"event_folders = {}\n",
"for path in cfg['data']['base_paths']:\n",
" tmp_event_folders = os.listdir(path)\n",
" for tmp_event in tmp_event_folders:\n",
" if not tmp_event.startswith('.') and os.path.isdir(path+tmp_event):\n",
" if not tmp_event in event_folders:\n",
" event_folders[tmp_event] = [path]\n",
" else:\n",
" events_folders[tmp_event].append(path)\n",
" \n",
" if os.path.getmtime(path+tmp_event) > cache_unix:\n",
" all_event_status[tmp_event]['changed'] = True\n",
" \n",
" # Did we find events that are not known in the event files we opened earlier?\n",
" if tmp_event not in all_event_status:\n",