Parallel Analog Ensemble
|
Typedefs | |
using | Matrix = boost::numeric::ublas::matrix< double, boost::numeric::ublas::column_major, std::vector< double > > |
Functions | |
void | createObsMap (std::unordered_map< std::string, std::size_t > &map, const std::vector< std::size_t > &id, const Parameters &) |
void | toValues (Array4D &, std::size_t, const Array4D &, const Observations &) |
void | toValues (Array4D &, std::size_t, const Array4D &, const Array4D &, const Observations &) |
void | setSearchStations (const Stations &stations, Matrix &table, double distance, bool exclude_closest_location=false) |
std::size_t | findClosest (const Station &station, const Stations &stations) |
std::vector< std::size_t > | findClosest (const Stations &targets, const Stations &pool, Verbose verbose) |
Verbose | itov (int) |
int | vtoi (Verbose) |
std::string | vtos (Verbose) |
void | updateTimeTable (const Times &fcst_times, const std::vector< std::size_t > &fcst_times_index, const Times &fcst_flts, const Times &obs_times, Matrix &table) |
double | sdLinear (const std::vector< double > &values) |
double | sdCircular (const std::vector< double > &degs) |
double | sum (const std::vector< double > &values, std::size_t max_nan_allowed=_MAX_SIZE_T) |
double | mean (const std::vector< double > &values, std::size_t max_nan_allowed=_MAX_SIZE_T) |
double | sum (const std::vector< double > &values, const double max_nan_allowed=NAN) |
double | variance (const std::vector< double > &values) |
double | diffCircular (double i, double j) |
double | wind_speed (double u, double v) |
double | wind_dir (double u, double v) |
size_t | levenshtein (const std::string &str1, const std::string &str2, size_t w=0, size_t s=2, size_t a=1, size_t d=3) |
long | toSeconds (const std::string &datetime_str, const std::string &origin_str, bool iso_string) |
void | collapseLeadTimes (Observations &, const Forecasts &) |
void | unwrapTimeSeries (Forecasts &, const Times &, const Times &, const Observations &) |
void | randomizeForecasts (Forecasts &fcsts, double nan_prob, size_t min_valid_count=0) |
void | randomizeObservations (Observations &obs, double nan_prob) |
int | getStartIndex (int total, int num_procs, int rank) |
int | getEndIndex (int total, int num_procs, int rank) |
int | getSubTotal (int grand_total, int num_procs, int rank) |
template<typename T > | |
std::string | format (const std::vector< T > &vec, const std::string &delim=",", std::size_t preview=5) |
template<typename T > | |
std::string | format (const T *ptr, std::size_t len, const std::string &delim=",", std::size_t preview=5) |
template<class T > | |
void | toIndex (std::vector< std::size_t > &index, const T &query, const T &pool) |
template<class T > | |
void | guess_arguments (const std::vector< std::basic_string< T > > &unregistered_keys, const std::vector< std::string > &available_options, std::ostream &os) |
using Functions::Matrix = boost::numeric::ublas::matrix< double, boost::numeric::ublas::column_major, std::vector<double> > |
The Matrix type is from boost uBLAS matrix.
It is a double matrix because it needs to be able to hold NAN.
Its internal storage is in column major because it is easier to be converted to an R structure and for file I/O with file formats like NetCDF.
The storage type is std::vector because the default storage type, unbounded_array, does not model sequence operators. Using vectors will make it easier for value initialization. Some basic profiling has shown that the creation will be slightly slower, but the indexing will be slightly faster when std::vector is used.
void Functions::collapseLeadTimes | ( | Observations & | observations, |
const Forecasts & | forecasts | ||
) |
Collapse the time and lead time dimensions of forecasts and convert them to observations.
Observations | Observations to store results |
Forecasts | Forecasts to be collapsed |
void Functions::createObsMap | ( | std::unordered_map< std::string, std::size_t > & | map, |
const std::vector< std::size_t > & | id, | ||
const Parameters & | |||
) |
Create an unordered map with parameter names as keys and the id as values
obs_map | An unordered map |
obs_id | A vector of variable IDs |
parameters | Parameters |
double Functions::diffCircular | ( | double | i, |
double | j | ||
) |
Computes the difference of two circular numbers
i | A double. |
j | A double. |
Find the index of the closest station.
station | The target station |
stations | The pool of stations to search from |
verbose | Verbose level |
vector< size_t > Functions::findClosest | ( | const Stations & | targets, |
const Stations & | pool, | ||
Verbose | verbose | ||
) |
std::string Functions::format | ( | const std::vector< T > & | vec, |
const std::string & | delim = "," , |
||
std::size_t | preview = 5 |
||
) |
Format a vector as a string for printing.
vec | A vector |
len | Length of the pointed object |
ptr | A pointer |
delim | A string delimiter |
std::string Functions::format | ( | const T * | ptr, |
std::size_t | len, | ||
const std::string & | delim = "," , |
||
std::size_t | preview = 5 |
||
) |
int Functions::getEndIndex | ( | int | total, |
int | num_procs, | ||
int | rank | ||
) |
int Functions::getStartIndex | ( | int | total, |
int | num_procs, | ||
int | rank | ||
) |
Functions to calculate indices to split a consecutive vector. These are designed for MPI tasks. The rank 0 is considered as the master process. So there will be num_procs - 1 chunks to be distributed.
int Functions::getSubTotal | ( | int | grand_total, |
int | num_procs, | ||
int | rank | ||
) |
void Functions::guess_arguments | ( | const std::vector< std::basic_string< T > > & | unregistered_keys, |
const std::vector< std::string > & | available_options, | ||
std::ostream & | os | ||
) |
Guess the unregistered parameters.
unregistered_keys | The unregistered keys returned by boost::program_options::collect_unrecognized. |
available_options | A vector of string for available options. |
os | An output stream. |
Verbose Functions::itov | ( | int | flag | ) |
Convert an integer to Verbose and vice versa
flag | An integer |
size_t Functions::levenshtein | ( | const std::string & | str1, |
const std::string & | str2, | ||
size_t | w = 0 , |
||
size_t | s = 2 , |
||
size_t | a = 1 , |
||
size_t | d = 3 |
||
) |
Computes the Levenshtein distance of two strings. The function can be used to guess the intended argument of an unknown one. This function is used by the function Functions::guess_arguments.
The implementation is referenced from Github https://github.com/git/git/blob/master/levenshtein.h
Thanks to the pointer of Vlad Lazarenko http://lazarenko.me/smart-getopt/
str1 | A string. |
str2 | A string. |
w | Weight suggested by GitHub. |
s | Weight suggested by GitHub. |
a | Weight suggested by GitHub. |
d | Weight suggested by GitHub. |
double Functions::mean | ( | const std::vector< double > & | values, |
std::size_t | max_nan_allowed = _MAX_SIZE_T |
||
) |
Computes the mean of a vector.
values | A vector of values. |
max_nan_allowed | The number of NAN values allowed in the vector. Leave it at the default (_MAX_SIZE_T) to allow any number of NAN values. |
void Functions::randomizeForecasts | ( | Forecasts & | fcsts, |
double | nan_prob, | ||
size_t | min_valid_count = 0 |
||
) |
Initialize forecasts or observations with random values. These functions are used for testing.
fcsts | Forecasts |
nan_prob | The portion of NAN values |
min_valid_count | the minimum number of valid values in times |
obs | Observations |
void Functions::randomizeObservations | ( | Observations & | obs, |
double | nan_prob | ||
) |
double Functions::sdCircular | ( | const std::vector< double > & | degs | ) |
Computes the standard deviation for angles in degrees.
degs | A vector of angles in degrees. |
double Functions::sdLinear | ( | const std::vector< double > & | values | ) |
Computes the standard deviation for linear numbers.
values | A vector of values. |
void Functions::setSearchStations | ( | const Stations & | stations, |
Matrix & | table, | ||
double | distance, | ||
bool | exclude_closest_location = false |
||
) |
Set the search stations based on distance and nearest neighbors.
stations | Stations to find neighbors |
table | An index table. Each row shows the indices for neighbor stations of a particular station in Stations. |
distance | Distance threshold. |
exclude_closest_location | Whether to exclude search from the closest station. This station is usually the current station itself. |
double Functions::sum | ( | const std::vector< double > & | values, |
const double | max_nan_allowed = NAN |
||
) |
Computes the sum of a vector.
values | A vector of values. |
max_nan_allowed | The number of NAN values allowed in the vector. Set it to NAN to allow any number of NAN values. |
double Functions::sum | ( | const std::vector< double > & | values, |
std::size_t | max_nan_allowed = _MAX_SIZE_T |
||
) |
void Functions::toIndex | ( | std::vector< std::size_t > & | index, |
const T & | query, | ||
const T & | pool | ||
) |
Calculate the indices for each query object from the pool objects.
index | A vector to store indices. |
query | Objects to query. It can be Parameters, Times, Stations. |
pool | Objects from which indices are generated. It can be Parameters, Times, Stations. |
long Functions::toSeconds | ( | const std::string & | datetime_str, |
const std::string & | origin_str, | ||
bool | iso_string | ||
) |
Convert a date time string to the number of seconds since origin time.
The format should follow the section 'Construct from String' from here https://www.boost.org/doc/libs/1_72_0/doc/html/date_time/posix_time.html
datetime_str | A date time string for start time |
origin_str | A date time string for the original time |
iso_string | Whether the string is in ISO format |
void Functions::toValues | ( | Array4D & | , |
std::size_t | , | ||
const Array4D & | , | ||
const Array4D & | , | ||
const Observations & | |||
) |
void Functions::toValues | ( | Array4D & | , |
std::size_t | , | ||
const Array4D & | , | ||
const Observations & | |||
) |
Converts analogs time index to analogs value using the specified observation ID.
analogs | Analogs for storing values |
obs_id | Observation ID |
analogs_time_index | Analogs time index from AnEn |
analogs_station_index | Analogs station index from AnEn. This is usually the similarity station index because they are the same. |
observations | Observations |
void Functions::unwrapTimeSeries | ( | Forecasts & | forecasts, |
const Times & | times, | ||
const Times & | flts, | ||
const Observations & | observations | ||
) |
Unwrap the time series of observations to reconstruct forecast times and lead times. This is the reverse process of collapseLeadTimes.
Forecasts | Forecasts to store results |
times | Forecast times to reconstruct |
flts | Forecast lead times to reconstruct |
Observations | Observations to be unwrapped |
void Functions::updateTimeTable | ( | const Times & | fcst_times, |
const std::vector< std::size_t > & | fcst_times_index, | ||
const Times & | fcst_flts, | ||
const Times & | obs_times, | ||
Matrix & | table | ||
) |
Computes a lookup table which maps from forecast time and lead time indices to observation time indices. If the observation time index is not found, the cell value in the table will stay untouched.
double Functions::variance | ( | const std::vector< double > & | values | ) |
Computes the variance of a vector.
values | A vector of values. |
average | The average of input values. |
int Functions::vtoi | ( | Verbose | verbose | ) |
string Functions::vtos | ( | Verbose | verbose | ) |
double Functions::wind_dir | ( | double | u, |
double | v | ||
) |
double Functions::wind_speed | ( | double | u, |
double | v | ||
) |
Calculate wind speed and direction from U and V components.
u | U component value |
v | V component value |