Parallel Analog Ensemble
Typedefs | Functions
Functions Namespace Reference

Typedefs

using Matrix = boost::numeric::ublas::matrix< double, boost::numeric::ublas::column_major, std::vector< double > >
 

Functions

void createObsMap (std::unordered_map< std::string, std::size_t > &map, const std::vector< std::size_t > &id, const Parameters &)
 
void toValues (Array4D &, std::size_t, const Array4D &, const Observations &)
 
void toValues (Array4D &, std::size_t, const Array4D &, const Array4D &, const Observations &)
 
void setSearchStations (const Stations &stations, Matrix &table, double distance, bool exclude_closest_location=false)
 
std::size_t findClosest (const Station &station, const Stations &stations)
 
std::vector< std::size_t > findClosest (const Stations &targets, const Stations &pool, Verbose verbose)
 
Verbose itov (int)
 
int vtoi (Verbose)
 
std::string vtos (Verbose)
 
void updateTimeTable (const Times &fcst_times, const std::vector< std::size_t > &fcst_times_index, const Times &fcst_flts, const Times &obs_times, Matrix &table)
 
double sdLinear (const std::vector< double > &values)
 
double sdCircular (const std::vector< double > &degs)
 
double sum (const std::vector< double > &values, std::size_t max_nan_allowed=_MAX_SIZE_T)
 
double mean (const std::vector< double > &values, std::size_t max_nan_allowed=_MAX_SIZE_T)
 
double sum (const std::vector< double > &values, const double max_nan_allowed=NAN)
 
double variance (const std::vector< double > &values)
 
double diffCircular (double i, double j)
 
double wind_speed (double u, double v)
 
double wind_dir (double u, double v)
 
size_t levenshtein (const std::string &str1, const std::string &str2, size_t w=0, size_t s=2, size_t a=1, size_t d=3)
 
long toSeconds (const std::string &datetime_str, const std::string &origin_str, bool iso_string)
 
void collapseLeadTimes (Observations &, const Forecasts &)
 
void unwrapTimeSeries (Forecasts &, const Times &, const Times &, const Observations &)
 
void randomizeForecasts (Forecasts &fcsts, double nan_prob, size_t min_valid_count=0)
 
void randomizeObservations (Observations &obs, double nan_prob)
 
int getStartIndex (int total, int num_procs, int rank)
 
int getEndIndex (int total, int num_procs, int rank)
 
int getSubTotal (int grand_total, int num_procs, int rank)
 
template<typename T >
std::string format (const std::vector< T > &vec, const std::string &delim=",", std::size_t preview=5)
 
template<typename T >
std::string format (const T *ptr, std::size_t len, const std::string &delim=",", std::size_t preview=5)
 
template<class T >
void toIndex (std::vector< std::size_t > &index, const T &query, const T &pool)
 
template<class T >
void guess_arguments (const std::vector< std::basic_string< T > > &unregistered_keys, const std::vector< std::string > &available_options, std::ostream &os)
 

Typedef Documentation

◆ Matrix

using Functions::Matrix = typedef boost::numeric::ublas::matrix< double, boost::numeric::ublas::column_major, std::vector<double> >

The Matrix type is from boost uBLAS matrix.

It is a double matrix because it needs to be able to hold NAN.

Its internal storage is in column major because it is easier to be converted to an R structure and for file I/O with file formats like NetCDF.

The storage type is std::vector because the default storage type, unbounded_array, does not model sequence operators. Using vectors will make it easier for value initialization. Some basic profiling has shown that the creation will be slightly slower, but the indexing will be slightly faster when std::vector is used.

Function Documentation

◆ collapseLeadTimes()

void Functions::collapseLeadTimes ( Observations observations,
const Forecasts forecasts 
)

Collapse the time and lead time dimensions of forecasts and convert them to observations.

Parameters
observations Observations to store results
forecasts Forecasts to be collapsed

◆ createObsMap()

void Functions::createObsMap ( std::unordered_map< std::string, std::size_t > &  map,
const std::vector< std::size_t > &  id,
const Parameters  
)

Create an unordered map with parameter names as keys and the id as values

Parameters
map An unordered map
id Vector of variable IDs
parameters Parameters

◆ diffCircular()

double Functions::diffCircular ( double  i,
double  j 
)

Computes the difference of two circular numbers

Parameters
iA double.
jA double.
Returns
A double.

◆ findClosest() [1/2]

size_t Functions::findClosest ( const Station station,
const Stations stations 
)

Find the index of the closest station.

Parameters
stationThe target station
stations The pool of stations to search from
verbose Verbose level (used by the overload that takes a Verbose argument)
Returns
An index of the closest station from the pool

◆ findClosest() [2/2]

vector< size_t > Functions::findClosest ( const Stations targets,
const Stations pool,
Verbose  verbose 
)

◆ format() [1/2]

template<typename T >
std::string Functions::format ( const std::vector< T > &  vec,
const std::string &  delim = ",",
std::size_t  preview = 5 
)

Format a vector as a string for printing.

Parameters
vec A vector
len Length of the pointed object
ptr A pointer
delim A string delimiter
Returns
A formatted string

◆ format() [2/2]

template<typename T >
std::string Functions::format ( const T *  ptr,
std::size_t  len,
const std::string &  delim = ",",
std::size_t  preview = 5 
)

◆ getEndIndex()

int Functions::getEndIndex ( int  total,
int  num_procs,
int  rank 
)

◆ getStartIndex()

int Functions::getStartIndex ( int  total,
int  num_procs,
int  rank 
)

Functions to calculate indices for splitting a consecutive vector. These are designed for MPI tasks. Rank 0 is considered the master process, so there will be num_procs - 1 chunks to be distributed.

◆ getSubTotal()

int Functions::getSubTotal ( int  grand_total,
int  num_procs,
int  rank 
)

◆ guess_arguments()

template<class T >
void Functions::guess_arguments ( const std::vector< std::basic_string< T > > &  unregistered_keys,
const std::vector< std::string > &  available_options,
std::ostream &  os 
)

Guess the unregistered parameters.

Parameters
unregistered_keysThe unregistered keys returned by boost::program_options::collect_unrecognized.
available_optionsA vector of string for available options.
osAn output stream.

◆ itov()

Verbose Functions::itov ( int  flag)

Convert an integer to Verbose and vice versa

Parameters
flag An integer
Returns
A Verbose

◆ levenshtein()

size_t Functions::levenshtein ( const std::string &  str1,
const std::string &  str2,
size_t  w = 0,
size_t  s = 2,
size_t  a = 1,
size_t  d = 3 
)

Computes the Levenshtein distance of two strings. The function can be used to guess the intended argument of an unknown one. This function is used by the function Functions::guess_arguments.

The implementation is referenced from Github https://github.com/git/git/blob/master/levenshtein.h

Thanks to Vlad Lazarenko for the pointer: http://lazarenko.me/smart-getopt/

Parameters
str1A string.
str2A string.
wWeight suggested by GitHub.
sWeight suggested by GitHub.
aWeight suggested by GitHub.
dWeight suggested by GitHub.
Returns
A distance measure

◆ mean()

double Functions::mean ( const std::vector< double > &  values,
std::size_t  max_nan_allowed = _MAX_SIZE_T 
)

Computes the mean of a vector.

Parameters
valuesA vector of values.
max_nan_allowedThe number of NAN values allowed in the vector. Set it to NAN to allow any number of NAN values.

◆ randomizeForecasts()

void Functions::randomizeForecasts ( Forecasts fcsts,
double  nan_prob,
size_t  min_valid_count = 0 
)

Initialize forecasts or observations with random values. These functions are used for testing.

Parameters
fcstsForecasts
nan_probThe portion of NAN values
min_valid_countthe minimum number of valid values in times
obsObservations

◆ randomizeObservations()

void Functions::randomizeObservations ( Observations obs,
double  nan_prob 
)

◆ sdCircular()

double Functions::sdCircular ( const std::vector< double > &  degs)

Computes the standard deviation for angles in degrees.

Parameters
degs A vector of values.

◆ sdLinear()

double Functions::sdLinear ( const std::vector< double > &  values)

Computes the standard deviation for linear numbers.

Parameters
valuesA vector of values.

◆ setSearchStations()

void Functions::setSearchStations ( const Stations stations,
Matrix table,
double  distance,
bool  exclude_closest_location = false 
)

Set the search stations based on distance and nearest neighbors.

Parameters
stationsStations to find neighbors
tableAn index table. Each row shows the indices for neighbor stations of a particular station in Stations.
distanceDistance threshold.
exclude_closest_locationWhether to exclude search from the closest station. This station is usually the current station itself.

◆ sum() [1/2]

double Functions::sum ( const std::vector< double > &  values,
const double  max_nan_allowed = NAN 
)

Computes the sum of a vector.

Parameters
valuesA vector of values.
max_nan_allowedThe number of NAN values allowed in the vector. Set it to NAN to allow any number of NAN values.

◆ sum() [2/2]

double Functions::sum ( const std::vector< double > &  values,
std::size_t  max_nan_allowed = _MAX_SIZE_T 
)

◆ toIndex()

template<class T >
void Functions::toIndex ( std::vector< std::size_t > &  index,
const T &  query,
const T &  pool 
)

Calculate the indices for each query object from the pool objects.

Parameters
indexA vector to store indices.
queryObjects to query. It can be Parameters, Times, Stations.
poolObjects from which indices are generated. It can be Parameters, Times, Stations.

◆ toSeconds()

long Functions::toSeconds ( const std::string &  datetime_str,
const std::string &  origin_str,
bool  iso_string 
)

Convert a date time string to the number of seconds since origin time.

The format should follow the section 'Construct from String' from here https://www.boost.org/doc/libs/1_72_0/doc/html/date_time/posix_time.html

Parameters
datetime_strA date time string for start time
origin_str A date time string for the origin time
iso_stringWhether the string is in ISO format
Returns
The number of seconds since the origin.

◆ toValues() [1/2]

void Functions::toValues ( Array4D ,
std::size_t  ,
const Array4D ,
const Array4D ,
const Observations  
)

◆ toValues() [2/2]

void Functions::toValues ( Array4D ,
std::size_t  ,
const Array4D ,
const Observations  
)

Converts analogs time index to analogs value using the specified observation ID.

Parameters
analogsAnalogs for storing values
obs_idObservation ID
analogs_time_indexAnalogs time index from AnEn
analogs_station_indexAnalogs station index from AnEn. This is usually the similarity station index because they are the same.
observationsObservations

◆ unwrapTimeSeries()

void Functions::unwrapTimeSeries ( Forecasts forecasts,
const Times times,
const Times flts,
const Observations observations 
)

Unwrap the time series of observations to reconstruct forecast times and lead times. This is the reverse process of collapseLeadTimes.

Parameters
forecasts Forecasts to store results
times Forecast times to reconstruct
flts Forecast lead times to reconstruct
observations Observations to be unwrapped

◆ updateTimeTable()

void Functions::updateTimeTable ( const Times fcst_times,
const std::vector< std::size_t > &  fcst_times_index,
const Times fcst_flts,
const Times obs_times,
Matrix table 
)

Computes a lookup table which maps from forecast time and lead time indices to observation time indices. If the observation time index is not found, the cell value in the table will stay untouched.

Parameters
fcst_timesForecast Times.
fcst_times_indexThe indices to compute mapping.
fcst_fltsForecast FLTs.
obs_timesObservation Times.
tableA matrix storing the indices with forecast times in rows and forecast lead times in columns.

◆ variance()

double Functions::variance ( const std::vector< double > &  values)

Computes the variance of a vector.

Parameters
valuesA vector of values.
averageThe average of input values.

◆ vtoi()

int Functions::vtoi ( Verbose  verbose)

◆ vtos()

string Functions::vtos ( Verbose  verbose)

◆ wind_dir()

double Functions::wind_dir ( double  u,
double  v 
)

◆ wind_speed()

double Functions::wind_speed ( double  u,
double  v 
)

Calculate wind speed and direction from U and V components.

Parameters
uU component value
vV component value
Returns
wind speed or direction value