Skip to content

Commit 2bacc61

Browse files
v1.15.2 Spaces in column names. Remove SMap warning.
1 parent c41f7f5 commit 2bacc61

10 files changed

Lines changed: 191 additions & 25 deletions

File tree

doc/cppEDM.pdf

1.82 KB
Binary file not shown.

src/API.cc

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -464,9 +464,6 @@ SMapValues SMap( DataFrame< double > & DF,
464464
msg << "WARNING: SMap() " << DF.NanRows().size()
465465
<< " nan rows detected in columns or target. "
466466
<< "Original number of rows " << DF.NRows() << ".\n";
467-
if ( not parameters.embedded ) {
468-
msg << "Time delay embedding presumption violated.\n";
469-
}
470467
std::cout << msg.str();
471468

472469
if ( parameters.verbose ) {

src/Common.cc

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@ std::string ToLower( std::string str ) {
3636
//
3737
// Return: vector of tokens
3838
//----------------------------------------------------------------
39-
std::vector<std::string> SplitString( std::string inString,
40-
std::string delimeters ) {
41-
39+
std::vector<std::string> SplitString( std::string inString,
40+
std::string delimeters,
41+
bool removeWhitespace ) {
4242
size_t pos = 0;
4343
size_t eos = 0;
4444
size_t wordStart = 0;
@@ -78,9 +78,10 @@ std::vector<std::string> SplitString( std::string inString,
7878

7979
word = inString.substr( wordStart, wordEnd - wordStart );
8080

81-
// remove whitespace
82-
word.erase( std::remove_if( word.begin(), word.end(), ::isspace ),
83-
word.end() );
81+
if ( removeWhitespace ) {
82+
word.erase( std::remove_if( word.begin(), word.end(), ::isspace ),
83+
word.end() );
84+
}
8485

8586
splitString.push_back( word );
8687
}

src/Common.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,8 @@ struct MultiviewValues {
7676
std::string ToLower( std::string str );
7777

7878
std::vector<std::string> SplitString( std::string inString,
79-
std::string delimeters );
79+
std::string delimeters,
80+
bool removeWhitespace );
8081

8182
VectorError ComputeError( std::valarray< double > obs,
8283
std::valarray< double > pred );

src/DataFrame.h

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@
1313
#include <iterator>
1414
#include <set>
1515

16-
// Common.cc
16+
// Common.cc : default delimeters = "," for .csv
1717
extern std::vector<std::string> SplitString( std::string inString,
18-
std::string delimeters = "," );
18+
std::string delimeters = ",",
19+
bool removeWhitespace = true );
1920

2021
// Type definition for CSV NamedData to pair column names & column data
2122
typedef std::vector<std::pair<std::string, std::vector<double>>> NamedData;
@@ -80,7 +81,7 @@ class DataFrame {
8081

8182
//-----------------------------------------------------------------
8283
// Empty DataFrame of size (rows, columns) with column names in a
83-
// single whitespace delimited string.
84+
// single string.
8485
//-----------------------------------------------------------------
8586
DataFrame( size_t rows, size_t columns, std::string colNames ):
8687
n_rows( rows ), n_columns( columns ), elements( columns * rows ),
@@ -247,7 +248,6 @@ class DataFrame {
247248
std::vector< size_t > col_i_vec;
248249

249250
// Map column names to indices
250-
std::vector< std::string >::iterator si;
251251
for ( auto ci = colNames.begin(); ci != colNames.end(); ++ci ) {
252252
auto si = find( columnNames.begin(), columnNames.end(), *ci );
253253

@@ -442,7 +442,16 @@ class DataFrame {
442442
void BuildColumnNameIndex( std::string colNames ) {
443443
// If colNames provided populate columnNames, columnNameToIndex
444444
if ( colNames.size() ) {
445-
columnNames = SplitString( colNames, " ,\t" );
445+
446+
// If ',' in colNames, ignore whitespace in delimeter
447+
// to allow space in names
448+
if ( colNames.find( ',' ) != colNames.npos ) {
449+
columnNames = SplitString( colNames, ",", false );
450+
}
451+
else {
452+
columnNames = SplitString( colNames, " \t,\n" );
453+
}
454+
446455
if ( columnNames.size() != n_columns ) {
447456
std::stringstream errMsg;
448457
errMsg << "DataFrame::BuildColumnNameIndex(s) "
@@ -693,7 +702,8 @@ class DataFrame {
693702
std::vector< std::string > colNames;
694703

695704
// First line of .csv is REQUIRED header / column names
696-
std::vector<std::string> firstLineWords = SplitString( dataLines[0] );
705+
std::vector<std::string> firstLineWords =
706+
SplitString( dataLines[0], ",", false );
697707

698708
// Get named columns from header line
699709
for (size_t colIdx = 0; colIdx < firstLineWords.size(); colIdx++){

src/Eval.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,7 @@ DataFrame< double > PredictNonlinear( DataFrame< double > & data,
555555
// Use theta values passed in as parameter string
556556
ThetaValues.clear();
557557

558-
std::vector< std::string > theta_vec = SplitString( theta, " \t,\n" );
558+
std::vector< std::string > theta_vec = SplitString(theta," \t,\n",true);
559559

560560
try {
561561
for ( auto ci = theta_vec.begin(); ci != theta_vec.end(); ++ci ) {

src/Parameter.cc

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ Parameters::Parameters(
102102
validated ( false ),
103103

104104
// Instantiate Version
105-
version( 1, 15, 1, "2023-10-27" )
105+
version( 1, 15, 2, "2023-11-27" )
106106
{
107107
// Constructor code
108108
if ( method != Method::None ) {
@@ -144,8 +144,16 @@ void Parameters::Validate() {
144144
//--------------------------------------------------------------
145145
if ( columns_str.size() ) {
146146

147-
std::vector<std::string> columns_vec = SplitString( columns_str,
148-
" \t,\n" );
147+
std::vector<std::string> columns_vec;
148+
149+
// If ',' in columns_str, do not use whitespace delimeter
150+
// to allow space in names
151+
if ( columns_str.find( ',' ) != columns_str.npos ) {
152+
columns_vec = SplitString( columns_str, ",", false );
153+
}
154+
else {
155+
columns_vec = SplitString( columns_str, " \t,\n", true );
156+
}
149157
columnNames = columns_vec;
150158
}
151159

@@ -161,8 +169,16 @@ void Parameters::Validate() {
161169
// All other use targetName[0].
162170
//--------------------------------------------------------------
163171
if ( target_str.size() ) {
164-
std::vector<std::string> columns_vec = SplitString( target_str,
165-
" \t,\n" );
172+
std::vector<std::string> columns_vec;
173+
174+
// If ',' in target_str, do not use whitespace delimeter
175+
// to allow space in names
176+
if ( target_str.find( ',' ) != target_str.npos ) {
177+
columns_vec = SplitString( target_str, ",\n", false );
178+
}
179+
else {
180+
columns_vec = SplitString( target_str, " \t,\n", true );
181+
}
166182
targetNames = columns_vec;
167183
}
168184

@@ -201,7 +217,8 @@ void Parameters::Validate() {
201217
// if increment < stop generate the library sequence.
202218
// if increment > stop presume list of 3 library sizes.
203219
// 2) Otherwise: "x y ..." : list of library sizes.
204-
std::vector<std::string> libsize_vec = SplitString(libSizes_str," \t,");
220+
std::vector<std::string> libsize_vec = SplitString( libSizes_str,
221+
" \t,", true );
205222

206223
bool libSizeSequence = false;
207224
int start;
@@ -351,7 +368,7 @@ void Parameters::Validate() {
351368
//--------------------------------------------------------------
352369
if ( lib_str.size() ) {
353370
// Parse lib_str into vector of strings
354-
std::vector<std::string> lib_vec = SplitString( lib_str, " \t," );
371+
std::vector<std::string> lib_vec = SplitString( lib_str, " \t,", true );
355372
if ( lib_vec.size() % 2 != 0 ) {
356373
std::string errMsg( "Parameters::Validate(): "
357374
"library must be even number of integers.\n" );
@@ -455,7 +472,7 @@ void Parameters::Validate() {
455472
//--------------------------------------------------------------
456473
if ( pred_str.size() ) {
457474
// Parse pred_str into vector of strings
458-
std::vector<std::string> pred_vec = SplitString( pred_str, " \t," );
475+
std::vector<std::string> pred_vec = SplitString(pred_str, " \t,", true);
459476
if ( pred_vec.size() % 2 != 0 ) {
460477
std::string errMsg( "Parameters::Validate(): "
461478
"prediction must be even number of integers.\n");

tests/SimplexTest.cc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,4 +190,21 @@ int main () {
190190

191191
// Comparison
192192
MakeTest ( "Simplex: disjoint library 3", pyOutput, cppOutput );
193+
194+
//----------------------------------------------------------
195+
// Simplex column names with spaces
196+
// NOTE: ',' in columns or target ignores whitespace
197+
//----------------------------------------------------------
198+
pyOutput = DataFrame < double > ("./data/","columnNameSpace_valid.csv");
199+
200+
// Generate cpp output
201+
S = Simplex ( "./data/", "columnNameSpace.csv",
202+
"./data/", "columnNameSpace_cppEDM.csv",
203+
"1 80", "81 100", 5, 1, 0, -1, 0,
204+
"Var 5 1,", "Var 2,", false, false, false );
205+
206+
cppOutput = S.predictions;
207+
208+
// Comparison
209+
MakeTest ( "Simplex: column name space", pyOutput, cppOutput );
193210
}

tests/data/columnNameSpace.csv

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
Time,Var 1,Var 2,Var3,Var 4 A,Var 5 1
2+
10.0000,2.4873,1.0490,3.4093,8.6502,-2.4232
3+
10.0500,3.5108,2.2832,4.0464,7.8964,-2.1931
4+
10.1000,4.1666,3.7791,4.7456,6.8123,-1.8866
5+
10.1500,4.4836,5.4255,5.2821,5.3070,-1.6077
6+
10.2000,4.5246,7.0599,5.3571,3.4137,-1.3198
7+
10.2500,4.3996,8.4636,4.6880,1.4224,-0.9423
8+
10.3000,4.2834,9.4393,3.1935,-0.1486,-0.5056
9+
10.3500,4.3073,9.8903,1.1267,-0.8165,-0.1553
10+
10.4000,4.4727,9.8108,-0.9880,-0.4087,0.0987
11+
10.4500,4.7750,9.2343,-2.6227,0.8390,0.5478
12+
10.5000,5.2694,8.1564,-3.5549,2.3684,1.5622
13+
10.5500,5.8840,6.4730,-3.9150,3.5582,3.2580
14+
10.6000,6.2747,4.0804,-3.9256,3.9698,5.3755
15+
10.6500,5.9798,1.1828,-3.6515,3.4702,7.3548
16+
10.7000,4.8229,-1.5331,-3.0541,2.3313,8.6562
17+
10.7500,3.1194,-3.3321,-2.2364,1.1273,9.1889
18+
10.8000,1.3612,-3.9845,-1.4564,0.3045,9.2723
19+
10.8500,-0.1160,-3.7001,-0.8990,-0.0616,9.2205
20+
10.9000,-1.1470,-2.8043,-0.5533,-0.1029,9.1611
21+
10.9500,-1.6706,-1.6010,-0.2858,0.0596,9.1057
22+
11.0000,-1.7193,-0.3557,0.0287,0.3794,9.0351
23+
11.0500,-1.4171,0.7312,0.4408,0.8467,8.9310
24+
11.1000,-0.9447,1.5593,0.9428,1.4513,8.7813
25+
11.1500,-0.4833,2.1310,1.5163,2.1765,8.5727
26+
11.2000,-0.1712,2.5180,2.1634,3.0042,8.2731
27+
11.2500,-0.0800,2.8143,2.9131,3.9106,7.8129
28+
11.3000,-0.2043,3.0919,3.8121,4.8445,7.0745
29+
11.3500,-0.4558,3.3637,4.9019,5.6909,5.9108
30+
11.4000,-0.6691,3.5753,6.1693,6.2319,4.2228
31+
11.4500,-0.6551,3.6687,7.4890,6.1556,2.1102
32+
11.5000,-0.3438,3.7049,8.6398,5.1820,-0.0129
33+
11.5500,0.0721,3.8562,9.4183,3.2690,-1.5430
34+
11.6000,0.2719,4.1705,9.7019,0.7431,-2.0048
35+
11.6500,0.2453,4.5032,9.4438,-1.7504,-1.2694
36+
11.7000,0.4567,4.8154,8.6786,-3.5679,0.3507
37+
11.7500,1.4043,5.2452,7.4276,-4.4855,2.1613
38+
11.8000,3.1508,5.7590,5.6035,-4.7268,3.4247
39+
11.8500,5.2766,6.0012,3.1470,-4.5951,3.6950
40+
11.9000,7.1091,5.5794,0.3417,-4.1856,2.9408
41+
11.9500,8.1517,4.4745,-2.1384,-3.4571,1.5714
42+
12.0000,8.4443,3.0595,-3.7288,-2.4716,0.2430
43+
12.0500,8.3848,1.6978,-4.3566,-1.4446,-0.5674
44+
12.1000,8.2949,0.5253,-4.2545,-0.6003,-0.7672
45+
12.1500,8.2687,-0.4507,-3.6663,-0.0443,-0.5179
46+
12.2000,8.2672,-1.2258,-2.7571,0.2623,-0.0327
47+
12.2500,8.2292,-1.7673,-1.6480,0.4537,0.5427
48+
12.3000,8.1135,-2.0491,-0.4604,0.6836,1.1576
49+
12.3500,7.8931,-2.0921,0.6805,1.0652,1.8202
50+
12.4000,7.5370,-1.9712,1.6733,1.6522,2.5461
51+
12.4500,6.9967,-1.7855,2.4634,2.4543,3.3242
52+
12.5000,6.2014,-1.6136,3.0387,3.4580,4.0994
53+
12.5500,5.0760,-1.4770,3.4044,4.6271,4.7596
54+
12.6000,3.5989,-1.3272,3.5685,5.8826,5.1285
55+
12.6500,1.8906,-1.0787,3.5654,7.0895,4.9966
56+
12.7000,0.2564,-0.6981,3.4985,8.0907,4.2129
57+
12.7500,-0.9107,-0.2798,3.5126,8.7724,2.7926
58+
12.8000,-1.3123,0.0230,3.6762,9.0833,0.9622
59+
12.8500,-0.8653,0.2023,3.9451,9.0087,-0.8906
60+
12.9000,0.2582,0.4946,4.2866,8.5548,-2.3714
61+
12.9500,1.6621,1.2152,4.7464,7.7137,-3.2800
62+
13.0000,2.8722,2.5121,5.3179,6.4143,-3.6765
63+
13.0500,3.5139,4.2590,5.8040,4.5557,-3.7360
64+
13.1000,3.4142,6.0832,5.8602,2.1825,-3.5519
65+
13.1500,2.6736,7.5249,5.2268,-0.3191,-3.0988
66+
13.2000,1.6784,8.3393,3.9664,-2.3406,-2.3916
67+
13.2500,0.8730,8.6445,2.4020,-3.4743,-1.6045
68+
13.3000,0.4617,8.7053,0.8579,-3.7001,-0.9625
69+
13.3500,0.3872,8.6893,-0.4510,-3.2108,-0.5622
70+
13.4000,0.5144,8.6439,-1.3878,-2.2395,-0.3279
71+
13.4500,0.7685,8.5669,-1.8826,-1.0196,-0.1021
72+
13.5000,1.1444,8.4458,-1.9544,0.2243,0.2407
73+
13.5500,1.6540,8.2653,-1.7130,1.3160,0.7457
74+
13.6000,2.2915,8.0039,-1.3185,2.1680,1.4006
75+
13.6500,3.0306,7.6268,-0.9254,2.7828,2.1824
76+
13.7000,3.8270,7.0748,-0.6388,3.2168,3.0843
77+
13.7500,4.6051,6.2629,-0.4903,3.5348,4.1136
78+
13.8000,5.2323,5.1053,-0.4307,3.7759,5.2679
79+
13.8500,5.5015,3.5885,-0.3466,3.9535,6.4968
80+
13.9000,5.1640,1.8718,-0.1244,4.1004,7.6775
81+
13.9500,4.0419,0.3219,0.2392,4.3123,8.6365
82+
14.0000,2.1766,-0.6144,0.5950,4.6856,9.1986
83+
14.0500,-0.0862,-0.6322,0.8223,5.1964,9.2140
84+
14.1000,-2.1936,0.2438,1.0998,5.7541,8.5816
85+
14.1500,-3.6997,1.5712,1.8471,6.3451,7.2397
86+
14.2000,-4.4966,2.6264,3.2892,6.9081,5.1089
87+
14.2500,-4.6571,2.7715,5.1113,7.1247,2.2047
88+
14.3000,-4.1610,1.8723,6.5860,6.6282,-1.0241
89+
14.3500,-2.9785,0.5213,7.2304,5.4477,-3.7272
90+
14.4000,-1.3859,-0.3750,7.2799,3.9256,-5.3338
91+
14.4500,0.0508,-0.3573,7.2344,2.2696,-5.8842
92+
14.5000,0.8421,0.3782,7.2629,0.4986,-5.6675
93+
14.5500,0.8699,1.3096,7.2362,-1.3184,-4.8707
94+
14.6000,0.4312,2.0095,7.0367,-2.9361,-3.5570
95+
14.6500,0.0453,2.4035,6.6736,-4.0598,-1.8403
96+
14.7000,0.1333,2.6900,6.1907,-4.5597,0.0002
97+
14.7500,0.8256,3.0496,5.5779,-4.5304,1.6105
98+
14.8000,2.0047,3.4827,4.7767,-4.1853,2.7363
99+
14.8500,3.4387,3.8536,3.7343,-3.7203,3.2977
100+
14.9000,4.8791,3.9968,2.4676,-3.2374,3.3521
101+
14.9500,6.1180,3.7996,1.1058,-2.7505,3.0370
tests/data/columnNameSpace_valid.csv

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
Time,Observations,Predictions,Pred_Variance
2+
14.0000,-0.6144,nan,nan
3+
14.0500,-0.6322,-2.9153,4.0327
4+
14.1000,0.2438,-1.8449,5.7641
5+
14.1500,1.5712,-0.3591,5.7001
6+
14.2000,2.6264,2.7014,0.3026
7+
14.2500,2.7715,3.3193,0.1227
8+
14.3000,1.8723,3.3924,0.8893
9+
14.3500,0.5213,2.6678,2.8997
10+
14.4000,-0.3750,2.5840,2.9712
11+
14.4500,-0.3573,2.8702,3.1072
12+
14.5000,0.3782,5.2930,3.3165
13+
14.5500,1.3096,7.2163,2.2162
14+
14.6000,2.0095,7.9717,0.8142
15+
14.6500,2.4035,8.0942,0.7050
16+
14.7000,2.6900,8.4241,0.1544
17+
14.7500,3.0496,8.6051,0.0159
18+
14.8000,3.4827,8.4499,1.6581
19+
14.8500,3.8536,4.2120,13.6366
20+
14.9000,3.9968,4.7338,7.0001
21+
14.9500,3.7996,2.7842,10.5012
22+
15,nan,2.1054,7.2550

0 commit comments

Comments
 (0)