OpenWalnut  1.5.0dev
WProtonData.cpp
1 //---------------------------------------------------------------------------
2 //
3 // Project: OpenWalnut ( http://www.openwalnut.org )
4 //
5 // Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
6 // For more information see http://www.openwalnut.org/copying
7 //
8 // This file is part of OpenWalnut.
9 //
10 // OpenWalnut is free software: you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // OpenWalnut is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public License
21 // along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
22 //
23 //---------------------------------------------------------------------------
24 
#include <algorithm>
#include <cassert>
#include <cmath>
#include <limits>
#include <list>
#include <memory>
#include <regex>
#include <string>
#include <vector>
#include <boost/lexical_cast.hpp>
30 
31 #include "WProtonData.h"
32 
33 
35 {
36  setCSVHeader( csvHeader );
37  setCSVData( csvData );
38 }
39 
41 {
42  if( csvHeader == nullptr )
43  {
44  throw WException( "Can not set header! No header specified!" );
45  }
46 
47  if( csvHeader->empty() )
48  {
49  throw WException( "No empty header allowed!" );
50  }
51 
52  m_csvHeader = csvHeader;
53 
54  std::vector< std::string > header = m_csvHeader->at( 0 );
55 
56  // Set Name with Index of csv to map
57  m_columnMap.clear();
58  for( size_t i = 0; i < header.size(); i++ )
59  {
60  m_columnMap[header[i]] = i;
61  }
62 }
63 
65 {
66  if( csvData == nullptr )
67  {
68  throw WException( "Can not set data! No data specified!" );
69  }
70 
71  if( csvData->empty() )
72  {
73  throw WException( "Can not set data! No data content found!" );
74  }
75 
76  m_csvData = csvData;
77 
79 }
80 
82 {
83  return m_csvData;
84 }
85 
87 {
88  return m_csvHeader;
89 }
90 
91 void WProtonData::setStateIndex( std::string columnName, int index )
92 {
93  //0 is not accepted as an index because it is the default value if no map has been created. (index + 1)
94  m_ColumnMapSelectedIndex[ columnName ] = index + 1;
95 }
96 
97 bool WProtonData::isColumnAvailable( std::string columnName )
98 {
99  return m_ColumnMapSelectedIndex[ columnName ] > 0;
100 }
101 
102 int WProtonData::getColumnIndexBySelection( std::string selectedName )
103 {
104  //to get the original index value, have to calculate minus 1
105  return m_ColumnMapSelectedIndex[ selectedName ] - 1;
106 }
107 
108 int WProtonData::getColumnIndex( std::string columnName )
109 {
110  if( m_columnMap.find( columnName ) == m_columnMap.end() )
111  {
112  return -1;
113  }
114 
115  return m_columnMap[ columnName ];
116 }
117 
119 {
120  return m_columnTypes;
121 }
122 
124 {
125  m_columnTypes = WDataSetCSV::ContentElemSPtr( new std::vector< std::string >() );
126 
127  auto currentRow = csvData->begin();
128 
129  // determine column types based on first csv data row
130  for( auto cell : *currentRow )
131  {
132  m_columnTypes->push_back( determineColumnTypeByString( cell ) );
133  }
134 
135  assert( m_columnTypes != nullptr );
136  assert( !m_columnTypes->empty() );
137  assert( m_columnTypes->size() == m_csvHeader->at( 0 ).size() );
138 
139  for( size_t idx = 0; idx < m_columnTypes->size(); idx++ )
140  {
141  if( m_columnTypes->at( idx ) == WDataType::getDouble() )
142  {
144  {
145  m_columnTypes->at( idx ) = WDataType::getInt();
146  }
147  }
148  }
149 }
150 
151 std::string WProtonData::determineColumnTypeByString( std::string cellValue )
152 {
153  std::regex regexInt( R"(^[-\+]?[[:d:]]+([eE]\+?0?[1-9])?$)" );
154  std::regex regexDouble( R"(^([+-]?(?:[[:d:]]+\.?|[[:d:]]*\.[[:d:]]+))(?:[Ee][+-]?[[:d:]]+)?$)" );
155 
156  if( std::regex_search( cellValue, regexInt ) )
157  {
158  return WDataType::getInt();
159  }
160  else if( std::regex_search( cellValue, regexDouble ) )
161  {
162  return WDataType::getDouble();
163  }
164  else
165  {
166  return WDataType::getString();
167  }
168 }
169 
171 {
172  double doubleValue;
173  int intValue;
174 
175  for( auto row : *m_csvData )
176  {
177  doubleValue = boost::lexical_cast< double >( row.at( columnNumber ) );
178  intValue = ( int )doubleValue;
179 
180  if( doubleValue - intValue != 0 )
181  {
182  return false;
183  }
184  }
185 
186  return true;
187 }
188 
189 std::vector< std::string > WProtonData::getHeaderFromType( std::list< std::string > typeNames )
190 {
191  std::vector< std::string > header = m_csvHeader->at( 0 );
192  std::vector< std::string > columnTypes = *m_columnTypes;
193  std::vector< std::string > filterHeader;
194 
195  for( size_t i = 0; i < columnTypes.size(); i++ )
196  {
197  for( std::string type : typeNames )
198  {
199  if( type == WDataType::getDefault() )
200  {
201  return header;
202  }
203 
204  if( columnTypes[ i ] == type )
205  {
206  filterHeader.push_back( header[i] );
207  }
208  }
209  }
210 
211  return filterHeader;
212 }
std::shared_ptr< std::vector< std::vector< std::string > > > ContentSPtr
represents a pointer to the Content
Definition: WDataSetCSV.h:52
std::shared_ptr< std::vector< std::string > > ContentElemSPtr
represents a shared pointer to a ContentElem object.
Definition: WDataSetCSV.h:67
static std::string getDefault()
getter
Definition: WDataType.cpp:44
static std::string getInt()
getter
Definition: WDataType.cpp:29
static std::string getString()
getter
Definition: WDataType.cpp:39
static std::string getDouble()
getter
Definition: WDataType.cpp:34
Basic exception handler.
Definition: WException.h:39
WDataSetCSV::ContentElemSPtr m_columnTypes
Stores the information, which data type is stored in associated column.
Definition: WProtonData.h:159
int getColumnIndex(std::string columnName)
getter
WDataSetCSV::ContentElemSPtr getColumnTypes()
Get column types, stored in a string vector.
WDataSetCSV::ContentSPtr m_csvData
Stores data from obtained input file.
Definition: WProtonData.h:144
void detectColumnTypesFromCsvData(WDataSetCSV::ContentSPtr csvData)
Reads csv data and stores column types in m_columnTypes.
std::map< std::string, int > m_ColumnMapSelectedIndex
Stores index of the selected single-selector (ColumnPropertyHandler)
Definition: WProtonData.h:154
int getColumnIndexBySelection(std::string selectedName)
getter
WDataSetCSV::ContentSPtr m_csvHeader
Stores column index of data.
Definition: WProtonData.h:139
std::string determineColumnTypeByString(std::string cellValue)
Determines column type due to cellValue.
WProtonData(WDataSetCSV::ContentSPtr csvHeader, WDataSetCSV::ContentSPtr csvData)
constructor
Definition: WProtonData.cpp:34
void setCSVData(WDataSetCSV::ContentSPtr csvData)
setter
Definition: WProtonData.cpp:64
void setStateIndex(std::string columnName, int index)
setter
Definition: WProtonData.cpp:91
WDataSetCSV::ContentSPtr getCSVHeader()
getter
Definition: WProtonData.cpp:86
void setCSVHeader(WDataSetCSV::ContentSPtr csvHeader)
setter
Definition: WProtonData.cpp:40
bool checkIfDoubleColumnCanBeInteger(int columnNumber)
Checks, if values of a column, containing double values, can be converted to integers.
WDataSetCSV::ContentSPtr getCSVData()
getter
Definition: WProtonData.cpp:81
std::vector< std::string > getHeaderFromType(std::list< std::string > typeNames)
Return a vector of filtered Headers.
std::map< std::string, int > m_columnMap
Stores data as map.
Definition: WProtonData.h:149
bool isColumnAvailable(std::string columnName)
checks whether columns are available
Definition: WProtonData.cpp:97