OpenWalnut  1.5.0dev
WReaderCSV.cpp
1 //---------------------------------------------------------------------------
2 //
3 // Project: OpenWalnut ( http://www.openwalnut.org )
4 //
5 // Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
6 // For more information see http://www.openwalnut.org/copying
7 //
8 // This file is part of OpenWalnut.
9 //
10 // OpenWalnut is free software: you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // OpenWalnut is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public License
21 // along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
22 //
23 //---------------------------------------------------------------------------
24 
25 #include <memory>
26 #include <string>
27 #include <vector>
28 
29 #include "WReaderCSV.h"
30 
31 
32 WReaderCSV::WReaderCSV( std::string fname )
33  : WReader( fname )
34 {
35 }
36 
38 {
39 }
40 
41 
// Extracts one line from a stream, independent of the platform's line-ending convention.
//
// Characters are consumed up to and including the next line terminator, where a
// terminator is "\n", "\r\n" or a lone "\r". The terminator itself is not stored.
//
// is: the stream to read from.
// t:  receives the line content; it is cleared first, so it is empty if nothing was read.
//
// Returns the stream that was passed in. If the end of the input is reached without
// having read any character, eofbit is set on the stream. A final line without a
// terminator is returned normally and does NOT set eofbit; the following call does.
static std::istream& safeGetLine( std::istream& is, std::string& t ) // NOLINT
{
    t.clear();

    // Constructed with noskipws = true, so leading whitespace of the line is kept.
    std::istream::sentry guard( is, true );
    std::streambuf* const buffer = is.rdbuf();

    for( ;; )
    {
        const int current = buffer->sbumpc();

        if( current == std::streambuf::traits_type::eof() )
        {
            break;
        }

        if( current == '\n' ) // Unix line break
        {
            return is;
        }

        if( current == '\r' ) // old Mac line break, or first half of a Windows line break
        {
            if( buffer->sgetc() == '\n' )
            {
                // Windows line break: swallow the '\n' that belongs to this terminator.
                buffer->sbumpc();
            }
            return is;
        }

        t.push_back( current );
    }

    // The input ended. Only flag end-of-file if this call produced no content at all.
    if( t.empty() )
    {
        is.setstate( std::ios::eofbit );
    }
    return is;
}
73 
74 std::shared_ptr< WDataSetCSV > WReaderCSV::read()
75 {
76  size_t columnCount = 0;
77  std::string line;
78  std::vector< std::string > row;
79  WDataSetCSV::SeperatedRowSPtr rawRow = WDataSetCSV::SeperatedRowSPtr( new std::vector< std::string >() );
80 
83 
84  std::fstream file;
85  file.open( m_fname );
86 
87  if( !file.is_open() )
88  {
89  throw WException( "File could not be opened!" );
90  }
91 
92  // treat first line as header
93  safeGetLine( file, line );
94  if( line == "" )
95  {
96  throw WException( "CSV file is empty!" );
97  }
98 
99  rawRow->push_back( line );
100  header->push_back( transformLineToVector( line ) );
101  columnCount = header->at( 0 ).size();
102 
103  // treat remaining lines as data
104  while( safeGetLine( file, line ) )
105  {
106  if( line.empty() && file.eof() ) // Ignore empty line at the end of a file.
107  {
108  break;
109  }
110  rawRow->push_back( line );
111  row = transformLineToVector( line );
112  if( row.size() != columnCount )
113  {
114  throw WException( "Data row count does not equal header count!" );
115  }
116  else
117  {
118  data->push_back( row );
119  }
120  }
121 
122  if( data->size() == 0 )
123  {
124  throw WException( "CSV File does not contain data!" );
125  }
126 
127  file.close();
128 
129  std::shared_ptr< WDataSetCSV > datasetcsv = std::shared_ptr< WDataSetCSV >( new WDataSetCSV( header, data ) );
130  datasetcsv->setRawDataSet( rawRow );
131 
132  return datasetcsv;
133 }
134 
135 std::vector< std::string > WReaderCSV::transformLineToVector( std::string line )
136 {
137  std::string cell;
138  std::vector< std::string > row;
139 
140  size_t pos = 0;
141  while( ( pos = line.find( ',' ) ) != std::string::npos )
142  {
143  cell = line.substr( 0, pos );
144  row.push_back( cell );
145  line.erase( 0, pos + 1 );
146  }
147 
148  row.push_back( line );
149 
150  return row;
151 }
Represents a CSV dataset.
Definition: WDataSetCSV.h:38
std::shared_ptr< std::vector< std::vector< std::string > > > ContentSPtr
represents a pointer to the Content
Definition: WDataSetCSV.h:52
std::shared_ptr< std::vector< std::string > > SeperatedRowSPtr
represents a pointer to a vector of csv-raw-row as string
Definition: WDataSetCSV.h:57
std::vector< std::vector< std::string > > Content
represents a vector containing a vector of strings.
Definition: WDataSetCSV.h:47
Basic exception handler.
Definition: WException.h:39
virtual ~WReaderCSV()
Destroys this instance.
Definition: WReaderCSV.cpp:37
WReaderCSV(std::string fname)
Constructs a CSV reader object.
Definition: WReaderCSV.cpp:32
virtual std::shared_ptr< WDataSetCSV > read()
Read the file and create a dataset as a vector.
Definition: WReaderCSV.cpp:74
std::vector< std::string > transformLineToVector(std::string line)
Transforms a given string into a string vector, by a predefined delimiter.
Definition: WReaderCSV.cpp:135
Read some data from a given file.
Definition: WReader.h:40
std::string m_fname
Absolute path of the file to read from.
Definition: WReader.h:68