OpenWalnut  1.5.0dev
WValueSetHistogram.cpp
1 //---------------------------------------------------------------------------
2 //
3 // Project: OpenWalnut ( http://www.openwalnut.org )
4 //
5 // Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
6 // For more information see http://www.openwalnut.org/copying
7 //
8 // This file is part of OpenWalnut.
9 //
10 // OpenWalnut is free software: you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // OpenWalnut is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public License
21 // along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
22 //
23 //---------------------------------------------------------------------------
24 
25 #include <algorithm>
26 #include <cstring> // memset()
27 #include <memory>
28 #include <numeric>
29 #include <string>
30 #include <utility>
31 
32 #include "../../common/WAssert.h"
33 #include "../../common/WLimits.h"
34 #include "../../common/exceptions/WOutOfBounds.h"
35 #include "../WDataHandlerEnums.h"
36 #include "WValueSetHistogram.h"
37 
38 WValueSetHistogram::WValueSetHistogram( std::shared_ptr< WValueSetBase > valueSet, size_t buckets ):
39  WHistogram( valueSet->getMinimumValue(), valueSet->getMaximumValue(), buckets )
40 {
41  buildHistogram( *valueSet );
42 }
43 
44 WValueSetHistogram::WValueSetHistogram( const WValueSetBase& valueSet, size_t buckets ):
45  WHistogram( valueSet.getMinimumValue(), valueSet.getMaximumValue(), buckets )
46 {
47  buildHistogram( valueSet );
48 }
49 
50 WValueSetHistogram::WValueSetHistogram( std::shared_ptr< WValueSetBase > valueSet, double min, double max, size_t buckets ):
51  WHistogram( min, max, buckets )
52 {
53  buildHistogram( *valueSet );
54 }
55 
56 WValueSetHistogram::WValueSetHistogram( const WValueSetBase& valueSet, double min, double max, size_t buckets ):
57  WHistogram( min, max, buckets )
58 {
59  buildHistogram( valueSet );
60 }
61 
63 {
65 
66  // create base histogram
67  WAssert( m_nbBuckets > 1, "WValueSetHistogram::buildHistogram : number of buckets needs to be larger than 1." );
69  m_initialBucketSize = ( m_maximum - m_minimum ) / static_cast< double >( m_nInitialBuckets );
70  WAssert( m_initialBucketSize > 0.0, "WValueSetHistogram::buildHistogram() : m_initialBucketSize to small." );
71 
72  // NOTE: as all the intervals are right-open, we need an additional slot in our array for the last interval [m_maximum,\infinity). For the
73  // calculation of interval sizes, the value must not be incremented
75 
76  // create and initialize array to zero which finally contains the counts
77  size_t* initialBuckets = new size_t[ m_nInitialBuckets ];
78  memset( initialBuckets, 0, m_nInitialBuckets * sizeof( size_t ) );
79  // *initialBuckets = { 0 }; // this should works with C++0x (instead memset), TEST IT!
80 
81  // the array can be shared among several instances of WValueSetHistogram.
82  m_initialBuckets = boost::shared_array< size_t >( initialBuckets );
83 
84  // no mapping applied yet. Initial and mapped are equal
88 
89  // and finally create the histogram
90  for( size_t i = 0; i < valueSet.size(); ++i )
91  {
92  double tmp = valueSet.getScalarDouble( i );
93  insert( tmp );
94  }
95 
96  m_nbTotalElements = valueSet.size();
97 }
98 
99 WValueSetHistogram::WValueSetHistogram( const WValueSetHistogram& histogram, size_t buckets ):
100  WHistogram( histogram ),
101  m_initialBucketSize( histogram.m_initialBucketSize ),
102  m_initialBuckets( histogram.m_initialBuckets ),
103  m_nInitialBuckets( histogram.m_nInitialBuckets ),
104  m_mappedBuckets( histogram.m_mappedBuckets ),
105  m_nMappedBuckets( histogram.m_nMappedBuckets ),
106  m_mappedBucketSize( histogram.m_mappedBucketSize ),
107  m_nbTotalElements( histogram.m_nbTotalElements )
108 {
109  // apply modification of the histogram bucket size?
110  if( ( buckets == 0 ) || ( buckets == m_nMappedBuckets ) )
111  {
112  return;
113  }
114 
115  WAssert( buckets > 1, "WValueSetHistogram::WValueSetHistogram : number of buckets needs to be larger than 1." );
116  WAssert( buckets < m_nInitialBuckets,
117  "WValueSetHistogram::WValueSetHistogram : number of buckets needs to be smaller than the initial bucket count." );
118 
119  // number of elements in the new mapped histogram = division + (round up)
120  m_nMappedBuckets = buckets - 1;
121  m_mappedBucketSize = ( m_maximum - m_minimum ) / static_cast< double >( m_nMappedBuckets );
122 
123  // NOTE: as all the intervals are right-open, we need an additional slot in our array for the last interval [m_maximum,\infinity). For the
124  // calculation of interval sizes, the value must not be incremented
126 
127  size_t ratio = static_cast<size_t>( m_nInitialBuckets / m_nMappedBuckets );
128 
129  m_mappedBuckets.reset();
130 
131  size_t* mappedBuckets = new size_t[ m_nMappedBuckets ];
132  memset( mappedBuckets, 0, m_nMappedBuckets * sizeof( size_t ) );
133  // *mappedBuckets = { 0 }; // works with C++0x
134  m_mappedBuckets = boost::shared_array< size_t >( mappedBuckets );
135 
136  // map it
137  size_t index = 0;
138  for( size_t i = 0; i < m_nInitialBuckets; ++i )
139  {
140  if( ( i % ratio == 0 ) && ( i != 0 ) && ( i != m_nInitialBuckets - 1 ) )
141  {
142  index++;
143  }
144  m_mappedBuckets[ index ] += m_initialBuckets[i];
145  }
146 }
147 
149 {
150  if( this != &other ) // protect against invalid self-assignment
151  {
152  m_minimum = other.m_minimum;
153  m_maximum = other.m_maximum;
158 
159  // copy the initial/mapped buckets reference, no deep copy here!
162  }
163 
164  return *this;
165 }
166 
168 {
169 }
170 
171 boost::shared_array< size_t > WValueSetHistogram::getInitialBuckets() const
172 {
173  return m_initialBuckets;
174 }
175 
177 {
178  return m_nInitialBuckets;
179 }
180 
182 {
183  return m_initialBucketSize;
184 }
185 
186 double WValueSetHistogram::getBucketSize( size_t /* index */ ) const
187 {
188  // ignore index as each bucket has the same width
189  return m_mappedBucketSize;
190 }
191 
192 void WValueSetHistogram::insert( double value )
193 {
194  m_mappedBuckets[ getIndexForValue( value ) ]++;
195 }
196 
197 size_t WValueSetHistogram::operator[]( size_t index ) const
198 {
199  // if no mapping has been applied, mappedBuckets points to the initial bucket
200  return m_mappedBuckets[ index ];
201 }
202 
203 size_t WValueSetHistogram::at( size_t index ) const
204 {
205  WAssert( index < m_nMappedBuckets, "WValueSetHistogram::at() : index out of range." );
206 
207  // if no mapping has been applied, mappedBuckets points to the initial bucket
208  return m_mappedBuckets[ index ];
209 }
210 
212 {
213  return m_nMappedBuckets; // overwrite the WHistogram::size here as we have our own size.
214 }
215 
217 {
218  return m_nbTotalElements;
219 }
220 
221 std::pair< double, double > WValueSetHistogram::getIntervalForIndex( size_t index ) const
222 {
223  double first = m_minimum + m_mappedBucketSize * index;
224  double second = m_minimum + m_mappedBucketSize * ( index + 1 );
225  return std::make_pair( first, second );
226 }
227 
228 size_t WValueSetHistogram::accumulate( size_t startIndex, size_t endIndex ) const
229 {
230  if( startIndex > endIndex )
231  {
232  std::swap( startIndex, endIndex );
233  }
234 
235  // valid index?
236  if( endIndex > size() ) // as endIndex is exclusive, it is allowed to equal size()
237  {
238  throw WOutOfBounds( std::string( "The specified endIndex is out of bounds." ) );
239  }
240 
241  // unfortunately, shared_array can't be used for std::accumulate
242  size_t acc = 0;
243  while( startIndex != endIndex )
244  {
245  acc += m_mappedBuckets[ startIndex++ ];
246  }
247 
248  return acc;
249 }
250 
251 std::ostream& operator<<( std::ostream& out, const WValueSetHistogram& h )
252 {
253  for( size_t i = 0; i < h.size() - 1; ++i )
254  {
255  std::pair< double, double > interval = h.getIntervalForIndex( i );
256  // NOTE: the notation for open intervals is [a,b) or alternatively [a,b[.
257  //out << i << " = [" << interval.first << ", " << interval.second << ") = " << h[ i ] << std::endl;
258  out << interval.first << " " << interval.second << " " << std::min( h[ i ], size_t( 3000 ) ) << std::endl;
259  }
260  // the last interval is handled special
261  //out << h.size() - 1 << " = [" << h.getIntervalForIndex( h.size() - 1 ).first << ", inf) = " << h[ h.size() - 1 ] << std::endl;
262  out << h.getIntervalForIndex( h.size() - 1 ).first << " inf " << std::min( h[ h.size() - 1 ], size_t( 3000 ) ) << std::endl;
263 
264  return out;
265 }
266 
Container which associate values with (uniform width) bins (aka intervals or buckets).
Definition: WHistogram.h:36
double m_minimum
The smallest value.
Definition: WHistogram.h:122
double m_nbBuckets
The number of buckets.
Definition: WHistogram.h:132
double m_maximum
The biggest value.
Definition: WHistogram.h:127
Indicates invalid element access of a container.
Definition: WOutOfBounds.h:37
Abstract base class to all WValueSets.
Definition: WValueSetBase.h:60
virtual size_t size() const =0
virtual double getScalarDouble(size_t i) const =0
Used to find the occurrence frequencies of values in a value set.
virtual size_t accumulate(size_t startIndex, size_t endIndex) const
Sums up the buckets in the specified interval.
double m_mappedBucketSize
Size of one bucket in the mapped histogram.
virtual size_t at(size_t index) const
Get the count of the bucket.
WValueSetHistogram(std::shared_ptr< WValueSetBase > valueSet, size_t buckets=1000)
Constructor.
boost::shared_array< size_t > getInitialBuckets() const
Return the initial buckets.
boost::shared_array< size_t > m_initialBuckets
Pointer to all initial buckets of the histogram.
size_t getNInitialBuckets() const
Return the number of initial buckets.
size_t m_nInitialBuckets
Number of buckets in the initial histogram.
virtual size_t getIndexForValue(double value) const
Returns the right index to the bucket containing the given value.
virtual void insert(double value)
increment the value by one, contains the logic to find the element place in the array.
virtual size_t getTotalElementCount() const
This returns the number of value set entries added to the histogram.
double getInitialBucketSize() const
Return the size of one initial bucket.
virtual double getBucketSize(size_t index=0) const
Return the size of one bucket.
double m_initialBucketSize
Size of one bucket in the initial histogram.
virtual size_t operator[](size_t index) const
Get the count of the bucket.
void buildHistogram(const WValueSetBase &valueSet)
Actually builds the histogram.
size_t m_nMappedBuckets
Tracks the number of a buckets in the mapped histogram.
virtual size_t size() const
Returns the number of buckets in the histogram with the actual mapping.
virtual std::pair< double, double > getIntervalForIndex(size_t index) const
Returns the actual interval associated with the given index.
WValueSetHistogram & operator=(const WValueSetHistogram &other)
Copy assignment.
size_t m_nbTotalElements
The number of elements distributed in the buckets.
~WValueSetHistogram()
Destructor.
boost::shared_array< size_t > m_mappedBuckets
Pointer to all initial buckets of the histogram.