I noticed this when testing a real-world test case. STCK values are used as unique keys on z/OS for UOW tokens for CICS/IMS/DB2 etc.

Our products use STCK as keys to queues. In the case of unordered_set/unordered_map this showed up like a sore thumb! I can

circumvnet the problem by using a good 64bit hash function (thanks Thomas Wang) but would like IBM to address this issue.

The implementation (below) is is not a hash function, it's cast which casts a 64bit integer to a 32bit integer which

just takes the low 32 bits. In the case of a STCK value, this results in a sub-optimal value (it's a shocker)! FWIW, I also tested

the boost::hash<unsigned long long> which is just as pathetic. But the boost hash tables use a prime number for the bucket size

which fixes bad hashes. The IBM version uses a linear hash table, which is great - much faster for expands/shrinks - but does not

protect against bad hash functions.

```
// TEMPLATE CLASS hash<unsigned long long> template struct hash<unsigned
```**
long** long> :
**
public** unary_function<unsigned
**
long** long, size_t>
{ size_t operator()(unsigned
**
long**
**
long** _Val)
**
const**
{
**
return** static_cast<size_t>(_Val);
}
};

I've coded a simple hash test which, IIRC, comes from Kerinhan & Pikes book "The art of programming". It's based on markov

chains an prints a histogram of the length of synonym chains.

```
#define __IBMCPP_TR1__ 1 #include <iostream> #include <iomanip> #include <string> #include <cmath> #include <ctime> #include <vector> #include <algorithm> #include <numeric> #include <function> #include <builtins.h> #include <inttypes.h> #include <functional> using namespace std;
```**
double** cputime(
**
void** )
{
**
double** time; time = (
**
double**) clock(); time = time / CLOCKS_PER_SEC;
**
return** time;
}
// thanks to Thomas Wong - adapted from Java struct GoodHash :
**
public** unary_function<uint64_t, size_t>
{ size_t operator()(uint64_t key)
**
const**
{ key = (~key) + (key << 18); key = key ^ (key >> 31); key = (key + (key << 2)) + (key << 4); key = key ^ (key >> 11); key = key + (key << 6); key = key ^ (key >> 22);
**
return** static_cast<uint32_t>( key );
}
}; struct CalcAverage
{ CalcAverage() : numBuckets(0), sum(0)
{
}
**
void** operator() (
**
const** size_t& n )
{
**
if** ( n > 0 )
{ sum++; numBuckets += n;
}
} size_t result()
{
**
if** ( sum > 0 )
{
**
return** numBuckets / sum;
}
**
return** 0;
} size_t numBuckets; size_t sum;
}; bool isPowerOfTwo(
**
int** n )
{
**
return** n && !( n & ( n - 1 ) );
} bool isPrime(
**
const**
**
int** x )
{
**
const**
**
int** max = static_cast<int>( std::sqrt( static_cast<double>( x ) ) ) + 1;
**
for** (
**
int** i = 2; i != max; ++i )
{
**
if** ( x % i == 0 )
**
return**
**
false**;
}
**
return**
**
true**;
} template <typename K, typename H>
**
class** HashTester
{
**
public**: HashTester( size_t size );
**
public**:
**
void** insert(
**
const** K& k );
**
void** printReport();
**
private**: H m_hasher; size_t m_size; size_t m_numInserts; std::vector<uint32_t> m_buckets;
**
double** m_hashTime;
}; template <typename K, typename H> HashTester<K,H>::HashTester( size_t size ) : m_size( size ) , m_numInserts( 0 ) , m_buckets( m_size, 0 )
{
} template <typename K, typename H>
**
void** HashTester<K,H>::insert(
**
const** K& k )
{ m_numInserts++;
**
double** startTime = cputime(); uint32_t hash = m_hasher( k ); m_hashTime += cputime() - startTime; m_buckets[hash % m_size]++;
} template <typename K, typename H>
**
void** HashTester<K,H>::printReport()
{ uint32_t maxBucket = 0;
**
const** size_t freqsize = 16;
// good size to fix on a screen uint32_t freq[freqsize + 1] =
{0
};
// +1 for overflow
// build the frequency array
**
for** (
**
int** i = 0; i < m_size; i++ )
{ maxBucket = max( maxBucket, m_buckets[i] );
**
if** ( m_buckets[i] > freqsize )
{ freq[freqsize] += m_buckets[i];
}
**
else**
**
if** ( m_buckets[i] == 0 )
{ freq[0]++;
}
**
else**
{ freq[m_buckets[i]] += m_buckets[i];
}
} cout.setf(ios::fixed); size_t average = for_each( m_buckets.begin(), m_buckets.end(), CalcAverage() ).result(); string tableType;
**
if** ( isPrime( m_size) ) tableType =
"(Prime)";
**
else**
**
if** ( isPowerOfTwo( m_size ) ) tableType =
"(Power of two)"; cout <<
"Statistics:\n"; cout <<
" Hash table size . . . . . " << m_size <<
" " << tableType <<
"\n"; cout <<
" Total inserts . . . . . . " << m_size <<
"\n"; cout <<
" Maximum bucket size . . . " << maxBucket <<
"\n"; cout <<
" Average bucket size . . . " << average <<
"\n"; cout <<
" Hash time in CPU secs . . " << left << setw(10) << setprecision(6) << m_hashTime <<
"\n"; cout <<
"Histogram:\n"; cout <<
" Size Count %total\n"; cout <<
" ----- ------- ------- *....1....2....3....4....5....6....7....8....9....*\n";
**
float** percent; size_t stars;
**
for** (
**
int** i = 0; i < freqsize; i++ )
{ percent = i > 0 ? (freq[i] * 100.0) / m_numInserts : 0; stars = static_cast<int>( percent * 0.5 ); cout << setw(5) << right << i <<
" " << setw(7) << freq[i] <<
" " << setw(6) << setprecision(2) << percent <<
"%" <<
" " << string( stars,
'*' ) <<
"\n";
} percent = (freq[freqsize] * 100.0) / m_numInserts; stars = static_cast<int>( percent * 0.5 ); cout <<
" >" << setw(2) << freqsize - 1 <<
" " << setw(7) << freq[freqsize] <<
" " << setw(6) << setprecision(2) << percent <<
"%" <<
" " << string( stars,
'*' ) <<
"\n";
}
**
int** main(
**
int** argc,
**
char** *argv[] )
{
// set defaults size_t tableSize = 16384; size_t inserts = tableSize * 8;
**
switch** ( argc )
{
**
case** 2: tableSize = atoi( argv[1] ); inserts = tableSize * 8;
**
break**;
**
case** 3: tableSize = atoi( argv[1] ); inserts = atoi( argv[2] );
} typedef std::vector<uint64_t> KeyArray; KeyArray keys;
**
while** ( inserts-- )
{ unsigned
**
long**
**
long** s; __stck( &s ); keys.push_back( s );
} HashTester<uint64_t, std::tr1::hash<uint64_t> > bad( tableSize );
**
for** ( KeyArray::iterator i = keys.begin(); i < keys.end(); i++ )
{ bad.insert( *i );
} bad.printReport(); HashTester<uint64_t, GoodHash> good( tableSize );
**
for** ( KeyArray::iterator i = keys.begin(); i < keys.end(); i++ )
{ good.insert( *i );
} good.printReport();
}

David Crayford