/*
 * mapping.cpp -- associative array implementations
 * by pts@fazekas.hu at Fri Mar 15 21:13:47 CET 2002
 */

#ifdef __GNUC__
#ifndef __clang__
#pragma implementation
#endif
#endif

#include "mapping.hpp"
#include "error.hpp"
#include <string.h>
#include <stdlib.h> /* abort() */

/* --- */

bool Mapping::Gen::update(char const*key, slen_t keylen, char const*data) {
  char *found_data=get(key, keylen);
  if (found_data==NULLP) return true;
  memcpy(found_data, data, datalen);
  return false;
}

/* --- */

bool Mapping::DoubleHash::obj_assert() {
  assert(len <= used);
  assert(minlen <= len);
  assert(used <= maxused);
  assert(maxused < alloced);
  assert(alloced < (slen_t)-2);
  /* Imp: re-count used and len */
  return true;
}

bool Mapping::DoubleHash::set     (char const*  key, slen_t  keylen, char const*data) {
  assert(obj_assert());
  slen_t h1=vi_h1(key,keylen);
  assert(h1<alloced);
  Ary *p=ary+h1;
  if (p->keylen==NEVER_USED) { added:
    /* Dat: we shouldn't stop on p->keylen==DELETED */
    /* This will be deleted by `delete [] (pp->keydata-datalen);' called from
     * Mapping::DoubleHash::clear() called from the destructor of
     * Mapping::DoubleHash15, also known as Mapping::H.
     */
    memcpy(p->keydata=new char[keylen+datalen], data, datalen);
    memcpy(p->keydata+=datalen, key, p->keylen=keylen); len++;
    //if (p->keylen==NEVER_USED && used++==maxused) { p->keylen=keylen; rehash(); }
    //                                         else { p->keylen=keylen; }
    if (used++==maxused) rehash();
    assert(obj_assert());
    return true; /* new value added */
  }
  if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) { updated:
    memcpy(p->keydata-datalen, data, datalen);
    return false; /* value updated */
  }
  /* Not found for the 1st try. We have to walk. */
  slen_t h2=vi_h2(key,keylen);
  assert(1<=h2 && h1<alloced);
  while (1) { /* Dat: maxlen < alloced, so this can safely be an infinite loop. */
    if (h1>=h2) { h1-=h2; p-=h2; }
           else { h1+=alloced-h2; p+=alloced-h2; }
    if (p->keylen==NEVER_USED) goto added;
    if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) goto updated;
  }
}
char*Mapping::DoubleHash::get     (char const*  key, slen_t  keylen) {
  assert(obj_assert());
  slen_t h1=vi_h1(key,keylen);
  assert(h1<alloced);
  Ary *p=ary+h1;
  if (p->keylen==NEVER_USED) return (char*)NULLP;
  if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) return p->keydata-datalen;
  /* Not found for the 1st try. We have to walk. */
  slen_t h2=vi_h2(key,keylen);
  assert(1<=h2 && h1<alloced);
  /* Dat: assert(lnko(...)) better in vi_h2 */
  while (1) { /* Dat: maxlen < alloced, so this can safely be an infinite loop. */
    if (h1>=h2) { h1-=h2; p-=h2; }
           else { h1+=alloced-h2; p+=alloced-h2; }
    if (p->keylen==NEVER_USED) return (char*)NULLP;
    if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) return p->keydata-datalen;
  }
}
bool Mapping::DoubleHash::deletee (char const*  key, slen_t  keylen) {
  /* We must not set `p->keylen=NEVER_USED' in this method because this would
   * ruin searches jumping on `p', but not coming from `p+h2'. So we just set
   * `p->keylen=DELETED'.
   */
  assert(obj_assert());
  slen_t h1=vi_h1(key,keylen);
  assert(h1<alloced);
  Ary *p=ary+h1;
  if (p->keylen==NEVER_USED) return true; /* key to be deleted does not exist */
  if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) { found:
    vi_dtor(p->keydata-datalen);
    delete [] (p->keydata-datalen);
    p->keylen=(slen_t)DELETED;
    // p->keydata=(char*)NULLP;
    if (len--==minlen) rehash();
    assert(obj_assert());
    return false; /* key-value pair deleted */
  }
  /* Not found for the 1st try. We have to walk. */
  slen_t h2=vi_h2(key,keylen);
  assert(1<=h2 && h1<alloced);
  while (1) { /* Dat: maxlen < alloced, so this can safely be an infinite loop. */
    if (h1>=h2) { h1-=h2; p-=h2; }
           else { h1+=alloced-h2; p+=alloced-h2; }
    if (p->keylen==(slen_t)NEVER_USED) return true; /* key to be deleted does not exist */
    if (p->keylen==keylen && 0==memcmp(p->keydata, key, keylen)) goto found;
  }
}
void Mapping::DoubleHash::getFirst(char const*const*& key, slen_t &keylen, char *& data) {
  assert(obj_assert());
  Ary *p=ary, *pend=p+alloced;
  while (p!=pend && p->keylen>=(slen_t)-2) p++;
  if (p==pend) { key=(char const*const*)NULLP; }
          else { key=&p->keydata; data=p->keydata-datalen; keylen=p->keylen; }
}
void Mapping::DoubleHash::getNext (char const*const*& key, slen_t &keylen, char *& data) {
  assert(obj_assert());
  /* vvv Dat: this operation is safe despite of the fact that it increases
   *          signedness off pointer target type ((char*) -> (Ary*)).
   */
  Ary *p=PTS_align_cast(Ary*,
   ( (char*)const_cast<char**>(key)
     - ((char*)&((Ary*)0)->keydata-(char*)0) )), *pend=ary+alloced;
  p++;
  while (p!=pend && p->keylen>=(slen_t)-2) p++;
  if (p==pend) { key=(char const*const*)NULLP; }
          else { key=&p->keydata; data=p->keydata-datalen; keylen=p->keylen; }
}
void Mapping::DoubleHash::rehash() {
  unsigned old_scale=scale;
  slen_t old_alloced=alloced;
#ifndef NDEBUG
  slen_t old_len=len;
  slen_t old_used=used;
#endif
  Ary *old_ary=ary;
  // printf("rehash minlen=%u len=%u used=%u maxused=%u alloced=%u\n", minlen, len, used, maxused, alloced);
  vi_scale();
  // printf("scaled minlen=%u len=%u used=%u maxused=%u alloced=%u\n", minlen, len, used, maxused, alloced);
  // assert( minlen <= len);
  if (scale==old_scale && used<=maxused) { assert(obj_assert()); return; }
  Ary *pp=old_ary, *ppend=pp+old_alloced;
  slen_t calclen=0, calcused=0;
  ary=new Ary[alloced];
  // printf("alloced=%u\n", alloced);
  memset(ary, '\377', sizeof(Ary)*alloced); /* fill with NEVER_USED */
  /* insert the old values to the new hash */
  for (; pp!=ppend; pp++) {
    if (pp->keylen!=NEVER_USED) calcused++;
    if (pp->keylen<(slen_t)-2) {
      // printf("%u %u\n", (unsigned char)pp->keydata[0], (unsigned char)pp->keydata[1]);
      calclen++;
      slen_t h1=vi_h1(pp->keydata,pp->keylen);
      assert(h1<alloced);
      Ary *p=ary+h1;
      assert(p->keylen!=DELETED);
      if (p->keylen==NEVER_USED) { found:
        p->keylen=pp->keylen;
        p->keydata=pp->keydata;
        continue;
      }
      assert(!(p->keylen==pp->keylen && 0==memcmp(p->keydata, pp->keydata, pp->keylen)) && "dupicate key");
      /* Not found for the 1st try. We have to walk. */
      slen_t h2=vi_h2(pp->keydata,pp->keylen);
      assert(1<=h2 && h1<alloced);
      while (1) { /* Dat: maxlen < alloced, so this can safely be an infinite loop. */
        if (h1>=h2) { h1-=h2; p-=h2; }
               else { h1+=alloced-h2; p+=alloced-h2; }
        assert(p->keylen!=DELETED);
        if (p->keylen==NEVER_USED) goto found;
        assert(!(p->keylen==pp->keylen && 0==memcmp(p->keydata, pp->keydata, pp->keylen)) && "dupicate key");
      } /* WHILE */
    }
  } /* NEXT */
  used=len=calclen;
  assert(calclen==old_len);
  assert(calcused==old_used);
  assert(old_len==len);
  assert(old_len==used);
  assert(obj_assert());
  delete [] old_ary;
}
void Mapping::DoubleHash::clear() {
  assert(obj_assert());
  Ary *pp=ary, *ppend=pp+alloced;
  for (; pp!=ppend; pp++) if (pp->keylen<(slen_t)-2) {
    vi_dtor(pp->keydata-datalen); /* this is quite fast */
    delete [] (pp->keydata-datalen); /* this is really slow for millions of values */
    pp->keylen=(slen_t)DELETED;
    len--;
  }
  assert(len==0);
  if (minlen!=0) rehash();
}

/* --- */

static slen_t const d15_primes[]= {
  0, 10, 13, 23, 37, 59, 89, 137, 211, 317, 479, 719, 1087, 1637, 2459, 3691,
  5557, 8353, 12539, 18839, 28277, 42433, 63659,
  95507UL, 143261UL, 214913UL, 322397UL, 483611UL, 725423UL, 1088159UL, 1632259UL, 2448389UL,
  3672593UL, 5508913UL, 8263373UL, 12395069UL, 18592631UL, 27888947UL, 41833427UL,
  62750147UL, 94125247UL, 141187901UL, 211781873UL, 317672813UL, 476509223UL,
  714763843UL, 1072145771UL, 1608218669UL, 2412328031UL, 3618492049UL,
#if SIZEOF_SLEN_T>=8
  5427738097UL, 8141607167UL, 12212410753UL, 18318616157UL, 27477924239UL,
  41216886467UL, 61825329719UL, 92737994593UL, 139106991917UL, 208660487887UL,
  312990731839UL, 469486097807UL, 704229146717UL, 1056343720093UL,
  1584515580187UL, 2376773370313UL, 3565160055487UL, 5347740083243UL,
  8021610124867UL, 12032415187319UL, 18048622780987UL, 27072934171487UL,
  40609401257291UL, 60914101885951UL, 91371152828947UL, 137056729243439UL,
  205585093865189UL, 308377640797829UL, 462566461196803UL,
  693849691795229UL, 1040774537692907UL, 1561161806539393UL,
  2341742709809177UL, 3512614064713777UL, 5268921097070759UL,
  7903381645606193UL, 11855072468409349UL, 17782608702614033UL,
  26673913053921061UL, 40010869580881603UL, 60016304371322423UL,
  90024456556983643UL, 135036684835475519UL, 202555027253213279UL,
  303832540879819943UL, 455748811319729951UL, 683623216979594929UL,
  1025434825469392417UL, 1538152238204088631UL, 2307228357306132983UL,
  3460842535959199481UL, 5191263803938799269UL,
#endif
  0
};

void Mapping::DoubleHash15::vi_scale() {
#if 0
  // <lloced=63659 used=59671 maxused=59670
  // alloced=1637 used=59671 maxused=1534

  // <lloced=1637 used=1535 maxused=1534
  // |lloced=2459 used=1535 maxused=1534
  // |lloced=1637 used=1535 maxused=1534
  // alloced=1637 used=1535 maxused=1534


  printf("<lloced=%u used=%u maxused=%u\n", alloced, used, maxused);
  if (len<minlen) { again:
    /* assert(used==minlen-1); */
    assert(scale>=1);
    alloced=(d15_primes+2)[--scale];
    if (scale>1) minlen=(d15_primes+2)[scale-2];
    else if (scale==1) minlen=10;
    else minlen=0;
    if (len<minlen) goto again;
  } else if (used>maxused) {
    assert(used==maxused+1);
    if ((alloced=(d15_primes+2)[++scale])==0) abort(); /* Imp: better overflow reporting */
    if (scale>1) minlen=(d15_primes+2)[scale-2];
    else if (scale==1) minlen=10;
    else minlen=0;
    // if (len<minlen) minlen=len;
  } else assert(0);
  printf("|lloced=%u used=%u maxused=%u\n", alloced, used, maxused);
  if (alloced<=4000U) maxused=(alloced*15)>>4;
                 else maxused=(alloced>>4)*15;
  if (used>maxused) printf("alloced=%u used=%u maxused=%u\n", alloced, used, maxused);
#endif

  /* Dat: we respect only `len', not used. */
  /* Imp: make calculation of scale relative to previous one */

  scale=0; while (1) {
    // printf("xscale=%u\n", scale);
    if (0==(alloced=(d15_primes+2)[scale])) { assert(0); abort(); } /* Imp: better overflow reporting */
    if (alloced<=4000U) maxused=(alloced*15)>>4;
                   else maxused=(alloced>>4)*15; /* avoid overflows */
    if (len<=maxused) break; /* _not_ used<=maxused, because of rehash() */
    scale++;
  }
  minlen=(d15_primes)[scale];
  // printf("ret alloced=%u used=%u maxused=%u\n", alloced, used, maxused);
}
slen_t Mapping::DoubleHash15::vi_h1(register char const*key, register slen_t keylen) {
  register slen_t hval=0, m=alloced;
  while (keylen--!=0) hval=((hval<<8)+*key++)%m;
  /* Imp: make this accurate and faster (the `%' operation is rather slow) */
  return hval;
}
slen_t Mapping::DoubleHash15::vi_h2(char const*key, slen_t keylen) {
  register slen_t hval=0, m=alloced-1;
  while (keylen--!=0) hval=((hval<<8)+*key++)%m;
  /* Imp: make this accurate and faster (the `%' operation is rather slow) */
  return hval+1;
}
void Mapping::DoubleHash15::vi_dtor(char *) {}

Mapping::DoubleHash15::DoubleHash15(slen_t datalen_) {
  ary=new Ary[alloced=(d15_primes+2)[scale=0]];
  memset(ary, '\377', sizeof(Ary)*alloced); /* fill with NEVER_USED */
  datalen=datalen_;
  len=used=minlen=0;
  maxused=(alloced>>4)*15;
}
Mapping::DoubleHash15::~DoubleHash15() {
  minlen=0;
  clear();
  delete [] ary;
}

/* __END__ */