Adrian Lita

Generating all combinations of a string using the binary counter method

by Adrian Lita - on 2018-06-26

Keywords: #combinations #string #strings #binary-counter #academic #snippet #counter
Permalink: https://adrian.lita.me/blog_string-combinations-binary-counter-method.html
 

Generating all k-combinations of n elements can be done in various ways. Here I'm preseting to you, for academic purpose, the binary counter method.

First of all, let's calculate how many k-combinations of n exists:

/*
  Calculates kCn

  kCn = n!/(k! * (n-k)!)
*/
unsigned int total_combinations(int n, int k)
{
  //error
  if (k > n)
  {
    return 0;
  }

  unsigned int c = 1;	//final result

  //check which one is greater, so we can simplify
  if (n - k > k)
  {
    k = n - k;
  }

  for (int i = k + 1; i <= n; i++)
  {
    c *= i;
  }

  k = n - k;
  for (int i = 2; i <= k; i++)
  {
    c /= i;
  }

  return c;
}

In our algorithm, we don't actually need the number of combinations, but it's nice to know our algorithm is doing ok. Now let's get to the binary-counter method. This method requires a counter with n bits, which will count from 0 to 2n. On each iteration, we'll check how many of the n-bits are 1s. When this perfectly matches k, it means we've reached a combination. We then form the combination by outputting string elements that have indexes where our bits are 1. For example, if we have let's the string "12345678" and our counter is 0b11000011, we'll output 1278. Needless to say that the complexity of this aglorithm is O(2n), where n is the nubmer of elements we need to combine.

Let's get to work. First of all, we need a function to count the number of ones in a buffer. Please note that this function is usually named popCount for historical reasons. Some CPUs actually implement the popCount assembly instruction. We'll just do an efficient implementation of popCount in C, with a complexity of O(n), (n == number of bits that we need to count).

/*
  this function counts how many binary 1s are in a data set
*/
unsigned int count_ones(int n, const void *data)
{
  unsigned int count = 0;
  for (int i = 0; i < n; i++)
  {
    unsigned char d = ((const unsigned char*)data)[i];
    while (d)
    {
      d &= (d - 1);
      count++;
    }
  }
  return count;
}

Moving further, let's implement the actual binary-counter combination generator function:

/*
  - prints out all k combinations of n on a given set (string) s
  - Memory efficient method to show all combinations
  - uses only 1 bit per element
  - uses the counter method
*/
void combinations(const int n, const int k, const char *s)
{
  //initialize the counter
  int no_bytes = (n / 8) + 1;
  unsigned char *v = (unsigned char*)malloc(no_bytes);
  if (v == NULL)
  {
    cout << "Allocation error!" << endl;
    return;
  }
  for (int i = 0; i < no_bytes; i++)
  {
    v[i] = 0;
  }

  //apply counter method
  unsigned int no_bits = 0; //no_bits represents the count of binary 1s
  while (no_bits != n)
  {
    //increment the first element with 1
    v[0]++;

    //if the increment causes overflows, we'll carry that bit further
    for (int i = 0; i < no_bytes; i++)
    {
      if (v[i] != 0)
      {
        break;
      }
      else
      {
        v[i + 1]++;
      }
    }

    //now let's check if your current element v matches what we're looking for (has k bits equal to 1)
    no_bits = count_ones(no_bytes, v);
    if (no_bits == k)
    {
      //it's a match, print it as efficiently as possible
      int j = 0;
      int vcounter = 0;
      int bitcounter = 0;
      unsigned char cbyte = v[0];
      while (j < n)
      {
        if (cbyte & 0x01)
        {
          cout << s[j];
        }

        cbyte >>= 1;
        bitcounter++;
        if (bitcounter == 8)
        {
          vcounter++;
          cbyte = v[vcounter];
        }
        j++;
      }
      cout << endl;
    }
  }

  //don't forget to cleanup any used memory
  free(v);
}

That was pretty much it. If we put everything together, and add a test routine in main, we'll get the final form of the algorithm:

/*
  Calculates kCn

  kCn = n!/(k! * (n-k)!)
*/
unsigned int total_combinations(int n, int k)
{
  //error
  if (k > n)
  {
    return 0;
  }

  unsigned int c = 1;	//final result

  //check which one is greater, so we can simplify
  if (n - k > k)
  {
    k = n - k;
  }

  for (int i = k + 1; i <= n; i++)
  {
    c *= i;
  }

  k = n - k;
  for (int i = 2; i <= k; i++)
  {
    c /= i;
  }

  return c;
}

/*
  this function counts how many binary 1s are in a data set
*/
unsigned int count_ones(int n, const void *data)
{
  unsigned int count = 0;
  for (int i = 0; i < n; i++)
  {
    unsigned char d = ((const unsigned char*)data)[i];
    while (d)
    {
      d &= (d - 1);
      count++;
    }
  }
  return count;
}

/*
  - prints out all k combinations of n on a given set (string) s
  - Memory efficient method to show all combinations
  - uses only 1 bit per element
  - uses the counter method
*/
void combinations(const int n, const int k, const char *s)
{
  //initialize the counter
  int no_bytes = (n / 8) + 1;
  unsigned char *v = (unsigned char*)malloc(no_bytes);
  if (v == NULL)
  {
    cout << "Allocation error!" << endl;
    return;
  }
  for (int i = 0; i < no_bytes; i++)
  {
    v[i] = 0;
  }

  //apply counter method
  unsigned int no_bits = 0; //no_bits represents the count of binary 1s
  while (no_bits != n)
  {
    //increment the first element with 1
    v[0]++;

    //if the increment causes overflows, we'll carry that bit further
    for (int i = 0; i < no_bytes; i++)
    {
      if (v[i] != 0)
      {
        break;
      }
      else
      {
        v[i + 1]++;
      }
    }

    //now let's check if your current element v matches what we're looking for (has k bits equal to 1)
    no_bits = count_ones(no_bytes, v);
    if (no_bits == k)
    {
      //it's a match, print it as efficiently as possible
      int j = 0;
      int vcounter = 0;
      int bitcounter = 0;
      unsigned char cbyte = v[0];
      while (j < n)
      {
        if (cbyte & 0x01)
        {
          cout << s[j];
        }

        cbyte >>= 1;
        bitcounter++;
        if (bitcounter == 8)
        {
          vcounter++;
          cbyte = v[vcounter];
        }
        j++;
      }
      cout << endl;
    }
  }

  //don't forget to cleanup any used memory
  free(v);
}

int main()
{
  char s[] = "Adrian";
  int n = strlen(s);
  int k = 2;
  cout << "Total combinations: " << total_combinations(n, k) << endl;
  combinations(n, k, s);

  return 0;
}

I hope this gives you the insight for this binary-counter method of solving combinations.