Programming

Getting umask

In unix/linux, the system call umask() sets a umask value and returns the old one. So, in order to get the current umask value without changing it, one can do

#include <sys/types.h>
#include <sys/stat.h>

mode_t get_umask()
{
   mode_t mask = umask(0);
   umask(mask);
   return mask;
}

C/C++

Comments (0)

Permalink

Conversion between codepoints and UTF

I was browsing Wikipedia for information on UTF-16 and UTF-8 this morning, and from the specifications, I wrote some C++ routines to convert from a codepoint to UTF-8 and UTF-16 and back. These are not perfect, but works for the test data I tried.

Code:

/*
 * utf.cxx
 *
 * (Some basic conversion routines for UTF-8 and UTF-16)
 * Author: Umesh Nair, Feb 2007
 *
 */

#include <sys/types.h>
#include <vector>
#include <iostream>
#include <iomanip>
#include <iterator>

using std::cout;
using std::endl;

// u_int8_t is sufficient,
// I used this for displaying it nicely
typedef u_int16_t OneByte;  

typedef u_int16_t TwoByte;

typedef u_int32_t FourByte;

// UTF is a variable-length encoding, so a vector is used
typedef std::vector<OneByte> utf8vec;
typedef std::vector<TwoByte> utf16vec;

// Converts codepoint to UTF-16
bool
codePointToUTF16(FourByte cp, utf16vec& utf)
{
  // Error check omitted
   if (cp < 0x10000) {
      // BMP
      utf.push_back(static_cast<TwoByte>(cp));
   } else {
      TwoByte lead = (cp >> 10) + 0xD800 - (0x10000 >> 10);
      TwoByte trail = (cp & 0x3FF) + 0xDC00;
      utf.push_back(lead);
      utf.push_back(trail);
   }

  return true;
}

// Converts UTF-16 to codepoint
bool
UTF16ToCodePoint(const utf16vec& utf, FourByte& cp)
{
   if (utf.size() == 1) {
      // BMP
      cp = utf[0];
   } else {
      TwoByte lead = utf[0];
      TwoByte trail = utf[1];

      // Error check omitted
      cp = (lead << 10) + trail
         - ((0xD800 << 10) + 0xDC00 - 0x10000);
   }

   return true;
}

// Converts codepoint to UTF-8
bool
codePointToUTF8(FourByte cp, utf8vec& utf)
{
   if (cp < 0x80) {
      utf.push_back(static_cast<OneByte>(cp));
   } else if (cp < 0x800) {
      OneByte b0 = 0x80 + (cp & 0x3F);

      OneByte b1 = 0xC0 + (cp >> 6);

      utf.push_back(b1);
      utf.push_back(b0);
   } else if (cp < 0x10000) {
      OneByte b0 = 0x80 + (cp & 0x3F);
      OneByte b1 = 0x80 + ((cp >> 6) & 0x3F);

      OneByte b2 = 0xE0 + (cp >> 12);

      utf.push_back(b2);
      utf.push_back(b1);
      utf.push_back(b0);
   } else {
      OneByte b0 = 0x80 + (cp & 0x3F);
      OneByte b1 = 0x80 + ((cp >> 6) & 0x3F);
      OneByte b2 = 0x80 + ((cp >> 12) & 0x3F);

      OneByte b3 = 0xF0 + (cp >> 18);

      utf.push_back(b3);
      utf.push_back(b2);
      utf.push_back(b1);
      utf.push_back(b0);
   }

   return true;
}

// Converts UTF-8 to codepoint, does validity check as well
bool
UTF8ToCodePoint(const utf8vec& utf, FourByte& cp)
{
   utf8vec::const_iterator it;

   FourByte lead = utf[0];

   size_t nBytes = utf.size();

   if (nBytes == 4) {
      if ((lead & 0xF8) != 0xF0) {
         return false;
      }
      lead = ((lead - 0xF0) << 18);
   } else if (nBytes == 3) {
      if ((lead& 0xF0) != 0xE0) {
         return false;
      }
      lead = ((lead - 0xE0) << 12);
   } else if (nBytes == 2) {
      if ((lead & 0xE0) != 0xC0) {
         return false;
      }
      lead = ((lead - 0xC0) << 6);
   } else if (nBytes == 1) {
      if ((lead & 0x80) != 0) {
         return false;
      }
      lead = lead;
   } else {
      assert(false);
   }

   FourByte value = 0;
   if (nBytes > 1) {
      for (it = utf.begin() + 1; it != utf.end(); ++it) {
         value <<= 6;
         FourByte bitmask = (*it & 0x3F);
         value += bitmask;
      }
   }
   cp = lead + value;

   return true;
}

// Testing with one data
void
testACodePoint(FourByte cp)
{
   cout << "\n\nTESTING Code point "
      << std::hex << cp << endl;

   FourByte cp8, cp16;

   utf16vec utf16;

   if (codePointToUTF16(cp, utf16)) {
      cout << "UTF-16 : ";
      std::copy(utf16.begin(), utf16.end(),
         std::ostream_iterator(cout, " "));
      cout << endl;
      if (UTF16ToCodePoint(utf16, cp16)) {
         cout << "Code point : "
            << std::hex << cp16 << endl;
      } else {
         cout << "Error in converting from UTF-16." << endl;
      }

   } else {
      cout << "Error in converting to UTF-16." << endl;
   }

   utf8vec utf8;

   if (codePointToUTF8(cp, utf8)) {
      cout << "UTF-8 : ";
      std::copy(utf8.begin(), utf8.end(),
         std::ostream_iterator(cout, " "));
      cout << endl;
      if (UTF8ToCodePoint(utf8, cp8)) {
         cout << "Code point : "
            << std::hex << cp8 << endl;
      } else {
         cout << "Error in converting from UTF-8." << endl;
      }

   } else {
      cout << "Error in converting to UTF-8." << endl;
   }
}

// Test program
int main()
{
   testACodePoint(0x10000);
   testACodePoint(0x10FFFD);;
   testACodePoint(0x64321);;
   testACodePoint(0x05D0);;
}

Output:


[ 85 ] $ a.out

TESTING Code point 10000
UTF-16 : d800 dc00
Code point : 10000
UTF-8 : f0 90 80 80
Code point : 10000

TESTING Code point 10fffd
UTF-16 : dbff dffd
Code point : 10fffd
UTF-8 : f4 8f bf bd
Code point : 10fffd

TESTING Code point 64321
UTF-16 : d950 df21
Code point : 64321
UTF-8 : f1 a4 8c a1
Code point : 64321

TESTING Code point 5d0
UTF-16 : 5d0
Code point : 5d0
UTF-8 : d7 90
Code point : 5d0

C/C++
Programming
Unicode

Comments (0)

Permalink

Calculating number of combinations

The well-known formula for calculating the number of combinations of n objects with r objects taken a t a time is

but this requires calculating three factorials which may be expensive and may cause overflow. A better way is to use the other definition

and doing multiplication and division alternatively. An example is the C++ code below:

int nCr (int n, int r)
{
   int ncr = n;
   int k = 1;
   int r1 = n - r;

   // Handle special cases
   if (r1 < 0) { return 0;}    // Invalid value
   if (r1 < r) { r = r1;}      // nCr = nC(n-r)
   if (r == 0) { return 1;}    // nC0 = 1

   // To avoid integer overflow, divide as early as possible
   for (int k = 2; k <= r; ++k) {
     ncr *= --n;
     ncr /= k;
   }
   return ncr;
}

C/C++
Mathematics

Comments (0)

Permalink

Circle from three points

The problem

Given three points , and . Find the center and radius of the circle passing through it.

Discussion

Three non-collinear points represent a unique circle on a unique plane. It is easy to construct the circle from 3 points by drawing perpendicular bisectors of any two chords. If the 3 points are not collinear, the bisectors should meet at the center of the circle.

Using vectors, it is possible to devise an algorithm to do this. However, an easier method will be substituting the three points into the general equation of the circle

and solving for , and . Center is and the radius is
.

Starightforward algorithm

This can be solved by substituting the three values in the above equation and solving the set of three simultaneous equations.

The solution to the above set of simultaneous equations yields that the equation of the circle passing through , and is given by equating the value of the following determinanat to zero:

A simpler algorithm

A modified algorithm is given below:

The task will be much simpler if we apply a linear transformation so that one of the points (say ) is , and now the constant term vanishes, and now there are only two variables and two unknowns. We can transform the co-ordinates back after solving.

So, transform , , to

and now the equations are

where

Now, we can solve for g and f here.

Finally, applying the tranformations back, the center is .

The following C function illustrates this algorithm.

#include <math.h>
#define TRUE 1
#define FALSE 0

int  /* Returns TRUE if it is a valid circle, FALSE if the points are collinear */
solve_3_points_circle (
   double  x1, double  y1, /* Point 1 */
   double  x2, double  y2, /* Point 2 */
   double  x3, double  y3, /* Point 3 */
   double* cx, double* cy, /* Center  */
   double* r               /* Radius  */
)
{
   double z1, z2, d;

   /* Apply the transforms */
   x1 -= x3;
   y1 -= y3;
   x2 -= x3;
   y2 -= y3;

   Temporary variables to avoid some computations */
   z1 = x1 * x1 + y1 * y1;
   z2 = x2 * x2 + y2 * y2;
   d = 2.0 * (x1 * y2 - x2 * y1);
   if (fabs(d) < 0.00000001) {
      return FALSE;
   }

   /* Calculate the center of the transformed circle */
   *cx = (y2 * z1 - y1 * z2)/d;
   *cy = (x1 * z2 - x2 * z1)/d;
   r = sqrt(*cx * *cx + *cy * *cy);

   /* Apply the transforms back */
   *cx += x3;
   *cy += y3;
   return TRUE;
}

Algorithm given by Plastock and Kalley

Plastock and Kalley gives the following formulae for solving this problem:

The center and the radius of a circle passing through the points ,
and , are given by:

where

These formulae can be used to find the parameters of the circle.

Solution by following the geometric construction

The line passing through and is

where

The perpendicular bisector of this line passes through and has a slope of .

So, the perpendicular bisector is

Similarly, the perpendicular bisector of the side passing through and is

where

Now, the center of the circle is the point of intersection of these two perpendicular bisectors. So, equating the right hand sides of the above two equations, and solving for x,

Algorithms
C/C++
Mathematics
Programming

Comments (0)

Permalink