""" This module supplies functions that implement various codes for integers, some of them self-delimiting. (a) regular binary - encode: dec_to_bin(n,d) ; decode: bin_to_dec(cl,d,0) (b) headless binary - encode: dec_to_headless(n) ; decode: bin_to_dec(cl,d,1) (c) C_alpha(n) - encode: encoded_alpha(n) ; decode: get_alpha_integer(cl) (d) Fibonacci code (Figital) - encode: to_fiboanacci(n); decoder: from_fibonacci(string) or from_fibonacci2(clist) (e) byte code (bases 3, 7, 15, 31,...) (This code is allowed to encode integers from 0 upwards, unlike the others, which take n>=1 only) C_alpha(n) is a self-delimiting code for integers. Neither regular binary nor headless binary is a self-delimiting code, hence their decoders must be supplied with a parameter d that tells how many bits to read. See Information Theory, Inference, and Learning Algorithms. (Ch 7: Codes for Integers) http://www.inference.phy.cam.ac.uk/mackay/itila/ This package uses the doctest module to test that it is functioning correctly. IntegerCodes.py is free software (c) David MacKay December 2005. License: GPL """ ## For license statement see http://www.gnu.org/copyleft/gpl.html import sys def standard_binary( n ): """ n is the number to convert to binary; returns the standard binary representation >>> print standard_binary( 17 ) 10001 >>> print standard_binary( 6 ) 110 """ return "1"+dec_to_headless( n ) def dec_to_bin( n , digits ): """ n is the number to convert to binary; digits is the number of bits you want Always prints full number of digits >>> print dec_to_bin( 17 , 9) 000010001 >>> print dec_to_bin( 17 , 5) 10001 Will behead the standard binary number if requested >>> print dec_to_bin( 17 , 4) 0001 """ if(n<0) : sys.stderr.write( "warning, negative n not expected\n") pass i=digits-1 ans="" while i>=0 : b = (((1<0) i -= 1 ans = ans + str(int(b)) pass return ans pass def ceillog( n ) : ## ceil( log_2 ( n )) [Used by LZ.py] """ >>> print ceillog(3), ceillog(4), ceillog(5) 2 2 3 """ assert n>=1 c = 0 while 2**c=0 into base B=2**bytesize - 1. Use the all-1 symbol as end-of-file symbol. Is this called "Rice Coding"? >>> print to_byte( 10, 2 ) ## 10 = 9 + 0 + 1 --> 01 00 01 11 01000111 >>> print to_byte( 10, 3 ) ## 10 = 7 + 3 --> 001 011 111 001011111 """ assert(bytesize>1) ## this coder does base 3, 7, 15,... assert (n>=0) B = (1<0 : rem = n % B answer=dec_to_bin(rem,bytesize)+answer # print n,B,rem,answer n = n/B pass answer=answer+"1"*bytesize return answer def from_byte( clist, bytesize): """ Takes a list of binary digits. Returns an integer, and destroys the elements of the list that it has read. >>> print from_byte( list("01000111"), 2 ) ## 10 = 9 + 0 + 1 --> 01 00 01 11 10 >>> print from_byte( list("001011111"), 3 ) ## 10 = 7 + 3 --> 001 011 111 10 """ assert (len(clist)>=0) B = (1<= 1 >>> [(n,dec_to_headless(n)) for n in range(1,8)] [(1, ''), (2, '0'), (3, '1'), (4, '00'), (5, '01'), (6, '10'), (7, '11')] >>> dec_to_headless(42) '01010' """ assert(n>=1) ans="" while n>1 : b = ((1&n)>0) ans = str(int(b)) + ans n >>= 1 pass return ans pass def bin_to_dec( clist , c , tot=0 ): """Implements ordinary binary to integer conversion if tot=0 and HEADLESS binary to integer if tot=1 clist is a list of bits; read c of them and turn into an integer. The bits that are read from the list are popped from it, i.e., deleted Regular binary to decimal 1001 is 9... >>> bin_to_dec( ['1', '0', '0', '1'] , 4 , 0 ) 9 Headless binary to decimal [1] 1001 is 25... >>> bin_to_dec( ['1', '0', '0', '1'] , 4 , 1 ) 25 """ while (c>0) : assert ( len(clist) > 0 ) ## else we have been fed insufficient bits. tot = tot*2 + int(clist.pop(0)) c-=1 pass return tot class Fibonacci: """ Returns fibonacci numbers and caches them for efficiency. >>> f = Fibonacci(); [f.calculate(i) for i in range(50)] # doctest:+ELLIPSIS [1, 1, 2, 3, 5, 8, 13, 21, ..., 12586269025L] """ def __init__(self): self.cache={} def calculate(self, n): if n < 2: return 1 try: return self.cache[n] except: return self.cache.setdefault(n, self.calculate(n-2) + self.calculate(n-1)) globalfib = Fibonacci() def to_fibonacci( r ) : """ Encode the positive integer r using the self-delimiting fibonacci code (aka "figital"), which has the property that every integer is terminated by "11". The decoding of c0c1c2... is c(0)*F(1) + c(1)*F(2) + c(2)*F(3) + ... For the Fibonacci code, the implicit distribution is $1/n^q$, with $q = 1/\log_2\left(\gamma\right) \simeq 1.44$, where $\gamma$ is the golden ratio. >>> for i in range(1,23): print i,to_fibonacci(i) # doctest:+ELLIPSIS +NORMALIZE_WHITESPACE 1 11 2 011 3 0011 4 1011 5 00011 6 10011 7 01011 8 000011 9 100011 10 010011 11 001011 12 101011 ... >>> print to_fibonacci(6), from_fibonacci(to_fibonacci(6)) 10011 6 >>> print to_fibonacci(33), from_fibonacci(to_fibonacci(33)) 10101011 33 >>> print to_fibonacci(18), from_fibonacci(to_fibonacci(18)) 0001011 18 """ i=1 assert r>=1 ## we encode positive integers only ## find thhe biggest fib needed while globalfib.calculate(i)<= r: i+=1 pass i-=1 # print i,r,globalfib.calculate(i) ans = '1' while i>0: # print i,r,globalfib.calculate(i) if globalfib.calculate(i)>r: ans = '0'+ ans pass else: ans = '1' + ans r -= globalfib.calculate(i) pass i -= 1 pass return ans def from_fibonacci( string ) : """ Decode the positive integer r from self-delimiting fibonacci code (aka "figital"), which has the property that every integer is terminated by "11". The decoding of c0c1c2... is c(0)*F(1) + c(1)*F(2) + c(2)*F(3) + ... except we ignore the final 1. >>> print from_fibonacci('10101011') 33 >>> print from_fibonacci('0001011') 18 >>> print from_fibonacci('10011') 6 """ return from_fibonacci2( list(string) ) def from_fibonacci2( slist ): global globalfib counter = 1 ; ans = 0 ; seenOne = 0 ; while 1: assert len(slist)>0 s = slist.pop(0) if s == '1': if seenOne: return ans ans += globalfib.calculate(counter) seenOne = 1 counter+=1 pass elif s == '0': counter +=1 seenOne = 0 pass else: import sys print >> sys.stderr, "warning, illegal character s in from_fiboanacci" pass pass pass def encoded_omega ( r ) : """ Encode the positive integer r using C_omega See Information Theory, Inference, and Learning Algorithms. (Ch 7: Codes for Integers) http://www.inference.phy.cam.ac.uk/mackay/itila/ >>> print encoded_omega(31) 10100111110 >>> print encoded_omega(1) 0 >>> print encoded_omega(2) 100 >>> print encoded_omega(3) 110 """ return omega_recursion(r)+"0" def omega_recursion(r): ## find the standard binary code for r cb = standard_binary(r) l = len(cb)-1 ## assert l is floor(log_2(n)) if l>=1: return omega_recursion(l) + cb else: return "" pass def get_omega_integer( clist ): """ Destructively read a self-delimiting integer from the head of clist and return its value. The answer "0" is returned if the clist is empty. An assertion error message will result if the clist ends before we have finished reading. >>> for i in [1,3,6,45]: b=encoded_omega(i); cl=list(b); j=get_omega_integer(cl); print i,b,j; assert i==j 1 0 1 3 110 3 6 101100 6 45 101011011010 45 """ if (len(clist)>0 ) : return get_omega_recurse( clist, 1 ) else: return 0 ## this is an error state pass def get_omega_recurse(clist, length): assert ( len(clist) > 0 ) ## otherwise we have an invalid encoded string if ( clist.pop(0) == '0' ) : return length else : n = bin_to_dec( clist, length, 1 ) ## headless integer return get_omega_recurse(clist, n) pass pass def encoded_alpha ( r ) : """ Encode the positive integer r using C_alpha See Information Theory, Inference, and Learning Algorithms. (Ch 7: Codes for Integers) http://www.inference.phy.cam.ac.uk/mackay/itila/ >>> print encoded_alpha( 17) 000010001 >>> print encoded_alpha( 9) 0001001 >>> print encoded_alpha( 63) 00000111111 """ c=0 ; rc =r ; ans="" while 1: r = (r >> 1) if r<1 : break ans = ans+"0" c += 1 pass ans = ans + dec_to_bin( rc , c+1 ) ## prints the standard binary representation of the number r return ans pass def get_alpha_integer ( clist ) : """ Destructively read a self-delimiting integer from the head of clist and return its value. The answer "0" is returned if the clist is empty. An assertion error message will result if the clist ends before we have finished reading. >>> for i in [1,3,6,42]: b=encoded_alpha(i); cl=list(b); print b,get_alpha_integer(cl) 1 1 011 3 00110 6 00000101010 42 """ if (len(clist)>0 ) : c = 0 ## counts number of bits to read while ( clist.pop(0) == '0' ) : c += 1 assert ( len(clist) > 0 ) pass # print "now going to read",c,"bits " # ok, we have just read a 1, that was the start of the integer r = bin_to_dec( clist , c , 1 ) return r pass else: return 0 pass def assertions(): print "Testing byte" for j in range(2,7): for i in range(1299): assert from_byte(list(to_byte(i,j)),j) == i print "Testing fibo" for i in range(1,12999): assert from_fibonacci2(list(to_fibonacci(i))) == i print "Testing omega" for i in range(1,12999): assert get_omega_integer(list(encoded_omega(i))) == i print "Testing alpha" for i in range(1,12999): assert get_alpha_integer(list(encoded_alpha(i))) == i pass def test(): import doctest for i in range(1,24): print "%-3d%s" % ( i,to_fibonacci(i) ) verbose=1 if(verbose): doctest.testmod(None,None,None,True) else: doctest.testmod() pass assertions() pass def oldtest(): print "testing dec_to_bin" testlist = ["print dec_to_bin( 17 , 9)",\ "print dec_to_bin( 17 , 6)",\ "print dec_to_bin( 17 , 5)",\ "print dec_to_bin( 17 , 4)"] for t in testlist : print t exec t pass print "\ntesting bin_to_dec" binlist = ["1001", "101010" ] for b in binlist : clist = list( b ) length = len(clist) print "\nRegular binary to decimal", b print "bin_to_dec( ",clist," , ",length," , 0 )" a = bin_to_dec( clist , length , 0 ) print a print "\nHeadless binary to decimal [1]", b clist = list( b ) print "bin_to_dec( ",clist," , ",length," , 1 )" a = bin_to_dec( clist , length , 1 ) print a pass pass print "\ntesting alpha encoder" testlist = ["print encoded_alpha( 17)",\ "print encoded_alpha( 9)", "print encoded_alpha( 63)"] for t in testlist : print t exec t pass print "\ntesting alpha decoder" for i in range (1,23): print "encoding", i b = encoded_alpha( i ) print " ->",b clist = list(b) a = get_alpha_integer( clist ) print " ->",a if i!=a : print "ERROR" pass pass pass ## from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/111286 BASE2 = "01" BASE10 = "0123456789" BASE16 = "0123456789ABCDEF" BASE62 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz" def baseconvert(number,fromdigits,todigits): """ converts a "number" between two bases of arbitrary digits The input number is assumed to be a string of digits from the fromdigits string (which is in order of smallest to largest digit). The return value is a string of elements from todigits (ordered in the same way). The input and output bases are determined from the lengths of the digit strings. Negative signs are passed through. decimal to binary >>> baseconvert(555,BASE10,BASE2) '1000101011' binary to decimal >>> baseconvert('1000101011',BASE2,BASE10) '555' integer interpreted as binary and converted to decimal (!) >>> baseconvert(1000101011,BASE2,BASE10) '555' base10 to base4 >>> baseconvert(99,BASE10,"0123") '1203' base4 to base5 (with alphabetic digits) >>> baseconvert(1203,"0123","abcde") 'dee' base5, alpha digits back to base 10 >>> baseconvert('dee',"abcde",BASE10) '99' decimal to a base that uses A-Z0-9a-z for its digits >>> baseconvert(257938572394L,BASE10,BASE62) 'E78Lxik' ..convert back >>> baseconvert('E78Lxik',BASE62,BASE10) '257938572394' binary to a base with words for digits (the function cannot convert this back) >>> baseconvert('1101',BASE2,('Zero','One')) 'OneOneZeroOne' """ if str(number)[0]=='-': number = str(number)[1:] neg=1 else: neg=0 # make an integer out of the number x=long(0) for digit in str(number): x = x*len(fromdigits) + fromdigits.index(digit) # create the result in base 'len(todigits)' res="" while x>0: digit = x % len(todigits) res = todigits[digit] + res x /= len(todigits) if neg: res = "-"+res return res """ Note about conversion between bases. If the number is in string form, the constructor for int is designed to handle any base from 2 through 36: >>> int('FF', 16) # hexadecimal (base 16) 255 >>> int('777', 8) # base 8 511 >>> int('10000011', 2) # binary 131 >>> int('1234', 10) # decimal 1234 >>> int('1234') # defaults to decimal if not specified 1234 """ ## from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65212 def base10toN(num,n): """Change a to a base-n number. Up to base-36 is supported without special notation.""" num_rep={10:'a', 11:'b', 12:'c', 13:'d', 14:'e', 15:'f', 16:'g', 17:'h', 18:'i', 19:'j', 20:'k', 21:'l', 22:'m', 23:'n', 24:'o', 25:'p', 26:'q', 27:'r', 28:'s', 29:'t', 30:'u', 31:'v', 32:'w', 33:'x', 34:'y', 35:'z'} new_num_string='' current=num while current!=0: remainder=current%n if 36>remainder>9: remainder_string=num_rep[remainder] elif remainder>=36: remainder_string='('+str(remainder)+')' else: remainder_string=str(remainder) new_num_string=remainder_string+new_num_string current=current/n return new_num_string ## from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/410672 NOTATION10 = '0123456789' NOTATION70 = "!'()*-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~" class BaseConvert: def __init__(self): self.notation = NOTATION10 def _convert(self, n=1, b=10): ''' Private function for doing conversions; returns a list ''' if True not in [ isinstance(n, x) for x in [long, int, float] ]: raise TypeError, 'parameters must be numbers' converted = [] quotient, remainder = divmod(n, b) converted.append(remainder) if quotient != 0: converted.extend(self._convert(quotient, b)) return converted def convert(self, n, b): ''' General conversion function ''' nums = self._convert(n, b) nums.reverse() return self.getNotation(nums) def getNotation(self, list_of_remainders): ''' Get the notational representation of the converted number ''' return ''.join([ self.notation[x] for x in list_of_remainders ]) class Base70(BaseConvert): ''' >>> base = Base70() >>> base.convert(10) '4' >>> base.convert(510) '1E' >>> base.convert(1000) '8E' >>> base.convert(10000000) 'N4o4' ''' def __init__(self): self.notation = NOTATION70 def convert(self, n): "Convert base 10 to base 70" return BaseConvert.convert(self, n, 70) if __name__ == '__main__': test()