# Written by Arno Bakker # see LICENSE.txt for license information """ Reference Implementation of Merkle hash torrent extension, as now standardized in http://www.bittorrent.org/beps/bep_0030.html (yay!) """ from math import log,pow,floor from Tribler.Core.Utilities.Crypto import sha import sys DEBUG = False # External classes class MerkleTree: def __init__(self,piece_size,total_length,root_hash=None,hashes=None): """ Create a Merkle hash tree When creating a .torrent: root_hash is None and hashes is not None When creating an initial seeder: root_hash is None and hashes is not None (root_hash is None to allow comparison with the calculated root hash and the one in the .torrent) When creating a downloader: root_hash is not None and hashes is None """ self.npieces = len2npieces(piece_size,total_length) self.treeheight = get_tree_height(self.npieces) self.tree = create_tree(self.treeheight) if hashes is None: self.root_hash = root_hash else: fill_tree(self.tree,self.treeheight,self.npieces,hashes) # root_hash is None during .torrent generation if root_hash is None: self.root_hash = self.tree[0] else: raise AssertionError, "merkle: if hashes not None, root_hash must be" def get_root_hash(self): return self.root_hash def compare_root_hashes(self,other): return self.root_hash == other def get_hashes_for_piece(self,index): return get_hashes_for_piece(self.tree,self.treeheight,index) def check_hashes(self,hashlist): return check_tree_path(self.root_hash,self.treeheight,hashlist) def update_hash_admin(self,hashlist,piece_hashes): update_hash_admin(hashlist,self.tree,self.treeheight,piece_hashes) def get_piece_hashes(self): """ Get the pieces' hashes from the bottom of the hash tree. Used during a graceful restart of a client that already downloaded stuff. """ return get_piece_hashes(self.tree,self.treeheight,self.npieces) def create_fake_hashes(info): total_length = calc_total_length(info) npieces = len2npieces(info['piece length'],total_length) return ['\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' ] * npieces # Internal functions # Design choice: all algoritmics have been returned into stateless functions, # i.e. they operate on the input parameters only. This to keep them extremely # clear. def len2npieces(piece_size,total_length): npieces = total_length / piece_size if piece_size*npieces < total_length: npieces += 1 return npieces def calc_total_length(info): # Merkle: Calculate total length from .torrent info if info.has_key('length'): return info['length'] # multi-file torrent files = info['files'] total_length = 0 for i in range(0,len(files)): total_length += files[i]['length'] return total_length def get_tree_height(npieces): if DEBUG: print >> sys.stderr,"merkle: number of pieces is",npieces height = log(npieces,2) if height - floor(height) > 0.0: height = int(height)+1 else: height = int(height) if DEBUG: print >> sys.stderr,"merkle: tree height is",height return height def create_tree(height): # Create tree that has enough leaves to hold all hashes treesize = int(pow(2,height+1)-1) # subtract unused tail if DEBUG: print >> sys.stderr,"merkle: treesize",treesize tree = ['\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' ] * treesize return tree def fill_tree(tree,height,npieces,hashes): # 1. Fill bottom of tree with hashes startoffset = int(pow(2,height)-1) if DEBUG: print >> sys.stderr,"merkle: bottom of tree starts at",startoffset for offset in range(startoffset,startoffset+npieces): #print >> sys.stderr,"merkle: copying",offset #print >> sys.stderr,"merkle: hashes[",offset-startoffset,"]=",str(hashes[offset-startoffset]) tree[offset] = hashes[offset-startoffset] # 2. Note that unused leaves are NOT filled. It may be a good idea to fill # them as hashing 0 values may create a security problem. However, the # filler values would have to be known to any initial seeder, otherwise it # will not be able build the same hash tree as the other initial seeders. # Assume anyone should be able to autonomously become a seeder, the filler # must be public info. I don't know whether having public info as filler # instead of 0s is any safer, cryptographically speaking. Hence, we stick # with 0 for the moment # 3. Calculate higher level hashes from leaves for level in range(height,0,-1): if DEBUG: print >> sys.stderr,"merkle: calculating level",level for offset in range(int(pow(2,level)-1),int(pow(2,level+1)-2),2): #print >> sys.stderr,"merkle: data offset",offset [ parentstartoffset, parentoffset ] = get_parent_offset(offset,level) #print >> sys.stderr,"merkle: parent offset",parentoffset data = tree[offset]+tree[offset+1] digester = sha() digester.update(data) digest = digester.digest() tree[parentoffset] = digest #for offset in range(0,treesize-1): # print offset,"HASH",str(tree[offset]) return tree def get_hashes_for_piece(tree,height,index): startoffset = int(pow(2,height)-1) myoffset = startoffset+index if DEBUG: print >> sys.stderr,"merkle: myoffset",myoffset # 1. Add piece's own hash hashlist = [ [myoffset,tree[myoffset]] ] # 2. Add hash of piece's sibling, left or right if myoffset % 2 == 0: siblingoffset = myoffset-1 else: siblingoffset = myoffset+1 if DEBUG: print >> sys.stderr,"merkle: siblingoffset",siblingoffset if siblingoffset != -1: hashlist.append([siblingoffset,tree[siblingoffset]]) # 3. Add hashes of uncles uncleoffset = myoffset for level in range(height,0,-1): uncleoffset = get_uncle_offset(uncleoffset,level) if DEBUG: print >> sys.stderr,"merkle: uncleoffset",uncleoffset hashlist.append( [uncleoffset,tree[uncleoffset]] ) return hashlist def check_tree_path(root_hash,height,hashlist): """ The hashes should be in the right order in the hashlist, otherwise the peer will be kicked. The hashlist parameter is assumed to be of the right type, and contain values of the right type as well. The exact values should be checked for validity here. """ maxoffset = int(pow(2,height+1)-2) mystartoffset = int(pow(2,height)-1) i=0 a = hashlist[i] if a[0] < 0 or a[0] > maxoffset: return False i += 1 b = hashlist[i] if b[0] < 0 or b[0] > maxoffset: return False i += 1 myindex = a[0]-mystartoffset sibindex = b[0]-mystartoffset for level in range(height,0,-1): if DEBUG: print >> sys.stderr,"merkle: checking level",level a = check_fork(a,b,level) b = hashlist[i] if b[0] < 0 or b[0] > maxoffset: return False i += 1 if DEBUG: print >> sys.stderr,"merkle: ROOT HASH",`str(root_hash)`,"==",`str(a[1])` if a[1] == root_hash: return True else: return False def update_hash_admin(hashlist,tree,height,hashes): mystartoffset = int(pow(2,height)-1) for i in range(0,len(hashlist)): if i < 2: # me and sibling real hashes of piece data, save them index = hashlist[i][0]-mystartoffset # ignore siblings that are just tree filler if index < len(hashes): if DEBUG: print >> sys.stderr,"merkle: update_hash_admin: saving hash of",index hashes[index] = hashlist[i][1] # put all hashes in tree, such that we incrementally learn it # and can pass them on to others tree[hashlist[i][0]] = hashlist[i][1] def check_fork(a,b,level): myoffset = a[0] siblingoffset = b[0] if myoffset > siblingoffset: data = b[1]+a[1] if DEBUG: print >> sys.stderr,"merkle: combining",siblingoffset,myoffset else: data = a[1]+b[1] if DEBUG: print >> sys.stderr,"merkle: combining",myoffset,siblingoffset digester = sha() digester.update(data) digest = digester.digest() [parentstartoffset, parentoffset ] = get_parent_offset(myoffset,level-1) return [parentoffset,digest] def get_parent_offset(myoffset,level): parentstartoffset = int(pow(2,level)-1) mystartoffset = int(pow(2,level+1)-1) parentoffset = parentstartoffset + (myoffset-mystartoffset)/2 return [parentstartoffset, parentoffset] def get_uncle_offset(myoffset,level): if level == 1: return 0 [parentstartoffset,parentoffset ] = get_parent_offset(myoffset,level-1) if DEBUG: print >> sys.stderr,"merkle: parent offset",parentoffset parentindex = parentoffset-parentstartoffset if parentoffset % 2 == 0: uncleoffset = parentoffset-1 else: uncleoffset = parentoffset+1 return uncleoffset def get_piece_hashes(tree,height,npieces): startoffset = int(pow(2,height)-1) hashes = ['\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' ] * npieces for offset in range(startoffset,startoffset+npieces): hashes[offset-startoffset] = tree[offset] return hashes