Please note: this is a STATIC archive of the website www.tutorialspoint.com from 11 May 2019. cach3.com does not collect or store any user information; there is no "phishing" involved.
Tutorialspoint

huffmanowo

from time import time

class BinTree:
    """Binary tree node holding a key and references to two children."""

    def __init__(self, key, left, right):
        """
        Init Tree

        Args:
            key: value stored in the node.
            left: left child, stored as given.
            right: right child, stored as given.
        """
        self.key = key
        self.left = left
        self.right = right

class Heap:
    """Binary min-heap of pairs, ordered on the first component of each pair.

    Elements live in the list ``elts``. Index 0 holds a None placeholder, so
    the root is at index 1, the parent of index i is i // 2 and its children
    are 2 * i and 2 * i + 1.
    """

    def __init__(self):
        """Init heap."""

        # Slot 0 is a placeholder so that real elements start at index 1
        self.elts = [None]


    def isempty(self):
        """Check whether heap is empty.

        Returns:
            bool: True if heap is empty, False otherwise.

        """
        # Only the placeholder at index 0 is left
        return len(self.elts) == 1

    def push(self, x):
        """Add an element to the heap.

        Args:
            x (value, elt): pair to enqueue.

        Returns:
            Heap: The updated heap.

        """
        self.elts.append(x)
        i = len(self.elts)-1
        # Sift up: swap with the parent while the new value is strictly smaller
        while (i > 1) and x[0] < self.elts[i//2][0]:
            (self.elts[i], self.elts[i//2]) = (self.elts[i//2], self.elts[i])
            i = i // 2
        return self

    def pop(self):
        """Remove and return first element from the heap.

        Returns:
            (num, any): Element from the queue.

        Raises:
            IndexError: If heap is empty.

        """
        # Reading index 1 raises IndexError when only the placeholder is left
        e = self.elts[1]
        # Move the last element to the root, then drop the last slot
        self.elts[1] = self.elts[len(self.elts)-1]
        self.elts.pop()
        n = len(self.elts)-1  # Index of the last element
        ok = False  # Set once the moved element is not greater than its smaller child
        i = 1
        # Sift down: only indices up to n // 2 have at least one child
        while (i <= n // 2) and not ok:
            j = 2 * i
            # Pick the smaller of the two children when a right child exists
            if (j + 1 <= n) and (self.elts[j+1][0] < self.elts[j][0]):
                j = j + 1
            if self.elts[i][0] > self.elts[j][0]:
                (self.elts[i], self.elts[j]) = (self.elts[j], self.elts[i])
                i = j
            else:
                ok = True
        return e


###############################################################################
# Do not change anything above this line, except your login!
# Do not add any import


###############################################################################
## COMPRESSION

def buildfrequencylist(dataIN):
	"""
	Builds a tuple list of the character frequencies in the input.

	Args:
		dataIN (str): text to analyse. None and the empty string are accepted.

	Returns:
		list: (count, character) pairs, one per distinct character, in order
		of first appearance in dataIN. Empty list when there is no input.
	"""
	# Check for NoneType and empty input
	if not dataIN: return []

	L = []
	positions = {}  # character -> index of its pair in L
	for c in dataIN:
		i = positions.get(c)
		if i is None:
			# First time this character is seen: add it at the end
			positions[c] = len(L)
			L.append((1, c))
		else:
			# Known character: direct access instead of rescanning the list
			(val, elt) = L[i]
			L[i] = (val + 1, elt)

	return L


def buildHuffmantree(inputList):
	"""
	Turns a frequency list into a Huffman tree.

	Each (value, element) pair becomes a leaf pushed on a Heap keyed by its
	value. The two smallest entries are then merged repeatedly under a new
	keyless node until a single tree is left.

	Args:
		inputList (list): (value, element) pairs.

	Returns:
		BinTree: root of the merged tree, or None for a None/empty list.
	"""
	# Nothing to build from None or an empty list
	if not inputList:
		return None

	heap = Heap()
	for (weight, symbol) in inputList:
		heap.push((weight, BinTree(symbol, None, None)))

	# heap.elts holds a placeholder at index 0, so "more than 2" means at
	# least two trees are still waiting to be merged
	while len(heap.elts) > 2:
		first = heap.pop()   # smallest entry
		second = heap.pop()  # next smallest entry

		# The second entry goes left, the first goes right
		heap.push((second[0] + first[0], BinTree(None, second[1], first[1])))

	if heap.isempty():
		return None

	# The only remaining entry is (total, fully merged tree)
	return heap.pop()[1]


def encodedata(huffmanTree, dataIN):
	"""
	Encodes the input string to its binary string representation.

	Every character of dataIN is replaced by the path that leads to its node
	in the tree: '0' for each left branch, '1' for each right branch. When
	the root itself carries a key (one-symbol tree) there is no branch to
	follow, so that symbol is written as "0".

	Args:
		huffmanTree (BinTree): tree whose keyed nodes hold the characters.
		dataIN (str): text to encode. None and the empty string give "".

	Returns:
		str: concatenation of the '0'/'1' paths of all characters.

	Raises:
		Exception: if huffmanTree is empty, or if a character of dataIN has
			no node in the tree.
	"""
	# Check for NoneType
	if not huffmanTree: raise Exception("Empty BinTree")

	# Check for NoneType and empty string
	if not dataIN: return ""

	# Walk the tree once and record the path of every keyed node
	codes = {}  # character -> path
	stack = [(huffmanTree, "")]  # (BinTree, path) list
	while stack:
		(B, path) = stack.pop()
		if B.key is not None:
			# An empty path only happens at the root; it would encode
			# nothing, so it is replaced by "0".
			# setdefault keeps the first path found for a repeated key.
			codes.setdefault(B.key, path or "0")
		if B.left: stack.append((B.left, path + "0"))
		if B.right: stack.append((B.right, path + "1"))

	parts = []
	for c in dataIN:
		path = codes.get(c)

		# Verify path
		if path is None: raise Exception("Path to '" + str(c) + "' not found")
		parts.append(path)
	return "".join(parts)


def encodetree(huffmanTree):
	"""
	Encodes a huffman tree to its binary representation using a preOrder traversal:
		* each leaf key is encoded into its binary representation on 8 bits preceded by '1'
		* each internal node (and each leaf without a key) adds a '0' to the result

	Args:
		huffmanTree (BinTree): root of the tree to encode.

	Returns:
		str: string of '0' and '1' characters, "" for an empty tree.

	Raises:
		ValueError: if a leaf key has a character code above 255, which
			cannot be written on 8 bits.
	"""
	# Check for NoneType
	if not huffmanTree: return ""

	parts = []

	def prefixEncodeTree(B):
		if not B: return
		if not B.left and not B.right:
			if not B.key:
				parts.append("0")
				return
			code = ord(B.key)
			# More than 8 bits would shift everything that follows in the stream
			if code > 255:
				raise ValueError("Character " + repr(B.key) + " cannot be encoded on 8 bits")
			parts.append("1" + format(code, "08b"))
			return
		parts.append("0")
		prefixEncodeTree(B.left)
		prefixEncodeTree(B.right)

	prefixEncodeTree(huffmanTree)
	return "".join(parts)

def it_encodetree(huffmanTree):
	"""
	Encodes a huffman tree to its binary representation using an iterative preOrder traversal:
		* each node with a key is encoded into its binary representation on 8 bits preceded by '1'
		* each node without a key adds a '0' to the result

	Args:
		huffmanTree (BinTree): root of the tree to encode.

	Returns:
		str: string of '0' and '1' characters, "" for an empty tree.

	Raises:
		ValueError: if a key has a character code above 255, which cannot
			be written on 8 bits.
	"""
	# Check for NoneType
	if not huffmanTree: return ""

	parts = []
	stack = [huffmanTree]  # Subtrees still to visit
	while stack:
		B = stack.pop()
		# Follow the left branch down, keeping right subtrees for later
		while B:
			if B.key:
				code = ord(B.key)
				# More than 8 bits would shift everything that follows in the stream
				if code > 255:
					raise ValueError("Character " + repr(B.key) + " cannot be encoded on 8 bits")
				parts.append("1" + format(code, "08b"))
			else:
				parts.append("0")

			if B.right:
				stack.append(B.right)

			B = B.left

	return "".join(parts)


def tobinary(dataIN):
	"""
	Packs a string of '0'/'1' characters into characters, 8 bits per character.

	Args:
		dataIN (str): binary code written as text.

	Returns:
		(str, int): the packed string and the number of '0' characters that
		were added in front of the last group to bring it to 8 bits.
	"""
	packed = []
	group = ""
	for (pos, bit) in enumerate(dataIN):
		# A full group of 8 is emitted just before the next one starts
		if group and pos % 8 == 0:
			packed.append(chr(__bin2dec(group)))
			group = ""
		group += bit

	# The last group is always emitted, left-padded with zeros to 8 bits
	align = 8 - len(group)
	group = "0" * align + group
	packed.append(chr(__bin2dec(group)))

	return "".join(packed), align


def compress(dataIn):
	"""
	The main function that makes the whole compression process.

	Args:
		dataIn (str): text to compress. None and the empty string are accepted.

	Returns:
		tuple: (tobinary(encoded data), tobinary(encoded tree)); each item is
		itself a (packed string, alignment) pair.
	"""
	# No input means no Huffman tree, and encodedata raises on an empty
	# tree, so both parts are packed from an empty bit string instead
	if not dataIn:
		return tobinary(""), tobinary("")

	# Build Huffman tree
	L = buildfrequencylist(dataIn)
	H = buildHuffmantree(L)

	# Encode data & Huffman tree
	data = encodedata(H, dataIn)
	tree = encodetree(H)

	return tobinary(data), tobinary(tree)


################################################################################
## DECOMPRESSION

def decodedata(huffmanTree, dataIN):
	"""
	Decode a string using the corresponding huffman tree into something more readable.

	The tree is walked from the root, going left on '0' and right on '1'.
	Reaching a node with a key outputs that key and restarts from the root.
	When the root itself carries a key (one-symbol tree) there is no branch
	to follow, so every bit of dataIN stands for that one symbol.

	Args:
		huffmanTree (BinTree): tree whose keyed nodes hold the characters.
		dataIN (str): string of '0' and '1' characters.

	Returns:
		str: the decoded text, "" for empty input.

	Raises:
		Exception: if the tree is empty, dataIN contains a character other
			than '0' or '1', a path leaves the tree, or dataIN ends in the
			middle of a path.
	"""
	# Check for NoneType
	if not huffmanTree: raise Exception("Empty Huffman tree")
	# Check for empty string
	if not dataIN: return ""

	# Without this case the walk would never consume a bit and never end
	single = huffmanTree.key is not None

	decoded = []
	B = huffmanTree
	for bit in dataIN:
		if bit != '0' and bit != '1':
			raise Exception("Input string should only contain '0' and '1' characters")

		if not single:
			B = B.left if bit == '0' else B.right
			if not B: raise Exception("Path not found in Huffman tree")

		if B.key is not None:
			decoded.append(B.key)
			B = huffmanTree  # Start again from the root for the next character

	# Stopping anywhere but the root means the last path was cut short
	if B is not huffmanTree: raise Exception("Incomplete path to character")
	return "".join(decoded)


def decodetree(dataIN):
	"""
	Rebuilds a huffman tree from its binary representation:
		* a '0' stands for an internal node, followed by its left then right subtree
		* a '1' stands for a leaf whose character is given by the next 8 values

	Args:
		dataIN (str): string of '0' and '1' characters.

	Returns:
		BinTree: root of the decoded tree. An empty input returns "".

	Raises:
		Exception: if a '1' is followed by fewer than 8 values, or if dataIN
			contains a character other than '0' or '1'.
	"""
	# Nothing to decode
	if not dataIN:
		return ""

	# First pass: split the input into tokens, None for an internal node
	# and a character for a leaf
	tokens = []
	total = len(dataIN)
	pos = 0
	while pos < total:
		flag = dataIN[pos]
		if flag == '0':
			tokens.append(None)
			pos += 1
		elif flag == '1':
			# A leaf takes 9 characters: the '1' and its 8 bits
			if total - pos < 9:
				raise Exception("Syntax error")
			bits = "".join(dataIN[k] for k in range(pos + 1, pos + 9))
			tokens.append(chr(__bin2dec(bits)))
			pos += 9
		else:
			raise Exception("Input string should only contain '0' and '1' characters")

	# Second pass: consume the tokens in preOrder
	remaining = iter(tokens)
	exhausted = object()

	def build():
		token = next(remaining, exhausted)
		# Running out of tokens leaves the missing subtree as None
		if token is exhausted:
			return None
		node = BinTree(None, None, None)
		if token:
			node.key = token
		else:
			node.left = build()
			node.right = build()
		return node

	return build()


def frombinary(dataIN, align):
	"""
	Expands a packed string back into its '0'/'1' text form (inverse of :func:`toBinary`).

	Args:
		dataIN (str): packed string, one character per group of bits.
		align (int): number of leading bits to skip in the last character.

	Returns:
		str: string of '0' and '1' characters, "" for empty input.
	"""
	# Nothing to expand
	if not dataIN:
		return ""

	last = len(dataIN) - 1

	# Every character but the last contributes all of its bits
	pieces = [__dec2bin(ord(dataIN[k])) for k in range(last)]

	# The last character only contributes the bits from position align on
	tail = __dec2bin(ord(dataIN[last]))
	pieces.extend(tail[j] for j in range(align, len(tail)))

	return "".join(pieces)


def decompress(data, dataAlign, tree, treeAlign):
	"""
	The whole decompression process.

	Args:
		data (str): packed encoded data.
		dataAlign (int): alignment of the last character of data.
		tree (str): packed encoded Huffman tree.
		treeAlign (int): alignment of the last character of tree.

	Returns:
		str: the decoded text, "" when both the tree and the data are empty.
	"""
	# Decompress Huffman tree & data
	enTree = frombinary(tree, treeAlign)
	enData = frombinary(data, dataAlign)

	# With no tree and no data there is nothing to decode; decodedata
	# would raise on the empty tree before looking at the data
	if not enTree and not enData:
		return ""

	# Decode Huffman tree & data
	deTree = decodetree(enTree)
	deData = decodedata(deTree, enData)

	# Return decoded data
	return deData


################################################################################
## ADDITIONAL FUNCTIONS

def __dec2bin(x):
	"""
	Writes a number in base 2 as a string of at least 8 characters.

	Args:
		x (int): number to convert; values below 1 give "00000000".

	Returns:
		str: '0'/'1' string, left-padded with "0" up to a length of 8.
	"""
	# Collect the digits from least to most significant
	digits = []
	while x >= 1:
		digits.append("1" if x % 2 != 0 else "0")
		x //= 2
	digits.reverse()

	# Force output string length to 8 (fills with "0")
	return "".join(digits).rjust(8, "0")


def __bin2dec(x):
	"""
	Reads a value whose decimal digits are all 0 or 1 as a base 2 number.

	Args:
		x: anything accepted by int(), for instance "00000101".

	Returns:
		int: the value of those digits read in base 2.

	Raises:
		Exception: if int(x) is negative or has a digit greater than 1.
	"""
	remaining = int(x)
	if remaining < 0:
		raise Exception("Invalid binary")

	total = 0
	weight = 1  # Power of two matching the current digit
	while remaining > 0:
		# Peel off the last decimal digit
		(remaining, digit) = divmod(remaining, 10)
		if digit > 1:
			raise Exception("Not binary")
		total += digit * weight
		weight *= 2
	return total


# Demo: compress a sample text, decompress it again and print both timings.
# NOTE: the backslash at the end of the first line of the literal continues
# the string, so the two sentences are joined with no space or newline.
original = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque elementum quam diam, nec pharetra velit gravida ac. Quisque molestie efficitur nisl, auctor congue urna tempus in. Nullam risus felis, sollicitudin sit amet magna ultrices, consectetur cursus nisi. Mauris luctus leo dui, in rutrum mauris sodales non. Morbi laoreet purus et nulla elementum, et fermentum purus posuere. Etiam id porttitor odio. Mauris porttitor enim eu justo cursus, ac efficitur lacus pretium. Nulla eu enim quis metus fermentum suscipit. Etiam vel est in odio suscipit pretium. Donec gravida libero urna, vitae gravida massa aliquam fermentum. Nam orci ante, varius non purus eu, convallis tempus sem.\
Aenean euismod accumsan nunc, ac tincidunt odio interdum sit amet. Aliquam sit amet metus sem. Maecenas a vehicula ex, eu congue risus. Proin laoreet auctor porttitor. Interdum et malesuada fames ac ante ipsum primis in faucibus. Aenean ut vulputate lacus, id condimentum magna. Maecenas ultricies nec velit et amet."

print(original)

# Time the compression (seconds elapsed)
a = time()
compressed = compress(original)
print(time() - a)

# compressed is ((data, dataAlign), (tree, treeAlign)); time the decompression
b = time()
uncompressed = decompress(compressed[0][0], compressed[0][1], compressed[1][0], compressed[1][1])
print(time() - b)

print(uncompressed)

Advertisements
Loading...

We use cookies to provide and improve our services. By using our site, you consent to our Cookies Policy.