
Алгоритм Хаффмана
class HuffmanNode:
    """A node of a Huffman tree.

    Attributes are plain and public:
        char:  the symbol stored in the node, or None when no symbol is set.
        freq:  the weight (occurrence count) of the node.
        left:  left child, or None.
        right: right child, or None.
    """

    def __init__(self, char=None, freq=0, left=None, right=None):
        """Store the symbol, its weight and the two optional children."""
        self.char, self.freq = char, freq
        self.left, self.right = left, right

    def __lt__(self, other):
        """Compare nodes by weight only, so they can be sorted or heaped."""
        return other.freq > self.freq
def build_frequency_map(data):
    """Count how many times each element occurs in ``data``.

    Args:
        data: any iterable of hashable items (iterating ``bytes`` yields
            integer byte values).

    Returns:
        A dict mapping each distinct item to its number of occurrences,
        keyed in order of first appearance.
    """
    counts = {}
    for symbol in data:
        if symbol in counts:
            counts[symbol] += 1
        else:
            counts[symbol] = 1
    return counts
def build_huffman_tree(freq_map):
    """Build a Huffman tree from a symbol -> frequency mapping.

    The two lightest nodes are repeatedly merged under a new parent whose
    weight is the sum of both, until a single root remains.

    Args:
        freq_map: mapping of symbol to its occurrence count.

    Returns:
        The root HuffmanNode, or None when ``freq_map`` is empty. A mapping
        with a single symbol yields that lone leaf as the root.
    """
    import heapq

    # Heap entries are (freq, serial, node). The serial number breaks ties in
    # creation order -- the same order a stable sort of the node list would
    # give -- and guarantees heapq never has to compare two nodes directly.
    heap = [
        (freq, serial, HuffmanNode(char=char, freq=freq))
        for serial, (char, freq) in enumerate(freq_map.items())
    ]
    if not heap:
        return None
    heapq.heapify(heap)

    serial = len(heap)
    while len(heap) > 1:
        left_freq, _, left = heapq.heappop(heap)
        right_freq, _, right = heapq.heappop(heap)
        merged = HuffmanNode(freq=left_freq + right_freq, left=left, right=right)
        heapq.heappush(heap, (merged.freq, serial, merged))
        serial += 1
    return heap[0][2]
def build_code_table(root, code="", code_table=None):
    """Walk a Huffman tree and collect the bit string assigned to each symbol.

    Going to a left child appends "0" to the code, going right appends "1".
    Every visited node whose ``char`` is not None gets an entry.

    Args:
        root: tree node with ``char``, ``left`` and ``right`` attributes, or
            None for an empty tree.
        code: bit-string prefix accumulated on the way down to ``root``.
        code_table: dict to fill in; a fresh one is created when omitted.

    Returns:
        The dict mapping symbol -> code string (``{}`` for an empty tree).
    """
    if code_table is None:
        code_table = {}
    if root is None:
        return code_table

    if root.char is not None:
        is_leaf = root.left is None and root.right is None
        # A tree that is just one leaf would otherwise get the empty code,
        # which encodes every symbol as zero bits and cannot be decoded.
        code_table[root.char] = "0" if (is_leaf and not code) else code

    build_code_table(root.left, code + "0", code_table)
    build_code_table(root.right, code + "1", code_table)
    return code_table
def huffman_encode(data):
    """Huffman-encode ``data`` into packed bytes.

    Args:
        data: the bytes to compress.

    Returns:
        A tuple ``(encoded_bytes, code_table, padding)`` where
        ``code_table`` maps each byte value to its bit string and
        ``padding`` is the number of zero bits (0-7) appended to fill the
        last byte. Empty input yields ``(b"", {}, 0)``.
    """
    if not data:
        return b"", {}, 0

    freq_map = build_frequency_map(data)
    root = build_huffman_tree(freq_map)
    code_table = build_code_table(root)
    if len(code_table) == 1:
        # A lone symbol sits at the tree root and may come back with an empty
        # code; give it one bit so the payload still records how many symbols
        # were encoded.
        code_table = {symbol: code or "0" for symbol, code in code_table.items()}

    encoded_bits = "".join(code_table[byte] for byte in data)
    # Pad only up to the next byte boundary: 0 bits when already aligned
    # (a plain ``8 - n % 8`` would append a whole useless zero byte).
    padding = -len(encoded_bits) % 8
    encoded_bits += "0" * padding

    encoded_bytes = bytes(
        int(encoded_bits[i:i + 8], 2) for i in range(0, len(encoded_bits), 8)
    )
    return encoded_bytes, code_table, padding
def huffman_decode(encoded_data, code_table, padding):
    """Decode Huffman-packed bytes back into the original bytes.

    Args:
        encoded_data: the packed bit stream.
        code_table: mapping of byte value -> bit string used for encoding.
        padding: number of filler bits at the end of the stream to discard
            (ignored unless positive).

    Returns:
        The decoded bytes; ``b""`` when ``encoded_data`` is empty. Trailing
        bits that do not complete a known code are dropped silently.
    """
    if not encoded_data:
        return b""

    bits = "".join(format(octet, "08b") for octet in encoded_data)
    if padding > 0:
        bits = bits[:-padding]

    symbol_for = {code: symbol for symbol, code in code_table.items()}

    out = bytearray()
    token_start = 0
    # Grow the candidate code one bit at a time; emit as soon as it is known.
    for token_end in range(1, len(bits) + 1):
        token = bits[token_start:token_end]
        if token in symbol_for:
            out.append(symbol_for[token])
            token_start = token_end
    return bytes(out)
BWT
def bwt(data, chunk_size):
    """Apply the Burrows-Wheeler transform to ``data`` chunk by chunk.

    Args:
        data: the bytes to transform.
        chunk_size: number of bytes per independently transformed chunk.

    Returns:
        A tuple ``(transformed_bytes, indices)``: the concatenated output of
        transform_chunk for every chunk, and the list of per-chunk indices
        it returned, in chunk order.
    """
    pieces = []
    ind = []
    for offset in range(0, len(data), chunk_size):
        row, last_column = transform_chunk(data[offset:offset + chunk_size])
        pieces.append(last_column)
        ind.append(row)
    return b"".join(pieces), ind
def transform_chunk(chunk):
    """Burrows-Wheeler transform of a single chunk.

    All rotations of ``chunk`` are ordered lexicographically; the result is
    the last byte of each rotation in that order, plus the row at which the
    unrotated chunk appears.

    Args:
        chunk: bytes-like block to transform.

    Returns:
        A tuple ``(original_index, encoded_chunk)``. An empty chunk yields
        ``(0, b"")``.
    """
    size = len(chunk)
    if size == 0:
        # No rotations exist, so there is nothing to look up or emit.
        return 0, b""

    # Sort rotation start offsets rather than the rotations themselves; the
    # sort is stable, so offset 0 comes first among identical rotations.
    order = sorted(range(size), key=lambda shift: chunk[shift:] + chunk[:shift])
    original_index = order.index(0)
    # The last byte of the rotation starting at ``shift`` is the byte just
    # before it (index -1 wraps around for shift 0).
    encoded_chunk = bytes(chunk[shift - 1] for shift in order)
    return original_index, encoded_chunk
def bwt_decode(encoded_data, indices, chunk_size):
    """Invert a chunked Burrows-Wheeler transform.

    Args:
        encoded_data: concatenated transformed chunks.
        indices: one original-row index per chunk, in chunk order.
        chunk_size: chunk length used when encoding; the final chunk may be
            shorter.

    Returns:
        The restored bytes.
    """
    total = len(encoded_data)
    restored = bytearray()
    start = 0
    chunk_no = 0
    while start < total:
        # Clamp the last chunk to the end of the input.
        stop = min(start + chunk_size, total)
        piece = encoded_data[start:stop]
        restored.extend(reverse_transform_chunk(indices[chunk_no], piece))
        start = stop
        chunk_no += 1
    return bytes(restored)
def reverse_transform_chunk(original_index, encoded_chunk):
    """Invert the Burrows-Wheeler transform of one chunk.

    Args:
        original_index: row of the sorted rotation table that held the
            original chunk.
        encoded_chunk: the transformed bytes (last column of that table).

    Returns:
        The original chunk as bytes.
    """
    size = len(encoded_chunk)
    # Positions of the last column ordered by (byte value, position): entry k
    # tells which last-column position corresponds to sorted row k.
    order = sorted(range(size), key=lambda pos: (encoded_chunk[pos], pos))

    restored = bytearray()
    row = original_index
    for _ in range(size):
        row = order[row]
        restored.append(encoded_chunk[row])
    return bytes(restored)
MTF
def mtf_encode(data: bytes) -> bytes:
    """Move-to-front encode ``data``.

    Each byte is replaced by its current position in a 256-entry list that
    starts as 0..255; the byte is then moved to the front of that list.

    Args:
        data: bytes to encode.

    Returns:
        The sequence of positions, one output byte per input byte.
    """
    recency = bytearray(range(256))
    ranks = bytearray()
    for symbol in data:
        rank = recency.index(symbol)
        ranks.append(rank)
        recency.insert(0, recency.pop(rank))
    return bytes(ranks)
def mtf_decode(encoded_data: bytes) -> bytes:
    """Invert move-to-front encoding.

    Each input value is a position in a 256-entry list that starts as
    0..255; the byte found there is emitted and moved to the front.

    Args:
        encoded_data: the sequence of positions.

    Returns:
        The decoded bytes.
    """
    recency = bytearray(range(256))
    out = bytearray()
    for rank in encoded_data:
        symbol = recency.pop(rank)
        recency.insert(0, symbol)
        out.append(symbol)
    return bytes(out)
RLE
def rle_encode(data):
    """Run-length encode ``data`` with a one-byte control prefix per packet.

    Packet formats:
        * repeat packet:  ``count`` (2..127, high bit clear) followed by the
          byte to repeat;
        * literal packet: ``0x80 | length`` (length 1..127) followed by that
          many bytes copied verbatim.

    Args:
        data: bytes to compress.

    Returns:
        The encoded bytes.
    """
    out = bytearray()
    total = len(data)
    pos = 0
    while pos < total:
        symbol = data[pos]

        # Measure the run starting here, capped at 127 bytes.
        run_end = pos + 1
        run_limit = min(total, pos + 127)
        while run_end < run_limit and data[run_end] == symbol:
            run_end += 1
        run = run_end - pos

        if run > 1:
            out.append(run)
            out.append(symbol)
            pos = run_end
            continue

        # Literal packet: keep taking bytes until one starts a repeat (it is
        # equal to its successor) or the 127-byte packet limit is reached.
        literal_start = pos
        pos += 1
        while (pos < total
               and (pos + 1 >= total or data[pos] != data[pos + 1])
               and pos - literal_start < 127):
            pos += 1
        out.append(0x80 | (pos - literal_start))
        out.extend(data[literal_start:pos])
    return bytes(out)
def rle_decode(encoded_data):
    """Decode data produced by the control-byte RLE scheme.

    A control byte with the high bit set introduces a literal packet whose
    length is the low 7 bits; otherwise the control byte is a repeat count
    applied to the single byte that follows.

    Args:
        encoded_data: the RLE-encoded bytes.

    Returns:
        The decoded bytes.

    Raises:
        IndexError: if a repeat packet is missing its data byte.
    """
    out = bytearray()
    size = len(encoded_data)
    cursor = 0
    while cursor < size:
        control = encoded_data[cursor]
        payload = cursor + 1
        if control & 0x80:
            cursor = payload + (control & 0x7F)
            out.extend(encoded_data[payload:cursor])
        else:
            out.extend([encoded_data[payload]] * control)
            cursor = payload + 1
    return bytes(out)
LZ77
def lz77_encode(data, window_size=1024, lookahead_buffer_size=16):
    """LZ77-compress ``data`` into fixed 5-byte tokens.

    Each token is ``offset`` (2 bytes, big-endian), ``length`` (2 bytes,
    big-endian) and one literal byte. A token with offset 0 and length 0
    carries just the literal; otherwise it means "copy ``length`` bytes
    starting ``offset`` bytes back, then append the literal". Matches
    shorter than 3 bytes are emitted as plain literals, and a match never
    overlaps the position being encoded.

    Args:
        data: bytes to compress.
        window_size: how far back to search for matches.
        lookahead_buffer_size: upper bound on how far ahead of the current
            position a match may extend.

    Returns:
        The token stream as bytes.

    Raises:
        OverflowError: if an offset or length does not fit in two bytes
            (only possible with limits above 65535).
    """
    compressed = bytearray()
    size = len(data)
    pos = 0
    while pos < size:
        window_start = max(0, pos - window_size)
        # Stop matches one byte short of the end of the input: every token
        # carries a literal that the decoder always appends, so the final
        # byte must be available as a real literal rather than a made-up
        # filler that would show up as an extra byte after decoding.
        lookahead_end = min(pos + lookahead_buffer_size, size - 1)

        best_offset = best_length = 0
        for candidate in range(window_start, pos):
            match_length = 0
            while (pos + match_length < lookahead_end and
                   candidate + match_length < pos and
                   data[candidate + match_length] == data[pos + match_length]):
                match_length += 1
            # Strict ">" keeps the earliest candidate among equal lengths.
            if match_length > best_length:
                best_offset, best_length = pos - candidate, match_length

        if best_length < 3:
            # Too short to be worth a back-reference: emit a bare literal.
            best_offset = best_length = 0

        compressed.extend(best_offset.to_bytes(2, 'big'))
        compressed.extend(best_length.to_bytes(2, 'big'))
        compressed.append(data[pos + best_length])
        pos += best_length + 1
    return bytes(compressed)
def lz77_decode(compressed):
    """Expand a stream of 5-byte LZ77 tokens.

    Each token holds a 2-byte big-endian offset, a 2-byte big-endian length
    and one literal byte. The decoder copies ``length`` bytes starting
    ``offset`` bytes before the current end of the output, then appends the
    literal.

    Args:
        compressed: the token stream.

    Returns:
        The decompressed bytes.

    Raises:
        IndexError: if the stream ends in the middle of a token or a token
            points outside the output produced so far.
    """
    out = bytearray()
    cursor = 0
    while cursor < len(compressed):
        offset = int.from_bytes(compressed[cursor:cursor + 2], 'big')
        length = int.from_bytes(compressed[cursor + 2:cursor + 4], 'big')
        literal = compressed[cursor + 4]
        cursor += 5
        if offset or length:
            source = len(out) - offset
            for step in range(length):
                out.append(out[source + step])
        out.append(literal)
    return bytes(out)
LZ78
import os
def lz78_encode(data: bytes) -> bytes:
    """LZ78-compress ``data`` into fixed 5-byte records.

    Each record is a 4-byte big-endian dictionary index (0 means "no
    prefix") followed by one literal byte. Phrases are numbered from 1 in
    the order they are added to the dictionary.

    Args:
        data: bytes to compress.

    Returns:
        The concatenated records.
    """
    phrase_ids = {}
    packed = bytearray()
    phrase = b""
    for value in data:
        candidate = phrase + bytes([value])
        if candidate in phrase_ids:
            # Known phrase: keep extending it with the next byte.
            phrase = candidate
            continue
        packed += phrase_ids.get(phrase, 0).to_bytes(4, 'big')
        packed.append(value)
        phrase_ids[candidate] = len(phrase_ids) + 1
        phrase = b""

    if phrase:
        # Input ended inside a known phrase: emit it as prefix + last byte.
        packed += phrase_ids.get(phrase[:-1], 0).to_bytes(4, 'big')
        packed.append(phrase[-1])
    return bytes(packed)
def lz78_decode(compressed_data: bytes) -> bytes:
    """Expand a stream of 5-byte LZ78 records.

    Each record is a 4-byte big-endian dictionary index followed by one
    literal byte. Index 0 stands for the empty prefix; every record adds
    "prefix + literal" to the dictionary under the next number, starting
    at 1.

    Args:
        compressed_data: the concatenated records.

    Returns:
        The decompressed bytes.

    Raises:
        IndexError: if the last record has no literal byte.
        KeyError: if a record refers to a phrase not yet defined.
    """
    phrases = {0: b""}
    out = bytearray()
    for start in range(0, len(compressed_data), 5):
        record = compressed_data[start:start + 5]
        prefix_id = int.from_bytes(record[:4], 'big')
        literal = record[4]
        phrase = phrases[prefix_id] + bytes([literal])
        out.extend(phrase)
        phrases[len(phrases)] = phrase
    return bytes(out)