Добавил:
Опубликованный материал нарушает ваши авторские права? Сообщите нам.
Вуз: Предмет: Файл:
лаба Алгоритмы сжатия без потерь - 4 сем.docx
Скачиваний:
0
Добавлен:
08.07.2025
Размер:
454.72 Кб
Скачать

Алгоритм Хаффмана

class HuffmanNode:
    """A node of a Huffman tree.

    Nodes created with a ``char`` act as leaves; nodes created with
    ``left``/``right`` children and no ``char`` act as internal nodes.
    """

    def __init__(self, char=None, freq=0, left=None, right=None):
        """Store the symbol, its weight and the two optional children."""
        self.char, self.freq = char, freq
        self.left, self.right = left, right

    def __lt__(self, other):
        """Order nodes by frequency only, so lighter nodes compare smaller."""
        return self.freq < other.freq

def build_frequency_map(data):
    """Count how many times each item of ``data`` occurs.

    Returns a dict mapping each distinct item to its number of occurrences;
    keys appear in order of first occurrence.
    """
    counts = {}
    for symbol in data:
        if symbol in counts:
            counts[symbol] += 1
        else:
            counts[symbol] = 1
    return counts

def build_huffman_tree(freq_map):
    """Build a Huffman tree from a symbol -> frequency mapping.

    Args:
        freq_map: dict mapping each symbol to its occurrence count.

    Returns:
        The root HuffmanNode of the tree, or None when ``freq_map`` is empty.
        For a single-symbol map the root is that symbol's leaf.
    """
    import heapq  # local import keeps this function self-contained

    # An empty alphabet has no tree; indexing the empty node list used to
    # raise IndexError here.
    if not freq_map:
        return None

    # Heap entries are (freq, order, node). ``order`` is a unique, increasing
    # tie-breaker: leaves keep their position in freq_map and merged nodes are
    # numbered after them. This reproduces the ordering of a stable sort by
    # frequency with merged nodes appended at the end, and it guarantees the
    # nodes themselves are never compared.
    heap = [
        (freq, order, HuffmanNode(char=char, freq=freq))
        for order, (char, freq) in enumerate(freq_map.items())
    ]
    heapq.heapify(heap)
    next_order = len(heap)

    while len(heap) > 1:
        left_freq, _, left = heapq.heappop(heap)
        right_freq, _, right = heapq.heappop(heap)
        merged = HuffmanNode(freq=left_freq + right_freq, left=left, right=right)
        heapq.heappush(heap, (merged.freq, next_order, merged))
        next_order += 1

    return heap[0][2]

def build_code_table(root, code="", code_table=None):
    """Collect the bit-string code of every symbol in a Huffman tree.

    Walks the tree recursively, appending "0" for a left edge and "1" for a
    right edge, and records the accumulated path for every node that carries
    a symbol (``char`` is not None).

    Args:
        root: tree node with ``char``, ``left`` and ``right`` attributes,
            or None.
        code: path accumulated so far (used by the recursion).
        code_table: dict being filled (used by the recursion); a fresh dict
            is created when omitted.

    Returns:
        dict mapping each symbol to its code string. Empty when ``root`` is
        None.
    """
    if code_table is None:
        code_table = {}
    if root is None:
        return code_table

    if root.char is not None:
        # A tree consisting of a single leaf has an empty path to that leaf.
        # An empty code would make the encoded stream carry no information,
        # so the lone symbol is given the one-bit code "0" instead.
        if not code and root.left is None and root.right is None:
            code = "0"
        code_table[root.char] = code

    build_code_table(root.left, code + "0", code_table)
    build_code_table(root.right, code + "1", code_table)
    return code_table

def huffman_encode(data):
    """Huffman-encode a byte sequence.

    Args:
        data: bytes-like input.

    Returns:
        A tuple ``(encoded_bytes, code_table, padding)`` where ``code_table``
        maps each byte value to its bit-string code and ``padding`` is the
        number of zero bits (0-7) appended to fill the last byte. Empty input
        yields ``(b"", {}, 0)``.
    """
    if not data:
        return b"", {}, 0

    freq_map = build_frequency_map(data)
    root = build_huffman_tree(freq_map)
    code_table = build_code_table(root)

    # With a one-symbol alphabet the tree is a single leaf; make sure its code
    # is not empty, otherwise the symbol count cannot be recovered on decode.
    if len(code_table) == 1:
        code_table = {symbol: code or "0" for symbol, code in code_table.items()}

    encoded_bits = "".join(code_table[byte] for byte in data)

    # Pad only up to the next byte boundary. The trailing ``% 8`` keeps the
    # padding at 0 (not 8) when the stream is already byte-aligned.
    padding = (8 - len(encoded_bits) % 8) % 8
    encoded_bits += "0" * padding

    encoded_bytes = bytes(
        int(encoded_bits[i:i + 8], 2) for i in range(0, len(encoded_bits), 8)
    )
    return encoded_bytes, code_table, padding

def huffman_decode(encoded_data, code_table, padding):
    """Decode Huffman-encoded bytes back into the original byte sequence.

    Args:
        encoded_data: the packed bit stream.
        code_table: dict mapping each symbol to its bit-string code.
        padding: number of filler bits at the end of the stream to discard.

    Returns:
        The decoded bytes. Trailing bits that do not complete a code are
        ignored. Empty input yields ``b""``.
    """
    if not encoded_data:
        return b""

    bit_string = "".join(format(value, "08b") for value in encoded_data)
    if padding > 0:
        bit_string = bit_string[:-padding]

    # Invert the table: bit-string code -> symbol.
    symbol_for = {}
    for symbol, code in code_table.items():
        symbol_for[code] = symbol

    result = bytearray()
    code_start = 0
    # Grow the candidate code one bit at a time; as soon as it matches a known
    # code, emit the symbol and start a new candidate after it.
    for code_end in range(1, len(bit_string) + 1):
        candidate = bit_string[code_start:code_end]
        if candidate in symbol_for:
            result.append(symbol_for[candidate])
            code_start = code_end
    return bytes(result)

BWT

def bwt(data, chunk_size):
    """Apply the Burrows-Wheeler transform to ``data`` chunk by chunk.

    Args:
        data: bytes-like input.
        chunk_size: number of bytes per independently transformed chunk;
            must be positive.

    Returns:
        A tuple ``(transformed_bytes, indices)`` where ``indices`` holds, for
        each chunk in order, the index returned by ``transform_chunk``.

    Raises:
        ValueError: if ``chunk_size`` is not positive.
    """
    # A negative step would make the range below empty and silently drop all
    # of the input; zero already raised ValueError from range().
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    transformed_data = bytearray()
    indices = []
    for start in range(0, len(data), chunk_size):
        index, encoded_chunk = transform_chunk(data[start:start + chunk_size])
        transformed_data.extend(encoded_chunk)
        indices.append(index)
    return bytes(transformed_data), indices

def transform_chunk(chunk):
    """Burrows-Wheeler transform of a single chunk.

    Builds every rotation of ``chunk``, sorts them, and takes the last byte
    of each sorted rotation.

    Args:
        chunk: bytes-like chunk to transform.

    Returns:
        A tuple ``(original_index, encoded_chunk)``: the position of the
        unrotated chunk among the sorted rotations and the last-column bytes.
        An empty chunk yields ``(0, b"")``.
    """
    # With no rotations there is nothing to look up; the index search below
    # would raise ValueError on an empty chunk.
    if len(chunk) == 0:
        return 0, b""

    rotations = sorted(chunk[i:] + chunk[:i] for i in range(len(chunk)))
    # For a periodic chunk several rotations equal the chunk; the first one
    # is used.
    original_index = rotations.index(chunk)
    encoded_chunk = bytes(rotation[-1] for rotation in rotations)
    return original_index, encoded_chunk

def bwt_decode(encoded_data, indices, chunk_size):
    """Invert a chunked Burrows-Wheeler transform.

    Args:
        encoded_data: concatenated transformed chunks.
        indices: one original-row index per chunk, in chunk order.
        chunk_size: chunk length used when encoding; must be positive.

    Returns:
        The restored bytes.

    Raises:
        ValueError: if ``chunk_size`` is not positive.
        IndexError: if ``indices`` has fewer entries than there are chunks.
    """
    # A non-positive chunk size would never advance through the data and the
    # decode loop would not terminate.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    restored_data = bytearray()
    chunk_starts = range(0, len(encoded_data), chunk_size)
    for chunk_no, start in enumerate(chunk_starts):
        # Slicing clamps at the end of the data, so the last chunk may be short.
        chunk = encoded_data[start:start + chunk_size]
        restored_data.extend(reverse_transform_chunk(indices[chunk_no], chunk))
    return bytes(restored_data)

def reverse_transform_chunk(original_index, encoded_chunk):
    """Invert the Burrows-Wheeler transform of one chunk.

    Args:
        original_index: row of the original chunk among the sorted rotations.
        encoded_chunk: last-column bytes produced by the forward transform.

    Returns:
        The restored chunk as bytes.
    """
    size = len(encoded_chunk)
    # Positions of the encoded bytes ordered by byte value; the stable sort
    # keeps equal bytes in their original position order.
    sorted_positions = sorted(range(size), key=lambda pos: encoded_chunk[pos])

    restored = bytearray()
    row = original_index
    for _ in range(size):
        # Each row of the sorted view points at the position in the encoded
        # chunk that supplies the next output byte and the next row to visit.
        row = sorted_positions[row]
        restored.append(encoded_chunk[row])
    return bytes(restored)

MTF

def mtf_encode(data: bytes) -> bytes:
    """Move-to-front encode ``data``.

    Starts from the table 0..255. Each input byte is replaced by its current
    position in the table, then moved to the front of the table.
    """
    symbols = list(range(256))
    ranks = bytearray()
    for value in data:
        rank = symbols.index(value)
        ranks.append(rank)
        # Move the symbol just seen to the front of the table.
        symbols.insert(0, symbols.pop(rank))
    return bytes(ranks)

def mtf_decode(encoded_data: bytes) -> bytes:
    """Invert move-to-front encoding.

    Starts from the table 0..255. Each input value is a position in the
    table; the byte found there is emitted and moved to the front.
    """
    symbols = list(range(256))
    restored = bytearray()
    for rank in encoded_data:
        value = symbols[rank]
        restored.append(value)
        # Mirror the encoder: the symbol just emitted moves to the front.
        symbols.insert(0, symbols.pop(rank))
    return bytes(restored)

RLE

def rle_encode(data):
    """Run-length encode ``data``.

    Output is a sequence of packets:
      * repeat packet: a count byte (2..127, high bit clear) followed by the
        byte to repeat;
      * literal packet: a byte ``0x80 | length`` (length 1..127) followed by
        that many bytes copied verbatim.
    """
    out = bytearray()
    total = len(data)
    pos = 0
    while pos < total:
        value = data[pos]

        # Measure the run starting at pos, capped at 127 bytes.
        run_end = pos + 1
        run_limit = min(total, pos + 127)
        while run_end < run_limit and data[run_end] == value:
            run_end += 1
        run_length = run_end - pos

        if run_length > 1:
            out.append(run_length)
            out.append(value)
            pos = run_end
            continue

        # No run here: gather literals until the next byte pair that starts a
        # run, the 127-byte limit, or the end of the input.
        literals = bytearray((value,))
        pos += 1
        while pos < total and len(literals) < 127:
            if pos + 1 < total and data[pos] == data[pos + 1]:
                break
            literals.append(data[pos])
            pos += 1
        out.append(0x80 | len(literals))
        out.extend(literals)
    return bytes(out)

def rle_decode(encoded_data):
    """Decode run-length encoded data.

    A control byte with the high bit set introduces a literal packet whose
    length is the low 7 bits; otherwise the control byte is a repeat count
    applied to the byte that follows it.
    """
    out = bytearray()
    total = len(encoded_data)
    cursor = 0
    while cursor < total:
        header = encoded_data[cursor]
        payload_at = cursor + 1
        if header & 0x80:
            literal_length = header & 0x7F
            out.extend(encoded_data[payload_at:payload_at + literal_length])
            cursor = payload_at + literal_length
        else:
            repeated = encoded_data[payload_at]
            out.extend([repeated] * header)
            cursor = payload_at + 1
    return bytes(out)

LZ77

def lz77_encode(data, window_size=1024, lookahead_buffer_size=16):
    """LZ77-compress ``data`` into fixed 5-byte tokens.

    Each token is ``offset`` (2 bytes, big-endian), ``length`` (2 bytes,
    big-endian) and one literal byte that follows the copied match. A token
    with offset 0 and length 0 carries only the literal. Matches shorter than
    3 bytes are not used, and a match never overlaps the position being
    encoded.

    Args:
        data: bytes-like input.
        window_size: how far back to search for matches. Offsets are stored
            in 2 bytes, so an offset above 65535 raises OverflowError.
        lookahead_buffer_size: maximum match length.

    Returns:
        The compressed bytes.
    """
    compressed = bytearray()
    data_len = len(data)
    pos = 0
    while pos < data_len:
        window_start = max(0, pos - window_size)
        # Stop matches one byte before the end of the input so that a real
        # literal always follows the match. Letting a match reach the end
        # forced a made-up 0 literal into the token, which the decoder then
        # appended to the output.
        lookahead_end = min(pos + lookahead_buffer_size, data_len - 1)

        best_offset = 0
        best_length = 0
        for candidate in range(window_start, pos):
            match_length = 0
            while (pos + match_length < lookahead_end
                   and candidate + match_length < pos
                   and data[candidate + match_length] == data[pos + match_length]):
                match_length += 1
            # Strict comparison keeps the earliest candidate among equals.
            if match_length > best_length:
                best_offset = pos - candidate
                best_length = match_length

        if best_length >= 3:
            compressed.extend(best_offset.to_bytes(2, 'big'))
            compressed.extend(best_length.to_bytes(2, 'big'))
            compressed.append(data[pos + best_length])
            pos += best_length + 1
        else:
            compressed.extend((0).to_bytes(2, 'big'))  # offset = 0
            compressed.extend((0).to_bytes(2, 'big'))  # length = 0
            compressed.append(data[pos])
            pos += 1
    return bytes(compressed)

def lz77_decode(compressed):
    """Decode a stream of 5-byte LZ77 tokens.

    Each token is a 2-byte big-endian offset, a 2-byte big-endian length and
    one literal byte. Unless both offset and length are zero, ``length``
    bytes are copied one at a time from ``offset`` bytes back in the output;
    the literal is then appended in every case.
    """
    out = bytearray()
    for base in range(0, len(compressed), 5):
        offset = int.from_bytes(compressed[base:base + 2], 'big')
        length = int.from_bytes(compressed[base + 2:base + 4], 'big')
        literal = compressed[base + 4]
        if offset or length:
            source = len(out) - offset
            for step in range(length):
                out.append(out[source + step])
        out.append(literal)
    return bytes(out)

LZ78

import os

def lz78_encode(data: bytes) -> bytes:
    """LZ78-compress ``data`` into fixed 5-byte tokens.

    Each token is the 4-byte big-endian index of the longest phrase already
    in the dictionary (0 for none) followed by the byte that extends it.
    Phrases are numbered from 1 in the order they are added.
    """
    phrase_ids = {}
    tokens = []
    phrase = b""
    for value in data:
        candidate = phrase + bytes((value,))
        if candidate in phrase_ids:
            # Known phrase: keep extending it.
            phrase = candidate
            continue
        tokens.append((phrase_ids.get(phrase, 0), value))
        phrase_ids[candidate] = len(phrase_ids) + 1
        phrase = b""

    # The input ended inside a known phrase: emit it as its prefix's index
    # plus its last byte.
    if phrase:
        tokens.append((phrase_ids.get(phrase[:-1], 0), phrase[-1]))

    return b"".join(
        prefix_id.to_bytes(4, 'big') + bytes((value,))
        for prefix_id, value in tokens
    )

def lz78_decode(compressed_data: bytes) -> bytes:
    """Decode a stream of 5-byte LZ78 tokens.

    Each token is a 4-byte big-endian phrase index followed by one byte.
    Index 0 means "no prefix"; any other index refers to a phrase produced by
    an earlier token (tokens are numbered from 1).
    """
    phrases = {}
    result = bytearray()
    token_starts = range(0, len(compressed_data), 5)
    for token_no, base in enumerate(token_starts, start=1):
        record = compressed_data[base:base + 5]
        prefix_id = int.from_bytes(record[:4], 'big')
        value = record[4]
        if prefix_id:
            phrase = phrases[prefix_id] + bytes((value,))
        else:
            phrase = bytes((value,))
        result.extend(phrase)
        phrases[token_no] = phrase
    return bytes(result)