Добавил:
Надеюсь, это добро кому-то поможет. Опубликованный материал нарушает ваши авторские права? Сообщите нам.
Вуз: Предмет: Файл:
лаба 1 4 сем сжатие без потерь.docx
Скачиваний:
0
Добавлен:
09.07.2025
Размер:
1 Mб
Скачать

Коды программ

Алгоритм Хаффмана

class Node:
    """Node of a Huffman tree.

    A leaf stores its symbol in ``char``; an internal node is created with
    ``char`` set to ``None`` and has its children attached afterwards via
    ``left`` and ``right``.
    """

    def __init__(self, char, freq):
        # Symbol carried by a leaf, or None for an internal (merged) node.
        self.char = char
        # Weight of the subtree; the only value used for ordering.
        self.freq = freq
        # Children are filled in by whoever builds the tree.
        self.left = None
        self.right = None

    def __lt__(self, other):
        """Compare by frequency only, so nodes can be stored in a heap."""
        return self.freq < other.freq

    def __repr__(self):
        return f"{type(self).__name__}(char={self.char!r}, freq={self.freq!r})"

def build_huffman_tree(text: bytes):
    """Build a Huffman tree for the byte values occurring in *text*.

    Args:
        text: The data whose byte frequencies define the tree.

    Returns:
        The root ``Node`` of the tree, or ``None`` when *text* is empty
        (there is nothing to build a tree from).
    """
    # Imported locally: this listing has no module-level import section.
    import heapq
    from collections import Counter

    queue_p = [Node(byte, freq) for byte, freq in Counter(text).items()]
    if not queue_p:
        return None

    heapq.heapify(queue_p)
    # Repeatedly merge the two lightest subtrees until one tree remains.
    while len(queue_p) > 1:
        left = heapq.heappop(queue_p)
        right = heapq.heappop(queue_p)
        merged = Node(None, left.freq + right.freq)
        merged.left = left
        merged.right = right
        heapq.heappush(queue_p, merged)
    return queue_p[0]

def generate_codes(node, code=0, bit_length=0, codes=None):
    """Collect the Huffman code of every leaf below *node*.

    Args:
        node: Root of the (sub)tree; ``None`` yields no codes.
        code: Integer holding the bits of the path walked so far.
        bit_length: Number of significant bits in *code*.
        codes: Dictionary to fill. A fresh one is created when omitted, so
            separate calls never share results.

    Returns:
        A dict mapping each leaf symbol to a ``(code, bit_length)`` tuple.
        Going left appends a 0 bit, going right appends a 1 bit.
    """
    if codes is None:
        codes = {}
    if node is None:
        return codes

    if node.char is not None:
        # A tree made of a single leaf has an empty path; give that symbol
        # the one-bit code 0 so every occurrence still emits a bit.
        codes[node.char] = (code, bit_length) if bit_length else (0, 1)

    generate_codes(node.left, code << 1, bit_length + 1, codes)
    generate_codes(node.right, (code << 1) | 1, bit_length + 1, codes)
    return codes

def huffman_encode(text: bytes):
    """Huffman-encode *text* into a list of bits.

    Args:
        text: The bytes to encode.

    Returns:
        A tuple ``(encoded_bits, codes)``: the bits (ints 0/1, most
        significant bit of each code first) and the code table mapping each
        byte value to ``(code, bit_length)``. Empty input gives ``([], {})``.
    """
    if not text:
        return [], {}

    root = build_huffman_tree(text)
    # Pass a fresh dict explicitly so the table never contains entries left
    # over from an earlier call.
    codes = generate_codes(root, 0, 0, {})

    # Input with one distinct byte yields a single-leaf tree; make sure its
    # code is at least one bit long, otherwise nothing would be emitted.
    if len(codes) == 1:
        (symbol, (_, length)), = codes.items()
        if length == 0:
            codes[symbol] = (0, 1)

    encoded_bits = []
    for byte in text:
        code, length = codes[byte]
        encoded_bits.extend(
            (code >> shift) & 1 for shift in range(length - 1, -1, -1)
        )
    return encoded_bits, codes

def huffman_decode(encoded_bits, codes):
    """Decode a bit sequence produced with the given Huffman code table.

    Args:
        encoded_bits: Iterable of bits (ints 0/1).
        codes: Mapping of byte value to ``(code, bit_length)``.

    Returns:
        The decoded bytes. Trailing bits that do not complete a code are
        ignored.
    """
    reverse_codes = {v: k for k, v in codes.items()}
    decoded_bytes = bytearray()
    code = 0
    bit_length = 0
    for bit in encoded_bits:
        code = (code << 1) | bit
        bit_length += 1
        # Both value and length are needed as the key: leading zero bits
        # are significant, so 0b1 and 0b01 are different codes.
        symbol = reverse_codes.get((code, bit_length))
        if symbol is not None:
            decoded_bytes.append(symbol)
            code = 0
            bit_length = 0
    return bytes(decoded_bytes)

MTF

def mtf_encode(data: bytes) -> list:
    """Apply the move-to-front transform to *data*.

    Args:
        data: The bytes to transform.

    Returns:
        A list with, for every input byte, its position in the symbol table
        at that moment. The table starts as 0..255 and each byte is moved to
        the front after being used.
    """
    symbol_table = list(range(256))
    encoded = []
    for byte in data:
        index = symbol_table.index(byte)
        encoded.append(index)
        # Nothing to move when the symbol is already at the front.
        if index:
            symbol_table.insert(0, symbol_table.pop(index))
    return encoded

def mtf_decode(encoded: list) -> bytes:
    """Invert the move-to-front transform.

    Args:
        encoded: Sequence of table positions as produced by the encoder.

    Returns:
        The original bytes.

    Raises:
        ValueError: If a position lies outside the 256-entry table.
    """
    symbol_table = list(range(256))
    decoded = bytearray()
    for index in encoded:
        # Reject negatives explicitly: list indexing would silently wrap
        # them around and decode garbage.
        if not 0 <= index < len(symbol_table):
            raise ValueError(f"Недопустимый индекс MTF: {index}.")
        byte = symbol_table[index]
        decoded.append(byte)
        if index:
            symbol_table.insert(0, symbol_table.pop(index))
    return bytes(decoded)

RLE

def rle_encode(data: bytes, M: int) -> bytes:
    """Run-length encode *data* using symbols of *M* bits.

    Output is a sequence of records, each starting with a control byte:

    * high bit clear -- the byte is a repeat count (2..127) followed by one
      symbol;
    * high bit set -- the low 7 bits give the number of literal symbols
      (1..127) that follow unchanged.

    Args:
        data: The bytes to encode.
        M: Symbol width in bits; must be 8, 16 or 24.

    Returns:
        The encoded bytes.

    Raises:
        ValueError: If *M* is not supported, or if fewer than one whole
            symbol remains at the start of a record. A trailing partial
            symbol that directly follows a literal symbol is absorbed into
            that literal record instead.
    """
    if M not in [8, 16, 24]:
        raise ValueError("M должно быть 8, 16 или 24.")

    symb_size = M // 8
    encoded = bytearray()
    i = 0
    n = len(data)
    while i < n:
        if i + symb_size > n:
            raise ValueError(f"Недостаточно данных {M}.")
        # Always take the symbol as a slice, including for M == 8: indexing
        # would give an int, which never compares equal to the bytes slices
        # below, so no run would ever be detected.
        symb = data[i:i + symb_size]

        count = 1
        while (count < 127
               and data[i + count * symb_size:i + (count + 1) * symb_size] == symb):
            count += 1

        if count > 1:
            encoded.append(count)
            encoded.extend(symb)
            i += count * symb_size
        else:
            # Gather literals until the next symbol starts a run, the data
            # ends, or the 7-bit length field is full.
            start = i
            while (i < n
                   and (i + symb_size >= n
                        or data[i:i + symb_size] != data[i + symb_size:i + 2 * symb_size])
                   and (i - start) // symb_size < 127):
                i += symb_size
            length = (i - start) // symb_size
            encoded.append(0x80 | length)
            encoded.extend(data[start:i])
    return bytes(encoded)

def rle_decode(encoded: bytes, M: int) -> bytes:
    """Decode run-length data made of *M*-bit symbols.

    Each record starts with a control byte. With the high bit set, its low
    7 bits give the number of literal symbols that follow; otherwise the
    byte is a repeat count for the single symbol that follows.

    Args:
        encoded: The run-length encoded bytes.
        M: Symbol width in bits; must be 8, 16 or 24.

    Returns:
        The decoded bytes.

    Raises:
        ValueError: If *M* is not supported, or a repeat record is cut off
            before its symbol is complete.
    """
    if M not in [8, 16, 24]:
        raise ValueError("M должно быть 8, 16 или 24.")

    symb_size = M // 8
    decoded = bytearray()
    i = 0
    n = len(encoded)
    while i < n:
        control_byte = encoded[i]
        i += 1
        if control_byte & 0x80:
            # Literal record. A short final slice is accepted on purpose:
            # the encoder can end a literal record with a partial symbol.
            span = (control_byte & 0x7F) * symb_size
            decoded.extend(encoded[i:i + span])
            i += span
        else:
            symb = encoded[i:i + symb_size]
            if len(symb) < symb_size:
                raise ValueError(f"Недостаточно данных {M}.")
            decoded.extend(symb * control_byte)
            i += symb_size
    return bytes(decoded)

def detect_m(data: bytes) -> int:
    """Choose an RLE symbol width (in bits) from the length of *data*.

    Any 2-byte group fits in 16 bits and any 3-byte group fits in 24 bits,
    so a per-symbol range check can never fail; only divisibility of the
    length matters.

    Args:
        data: The data that is going to be encoded.

    Returns:
        16 if the length is even (this includes empty input), otherwise 24
        if the length is a multiple of 3, otherwise 8.
    """
    n = len(data)
    if n % 2 == 0:
        return 16
    if n % 3 == 0:
        return 24
    return 8

Алгоритм Барроуза-Уиллера для работы с блоками

def encode_bwt(origin_data, block_size):
    """Apply the Burrows-Wheeler transform to *origin_data* block by block.

    Args:
        origin_data: The bytes to transform.
        block_size: Maximum number of bytes per block; must be positive.

    Returns:
        A tuple ``(data, indices)``: the concatenated last columns of every
        block's sorted rotation table, and for each block the row at which
        the unrotated block appears in that table.

    Raises:
        ValueError: If *block_size* is not positive.
    """
    if block_size <= 0:
        raise ValueError("block_size должен быть положительным.")

    indices = []
    data = bytearray()
    for start in range(0, len(origin_data), block_size):
        block = origin_data[start:start + block_size]
        # Pair each rotation with its shift; the shift breaks ties between
        # identical rotations, so the unrotated block (shift 0) is always
        # the first of its equals.
        rotations = sorted((block[j:] + block[:j], j) for j in range(len(block)))
        index = next(row for row, (_, shift) in enumerate(rotations) if shift == 0)
        data.extend(rot[-1] for rot, _ in rotations)
        indices.append(index)
    return bytes(data), indices

def decode_bwt(encoded_data, indices, block_size):
    """Invert a block-wise Burrows-Wheeler transform.

    Args:
        encoded_data: Concatenated transformed blocks.
        indices: For each block, the row of the original block in its
            sorted rotation table.
        block_size: Block size that was used for encoding; must be positive.

    Returns:
        The restored bytes.

    Raises:
        ValueError: If *block_size* is not positive or an index does not
            point inside its block.
    """
    if block_size <= 0:
        raise ValueError("block_size должен быть положительным.")

    restored_data = bytearray()
    for block_no, row in enumerate(indices):
        start = block_no * block_size
        block = encoded_data[start:start + block_size]
        if block and not 0 <= row < len(block):
            raise ValueError(f"Недопустимый индекс BWT: {row}.")
        # Sorting (byte, position) pairs yields the first column of the
        # rotation table; each entry also says which row to visit next.
        table = sorted((char, idx) for idx, char in enumerate(block))
        for _ in range(len(block)):
            char, row = table[row]
            restored_data.append(char)
    return bytes(restored_data)

LZ77

def lz77_compress(input_data, window_size=2048, buffer_size=64):
    """Compress *input_data* with LZ77.

    The output is a sequence of 5-byte tokens: a 2-byte big-endian offset,
    a 2-byte big-endian match length and one literal byte. A token with
    offset 0 and length 0 carries only its literal. Matches shorter than
    three bytes are not used.

    Args:
        input_data: The bytes to compress.
        window_size: How far back a match may start.
        buffer_size: Maximum match length.

    Returns:
        The compressed bytes.

    Raises:
        ValueError: If *window_size* is negative.
    """
    if window_size < 0:
        raise ValueError("window_size не может быть отрицательным.")

    # Offsets and lengths are stored in two bytes each, so larger limits
    # could not be written out.
    max_offset = min(window_size, 0xFFFF)
    max_length = min(buffer_size, 0xFFFF)

    data_len = len(input_data)
    res = bytearray()
    ind = 0
    while ind < data_len:
        # Every token ends with a literal, so a match must stop at least
        # one byte before the end of the input.
        limit = min(max_length, data_len - ind - 1)
        best_len, best_off = 0, 0
        for st in range(max(0, ind - max_offset), ind):
            length = 0
            while length < limit and input_data[st + length] == input_data[ind + length]:
                length += 1
            # Strict comparison keeps the earliest of equally long matches.
            if length > best_len:
                best_len, best_off = length, ind - st

        if best_len < 3:
            best_len, best_off = 0, 0

        res += best_off.to_bytes(2, 'big')
        res += best_len.to_bytes(2, 'big')
        res.append(input_data[ind + best_len])
        ind += best_len + 1
    return bytes(res)

def decode_LZ77(data):
    """Decompress a stream of 5-byte LZ77 tokens.

    Each token holds a 2-byte big-endian offset, a 2-byte big-endian length
    and one literal byte. The decoder copies ``length`` bytes starting
    ``offset`` bytes back in the output, then appends the literal.

    Args:
        data: The compressed bytes.

    Returns:
        The decompressed bytes.

    Raises:
        ValueError: If the input is not a whole number of tokens, or a
            token refers to data before the start of the output.
    """
    if len(data) % 5:
        raise ValueError("Длина данных LZ77 должна быть кратна 5.")

    decode = bytearray()
    for ind in range(0, len(data), 5):
        position = int.from_bytes(data[ind:ind + 2], 'big')
        length = int.from_bytes(data[ind + 2:ind + 4], 'big')
        byte = data[ind + 4]
        if length:
            # Without this check an oversized offset becomes a negative
            # index and silently copies from the wrong end of the output.
            if not 1 <= position <= len(decode):
                raise ValueError("Недопустимая ссылка LZ77.")
            st_ind = len(decode) - position
            # Copy byte by byte: the source may overlap the bytes that are
            # being appended (offset smaller than length).
            for step in range(length):
                decode.append(decode[st_ind + step])
        decode.append(byte)
    return bytes(decode)

LZ78

def lz78_compress(input_filename: str, output_filename: str):
    """Compress a file with LZ78 and print size statistics.

    The output file is a sequence of 5-byte tokens: a 4-byte big-endian
    dictionary index (0 means "no prefix") followed by one byte. Dictionary
    entries are numbered from 1 in the order they are created.

    Args:
        input_filename: Path of the file to compress.
        output_filename: Path the compressed data is written to.
    """
    # Imported locally: this listing has no module-level import section.
    import os

    with open(input_filename, 'rb') as input_file:
        data = input_file.read()

    dictionary = {}
    output = bytearray()
    phrase = b""
    for byte in data:
        candidate = phrase + bytes([byte])
        if candidate in dictionary:
            # Keep extending the phrase while it is still known.
            phrase = candidate
            continue
        output += dictionary.get(phrase, 0).to_bytes(4, 'big')
        output.append(byte)
        dictionary[candidate] = len(dictionary) + 1
        phrase = b""

    if phrase:
        # Input ended inside a known phrase: emit it as prefix + last byte.
        output += dictionary.get(phrase[:-1], 0).to_bytes(4, 'big')
        output.append(phrase[-1])

    with open(output_filename, 'wb') as output_file:
        output_file.write(output)

    print(f"Сжатые данные записаны в {output_filename}")
    original_size = os.path.getsize(input_filename)
    compressed_size = os.path.getsize(output_filename)
    print("original_size", original_size)
    print("compressed_size", compressed_size)
    # An empty input produces an empty output; the ratio is undefined then.
    if compressed_size:
        compression_ratio = original_size / compressed_size
        print(f"Коэффициент сжатия: {compression_ratio:.3f}")

def lz78_decompress(input_filename: str, output_filename: str):
    """Decompress a file of 5-byte LZ78 tokens.

    Each token is a 4-byte big-endian dictionary index followed by one
    byte. Index 0 stands for the empty phrase; every token adds the phrase
    it produces to the dictionary under the next free index, starting at 1.

    Args:
        input_filename: Path of the compressed file.
        output_filename: Path the restored data is written to.

    Raises:
        ValueError: If the input is not a whole number of tokens or a token
            refers to a dictionary entry that does not exist yet.
    """
    with open(input_filename, 'rb') as input_file:
        compressed_data = input_file.read()

    if len(compressed_data) % 5:
        raise ValueError("Длина данных LZ78 должна быть кратна 5.")

    # Position in the list is the dictionary index; slot 0 is the empty
    # phrase, so "no prefix" needs no special case.
    phrases = [b""]
    output = bytearray()
    for offset in range(0, len(compressed_data), 5):
        current_index = int.from_bytes(compressed_data[offset:offset + 4], 'big')
        if current_index >= len(phrases):
            raise ValueError(f"Недопустимый индекс словаря LZ78: {current_index}.")
        phrase = phrases[current_index] + compressed_data[offset + 4:offset + 5]
        output += phrase
        phrases.append(phrase)

    with open(output_filename, 'wb') as output_file:
        output_file.write(output)
    print(f"Данные записаны в {output_filename}")

Ссылка на GitHub: