- •Теоретическая часть
- •Переход из цветового пространства RGB в YCbCr
- •Даунсэмплинг матрицы цветового канала
- •Разбиение изображения на блоки NxN
- •Прямое и обратное двумерное DCT-II для блока размера NxN
- •Изменение матрицы квантования в зависимости от уровня сжатия
- •Квантование и обратное преобразование матрицы ДКП по заданной матрице квантования
- •Зигзаг обход матрицы NxN
- •Разностное кодирование DC-коэффициентов
- •Кодирование переменной длины разностей DC- и AC-коэффициентов
- •RLE-кодирование AC-коэффициентов
- •Кодирование разностей DC-коэффициентов и последовательностей Run/Size по таблице кодов Хаффмана и упаковка результата в байтовую строку
- •Практическая часть
- •Код программы
Код программы
# Huffman code tables. Each maps a symbol to its code, written as a string of
# '0'/'1' characters.
# NOTE(review): both table literals are abbreviated with "..." in this listing,
# so most entries are not visible here and the lines are not valid Python as
# shown - restore the full tables before running.
# DC table: keyed by the value category (variable_length_encode looks it up
# with the result of get_category).
DC_HUFFMAN_TABLE = {0: '00',...11: '111111110',}
# Reverse lookup for decoding: code string -> category.
DC_HUFFMAN_DECODE = {v: k for k, v in DC_HUFFMAN_TABLE.items()}
# AC table: keyed by (run of preceding zeros, value category) pairs.
# (0, 0) is the end-of-block marker (EOB) and (15, 0) stands for a run of
# sixteen zeros (ZRL).
AC_HUFFMAN_TABLE = {
(0, 0): '1010', # EOB....(15, 0): '11111111001', # ZRL }
# Reverse lookup for decoding: code string -> (run, category).
AC_HUFFMAN_DECODE = {v: k for k, v in AC_HUFFMAN_TABLE.items()}
def rgb_to_ycbcr(img):
    """Convert an RGB image array to YCbCr.

    The input is cast to float32 and its last axis is read as (R, G, B).
    The result has the same leading shape with (Y, Cb, Cr) stacked on the
    last axis; both chroma components are offset by +128.
    """
    rgb = img.astype(np.float32)
    red = rgb[..., 0]
    green = rgb[..., 1]
    blue = rgb[..., 2]

    luma = 0.299 * red + 0.587 * green + 0.114 * blue
    chroma_blue = -0.168736 * red - 0.331264 * green + 0.5 * blue + 128
    chroma_red = 0.5 * red - 0.418688 * green - 0.081312 * blue + 128

    planes = (luma, chroma_blue, chroma_red)
    return np.stack(planes, axis=-1)
def ycbcr_to_rgb(img):
    """Convert a YCbCr image array back to 8-bit RGB.

    Args:
        img: array whose last axis holds (Y, Cb, Cr), with the chroma
            components offset by +128.

    Returns:
        uint8 array of the same leading shape with (R, G, B) on the last
        axis. Values are clipped to [0, 255] and then truncated to integers.

    The input is never modified. The chroma offsets are removed on fresh
    float arrays rather than in place on views of ``img``, so the caller's
    data stays intact and unsigned integer input cannot wrap around.
    """
    ycbcr = np.asarray(img, dtype=np.float64)
    luma = ycbcr[..., 0]
    cb = ycbcr[..., 1] - 128.0
    cr = ycbcr[..., 2] - 128.0

    red = luma + 1.402 * cr
    green = luma - 0.344136 * cb - 0.714136 * cr
    blue = luma + 1.772 * cb

    rgb = np.stack((red, green, blue), axis=-1)
    return np.clip(rgb, 0, 255).astype(np.uint8)
def downsample_420(channel):
    """Halve a 2-D channel in both directions by averaging 2x2 neighbourhoods.

    Args:
        channel: 2-D array (H, W).

    Returns:
        Float array of shape (ceil(H / 2), ceil(W / 2)); each element is the
        mean of the corresponding 2x2 group of input samples.

    When H or W is odd, the last row/column is replicated before averaging
    instead of being dropped, so every input sample contributes and the
    result, upsampled by two, always covers the original extent.
    Non-floating input is promoted to float64 first so that summing four
    samples cannot overflow a narrow integer type.
    """
    plane = np.asarray(channel)
    if not np.issubdtype(plane.dtype, np.floating):
        plane = plane.astype(np.float64)

    rows, cols = plane.shape
    if rows % 2 or cols % 2:
        # Replicate the trailing edge so the last 2x2 group is complete.
        plane = np.pad(plane, ((0, rows % 2), (0, cols % 2)), mode='edge')

    top_left = plane[0::2, 0::2]
    bottom_left = plane[1::2, 0::2]
    top_right = plane[0::2, 1::2]
    bottom_right = plane[1::2, 1::2]
    return (top_left + bottom_left + top_right + bottom_right) / 4
def upsample_420(channel, shape):
    """Double a 2-D channel in both directions by sample repetition.

    Every sample is repeated twice along each axis, then the result is
    cropped to at most ``shape[0]`` rows and ``shape[1]`` columns.
    """
    target_rows, target_cols = shape[0], shape[1]
    rows_doubled = np.repeat(channel, 2, axis=0)
    fully_doubled = np.repeat(rows_doubled, 2, axis=1)
    return fully_doubled[:target_rows, :target_cols]
def create_dct_matrix(N):
    """Build the N x N DCT-II basis matrix.

    Row ``k`` holds ``alpha_k * cos(pi * (2n + 1) * k / (2N))`` for
    ``n = 0 .. N-1``, with ``alpha_0 = sqrt(1/N)`` and
    ``alpha_k = sqrt(2/N)`` for every other row.
    """
    D = np.zeros((N, N))
    odd_terms = 2 * np.arange(N) + 1  # the (2n + 1) factors for one row
    for k in range(N):
        if k == 0:
            alpha = np.sqrt(1 / N)
        else:
            alpha = np.sqrt(2 / N)
        D[k, :] = alpha * np.cos((np.pi * odd_terms * k) / (2 * N))
    return D
def dct2(block, D):
    """Apply the 2-D transform ``D @ block @ D.T`` to ``block``."""
    left_applied = np.matmul(D, block)
    return np.matmul(left_applied, D.T)
def idct2(block, D):
    """Apply the inverse 2-D transform ``D.T @ block @ D`` to ``block``."""
    left_applied = np.matmul(D.T, block)
    return np.matmul(left_applied, D)
def get_quant_matrix(N, quality, chrominance=False):
    """Return the N x N quantization matrix for a quality setting.

    Args:
        N: block size.
        quality: quality level; values <= 0 are treated as 1.
        chrominance: pick the chrominance base table instead of the
            luminance one (only relevant when N == 8).

    Returns:
        For N != 8, a flat matrix whose every entry equals ``quality``.
        For N == 8, the base table scaled by ``5000 / quality`` when
        quality < 50 and by ``200 - 2 * quality`` otherwise, computed as
        ``(base * scale + 50) // 100`` and clipped to [1, 255].
    """
    if quality <= 0:
        quality = 1

    # Only 8x8 blocks have base tables; other sizes get a uniform step.
    if N != 8:
        return np.ones((N, N)) * quality

    # Base tables (these appear to be the example tables from the JPEG
    # specification, Annex K - confirm against the standard).
    if chrominance:
        base = np.array([
            [17, 18, 24, 47, 99, 99, 99, 99],
            [18, 21, 26, 66, 99, 99, 99, 99],
            [24, 26, 56, 99, 99, 99, 99, 99],
            [47, 66, 99, 99, 99, 99, 99, 99],
            [99, 99, 99, 99, 99, 99, 99, 99],
            [99, 99, 99, 99, 99, 99, 99, 99],
            [99, 99, 99, 99, 99, 99, 99, 99],
            [99, 99, 99, 99, 99, 99, 99, 99],
        ])
    else:
        base = np.array([
            [16, 11, 10, 16, 24, 40, 51, 61],
            [12, 12, 14, 19, 26, 58, 60, 55],
            [14, 13, 16, 24, 40, 57, 69, 56],
            [14, 17, 22, 29, 51, 87, 80, 62],
            [18, 22, 37, 56, 68, 109, 103, 77],
            [24, 35, 55, 64, 81, 104, 113, 92],
            [49, 64, 78, 87, 103, 121, 120, 101],
            [72, 92, 95, 98, 112, 100, 103, 99],
        ])

    if quality < 50:
        scale = 5000 / quality
    else:
        scale = 200 - 2 * quality

    scaled = np.floor_divide(base * scale + 50, 100)
    return np.clip(scaled, 1, 255)
def quantize(block, Q):
    """Divide ``block`` element-wise by ``Q`` and round to the nearest integer."""
    ratios = np.divide(block, Q)
    return np.round(ratios)
def dequantize(block, Q):
    """Undo quantization by multiplying ``block`` element-wise by ``Q``."""
    return np.multiply(block, Q)
def zigzag(block):
    """Flatten a 2-D block by walking its anti-diagonals in alternating direction.

    Args:
        block: 2-D array of shape (h, w).

    Returns:
        1-D array with all h * w elements. Even-numbered diagonals are read
        with the row index increasing, odd-numbered ones with the column
        index increasing, so the scan starts block[0, 0], block[1, 0],
        block[0, 1], block[0, 2], ... For square blocks this is the order
        that inverse_zigzag reverses.

    The bounds check is applied to the indices that are actually used for
    the lookup, so rectangular blocks are scanned completely instead of
    raising IndexError on odd diagonals.
    """
    h, w = block.shape
    scan = []
    for s in range(h + w - 1):
        for step in range(s + 1):
            if s % 2 == 0:
                row, col = step, s - step
            else:
                row, col = s - step, step
            if row < h and col < w:
                scan.append(block[row, col])
    return np.array(scan)
def inverse_zigzag(arr, N):
    """Rebuild an N x N block from the 1-D sequence produced by zigzag.

    The first N * N entries of ``arr`` are placed along the anti-diagonals
    in the same alternating order that zigzag reads them; any further
    entries are ignored. Returns a float N x N array.
    """
    # Collect the target cell for each scan position first.
    cells = []
    for s in range(2 * N - 1):
        for y in range(s + 1):
            x = s - y
            if y >= N or x >= N:
                continue
            cells.append((y, x) if s % 2 == 0 else (x, y))

    block = np.zeros((N, N))
    for position, (row, col) in enumerate(cells):
        block[row, col] = arr[position]
    return block
def differential_encode_dc(dc_values):
    """Difference-encode a 1-D sequence of DC values.

    Args:
        dc_values: sequence of numbers (may be empty).

    Returns:
        Array of the same length: the first value unchanged, followed by
        the difference between each value and its predecessor. An empty
        input yields an empty array instead of raising IndexError.
    """
    values = np.asarray(dc_values)
    if values.size == 0:
        return np.array([])
    return np.concatenate((values[:1], np.diff(values)))
def differential_decode_dc(diffs):
    """Invert differential_encode_dc by taking the running sum.

    Args:
        diffs: 1-D sequence whose first entry is an absolute value and whose
            remaining entries are successive differences (may be empty).

    Returns:
        Array of reconstructed values of the same length. An empty input
        yields an empty array instead of raising IndexError.
    """
    return np.cumsum(np.asarray(diffs))
def pad_image(img, block_size):
    """Pad a 2-D array on the bottom and right to a multiple of ``block_size``.

    Args:
        img: 2-D array (h, w).
        block_size: block edge length; each output dimension is the
            smallest multiple of it that is >= the input dimension.

    Returns:
        Padded copy of ``img``. The padding replicates the last row/column
        rather than inserting zeros: a zero border puts an artificial step
        into the partial blocks (especially for chroma planes, which are
        centred on 128) and the transform then spends coefficients on it.
    """
    h, w = img.shape
    pad_h = -h % block_size
    pad_w = -w % block_size
    # np.pad cannot edge-replicate an empty axis; keep constant mode there.
    mode = 'edge' if img.size else 'constant'
    return np.pad(img, ((0, pad_h), (0, pad_w)), mode=mode)
def process_blocks(channel, block_size, func):
    """Apply ``func`` to every ``block_size`` x ``block_size`` tile of a channel.

    Args:
        channel: 2-D array.
        block_size: tile edge length.
        func: callable that takes one tile and returns an array of the same
            shape.

    Returns:
        Array with the shape of ``channel`` holding the processed tiles.
        The channel is padded with pad_image before tiling and the padding
        is cropped away again afterwards.

    The output buffer is always a floating type (at least float32), so
    fractional results from ``func`` are not truncated when the input
    channel has an integer dtype.
    """
    padded = pad_image(channel, block_size)
    h, w = padded.shape
    out_dtype = np.result_type(padded.dtype, np.float32)
    result = np.zeros((h, w), dtype=out_dtype)
    for top in range(0, h, block_size):
        for left in range(0, w, block_size):
            tile = padded[top:top + block_size, left:left + block_size]
            result[top:top + block_size, left:left + block_size] = func(tile)
    return result[:channel.shape[0], :channel.shape[1]]
def get_category(value):
    """Return the size category of a coefficient.

    The category is the number of bits needed to write ``abs(int(value))``
    in binary: 0 for 0, 1 for +-1, 2 for +-2..3, 3 for +-4..7, and so on.

    Uses exact integer arithmetic (``int.bit_length``) instead of a
    floating-point ``log2``, which is inexact for large magnitudes and
    fails for values strictly between -1 and 1 (those now map to 0).
    """
    return abs(int(value)).bit_length()
def encode_value_with_inversion(value, category):
    """Return the bit string that encodes ``value`` within its category.

    Category 0 yields an empty string. Otherwise the magnitude
    ``abs(int(value))`` is written in binary, zero-padded to ``category``
    digits; for a negative ``value`` every bit of that string is flipped.
    """
    if category == 0:
        return ''

    magnitude_bits = format(abs(int(value)), f'0{category}b')
    if value < 0:
        # Negative values are stored as the bitwise complement.
        return magnitude_bits.translate(str.maketrans('01', '10'))
    return magnitude_bits
def variable_length_encode(zz_blocks):
    """Encode scanned coefficient sequences as Huffman code / value-bit pairs.

    Args:
        zz_blocks: iterable of 1-D coefficient sequences. Element 0 of each
            sequence is coded with DC_HUFFMAN_TABLE, the remaining elements
            are run-length coded with AC_HUFFMAN_TABLE.

    Returns:
        One ``(dc_code, ac_codes)`` tuple per input sequence, where
        ``dc_code`` is ``(huffman_code, value_bits)`` and ``ac_codes`` is a
        list of ``(huffman_code, value_bits)`` tuples. A run of sixteen
        zeros is emitted as the (15, 0) symbol and trailing zeros are
        replaced by a single (0, 0) end-of-block symbol.

    Raises:
        KeyError: the leading value's category is missing from
            DC_HUFFMAN_TABLE.
        ValueError: a (run, category) pair is missing from
            AC_HUFFMAN_TABLE. An empty code used to be emitted silently in
            that case, which the decoder cannot look up; failing here
            reports the problem at its source.
    """
    encoded_blocks = []
    for zz in zz_blocks:
        dc = zz[0]
        dc_cat = get_category(dc)
        dc_code = (DC_HUFFMAN_TABLE[dc_cat], encode_value_with_inversion(dc, dc_cat))

        ac_codes = []
        zero_run = 0
        for val in zz[1:]:
            if val == 0:
                zero_run += 1
                continue
            # Runs longer than 15 are split off as sixteen-zero symbols.
            while zero_run > 15:
                ac_codes.append((AC_HUFFMAN_TABLE[(15, 0)], ''))  # ZRL
                zero_run -= 16
            cat = get_category(val)
            try:
                ac_huff = AC_HUFFMAN_TABLE[(zero_run, cat)]
            except KeyError:
                raise ValueError(
                    f"no AC Huffman code for run={zero_run}, category={cat} "
                    f"(value {val!r})"
                ) from None
            ac_codes.append((ac_huff, encode_value_with_inversion(val, cat)))
            zero_run = 0

        if zero_run > 0:
            ac_codes.append((AC_HUFFMAN_TABLE[(0, 0)], ''))  # EOB
        encoded_blocks.append((dc_code, ac_codes))
    return encoded_blocks
def decode_inverted_bits(bit_str, category):
    """Decode a value bit string written by encode_value_with_inversion.

    An empty string decodes to 0. A string starting with '1' is a positive
    binary number; otherwise every bit is flipped and the result negated.
    ``category`` is accepted for symmetry with the encoder but is not used.
    """
    if not bit_str:
        return 0

    is_positive = bit_str[0] == '1'
    if is_positive:
        return int(bit_str, 2)

    flip = {'0': '1'}
    complement = ''.join(flip.get(bit, '0') for bit in bit_str)
    return -int(complement, 2)
def variable_length_decode(encoded_blocks, block_size):
    """Turn ``(dc_code, ac_codes)`` tuples back into coefficient sequences.

    Args:
        encoded_blocks: iterable of ``(dc_code, ac_codes)`` tuples as
            produced by variable_length_encode.
        block_size: block edge length; each decoded sequence holds the
            leading value followed by at least ``block_size**2 - 1``
            run-length-expanded values.

    Returns:
        List of 1-D arrays, one per encoded block.
    """
    ac_length = block_size * block_size - 1
    decoded = []
    for (dc_huff, dc_bits), ac_codes in encoded_blocks:
        dc_val = decode_inverted_bits(dc_bits, DC_HUFFMAN_DECODE[dc_huff])

        ac = []
        for ac_huff, val_bits in ac_codes:
            run, size = AC_HUFFMAN_DECODE[ac_huff]
            if (run, size) == (0, 0):  # EOB: the rest of the block is zero
                break
            if (run, size) == (15, 0):  # ZRL: sixteen zeros, no value
                ac += [0] * 16
                continue
            ac += [0] * run
            ac.append(decode_inverted_bits(val_bits, size))

        # Zero-fill whatever the codes did not cover.
        ac += [0] * max(0, ac_length - len(ac))
        decoded.append(np.array([dc_val] + ac))
    return decoded
def compress(img, quality=50, block_size=8):
    """Compress an RGB image into a dictionary of coded coefficient data.

    Steps: convert to YCbCr, downsample both chroma planes with
    downsample_420, then for each plane pad to whole blocks, transform,
    quantize and scan every block, difference-encode the first scanned
    value of each block and variable-length-encode the remaining values.

    Args:
        img: image convertible with ``np.array`` to an (H, W, 3) array.
        quality: quality level passed to get_quant_matrix.
        block_size: block edge length.

    Returns:
        dict with keys ``'dc'`` (per-plane difference arrays under
        ``'y'``/``'cb'``/``'cr'``), ``'ac_y'``, ``'ac_cb'``, ``'ac_cr'``
        (per-plane encoded blocks), ``'size'`` (original height and width),
        ``'quality'`` and ``'shapes'`` (padded shape of each plane).
    """
    pixels = np.array(img)
    D = create_dct_matrix(block_size)
    Q_luma = get_quant_matrix(block_size, quality, chrominance=False)
    Q_chroma = get_quant_matrix(block_size, quality, chrominance=True)

    ycbcr = rgb_to_ycbcr(pixels)
    Y = ycbcr[..., 0]
    Cb_small = downsample_420(ycbcr[..., 1])
    Cr_small = downsample_420(ycbcr[..., 2])

    def encode_channel(channel, Q):
        """Encode one plane; returns (dc differences, encoded blocks, padded shape)."""
        padded = pad_image(channel, block_size)
        rows, cols = padded.shape
        scans = [
            zigzag(quantize(dct2(padded[r:r + block_size, c:c + block_size], D), Q))
            for r in range(0, rows, block_size)
            for c in range(0, cols, block_size)
        ]
        first_values = [scan[0] for scan in scans]
        # Only the values after the first are handed to the variable-length
        # coder, which in turn codes element 0 of what it receives with its
        # DC table.
        remaining = [scan[1:] for scan in scans]
        return (
            differential_encode_dc(first_values),
            variable_length_encode(remaining),
            padded.shape,
        )

    y_dc, y_ac, shape_Y = encode_channel(Y, Q_luma)
    cb_dc, cb_ac, shape_Cb = encode_channel(Cb_small, Q_chroma)
    cr_dc, cr_ac, shape_Cr = encode_channel(Cr_small, Q_chroma)

    return {
        'dc': {'y': y_dc, 'cb': cb_dc, 'cr': cr_dc},
        'ac_y': y_ac,
        'ac_cb': cb_ac,
        'ac_cr': cr_ac,
        'size': pixels.shape[:2],
        'quality': quality,
        'shapes': {'y': shape_Y, 'cb': shape_Cb, 'cr': shape_Cr},
    }
def _fit_to_shape(plane, shape):
    """Crop ``plane`` to ``shape``; replicate its last row/column if it is smaller."""
    rows, cols = shape[0], shape[1]
    plane = plane[:rows, :cols]
    missing_rows = rows - plane.shape[0]
    missing_cols = cols - plane.shape[1]
    if missing_rows or missing_cols:
        plane = np.pad(plane, ((0, missing_rows), (0, missing_cols)), mode='edge')
    return plane


def decompress(data, block_size=8):
    """Rebuild an RGB image from the dictionary produced by compress.

    Args:
        data: dict with keys ``'dc'``, ``'ac_y'``, ``'ac_cb'``, ``'ac_cr'``,
            ``'quality'``, ``'shapes'`` and (optionally) ``'size'``.
        block_size: block edge length; must equal the one used to compress.

    Returns:
        uint8 array of shape (H, W, 3). H and W are taken from
        ``data['size']`` so the block padding added during compression is
        removed; when that key is absent the padded luma shape is used.

    All three planes are brought to the output size before stacking, so a
    chroma plane that comes out smaller than luma after upsampling no
    longer makes ``np.stack`` fail.
    """
    quality = data['quality']
    D = create_dct_matrix(block_size)
    Q_Y = get_quant_matrix(block_size, quality, chrominance=False)
    Q_C = get_quant_matrix(block_size, quality, chrominance=True)

    def decode_channel(dc_diffs, ac_blocks, shape, Q):
        """Decode one plane into an array of the given (padded) shape."""
        dc_values = differential_decode_dc(dc_diffs)
        ac_zz = variable_length_decode(ac_blocks, block_size)

        blocks = []
        for dc, ac in zip(dc_values, ac_zz):
            # ``ac`` already holds block_size**2 values, so this sequence
            # has one surplus trailing element; inverse_zigzag reads only
            # the first block_size**2 entries.
            zz = np.concatenate(([dc], ac))
            q = inverse_zigzag(zz, block_size)
            blocks.append(idct2(dequantize(q, Q), D))

        h, w = shape
        rec = np.zeros((h, w))
        idx = 0
        for i in range(0, h, block_size):
            for j in range(0, w, block_size):
                rec[i:i + block_size, j:j + block_size] = blocks[idx]
                idx += 1
        return rec

    dc_data = data['dc']
    shapes = data['shapes']

    Y_full = decode_channel(dc_data['y'], data['ac_y'], shapes['y'], Q_Y).clip(0, 255)
    out_shape = tuple(data.get('size', Y_full.shape))[:2]

    Y_rec = _fit_to_shape(Y_full, out_shape)
    Cb_rec = _fit_to_shape(
        upsample_420(decode_channel(dc_data['cb'], data['ac_cb'], shapes['cb'], Q_C), out_shape),
        out_shape,
    )
    Cr_rec = _fit_to_shape(
        upsample_420(decode_channel(dc_data['cr'], data['ac_cr'], shapes['cr'], Q_C), out_shape),
        out_shape,
    )

    final_ycbcr = np.stack((Y_rec, Cb_rec, Cr_rec), axis=-1)
    return ycbcr_to_rgb(final_ycbcr)
# Demo run: load the test image, push it through compress/decompress and
# display and save the result.
img = Image.open("Lenna1.png").convert("RGB")
# quality=0 is the lowest setting; get_quant_matrix raises values <= 0 to 1.
compressed = compress(img, quality=0)
restored_img = decompress(compressed)
# Show the reconstructed image and write it next to the script.
Image.fromarray(restored_img).show()
Image.fromarray(restored_img).save("restored_Lenna1.png")
Ссылка на репозиторий на GitHub:
