import os import wave import numpy as np
# Eight-byte signature that opens every PNG stream.
PNG_SIG = b"\x89PNG\r\n\x1a\n"

# (signature, label) pairs; detect_magic() tries them in list order.
MAGICS = [
    (PNG_SIG, "png"),
    (b"PK\x03\x04", "zip"),
    (b"RIFF", "riff"),
    (b"%PDF-", "pdf"),
]
def detect_magic(data: bytes):
    """Look for a known file signature anywhere inside ``data``.

    Signatures are tried in the order they appear in ``MAGICS``. The first
    signature that occurs at any offset wins, even if a later entry would
    match at a smaller offset.

    Returns:
        ``(label, offset)`` for the first signature found, or
        ``("unknown", -1)`` when none of them occurs in ``data``.
    """
    # Lazy generator: searching stops at the first signature that is present.
    hits = ((label, data.find(sig)) for sig, label in MAGICS)
    return next((hit for hit in hits if hit[1] != -1), ("unknown", -1))
def read_wav_mono(path: str):
    """Read a PCM WAV file and return it as mono float32 samples.

    Integer samples of 1, 2, 3 or 4 bytes are scaled by their full-scale
    value (8-bit data is treated as unsigned with a 128 offset).
    Multi-channel audio is reduced to mono by averaging the channels of
    each frame.

    Args:
        path: Path of the WAV file to read.

    Returns:
        ``(x, fs)`` where ``x`` is a 1-D float32 array of mono samples and
        ``fs`` is the frame rate reported by the file.

    Raises:
        ValueError: If the sample width is not 1, 2, 3 or 4 bytes.
    """
    with wave.open(path, "rb") as wf:
        ch = wf.getnchannels()
        sw = wf.getsampwidth()
        fs = wf.getframerate()
        raw = wf.readframes(wf.getnframes())

    if sw not in (1, 2, 3, 4):
        raise ValueError(f"Unsupported sample width: {sw} bytes")

    # A file that was cut short can end in the middle of a frame. Drop the
    # incomplete tail so the reshape calls below never see a ragged buffer.
    frame_bytes = sw * max(ch, 1)
    raw = raw[: len(raw) - len(raw) % frame_bytes]

    if sw == 1:
        x = (np.frombuffer(raw, np.uint8).astype(np.float32) - 128.0) / 128.0
    elif sw == 2:
        x = np.frombuffer(raw, np.int16).astype(np.float32) / 32768.0
    elif sw == 3:
        # Assemble each 3-byte group (lowest byte first) into an int32.
        b = np.frombuffer(raw, np.uint8).reshape(-1, 3).astype(np.int32)
        v = b[:, 0] | (b[:, 1] << 8) | (b[:, 2] << 16)
        # Sign-extend from 24 bits: values with bit 23 set become negative.
        v = (v ^ 0x800000) - 0x800000
        x = v.astype(np.float32) / 8388608.0
    else:
        x = np.frombuffer(raw, np.int32).astype(np.float32) / 2147483648.0

    if ch > 1:
        # Samples are interleaved per frame; average across channels.
        x = x.reshape(-1, ch).mean(axis=1)

    return x, fs
def frame_audio(x: np.ndarray, frame_len: int, offset: int = 0):
    """Split a sample array into consecutive, non-overlapping frames.

    The first ``offset`` samples are skipped, and any trailing samples that
    do not fill a whole frame are discarded.

    Args:
        x: Sample array to split.
        frame_len: Number of samples per frame.
        offset: Number of leading samples to skip; must satisfy
            ``0 <= offset < frame_len``.

    Returns:
        Array reshaped to ``(-1, frame_len)``, one frame per row.

    Raises:
        ValueError: If ``offset`` is outside ``[0, frame_len - 1]``.
    """
    offset_ok = 0 <= offset < frame_len
    if not offset_ok:
        raise ValueError("offset must be in [0, frame_len-1]")

    aligned = x[offset:]
    whole_frames = len(aligned) // frame_len
    return aligned[: whole_frames * frame_len].reshape(-1, frame_len)
def compute_power(frames: np.ndarray, fs: int, freqs: np.ndarray):
    """Measure the energy of every frame at each target frequency.

    For each frame and frequency ``f`` this evaluates
    ``|sum(x[n] * e^{-j*2*pi*f*n/fs})|^2`` after applying a Hann window,
    which reduces spectral leakage between the probed frequencies.

    Args:
        frames: 2-D array with one frame per row.
        fs: Sampling rate in Hz.
        freqs: 1-D array of frequencies to probe.

    Returns:
        Array with one row per frame and one column per frequency holding
        the squared magnitude.
    """
    _, frame_len = frames.shape
    windowed = frames * np.hanning(frame_len).astype(np.float32)

    # Phase of every (sample index, frequency) pair, kept in float32.
    sample_idx = np.arange(frame_len, dtype=np.float32)
    phase = 2.0 * np.pi * (sample_idx[:, None] * freqs[None, :] / float(fs))

    # Project onto cosine and sine; the sign of the sine term does not
    # matter because only the squared magnitude is returned.
    real = windowed @ np.cos(phase).astype(np.float32)
    imag = windowed @ np.sin(phase).astype(np.float32)
    return real * real + imag * imag
def kmeans2_threshold(v: np.ndarray, iters: int = 50) -> float:
    """Find a two-class threshold for 1-D data with k-means (k=2).

    The two centres start at the 25th and 75th percentiles and are refined
    until they move by less than ``1e-6`` in total, one cluster becomes
    empty, or ``iters`` iterations have run. The threshold is the midpoint
    between the two centres. Passing ``log10(power)`` as ``v`` is the
    recommended usage.

    Args:
        v: 1-D array of values to split.
        iters: Maximum number of refinement iterations.

    Returns:
        Midpoint between the two cluster centres.
    """
    centre_a, centre_b = np.percentile(v, 25), np.percentile(v, 75)

    for _ in range(iters):
        # Ties go to the first centre.
        nearer_a = np.abs(v - centre_a) <= np.abs(v - centre_b)
        if nearer_a.all() or not nearer_a.any():
            # One cluster would be empty; keep the current centres.
            break

        new_a = v[nearer_a].mean()
        new_b = v[~nearer_a].mean()
        moved = abs(new_a - centre_a) + abs(new_b - centre_b)
        centre_a, centre_b = new_a, new_b
        if moved < 1e-6:
            break

    # The midpoint does not depend on which centre is the smaller one.
    return (centre_a + centre_b) / 2.0
def bits_from_power_kmeans(power: np.ndarray):
    """Turn per-frame power readings into bits, one threshold per frequency.

    Each frequency column of ``log10(power)`` is split into two classes by
    ``kmeans2_threshold``; a bit is set where the log-power exceeds that
    column's threshold.

    Args:
        power: 2-D array indexed as ``(frame, frequency)``.

    Returns:
        Boolean array of the same shape, ``True`` where the frame's
        log-power is above the column threshold.
    """
    # The small offset keeps log10 finite when a power value is exactly 0.
    log_power = np.log10(power + 1e-20)

    n_freq = log_power.shape[1]
    thresholds = np.empty(n_freq, dtype=np.float32)
    for col in range(n_freq):
        thresholds[col] = kmeans2_threshold(log_power[:, col])

    return log_power > thresholds
def pack_bits_to_bytes(bits: np.ndarray, invert=False, msb_first=False, reverse_each_byte=False) -> bytes:
    """Pack a (n_frames, n_bits_per_frame) bit matrix into bytes.

    Each row is packed independently, eight columns per byte, and the rows
    are concatenated in order. By default column 0 maps to bit 0 of the
    byte (LSB-first). Rows whose width is not a multiple of 8 are padded
    with zero bits at the end; the padding is added after inversion, so it
    is always zero.

    Args:
        bits: 2-D array of bits; any truthy value counts as a set bit.
        invert: Flip every bit before packing.
        msb_first: Map the first column of each group of 8 to bit 7
            instead of bit 0.
        reverse_each_byte: Reverse the bit order of every packed byte.
            Combined with ``msb_first`` the two reversals cancel out.

    Returns:
        The packed bytes, row after row.
    """
    # Normalise to bool first so that inversion is a logical NOT. On an
    # integer 0/1 array, ``~`` would give -1/-2, which are both truthy.
    flags = np.asarray(bits).astype(bool)
    if invert:
        flags = ~flags

    n_frames, n_bits = flags.shape
    pad = -n_bits % 8
    if pad:
        filler = np.zeros((n_frames, pad), dtype=bool)
        flags = np.concatenate([flags, filler], axis=1)

    if flags.size == 0:
        return b""

    # msb_first and reverse_each_byte each flip the in-byte bit order, so
    # the effective order is MSB-first only when exactly one of them is set.
    order = "big" if bool(msb_first) != bool(reverse_each_byte) else "little"
    return np.packbits(flags, axis=1, bitorder=order).tobytes()
def cut_png_to_iend(data: bytes):
    """Extract a complete PNG stream from ``data``, if one is present.

    Starting at the first occurrence of the PNG signature, the chunk list
    is walked using each chunk's 4-byte big-endian length until an ``IEND``
    chunk is reached. Chunk CRCs are skipped over, not verified.

    Args:
        data: Byte string that may contain a PNG somewhere inside it.

    Returns:
        The bytes from the signature through the end of the ``IEND`` chunk,
        or ``None`` when there is no signature, a chunk runs past the end
        of the data, or no ``IEND`` chunk is found.
    """
    start = data.find(PNG_SIG)
    if start < 0:
        return None

    stream = data[start:]
    if not stream.startswith(PNG_SIG):
        return None

    total = len(stream)
    cursor = 8  # first chunk begins right after the 8-byte signature
    try:
        # Every chunk needs at least 12 bytes: length, type and CRC.
        while total - cursor >= 12:
            header = stream[cursor:cursor + 8]
            length = int.from_bytes(header[:4], "big")
            chunk_end = cursor + 12 + length
            if chunk_end > total:
                return None
            if header[4:] == b"IEND":
                return stream[:chunk_end]
            cursor = chunk_end
    except Exception:
        return None
    return None
def main():
    """Interactively decode a multi-tone WAV file into a PNG.

    Prompts for the WAV path, framing parameters, probe frequencies and the
    output name. It then measures per-frame power at each frequency,
    thresholds it into bits and tries all 16 combinations of frequency
    order, bit inversion, bit order and per-byte reversal. The
    highest-scoring combination is reported. If it contains a complete PNG
    that PNG is written out; otherwise the raw decoded bytes are written to
    a ``.bin`` file next to the requested output name.
    """
    print("WAV -> PNG 多频并行编码解码(K-means 判 0/1,交互版)")

    wav_path = input("WAV 路径: ").strip()
    if not (wav_path and os.path.exists(wav_path)):
        print("文件不存在或未输入。")
        return

    # Each numeric prompt falls back to the default shown in brackets when
    # the answer is left blank.
    frame_len_text = input("每帧采样点数 frame_len [4800]: ").strip()
    frame_len = int(frame_len_text) if frame_len_text else 4800

    offset_text = input("offset 对齐 [0]: ").strip()
    offset = int(offset_text) if offset_text else 0

    nfreq_text = input("频率个数 N [8]: ").strip()
    nfreq = int(nfreq_text) if nfreq_text else 8

    answer = input("手动输入频率列表?(y/N): ").strip().lower()
    if answer in ("y", "yes"):
        listing = input("freqs(逗号分隔,例如 100,200,400,...): ").strip()
        tokens = [t.strip() for t in listing.split(",") if t.strip()]
        freqs = np.array([float(t) for t in tokens], dtype=np.float32)
        if len(freqs) != nfreq:
            print(f"你输入了 {len(freqs)} 个频率,但 N={nfreq},退出。")
            return
    else:
        # Geometric series: base, base*ratio, base*ratio**2, ...
        base_text = input("base [100]: ").strip()
        ratio_text = input("ratio [2]: ").strip()
        base = float(base_text) if base_text else 100.0
        ratio = float(ratio_text) if ratio_text else 2.0
        freqs = np.array([base * (ratio ** i) for i in range(nfreq)], dtype=np.float32)

    out_png = input("输出 PNG 文件名 [decoded.png]: ").strip() or "decoded.png"

    x, fs = read_wav_mono(wav_path)
    frames = frame_audio(x, frame_len, offset)
    print(f"[+] fs={fs}, frames={len(frames)}, frame_len={frame_len}, offset={offset}")
    print(f"[+] freqs={freqs.tolist()}")

    bits0 = bits_from_power_kmeans(compute_power(frames, fs, freqs))

    # Enumerate every mapping option in a fixed order so that ties between
    # equally scored trials resolve to the earliest combination.
    choices = (False, True)
    combos = [
        (rf, inv, msb, rb)
        for rf in choices
        for inv in choices
        for msb in choices
        for rb in choices
    ]

    trials = []
    for revfreq, invert, msb_first, revbyte in combos:
        bits = bits0[:, ::-1] if revfreq else bits0
        data = pack_bits_to_bytes(
            bits,
            invert=invert,
            msb_first=msb_first,
            reverse_each_byte=revbyte,
        )
        typ, pos = detect_magic(data)
        png = cut_png_to_iend(data)

        # A PNG signature is worth 1000; a complete PNG through IEND adds 5000.
        score = 0
        if typ == "png" and pos >= 0:
            score += 1000
        if png is not None:
            score += 5000
        trials.append((score, typ, pos, revfreq, invert, msb_first, revbyte, data, png))

    # max() returns the first trial with the highest score.
    best = max(trials, key=lambda t: t[0])
    score, typ, pos, revfreq, invert, msb_first, revbyte, data, png = best

    print("\n[+] Best mapping:")
    print(" reverse_freq_order =", revfreq)
    print(" invert_bits =", invert)
    print(" msb_first =", msb_first)
    print(" reverse_each_byte =", revbyte)
    print(" detected =", typ, "pos =", pos, "score =", score)
    print(" first16 =", data[:16].hex().upper())

    if png is not None:
        with open(out_png, "wb") as f:
            f.write(png)
        print("[+] 写出 PNG:", out_png, "size =", len(png), "bytes")
        return

    # No complete PNG: dump the best byte stream for manual inspection.
    bin_out = os.path.splitext(out_png)[0] + ".bin"
    with open(bin_out, "wb") as f:
        f.write(data)
    print("[-] 没能切到完整 PNG(IEND)。已输出字节流:", bin_out)
# Run the interactive decoder only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()