Category: Forensics
Flag: EH4X{0n3_tr4ck_m1nd_tw0_tr4ck_f1les}
Challenge Description
In a colony of many, one penguin’s path is an anomaly. Silence the crowd to hear the individual.
Analysis
The first useful clue was the container layout itself: one video stream and two FLAC audio streams inside the same MKV. That matched the prompt language (“colony of many”) and suggested the solve was probably about isolating one stream from a mixture instead of visual stego on the video frames.
mkvinfo "challenge.mkv"
| + Track
| + Track number: 2
| + Name: English (Stereo)
| + Codec ID: A_FLAC
| + Track
| + Track number: 3
| + Name: English (5.1 Surround)
| + Codec ID: A_FLAC
| + Tag
| + Simple
| + Name: COMMENT
| + String: EH4X{k33p_try1ng}
The COMMENT value was a decoy (EH4X{k33p_try1ng}), so the real path had to lie in the relationship between the two audio tracks.

I extracted both audio tracks, subtracted them sample-by-sample, and amplified the residual signal. The key observation was that the true difference amplitude is tiny (min/max -153..150), exactly what you expect when two nearly-identical tracks hide a low-energy payload in their delta.
# build_true_diff.py
import wave
import numpy as np
def read(path):
    """Load a 16-bit PCM WAV file as a widened sample matrix.

    Args:
        path: Path of the WAV file to open.

    Returns:
        A ``(fs, arr)`` tuple: ``fs`` is the frame rate from the WAV header
        and ``arr`` is an ``int32`` array of shape ``(frames, channels)``.

    Raises:
        ValueError: If the file does not store 2-byte samples. The raw bytes
            are decoded as int16, so any other width would be misread
            silently.
    """
    with wave.open(path, "rb") as w:
        width = w.getsampwidth()
        if width != 2:
            raise ValueError(
                f"{path}: expected 16-bit samples, got {8 * width}-bit"
            )
        fs = w.getframerate()
        ch = w.getnchannels()
        n = w.getnframes()
        raw = w.readframes(n)
    # Widen to int32 so that later arithmetic on the samples (e.g. a
    # difference of two tracks) cannot wrap around the int16 range.
    arr = np.frombuffer(raw, dtype=np.int16).reshape(-1, ch).astype(np.int32)
    return fs, arr
def write_mono(path, fs, arr):
    """Write a one-dimensional sample array as a mono 16-bit PCM WAV file.

    Args:
        path: Destination WAV path.
        fs: Frame rate to store in the WAV header.
        arr: 1-D sequence of integer samples. Values outside the int16 range
            are clipped to ``[-32768, 32767]`` before being written.

    Raises:
        ValueError: If ``arr`` is not one-dimensional. A multi-channel array
            would otherwise be written interleaved into a single channel.
    """
    samples = np.asarray(arr)
    # Validate before opening the file so a bad call leaves nothing on disk.
    if samples.ndim != 1:
        raise ValueError(
            f"write_mono expects a 1-D array, got shape {samples.shape}"
        )
    # Clip first: a bare astype(int16) would wrap large values around.
    out = np.clip(samples, -32768, 32767).astype(np.int16)
    with wave.open(path, "wb") as w:
        w.setnchannels(1)
        w.setsampwidth(2)
        w.setframerate(fs)
        w.writeframes(out.tobytes())
# Load the two extracted audio tracks.
fs0, a0 = read("penguin_a0.wav")
fs1, a1 = read("penguin_a1.wav")
# Explicit check rather than `assert`, which is stripped under `python -O`:
# a sample-by-sample difference needs matching rate and shape.
if fs0 != fs1 or a0.shape != a1.shape:
    raise ValueError(
        f"tracks do not line up: rate {fs0} vs {fs1}, "
        f"shape {a0.shape} vs {a1.shape}"
    )
# Residual between the two tracks.
d = a0 - a1
# Keep only channel index 1 (the "R" in the output name) and apply a x64
# gain before writing it out as mono.
write_mono("tdiff_R_x64.wav", fs0, d[:, 1] * 64)
print("generated mono true-difference tracks")
print("min/max", int(d.min()), int(d.max()))
python3.12 build_true_diff.py
generated mono true-difference tracks
min/max -153 150
That still produced cramped text in the spectrogram, so I stretched time by 8x (atempo=0.5 three times). This was the turning point: same signal content, but spread wider on the time axis so glyphs become readable.
ffmpeg -y -i "tdiff_R_x64.wav" -filter:a "atempo=0.5,atempo=0.5,atempo=0.5" "tdiff_R_x64_slow8.wav"
Input #0, wav, from 'tdiff_R_x64.wav':
Duration: 00:01:03.02, bitrate: 705 kb/s
Output #0, wav, to 'tdiff_R_x64_slow8.wav':
size= 43405KiB time=00:08:23.93 bitrate= 705.6kbits/s
Then I rendered the spectrogram from that stretched right-channel residual, which produced the image where the flag text became human-readable.
ffmpeg -y -i "tdiff_R_x64_slow8.wav" -lavfi "showspectrumpic=s=16000x2000:legend=disabled:mode=combined:color=intensity:scale=lin" -frames:v 1 -update 1 "tdiff_R_x64_slow8_spec_lin.png"
Input #0, wav, from 'tdiff_R_x64_slow8.wav':
Duration: 00:08:23.94, bitrate: 705 kb/s
Output #0, image2, to 'tdiff_R_x64_slow8_spec_lin.png':
Stream #0:0: Video: png, ... 16000x2000
From tdiff_R_x64_slow8_spec_lin.png, the flag was read manually as:
EH4X{0n3_tr4ck_m1nd_tw0_tr4ck_f1les}
Solution
# solve.py
import subprocess
import wave
import numpy as np
def run(cmd: list[str]) -> None:
    """Run an external command and fail loudly if it exits non-zero.

    Args:
        cmd: Program and arguments as an argv list (no shell is involved).

    Raises:
        subprocess.CalledProcessError: If the command returns a non-zero
            exit status (``check=True``).
    """
    subprocess.run(cmd, check=True)
def read_wav(path: str) -> tuple[int, np.ndarray]:
    """Load a 16-bit PCM WAV file as a widened sample matrix.

    Args:
        path: Path of the WAV file to open.

    Returns:
        A ``(fs, arr)`` tuple: ``fs`` is the frame rate from the WAV header
        and ``arr`` is an ``int32`` array of shape ``(frames, channels)``.

    Raises:
        ValueError: If the file does not store 2-byte samples. The raw bytes
            are decoded as int16, so any other width would be misread
            silently.
    """
    with wave.open(path, "rb") as w:
        width = w.getsampwidth()
        if width != 2:
            raise ValueError(
                f"{path}: expected 16-bit samples, got {8 * width}-bit"
            )
        fs = w.getframerate()
        ch = w.getnchannels()
        n = w.getnframes()
        raw = w.readframes(n)
    # Widen to int32 so that later arithmetic on the samples (e.g. a
    # difference of two tracks) cannot wrap around the int16 range.
    arr = np.frombuffer(raw, dtype=np.int16).reshape(-1, ch).astype(np.int32)
    return fs, arr
def write_mono(path: str, fs: int, arr: np.ndarray) -> None:
    """Write a one-dimensional sample array as a mono 16-bit PCM WAV file.

    Args:
        path: Destination WAV path.
        fs: Frame rate to store in the WAV header.
        arr: 1-D array of integer samples. Values outside the int16 range
            are clipped to ``[-32768, 32767]`` before being written.

    Raises:
        ValueError: If ``arr`` is not one-dimensional. A multi-channel array
            would otherwise be written interleaved into a single channel.
    """
    samples = np.asarray(arr)
    # Validate before opening the file so a bad call leaves nothing on disk.
    if samples.ndim != 1:
        raise ValueError(
            f"write_mono expects a 1-D array, got shape {samples.shape}"
        )
    # Clip first: a bare astype(int16) would wrap large values around.
    out = np.clip(samples, -32768, 32767).astype(np.int16)
    with wave.open(path, "wb") as w:
        w.setnchannels(1)
        w.setsampwidth(2)
        w.setframerate(fs)
        w.writeframes(out.tobytes())
def main() -> None:
    """Run the whole solve pipeline and leave the spectrogram PNG on disk.

    Steps: extract both audio streams of ``challenge.mkv`` to 16-bit WAV,
    subtract them sample-by-sample, write channel index 1 of the difference
    with a x64 gain, slow that file down with three chained ``atempo=0.5``
    filters, and render the result with ``showspectrumpic``.

    Raises:
        ValueError: If the two extracted tracks differ in frame rate or
            shape, so they cannot be subtracted element-wise.
        subprocess.CalledProcessError: If any ffmpeg invocation fails.
    """
    # Decode each audio stream to PCM so the wave module can read it.
    for index in (0, 1):
        run([
            "ffmpeg", "-y", "-i", "challenge.mkv",
            "-map", f"0:a:{index}", "-c:a", "pcm_s16le",
            f"penguin_a{index}.wav",
        ])

    fs0, a0 = read_wav("penguin_a0.wav")
    fs1, a1 = read_wav("penguin_a1.wav")
    # Explicit check rather than `assert`, which is stripped under
    # `python -O`.
    if fs0 != fs1 or a0.shape != a1.shape:
        raise ValueError(
            f"tracks do not line up: rate {fs0} vs {fs1}, "
            f"shape {a0.shape} vs {a1.shape}"
        )

    d = a0 - a1
    # Channel index 1 (the "R" in the file name), amplified before writing.
    write_mono("tdiff_R_x64.wav", fs0, d[:, 1] * 64)

    # Three halvings of tempo: the output runs at 1/8 of the original speed.
    run([
        "ffmpeg", "-y", "-i", "tdiff_R_x64.wav",
        "-filter:a", "atempo=0.5,atempo=0.5,atempo=0.5",
        "tdiff_R_x64_slow8.wav",
    ])
    run([
        "ffmpeg", "-y", "-i", "tdiff_R_x64_slow8.wav",
        "-lavfi", "showspectrumpic=s=16000x2000:legend=disabled:mode=combined:color=intensity:scale=lin",
        "-frames:v", "1", "-update", "1", "tdiff_R_x64_slow8_spec_lin.png",
    ])
    print("Generated: tdiff_R_x64_slow8_spec_lin.png")
if __name__ == "__main__":
    main()
python3.12 solve.py
Generated: tdiff_R_x64_slow8_spec_lin.png
EH4X{0n3_tr4ck_m1nd_tw0_tr4ck_f1les}