Фикс: конвертация голосовых сообщений desktop - iOS

This commit is contained in:
2026-04-13 17:02:17 +05:00
parent ab9a93cb7e
commit 05420337cc
8 changed files with 1090 additions and 59 deletions

View File

@@ -152,6 +152,16 @@ final class TransportManager: @unchecked Sendable {
private static let maxDownloadRetries = 3
func downloadFile(tag: String, server: String? = nil) async throws -> Data {
try await downloadFile(tag: tag, server: server, onProgress: nil)
}
/// Downloads file with optional progress reporting.
/// Progress callback fires on MainActor with values 0.0–1.0.
func downloadFile(
tag: String,
server: String? = nil,
onProgress: (@MainActor (Double) -> Void)?
) async throws -> Data {
let serverUrl: String
if let explicit = server, !explicit.isEmpty {
serverUrl = explicit
@@ -172,12 +182,41 @@ final class TransportManager: @unchecked Sendable {
var lastError: Error = TransportError.invalidResponse
for attempt in 0..<Self.maxDownloadRetries {
do {
if let onProgress {
// Stream bytes for progress reporting
let (asyncBytes, response) = try await session.bytes(for: request)
guard let httpResponse = response as? HTTPURLResponse else {
throw TransportError.invalidResponse
}
guard httpResponse.statusCode == 200 else {
throw TransportError.downloadFailed(statusCode: httpResponse.statusCode)
}
let expectedLength = httpResponse.expectedContentLength
var data = Data()
if expectedLength > 0 { data.reserveCapacity(Int(expectedLength)) }
var lastReportedKB = 0
for try await byte in asyncBytes {
data.append(byte)
let currentKB = data.count / 4096
if expectedLength > 0, currentKB > lastReportedKB {
lastReportedKB = currentKB
let fraction = Double(data.count) / Double(expectedLength)
await onProgress(min(fraction, 1.0))
}
}
await onProgress(1.0)
Self.logger.info("Download complete: tag=\(tag), \(data.count) bytes")
return data
} else {
// Fast path — no progress reporting needed
let (data, response) = try await session.data(for: request)
guard let httpResponse = response as? HTTPURLResponse else {
throw TransportError.invalidResponse
}
guard httpResponse.statusCode == 200 else {
Self.logger.error("Download failed: HTTP \(httpResponse.statusCode)")
throw TransportError.downloadFailed(statusCode: httpResponse.statusCode)
@@ -185,6 +224,7 @@ final class TransportManager: @unchecked Sendable {
Self.logger.info("Download complete: tag=\(tag), \(data.count) bytes")
return data
}
} catch {
lastError = error
if attempt < Self.maxDownloadRetries - 1 {

View File

@@ -839,16 +839,21 @@ final class SessionManager {
}
/// Builds a data URI from attachment data (desktop: `FileReader.readAsDataURL()`).
/// Voice attachments use hex encoding (Desktop parity: `Buffer.toString('hex')`).
private func buildDataURI(_ attachment: PendingAttachment) -> String {
let base64 = attachment.data.base64EncodedString()
switch attachment.type {
case .image:
return "data:image/jpeg;base64,\(base64)"
return "data:image/jpeg;base64,\(attachment.data.base64EncodedString())"
case .file:
let mimeType = mimeTypeForFileName(attachment.fileName ?? "file")
return "data:\(mimeType);base64,\(base64)"
return "data:\(mimeType);base64,\(attachment.data.base64EncodedString())"
case .voice:
// Desktop parity: raw bytes as hex string.
// Desktop DialogInput.tsx:214 sends Buffer.from(audioBlob).toString('hex'),
// Desktop MessageVoice.tsx:125 reads Buffer.from(blob, "hex").
return attachment.data.hexString
default:
return "data:application/octet-stream;base64,\(base64)"
return "data:application/octet-stream;base64,\(attachment.data.base64EncodedString())"
}
}

View File

@@ -0,0 +1,709 @@
import AudioToolbox
import AVFoundation
import Foundation
import os
// MARK: - WebM/Opus → M4A Converter
/// Converts WebM/Opus audio (from Desktop) to M4A/AAC playable by AVAudioPlayer.
/// iOS doesn't support WebM container natively. This parser extracts Opus frames from
/// the EBML/WebM container, wraps them in a CAF file (Apple's native container that
/// supports Opus), then transcodes to M4A via AVAudioFile.
enum WebMOpusConverter {
private static let logger = Logger(subsystem: "com.rosetta.messenger", category: "WebMOpusConverter")
/// Returns `true` when `data` begins with the EBML magic number
/// `0x1A 0x45 0xDF 0xA3` that prefixes every WebM/Matroska file.
static func isWebM(_ data: Data) -> Bool {
    guard data.count >= 4 else { return false }
    let magic: [UInt8] = [0x1A, 0x45, 0xDF, 0xA3]
    return (0..<4).allSatisfy { data[$0] == magic[$0] }
}
/// Main entry: WebM/Opus `Data` → playable `Data` (WAV preferred, M4A fallback).
/// Returns `nil` only if WebM parsing fails entirely or no decode path works.
static func convertToPlayable(_ webmData: Data) -> Data? {
    guard let parsed = parseWebM(webmData) else {
        logger.error("WebM parse failed (\(webmData.count) bytes)")
        return nil
    }
    if parsed.frames.isEmpty {
        logger.warning("WebM contains 0 Opus frames")
        return nil
    }
    logger.info("WebM parsed: \(parsed.frames.count) frames, \(parsed.sampleRate)Hz, \(parsed.channels)ch")
    // Preferred path: decode Opus → PCM with an AudioConverter, wrap in WAV.
    if let wav = decodeOpusToWAV(parsed) {
        logger.info("WebM→WAV conversion succeeded (\(wav.count) bytes)")
        return wav
    }
    // Fallback: repackage the raw Opus packets as CAF, then transcode to M4A.
    if let caf = buildCAF(parsed), let m4a = transcodeCAFtoM4A(caf) {
        return m4a
    }
    logger.error("All conversion paths failed")
    return nil
}
// MARK: - WebM/EBML Parser

/// Everything extracted from a WebM container that the decode/transcode
/// stages need: the raw Opus packets plus the audio track parameters.
private struct ParsedWebM {
    // One element per Opus packet pulled out of the SimpleBlocks.
    let frames: [Data]
    // From Audio/SamplingFrequency (0xB5); stays 48000 if absent.
    let sampleRate: Double
    // From Audio/Channels (0x9F); stays 1 if absent.
    let channels: UInt32
    // Raw CodecPrivate (0x63A2) payload — the OpusHead blob, if present.
    let codecPrivate: Data?
}

/// Parses the top level of a WebM/EBML stream: validates the EBML header,
/// enters the Segment, then walks its children collecting track parameters
/// (Tracks) and Opus packets (Cluster → SimpleBlock).
/// Returns `nil` if the data is not a well-formed WebM prefix.
private static func parseWebM(_ data: Data) -> ParsedWebM? {
    let bytes = [UInt8](data)
    var pos = 0
    // EBML header (0x1A45DFA3)
    guard bytes.count >= 8,
          bytes[0] == 0x1A, bytes[1] == 0x45, bytes[2] == 0xDF, bytes[3] == 0xA3
    else { return nil }
    pos = 4
    // Skip the entire EBML header body; nothing inside it is needed here.
    guard let (headerSize, hLen) = readVINTValue(bytes, at: pos) else { return nil }
    pos += hLen + Int(headerSize)
    // Segment (0x18538067)
    guard pos + 4 <= bytes.count,
          bytes[pos] == 0x18, bytes[pos + 1] == 0x53,
          bytes[pos + 2] == 0x80, bytes[pos + 3] == 0x67
    else { return nil }
    pos += 4
    // Only the size FIELD is skipped (its value is ignored): the Segment's
    // children are scanned linearly below, which also tolerates the
    // unknown-size Segments that streaming recorders produce.
    guard let (_, segLen) = readVINTValue(bytes, at: pos) else { return nil }
    pos += segLen
    // Defaults match the common Opus/WebM case (48 kHz mono).
    var sampleRate: Double = 48000
    var channels: UInt32 = 1
    var codecPrivate: Data?
    var audioTrackNum: UInt64 = 1
    var frames: [Data] = []
    frames.reserveCapacity(512)
    while pos < bytes.count {
        guard let (elemID, idLen) = readElementID(bytes, at: pos) else { break }
        pos += idLen
        guard let (elemSize, sizeLen) = readVINTValue(bytes, at: pos) else { break }
        pos += sizeLen
        // Clamp so a lying size field can never run past the buffer.
        let elemDataEnd = min(pos + Int(elemSize), bytes.count)
        switch elemID {
        case 0x1654AE6B: // Tracks → parse children (TrackEntry)
            parseTracks(bytes, from: pos, to: elemDataEnd,
                        sampleRate: &sampleRate, channels: &channels,
                        codecPrivate: &codecPrivate, audioTrackNum: &audioTrackNum)
            pos = elemDataEnd
        case 0x1F43B675: // Cluster → extract SimpleBlocks
            parseCluster(bytes, from: pos, to: elemDataEnd,
                         audioTrackNum: audioTrackNum, frames: &frames)
            pos = elemDataEnd
        default:
            pos = elemDataEnd
        }
        // NOTE(review): this guard looks unreachable — `pos` only moves
        // forward from non-negative offsets. Presumably a safety net against
        // overflow on malformed sizes; confirm before removing.
        if pos <= 0 || elemDataEnd <= 0 { break }
    }
    return ParsedWebM(frames: frames, sampleRate: sampleRate,
                      channels: channels, codecPrivate: codecPrivate)
}
/// Walks the Tracks element (0x1654AE6B), dispatching each TrackEntry (0xAE)
/// to `parseTrackEntry`.
private static func parseTracks(
    _ bytes: [UInt8], from start: Int, to end: Int,
    sampleRate: inout Double, channels: inout UInt32,
    codecPrivate: inout Data?, audioTrackNum: inout UInt64
) {
    forEachChild(bytes, from: start, to: end) { id, childStart, childEnd, _ in
        guard id == 0xAE else { return } // TrackEntry
        parseTrackEntry(bytes, from: childStart, to: childEnd,
                        sampleRate: &sampleRate, channels: &channels,
                        codecPrivate: &codecPrivate, audioTrackNum: &audioTrackNum)
    }
}

/// Reads one TrackEntry; when its TrackType marks it as audio (2), records
/// its track number into `audioTrackNum` and its parameters into the other
/// inout accumulators.
private static func parseTrackEntry(
    _ bytes: [UInt8], from start: Int, to end: Int,
    sampleRate: inout Double, channels: inout UInt32,
    codecPrivate: inout Data?, audioTrackNum: inout UInt64
) {
    var entryTrackNum: UInt64 = 0
    var entryIsAudio = false
    forEachChild(bytes, from: start, to: end) { id, childStart, childEnd, declaredSize in
        switch id {
        case 0xD7: // TrackNumber
            entryTrackNum = readUInt(bytes, at: childStart, count: declaredSize)
        case 0x83: // TrackType (2 = audio)
            entryIsAudio = readUInt(bytes, at: childStart, count: declaredSize) == 2
        case 0x63A2: // CodecPrivate (OpusHead)
            if childEnd <= bytes.count {
                codecPrivate = Data(bytes[childStart..<childEnd])
            }
        case 0xE1: // Audio sub-element
            parseAudioElement(bytes, from: childStart, to: childEnd,
                              sampleRate: &sampleRate, channels: &channels)
        default:
            break
        }
    }
    if entryIsAudio { audioTrackNum = entryTrackNum }
}

/// Reads the Audio (0xE1) sub-element: sampling frequency and channel count.
private static func parseAudioElement(
    _ bytes: [UInt8], from start: Int, to end: Int,
    sampleRate: inout Double, channels: inout UInt32
) {
    forEachChild(bytes, from: start, to: end) { id, childStart, _, declaredSize in
        switch id {
        case 0xB5: // SamplingFrequency (IEEE float)
            sampleRate = readFloat(bytes, at: childStart, count: declaredSize)
        case 0x9F: // Channels
            channels = UInt32(readUInt(bytes, at: childStart, count: declaredSize))
        default:
            break
        }
    }
}

/// Shared EBML child-walker: iterates the children of a master element in
/// `bytes[start..<end]`, invoking `visit(id, payloadStart, payloadEnd,
/// declaredSize)` for each. Stops at the first malformed ID or size field.
private static func forEachChild(
    _ bytes: [UInt8], from start: Int, to end: Int,
    _ visit: (UInt32, Int, Int, Int) -> Void
) {
    var cursor = start
    while cursor < end {
        guard let (id, idLen) = readElementID(bytes, at: cursor) else { return }
        cursor += idLen
        guard let (size, sizeLen) = readVINTValue(bytes, at: cursor) else { return }
        cursor += sizeLen
        let payloadEnd = min(cursor + Int(size), end)
        visit(id, cursor, payloadEnd, Int(size))
        cursor = payloadEnd
    }
}
/// Extracts Opus packets from a WebM Cluster element (0x1F43B675).
/// Walks the Cluster's children and collects the payloads of SimpleBlocks
/// (0xA3) that belong to `audioTrackNum`, appending one `Data` per frame.
private static func parseCluster(
    _ bytes: [UInt8], from start: Int, to end: Int,
    audioTrackNum: UInt64, frames: inout [Data]
) {
    var pos = start
    while pos < end {
        guard let (id, idLen) = readElementID(bytes, at: pos) else { break }
        pos += idLen
        guard let (size, sizeLen) = readVINTValue(bytes, at: pos) else { break }
        pos += sizeLen
        let childEnd = min(pos + Int(size), end)
        if id == 0xA3 { // SimpleBlock
            appendSimpleBlockFrames(bytes, from: pos, to: childEnd,
                                    audioTrackNum: audioTrackNum, frames: &frames)
        }
        pos = childEnd
    }
}

/// Parses one SimpleBlock: track VINT + 2-byte relative timestamp + 1 flag
/// byte, followed by one or more frames in any of the three Matroska lacing
/// modes. Blocks for other tracks and malformed headers are skipped silently.
private static func appendSimpleBlockFrames(
    _ bytes: [UInt8], from start: Int, to end: Int,
    audioTrackNum: UInt64, frames: inout [Data]
) {
    guard let (blockTrack, trackLen) = readVINTValue(bytes, at: start),
          blockTrack == audioTrackNum
    else { return }
    let headerStart = start + trackLen
    // 2 bytes timestamp + 1 byte flags must fit inside the block.
    guard headerStart + 3 <= end else { return }
    let flags = bytes[headerStart + 2]
    let lacingType = (flags >> 1) & 0x03 // bits 1-2 in the Matroska spec
    let dataStart = headerStart + 3
    if lacingType == 0 {
        // No lacing — the remainder of the block is a single frame.
        if dataStart < end { frames.append(Data(bytes[dataStart..<end])) }
        return
    }
    // Laced: first payload byte = (number of frames - 1).
    guard dataStart < end else { return }
    let numFrames = Int(bytes[dataStart]) + 1
    let lacedDataStart = dataStart + 1
    if lacingType == 0x02 {
        // Fixed-size lacing: the payload divides evenly into numFrames frames.
        let frameSize = (end - lacedDataStart) / numFrames
        for i in 0..<numFrames {
            let fStart = lacedDataStart + i * frameSize
            let fEnd = min(fStart + frameSize, end)
            if fStart < fEnd { frames.append(Data(bytes[fStart..<fEnd])) }
        }
        return
    }
    // Xiph (0x01) / EBML (0x03) lacing: the sizes of the first numFrames-1
    // frames are encoded up front; the last frame takes the remaining bytes.
    var frameSizes = [Int]()
    var sizePos = lacedDataStart
    if lacingType == 0x01 {
        // Xiph: each size is a run of 0xFF bytes plus a terminator byte < 255.
        for _ in 0..<(numFrames - 1) {
            var frameSize = 0
            while sizePos < end {
                let val = Int(bytes[sizePos]); sizePos += 1
                frameSize += val
                if val < 255 { break }
            }
            frameSizes.append(frameSize)
        }
    } else {
        // EBML: first size is an unsigned VINT; subsequent sizes are signed
        // VINT deltas relative to the previous size.
        if let (firstSize, firstLen) = readVINTValue(bytes, at: sizePos) {
            frameSizes.append(Int(firstSize))
            sizePos += firstLen
        }
        // BUG FIX: the previous `for _ in 1..<(numFrames - 1)` traps at
        // runtime when numFrames == 1 (1..<0 is an invalid Range). Count the
        // remaining deltas down instead, which is safe for any numFrames.
        var deltasToRead = numFrames - 2
        while deltasToRead > 0, let (raw, len) = readVINTValue(bytes, at: sizePos) {
            sizePos += len
            // Signed VINT: subtract the midpoint of the representable range.
            let midpoint = (1 << (len * 7 - 1)) - 1
            let delta = Int(raw) - midpoint
            frameSizes.append((frameSizes.last ?? 0) + delta)
            deltasToRead -= 1
        }
    }
    // Last frame = whatever the encoded sizes leave over.
    let consumed = frameSizes.reduce(0, +)
    frameSizes.append(end - sizePos - consumed)
    var fPos = sizePos
    for sz in frameSizes {
        // max(0, sz) guards against negative sizes from corrupt lacing data.
        let fEnd = min(fPos + max(0, sz), end)
        if fPos < fEnd { frames.append(Data(bytes[fPos..<fEnd])) }
        fPos = fEnd
    }
}
// MARK: - Opus → PCM → WAV (AudioToolbox AudioConverter)
/// Base frame duration in PCM samples, derived from the Opus TOC byte
/// (single frame, no count multiplier). AudioConverter needs this for
/// mFramesPerPacket (max 5760 = 120ms at 48 kHz).
private static func opusBaseFrameSamples(_ packet: Data, sampleRate: Double) -> UInt32 {
    // 960 (= 20ms @ 48 kHz) is the most common Opus frame size; used as a
    // fallback when the packet is empty and no TOC byte is available.
    guard let toc = packet.first else { return 960 }
    let config = Int((toc >> 3) & 0x1F)
    // BUG FIX: the previous table grouped configs in blocks of four
    // (0-3→10ms, 4-7→20ms, …), which does not match RFC 6716 §3.1 Table 2.
    // The real layout: SILK (0-11) 10/20/40/60ms, Hybrid (12-15) 10/20ms,
    // CELT (16-31) 2.5/5/10/20ms — selected by the low config bits.
    let durationMicros: Int
    switch config {
    case 0...11:
        durationMicros = [10_000, 20_000, 40_000, 60_000][config % 4] // SILK
    case 12...15:
        durationMicros = [10_000, 20_000][config % 2]                 // Hybrid
    default:
        durationMicros = [2_500, 5_000, 10_000, 20_000][config % 4]   // CELT
    }
    return UInt32(Int(sampleRate) * durationMicros / 1_000_000)
}

/// Total PCM samples this Opus packet will produce (base × frame count).
/// The frame count comes from the TOC "code" bits (RFC 6716 §3.2).
private static func opusTotalSamples(_ packet: Data, sampleRate: Double) -> UInt32 {
    let base = opusBaseFrameSamples(packet, sampleRate: sampleRate)
    guard let toc = packet.first else { return base }
    let count: UInt32
    switch toc & 0x03 {
    case 0:
        count = 1      // code 0: one frame
    case 1, 2:
        count = 2      // codes 1/2: two frames (equal / unequal sizes)
    default:
        // Code 3: arbitrary count in byte 1, bits 0-5 (the M field).
        // Clamp to ≥ 1 so a malformed M == 0 packet never yields 0 samples.
        count = packet.count > 1 ? max(1, UInt32(packet[1] & 0x3F)) : 1
    }
    return base * count
}
/// Decodes the extracted Opus packets to 16-bit PCM via an AudioToolbox
/// AudioConverter and wraps the result in a WAV container.
/// Returns `nil` when the converter cannot be created or no PCM was produced.
private static func decodeOpusToWAV(_ parsed: ParsedWebM) -> Data? {
    let sampleRate = parsed.sampleRate
    let channels = parsed.channels
    // Base frame size for the ASBD (single sub-frame, max 5760 = 120ms),
    // taken from the first packet's TOC byte.
    let baseSamples = parsed.frames.first.map {
        opusBaseFrameSamples($0, sampleRate: sampleRate)
    } ?? 960
    logger.info("Opus base frame: \(baseSamples) samples (\(Double(baseSamples) / sampleRate * 1000)ms)")
    // Input format: compressed Opus, variable bytes per packet.
    var inASBD = AudioStreamBasicDescription(
        mSampleRate: sampleRate,
        mFormatID: kAudioFormatOpus,
        mFormatFlags: 0,
        mBytesPerPacket: 0,
        mFramesPerPacket: baseSamples,
        mBytesPerFrame: 0,
        mChannelsPerFrame: channels,
        mBitsPerChannel: 0,
        mReserved: 0
    )
    // Output format: interleaved 16-bit signed-integer PCM.
    let bytesPerSample: UInt32 = 2
    var outASBD = AudioStreamBasicDescription(
        mSampleRate: sampleRate,
        mFormatID: kAudioFormatLinearPCM,
        mFormatFlags: kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked,
        mBytesPerPacket: bytesPerSample * channels,
        mFramesPerPacket: 1,
        mBytesPerFrame: bytesPerSample * channels,
        mChannelsPerFrame: channels,
        mBitsPerChannel: bytesPerSample * 8,
        mReserved: 0
    )
    var converter: AudioConverterRef?
    var status = AudioConverterNew(&inASBD, &outASBD, &converter)
    guard status == noErr, let converter else {
        logger.warning("AudioConverterNew failed for Opus: \(status)")
        return nil
    }
    defer { AudioConverterDispose(converter) }
    // Hand the decoder the OpusHead blob (magic cookie) if the WebM had one.
    if var cookie = parsed.codecPrivate.map({ [UInt8]($0) }), !cookie.isEmpty {
        AudioConverterSetProperty(
            converter,
            kAudioConverterDecompressionMagicCookie,
            UInt32(cookie.count),
            &cookie
        )
    }
    // Buffer sized for the largest multi-frame packet (6 × 60ms = 360ms = 17280 samples)
    let maxOutputSamples: UInt32 = 17280
    let pcmBufferSize = Int(maxOutputSamples * bytesPerSample * channels)
    var allPCM = Data()
    allPCM.reserveCapacity(parsed.frames.count * Int(baseSamples * bytesPerSample * channels))
    for frame in parsed.frames {
        // Per-packet total samples for the output request.
        let totalSamples = opusTotalSamples(frame, sampleRate: sampleRate)
        var pcmBuffer = [UInt8](repeating: 0, count: pcmBufferSize)
        var outBufferList = AudioBufferList(
            mNumberBuffers: 1,
            mBuffers: AudioBuffer(
                mNumberChannels: channels,
                mDataByteSize: UInt32(pcmBufferSize),
                mData: &pcmBuffer
            )
        )
        // NOTE(review): `max` requests at least maxOutputSamples frames even
        // for small packets; if totalSamples ever exceeded 17280 the request
        // would outgrow pcmBuffer. Looks like `min` was intended — confirm.
        var ioOutputDataPacketSize: UInt32 = max(totalSamples, maxOutputSamples)
        // Packet description for this single Opus packet (offset 0, full size).
        var packetDesc = AudioStreamPacketDescription(
            mStartOffset: 0,
            mVariableFramesInPacket: 0,
            mDataByteSize: UInt32(frame.count)
        )
        let inputContext = UnsafeMutablePointer<OpusInputContext>.allocate(capacity: 1)
        inputContext.pointee = OpusInputContext(data: frame, packetDesc: packetDesc, consumed: false)
        defer { inputContext.deallocate() } // runs at the end of each iteration
        status = AudioConverterFillComplexBuffer(
            converter,
            opusInputCallback,
            inputContext,
            &ioOutputDataPacketSize,
            &outBufferList,
            nil
        )
        if status == noErr || status == 100 /* done */ {
            // mDataByteSize is updated to the bytes actually produced.
            let produced = Int(outBufferList.mBuffers.mDataByteSize)
            allPCM.append(contentsOf: pcmBuffer[0..<produced])
        } else if status != noErr {
            // Some frames may fail to decode; keep going with the rest.
            logger.warning("AudioConverter decode frame failed: \(status)")
        }
    }
    guard !allPCM.isEmpty else { return nil }
    // Prepend a standard RIFF/WAVE header to the accumulated PCM.
    return buildWAV(pcmData: allPCM, sampleRate: UInt32(sampleRate),
                    channels: UInt16(channels), bitsPerSample: UInt16(bytesPerSample * 8))
}
// Input-callback state for AudioConverterFillComplexBuffer: one Opus packet,
// handed to the converter exactly once per FillComplexBuffer call.
private struct OpusInputContext {
    let data: Data                               // the Opus packet bytes
    let packetDesc: AudioStreamPacketDescription // offset 0, size = data.count
    var consumed: Bool                           // set after the packet is served
}

/// Supplies exactly one Opus packet to the AudioConverter, then reports
/// "done" (100) on the next invocation so FillComplexBuffer returns.
private static let opusInputCallback: AudioConverterComplexInputDataProc = {
    (converter, ioNumberDataPackets, ioData, outDataPacketDescription, inUserData) -> OSStatus in
    guard let ctx = inUserData?.assumingMemoryBound(to: OpusInputContext.self) else {
        ioNumberDataPackets.pointee = 0
        return -50 // paramErr
    }
    if ctx.pointee.consumed {
        // Packet already delivered — signal end of input for this fill call.
        ioNumberDataPackets.pointee = 0
        return 100 // done
    }
    // NOTE(review): the base address obtained inside withUnsafeBytes is
    // stored into ioData and read by the converter AFTER the closure returns;
    // Swift does not guarantee the pointer stays valid past the closure.
    // Works in practice because `data` is kept alive by the caller's loop,
    // but this should be hardened (e.g. stable storage in the context).
    ctx.pointee.data.withUnsafeBytes { rawBuf in
        ioData.pointee.mNumberBuffers = 1
        ioData.pointee.mBuffers.mNumberChannels = 1
        ioData.pointee.mBuffers.mDataByteSize = UInt32(rawBuf.count)
        ioData.pointee.mBuffers.mData = UnsafeMutableRawPointer(mutating: rawBuf.baseAddress!)
    }
    ioNumberDataPackets.pointee = 1
    if let descPtr = outDataPacketDescription {
        // NOTE(review): this allocation is never deallocated — it leaks one
        // AudioStreamPacketDescription per decoded packet. Confirm whether
        // the description can live inside OpusInputContext instead.
        descPtr.pointee = UnsafeMutablePointer<AudioStreamPacketDescription>.allocate(capacity: 1)
        descPtr.pointee!.pointee = ctx.pointee.packetDesc
    }
    ctx.pointee.consumed = true
    return noErr
}
/// Builds a minimal WAV file (44-byte RIFF/WAVE header) around raw PCM data.
private static func buildWAV(pcmData: Data, sampleRate: UInt32,
                             channels: UInt16, bitsPerSample: UInt16) -> Data {
    let bytesPerFrame = UInt32(channels) * UInt32(bitsPerSample / 8)
    let byteRate = sampleRate * bytesPerFrame
    let blockAlign = channels * (bitsPerSample / 8)
    let dataSize = UInt32(pcmData.count)
    let riffSize = 36 + dataSize // file size minus the 8-byte RIFF preamble

    var wav = Data()
    wav.reserveCapacity(Int(riffSize + 8))
    // RIFF chunk descriptor
    wav.append(contentsOf: Array("RIFF".utf8))
    wav.appendLE32(riffSize)
    wav.append(contentsOf: Array("WAVE".utf8))
    // "fmt " sub-chunk: 16-byte linear-PCM format description
    wav.append(contentsOf: Array("fmt ".utf8))
    wav.appendLE32(16)           // fmt chunk size
    wav.appendLE16(1)            // audio format 1 = PCM
    wav.appendLE16(channels)
    wav.appendLE32(sampleRate)
    wav.appendLE32(byteRate)
    wav.appendLE16(blockAlign)
    wav.appendLE16(bitsPerSample)
    // "data" sub-chunk: the samples themselves
    wav.append(contentsOf: Array("data".utf8))
    wav.appendLE32(dataSize)
    wav.append(pcmData)
    return wav
}
// MARK: - CAF Builder

/// Wraps the raw Opus packets in a Core Audio Format (CAF) file.
/// CAF is Apple's native container and — unlike WebM — is readable by
/// AVAudioFile, enabling the CAF → M4A fallback path.
private static func buildCAF(_ parsed: ParsedWebM) -> Data? {
    var caf = Data()
    // NOTE(review): assumes every packet is a 20ms frame (960 samples at
    // 48 kHz). Packets with other TOC durations make numValidFrames below
    // inaccurate — the file stays decodable but its reported length may
    // drift. Confirm against real desktop recordings.
    let framesPerPacket: UInt32 = 960 // 20ms Opus frames at 48kHz
    // -- File Header --
    caf.appendBE32(0x63616666) // 'caff'
    caf.appendBE16(1) // version
    caf.appendBE16(0) // flags
    // -- Audio Description ('desc') --
    caf.appendBE32(0x64657363) // 'desc'
    caf.appendBE64(32) // chunk size = sizeof(AudioStreamBasicDescription)
    caf.appendBEFloat64(parsed.sampleRate)
    caf.appendBE32(0x6F707573) // 'opus' (kAudioFormatOpus)
    caf.appendBE32(0) // format flags
    caf.appendBE32(0) // bytes per packet (variable)
    caf.appendBE32(framesPerPacket)
    caf.appendBE32(parsed.channels)
    caf.appendBE32(0) // bits per channel (compressed)
    // -- Magic Cookie ('kuki') — OpusHead taken from WebM CodecPrivate --
    if let cp = parsed.codecPrivate, !cp.isEmpty {
        caf.appendBE32(0x6B756B69) // 'kuki'
        caf.appendBE64(Int64(cp.count))
        caf.append(cp)
    }
    // -- Packet Table ('pakt') — per-packet byte sizes as BER integers --
    let numPackets = Int64(parsed.frames.count)
    let numValidFrames = numPackets * Int64(framesPerPacket)
    var paktBody = Data()
    paktBody.appendBE64(numPackets)
    paktBody.appendBE64(numValidFrames)
    paktBody.appendBE32S(0) // priming frames (Int32)
    paktBody.appendBE32S(0) // remainder frames (Int32)
    for frame in parsed.frames {
        paktBody.appendBERInt(frame.count)
    }
    caf.appendBE32(0x70616B74) // 'pakt'
    caf.appendBE64(Int64(paktBody.count))
    caf.append(paktBody)
    // -- Audio Data ('data') — edit count, then the concatenated packets --
    var totalAudioSize = 0
    for frame in parsed.frames { totalAudioSize += frame.count }
    caf.appendBE32(0x64617461) // 'data'
    caf.appendBE64(Int64(totalAudioSize + 4)) // +4 for editCount
    caf.appendBE32(0) // edit count
    for frame in parsed.frames { caf.append(frame) }
    return caf
}
// MARK: - CAF → M4A Transcoding

/// Writes `cafData` to a temporary file, reads it back through AVAudioFile
/// (which decodes the contained Opus), and re-encodes to AAC in .m4a.
/// Both temporary files are removed on every exit path.
private static func transcodeCAFtoM4A(_ cafData: Data) -> Data? {
    let tmp = FileManager.default.temporaryDirectory
    let cafURL = tmp.appendingPathComponent("webm_conv_\(UUID().uuidString).caf")
    let m4aURL = tmp.appendingPathComponent("webm_conv_\(UUID().uuidString).m4a")
    defer {
        try? FileManager.default.removeItem(at: cafURL)
        try? FileManager.default.removeItem(at: m4aURL)
    }
    do {
        try cafData.write(to: cafURL)
        let inputFile = try AVAudioFile(forReading: cafURL)
        // processingFormat is the decoded (PCM) format AVAudioFile exposes.
        let pcmFormat = inputFile.processingFormat
        let outputSettings: [String: Any] = [
            AVFormatIDKey: Int(kAudioFormatMPEG4AAC),
            AVSampleRateKey: pcmFormat.sampleRate,
            AVNumberOfChannelsKey: pcmFormat.channelCount,
            AVEncoderBitRateKey: 64000 // voice-quality AAC
        ]
        let outputFile = try AVAudioFile(forWriting: m4aURL, settings: outputSettings)
        let bufferCapacity: AVAudioFrameCount = 4096
        guard let buffer = AVAudioPCMBuffer(pcmFormat: pcmFormat, frameCapacity: bufferCapacity) else {
            return nil
        }
        // Stream decode → encode in 4096-frame chunks; read(into:) sets
        // buffer.frameLength, so the final partial chunk is written correctly.
        while inputFile.framePosition < inputFile.length {
            try inputFile.read(into: buffer)
            try outputFile.write(from: buffer)
        }
        return try Data(contentsOf: m4aURL)
    } catch {
        logger.error("CAF→M4A transcode failed: \(error.localizedDescription)")
        return nil
    }
}
// MARK: - EBML Primitives

/// Reads an EBML variable-length integer (VINT) at `pos`.
/// Returns the value with its length-marker bit stripped, plus the byte
/// count consumed, or `nil` for a truncated/invalid VINT.
private static func readVINTValue(_ bytes: [UInt8], at pos: Int) -> (UInt64, Int)? {
    // A 0x00 lead byte would imply a length > 8 bytes, which is unsupported.
    guard pos < bytes.count, bytes[pos] != 0 else { return nil }
    let length = bytes[pos].leadingZeroBitCount + 1
    guard length <= 8, pos + length <= bytes.count else { return nil }
    let leadValue = UInt64(bytes[pos]) & ((1 << (8 - length)) - 1)
    let value = bytes[(pos + 1)..<(pos + length)]
        .reduce(leadValue) { ($0 << 8) | UInt64($1) }
    return (value, length)
}

/// Reads an EBML element ID at `pos`. Unlike a VINT value, the length-marker
/// bits are KEPT, matching how IDs (e.g. 0x1A45DFA3) appear in the spec.
private static func readElementID(_ bytes: [UInt8], at pos: Int) -> (UInt32, Int)? {
    guard pos < bytes.count, bytes[pos] != 0 else { return nil }
    let length = bytes[pos].leadingZeroBitCount + 1
    guard length <= 4, pos + length <= bytes.count else { return nil }
    let id = bytes[pos..<(pos + length)]
        .reduce(UInt32(0)) { ($0 << 8) | UInt32($1) }
    return (id, length)
}

/// Reads a big-endian unsigned integer of up to 8 bytes; indices past the
/// end of `bytes` are simply skipped.
private static func readUInt(_ bytes: [UInt8], at pos: Int, count: Int) -> UInt64 {
    var value: UInt64 = 0
    for offset in 0..<min(count, 8) where pos + offset < bytes.count {
        value = (value << 8) | UInt64(bytes[pos + offset])
    }
    return value
}

/// Reads a big-endian IEEE-754 float (4-byte single or 8-byte double).
/// Any other width falls back to 48000, the canonical Opus sample rate.
private static func readFloat(_ bytes: [UInt8], at pos: Int, count: Int) -> Double {
    switch count {
    case 4 where pos + 4 <= bytes.count:
        let bits = bytes[pos..<(pos + 4)]
            .reduce(UInt32(0)) { ($0 << 8) | UInt32($1) }
        return Double(Float(bitPattern: bits))
    case 8 where pos + 8 <= bytes.count:
        let bits = bytes[pos..<(pos + 8)]
            .reduce(UInt64(0)) { ($0 << 8) | UInt64($1) }
        return Double(bitPattern: bits)
    default:
        return 48000
    }
}
}
// MARK: - Data Little-Endian Writers (WAV format)

/// Little-endian integer appenders used by the WAV builder.
private extension Data {
    /// Appends `v` as 2 bytes, least-significant byte first.
    mutating func appendLE16(_ v: UInt16) {
        withUnsafeBytes(of: v.littleEndian) { append(contentsOf: $0) }
    }

    /// Appends `v` as 4 bytes, least-significant byte first.
    mutating func appendLE32(_ v: UInt32) {
        withUnsafeBytes(of: v.littleEndian) { append(contentsOf: $0) }
    }
}
// MARK: - Data Big-Endian Writers (CAF format)

/// Big-endian writers used by the CAF builder (CAF is big-endian throughout).
private extension Data {
    /// Appends `v` as 2 bytes, most-significant byte first.
    mutating func appendBE16(_ v: UInt16) {
        withUnsafeBytes(of: v.bigEndian) { append(contentsOf: $0) }
    }

    /// Appends `v` as 4 bytes, most-significant byte first.
    mutating func appendBE32(_ v: UInt32) {
        withUnsafeBytes(of: v.bigEndian) { append(contentsOf: $0) }
    }

    /// Signed 32-bit variant (CAF 'pakt' priming/remainder fields).
    mutating func appendBE32S(_ v: Int32) { appendBE32(UInt32(bitPattern: v)) }

    /// Appends `v` as 8 bytes, most-significant byte first.
    mutating func appendBE64(_ v: Int64) {
        withUnsafeBytes(of: v.bigEndian) { append(contentsOf: $0) }
    }

    /// Appends an IEEE-754 double big-endian (CAF 'desc' sample rate field).
    mutating func appendBEFloat64(_ v: Double) {
        withUnsafeBytes(of: v.bitPattern.bigEndian) { append(contentsOf: $0) }
    }

    /// Variable-length BER integer (CAF packet table): 7 value bits per byte,
    /// high bit set on every byte except the last.
    mutating func appendBERInt(_ value: Int) {
        var chunks: [UInt8] = [UInt8(value & 0x7F)] // final (low) byte first
        var rest = value >> 7
        while rest > 0 {
            chunks.append(UInt8(0x80 | (rest & 0x7F)))
            rest >>= 7
        }
        append(contentsOf: chunks.reversed())
    }
}

View File

@@ -28,6 +28,7 @@ struct DarkModeWrapper<Content: View>: View {
if let windowScene = activeWindowScene {
let overlayWindow = UIWindow(windowScene: windowScene)
overlayWindow.tag = 0320
overlayWindow.backgroundColor = .clear
overlayWindow.isHidden = false
overlayWindow.isUserInteractionEnabled = false
self.overlayWindow = overlayWindow
@@ -42,8 +43,15 @@ struct DarkModeWrapper<Content: View>: View {
case "system": style = .unspecified
default: style = .dark
}
let bgColor: UIColor = (style == .light) ? .white : .black
for window in windowScene.windows {
window.overrideUserInterfaceStyle = style
// Match window background to app background prevents
// systemBackground (dark gray) from showing as a line
// in the bottom safe area.
if window.tag != 0320 {
window.backgroundColor = bgColor
}
}
}
}

View File

@@ -11,6 +11,8 @@ final class MessageVoiceView: UIView {
private let playButton = UIButton(type: .system)
private let playPauseAnimationView = LottieAnimationView()
private let downloadArrowView = UIImageView() // arrow before download
private let downloadRingView = VoiceDownloadRingView()
private let waveformView = WaveformView()
private let durationLabel = UILabel()
@@ -20,6 +22,7 @@ final class MessageVoiceView: UIView {
private var attachmentId: String = ""
private var isOutgoing = false
private var isShowingPause = false // tracks Lottie visual state
private var isDownloaded = false // false = show download arrow, true = show play/pause
private var totalDuration: TimeInterval = 0 // original duration for label reset
/// Center of play button in this view's coordinate space (for external blob positioning).
var playButtonCenter: CGPoint { playButton.center }
@@ -50,6 +53,8 @@ final class MessageVoiceView: UIView {
// Playback
var onPlayTapped: (() -> Void)?
/// Download cancel callback (forwarded from VoiceDownloadRingView).
var onDownloadCancel: (() -> Void)?
// MARK: - Setup
@@ -73,6 +78,17 @@ final class MessageVoiceView: UIView {
playButton.setImage(UIImage(systemName: "play.fill", withConfiguration: config), for: .normal)
}
// Download arrow (shown before voice is downloaded)
let arrowConfig = UIImage.SymbolConfiguration(pointSize: 16, weight: .bold)
downloadArrowView.image = UIImage(systemName: "arrow.down", withConfiguration: arrowConfig)
downloadArrowView.contentMode = .center
downloadArrowView.isUserInteractionEnabled = false
playButton.addSubview(downloadArrowView)
// Download progress ring (overlays play button during CDN download)
downloadRingView.onCancel = { [weak self] in self?.onDownloadCancel?() }
addSubview(downloadRingView)
waveformView.peakHeight = 18 // Telegram AudioWaveformComponent peak
waveformView.distance = 2.0 // Telegram AudioWaveformComponent (bubble context)
waveformView.gravity = .bottom // Telegram: bars grow upward from bottom
@@ -106,6 +122,11 @@ final class MessageVoiceView: UIView {
width: playButtonSize - lottieInset * 2,
height: playButtonSize - lottieInset * 2
)
// Download arrow: same frame as button interior
downloadArrowView.frame = playButton.bounds
// Download ring: same frame as play button
downloadRingView.frame = playButton.frame
// Waveform: from x=57 to near right edge, height=18, y=1
let waveW = bounds.width - waveformX - 4
@@ -163,6 +184,8 @@ final class MessageVoiceView: UIView {
playButton.backgroundColor = colors.playButtonBg
playButton.tintColor = colors.playButtonFg
downloadArrowView.tintColor = colors.playButtonFg
downloadRingView.setRingColor(colors.playButtonFg)
durationLabel.textColor = colors.durationText
waveformView.foregroundColor_ = colors.waveformPlayed
waveformView.backgroundColor_ = colors.waveformUnplayed
@@ -188,6 +211,32 @@ final class MessageVoiceView: UIView {
}
}
// MARK: - Download State
/// Set whether voice data is already downloaded (cached).
/// `false` → show download arrow. `true` → show play/pause icon.
func setDownloaded(_ downloaded: Bool) {
isDownloaded = downloaded
downloadArrowView.isHidden = downloaded
playPauseAnimationView.isHidden = !downloaded
}
/// Show download progress ring overlaying the play button.
func showDownloadProgress(_ progress: CGFloat) {
downloadRingView.show()
downloadRingView.setProgress(progress)
downloadArrowView.isHidden = true
playPauseAnimationView.isHidden = true
}
/// Hide download progress ring and show play icon (download complete).
func hideDownloadProgress() {
downloadRingView.hide()
isDownloaded = true
downloadArrowView.isHidden = true
playPauseAnimationView.isHidden = false
}
// MARK: - Play Action
@objc private func playTapped() {
@@ -295,12 +344,14 @@ final class MessageVoiceView: UIView {
// MARK: - Waveform Encoding
/// Encode waveform samples to 5-bit packed base64 string (for sending).
/// Encode waveform samples to comma-separated floats (Desktop parity).
/// Desktop DialogInput.tsx:217 sends `interpolateCompressWaves(35).join(",")`.
/// Desktop MessageVoice.tsx parses with `split(",").map(parseFloat)`.
static func encodeWaveform(_ samples: [Float]) -> String {
guard !samples.isEmpty else { return "" }
// Resample to ~63 bars (Telegram standard)
let targetCount = min(63, samples.count)
// Resample to 35 bars (Desktop standard: interpolateCompressWaves(35))
let targetCount = min(35, samples.count)
let step = Float(samples.count) / Float(targetCount)
var resampled = [Float](repeating: 0, count: targetCount)
for i in 0..<targetCount {
@@ -310,28 +361,7 @@ final class MessageVoiceView: UIView {
resampled[i] = samples[start..<end].max() ?? 0
}
// Pack as 5-bit values
let bitCount = targetCount * 5
let byteCount = (bitCount + 7) / 8
var bytes = [UInt8](repeating: 0, count: byteCount)
for i in 0..<targetCount {
let value = UInt8(min(31, max(0, resampled[i] * 31)))
let bitOffset = i * 5
let byteIndex = bitOffset / 8
let bitIndex = bitOffset % 8
if bitIndex + 5 <= 8 {
bytes[byteIndex] |= value << (8 - bitIndex - 5)
} else {
let bitsInFirst = 8 - bitIndex
bytes[byteIndex] |= value >> (5 - bitsInFirst)
if byteIndex + 1 < bytes.count {
bytes[byteIndex + 1] |= value << (8 - (5 - bitsInFirst))
}
}
}
return Data(bytes).base64EncodedString()
// Comma-separated floats (Desktop parity)
return resampled.map { String(format: "%.2f", $0) }.joined(separator: ",")
}
}

View File

@@ -193,6 +193,7 @@ final class NativeMessageCell: UICollectionViewCell {
// Voice message
private let voiceView = MessageVoiceView()
private var voiceBlobView: VoiceBlobView?
private var activeVoiceDownloadTask: Task<Void, Never>?
// Avatar-specific
private let avatarImageView = UIImageView()
@@ -881,30 +882,61 @@ final class NativeMessageCell: UICollectionViewCell {
duration: previewParts.duration,
isOutgoing: layout.isOutgoing
)
let voiceAttachment = voiceAtt
let storedPassword = message.attachmentPassword
let playbackDuration = previewParts.duration
let playbackMessageId = message.id
// Check cache: arrow if not downloaded, play icon if cached
let voiceFileName = "voice_\(Int(playbackDuration))s.m4a"
let isCached = Self.playableVoiceURLFromCache(
attachmentId: voiceAttachment.id, fileName: voiceFileName
) != nil
// Own outgoing voice = always "downloaded" (data came from local recording)
let isOwnVoice = layout.isOutgoing
voiceView.setDownloaded(isCached || isOwnVoice)
let isCurrentVoice = VoiceMessagePlayer.shared.currentMessageId == message.id
voiceView.updatePlaybackState(
isPlaying: isCurrentVoice && VoiceMessagePlayer.shared.isPlaying,
progress: isCurrentVoice ? CGFloat(VoiceMessagePlayer.shared.progress) : 0
)
let voiceAttachment = voiceAtt
let storedPassword = message.attachmentPassword
let playbackDuration = previewParts.duration
let playbackMessageId = message.id
voiceView.onPlayTapped = { [weak self] in
guard let self else { return }
Task.detached(priority: .userInitiated) {
guard let playableURL = await Self.resolvePlayableVoiceURL(
attachment: voiceAttachment,
duration: playbackDuration,
storedPassword: storedPassword
) else {
// If already cached play immediately
if let cached = Self.playableVoiceURLFromCache(
attachmentId: voiceAttachment.id, fileName: voiceFileName
) {
self.voiceView.setDownloaded(true)
VoiceMessagePlayer.shared.play(messageId: playbackMessageId, fileURL: cached)
return
}
await MainActor.run {
guard self.message?.id == playbackMessageId else { return }
// Show progress ring and start download
self.voiceView.showDownloadProgress(0.027)
let downloadTask = Task {
let playableURL = await Self.resolvePlayableVoiceURL(
attachment: voiceAttachment,
duration: playbackDuration,
storedPassword: storedPassword,
onProgress: { [weak self] progress in
self?.voiceView.showDownloadProgress(CGFloat(progress))
}
)
guard !Task.isCancelled else { return }
self.voiceView.hideDownloadProgress()
if let playableURL, self.message?.id == playbackMessageId {
VoiceMessagePlayer.shared.play(messageId: playbackMessageId, fileURL: playableURL)
}
}
self.activeVoiceDownloadTask = downloadTask
}
voiceView.onDownloadCancel = { [weak self] in
self?.activeVoiceDownloadTask?.cancel()
self?.voiceView.hideDownloadProgress()
self?.activeVoiceDownloadTask = nil
}
fileIconView.isHidden = true
fileNameLabel.isHidden = true
@@ -1550,14 +1582,19 @@ final class NativeMessageCell: UICollectionViewCell {
private static func resolvePlayableVoiceURL(
attachment: MessageAttachment,
duration: TimeInterval,
storedPassword: String?
storedPassword: String?,
onProgress: (@MainActor (Double) -> Void)? = nil
) async -> URL? {
let fileName = "voice_\(Int(duration))s.m4a"
if let cached = playableVoiceURLFromCache(attachmentId: attachment.id, fileName: fileName) {
return cached
}
guard let downloaded = await downloadVoiceData(attachment: attachment, storedPassword: storedPassword) else {
guard let downloaded = await downloadVoiceData(
attachment: attachment,
storedPassword: storedPassword,
onProgress: onProgress
) else {
return nil
}
_ = AttachmentCache.shared.saveFile(downloaded, forAttachmentId: attachment.id, fileName: fileName)
@@ -1591,7 +1628,11 @@ final class NativeMessageCell: UICollectionViewCell {
}
}
private static func downloadVoiceData(attachment: MessageAttachment, storedPassword: String?) async -> Data? {
private static func downloadVoiceData(
attachment: MessageAttachment,
storedPassword: String?,
onProgress: (@MainActor (Double) -> Void)? = nil
) async -> Data? {
let tag = attachment.effectiveDownloadTag
guard !tag.isEmpty else { return nil }
guard let storedPassword, !storedPassword.isEmpty else { return nil }
@@ -1599,14 +1640,24 @@ final class NativeMessageCell: UICollectionViewCell {
do {
let encryptedData = try await TransportManager.shared.downloadFile(
tag: tag,
server: attachment.transportServer
server: attachment.transportServer,
onProgress: onProgress
)
let encryptedString = String(decoding: encryptedData, as: UTF8.self)
let passwords = MessageCrypto.attachmentPasswordCandidates(from: storedPassword)
guard let decrypted = decryptAttachmentData(encryptedString: encryptedString, passwords: passwords) else {
return nil
}
return parseAttachmentFileData(decrypted)
let rawData = parseAttachmentFileData(decrypted)
// Desktop sends WebM/Opus convert to M4A for iOS playback.
// Transcoding (~200ms) runs off MainActor to avoid UI hitch.
if WebMOpusConverter.isWebM(rawData) {
return await Task.detached(priority: .userInitiated) {
WebMOpusConverter.convertToPlayable(rawData)
}.value
}
return rawData
} catch {
return nil
}
@@ -1632,12 +1683,20 @@ final class NativeMessageCell: UICollectionViewCell {
}
/// Normalizes a decrypted attachment payload into raw file bytes.
/// Accepts three wire formats and returns the input unchanged when
/// none of them match:
/// 1. `data:` URI with a base64 payload (iOS images, files, legacy voice)
/// 2. Hex-encoded bytes (Desktop voice: `Buffer.toString('hex')`)
/// 3. Raw binary
private static func parseAttachmentFileData(_ data: Data) -> Data {
    // Non-UTF-8 payloads can only be raw binary — bail out early.
    guard let text = String(data: data, encoding: .utf8) else { return data }
    // 1. Data URI: everything after the first comma is base64.
    if text.hasPrefix("data:"), let commaIndex = text.firstIndex(of: ",") {
        let base64Payload = String(text[text.index(after: commaIndex)...])
        return Data(base64Encoded: base64Payload) ?? data
    }
    // 2. Hex dump. The >= 100 length floor avoids misclassifying short
    // plain-text payloads that happen to be all hex digits.
    if text.count >= 100, text.allSatisfy(\.isHexDigit) {
        return Data(hexString: text)
    }
    // 3. Raw binary (fallback).
    return data
}

View File

@@ -0,0 +1,169 @@
import UIKit
/// Telegram-parity circular progress ring for voice message downloads.
/// Overlays the 44×44 play button during CDN download.
///
/// Reference: `SemanticStatusNodeProgressContext.swift` in Telegram-iOS.
/// - Arc starts at 12 o'clock (-π/2), fills clockwise
/// - Rounded line caps, ~2pt stroke, 2.5pt inset
/// - Continuous rotation animation (4× speed)
/// - Cancel in center (12pt, 1.8pt stroke)
/// - Minimum visible progress: 2.7%
final class VoiceDownloadRingView: UIView {
    // MARK: - Telegram-exact constants
    private let lineWidth: CGFloat = 2.0
    private let inset: CGFloat = 2.5
    private let startAngle: CGFloat = -.pi / 2
    private let minProgress: CGFloat = 0.027
    private let cancelCrossSize: CGFloat = 12.0
    private let cancelLineWidth: CGFloat = 1.8

    // MARK: - Callbacks
    /// Fired when the user taps anywhere on the ring (the cancel cross).
    var onCancel: (() -> Void)?

    // MARK: - Layers
    private let progressLayer = CAShapeLayer()
    private let cancelLayer = CAShapeLayer()

    // MARK: - Init
    override init(frame: CGRect) {
        super.init(frame: frame)
        setup()
    }

    required init?(coder: NSCoder) {
        super.init(coder: coder)
        setup()
    }

    /// One-time layer/gesture configuration shared by both initializers.
    private func setup() {
        isUserInteractionEnabled = true
        isHidden = true
        backgroundColor = .clear
        // Progress ring: the path is a full circle; strokeEnd controls the
        // visible portion of the arc.
        progressLayer.fillColor = nil
        progressLayer.lineCap = .round
        progressLayer.lineWidth = lineWidth
        progressLayer.strokeStart = 0
        progressLayer.strokeEnd = minProgress
        layer.addSublayer(progressLayer)
        // Cancel cross rendered at the ring's center.
        cancelLayer.fillColor = nil
        cancelLayer.lineCap = .round
        cancelLayer.lineWidth = cancelLineWidth
        layer.addSublayer(cancelLayer)
        let tap = UITapGestureRecognizer(target: self, action: #selector(cancelTapped))
        addGestureRecognizer(tap)
    }

    // MARK: - Layout
    override func layoutSubviews() {
        super.layoutSubviews()
        let size = bounds.size
        // Skip degenerate layouts (zero-sized bounds during early layout passes).
        guard size.width > 0, size.height > 0 else { return }
        // Progress arc path (full circle — strokeEnd controls visible portion).
        let pathDiameter = size.width - lineWidth - inset * 2
        let radius = pathDiameter / 2
        let center = CGPoint(x: size.width / 2, y: size.height / 2)
        let circlePath = UIBezierPath(
            arcCenter: center,
            radius: radius,
            startAngle: startAngle,
            endAngle: startAngle + .pi * 2,
            clockwise: true
        )
        progressLayer.path = circlePath.cgPath
        progressLayer.frame = bounds
        // Cancel cross centered in the view.
        let half = cancelCrossSize / 2
        let crossPath = UIBezierPath()
        crossPath.move(to: CGPoint(x: center.x - half, y: center.y - half))
        crossPath.addLine(to: CGPoint(x: center.x + half, y: center.y + half))
        crossPath.move(to: CGPoint(x: center.x + half, y: center.y - half))
        crossPath.addLine(to: CGPoint(x: center.x - half, y: center.y + half))
        cancelLayer.path = crossPath.cgPath
        cancelLayer.frame = bounds
    }

    // MARK: - Public API
    /// Update ring and cancel-cross color to match the play button foreground.
    func setRingColor(_ color: UIColor) {
        progressLayer.strokeColor = color.cgColor
        cancelLayer.strokeColor = color.cgColor
    }

    /// Set download progress (0.0–1.0). Values below 2.7% are clamped up so
    /// the arc is always visibly present (Telegram parity).
    func setProgress(_ value: CGFloat, animated: Bool = true) {
        let clamped = max(minProgress, min(1.0, value))
        if animated {
            let anim = CABasicAnimation(keyPath: "strokeEnd")
            // Animate from the on-screen (presentation) value so rapid
            // successive updates don't jump.
            anim.fromValue = progressLayer.presentation()?.strokeEnd ?? progressLayer.strokeEnd
            anim.toValue = clamped
            anim.duration = 0.2
            anim.timingFunction = CAMediaTimingFunction(name: .easeInEaseOut)
            anim.isRemovedOnCompletion = false
            anim.fillMode = .forwards
            progressLayer.add(anim, forKey: "progress")
        } else {
            progressLayer.removeAnimation(forKey: "progress")
        }
        progressLayer.strokeEnd = clamped
    }

    /// Show the ring (fading it in) and start the rotation animation.
    /// Safe to call while a `hide()` fade is still in flight: interrupting the
    /// fade makes its completion fire with `finished == false`, which `hide()`
    /// honors, so the ring is not left permanently hidden.
    func show() {
        if isHidden {
            alpha = 0
            isHidden = false
        }
        UIView.animate(withDuration: 0.18) { self.alpha = 1 }
        startRotation()
    }

    /// Hide the ring (fading it out), stop rotation, and reset progress.
    func hide() {
        guard !isHidden else { return }
        UIView.animate(withDuration: 0.18, animations: {
            self.alpha = 0
        }, completion: { finished in
            // `finished == false` means show() interrupted the fade-out —
            // keep the ring visible and leave its state alone.
            guard finished else { return }
            self.isHidden = true
            self.stopRotation()
            self.setProgress(self.minProgress, animated: false)
        })
    }

    // MARK: - Rotation
    /// Continuous spin of the arc while the download is in progress.
    private func startRotation() {
        guard progressLayer.animation(forKey: "rotation") == nil else { return }
        let rotation = CABasicAnimation(keyPath: "transform.rotation.z")
        rotation.fromValue = 0
        rotation.toValue = CGFloat.pi * 2
        rotation.duration = 1.6 // ~4× per 2π normalized (Telegram: angle * 4.0)
        rotation.repeatCount = .infinity
        rotation.isRemovedOnCompletion = false
        progressLayer.add(rotation, forKey: "rotation")
    }

    private func stopRotation() {
        progressLayer.removeAnimation(forKey: "rotation")
    }

    // MARK: - Actions
    @objc private func cancelTapped() {
        onCancel?()
    }
}

View File

@@ -801,7 +801,9 @@ struct RosettaApp: App {
return
}
UIWindow.appearance().backgroundColor = .systemBackground
UIWindow.appearance().backgroundColor = UIColor { traits in
traits.userInterfaceStyle == .dark ? .black : .white
}
// Detect fresh install: UserDefaults are wiped on uninstall, Keychain is not.
// If this is the first launch after install, clear any stale Keychain data.
@@ -862,6 +864,15 @@ struct RosettaApp: App {
if appState == nil {
appState = initialState()
}
// Set hosting controller & window background to match app background.
// Default .systemBackground (dark gray) leaks as a line in the safe area.
DispatchQueue.main.async {
guard let scene = UIApplication.shared.connectedScenes.first as? UIWindowScene,
let window = scene.windows.first(where: { $0.tag != 0320 }) else { return }
let bgColor: UIColor = window.traitCollection.userInterfaceStyle == .dark ? .black : .white
window.backgroundColor = bgColor
window.rootViewController?.view.backgroundColor = bgColor
}
}
.onOpenURL { url in
handleDeepLink(url)