From 05420337cc3f348396b914c49e9c1a611f1431c3 Mon Sep 17 00:00:00 2001 From: senseiGai Date: Mon, 13 Apr 2026 17:02:17 +0500 Subject: [PATCH] =?UTF-8?q?=D0=A4=D0=B8=D0=BA=D1=81:=20=D0=BA=D0=BE=D0=BD?= =?UTF-8?q?=D0=B2=D0=B5=D1=80=D1=82=D0=B0=D1=86=D0=B8=D1=8F=20=D0=B3=D0=BE?= =?UTF-8?q?=D0=BB=D0=BE=D1=81=D0=BE=D0=B2=D1=8B=D1=85=20=D1=81=D0=BE=D0=BE?= =?UTF-8?q?=D0=B1=D1=89=D0=B5=D0=BD=D0=B8=D0=B9=20desktop=20-=20iOS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Rosetta/Core/Network/TransportManager.swift | 62 +- Rosetta/Core/Services/SessionManager.swift | 13 +- Rosetta/Core/Services/WebMOpusConverter.swift | 709 ++++++++++++++++++ Rosetta/Core/Utils/DarkMode+Helpers.swift | 8 + .../Chats/ChatDetail/MessageVoiceView.swift | 82 +- .../Chats/ChatDetail/NativeMessageCell.swift | 93 ++- .../ChatDetail/VoiceDownloadRingView.swift | 169 +++++ Rosetta/RosettaApp.swift | 13 +- 8 files changed, 1090 insertions(+), 59 deletions(-) create mode 100644 Rosetta/Core/Services/WebMOpusConverter.swift create mode 100644 Rosetta/Features/Chats/ChatDetail/VoiceDownloadRingView.swift diff --git a/Rosetta/Core/Network/TransportManager.swift b/Rosetta/Core/Network/TransportManager.swift index 135a1d4..0ab3ee5 100644 --- a/Rosetta/Core/Network/TransportManager.swift +++ b/Rosetta/Core/Network/TransportManager.swift @@ -152,6 +152,16 @@ final class TransportManager: @unchecked Sendable { private static let maxDownloadRetries = 3 func downloadFile(tag: String, server: String? = nil) async throws -> Data { + try await downloadFile(tag: tag, server: server, onProgress: nil) + } + + /// Downloads file with optional progress reporting. + /// Progress callback fires on MainActor with values 0.0–1.0. + func downloadFile( + tag: String, + server: String? = nil, + onProgress: (@MainActor (Double) -> Void)? + ) async throws -> Data { let serverUrl: String if let explicit = server, !explicit.isEmpty { serverUrl = explicit @@ -172,19 +182,49 @@ final class TransportManager: @unchecked Sendable { var lastError: Error = TransportError.invalidResponse for attempt in 0.. 0 { data.reserveCapacity(Int(expectedLength)) } + var lastReportedKB = 0 + + for try await byte in asyncBytes { + data.append(byte) + let currentKB = data.count / 4096 + if expectedLength > 0, currentKB > lastReportedKB { + lastReportedKB = currentKB + let fraction = Double(data.count) / Double(expectedLength) + await onProgress(min(fraction, 1.0)) + } + } + await onProgress(1.0) + Self.logger.info("Download complete: tag=\(tag), \(data.count) bytes") + return data + } else { + // Fast path — no progress needed + let (data, response) = try await session.data(for: request) + + guard let httpResponse = response as? HTTPURLResponse else { + throw TransportError.invalidResponse + } + guard httpResponse.statusCode == 200 else { + Self.logger.error("Download failed: HTTP \(httpResponse.statusCode)") + throw TransportError.downloadFailed(statusCode: httpResponse.statusCode) + } + + Self.logger.info("Download complete: tag=\(tag), \(data.count) bytes") + return data } - - guard httpResponse.statusCode == 200 else { - Self.logger.error("Download failed: HTTP \(httpResponse.statusCode)") - throw TransportError.downloadFailed(statusCode: httpResponse.statusCode) - } - - Self.logger.info("Download complete: tag=\(tag), \(data.count) bytes") - return data } catch { lastError = error if attempt < Self.maxDownloadRetries - 1 { diff --git a/Rosetta/Core/Services/SessionManager.swift b/Rosetta/Core/Services/SessionManager.swift index 90954b8..3aed02e 100644 --- a/Rosetta/Core/Services/SessionManager.swift +++ b/Rosetta/Core/Services/SessionManager.swift @@ -839,16 +839,21 @@ final class SessionManager { } /// Builds a data URI from attachment data (desktop: `FileReader.readAsDataURL()`). + /// Voice attachments use hex encoding (Desktop parity: `Buffer.toString('hex')`). private func buildDataURI(_ attachment: PendingAttachment) -> String { - let base64 = attachment.data.base64EncodedString() switch attachment.type { case .image: - return "data:image/jpeg;base64,\(base64)" + return "data:image/jpeg;base64,\(attachment.data.base64EncodedString())" case .file: let mimeType = mimeTypeForFileName(attachment.fileName ?? "file") - return "data:\(mimeType);base64,\(base64)" + return "data:\(mimeType);base64,\(attachment.data.base64EncodedString())" + case .voice: + // Desktop parity: raw bytes as hex string. + // Desktop DialogInput.tsx:214 sends Buffer.from(audioBlob).toString('hex'), + // Desktop MessageVoice.tsx:125 reads Buffer.from(blob, "hex"). + return attachment.data.hexString default: - return "data:application/octet-stream;base64,\(base64)" + return "data:application/octet-stream;base64,\(attachment.data.base64EncodedString())" } } diff --git a/Rosetta/Core/Services/WebMOpusConverter.swift b/Rosetta/Core/Services/WebMOpusConverter.swift new file mode 100644 index 0000000..81ca92f --- /dev/null +++ b/Rosetta/Core/Services/WebMOpusConverter.swift @@ -0,0 +1,709 @@ +import AudioToolbox +import AVFoundation +import Foundation +import os + +// MARK: - WebM/Opus → M4A Converter + +/// Converts WebM/Opus audio (from Desktop) to M4A/AAC playable by AVAudioPlayer. +/// iOS doesn't support WebM container natively. This parser extracts Opus frames from +/// the EBML/WebM container, wraps them in a CAF file (Apple's native container that +/// supports Opus), then transcodes to M4A via AVAudioFile. +enum WebMOpusConverter { + + private static let logger = Logger(subsystem: "com.rosetta.messenger", category: "WebMOpusConverter") + + /// WebM/EBML magic: `0x1A 0x45 0xDF 0xA3` + static func isWebM(_ data: Data) -> Bool { + data.count >= 4 && data[0] == 0x1A && data[1] == 0x45 && data[2] == 0xDF && data[3] == 0xA3 + } + + /// Main entry: WebM/Opus Data → M4A Data (or CAF fallback). + /// Returns nil only if WebM parsing fails entirely. + static func convertToPlayable(_ webmData: Data) -> Data? { + guard let parsed = parseWebM(webmData) else { + logger.error("WebM parse failed (\(webmData.count) bytes)") + return nil + } + guard !parsed.frames.isEmpty else { + logger.warning("WebM contains 0 Opus frames") + return nil + } + logger.info("WebM parsed: \(parsed.frames.count) frames, \(parsed.sampleRate)Hz, \(parsed.channels)ch") + + // Decode Opus → PCM via AudioConverter, then write as WAV + if let wavData = decodeOpusToWAV(parsed) { + logger.info("WebM→WAV conversion succeeded (\(wavData.count) bytes)") + return wavData + } + + // Fallback: try CAF container route + if let cafData = buildCAF(parsed), let m4aData = transcodeCAFtoM4A(cafData) { + return m4aData + } + + logger.error("All conversion paths failed") + return nil + } + + // MARK: - WebM/EBML Parser + + private struct ParsedWebM { + let frames: [Data] + let sampleRate: Double + let channels: UInt32 + let codecPrivate: Data? + } + + private static func parseWebM(_ data: Data) -> ParsedWebM? { + let bytes = [UInt8](data) + var pos = 0 + + // EBML header (0x1A45DFA3) + guard bytes.count >= 8, + bytes[0] == 0x1A, bytes[1] == 0x45, bytes[2] == 0xDF, bytes[3] == 0xA3 + else { return nil } + pos = 4 + guard let (headerSize, hLen) = readVINTValue(bytes, at: pos) else { return nil } + pos += hLen + Int(headerSize) + + // Segment (0x18538067) + guard pos + 4 <= bytes.count, + bytes[pos] == 0x18, bytes[pos + 1] == 0x53, + bytes[pos + 2] == 0x80, bytes[pos + 3] == 0x67 + else { return nil } + pos += 4 + guard let (_, segLen) = readVINTValue(bytes, at: pos) else { return nil } + pos += segLen + + var sampleRate: Double = 48000 + var channels: UInt32 = 1 + var codecPrivate: Data? + var audioTrackNum: UInt64 = 1 + var frames: [Data] = [] + frames.reserveCapacity(512) + + while pos < bytes.count { + guard let (elemID, idLen) = readElementID(bytes, at: pos) else { break } + pos += idLen + guard let (elemSize, sizeLen) = readVINTValue(bytes, at: pos) else { break } + pos += sizeLen + + let elemDataEnd = min(pos + Int(elemSize), bytes.count) + + switch elemID { + case 0x1654AE6B: // Tracks — parse children (TrackEntry) + parseTracks(bytes, from: pos, to: elemDataEnd, + sampleRate: &sampleRate, channels: &channels, + codecPrivate: &codecPrivate, audioTrackNum: &audioTrackNum) + pos = elemDataEnd + + case 0x1F43B675: // Cluster — extract SimpleBlocks + parseCluster(bytes, from: pos, to: elemDataEnd, + audioTrackNum: audioTrackNum, frames: &frames) + pos = elemDataEnd + + default: + pos = elemDataEnd + } + + if pos <= 0 || elemDataEnd <= 0 { break } + } + + return ParsedWebM(frames: frames, sampleRate: sampleRate, + channels: channels, codecPrivate: codecPrivate) + } + + private static func parseTracks( + _ bytes: [UInt8], from start: Int, to end: Int, + sampleRate: inout Double, channels: inout UInt32, + codecPrivate: inout Data?, audioTrackNum: inout UInt64 + ) { + var pos = start + while pos < end { + guard let (id, idLen) = readElementID(bytes, at: pos) else { break } + pos += idLen + guard let (size, sizeLen) = readVINTValue(bytes, at: pos) else { break } + pos += sizeLen + let childEnd = min(pos + Int(size), end) + + if id == 0xAE { // TrackEntry + parseTrackEntry(bytes, from: pos, to: childEnd, + sampleRate: &sampleRate, channels: &channels, + codecPrivate: &codecPrivate, audioTrackNum: &audioTrackNum) + } + pos = childEnd + } + } + + private static func parseTrackEntry( + _ bytes: [UInt8], from start: Int, to end: Int, + sampleRate: inout Double, channels: inout UInt32, + codecPrivate: inout Data?, audioTrackNum: inout UInt64 + ) { + var pos = start + var isAudio = false + var trackNum: UInt64 = 0 + + while pos < end { + guard let (id, idLen) = readElementID(bytes, at: pos) else { break } + pos += idLen + guard let (size, sizeLen) = readVINTValue(bytes, at: pos) else { break } + pos += sizeLen + let childEnd = min(pos + Int(size), end) + + switch id { + case 0xD7: // TrackNumber + trackNum = readUInt(bytes, at: pos, count: Int(size)) + case 0x83: // TrackType (2 = audio) + isAudio = readUInt(bytes, at: pos, count: Int(size)) == 2 + case 0x63A2: // CodecPrivate (OpusHead) + if childEnd <= bytes.count { + codecPrivate = Data(bytes[pos..> 1) & 0x03 // bits 1-2 in Matroska spec + let dataStart = headerStart + 3 + + if lacingType == 0 { + // No lacing — single frame + if dataStart < childEnd { + frames.append(Data(bytes[dataStart.. UInt32 { + guard !packet.isEmpty else { return 960 } + let config = Int((packet[0] >> 3) & 0x1F) + let durationMs: Int + switch config { + case 0...3: durationMs = 10 + case 4...7: durationMs = 20 + case 8...11: durationMs = 40 + case 12...15: durationMs = 60 + case 16...19: durationMs = 10 + case 20...23: durationMs = 20 + case 24...27: durationMs = 10 + case 28...31: durationMs = 20 + default: durationMs = 20 + } + return UInt32(Int(sampleRate) * durationMs / 1000) + } + + /// Total PCM samples this Opus packet will produce (base × frame count). + private static func opusTotalSamples(_ packet: Data, sampleRate: Double) -> UInt32 { + let base = opusBaseFrameSamples(packet, sampleRate: sampleRate) + guard !packet.isEmpty else { return base } + let code = packet[0] & 0x03 + let count: UInt32 + switch code { + case 0: count = 1 + case 1, 2: count = 2 + case 3: count = packet.count > 1 ? UInt32(packet[1] & 0x3F) : 1 + default: count = 1 + } + return base * count + } + + private static func decodeOpusToWAV(_ parsed: ParsedWebM) -> Data? { + let sampleRate = parsed.sampleRate + let channels = parsed.channels + + // Base frame size for ASBD (single sub-frame, max 5760 = 120ms) + let baseSamples = parsed.frames.first.map { + opusBaseFrameSamples($0, sampleRate: sampleRate) + } ?? 960 + + logger.info("Opus base frame: \(baseSamples) samples (\(Double(baseSamples) / sampleRate * 1000)ms)") + + // Input: Opus compressed (base frame size for AudioConverter) + var inASBD = AudioStreamBasicDescription( + mSampleRate: sampleRate, + mFormatID: kAudioFormatOpus, + mFormatFlags: 0, + mBytesPerPacket: 0, + mFramesPerPacket: baseSamples, + mBytesPerFrame: 0, + mChannelsPerFrame: channels, + mBitsPerChannel: 0, + mReserved: 0 + ) + + // Output: 16-bit PCM + let bytesPerSample: UInt32 = 2 + var outASBD = AudioStreamBasicDescription( + mSampleRate: sampleRate, + mFormatID: kAudioFormatLinearPCM, + mFormatFlags: kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked, + mBytesPerPacket: bytesPerSample * channels, + mFramesPerPacket: 1, + mBytesPerFrame: bytesPerSample * channels, + mChannelsPerFrame: channels, + mBitsPerChannel: bytesPerSample * 8, + mReserved: 0 + ) + + var converter: AudioConverterRef? + var status = AudioConverterNew(&inASBD, &outASBD, &converter) + guard status == noErr, let converter else { + logger.warning("AudioConverterNew failed for Opus: \(status)") + return nil + } + defer { AudioConverterDispose(converter) } + + // Set magic cookie (OpusHead) if available + if var cookie = parsed.codecPrivate.map({ [UInt8]($0) }), !cookie.isEmpty { + AudioConverterSetProperty( + converter, + kAudioConverterDecompressionMagicCookie, + UInt32(cookie.count), + &cookie + ) + } + + // Buffer sized for max multi-frame packet (6 × 60ms = 360ms = 17280 samples) + let maxOutputSamples: UInt32 = 17280 + let pcmBufferSize = Int(maxOutputSamples * bytesPerSample * channels) + var allPCM = Data() + allPCM.reserveCapacity(parsed.frames.count * Int(baseSamples * bytesPerSample * channels)) + + for frame in parsed.frames { + // Per-packet total samples for output request + let totalSamples = opusTotalSamples(frame, sampleRate: sampleRate) + + var pcmBuffer = [UInt8](repeating: 0, count: pcmBufferSize) + var outBufferList = AudioBufferList( + mNumberBuffers: 1, + mBuffers: AudioBuffer( + mNumberChannels: channels, + mDataByteSize: UInt32(pcmBufferSize), + mData: &pcmBuffer + ) + ) + var ioOutputDataPacketSize: UInt32 = max(totalSamples, maxOutputSamples) + + // Packet description for this Opus frame + var packetDesc = AudioStreamPacketDescription( + mStartOffset: 0, + mVariableFramesInPacket: 0, + mDataByteSize: UInt32(frame.count) + ) + + let inputContext = UnsafeMutablePointer.allocate(capacity: 1) + inputContext.pointee = OpusInputContext(data: frame, packetDesc: packetDesc, consumed: false) + defer { inputContext.deallocate() } + + status = AudioConverterFillComplexBuffer( + converter, + opusInputCallback, + inputContext, + &ioOutputDataPacketSize, + &outBufferList, + nil + ) + + if status == noErr || status == 100 /* done */ { + let produced = Int(outBufferList.mBuffers.mDataByteSize) + allPCM.append(contentsOf: pcmBuffer[0.. OSStatus in + + guard let ctx = inUserData?.assumingMemoryBound(to: OpusInputContext.self) else { + ioNumberDataPackets.pointee = 0 + return -50 // paramErr + } + + if ctx.pointee.consumed { + ioNumberDataPackets.pointee = 0 + return 100 // done + } + + ctx.pointee.data.withUnsafeBytes { rawBuf in + ioData.pointee.mNumberBuffers = 1 + ioData.pointee.mBuffers.mNumberChannels = 1 + ioData.pointee.mBuffers.mDataByteSize = UInt32(rawBuf.count) + ioData.pointee.mBuffers.mData = UnsafeMutableRawPointer(mutating: rawBuf.baseAddress!) + } + ioNumberDataPackets.pointee = 1 + + if let descPtr = outDataPacketDescription { + descPtr.pointee = UnsafeMutablePointer.allocate(capacity: 1) + descPtr.pointee!.pointee = ctx.pointee.packetDesc + } + + ctx.pointee.consumed = true + return noErr + } + + /// Build minimal WAV file from raw PCM data. + private static func buildWAV(pcmData: Data, sampleRate: UInt32, + channels: UInt16, bitsPerSample: UInt16) -> Data { + let byteRate = sampleRate * UInt32(channels) * UInt32(bitsPerSample / 8) + let blockAlign = channels * (bitsPerSample / 8) + let dataSize = UInt32(pcmData.count) + let fileSize = 36 + dataSize + + var wav = Data() + wav.reserveCapacity(Int(fileSize + 8)) + + // RIFF header + wav.append(contentsOf: [0x52, 0x49, 0x46, 0x46]) // "RIFF" + wav.appendLE32(fileSize) + wav.append(contentsOf: [0x57, 0x41, 0x56, 0x45]) // "WAVE" + + // fmt chunk + wav.append(contentsOf: [0x66, 0x6D, 0x74, 0x20]) // "fmt " + wav.appendLE32(16) // chunk size + wav.appendLE16(1) // PCM format + wav.appendLE16(channels) + wav.appendLE32(sampleRate) + wav.appendLE32(byteRate) + wav.appendLE16(blockAlign) + wav.appendLE16(bitsPerSample) + + // data chunk + wav.append(contentsOf: [0x64, 0x61, 0x74, 0x61]) // "data" + wav.appendLE32(dataSize) + wav.append(pcmData) + + return wav + } + + // MARK: - CAF Builder + + private static func buildCAF(_ parsed: ParsedWebM) -> Data? { + var caf = Data() + let framesPerPacket: UInt32 = 960 // 20ms Opus frames at 48kHz + + // -- File Header -- + caf.appendBE32(0x63616666) // 'caff' + caf.appendBE16(1) // version + caf.appendBE16(0) // flags + + // -- Audio Description ('desc') -- + caf.appendBE32(0x64657363) // 'desc' + caf.appendBE64(32) // chunk size = sizeof(AudioStreamBasicDescription) + caf.appendBEFloat64(parsed.sampleRate) + caf.appendBE32(0x6F707573) // 'opus' (kAudioFormatOpus) + caf.appendBE32(0) // format flags + caf.appendBE32(0) // bytes per packet (variable) + caf.appendBE32(framesPerPacket) + caf.appendBE32(parsed.channels) + caf.appendBE32(0) // bits per channel (compressed) + + // -- Magic Cookie ('kuki') — OpusHead from WebM CodecPrivate -- + if let cp = parsed.codecPrivate, !cp.isEmpty { + caf.appendBE32(0x6B756B69) // 'kuki' + caf.appendBE64(Int64(cp.count)) + caf.append(cp) + } + + // -- Packet Table ('pakt') -- + let numPackets = Int64(parsed.frames.count) + let numValidFrames = numPackets * Int64(framesPerPacket) + + var paktBody = Data() + paktBody.appendBE64(numPackets) + paktBody.appendBE64(numValidFrames) + paktBody.appendBE32S(0) // priming frames (Int32) + paktBody.appendBE32S(0) // remainder frames (Int32) + for frame in parsed.frames { + paktBody.appendBERInt(frame.count) + } + + caf.appendBE32(0x70616B74) // 'pakt' + caf.appendBE64(Int64(paktBody.count)) + caf.append(paktBody) + + // -- Audio Data ('data') -- + var totalAudioSize = 0 + for frame in parsed.frames { totalAudioSize += frame.count } + + caf.appendBE32(0x64617461) // 'data' + caf.appendBE64(Int64(totalAudioSize + 4)) // +4 for editCount + caf.appendBE32(0) // edit count + for frame in parsed.frames { caf.append(frame) } + + return caf + } + + // MARK: - CAF → M4A Transcoding + + private static func transcodeCAFtoM4A(_ cafData: Data) -> Data? { + let tmp = FileManager.default.temporaryDirectory + let cafURL = tmp.appendingPathComponent("webm_conv_\(UUID().uuidString).caf") + let m4aURL = tmp.appendingPathComponent("webm_conv_\(UUID().uuidString).m4a") + defer { + try? FileManager.default.removeItem(at: cafURL) + try? FileManager.default.removeItem(at: m4aURL) + } + + do { + try cafData.write(to: cafURL) + + let inputFile = try AVAudioFile(forReading: cafURL) + let pcmFormat = inputFile.processingFormat + + let outputSettings: [String: Any] = [ + AVFormatIDKey: Int(kAudioFormatMPEG4AAC), + AVSampleRateKey: pcmFormat.sampleRate, + AVNumberOfChannelsKey: pcmFormat.channelCount, + AVEncoderBitRateKey: 64000 + ] + let outputFile = try AVAudioFile(forWriting: m4aURL, settings: outputSettings) + + let bufferCapacity: AVAudioFrameCount = 4096 + guard let buffer = AVAudioPCMBuffer(pcmFormat: pcmFormat, frameCapacity: bufferCapacity) else { + return nil + } + + while inputFile.framePosition < inputFile.length { + try inputFile.read(into: buffer) + try outputFile.write(from: buffer) + } + + return try Data(contentsOf: m4aURL) + } catch { + logger.error("CAF→M4A transcode failed: \(error.localizedDescription)") + return nil + } + } + + // MARK: - EBML Primitives + + private static func readVINTValue(_ bytes: [UInt8], at pos: Int) -> (UInt64, Int)? { + guard pos < bytes.count, bytes[pos] != 0 else { return nil } + let length = bytes[pos].leadingZeroBitCount + 1 + guard length <= 8, pos + length <= bytes.count else { return nil } + + var value = UInt64(bytes[pos]) & ((1 << (8 - length)) - 1) + for i in 1.. (UInt32, Int)? { + guard pos < bytes.count, bytes[pos] != 0 else { return nil } + let length = bytes[pos].leadingZeroBitCount + 1 + guard length <= 4, pos + length <= bytes.count else { return nil } + + var id = UInt32(bytes[pos]) + for i in 1.. UInt64 { + var value: UInt64 = 0 + for i in 0.. Double { + if count == 4, pos + 4 <= bytes.count { + let bits = (UInt32(bytes[pos]) << 24) | (UInt32(bytes[pos + 1]) << 16) | + (UInt32(bytes[pos + 2]) << 8) | UInt32(bytes[pos + 3]) + return Double(Float(bitPattern: bits)) + } else if count == 8, pos + 8 <= bytes.count { + var bits: UInt64 = 0 + for i in 0..<8 { bits = (bits << 8) | UInt64(bytes[pos + i]) } + return Double(bitPattern: bits) + } + return 48000 // default Opus sample rate + } +} + +// MARK: - Data Little-Endian Writers (WAV format) + +private extension Data { + mutating func appendLE16(_ v: UInt16) { + append(UInt8(v & 0xFF)); append(UInt8(v >> 8)) + } + mutating func appendLE32(_ v: UInt32) { + append(UInt8(v & 0xFF)); append(UInt8((v >> 8) & 0xFF)) + append(UInt8((v >> 16) & 0xFF)); append(UInt8((v >> 24) & 0xFF)) + } +} + +// MARK: - Data Big-Endian Writers (CAF format) + +private extension Data { + mutating func appendBE16(_ v: UInt16) { + append(UInt8(v >> 8)); append(UInt8(v & 0xFF)) + } + mutating func appendBE32(_ v: UInt32) { + append(UInt8((v >> 24) & 0xFF)); append(UInt8((v >> 16) & 0xFF)) + append(UInt8((v >> 8) & 0xFF)); append(UInt8(v & 0xFF)) + } + mutating func appendBE32S(_ v: Int32) { appendBE32(UInt32(bitPattern: v)) } + mutating func appendBE64(_ v: Int64) { + let u = UInt64(bitPattern: v) + for i in stride(from: 56, through: 0, by: -8) { append(UInt8((u >> i) & 0xFF)) } + } + mutating func appendBEFloat64(_ v: Double) { + let bits = v.bitPattern + for i in stride(from: 56, through: 0, by: -8) { append(UInt8((bits >> i) & 0xFF)) } + } + /// Variable-length BER integer (CAF packet table). + mutating func appendBERInt(_ value: Int) { + var v = value + var encoded = [UInt8]() + encoded.append(UInt8(v & 0x7F)) + v >>= 7 + while v > 0 { + encoded.append(UInt8(0x80 | (v & 0x7F))) + v >>= 7 + } + for byte in encoded.reversed() { append(byte) } + } +} diff --git a/Rosetta/Core/Utils/DarkMode+Helpers.swift b/Rosetta/Core/Utils/DarkMode+Helpers.swift index 9960301..8e2ab15 100644 --- a/Rosetta/Core/Utils/DarkMode+Helpers.swift +++ b/Rosetta/Core/Utils/DarkMode+Helpers.swift @@ -28,6 +28,7 @@ struct DarkModeWrapper: View { if let windowScene = activeWindowScene { let overlayWindow = UIWindow(windowScene: windowScene) overlayWindow.tag = 0320 + overlayWindow.backgroundColor = .clear overlayWindow.isHidden = false overlayWindow.isUserInteractionEnabled = false self.overlayWindow = overlayWindow @@ -42,8 +43,15 @@ struct DarkModeWrapper: View { case "system": style = .unspecified default: style = .dark } + let bgColor: UIColor = (style == .light) ? .white : .black for window in windowScene.windows { window.overrideUserInterfaceStyle = style + // Match window background to app background — prevents + // systemBackground (dark gray) from showing as a line + // in the bottom safe area. + if window.tag != 0320 { + window.backgroundColor = bgColor + } } } } diff --git a/Rosetta/Features/Chats/ChatDetail/MessageVoiceView.swift b/Rosetta/Features/Chats/ChatDetail/MessageVoiceView.swift index 659e43c..56f3756 100644 --- a/Rosetta/Features/Chats/ChatDetail/MessageVoiceView.swift +++ b/Rosetta/Features/Chats/ChatDetail/MessageVoiceView.swift @@ -11,6 +11,8 @@ final class MessageVoiceView: UIView { private let playButton = UIButton(type: .system) private let playPauseAnimationView = LottieAnimationView() + private let downloadArrowView = UIImageView() // ↓ arrow before download + private let downloadRingView = VoiceDownloadRingView() private let waveformView = WaveformView() private let durationLabel = UILabel() @@ -20,6 +22,7 @@ final class MessageVoiceView: UIView { private var attachmentId: String = "" private var isOutgoing = false private var isShowingPause = false // tracks Lottie visual state + private var isDownloaded = false // false = show ↓, true = show play/pause private var totalDuration: TimeInterval = 0 // original duration for label reset /// Center of play button in this view's coordinate space (for external blob positioning). var playButtonCenter: CGPoint { playButton.center } @@ -50,6 +53,8 @@ final class MessageVoiceView: UIView { // Playback var onPlayTapped: (() -> Void)? + /// Download cancel callback (forwarded from VoiceDownloadRingView). + var onDownloadCancel: (() -> Void)? // MARK: - Setup @@ -73,6 +78,17 @@ final class MessageVoiceView: UIView { playButton.setImage(UIImage(systemName: "play.fill", withConfiguration: config), for: .normal) } + // Download arrow (shown before voice is downloaded) + let arrowConfig = UIImage.SymbolConfiguration(pointSize: 16, weight: .bold) + downloadArrowView.image = UIImage(systemName: "arrow.down", withConfiguration: arrowConfig) + downloadArrowView.contentMode = .center + downloadArrowView.isUserInteractionEnabled = false + playButton.addSubview(downloadArrowView) + + // Download progress ring (overlays play button during CDN download) + downloadRingView.onCancel = { [weak self] in self?.onDownloadCancel?() } + addSubview(downloadRingView) + waveformView.peakHeight = 18 // Telegram AudioWaveformComponent peak waveformView.distance = 2.0 // Telegram AudioWaveformComponent (bubble context) waveformView.gravity = .bottom // Telegram: bars grow upward from bottom @@ -106,6 +122,11 @@ final class MessageVoiceView: UIView { width: playButtonSize - lottieInset * 2, height: playButtonSize - lottieInset * 2 ) + // Download arrow: same frame as button interior + downloadArrowView.frame = playButton.bounds + + // Download ring: same frame as play button + downloadRingView.frame = playButton.frame // Waveform: from x=57 to near right edge, height=18, y=1 let waveW = bounds.width - waveformX - 4 @@ -163,6 +184,8 @@ final class MessageVoiceView: UIView { playButton.backgroundColor = colors.playButtonBg playButton.tintColor = colors.playButtonFg + downloadArrowView.tintColor = colors.playButtonFg + downloadRingView.setRingColor(colors.playButtonFg) durationLabel.textColor = colors.durationText waveformView.foregroundColor_ = colors.waveformPlayed waveformView.backgroundColor_ = colors.waveformUnplayed @@ -188,6 +211,32 @@ final class MessageVoiceView: UIView { } } + // MARK: - Download State + + /// Set whether voice data is already downloaded (cached). + /// `false` → show ↓ arrow. `true` → show play/pause icon. + func setDownloaded(_ downloaded: Bool) { + isDownloaded = downloaded + downloadArrowView.isHidden = downloaded + playPauseAnimationView.isHidden = !downloaded + } + + /// Show download progress ring overlaying the play button. + func showDownloadProgress(_ progress: CGFloat) { + downloadRingView.show() + downloadRingView.setProgress(progress) + downloadArrowView.isHidden = true + playPauseAnimationView.isHidden = true + } + + /// Hide download progress ring and show play icon (download complete). + func hideDownloadProgress() { + downloadRingView.hide() + isDownloaded = true + downloadArrowView.isHidden = true + playPauseAnimationView.isHidden = false + } + // MARK: - Play Action @objc private func playTapped() { @@ -295,12 +344,14 @@ final class MessageVoiceView: UIView { // MARK: - Waveform Encoding - /// Encode waveform samples to 5-bit packed base64 string (for sending). + /// Encode waveform samples to comma-separated floats (Desktop parity). + /// Desktop DialogInput.tsx:217 sends `interpolateCompressWaves(35).join(",")`. + /// Desktop MessageVoice.tsx parses with `split(",").map(parseFloat)`. static func encodeWaveform(_ samples: [Float]) -> String { guard !samples.isEmpty else { return "" } - // Resample to ~63 bars (Telegram standard) - let targetCount = min(63, samples.count) + // Resample to 35 bars (Desktop standard: interpolateCompressWaves(35)) + let targetCount = min(35, samples.count) let step = Float(samples.count) / Float(targetCount) var resampled = [Float](repeating: 0, count: targetCount) for i in 0..> (5 - bitsInFirst) - if byteIndex + 1 < bytes.count { - bytes[byteIndex + 1] |= value << (8 - (5 - bitsInFirst)) - } - } - } - - return Data(bytes).base64EncodedString() + // Comma-separated floats (Desktop parity) + return resampled.map { String(format: "%.2f", $0) }.joined(separator: ",") } } diff --git a/Rosetta/Features/Chats/ChatDetail/NativeMessageCell.swift b/Rosetta/Features/Chats/ChatDetail/NativeMessageCell.swift index e83ad49..f70146e 100644 --- a/Rosetta/Features/Chats/ChatDetail/NativeMessageCell.swift +++ b/Rosetta/Features/Chats/ChatDetail/NativeMessageCell.swift @@ -193,6 +193,7 @@ final class NativeMessageCell: UICollectionViewCell { // Voice message private let voiceView = MessageVoiceView() private var voiceBlobView: VoiceBlobView? + private var activeVoiceDownloadTask: Task? // Avatar-specific private let avatarImageView = UIImageView() @@ -881,30 +882,61 @@ final class NativeMessageCell: UICollectionViewCell { duration: previewParts.duration, isOutgoing: layout.isOutgoing ) + let voiceAttachment = voiceAtt + let storedPassword = message.attachmentPassword + let playbackDuration = previewParts.duration + let playbackMessageId = message.id + + // Check cache: ↓ arrow if not downloaded, play icon if cached + let voiceFileName = "voice_\(Int(playbackDuration))s.m4a" + let isCached = Self.playableVoiceURLFromCache( + attachmentId: voiceAttachment.id, fileName: voiceFileName + ) != nil + // Own outgoing voice = always "downloaded" (data came from local recording) + let isOwnVoice = layout.isOutgoing + voiceView.setDownloaded(isCached || isOwnVoice) + let isCurrentVoice = VoiceMessagePlayer.shared.currentMessageId == message.id voiceView.updatePlaybackState( isPlaying: isCurrentVoice && VoiceMessagePlayer.shared.isPlaying, progress: isCurrentVoice ? CGFloat(VoiceMessagePlayer.shared.progress) : 0 ) - let voiceAttachment = voiceAtt - let storedPassword = message.attachmentPassword - let playbackDuration = previewParts.duration - let playbackMessageId = message.id + voiceView.onPlayTapped = { [weak self] in guard let self else { return } - Task.detached(priority: .userInitiated) { - guard let playableURL = await Self.resolvePlayableVoiceURL( + + // If already cached — play immediately + if let cached = Self.playableVoiceURLFromCache( + attachmentId: voiceAttachment.id, fileName: voiceFileName + ) { + self.voiceView.setDownloaded(true) + VoiceMessagePlayer.shared.play(messageId: playbackMessageId, fileURL: cached) + return + } + + // Show progress ring and start download + self.voiceView.showDownloadProgress(0.027) + let downloadTask = Task { + let playableURL = await Self.resolvePlayableVoiceURL( attachment: voiceAttachment, duration: playbackDuration, - storedPassword: storedPassword - ) else { - return - } - await MainActor.run { - guard self.message?.id == playbackMessageId else { return } + storedPassword: storedPassword, + onProgress: { [weak self] progress in + self?.voiceView.showDownloadProgress(CGFloat(progress)) + } + ) + guard !Task.isCancelled else { return } + self.voiceView.hideDownloadProgress() + if let playableURL, self.message?.id == playbackMessageId { VoiceMessagePlayer.shared.play(messageId: playbackMessageId, fileURL: playableURL) } } + self.activeVoiceDownloadTask = downloadTask + } + voiceView.onDownloadCancel = { [weak self] in + self?.activeVoiceDownloadTask?.cancel() + self?.voiceView.hideDownloadProgress() + self?.activeVoiceDownloadTask = nil } fileIconView.isHidden = true fileNameLabel.isHidden = true @@ -1550,14 +1582,19 @@ final class NativeMessageCell: UICollectionViewCell { private static func resolvePlayableVoiceURL( attachment: MessageAttachment, duration: TimeInterval, - storedPassword: String? + storedPassword: String?, + onProgress: (@MainActor (Double) -> Void)? = nil ) async -> URL? { let fileName = "voice_\(Int(duration))s.m4a" if let cached = playableVoiceURLFromCache(attachmentId: attachment.id, fileName: fileName) { return cached } - guard let downloaded = await downloadVoiceData(attachment: attachment, storedPassword: storedPassword) else { + guard let downloaded = await downloadVoiceData( + attachment: attachment, + storedPassword: storedPassword, + onProgress: onProgress + ) else { return nil } _ = AttachmentCache.shared.saveFile(downloaded, forAttachmentId: attachment.id, fileName: fileName) @@ -1591,7 +1628,11 @@ final class NativeMessageCell: UICollectionViewCell { } } - private static func downloadVoiceData(attachment: MessageAttachment, storedPassword: String?) async -> Data? { + private static func downloadVoiceData( + attachment: MessageAttachment, + storedPassword: String?, + onProgress: (@MainActor (Double) -> Void)? = nil + ) async -> Data? { let tag = attachment.effectiveDownloadTag guard !tag.isEmpty else { return nil } guard let storedPassword, !storedPassword.isEmpty else { return nil } @@ -1599,14 +1640,24 @@ final class NativeMessageCell: UICollectionViewCell { do { let encryptedData = try await TransportManager.shared.downloadFile( tag: tag, - server: attachment.transportServer + server: attachment.transportServer, + onProgress: onProgress ) let encryptedString = String(decoding: encryptedData, as: UTF8.self) let passwords = MessageCrypto.attachmentPasswordCandidates(from: storedPassword) guard let decrypted = decryptAttachmentData(encryptedString: encryptedString, passwords: passwords) else { return nil } - return parseAttachmentFileData(decrypted) + let rawData = parseAttachmentFileData(decrypted) + + // Desktop sends WebM/Opus — convert to M4A for iOS playback. + // Transcoding (~200ms) runs off MainActor to avoid UI hitch. + if WebMOpusConverter.isWebM(rawData) { + return await Task.detached(priority: .userInitiated) { + WebMOpusConverter.convertToPlayable(rawData) + }.value + } + return rawData } catch { return nil } @@ -1632,12 +1683,20 @@ final class NativeMessageCell: UICollectionViewCell { } private static func parseAttachmentFileData(_ data: Data) -> Data { + // 1. Data URI format (iOS images, files, legacy voice) if let string = String(data: data, encoding: .utf8), string.hasPrefix("data:"), let comma = string.firstIndex(of: ",") { let payload = String(string[string.index(after: comma)...]) return Data(base64Encoded: payload) ?? data } + // 2. Hex-encoded raw bytes (Desktop voice: Buffer.toString('hex')) + if let string = String(data: data, encoding: .utf8), + string.count >= 100, + string.allSatisfy({ $0.isHexDigit }) { + return Data(hexString: string) + } + // 3. Raw binary (fallback) return data } diff --git a/Rosetta/Features/Chats/ChatDetail/VoiceDownloadRingView.swift b/Rosetta/Features/Chats/ChatDetail/VoiceDownloadRingView.swift new file mode 100644 index 0000000..c81093a --- /dev/null +++ b/Rosetta/Features/Chats/ChatDetail/VoiceDownloadRingView.swift @@ -0,0 +1,169 @@ +import UIKit + +/// Telegram-parity circular progress ring for voice message downloads. +/// Overlays the 44×44 play button during CDN download. +/// +/// Reference: `SemanticStatusNodeProgressContext.swift` in Telegram-iOS. +/// - Arc starts at 12 o'clock (-π/2), fills clockwise +/// - Rounded line caps, ~2pt stroke, 2.5pt inset +/// - Continuous rotation animation (4× speed) +/// - Cancel ✕ in center (12pt, 1.8pt stroke) +/// - Minimum visible progress: 2.7% +final class VoiceDownloadRingView: UIView { + + // MARK: - Telegram-exact constants + + private let lineWidth: CGFloat = 2.0 + private let inset: CGFloat = 2.5 + private let startAngle: CGFloat = -.pi / 2 + private let minProgress: CGFloat = 0.027 + private let cancelCrossSize: CGFloat = 12.0 + private let cancelLineWidth: CGFloat = 1.8 + + // MARK: - Callbacks + + var onCancel: (() -> Void)? + + // MARK: - Layers + + private let progressLayer = CAShapeLayer() + private let cancelLayer = CAShapeLayer() + + // MARK: - Init + + override init(frame: CGRect) { + super.init(frame: frame) + setup() + } + + required init?(coder: NSCoder) { + super.init(coder: coder) + setup() + } + + private func setup() { + isUserInteractionEnabled = true + isHidden = true + backgroundColor = .clear + + // Progress ring + progressLayer.fillColor = nil + progressLayer.lineCap = .round + progressLayer.lineWidth = lineWidth + progressLayer.strokeStart = 0 + progressLayer.strokeEnd = minProgress + layer.addSublayer(progressLayer) + + // Cancel ✕ + cancelLayer.fillColor = nil + cancelLayer.lineCap = .round + cancelLayer.lineWidth = cancelLineWidth + layer.addSublayer(cancelLayer) + + let tap = UITapGestureRecognizer(target: self, action: #selector(cancelTapped)) + addGestureRecognizer(tap) + } + + // MARK: - Layout + + override func layoutSubviews() { + super.layoutSubviews() + let size = bounds.size + guard size.width > 0 else { return } + + // Progress arc path (full circle — strokeEnd controls visible portion) + let pathDiameter = size.width - lineWidth - inset * 2 + let radius = pathDiameter / 2 + let center = CGPoint(x: size.width / 2, y: size.height / 2) + let circlePath = UIBezierPath( + arcCenter: center, + radius: radius, + startAngle: startAngle, + endAngle: startAngle + .pi * 2, + clockwise: true + ) + progressLayer.path = circlePath.cgPath + progressLayer.frame = bounds + + // Cancel ✕ centered + let half = cancelCrossSize / 2 + let crossPath = UIBezierPath() + crossPath.move(to: CGPoint(x: center.x - half, y: center.y - half)) + crossPath.addLine(to: CGPoint(x: center.x + half, y: center.y + half)) + crossPath.move(to: CGPoint(x: center.x + half, y: center.y - half)) + crossPath.addLine(to: CGPoint(x: center.x - half, y: center.y + half)) + cancelLayer.path = crossPath.cgPath + cancelLayer.frame = bounds + } + + // MARK: - Public API + + /// Update ring color to match play button foreground. + func setRingColor(_ color: UIColor) { + progressLayer.strokeColor = color.cgColor + cancelLayer.strokeColor = color.cgColor + } + + /// Set download progress (0.0–1.0). Values below 2.7% are clamped up. + func setProgress(_ value: CGFloat, animated: Bool = true) { + let clamped = max(minProgress, min(1.0, value)) + + if animated { + let anim = CABasicAnimation(keyPath: "strokeEnd") + anim.fromValue = progressLayer.presentation()?.strokeEnd ?? progressLayer.strokeEnd + anim.toValue = clamped + anim.duration = 0.2 + anim.timingFunction = CAMediaTimingFunction(name: .easeInEaseOut) + anim.isRemovedOnCompletion = false + anim.fillMode = .forwards + progressLayer.add(anim, forKey: "progress") + } else { + progressLayer.removeAnimation(forKey: "progress") + } + progressLayer.strokeEnd = clamped + } + + /// Show the ring and start rotation. + func show() { + guard isHidden else { return } + isHidden = false + alpha = 0 + UIView.animate(withDuration: 0.18) { self.alpha = 1 } + startRotation() + } + + /// Hide the ring and stop rotation. + func hide() { + guard !isHidden else { return } + UIView.animate(withDuration: 0.18) { + self.alpha = 0 + } completion: { _ in + self.isHidden = true + self.stopRotation() + self.setProgress(self.minProgress, animated: false) + } + } + + // MARK: - Rotation + + private func startRotation() { + guard progressLayer.animation(forKey: "rotation") == nil else { return } + let rotation = CABasicAnimation(keyPath: "transform.rotation.z") + rotation.fromValue = 0 + rotation.toValue = CGFloat.pi * 2 + rotation.duration = 1.6 // ~4× per 2π normalized (Telegram: angle * 4.0) + rotation.repeatCount = .infinity + rotation.isRemovedOnCompletion = false + progressLayer.add(rotation, forKey: "rotation") + } + + private func stopRotation() { + progressLayer.removeAnimation(forKey: "rotation") + } + + // MARK: - Actions + + @objc private func cancelTapped() { + onCancel?() + } +} diff --git a/Rosetta/RosettaApp.swift b/Rosetta/RosettaApp.swift index bec78d3..781e2ce 100644 --- a/Rosetta/RosettaApp.swift +++ b/Rosetta/RosettaApp.swift @@ -801,7 +801,9 @@ struct RosettaApp: App { return } - UIWindow.appearance().backgroundColor = .systemBackground + UIWindow.appearance().backgroundColor = UIColor { traits in + traits.userInterfaceStyle == .dark ? .black : .white + } // Detect fresh install: UserDefaults are wiped on uninstall, Keychain is not. // If this is the first launch after install, clear any stale Keychain data. @@ -862,6 +864,15 @@ struct RosettaApp: App { if appState == nil { appState = initialState() } + // Set hosting controller & window background to match app background. + // Default .systemBackground (dark gray) leaks as a line in the safe area. + DispatchQueue.main.async { + guard let scene = UIApplication.shared.connectedScenes.first as? UIWindowScene, + let window = scene.windows.first(where: { $0.tag != 0320 }) else { return } + let bgColor: UIColor = window.traitCollection.userInterfaceStyle == .dark ? .black : .white + window.backgroundColor = bgColor + window.rootViewController?.view.backgroundColor = bgColor + } } .onOpenURL { url in handleDeepLink(url)