diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/DtsUtil.java b/libraries/extractor/src/main/java/androidx/media3/extractor/DtsUtil.java index bed79220563..5c0dbec2184 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/DtsUtil.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/DtsUtil.java @@ -15,6 +15,7 @@ */ package androidx.media3.extractor; +import static com.google.common.base.Preconditions.checkNotNull; import static java.lang.annotation.ElementType.TYPE_USE; import static java.lang.annotation.RetentionPolicy.SOURCE; @@ -45,7 +46,7 @@ public final class DtsUtil { /** Information parsed from a DTS frame header. */ public static final class DtsHeader { /** The mime type of the DTS bitstream. */ - public final @DtsAudioMimeType String mimeType; + @Nullable public final @DtsAudioMimeType String mimeType; /** The audio sampling rate in Hertz, or {@link C#RATE_UNSET_INT} if unknown. */ public final int sampleRate; @@ -63,7 +64,7 @@ public static final class DtsHeader { public final int bitrate; private DtsHeader( - String mimeType, + @Nullable String mimeType, int channelCount, int sampleRate, int frameSize, @@ -429,6 +430,8 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException { int assetsCount; // nuNumAssets int referenceClockCode; // nuRefClockCode int extensionSubstreamFrameDurationCode; // nuExSSFrameDurationCode + boolean enableMixMetadata = false; // bMixMetadataEnbl + int[] mixerOutChannels = null; boolean staticFieldsPresent = headerBits.readBit(); // bStaticFieldsPresent if (staticFieldsPresent) { @@ -456,13 +459,16 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException { } } - if (headerBits.readBit()) { // bMixMetadataEnbl + enableMixMetadata = headerBits.readBit(); + if (enableMixMetadata) { // bMixMetadataEnbl headerBits.skipBits(2); // nuMixMetadataAdjLevel int mixerOutputMaskBits = (headerBits.readBits(2) + 1) << 2; // nuBits4MixOutMask int mixerOutputConfigurationCount = headerBits.readBits(2) + 1; // nuNumMixOutConfigs + mixerOutChannels = new int[mixerOutputConfigurationCount]; // Output Mixing Configuration Loop for (int i = 0; i < mixerOutputConfigurationCount; i++) { - headerBits.skipBits(mixerOutputMaskBits); // nuMixOutChMask + int mask = headerBits.readBits(mixerOutputMaskBits); // nuMixOutChMask + mixerOutChannels[i] = getRemapChannelCount(mask); } } } else { @@ -476,9 +482,13 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException { headerBits.skipBits(extensionSubstreamFrameSizeBits); // nuAssetFsize int sampleRate = C.RATE_UNSET_INT; int channelCount = C.LENGTH_UNSET; // nuTotalNumChs + boolean embeddedStereo = false; // bEmbeddedStereoFlag + boolean embedded6ch = false; // bEmbeddedSixChFlag - // Asset descriptor, see ETSI TS 102 114 V1.6.1 (2019-08) Table 7-5. + // Asset descriptor: Size, Index and Per Stream Static Metadata, see ETSI TS 102 114 V1.6.1 + // (2019-08) Table 7-5. headerBits.skipBits(9 + 3); // nuAssetDescriptFsize, nuAssetIndex + String mimeType = null; if (staticFieldsPresent) { if (headerBits.readBit()) { // bAssetTypeDescrPresent headerBits.skipBits(4); // nuAssetTypeDescriptor @@ -493,8 +503,47 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException { headerBits.skipBits(5); // nuBitResolution sampleRate = SAMPLE_RATE_BY_INDEX[headerBits.readBits(4)]; // nuMaxSampleRate channelCount = headerBits.readBits(8) + 1; - // Done reading necessary bits, ignoring the rest. + if (headerBits.readBit()) { // bOne2OneMapChannels2Speakers + if (channelCount > 2) { + embeddedStereo = headerBits.readBit(); // bEmbeddedStereoFlag + } + if (channelCount > 6) { + embedded6ch = headerBits.readBit(); // bEmbeddedSixChFlag + } + int speakerMaskLength = 0; + if (headerBits.readBit()) { // bSpkrMaskEnabled + speakerMaskLength = (headerBits.readBits(2) + 1) << 2; // nuNumBits4SAMask + headerBits.skipBits(speakerMaskLength); // nuSpkrActivityMask + } + int speakerRemapSetsCount = headerBits.readBits(3); // nuNumSpkrRemapSets + int[] speakerRemapSets = new int[speakerRemapSetsCount]; + for (int i = 0; i < speakerRemapSetsCount; i++) { + speakerRemapSets[i] = headerBits.readBits(speakerMaskLength); // nuStndrSpkrLayoutMask[ns] + } + for (int i = 0; i < speakerRemapSetsCount; i++) { + int remapChannelCount = getRemapChannelCount(speakerRemapSets[i]); + int remapMaskLength = headerBits.readBits(5) + 1; // nuNumDecCh4Remap[ns] + for (int j = 0; j < remapChannelCount; j++) { + int remapMask = headerBits.readBits(remapMaskLength); // nuRemapDecChMask[ns][nCh] + int coef = Integer.bitCount(remapMask); // nCoef + headerBits.skipBits(coef * 5); // nuSpkrRemapCodes[ns][nCh][nc] + } + } + } else { + headerBits.skipBits(3); // nuRepresentationType + } + + parseAssetDescriptorDynamicData( + headerBits, + embeddedStereo, + embedded6ch, + channelCount, + enableMixMetadata, + mixerOutChannels); + + mimeType = parseDecoderNavigationData(headerBits); } + // Done reading necessary bits, ignoring the rest. long frameDurationUs = C.TIME_UNSET; if (staticFieldsPresent) { @@ -521,7 +570,7 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException { extensionSubstreamFrameDurationCode, C.MICROS_PER_SECOND, referenceClockFrequency); } return new DtsHeader( - MimeTypes.AUDIO_DTS_EXPRESS, + mimeType, channelCount, sampleRate, extensionSubstreamFrameSize, @@ -529,6 +578,143 @@ public static DtsHeader parseDtsHdHeader(byte[] header) throws ParserException { /* bitrate= */ 0); } + // Asset descriptor: Dynamic Metadata - DRC, DNC and Mixing Metadata, see ETSI TS 102 114 + // V1.6.1 (2019-08) Table 7-6. + private static void parseAssetDescriptorDynamicData( + ParsableBitArray headerBits, + boolean embeddedStereo, + boolean embedded6ch, + int channelCount, + boolean enableMixMetadata, + @Nullable int[] mixerOutChannels) { + boolean hasDrcCoef = headerBits.readBit(); + if (hasDrcCoef) { // bDRCCoefPresent + headerBits.skipBits(8); // nuDRCCode + } + if (headerBits.readBit()) { // bDialNormPresent + headerBits.skipBits(5); // nuDialNormCode + } + if (hasDrcCoef && embeddedStereo) { + headerBits.skipBits(8); // nuDRC2ChDmixCode + } + if (enableMixMetadata && headerBits.readBit()) { // bMixMetadataPresent + checkNotNull(mixerOutChannels); + headerBits.skipBits(1 + 6); // bExternalMixFlag, nuPostMixGainAdjCode + if (headerBits.readBits(2) < 3) { // nuControlMixerDRC + headerBits.skipBits(3); // nuLimit4EmbeddedDRC + } else { + headerBits.skipBits(8); // nuCustomDRCCode + } + boolean audioScalePerChannel = headerBits.readBit(); // bEnblPerChMainAudioScale + for (int mixerOutChannel : mixerOutChannels) { + if (audioScalePerChannel) { + headerBits.skipBits(6 * mixerOutChannel); // nuMainAudioScaleCode[ns][nCh] + } else { + headerBits.skipBits(6); // nuMainAudioScaleCode[ns][0] + } + } + int mixesCount = 1; // nEmDM + int[] channelCountsForDownmixes = new int[3]; + channelCountsForDownmixes[0] = channelCount; // nDecCh[0] + if (embedded6ch) { + channelCountsForDownmixes[mixesCount] = 6; // nDecCh[nEmDM] + mixesCount++; // nEmDM + } + if (embeddedStereo) { + channelCountsForDownmixes[mixesCount] = 2; // nDecCh[nEmDM] + mixesCount++; // nEmDM + } + for (int mixerOutChannel : mixerOutChannels) { + for (int downmix = 0; downmix < mixesCount; downmix++) { + int channelCountForDownmix = channelCountsForDownmixes[downmix]; + for (int downmixChannel = 0; downmixChannel < channelCountForDownmix; downmixChannel++) { + int mask = headerBits.readBits(mixerOutChannel); // nuMixMapMask[ns][nE][nCh] + int coefficients = Integer.bitCount(mask); // nuNumMixCoefs[ns][nE][nCh] + headerBits.skipBits(coefficients * 6); // nuMixCoeffs[ns][nE][nCh][nC] + } + } + } + } + } + + // Asset descriptor: Decoder Navigation Data, see ETSI TS 102 114 V1.6.1 (2019-08) Table 7-7. + private static String parseDecoderNavigationData(ParsableBitArray headerBits) + throws ParserException { + int codingMode = headerBits.readBits(2); // nuCodingMode + switch (codingMode) { + case 0: // DTS-HD Coding Mode that may contain multiple coding components + int extensionMask = headerBits.readBits(12); + if ((extensionMask & 0x100) != 0) { // Low bit rate component + return MimeTypes.AUDIO_DTS_EXPRESS; + } else { + return MimeTypes.AUDIO_DTS_HD; + } + case 1: // DTS-HD Loss-less coding mode without CBR component + return MimeTypes.AUDIO_DTS_HD; + case 2: // DTS-HD Low bit-rate mode + return MimeTypes.AUDIO_DTS_EXPRESS; + case 3: // The auxiliary coding mode is reserved for future applications. + default: + throw ParserException.createForMalformedContainer( + /* message= */ "Unsupported coding mode in DTS HD header: " + codingMode, + /* cause= */ null); + } + } + + // See Table 7-10 in ETSI TS 102 114 V1.6.1 + private static int getRemapChannelCount(int mask) { + int remapChannelCount = 0; + if ((mask & 0x0001) != 0) { // Centre in front of listener + remapChannelCount += 1; + } + if ((mask & 0x0002) != 0) { // Left/Right in front + remapChannelCount += 2; + } + if ((mask & 0x0004) != 0) { // Left/Right surround on side in rear + remapChannelCount += 2; + } + if ((mask & 0x0008) != 0) { // Low frequency effects subwoofer + remapChannelCount += 1; + } + if ((mask & 0x0010) != 0) { // Centre surround in rear + remapChannelCount += 1; + } + if ((mask & 0x0020) != 0) { // Left/Right height in front + remapChannelCount += 2; + } + if ((mask & 0x0040) != 0) { // Left/Right surround in rear + remapChannelCount += 2; + } + if ((mask & 0x0080) != 0) { // Centre Height in front + remapChannelCount += 1; + } + if ((mask & 0x0100) != 0) { // Over the listener's head + remapChannelCount += 1; + } + if ((mask & 0x0200) != 0) { // Between left/right and centre in front + remapChannelCount += 2; + } + if ((mask & 0x0400) != 0) { // Left/Right on side in front + remapChannelCount += 2; + } + if ((mask & 0x0800) != 0) { // Left/Right surround on side + remapChannelCount += 2; + } + if ((mask & 0x1000) != 0) { // Second low frequency effects subwoofer + remapChannelCount += 1; + } + if ((mask & 0x2000) != 0) { // Left/Right height on side + remapChannelCount += 2; + } + if ((mask & 0x4000) != 0) { // Centre height in rear + remapChannelCount += 1; + } + if ((mask & 0x8000) != 0) { // Left/Right height in rear + remapChannelCount += 2; + } + return remapChannelCount; + } + /** * Returns the size of the extension substream header in a DTS-HD frame according to ETSI TS 102 * 114 V1.6.1 (2019-08), Section 7.5.2. diff --git a/libraries/extractor/src/main/java/androidx/media3/extractor/ts/DtsReader.java b/libraries/extractor/src/main/java/androidx/media3/extractor/ts/DtsReader.java index c2e8a5fa225..ad5d5346d13 100644 --- a/libraries/extractor/src/main/java/androidx/media3/extractor/ts/DtsReader.java +++ b/libraries/extractor/src/main/java/androidx/media3/extractor/ts/DtsReader.java @@ -430,17 +430,21 @@ private void updateFormatWithDtsHeaderInfo(DtsUtil.DtsHeader dtsHeader) { if (dtsHeader.sampleRate == C.RATE_UNSET_INT || dtsHeader.channelCount == C.LENGTH_UNSET) { return; } + String sampleMimeType = + dtsHeader.mimeType != null + ? dtsHeader.mimeType + : format != null ? format.sampleMimeType : null; if (format == null || coreFormatPendingEmit || dtsHeader.channelCount != format.channelCount || dtsHeader.sampleRate != format.sampleRate - || !Objects.equals(dtsHeader.mimeType, format.sampleMimeType)) { + || !Objects.equals(sampleMimeType, format.sampleMimeType)) { Format.Builder formatBuilder = format == null ? new Format.Builder() : format.buildUpon(); format = formatBuilder .setId(formatId) .setContainerMimeType(containerMimeType) - .setSampleMimeType(dtsHeader.mimeType) + .setSampleMimeType(sampleMimeType) .setChannelCount(dtsHeader.channelCount) .setSampleRate(dtsHeader.sampleRate) .setLanguage(language) diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_across_pes.ts.0.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_across_pes.ts.0.dump index 38a6da0d6f6..8b1d1337374 100644 --- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_across_pes.ts.0.dump +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_across_pes.ts.0.dump @@ -10,7 +10,7 @@ track 256: averageBitrate = 1536000 id = 1/256 containerMimeType = video/mp2t - sampleMimeType = audio/vnd.dts.hd;profile=lbr + sampleMimeType = audio/vnd.dts.hd channelCount = 8 sampleRate = 48000 language = en diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_across_pes.ts.unknown_length.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_across_pes.ts.unknown_length.dump index 38a6da0d6f6..8b1d1337374 100644 --- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_across_pes.ts.unknown_length.dump +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_across_pes.ts.unknown_length.dump @@ -10,7 +10,7 @@ track 256: averageBitrate = 1536000 id = 1/256 containerMimeType = video/mp2t - sampleMimeType = audio/vnd.dts.hd;profile=lbr + sampleMimeType = audio/vnd.dts.hd channelCount = 8 sampleRate = 48000 language = en diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.0.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.0.dump index 2e70e07f75e..61ba38d8c47 100644 --- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.0.dump +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.0.dump @@ -13,7 +13,7 @@ track 256: averageBitrate = 1536000 id = 1/256 containerMimeType = video/mp2t - sampleMimeType = audio/vnd.dts.hd;profile=lbr + sampleMimeType = audio/vnd.dts.hd channelCount = 8 sampleRate = 48000 language = en diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.1.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.1.dump index 2dc144fda79..2aa6ee986bd 100644 --- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.1.dump +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.1.dump @@ -13,7 +13,7 @@ track 256: averageBitrate = 1536000 id = 1/256 containerMimeType = video/mp2t - sampleMimeType = audio/vnd.dts.hd;profile=lbr + sampleMimeType = audio/vnd.dts.hd channelCount = 8 sampleRate = 48000 language = en diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.2.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.2.dump index 690fa210839..c63c1833def 100644 --- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.2.dump +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.2.dump @@ -13,7 +13,7 @@ track 256: averageBitrate = 1536000 id = 1/256 containerMimeType = video/mp2t - sampleMimeType = audio/vnd.dts.hd;profile=lbr + sampleMimeType = audio/vnd.dts.hd channelCount = 8 sampleRate = 48000 language = en diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.3.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.3.dump index 2ffcfa14c65..fc1aed4d522 100644 --- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.3.dump +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.3.dump @@ -13,7 +13,7 @@ track 256: averageBitrate = 1536000 id = 1/256 containerMimeType = video/mp2t - sampleMimeType = audio/vnd.dts.hd;profile=lbr + sampleMimeType = audio/vnd.dts.hd channelCount = 8 sampleRate = 48000 language = en diff --git a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.unknown_length.dump b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.unknown_length.dump index bbeb66f93b1..554573488ff 100644 --- a/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.unknown_length.dump +++ b/libraries/test_data/src/test/assets/extractordumps/ts/sample_dts_hd_ma.ts.unknown_length.dump @@ -10,7 +10,7 @@ track 256: averageBitrate = 1536000 id = 1/256 containerMimeType = video/mp2t - sampleMimeType = audio/vnd.dts.hd;profile=lbr + sampleMimeType = audio/vnd.dts.hd channelCount = 8 sampleRate = 48000 language = en