Flv
FLV文件结构总结
flv的结构
第一,FlvFile
第二,FlvHead
第三,FlvBody
第四,FlvTag
第五,AudioData
第六,VideoData
第七,AvcVideoPacket
第八,AacAudioData
第九,AVCDecoderConfigurationRecord
第十,AudioSpecificConfig
补充:
第十一,ScriptData
第十二,ScriptDataObject
第十三,ScriptDataString
第十四,ScriptDataLongString
第十五,ScriptDataValue
第十六,ScriptDataVariable
第十七,ScriptDataDate
//flv文件由FlvHead和FlvBody构成
1,typedef struct tagFlvFile
{
FlvHead;
FlvBody;
} FlvFile
// FlvHead由9个字节构成,主要告知音视频的存在情况:第5个字节为1,只有视频;
// 第5个字节为4,只有音频;第5个字节为5,则音视频都有。
2,typedef struct tagFlvHead
{
Signature UI8 Signature byte always ‘F’ (0x46)
Signature UI8 Signature byte always ‘L’ (0x4C)
Signature UI8 Signature byte always ‘V’ (0x56)
Version UI8 File version (for example, 0x01 for FLV version 1)
TypeFlagsReserved UB[5] Must be 0
TypeFlagsAudio UB[1] Audio tags are present
TypeFlagsReserved UB[1] Must be 0
TypeFlagsVideo UB[1] Video tags are present
DataOffset UI32 Offset in bytes from start of file to start of body (that is, size of header)
} FlvHead
// FlvBody的前四个字节恒为0,后面是一个个的Tag和TagSize的序列;其中,
// TagSize为4个字节,长度是tag的数据部分长度加上tag的头信息长度。
// Tag的头信息恒为11个字节。
// Tag有三种:video、audio、scriptData
3,typedef struct tagFlvBody
{
PreviousTagSize0 UI32 Always 0
Tag1 FLVTAG First tag
PreviousTagSize1 UI32 Size of Tag1
Tag2 FLVTAG Second tag
PreviousTagSize2 UI32 Size of Tag2
…
TagN-1 FLVTAG Second-to-last tag
PreviousTagSizeN-1 UI32 Size of second-to-last tag
TagN FLVTAG Last tag
PreviousTagSizeN UI32 Size of last tag
}FlvBody
// FlvTag由两部分构成,前部分是FlvTag的信息,恒为11字节;后部分是FlvTag的
// 数据部分;
4,typedef struct tagFlvTag
{
TagType UI8 //为8,则该tag为音频,为9,则为视频,为18,则为scriptData;
DataSize UI24 //Tag的数据部分的大小,加上11等于TagSize;
TimeStamp UI24 //音视频的时间戳(低24位)
TimeStampExtended UI8 // 时间戳的扩展,代表时间戳的高8位,与TimeStamp合成32位时间戳。
StreamID UI24 // 恒为0
Data // TagType为8,9,18,分别为AudioData,VideoData和ScriptDataObject
}FlvTag
// SoundFormat
// 0 = Linear PCM, platform endian
// 1 = ADPCM
// 2 = MP3
// 3 = Linear PCM, little endian
// 4 = Nellymoser 16-kHz mono
// 5 = Nellymoser 8-kHz mono
// 6 = Nellymoser
// 7 = G.711 A-law logarithmic PCM
// 8 = G.711 mu-law logarithmic PCM
// 9 = reserved
// 10 = AAC
// 11 = Speex
// 14 = MP3 8-kHz
// 15 = Device-specific sound
// SoundRate
// 0 = 5.5-kHz
// 1 = 11-kHz
// 2 = 22-kHz
// 3 = 44-kHz
// SoundSize
// 0 = snd8Bit
// 1 = snd16Bit
// SoundType
// 0 = sndMono
// 1 = sndStereo
// AudioData的第一个字节表示音频的信息。(音频类型、采样率、位深度、是否单声道)
5,typedef struct tagAudioData
{
SoundFormat; UB[4] //
SoundRate; UB[2] //
SoundSize; UB[1] //
SoundType; UB[1] //
SoundData; UI8[size of sound data] //
}AudioData
// FrameType
// 1: keyframe (for AVC, a seekableframe)
// 2: inter frame (for AVC, a nonseekable frame)
// 3: disposable inter frame (H.263 only)
// 4: generated keyframe (reserved for server use only)
// 5: video info/command frame
// CodecID
// 1: JPEG (currently unused)
// 2: Sorenson H.263
// 3: Screen video
// 4: On2 VP6
// 5: On2 VP6 with alpha channel
// 6: Screen video version 2
// 7: AVC
6,typedef struct tagVideoData
{
FrameType; UB[4] // 判断是否是关键帧
CodecID; UB[4] // 判断视频类型
VideoData // If CodecID == 2 H263VIDEOPACKET
// If CodecID == 3 SCREENVIDEOPACKET
// If CodecID == 4 VP6FLVVIDEOPACKET
// If CodecID == 5 VP6FLVALPHAVIDEOPACKET
// If CodecID == 6 SCREENV2VIDEOPACKET
// If CodecID == 7 AVCVIDEOPACKET
}VideoData
// AVCPacketType
// 0: AVC sequence header
// 1: AVC NALU
// 2: AVC end of sequence (lower level NALU sequence ender is not required or supported)
// CompositionTime
// if AVCPacketType == 1
// Composition time offset // See ISO 14496-12, 8.15.3 for an explanation of composition times. The offset in an FLV file is always in milliseconds.
// else
// 0
// Data
// if AVCPacketType == 0
// AVCDecoderConfigurationRecord // See ISO 14496-15, 5.2.4.1 for the description of AVCDecoderConfigurationRecord
// else if AVCPacketType == 1
// One or more NALUs (can be individual slices per FLV packets; that is, full frames
// are not strictly required;NALU前面没有0x00000001起始码)
// else if AVCPacketType == 2
// Empty
7,typedef struct tagAvcVideoPacket
{
AVCPacketType UI8
CompositionTime SI24
Data UI8[n]
}AvcVideoPacket
// AACPacketType
// 0: AAC sequence header
// 1: AAC raw
// Data
// if AACPacketType == 0
// AudioSpecificConfig // The AudioSpecificConfig is explained in ISO 14496-3.
// else if AACPacketType == 1
// Raw AAC frame data //去掉了AAC帧前面的7个字节,即不含ADTS头信息
8,typedef struct tagAacAudioData
{
AACPacketType UI8
Data UI8[n]
}AacAudioData
//sps和pps放在一起,形成一个tag
//9,Class AVCDecoderConfigurationRecord
aligned(8) class AVCDecoderConfigurationRecord
{
unsigned int(8) configurationVersion = 1;
unsigned int(8) AVCProfileIndication;
unsigned int(8) profile_compatibility;
unsigned int(8) AVCLevelIndication;
bit(6) reserved = ‘111111’b;
unsigned int(2) lengthSizeMinusOne;
bit(3) reserved = ‘111’b;
unsigned int(5) numOfSequenceParameterSets;
for (i=0; i< numOfSequenceParameterSets; i++)
{
unsigned int(16) sequenceParameterSetLength ;
bit(8*sequenceParameterSetLength) sequenceParameterSetNALUnit;
}
unsigned int(8) numOfPictureParameterSets;
for (i=0; i< numOfPictureParameterSets; i++)
{
unsigned int(16) pictureParameterSetLength;
bit(8*pictureParameterSetLength) pictureParameterSetNALUnit;
}
}
// SamplingFrequencyIndex A four bit field indicating the sampling rate used.
// Value samplingFrequencyIndex
// 0x0 96000
// 0x1 88200
// 0x2 64000
// 0x3 48000
// 0x4 44100
// 0x5 32000
// 0x6 24000
// 0x7 22050
// 0x8 16000
// 0x9 12000
// 0xa 11025
// 0xb 8000
// 0xc reserved
// 0xd reserved
// 0xe reserved
// 0xf escape value
// channelConfiguration A four bit field indicating the channel configuration
// value number of channels channel to speaker mapping
// 0 - defined in audioDecoderSpecificConfig
// 1 1 center front speaker
// 2 2 left, right front speakers
// 3 3 center front speaker, left, right front speakers
// 4 4 center front speaker, left, right center front speakers,
// rear surround speakers
// 5 5 center front speaker, left, right front speakers, left
// surround, right surround rear speakers
// 6 5+1 center front speaker, left, right front speakers, left
// surround, right surround rear speakers, front low
// frequency effects speaker
// 7 7+1 center front speaker left, right center front speakers,
// left, right outside front speakers, left surround, right
// surround rear speakers, front low frequency effects speaker
// 8-15 - reserved
// 10,class AudioSpecificConfig()
aligned (8) class AudioSpecificConfig()
{
uint(4) objectProfile; /* see also Clause 2.1.1, “Object Profiles” in this document; remark :
composition profile and level are already signaled by the
profileAndLevelIndication value in DecoderConfigDescriptor */
uint(4) samplingFrequencyIndex;
if (samplingFrequencyIndex==0xf)
{
uint(24) samplingFrequency;/* in Hz */
}
uint(4) channelConfiguration;
if (objectProfile<8) /* this is T/F (AAC/TwinVq)*/
{
TFSpecificConfig tfConfig( uint(4) samplingFrequencyIndex);
}
if (objectProfile==8) /* this is Celp */
{
CelpSpecificConfig celpConfig( uint(4) samplingFrequencyIndex);
}
if (objectProfile==9 or 10 ) /* this is Parametric , HVXC or HILN*/
{
ParametricSpecificConfig paramConfig( );
}
if (objectProfile==11) /* this is TextToSpeech */
{
TTSSpecificConfig ttsConfig( );
}
if (objectProfile==12 or 13) // this is structured audio , Main Synthetic or Wavetable
// Synthesis
{
StructuredAudioSpecificConfig strucConfig( );
}
}
ScriptData字段详解:
一般来说,该Tag Data结构(scriptData)包含两个AMF包。AMF(Action Message Format)是Adobe设计的一种通用数据封装格式,在Adobe的很多产品中应用,简单来说,AMF将不同类型的数据用统一的格式来描述。第一个AMF包封装字符串类型数据,用来装入一个"onMetaData"标志,这个标志与Adobe的一些API调用有关,在此不细述。第二个AMF包封装一个数组类型,这个数组中包含了音视频信息项的名称和值。具体说明如下,大家可以参照图片上的数据进行理解。
第一个AMF包:
第1个字节表示AMF包类型,一般总是0x02,表示字符串,其他值表示意义请查阅文档。(// 0 = Number type、// 1 = Boolean type、// 2 = String type、// 3 = Object type、// 4 = MovieClip type、// 5 = Null type、// 6 = Undefined type、// 7 = Reference type、// 8 = ECMA array type、// 10 = Strict array type、// 11 = Date type、// 12 = Long string type)
第2-3个字节为UI16类型值,表示字符串的长度,一般总是0x000A("onMetaData"长度)。
后面字节为字符串数据,一般总为"onMetaData"。
第二个AMF包:
第1个字节表示AMF包类型,一般总是0x08,表示数组。
第2-5个字节为UI32类型值,表示数组元素的个数。
后面即为各数组元素的封装,数组元素为元素名称和值组成的对。表示方法如下:
第1-2个字节表示元素名称的长度,假设为L。
后面跟着为长度为L的字符串。
第L+3个字节表示元素值的类型。
(// If Type == 0 DOUBLE // 8字节
// If Type == 1 UI8 //可以认为是bool类型
// If Type == 2 SCRIPTDATASTRING
// If Type == 3 SCRIPTDATAOBJECT[n]
// If Type == 4 SCRIPTDATASTRING defining the MovieClip path
// If Type == 7 UI16
// If Type == 8 SCRIPTDATAVARIABLE[ECMAArrayLength]
// If Type == 10 SCRIPTDATAVARIABLE[n]
// If Type == 11 SCRIPTDATADATE
// If Type == 12 SCRIPTDATALONGSTRING)
后面跟着为对应值,占用字节数取决于值的类型。
把所有的数组元素都写完后,再写入一个3字节的结束符0x000009。
补充://scriptData的相关数据结构
// 脚本数据结构体
11,typedef struct tagScriptData
{
Objects SCRIPTDATAOBJECT[] //任意数量的SCRIPTDATAOBJECT结构
End UI24 // 恒为0x000009, SCRIPTDATAOBJECTEND
}ScriptData
// 脚本数据对象结构体
12,typedef struct tagScriptDataObject
{
ObjectName // SCRIPTDATASTRING , Name of the object
ObjectData // SCRIPTDATAVALUE , Data of the object
}ScriptDataObject
// ScriptDataString用于记录定义字符串数据的标签。
13,typedef struct tagScriptDataString
{
StringLength UI16 //String length in bytes
StringData STRING // String data
}ScriptDataString
// ScriptDataLongString用于记录长度大于65535字节的字符串。
14,typedef struct tagScriptDataLongString
{
StringLength UI32 //String length in bytes
StringData STRING // String data
} ScriptDataLongString
// Type
// 0 = Number type
// 1 = Boolean type
// 2 = String type
// 3 = Object type
// 4 = MovieClip type
// 5 = Null type
// 6 = Undefined type
// 7 = Reference type
// 8 = ECMA array type
// 10 = Strict array type
// 11 = Date type
// 12 = Long string type
// ScriptDataValue
// If Type == 0 DOUBLE
// If Type == 1 UI8
// If Type == 2 SCRIPTDATASTRING
// If Type == 3 SCRIPTDATAOBJECT[n]
// If Type == 4 SCRIPTDATASTRING defining the MovieClip path
// If Type == 7 UI16
// If Type == 8 SCRIPTDATAVARIABLE[ECMAArrayLength]
// If Type == 10 SCRIPTDATAVARIABLE[n]
// If Type == 11 SCRIPTDATADATE
// If Type == 12 SCRIPTDATALONGSTRING
// ScriptDataValueTerminator
// If Type == 3 SCRIPTDATAOBJECTEND
// If Type == 8 SCRIPTDATAVARIABLEEND
15,typedef struct tagScriptDataValue
{
Type UI8;
ECMAArrayLength // If Type = 8, UI32
ScriptDataValue
ScriptDataValueTerminator
}ScriptDataValue
// ScriptDataVariable由变量名和变量值组成
16,typedef struct tagScriptDataVariable
{
VariableName // SCRIPTDATASTRING Name of the variable
VariableData // SCRIPTDATAVALUE Data of the variable
}ScriptDataVariable
// DateTime DOUBLE Number of milliseconds since Jan 1, 1970 UTC.
17,typedef struct tagScriptDataDate
{
DateTime DOUBLE // Number of milliseconds since Jan 1, 1970 UTC
LocalDateTimeOffset SI16 // Local time offset in minutes from UTC
}ScriptDataDate