forked from MikeKovarik/exifr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
jpeg.mjs
238 lines (217 loc) · 8.54 KB
/
jpeg.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import {FileParserBase, AppSegmentParserBase} from '../parser.mjs'
import {fileParsers, segmentParsers} from '../plugins.mjs'
// Value of the first two bytes of a JPEG file (the SOI marker, ff d8).
// JpegFileParser.canHandle() compares the first two bytes of a file against it.
const JPEG_SOI = 0xffd8
// Markers are matched byte by byte: the first byte has to be ff,
// the second byte (the MARKER_2_* constants below) tells which marker it is.
const MARKER_1 = 0xff
// Lower and upper bound of the APP-n marker range checked by isAppMarker().
const MARKER_2_APP0 = 0xe0 // ff e0
const MARKER_2_APP15 = 0xef // ff ef
// Second bytes of the JPEG structure markers recognized by isJpgMarker().
const MARKER_2_SOF0 = 0xc0 // ff c0
const MARKER_2_SOF2 = 0xc2 // ff c2
const MARKER_2_DHT = 0xc4 // ff c4
const MARKER_2_DQT = 0xdb // ff db
const MARKER_2_DRI = 0xdd // ff dd
const MARKER_2_SOS = 0xda // ff da
const MARKER_2_COMMENT = 0xfe // ff fe
/**
 * Tells whether the second byte of a marker belongs to one of the JPEG structure
 * segments this file knows about (SOF0, SOF2, DHT, DQT, DRI, SOS, COM).
 * @param {number} marker2 - second byte of the marker (the byte following ff)
 * @returns {boolean} true when the byte strictly equals one of the known marker bytes
 */
function isJpgMarker(marker2) {
	switch (marker2) {
		case MARKER_2_SOF0:
		case MARKER_2_SOF2:
		case MARKER_2_DHT:
		case MARKER_2_DQT:
		case MARKER_2_DRI:
		case MARKER_2_SOS:
		case MARKER_2_COMMENT:
			return true
		default:
			return false
	}
}
/**
 * Tells whether the second byte of a marker falls into the APP-n range (ff e0 - ff ef).
 * @param {number} marker2 - second byte of the marker (the byte following ff)
 * @returns {boolean} true when MARKER_2_APP0 <= marker2 <= MARKER_2_APP15
 */
function isAppMarker(marker2) {
	const reachesApp0 = marker2 >= MARKER_2_APP0
	if (!reachesApp0) return false
	return marker2 <= MARKER_2_APP15
}
/**
 * Asks every parser registered in segmentParsers, in iteration order, whether it can
 * handle the segment and returns the key of the first one that says yes.
 * @param {*} buffer - passed through to Parser.canHandle()
 * @param {number} offset - passed through to Parser.canHandle()
 * @param {number} length - passed through to Parser.canHandle()
 * @returns {string|undefined} key of the first matching parser, undefined when none matches
 */
function getSegmentType(buffer, offset, length) {
	for (const entry of segmentParsers) {
		const [segmentType, SegmentParser] = entry
		if (!SegmentParser.canHandle(buffer, offset, length)) continue
		return segmentType
	}
	return undefined
}
// https://en.wikipedia.org/wiki/JPEG_File_Interchange_Format
// https://sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html
// http://dev.exiv2.org/projects/exiv2/wiki/The_Metadata_in_JPEG_files
// JPG contains SOI, APP1, [APP2, ... APPn], DQT, DHT, and more segments
// APPn contain metadata about the image in various formats. There can be multiple APPn segments,
// even multiple segments of the same type.
// APP1 contains the basic and most important EXIF data.
// APP2 contains ICC
// APP13 contains IPTC
// the main APP1 (the one with EXIF) is often followed by another APP1 with XMP data (in XML format).
// Structure of APPn (APP1, APP2, APP13, etc...):
// - First two bytes are the marker FF En (e.g. FF E1 for APP1)
// - 3rd & 4th bytes are length of the APPn segment
// - Followed by a few bytes of segment identification - describing what type of content is there.
// Structure of TIFF (APP1-EXIF):
// - FF E1 - marker
// - xx xx - Size
// - 45 78 69 66 00 00 / ASCII string 'Exif\0\0'
// - TIFF HEADER
// - 0th IFD + value
// - 1st IFD + value
// - may contain additional GPS, Interop, SubExif blocks (pointed to from IFD0)
/**
 * File parser for JPEG images.
 *
 * Walks through the markers of the file, records the position of the wanted APP-n segments
 * (recognized by the parsers registered in `segmentParsers`) and hands them over for parsing.
 * Optionally also records JPEG structure segments and unrecognized APP-n segments.
 */
export class JpegFileParser extends FileParserBase {

	static type = 'jpeg'

	/**
	 * @param {*} file - not used by this check
	 * @param {number} firstTwoBytes - first two bytes of the file read as a single uint16
	 * @returns {boolean} true when the file starts with the JPEG SOI marker (ff d8)
	 */
	static canHandle(file, firstTwoBytes) {
		return firstTwoBytes === JPEG_SOI
	}

	// Wanted APP-n segments found so far (positions returned by Parser.findPosition() plus `type`).
	appSegments = []
	// JPEG structure segments ({offset, length, marker}), only filled when options.recordJpegSegments is set.
	jpegSegments = []
	// APP-n segments that are not handled as wanted segments, only filled when options.recordUnknownSegments is set.
	unknownSegments = []

	/**
	 * Finds the wanted APP-n segments, passes them to readSegments(), merges segments that are split
	 * into multiple parts and creates parsers for the result.
	 * readSegments() and createParsers() are not defined in this class (presumably inherited from FileParserBase).
	 */
	async parse() {
		await this.findAppSegments()
		await this.readSegments(this.appSegments)
		this.mergeMultiSegments()
		this.createParsers(this.mergedAppSegments || this.appSegments)
	}

	/**
	 * Prepares the state used by findAppSegments() and findAppSegmentsInRange().
	 * @param {true|string[]|undefined} wanted
	 *   - `true`: look for every registered segment type (sets this.findAll)
	 *   - array: look for the listed types that are both enabled in options and registered in segmentParsers
	 *   - undefined: look for every registered type that is enabled in options
	 */
	setupSegmentFinderArgs(wanted) {
		if (wanted === true) {
			this.findAll = true
			this.wanted = new Set(segmentParsers.keyList())
			// NOTE(review): this.remaining is not assigned in this branch, so the chunked loop in
			// findAppSegments() sees whatever an earlier call left there (or undefined) - confirm with callers.
		} else {
			if (wanted === undefined)
				wanted = segmentParsers.keyList().filter(key => this.options[key].enabled)
			else
				wanted = wanted.filter(key => this.options[key].enabled && segmentParsers.has(key))
			this.findAll = false
			// `remaining` shrinks as segments are found, `wanted` stays intact for lookups.
			this.remaining = new Set(wanted)
			this.wanted = new Set(wanted)
		}
		this.unfinishedMultiSegment = false
	}

	/**
	 * Searches the file for the wanted APP-n segments and stores them in this.appSegments.
	 * In chunked mode it keeps reading further chunks until everything wanted is found,
	 * the raw image data (SOS) is reached, or no more chunks can be read.
	 * @param {number} [offset=0] - byte offset at which the search starts
	 * @param {true|string[]} [wantedArray] - see setupSegmentFinderArgs()
	 * @returns {Promise<undefined>}
	 */
	async findAppSegments(offset = 0, wantedArray) {
		this.setupSegmentFinderArgs(wantedArray)
		let {file, findAll, wanted, remaining} = this
		if (!findAll && this.file.chunked) {
			// Multi-segment types can be spread all over the file. If any of the wanted types is one
			// (and the options allow processing it), read the whole file upfront.
			findAll = Array.from(wanted).some(type => {
				let Parser = segmentParsers.get(type)
				let segOpts = this.options[type]
				return Parser.multiSegment && segOpts.multiSegment
			})
			if (findAll) await this.file.readWhole()
		}
		// findAppSegmentsInRange() returns offset where next segment starts. If we didn't store it, next time we continue
		// we might start in middle of data segment and would uselessly read & parse through noise.
		offset = this.findAppSegmentsInRange(offset, file.byteLength)
		// If user only requests TIFF it's not necessary to read any more chunks. Because EXIF in jpg is always near the start of the file.
		if (this.options.onlyTiff) return
		// The very first scan already reached raw jpeg image data. There is nothing left to look for,
		// so don't read another chunk just to find that out (same check as inside the loop below).
		if (offset === undefined) return
		if (file.chunked) {
			// We're in chunked mode and couldn't find all wanted segments.
			// We'll read couple more chunks and parse them until we've found everything or hit chunk limit.
			// EOF = End Of File
			let eof = false
			while (remaining.size > 0 && !eof && (file.canReadNextChunk || this.unfinishedMultiSegment)) {
				let {nextChunkOffset} = file
				// We might have previously found beginning of segment, but only first half of it may be read in memory.
				let hasIncompleteSegments = this.appSegments.some(seg => !this.file.available(seg.offset || seg.start, seg.length || seg.size))
				// Start reading where the next block begins. That way we avoid reading part of file where some jpeg image data may be.
				// Unless there's an incomplete segment. In this case start reading right where the last chunk ends to get the whole segment.
				if (offset > nextChunkOffset && !hasIncompleteSegments)
					eof = !await file.readNextChunk(offset)
				else
					eof = !await file.readNextChunk(nextChunkOffset)
				offset = this.findAppSegmentsInRange(offset, file.byteLength)
				// search for APP segments was cancelled because we reached raw jpeg image data.
				if (offset === undefined) return
			}
		}
	}

	/**
	 * Scans bytes in the range <offset, end) for markers and records the segments it finds.
	 * @param {number} offset - byte offset at which the scan starts
	 * @param {number} end - byte offset at which the available data ends
	 * @returns {number|undefined} offset at which the scan stopped (where to continue next time),
	 *   or undefined when the SOS marker was reached and options.stopAfterSos is not false
	 */
	findAppSegmentsInRange(offset, end) {
		// TLDR: Make space for MARKER and LENGTH.
		// Marker (2 bytes) and length (2 bytes) are read together as getUint8(offset), getUint8(offset + 1)
		// and getUint16(offset + 2), so the last offset at which all of them fit is end - 4.
		// Going any further would read the length out of bounds and crash if the trailing bytes look like a marker.
		end -= 3
		let {file, findAll, wanted, remaining, options} = this
		let marker2, length, type, Parser, seg, segOpts
		for (; offset < end; offset++) {
			if (file.getUint8(offset) !== MARKER_1) continue
			// Reading uint8 instead of uint16 to prevent re-reading subsequent bytes.
			marker2 = file.getUint8(offset + 1)
			if (isAppMarker(marker2)) {
				// WE FOUND APP-N SEGMENT
				length = file.getUint16(offset + 2)
				type = getSegmentType(file, offset, length)
				if (type && wanted.has(type)) {
					// known and parseable segment found
					Parser = segmentParsers.get(type)
					seg = Parser.findPosition(file, offset)
					segOpts = options[type]
					seg.type = type
					this.appSegments.push(seg)
					if (!findAll) {
						if (Parser.multiSegment && segOpts.multiSegment) {
							// Found multisegment segment and options allows to process them.
							this.unfinishedMultiSegment = seg.chunkNumber < seg.chunkCount
							// Clear the segment from remaining if we've reached the last multi-segment.
							if (!this.unfinishedMultiSegment) remaining.delete(type)
						} else {
							// Not a multisegment or we're not allowed to process them.
							remaining.delete(type)
						}
						// stop parsing altogether if we've found all requested segments.
						if (remaining.size === 0) break
					}
				} else if (options.recordUnknownSegments) {
					// Only segments that were NOT picked up above end up here:
					// either unknown/unsupported (or not wanted) appN segment or just a noise.
					seg = AppSegmentParserBase.findPosition(file, offset)
					seg.marker = marker2
					this.unknownSegments.push(seg)
				}
				// Skip over the segment. Together with offset++ of the loop this moves
				// the offset by length + 2, that is by the two marker bytes and `length` bytes after them.
				offset += length + 1
			} else if (isJpgMarker(marker2)) {
				// WE FOUND JPEG FILE STRUCTURE SEGMENT
				length = file.getUint16(offset + 2)
				// Compressed data follows after SOS. SOS marker does not have (useful) length bytes.
				// Lot of FF00 follows but those do not have length either. We can stop reading here.
				if (marker2 === MARKER_2_SOS && options.stopAfterSos !== false)
					return undefined
				if (options.recordJpegSegments)
					this.jpegSegments.push({offset, length, marker: marker2})
				offset += length + 1
			}
		}
		return offset
	}

	/**
	 * Goes through this.appSegments and merges all segments that are split between multiple chunks.
	 * The processed array is written to this.mergedAppSegments. Nothing is written when
	 * none of the found segments is flagged as multiSegment.
	 */
	mergeMultiSegments() {
		let hasMultiSegments = this.appSegments.some(seg => seg.multiSegment)
		if (!hasMultiSegments) return
		let grouped = groupBy(this.appSegments, 'type')
		this.mergedAppSegments = grouped.map(([type, typeSegments]) => {
			let Parser = segmentParsers.get(type, this.options)
			if (Parser.handleMultiSegments) {
				// The parser knows how to stitch its parts together.
				let chunk = Parser.handleMultiSegments(typeSegments)
				return {type, chunk}
			} else {
				// No merging available, only the first segment of the type is kept.
				return typeSegments[0]
			}
		})
	}

	/**
	 * @param {string} type - segment type key
	 * @returns {object|undefined} first already found segment of the given type
	 */
	getSegment(type) {
		return this.appSegments.find(seg => seg.type === type)
	}

	/**
	 * Returns an already found segment of the given type. If there is none, searches the file
	 * from the start for that single type and tries again.
	 * @param {string} type - segment type key
	 * @returns {Promise<object|undefined>} the segment, or undefined when the file does not contain it
	 */
	async getOrFindSegment(type) {
		let seg = this.getSegment(type)
		if (seg === undefined) {
			await this.findAppSegments(0, [type])
			seg = this.getSegment(type)
		}
		return seg
	}

}
/**
 * Groups items of an array by the value of one of their properties.
 * Groups are listed in the order in which their key first appears,
 * items inside a group keep their original order.
 * @param {object[]} array - items to group
 * @param {string} key - name of the property to group by
 * @returns {Array<[*, object[]]>} array of [groupKey, items] pairs
 */
function groupBy(array, key) {
	const buckets = new Map()
	for (const element of array) {
		const bucketKey = element[key]
		let bucket = buckets.get(bucketKey)
		if (bucket === undefined) {
			bucket = []
			buckets.set(bucketKey, bucket)
		}
		bucket.push(element)
	}
	return [...buckets]
}
// Register JpegFileParser in fileParsers (imported from ../plugins.mjs) under the 'jpeg' key.
fileParsers.set('jpeg', JpegFileParser)