Sourcemap Resolve

背景

很久之前已经做过将混淆后的代码根据sourcemap解析为混淆前的工作了, 这次的研究更进了一步: 如何解决多次编译后的代码的sourcemap错误导致找不到原始代码的问题. 比如由ts开发的源码, 先经过tsc编译生成js, 然后经过uglifyjs压缩成为一个文件, 最后由第三方打包工具再进行一些修改, 即使每一步都存在sourcemap记录, 但此时的sourcemap也已经无法解析到原始的源码了.

调研

根据sourcemap的原理, 实际上我们是可以将这几次的生成合并成一份最终文件, 根据这份文件找到原始代码的, 因为按层层反向解析, 确实能找到每一步的操作是由哪里开始, 然后到哪里结束.

同时还发现了一个非常好用的库: magic-string, 它可以记录下原始的字符串, 并根据每一步的修改内容, 最终生成一份sourcemap文件. rollup 打包工具内部就是使用它来实现代码修改的.

问题

但是在最终尝试和完成之后, 却发现了一个比较大的问题: 如果工具链中有一个并没有根据原始的sourcemap来记录它的修改, 那么就会出现某些行信息遗漏, 导致找不到原始的代码.

flowchart TD
    A(1: ts源码) --> |编译| B(2: .js)
    B --> |uglify| C(3: .min.js)
    C --> |copy+change| D(4: .xxx.js)

1-3步, 由于都是有现成的工具, 而且它们往往会对每一行都进行操作, 所以他们的sourcemap信息里, 会将每一行的原始信息都记录下来, 但是在4步, 假设这个change只是统一全局替换了某个字符串, 那么它只会记录影响到的每一行, 如果这个字符串没有出现在这一行, 那么这行的信息在回溯查找的时候就会被遗漏掉, 存在比较大的问题. 但理论上其实也不是无法解决的, 在解析上一层sourcemap的时候如果发现有遗漏的行信息, 可以实现往后追溯补全这一行的, 但是这个实现过于复杂, 没有深入研究

开发

这里就记录下如果每一步都有完整的sourcemap信息, 将它们合并为原始sourcemap的过程. 实测经过 ts → js 再到 min.js 的代码, 可以通过这个脚本生成指向原始 ts 源码的 sourcemap.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import fs from 'fs';
import path from 'path';
import _ from 'lodash';
import { decode, encode, SourceMapMappings, SourceMapLine } from 'sourcemap-codec';
import { ExistingDecodedSourceMap, ExistingRawSourceMap, SegmentStruct, SegmentStructSource } from '../source';

/**
 * Reads a generated file, locates its trailing `//# sourceMappingURL=`
 * comment, and returns the decoded source map with `mappings` expanded from
 * VLQ form. Returns null when the file carries no source-map reference.
 *
 * Both inline (base64 data URL) and external (relative path) map URLs are
 * supported; `file` on the result is pinned to the on-disk path of
 * `filepath` so later lookups can resolve `sources` entries relative to it.
 */
const resolveSourcemap = (filepath: string): ExistingDecodedSourceMap | null => {
  const code = fs.readFileSync(filepath, 'utf-8');
  const lines = code.trim().split('\n');
  const lastLine = lines[lines.length - 1];
  if (!lastLine) {
    return null;
  }
  const match = /^\/\/#\s*sourceMappingURL=(.+)$/i.exec(lastLine);
  if (!match) {
    return null;
  }
  const url = match[1];

  const inlinePrefix = `data:application/json;base64,`;
  let rawJson: string;
  if (url.startsWith(inlinePrefix)) {
    // Inline map: the JSON sits base64-encoded inside the URL itself.
    rawJson = Buffer.from(url.slice(inlinePrefix.length), 'base64').toString();
  } else {
    // External map: the URL is a path relative to the generated file.
    rawJson = fs.readFileSync(path.resolve(path.dirname(filepath), url), 'utf-8');
  }
  const rawMap: ExistingRawSourceMap = JSON.parse(rawJson);
  // Decode the VLQ mappings up-front; downstream code works on the decoded form.
  return { ...rawMap, mappings: decode(rawMap.mappings), file: filepath };
};

/**
 * Flattens a decoded source map into a list of segment structs, pairing each
 * generated (dist) position with its original (source) position.
 *
 * Segments of length 1 only mark a generated column with no original
 * location, so they are skipped. `sources` paths are resolved relative to
 * the directory of the map's own `file`.
 */
const resolveMappingToSegments = (sourcemap: ExistingDecodedSourceMap): SegmentStruct[] => {
  const segments: SegmentStruct[] = [];
  // Hoisted out of the loop: the base directory is the same for every segment.
  const mapDir = path.dirname(sourcemap.file);
  sourcemap.mappings.forEach((line, lineIndex) => {
    // Plain iteration instead of `_.map` — the original used map purely for
    // side effects and discarded its return values.
    for (const seg of line) {
      if (seg.length === 1) {
        continue; // no original-position info in a 1-tuple
      }
      segments.push({
        dist: {
          file: sourcemap.file,
          line: lineIndex,
          column: seg[0],
        },
        source: {
          file: path.resolve(mapDir, sourcemap.sources[seg[1]]),
          line: seg[2],
          column: seg[3],
          // A 5-tuple additionally references an identifier name.
          name: seg.length === 5 ? sourcemap.names[seg[4]] : undefined,
        },
      });
    }
  });
  return segments;
};

/**
 * Follows one position a single level up the build chain: if the file at
 * `segmentSource` carries its own source map, look the position up there
 * (which may recurse further); otherwise the position already belongs to an
 * original source and is returned unchanged.
 */
const resolveSegmentLine = (segmentSource: SegmentStructSource): SegmentStructSource => {
  const upstreamMap = resolveSourcemap(segmentSource.file);
  return upstreamMap ? findSegmentAtSourcePosition(segmentSource, upstreamMap) : segmentSource;
};

/**
 * Binary-searches the given map's mappings for a segment whose generated
 * position exactly matches `segmentSource`, then recursively resolves that
 * segment's original position through any further source maps (via
 * resolveSegmentLine). Returns `segmentSource` unchanged when the line has
 * no mappings or no segment matches the column exactly.
 */
const findSegmentAtSourcePosition = (segmentSource: SegmentStructSource, sourcemap: ExistingDecodedSourceMap): SegmentStructSource => {
  const sourceSegments = sourcemap.mappings[segmentSource.line];
  if (!sourceSegments) {
    return segmentSource;
  }
  // Segments within a line are sorted by generated column, so binary search.
  let columnStart = 0;
  let columnEnd = sourceSegments.length - 1;

  while (columnStart <= columnEnd) {
    const m = (columnStart + columnEnd) >> 1;
    const sourceSegment = sourceSegments[m];
    if (sourceSegment[0] === segmentSource.column) {
      // Fixed `==` → `===` for consistency with the rest of the file.
      if (sourceSegment.length === 1) {
        // 1-tuple: no original-position info, nothing further to resolve.
        return segmentSource;
      }
      const sourceFile = sourcemap.sources[sourceSegment[1]];
      if (!sourceFile) {
        return segmentSource;
      }

      return resolveSegmentLine({
        file: path.resolve(path.dirname(sourcemap.file), sourceFile),
        line: sourceSegment[2],
        column: sourceSegment[3],
        // Fall back to the previous level's name so a symbol name found lower
        // in the chain is not lost while unwinding through unnamed segments.
        name: sourceSegment.length === 5 ? sourcemap.names[sourceSegment[4]] : segmentSource.name,
      });
    }
    if (sourceSegment[0] > segmentSource.column) {
      columnEnd = m - 1;
    } else {
      columnStart = m + 1;
    }
  }

  return segmentSource;
};

/**
 * Re-encodes a flat list of resolved segments into a standard raw source map
 * (version 3, VLQ-encoded `mappings`) for the given generated `file`.
 * Source paths and symbol names are interned into `sources` / `names` and
 * referenced by index, per the source-map v3 format.
 */
const formatSegments = (file: string, sourceRoot: string | undefined, segments: SegmentStruct[]): ExistingRawSourceMap => {
  const result: ExistingRawSourceMap = {
    version: 3,
    file: file,
    sourceRoot: sourceRoot,
    sources: [],
    sourcesContent: [],
    names: [],
    mappings: '',
  };
  // Robustness: no segments yields an empty (but still valid) map instead of
  // crashing on `segments[segments.length - 1]`.
  if (segments.length === 0) {
    result.mappings = encode([]);
    return result;
  }

  // BUG FIX: the original `Array.from({length}).fill([])` placed the SAME
  // array instance at every index, so pushing to any line pushed to all of
  // them. Build a distinct empty array per line via Array.from's map function.
  const lineCount = segments[segments.length - 1].dist.line + 1;
  const sourceMapMappings: SourceMapMappings = Array.from({ length: lineCount }, (): SourceMapLine => []);

  for (const segment of segments) {
    // Intern the source path, reusing an existing index when already present.
    let fileIndex = result.sources.indexOf(segment.source.file);
    if (fileIndex === -1) {
      fileIndex = result.sources.length;
      result.sources.push(segment.source.file);
    }

    // Intern the optional symbol name the same way.
    let nameIndex: number | null = null;
    if (segment.source.name) {
      nameIndex = result.names.indexOf(segment.source.name);
      if (nameIndex === -1) {
        nameIndex = result.names.length;
        result.names.push(segment.source.name);
      }
    }

    sourceMapMappings[segment.dist.line].push(
      nameIndex === null
        ? [segment.dist.column, fileIndex, segment.source.line, segment.source.column]
        : [segment.dist.column, fileIndex, segment.source.line, segment.source.column, nameIndex],
    );
  }
  result.mappings = encode(sourceMapMappings);
  return result;
};

export { resolveSourcemap, resolveMappingToSegments, resolveSegmentLine, formatSegments };
作者

Mosby

发布于

2020-07-01

许可协议

评论