1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
// Note: requires Node.js 12.
const fs = require('fs');
const os = require('os');
const path = require('path');
const readline = require('readline');
const child_process = require('child_process')
// Patterns reused throughout the script. Written as plain regex literals;
// none of them carries the `g` flag, so `.test()` / `.exec()` are stateless.

// Matches a `< figure ...>` tag; capture 1 is everything between the tag name
// and the closing `>` (i.e. the attribute text).
const rexImgTag = /<\ figure\s+([^>]*)[/]?>/;

// Attribute extractors; capture 1 is the (non-empty) quoted value.
const regImgSrcAttribute = /src=\"([^"]+)"/;
const regImgAltAttribute = /alt=\"([^"]+)"/;
const regImgWidthAttribute = /width=\"([^"]+)"/;

// Matches a markdown image `![description](path)`; capture 1 is the
// description, capture 2 is the path.
const rexMarkdownImage = /\!\[([^\]]*)\]\(([^\)]+)\)/;
/**
 * moveFileSafeSync - relocate a file from `src` to `dest`, creating any
 * missing folders on the way to the destination first.
 *
 * The move is done as a copy followed by a delete of the source. When the
 * source is missing but the destination is already there, the call is a
 * no-op (the file is assumed to have been moved by an earlier run).
 *
 * @param src - the source file path
 * @param dest - the destination file path
 * @returns {undefined}
 */
function moveFileSafeSync(src, dest) {
  // Source gone and destination present: nothing left to do.
  const sourceMissing = !fs.existsSync(src);
  if (sourceMissing && fs.existsSync(dest)) {
    return;
  }

  // Make sure the destination folder exists before copying into it.
  const targetFolder = path.dirname(dest);
  if (!fs.existsSync(targetFolder)) {
    fs.mkdirSync(targetFolder, { recursive: true });
  }

  fs.copyFileSync(src, dest);
  fs.unlinkSync(src);
}
/**
 * downloadFile - download a file from the web with `wget`, ensuring the
 * folder for the destination exists.
 *
 * Only the directory part of `dest` is used: it is handed to wget's `-P`
 * option, so the downloaded file's name is chosen by wget from the URL.
 *
 * wget is started directly with an argument array (no shell), so quotes, `$`,
 * backticks or `;` inside a URL taken from a post cannot break or inject into
 * a command line.
 *
 * @param src - the source URL
 * @param dest - the download destination
 * @returns {Buffer} whatever wget wrote to stdout
 * @throws if wget cannot be started or exits with a non-zero status
 */
function downloadFile(src, dest) {
  const directory = path.dirname(dest);
  if (!fs.existsSync(directory)) {
    fs.mkdirSync(directory, { recursive: true });
  }
  return child_process.execFileSync('wget', [src, '-P', directory]);
}
// Thanks: https://gist.github.com/kethinov/6658166
/**
 * findInDir - walk `dir` recursively and collect every non-directory entry
 * whose full path matches `filter`.
 *
 * Entries are inspected with lstat, so a symbolic link is never descended
 * into; it is tested against `filter` like any other non-directory entry.
 *
 * @param dir - the directory to search
 * @param filter - a RegExp tested against each entry's joined path
 * @param fileList - accumulator array; matches are appended to it
 * @returns {string[]} `fileList`, with the matching paths appended
 */
function findInDir (dir, filter, fileList = []) {
  for (const entryName of fs.readdirSync(dir)) {
    const entryPath = path.join(dir, entryName);

    if (fs.lstatSync(entryPath).isDirectory()) {
      // Recurse, sharing the same accumulator.
      findInDir(entryPath, filter, fileList);
      continue;
    }

    if (filter.test(entryPath)) {
      fileList.push(entryPath);
    }
  }
  return fileList;
}
/**
 * processPost - rewrite one post so that the images it references live in an
 * `images` folder next to the post.
 *
 * The post is read line by line. On each line, the first `< figure ...>` tag
 * and the first markdown image (`![description](path)`) are examined. Unless
 * the image path already starts with `images/`, the image is downloaded (path
 * starts with "http") or moved from under `rootPath`, and the reference is
 * rewritten to `images/<file name>`.
 *
 * Output goes to `<postPath>.updated`. Once that file is fully written it
 * replaces the post if anything changed, and is deleted otherwise.
 *
 * @param rootPath - folder that non-http image paths are resolved against
 * @param postPath - path of the post to process
 * @returns {Promise<undefined>} resolves once the post has been rewritten (or
 * left untouched); rejects if reading, writing or relocating an image fails
 */
function processPost(rootPath, postPath) {
  return new Promise((resolve, reject) => {
    // Get some details about the post which will be useful.
    const postDirectory = path.dirname(postPath);
    const postFileName = path.basename(postPath);
    console.log(` Processing: ${postFileName}`);

    // Create the input and output streams. Track whether we change the file.
    const updatedPostPath = `${postPath}.updated`;
    const inputStream = fs.createReadStream(postPath);
    const outputStream = fs.createWriteStream(updatedPostPath, { encoding: 'utf8' });
    let changed = false;
    let failed = false;

    // Read the file line-wise.
    const rl = readline.createInterface({
      input: inputStream,
      terminal: false,
      historySize: 0,
    });

    // Abort exactly once: stop reading, drop the streams and reject. The flag
    // is set first because rl.close() emits 'close' synchronously.
    const fail = (err) => {
      if (failed) return;
      failed = true;
      rl.close();
      inputStream.destroy();
      outputStream.destroy();
      reject(err);
    };

    const failReading = (err) => {
      console.log(` Error reading file: ${err}`);
      fail(err);
    };

    // Returns capture group 1 of `regex` in `text`, or false when there is no
    // match (false keeps the "src: ..., alt: ..." log output unchanged).
    const attributeValue = (regex, text) => {
      const match = regex.exec(text);
      return match ? match[1] : false;
    };

    // Downloads or moves one image into the post's images folder and returns
    // the new path relative to the post.
    const relocateImage = (imageSource) => {
      const imageFileName = path.basename(imageSource);
      // The relative path ends up in the post's markup, so it always uses
      // forward slashes; path.join() would emit backslashes on Windows and
      // the "already processed" check (/^images\//) would never match.
      const newRelativePath = path.posix.join("images", imageFileName);
      const newAbsolutePath = path.join(postDirectory, newRelativePath);

      // If the file is on the web, we need to download it...
      if (/^http/.test(imageSource)) {
        console.log(` Downloading '${imageSource}' to '${newAbsolutePath}'...`);
        downloadFile(imageSource, newAbsolutePath);
      }
      // ...otherwise we can just move it.
      else {
        const absoluteSrc = path.join(rootPath, imageSource);
        moveFileSafeSync(absoluteSrc, newAbsolutePath);
        console.log(` Copied '${absoluteSrc}' to '${newAbsolutePath}'`);
      }
      return newRelativePath;
    };

    // Returns the line with its image references rewritten (if needed).
    const rewriteLine = (line) => {
      let updatedLine = line;

      // Check for html image tags.
      const imageTagResults = rexImgTag.exec(updatedLine);
      if (imageTagResults) {
        const [imageTag, imageTagInner] = imageTagResults;
        console.log(` Found image tag contents: ${imageTagInner}`);

        // Rip out the component parts.
        const src = attributeValue(regImgSrcAttribute, imageTagInner);
        const alt = attributeValue(regImgAltAttribute, imageTagInner);
        const width = attributeValue(regImgWidthAttribute, imageTagInner);
        console.log(` src: ${src}, alt: ${alt}, width: ${width}`);

        if (!src) {
          // Nothing to relocate; path.basename(false) would throw.
          console.log(` skipping, no src attribute`);
        } else if (/^images\//.test(src)) {
          // The source is already in the appropriate location.
          console.log(` skipping, already processed`);
        } else {
          const newRelativePath = relocateImage(src);

          // Now re-write the image tag.
          const newImgTag = `< figure src="${newRelativePath}"${alt ? ` alt="${alt}"` : ''}${width ? ` width="${width}"` : ''} >`;
          console.log(` Changing : ${imageTag}`);
          console.log(` To : ${newImgTag}`);
          // Function replacer: "$&", "$1", ... inside the alt text or path
          // must be inserted literally, not treated as replacement patterns.
          updatedLine = updatedLine.replace(imageTag, () => newImgTag);
          changed = true;
        }
      }

      // Check for markdown image tags. This runs even when the html tag above
      // was skipped, so a markdown image on the same line is not missed.
      const markdownImageCaptures = rexMarkdownImage.exec(updatedLine);
      if (markdownImageCaptures) {
        const [markdownImage, markdownImageDescription, markdownImagePath] = markdownImageCaptures;
        console.log(` Found markdown image: ${markdownImagePath}`);

        if (/^images\//.test(markdownImagePath)) {
          // The source is already in the appropriate location.
          console.log(` skipping, already processed`);
        } else {
          const newRelativePath = relocateImage(markdownImagePath);

          // Now re-write the markdown.
          const newMarkdownImage = `![${markdownImageDescription}](${newRelativePath})`;
          console.log(` Changing : ${markdownImage}`);
          console.log(` To : ${newMarkdownImage}`);
          updatedLine = updatedLine.replace(markdownImage, () => newMarkdownImage);
          changed = true;
        }
      }

      return updatedLine;
    };

    // Process each line, looking for image info.
    rl.on('line', (line) => {
      // Lines already buffered may still be delivered after a failure.
      if (failed) return;
      try {
        outputStream.write(rewriteLine(line) + os.EOL);
      } catch (err) {
        // A failed download/move must reject the promise instead of escaping
        // the event handler as an uncaught exception.
        fail(err);
      }
    });

    // Read errors surface on the underlying stream; the readline listener is
    // kept as well.
    inputStream.on('error', failReading);
    rl.on('error', failReading);
    outputStream.on('error', fail);

    // All input has been read: close the output. Writes are asynchronous, so
    // the file on disk is only complete once 'finish' fires.
    rl.on('close', () => {
      if (failed) return;
      outputStream.end();
    });

    outputStream.on('finish', () => {
      if (failed) return;
      console.log(` Completed, written to: ${updatedPostPath}`);
      try {
        if (changed) moveFileSafeSync(updatedPostPath, postPath);
        else fs.unlinkSync(updatedPostPath);
      } catch (err) {
        return fail(err);
      }
      return resolve();
    });
  });
}
// Script entry point: find every markdown post under the source directory and
// co-locate its images.
console.log("collect-images: Tool to co-locate blog post images");
console.log("");

// Get the directory to search. Arg 0 is node, Arg 1 is the script path, Arg 2
// onwards are commandline arguments.
const sourceDirectory = process.argv[2] || process.cwd();
console.log(`Source Directory: ${sourceDirectory}`);
// Non-http image paths are resolved against the root directory; it defaults
// to the source directory.
const rootDirectory = process.argv[3] || sourceDirectory;
console.log(`Root Directory: ${rootDirectory}`);
console.log("");

// Find all blog posts.
const postPaths = findInDir(sourceDirectory, /\.md$/);

// Process each path. processPost is asynchronous, so wait for every post to
// settle before reporting completion, and surface failures instead of leaving
// the rejections unhandled.
Promise.all(postPaths.map((postPath) => processPost(rootDirectory, postPath)))
  .then(() => {
    // Let the user know we're done.
    console.log(`Completed processing ${postPaths.length} file(s)`);
  })
  .catch((err) => {
    console.error(`Failed to process posts: ${err}`);
    process.exitCode = 1;
  });
|