Is it possible to remove files with same name from source? For example, let's say I have the following folder structure
a
---file1.txt
---file2.txt
---file3.txt
b
---file1.txt
When I select both folders as the source, I want the destination folder to contain only the files that aren't duplicates. In the example above, the result would be
result
---file2.txt
---file3.txt
Optionally, it would be great if I could somehow filter out the duplicates and write them to a separate folder.
By duplicates, I mean explicitly duplicates by name, file content is not important.
It took me a while to get there, but try this:
var gulp = require('gulp');
var fs = require('fs');
var path = require('path');
var flatten = require('gulp-flatten');
var filter = require('gulp-filter');
// Source folders that are compared for duplicate file names
// (hard-coded here to match the example in the question).
const folders = [
  'a',
  'b',
  'c',
];
/**
 * Predicate used by gulp-filter for each file gathered into the result folder.
 *
 * A file counts as a duplicate when a file with the same base name exists in
 * at least two of the configured `folders` (resolved relative to the current
 * working directory). Duplicates are deleted from disk at their original path
 * (`file.history[0]`) and rejected by the filter.
 *
 * @param {{history: string[]}} file - file object; only `history[0]` is read.
 * @returns {boolean} `true` if the file is unique, `false` if it is a duplicate.
 */
function isUnique(file) {
  const originalPath = file.history[0];
  // path.basename copes with every separator the platform accepts,
  // unlike a manual split on path.sep
  const baseName = path.basename(originalPath);

  let count = 0;
  for (const folder of folders) {
    if (fs.existsSync(path.join('.', folder, baseName))) {
      count++;
      // two hits are enough to know it is a duplicate; stop scanning
      if (count >= 2) break;
    }
  }

  if (count >= 2) {
    // the file is a duplicate: remove it from the result directory
    fs.unlinkSync(originalPath);
    return false;
  }
  return true;
}
// Default task: runs after 'clump', passes every .txt file in ./result
// through the isUnique filter and sends the rejected files to 'duplicates'.
gulp.task('default', ['clump'], function () {
  // filter that rejects duplicates; restore: true exposes the rejected files
  const duplicateFilter = filter(
    (file) => isUnique(file),
    {restore: true, passthrough: false}
  );

  // piping through the filter is what actually performs the filtering
  const uniqueFiles = gulp.src('./result/*.txt').pipe(duplicateFilter);

  // separate stream carrying the duplicates that were filtered out
  duplicateFilter.restore.pipe(gulp.dest('duplicates'));

  return uniqueFiles;
});
// 'clump' runs first.
// Gathers all .txt files into the 'result' directory, flattened because the
// original folder structure is not wanted.
gulp.task('clump', function () {
  return gulp.src([
    './**/*.txt',
    // keep installed packages and this gulpfile's own output folders
    // out of the gathered set
    '!./node_modules/**',
    '!./result/**',
    '!./duplicates/**'
  ])
    .pipe(flatten())
    .pipe(gulp.dest('result'));
});
Run it with 'gulp'. The default task will trigger the 'clump' task first.
Since your question didn't require that any particular version of a duplicated file be kept - the newest, for example - I haven't worried about that here. If you want the 'result' folder to keep each version of a duplicated file, such as file1.txt from one folder and file1.txt from another, the copies would obviously have to be renamed; that could be done in the 'clump' task.
Let me know if this works for you.