When a duplicate file is found, the scan job creates a Notes document in the database that contains the MD5 checksum, the filepaths of all occurrences and the filesize. We are using a view control to display those file documents in the UI, sorted by the total file size of all file instances in descending order, so that you can easily find the files that fill the hard disk the most.
To let the user select a directory, the sample uses the
File Dialog Tools of the Platform UI API:
//Opens a platform-native directory picker and, if the user confirms a
//selection, appends the chosen path to the "folderPaths" UI component
//(paths are stored one per line).
var x2eConn = X2E.createConnection();
var platformUI = com.x2e.PlatformUIAPI.getUI(x2eConn);
var dialogTools = platformUI.getFileDialogTools();
var chosenDir = dialogTools.showDirectoryDialog(null, "Please select a directory for the scan process");
if (chosenDir) {
    var pathsField = getComponent("folderPaths");
    var currentPaths = pathsField.getValue();
    //separate entries with a newline unless the field is still empty
    var updatedPaths = (currentPaths != "") ? currentPaths + "\n" + chosenDir : currentPaths + chosenDir;
    pathsField.setValue(updatedPaths);
}
This API method uses the Eclipse class
DirectoryDialog internally which displays a platform specific selection dialog.
Job scheduling
Here is the code snippet that reads the list of the selected directories and schedules the background Job:
//read the scan paths:
var paths=getComponent("folderPaths").getValue().replace("\r", "");
if (!paths)
return;
var pathsArr=paths.split("\n");
//add them to an ArrayList; passing the JavaScript array to the Job
//would not work, because both JavaScript environments are running
//in different engines
var pathsList=new java.util.ArrayList();
for (var i=0; i<pathsArr.length; i++) {
pathsList.add(pathsArr[i]);
}
var conn=X2E.createConnection();
var pUI=com.x2e.PlatformUIAPI.getUI(conn);
var jobTools=com.x2e.JobAPI.getTools(conn);
//parameters for background Job execution
var jobName="Scanning folders";
var server=database.getServer();
var filePath=database.getFilePath();
var ssjsLibName="FileScanner";
var methodCallJS="doScan();";
//create the background job:
var jsJob=jobTools.createJavascriptInLibJob(jobName, server, filePath, ssjsLibName,
methodCallJS);
//declare a new global JavaScript variable to pass the scan folder paths
jsJob.addJSProperty("scanpaths", pathsList);
jsJob.setUser(true);
//schedule job execution
jsJob.schedule();
pUI.logToStatusBar("Scheduled scan job");
When the Job is executed, it reads the current content of the SSJS library design element, appends the specified JavaScript code in
methodCallJS
, declares a few global variables and launches the JavaScript code with the
Rhino scripting engine.
As you may notice in the code snippet above, the method
addJSProperty
enables you to declare your own global variables, in our case a Java ArrayList with the scan paths.
It is important to note that you cannot pass JavaScript
objects between an XPages application and a scheduled background job,
because both environments are using different JavaScript engines. That's
why we do not pass a JavaScript string array, but use the ArrayList
instead.
There are a few other global variables that we define automatically for you:
session
- use this to read/write data in Lotus Notes Java APIs
database
- database in which the SSJS is located
nsfclassloader
- classloader used to load Java code and resources from the database
progress
- wraps an
org.eclipse.core.runtime.IProgressMonitor to report progress and check for cancellation
context
- a temporary document that contains string/double/integer parameter values passed via addJSProperty(String, Object)
Job implementation
The code in the
FileScanner
library runs in two phases: In phase 1, it traverses the scan directories and their subdirectories recursively and builds a list with all filepaths that need to be scanned for duplicates.
We then report the list size to Eclipse so that the Job progress can be properly calculated and displayed.
In phase 2, we calculate the MD5 checksum for each file and check if the same checksum has been calculated before for a file in a different location.
//this method is called in the context of an XPages2Eclipse JavaScript job,
//running outside of the XPages context, so that the XPages application
//can be closed while the scan is in progress
//Entry point of the XPages2Eclipse background Job. Runs outside of the
//XPages request context, so the XPages application can be closed while
//the scan is in progress.
//
//Phase 1 walks the configured base directories breadth-first and collects
//every file path; phase 2 computes the MD5 hash of each file and records
//duplicates in "File" documents in the current database.
//
//Uses globals provided by the Job framework / addJSProperty():
//  scanpaths         - java.util.ArrayList with the base directory paths
//  progress          - wraps org.eclipse.core.runtime.IProgressMonitor
//  database          - NSF database containing this SSJS library
//  eclipseconnection - connection handle for the platform UI API
function doScan() {
	var pUI=com.x2e.PlatformUIAPI.getUI(eclipseconnection);

	//Phase 1: build the complete file list first, so that phase 2 can
	//report a precise amount of work to the progress monitor.
	//-1==IProgressMonitor.UNKNOWN; causes the progress bar to animate
	progress.beginTask("Building file list", -1);
	var allFiles=new java.util.ArrayList();
	var queue=new java.util.LinkedList();
	for (var i=0; i<scanpaths.size(); i++) {
		//global variable "scanpaths" contains the base paths
		var f=new java.io.File(scanpaths.get(i));
		if (!f.exists()) {
			pUI.logToStatusBar("File does not exist. Skipping "+scanpaths.get(i));
			continue;
		}
		else {
			pUI.logToStatusBar("Starting recursive scan of "+f.getName());
			queue.add(f);
		}
		//breadth-first traversal of the directory tree
		while (!queue.isEmpty()) {
			if (progress.isCanceled()) {
				pUI.logToStatusBar("Job cancelled by user");
				return;
			}
			var currFile=queue.removeFirst();
			if (currFile.isDirectory()) {
				var content=currFile.listFiles();
				//listFiles() returns null if the directory cannot be read
				if (content) {
					for (var j=0; j<content.length; j++) {
						queue.add(content[j]);
					}
				}
			}
			else {
				allFiles.add(currFile.getAbsolutePath());
			}
		}
	}

	if (progress.isCanceled()) {
		pUI.logToStatusBar("Job cancelled by user");
		return;
	}

	//Phase 2: hash every collected file; report the exact number of
	//work units that need to be processed
	progress.beginTask("Scanning file content", allFiles.size());
	//maps MD5 hash -> "x"+filepath (hash seen once so far) or the UNID
	//of the File document (hash seen two or more times)
	var md5LookupMap=new java.util.HashMap();
	for (var i=0; i<allFiles.size(); i++) {
		if (progress.isCanceled()) {
			pUI.logToStatusBar("Job cancelled by user");
			return;
		}
		var currFilePath=allFiles.get(i);
		var currFile=new java.io.File(currFilePath);
		try {
			var fileSize=currFile.length();
			//skip empty files
			if (fileSize==0)
				continue;
			progress.setTaskName("Calculating hash for "+currFile.getName()+
					" ("+fileSize+" bytes)");
			//try to calculate the MD5 sum
			var currMD5=com.mindoo.filescanner.MD5.calculateMD5(currFile, progress);
			if (currMD5) {
				var filePath=currFile.getAbsolutePath();
				var cacheEntry=md5LookupMap.get(currMD5);
				if (cacheEntry) {
					//the "x" prefix cannot clash with a UNID (32 uppercase
					//hex characters), so it safely marks a first occurrence
					if (cacheEntry.substring(0,1)=="x") {
						//this hash is seen for the second time: create the
						//File document with both known locations
						var fileDoc=database.createDocument();
						fileDoc.replaceItemValue("Form", "File");
						var paths=new java.util.Vector();
						//remove leading "x" from the cached first path
						paths.add(cacheEntry.substring(1));
						paths.add(filePath);
						fileDoc.replaceItemValue("Filepath", paths);
						//NOTE(review): stores the size of a single instance;
						//the view presumably derives the total size from the
						//number of paths - confirm
						fileDoc.replaceItemValue("Filesize", new java.lang.Integer(fileSize));
						fileDoc.save(true,false);
						//from now on cache the document's UNID for this hash
						md5LookupMap.put(currMD5, fileDoc.getUniversalID());
						fileDoc.recycle();
					}
					else {
						//this hash has been seen more than once before;
						//cacheEntry contains the UNID of the File document
						var fileDoc=database.getDocumentByUNID(cacheEntry);
						//Notes item names are case-insensitive, so
						//"filePath" addresses the same item as "Filepath"
						if (fileDoc.hasItem("Filepath")) {
							var paths=fileDoc.getItemValue("filePath");
							paths.add(filePath);
							fileDoc.replaceItemValue("filePath", paths);
						}
						else {
							fileDoc.replaceItemValue("filePath", filePath);
						}
						fileDoc.save(true,false);
						fileDoc.recycle();
					}
				}
				else {
					//first time we see this hash: remember the path,
					//prefixed with "x" to distinguish it from a UNID
					md5LookupMap.put(currMD5, "x"+filePath);
				}
			}
			else {
				//bugfix: log the loop's current path; "filePath" is only
				//assigned in the if-branch above and would be undefined here
				java.lang.System.out.println("No MD5: "+currFilePath);
			}
		}
		catch (e) {
			//best-effort: log the error and continue with the next file
			java.lang.System.out.println(e);
		}
		finally {
			//report that 1 work unit has been processed
			progress.worked(1);
		}
	}
	pUI.logToStatusBar("Scan done.");
	progress.done();
}