If you have any ideas on how to improve it, I'd appreciate it if you shared them in the comments.
MD5 algorithm taken from here.
package com.blogspot.pawelstawicki.remove.duplicates
import java.security.MessageDigest
import java.io.{FileInputStream, File}
import org.apache.commons.io.{FilenameUtils, FileUtils, IOUtils}
/**
 * Command-line tool that looks for files in a second directory tree whose
 * content is identical to a file in a first tree, and moves those copies into
 * a "copies" folder under the user's home directory.
 *
 * Candidates are paired by MD5 digest and then confirmed with a full
 * byte-by-byte comparison before anything is moved.
 *
 * @author ${user.name}
 */
object App {

  /** Size in bytes of the buffers used when hashing and comparing files. */
  private val ChunkSize = 1024 * 1024

  /**
   * Entry point.
   *
   * @param args optional overrides: `args(0)` is the directory holding the
   *             originals, `args(1)` the directory scanned for duplicates.
   *             When an argument is missing the built-in default path is used.
   */
  def main(args: Array[String]): Unit = {
    // NOTE(review): the trailing comma in the first default path looks like a
    // typo, but it is kept as-is because it cannot be confirmed from here.
    val dir1 = new File(if (args.length > 0) args(0) else "/photos/main-album,")
    val dir2 = new File(if (args.length > 1) args(1) else "/photos/album-with-duplicates")

    List(dir1, dir2).filterNot(_.isDirectory).foreach { d =>
      System.err.println("Not a directory, nothing to scan: " + d.getPath)
    }

    // One representative original per digest; when several originals share a
    // digest the one listed last wins.
    val originalsByMd5: Map[String, File] = hashAll(getAllFiles(dir1)).toMap

    // Walk every file of the second tree (not one per digest), so that several
    // copies of the same content are all detected.
    hashAll(getAllFiles(dir2)).foreach { case (md5, suspectedDuplicate) =>
      originalsByMd5.get(md5)
        // Never treat a file as a duplicate of itself (overlapping trees).
        .filter(original => original.getCanonicalFile != suspectedDuplicate.getCanonicalFile)
        // Equal digests are only a hint; confirm on the actual bytes.
        .filter(original => checkDuplicate(original, suspectedDuplicate))
        .foreach(original => moveToCopies(original, suspectedDuplicate))
    }
  }

  /** Pairs every file with its MD5 digest, printing each digest as it is computed. */
  private def hashAll(files: List[File]): List[(String, File)] =
    files.map { f =>
      val md5 = md5OfFile(f)
      println("md5 for " + f.getPath + ": " + md5)
      md5 -> f
    }

  /**
   * Moves `duplicate` into `<user home>/copies/<directory path of original>`,
   * creating that directory when needed. An I/O failure while moving is
   * reported on stderr and does not abort the remaining work.
   */
  private def moveToCopies(original: File, duplicate: File): Unit = {
    println(duplicate.getPath + " is duplicate of " + original.getPath)
    // getPathNoEndSeparator may return null; fall back to the copies root then.
    val relativeDir =
      Option(FilenameUtils.getPathNoEndSeparator(original.getAbsolutePath)).getOrElse("")
    val copiesDir = new File(new File(FileUtils.getUserDirectory, "copies"), relativeDir)
    println("Moving to " + copiesDir.getPath)
    try FileUtils.moveFileToDirectory(duplicate, copiesDir, true)
    catch {
      case e: java.io.IOException =>
        System.err.println("Could not move " + duplicate.getPath + ": " + e.getMessage)
    }
  }

  /**
   * Tells whether two files have exactly the same content.
   *
   * Both files are streamed in chunks, so arbitrarily large files can be
   * compared, and both streams are closed before returning.
   *
   * @param f1 first file
   * @param f2 second file
   * @return `true` when both files contain the same bytes, `false` otherwise
   */
  def checkDuplicate(f1: File, f2: File): Boolean = {
    val input1 = new FileInputStream(f1)
    try {
      val input2 = new FileInputStream(f2)
      try sameContent(input1, input2, new Array[Byte](ChunkSize), new Array[Byte](ChunkSize))
      finally input2.close()
    } finally input1.close()
  }

  /** Compares the remaining content of two streams chunk by chunk; the buffers must have equal length. */
  @scala.annotation.tailrec
  private def sameContent(in1: FileInputStream, in2: FileInputStream,
                          buf1: Array[Byte], buf2: Array[Byte]): Boolean = {
    val n1 = fill(in1, buf1, 0)
    val n2 = fill(in2, buf2, 0)
    if (n1 != n2 || !samePrefix(buf1, buf2, n1)) false
    else if (n1 < buf1.length) true // a partially filled chunk means both streams hit end of file
    else sameContent(in1, in2, buf1, buf2)
  }

  /**
   * Reads until `buf` is full or the stream ends, because a single `read` call
   * may deliver fewer bytes than requested. Returns the number of valid bytes.
   */
  @scala.annotation.tailrec
  private def fill(in: FileInputStream, buf: Array[Byte], filled: Int): Int =
    if (filled == buf.length) filled
    else {
      val n = in.read(buf, filled, buf.length - filled)
      if (n < 0) filled else fill(in, buf, filled + n)
    }

  /** True when the first `n` bytes of both arrays are equal. */
  private def samePrefix(a: Array[Byte], b: Array[Byte], n: Int): Boolean = {
    // Plain index loop: this runs over every byte of every compared file.
    var i = 0
    while (i < n && a(i) == b(i)) i += 1
    i == n
  }

  /**
   * Computes the MD5 digest of a byte array.
   *
   * @param bytes data to hash
   * @return the digest as 32 lowercase hexadecimal characters
   */
  def md5SumString(bytes: Array[Byte]): String =
    toHex(MessageDigest.getInstance("MD5").digest(bytes))

  /** MD5 digest of a file's content, streamed in chunks; the stream is always closed. */
  private def md5OfFile(f: File): String = {
    val digest = MessageDigest.getInstance("MD5")
    val in = new FileInputStream(f)
    try {
      val buf = new Array[Byte](ChunkSize)
      Iterator.continually(in.read(buf)).takeWhile(_ != -1).foreach(n => digest.update(buf, 0, n))
    } finally in.close()
    toHex(digest.digest())
  }

  /** Formats bytes as two lowercase hex digits each. */
  private def toHex(bytes: Array[Byte]): String =
    bytes.map(b => "%02x".format(b & 0xFF)).mkString

  /**
   * Recursively collects all regular files below a directory.
   *
   * @param dir directory to scan
   * @return the regular files found directly in `dir` (in reverse listing
   *         order) followed by the files found under each remaining entry;
   *         empty when `dir` cannot be listed
   */
  def getAllFiles(dir: File): List[File] = {
    // listFiles returns null when dir is not a directory or cannot be read.
    val entries = Option(dir.listFiles).map(_.toList).getOrElse(Nil)
    val (files, others) = entries.partition(_.isFile)
    files.reverse ::: others.flatMap(getAllFiles)
  }
}





