If you have any ideas on how to improve it, I'd appreciate it if you shared them in the comments.
The MD5 algorithm was taken from here.
package com.blogspot.pawelstawicki.remove.duplicates

import java.io.{File, FileInputStream}
import java.security.MessageDigest

import org.apache.commons.io.{FilenameUtils, FileUtils, IOUtils}

/**
 * Finds files in a "duplicates" directory tree whose content also exists in a
 * "main" directory tree and moves them under `<user home>/copies/`.
 *
 * Candidates are paired by MD5 digest and then confirmed with a byte-by-byte
 * comparison, so a digest match alone never causes a file to be moved.
 */
object App {

  /** Directory holding the originals when no first argument is given. */
  private val DefaultMainDir = "/photos/main-album"

  /** Directory scanned for duplicates when no second argument is given. */
  private val DefaultDuplicatesDir = "/photos/album-with-duplicates"

  /** Chunk size used when streaming a file through the digest. */
  private val HashBufferSize = 64 * 1024

  /**
   * Scans both directory trees, prints the MD5 of every file found, and moves
   * each confirmed duplicate out of the duplicates tree.
   *
   * A duplicate is moved to `<user home>/copies/<directory path of the original>`;
   * the target directory is created when missing. `FileUtils.moveFileToDirectory`
   * throws an `IOException` if a file with the same name is already there, and
   * that exception is deliberately left to propagate.
   *
   * @param args optional overrides: `args(0)` is the main directory,
   *             `args(1)` the directory that may contain duplicates
   */
  def main(args: Array[String]): Unit = {
    val dir1 = new File(if (args.length > 0) args(0) else DefaultMainDir)
    val dir2 = new File(if (args.length > 1) args(1) else DefaultDuplicatesDir)

    // One representative original per digest; when several originals share
    // content, the last one listed wins.
    val dir1Map: Map[String, File] =
      getAllFiles(dir1).map(f => hashAndLog(f) -> f).toMap

    // Kept as a list of pairs (not a map) so that several files in dir2 with
    // identical content are all examined rather than collapsing into one.
    val dir2Hashed: List[(String, File)] =
      getAllFiles(dir2).map(f => (hashAndLog(f), f))

    for ((md5, suspectedDuplicate) <- dir2Hashed; original <- dir1Map.get(md5)) {
      if (checkDuplicate(original, suspectedDuplicate)) {
        println(suspectedDuplicate.getPath + " is duplicate of " + original.getPath)
        val originalDirPath = FilenameUtils.getPathNoEndSeparator(original.getAbsolutePath)
        val copiesDir = new File(new File(FileUtils.getUserDirectory, "copies"), originalDirPath)
        println("Moving to " + copiesDir.getPath)
        FileUtils.moveFileToDirectory(suspectedDuplicate, copiesDir, true)
      }
    }
  }

  /**
   * Tells whether two files have exactly the same content.
   *
   * @param f1 first file
   * @param f2 second file
   * @return `true` when both files contain the same bytes, `false` otherwise
   * @throws java.io.IOException if either file cannot be opened or read
   */
  def checkDuplicate(f1: File, f2: File): Boolean = {
    val input1 = new FileInputStream(f1)
    try {
      val input2 = new FileInputStream(f2)
      try {
        // contentEquals compares the streams to the end, so the result does
        // not depend on how many bytes an individual read() call returns.
        IOUtils.contentEquals(input1, input2)
      } finally {
        input2.close()
      }
    } finally {
      input1.close()
    }
  }

  /**
   * Computes the MD5 digest of a byte array.
   *
   * @param bytes data to hash
   * @return the digest as 32 lowercase hexadecimal characters
   */
  def md5SumString(bytes: Array[Byte]): String = {
    val md5 = MessageDigest.getInstance("MD5")
    md5.update(bytes)
    toHex(md5.digest())
  }

  /**
   * Lists every regular file below `dir`, recursively.
   *
   * Regular files directly inside `dir` come first (in reverse listing order),
   * followed by the contents of the remaining entries in listing order.
   *
   * @param dir directory to scan
   * @return all regular files found; empty when `dir` cannot be listed
   */
  def getAllFiles(dir: File): List[File] = {
    // listFiles returns null for a missing or unreadable directory.
    val entries = Option(dir.listFiles).map(_.toList).getOrElse(Nil)
    val (files, others) = entries.partition(_.isFile)
    files.reverse ::: others.flatMap(getAllFiles)
  }

  /** Hashes `file`, prints the digest next to the file's path, and returns the digest. */
  private def hashAndLog(file: File): String = {
    val md5 = md5SumOfFile(file)
    println("md5 for " + file.getPath + ": " + md5)
    md5
  }

  /** Streams `file` through MD5 in fixed-size chunks instead of loading it whole. */
  private def md5SumOfFile(file: File): String = {
    val md5 = MessageDigest.getInstance("MD5")
    val input = new FileInputStream(file)
    try {
      val buffer = new Array[Byte](HashBufferSize)
      var read = input.read(buffer)
      while (read != -1) {
        md5.update(buffer, 0, read)
        read = input.read(buffer)
      }
    } finally {
      input.close()
    }
    toHex(md5.digest())
  }

  /** Renders digest bytes as lowercase hex, two characters per byte. */
  private def toHex(digest: Array[Byte]): String =
    digest.map(b => "%02x".format(b & 0xFF)).mkString
}