/*
    dircomp - A directory comparison tool

    THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
    INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
    PARTICULAR PURPOSE AND NONINFRINGEMENT.
*/

#include "dircomp.h"

// Files are never read in one go (that would be heavy on RAM); they are
// processed in chunks of this many bytes (500 KiB).
#define BYTES_TO_READ_AT_ONCE 512000

/// @brief malloc wrapper that terminates the program when memory is exhausted
/// @param size Number of bytes to allocate
/// @return Pointer to the allocated memory (never NULL)
static void *checked_malloc(size_t size)
{
    void *memory = malloc(size);
    if (memory == NULL)
    {
        fprintf(stderr, "Out of memory.\n");
        exit(-1);
    }
    return memory;
}

/// @brief Creates a heap-allocated copy of a string
/// @param text String to copy
/// @return Pointer to the copy; the caller must free it
static char *duplicate_string(const char *text)
{
    size_t size = strlen(text) + 1; // + 1 for the terminating NUL
    char *copy = checked_malloc(size);
    memcpy(copy, text, size);
    return copy;
}

/// @brief Tells whether a path refers to a regular file (symbolic links are followed)
static bool is_regular_file(char *path)
{
    struct stat info;
    return stat(path, &info) == 0 && S_ISREG(info.st_mode);
}

/// @brief Tells whether a path refers to a directory (symbolic links are followed)
static bool is_directory(char *path)
{
    struct stat info;
    return stat(path, &info) == 0 && S_ISDIR(info.st_mode);
}

/// @brief Returns the type of a directory entry
/// @param directory Path of the directory the entry was read from
/// @param entry Entry returned by readdir
/// @return DT_REG, DT_DIR, or another DT_* value for entries that are neither
static unsigned char entry_type(char *directory, struct dirent *entry)
{
    if (entry->d_type != DT_UNKNOWN)
    {
        return entry->d_type;
    }

    // Not every filesystem fills in d_type: fall back to asking lstat.
    // lstat (not stat) is used so that symbolic links keep being ignored,
    // exactly as they are when d_type reports DT_LNK.
    unsigned char type = DT_UNKNOWN;
    char *path = combine_path(directory, entry->d_name);
    struct stat info;
    if (lstat(path, &info) == 0)
    {
        if (S_ISREG(info.st_mode))
        {
            type = DT_REG;
        }
        else if (S_ISDIR(info.st_mode))
        {
            type = DT_DIR;
        }
    }
    free(path);
    return type;
}

/// @brief Entry function
int main(int argc, char *argv[])
{
    struct arguments arguments = get_arguments(argc, argv);
    if (arguments.h == true)
    {
        print_help();
        return 0;
    }

    // analyze_directories frees the names it receives, so ownership of the two
    // strings is handed over and the struct no longer refers to them
    char *directory_to_analyze1 = arguments.directory1;
    char *directory_to_analyze2 = arguments.directory2;
    arguments.directory1 = NULL;
    arguments.directory2 = NULL;

    if (analyze_directories(directory_to_analyze1, directory_to_analyze2, &arguments))
    {
        printf("Directories are equal\n");
    }
    else
    {
        printf("Directories are not equal\n");
    }
    return 0;
}

/// @brief Given the arguments passed to the program, construct a "struct arguments" representing its options
/// @param argc Number of command line arguments
/// @param argv Command line arguments
/// @return The parsed options. When help is requested (no arguments at all, or -h)
///         the directory names are left as empty string literals and must not be free-d;
///         otherwise they are heap-allocated copies owned by the caller.
///         The program exits if the number of directories is not exactly two.
struct arguments get_arguments(int argc, char **argv)
{
    struct arguments provided_arguments = {"", "", false, false, false, false, false};
    if (argc == 1)
    {
        provided_arguments.h = true;
        return provided_arguments;
    }

    // getopt returns an int: storing it in a plain char would make the
    // comparison with -1 always false where char is unsigned
    int option;
    while ((option = getopt(argc, argv, "rvhfb")) != -1)
    {
        switch (option)
        {
        case 'r':
            provided_arguments.r = true;
            break;
        case 'v':
            provided_arguments.v = true;
            break;
        case 'h':
            provided_arguments.h = true;
            break;
        case 'f':
            provided_arguments.f = true;
            break;
        case 'b':
            provided_arguments.b = true;
            break;
        default:
            // Unknown option: getopt has already reported it, keep going
            break;
        }
    }

    // Help wins over everything else: "dircomp -h" must work without directories
    if (provided_arguments.h == true)
    {
        return provided_arguments;
    }

    // Get directories
    if ((argc - optind) < 2)
    {
        fprintf(stderr, "Not enough directories.\n");
        exit(-1);
    }
    else if ((argc - optind) > 2)
    {
        fprintf(stderr, "Too many directories.\n");
        exit(-1);
    }
    provided_arguments.directory1 = duplicate_string(argv[optind]);
    provided_arguments.directory2 = duplicate_string(argv[optind + 1]);
    return provided_arguments;
}

/// @brief Checks if two directories are equivalent.
/// @param directory_to_analyze_1 Name of the first directory. Will be free-d before the call terminates
/// @param directory_to_analyze_2 Name of the second directory. Will be free-d before the call terminates
/// @param arguments Arguments passed to dircomp
/// @return true if the directories are equivalent, false otherwise
bool analyze_directories(char* directory_to_analyze_1, char* directory_to_analyze_2, struct arguments *arguments)
{
    bool is_directory_equal = true;
    struct dirent *element;
    DIR *directory1 = opendir(directory_to_analyze_1);
    DIR *directory2 = opendir(directory_to_analyze_2);

    if (directory1 == NULL || directory2 == NULL)
    {
        printf("Couldn't open %s, %s, or both. The directories will be considered to be not equal.\n", directory_to_analyze_1, directory_to_analyze_2);
        is_directory_equal = false;
        goto cleanup;
    }

    // First pass: everything in directory1 must have an equal counterpart in directory2
    while ((element = readdir(directory1)) != NULL)
    {
        unsigned char type = entry_type(directory_to_analyze_1, element);

        // Is file
        if (type == DT_REG)
        {
            char *file1 = combine_path(directory_to_analyze_1, element->d_name);
            char *file2 = combine_path(directory_to_analyze_2, element->d_name);
            bool entry_matches = true;

            // Check whether it exists (as a regular file) in directory2
            if (!is_regular_file(file2))
            {
                entry_matches = false;
                if (arguments->v == true)
                {
                    printf("File %s exists in %s but does not in %s\n", element->d_name, directory_to_analyze_1, directory_to_analyze_2);
                }
            }
            // If the file exists, check if they are the same
            else
            {
                int file_equality_result;
                if (arguments->b == true)
                {
                    file_equality_result = byte_by_byte_file_comparison(file1, file2);
                }
                else
                {
                    file_equality_result = hash_by_hash_file_comparison(file1, file2);
                }

                if (file_equality_result != 1)
                {
                    entry_matches = false;
                    if (file_equality_result == 0 && arguments->v == true)
                    {
                        printf("File %s in %s is different in %s\n", element->d_name, directory_to_analyze_1, directory_to_analyze_2);
                    }
                    else if (file_equality_result == -1)
                    {
                        printf("An error occurred while comparing file %s in the directories %s, %s\n", element->d_name, directory_to_analyze_1, directory_to_analyze_2);
                    }
                }
            }

            free(file1);
            free(file2);

            if (!entry_matches)
            {
                is_directory_equal = false;
                if (arguments->f == true)
                {
                    goto cleanup;
                }
            }
        }
        // Is directory
        else if (type == DT_DIR)
        {
            if (strcmp(element->d_name, ".") == 0 || strcmp(element->d_name, "..") == 0)
            {
                continue;
            }

            char *subdirectory1 = combine_path(directory_to_analyze_1, element->d_name);
            char *subdirectory2 = combine_path(directory_to_analyze_2, element->d_name);

            // Check whether a folder with the same name exists in directory2
            if (!is_directory(subdirectory2))
            {
                free(subdirectory1);
                free(subdirectory2);
                is_directory_equal = false;
                if (arguments->v == true)
                {
                    printf("Sub-directory %s exists in %s but does not in %s\n", element->d_name, directory_to_analyze_1, directory_to_analyze_2);
                }
                if (arguments->f == true)
                {
                    goto cleanup;
                }
            }
            // Analyze recursively
            else if (arguments->r == true)
            {
                // The recursive call takes ownership of both sub-directory names
                is_directory_equal = analyze_directories(subdirectory1, subdirectory2, arguments) && is_directory_equal;
                // Interrupt recursion if -f option is set
                if (arguments->f == true && is_directory_equal == false)
                {
                    goto cleanup;
                }
            }
            else
            {
                free(subdirectory1);
                free(subdirectory2);
            }
        }
    }
    closedir(directory1);
    directory1 = NULL; // so that the cleanup code does not close it twice

    // Second pass: look for entries that only exist in directory2.
    // Contents were already compared during the first pass.
    while ((element = readdir(directory2)) != NULL)
    {
        if (strcmp(element->d_name, ".") == 0 || strcmp(element->d_name, "..") == 0)
        {
            continue;
        }

        unsigned char type = entry_type(directory_to_analyze_2, element);
        if (type != DT_REG && type != DT_DIR)
        {
            continue;
        }

        char *counterpart = combine_path(directory_to_analyze_1, element->d_name);
        bool exists_in_directory1 = (type == DT_REG) ? is_regular_file(counterpart) : is_directory(counterpart);
        free(counterpart);

        if (!exists_in_directory1)
        {
            is_directory_equal = false;
            if (arguments->v == true)
            {
                if (type == DT_REG)
                {
                    printf("File %s exists in %s but does not in %s\n", element->d_name, directory_to_analyze_2, directory_to_analyze_1);
                }
                else
                {
                    printf("Sub-directory %s exists in %s but does not in %s\n", element->d_name, directory_to_analyze_2, directory_to_analyze_1);
                }
            }
            if (arguments->f == true)
            {
                goto cleanup;
            }
        }
    }

cleanup:
    // Single exit point: whatever was acquired is released exactly once
    if (directory1 != NULL)
    {
        closedir(directory1);
    }
    if (directory2 != NULL)
    {
        closedir(directory2);
    }
    free(directory_to_analyze_1);
    free(directory_to_analyze_2);
    return is_directory_equal;
}

/// @brief Checks if two files contain the same data (by comparing their content, one chunk of bytes at a time)
/// @param filename1 Name of the first file
/// @param filename2 Name of the second file
/// @return Returns 1 if the files are the same, 0 otherwise, -1 if an error occurred
int byte_by_byte_file_comparison(char* filename1, char* filename2)
{
    if (strcmp(filename1, filename2) == 0)
    {
        return 1; // it's the same path, so it's the same file
    }

    struct stat stat1, stat2;
    if (stat(filename1, &stat1) != 0 || stat(filename2, &stat2) != 0)
    {
        return -1; // error accessing the files
    }
    if (stat1.st_size != stat2.st_size)
    {
        return 0; // files are not the same as they have a different dimension
    }

    int result = -1;
    size_t bytes;
    FILE *file1 = fopen(filename1, "rb");
    FILE *file2 = fopen(filename2, "rb");
    // The buffers live on the heap: two 500 KiB arrays are too much for the stack
    unsigned char *databuffer1 = malloc(BYTES_TO_READ_AT_ONCE);
    unsigned char *databuffer2 = malloc(BYTES_TO_READ_AT_ONCE);

    if (file1 == NULL || file2 == NULL || databuffer1 == NULL || databuffer2 == NULL)
    {
        goto cleanup; // error opening files or allocating the buffers
    }

    while ((bytes = fread(databuffer1, 1, BYTES_TO_READ_AT_ONCE, file1)) != 0)
    {
        if (fread(databuffer2, 1, bytes, file2) != bytes)
        {
            goto cleanup; // error while reading the file(s)
        }
        if (memcmp(databuffer1, databuffer2, bytes) != 0)
        {
            result = 0; // files are not the same
            goto cleanup;
        }
    }

    // fread returns 0 both at end-of-file and on failure: tell them apart
    result = ferror(file1) ? -1 : 1;

cleanup:
    free(databuffer1);
    free(databuffer2);
    if (file1 != NULL)
    {
        fclose(file1);
    }
    if (file2 != NULL)
    {
        fclose(file2);
    }
    return result;
}

/// @brief Checks if two files contain the same data (by comparing the resulting SHA-1 hash)
/// @param filename1 Name of the first file
/// @param filename2 Name of the second file
/// @return Returns 1 if the files are the same, 0 otherwise, -1 if an error occurred
int hash_by_hash_file_comparison(char* filename1, char* filename2)
{
    unsigned char *hash1 = sha1(filename1);
    unsigned char *hash2 = sha1(filename2);
    int ret = -1;

    if (hash1 != NULL && hash2 != NULL)
    {
        ret = (memcmp(hash1, hash2, SHA_DIGEST_LENGTH) == 0);
    }

    // free(NULL) is a no-op, so this also releases the one digest
    // that may have been computed when the other one failed
    free(hash1);
    free(hash2);
    return ret;
}

/// @brief Generates the SHA-1 hash of a file
/// @param filename Name of the file
/// @return Pointer to the digest (SHA_DIGEST_LENGTH bytes, to be free-d by the caller),
///         or NULL if the file could not be opened or read, or if memory ran out
unsigned char* sha1(char *filename)
{
    FILE *f = fopen(filename, "rb");
    if (f == NULL)
    {
        fprintf(stderr, "Couldn't open %s\n", filename);
        return NULL;
    }

    unsigned char *hash = malloc(SHA_DIGEST_LENGTH); // result will be here
    // For a matter of efficiency, we do not read the whole file at once.
    // Instead, we read BYTES_TO_READ_AT_ONCE at a time into a heap buffer
    // (an array this large does not belong on the stack).
    unsigned char *databuffer = malloc(BYTES_TO_READ_AT_ONCE);
    if (hash == NULL || databuffer == NULL)
    {
        fprintf(stderr, "Out of memory while hashing %s\n", filename);
        free(hash);
        free(databuffer);
        fclose(f);
        return NULL;
    }

    SHA_CTX context;
    size_t bytes; // how many bytes we have actually read from fread
    SHA1_Init(&context);
    while ((bytes = fread(databuffer, 1, BYTES_TO_READ_AT_ONCE, f)) != 0)
    {
        SHA1_Update(&context, databuffer, bytes);
    }
    // fread returns 0 on failure too: a digest of partial data must not be trusted
    bool read_failed = (ferror(f) != 0);
    SHA1_Final(hash, &context);

    free(databuffer);
    fclose(f);

    if (read_failed)
    {
        fprintf(stderr, "Couldn't read %s\n", filename);
        free(hash);
        return NULL;
    }
    return hash;
}

/// @brief Combines two paths, separating them with a '/'
/// @param path1 First part of the path
/// @param path2 Second part of the path
/// @return Pointer to the new heap-allocated path; the caller must free it.
///         The program terminates if memory is exhausted.
char* combine_path(char* path1, char* path2)
{
    // + 2: one byte for the '/' separator, one for the terminating NUL
    char *path = checked_malloc(strlen(path1) + strlen(path2) + 2);
    strcpy(path, path1);
    strcat(path, "/");
    strcat(path, path2);
    return path;
}

/// @brief Output of the command 'dircomp -h'
void print_help(void)
{
    printf("usage: dircomp directory1 directory2 [-rvfbh]\n");
    printf(" -r \t\t Recursive\n");
    printf(" -v \t\t Verbose\n");
    printf(" -f \t\t Fast. Halt as soon as the directories are found to be not equal\n");
    printf(" -b \t\t Byte-by-byte file comparison (default compares their hashes)\n");
    printf(" -h \t\t Print this help and quit\n");
}