dircomp/dircomp.c

401 lines
15 KiB
C
Raw Normal View History

2023-04-13 09:46:28 +02:00
/*
2023-04-21 20:50:39 +02:00
dircomp - A directory comparison tool
2023-04-13 09:46:28 +02:00
2023-04-21 20:50:39 +02:00
THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
2023-04-13 09:46:28 +02:00
*/
2023-04-14 22:36:09 +02:00
#include "dircomp.h"
2023-04-13 09:46:28 +02:00
2023-04-21 20:50:39 +02:00
/// @brief Entry function
2023-04-14 22:36:09 +02:00
int main(int argc, char *argv[])
{
2023-04-13 10:14:32 +02:00
struct arguments arguments = get_arguments(argc, argv);
2023-04-14 22:36:09 +02:00
if (arguments.h == true)
{
print_help();
2023-04-13 10:14:32 +02:00
return 0;
}
2023-04-14 22:36:09 +02:00
char* directory_to_analyze1 = malloc(strlen(arguments.directory1) * sizeof(char));
char* directory_to_analyze2 = malloc(strlen(arguments.directory2) * sizeof(char));
strcpy(directory_to_analyze1, arguments.directory1);
strcpy(directory_to_analyze2, arguments.directory2);
2023-04-21 20:50:39 +02:00
free(arguments.directory1);
free(arguments.directory2);
arguments.directory1 = NULL;
arguments.directory2 = NULL;
2023-04-14 22:36:09 +02:00
if (analyze_directories(directory_to_analyze1, directory_to_analyze2, &arguments))
{
2023-04-14 18:20:44 +02:00
printf("Directories are equal\n");
}
2023-04-14 22:36:09 +02:00
else
{
2023-04-14 18:20:44 +02:00
printf("Directories are not equal\n");
}
2023-04-15 22:21:53 +02:00
free(arguments.directory1);
free(arguments.directory2);
2023-04-13 10:14:32 +02:00
return 0;
2023-04-13 09:46:28 +02:00
}
2023-04-21 20:50:39 +02:00
/// @brief Given the arguments passed to the program, construct a "struct arguments" representing its options
2023-04-14 22:36:09 +02:00
struct arguments get_arguments(int argc, char **argv)
{
2023-04-17 15:51:04 +02:00
struct arguments provided_arguments = {"", "", false, false, false, false, false};
2023-04-14 22:51:43 +02:00
if(argc == 1){
provided_arguments.h = true;
return provided_arguments;
}
2023-04-13 09:46:28 +02:00
char option;
2023-04-17 15:51:04 +02:00
while ((option = getopt(argc, argv, "rvhfb")) != -1)
2023-04-14 22:36:09 +02:00
{
switch (option)
{
case 'r':
provided_arguments.r = true;
break;
case 'v':
provided_arguments.v = true;
break;
case 'h':
provided_arguments.h = true;
break;
2023-04-16 17:02:23 +02:00
case 'f':
provided_arguments.f = true;
break;
2023-04-17 15:51:04 +02:00
case 'b':
provided_arguments.b = true;
break;
2023-04-13 10:14:32 +02:00
}
}
2023-04-13 09:46:28 +02:00
2023-04-13 23:48:16 +02:00
// Get directories
2023-04-14 22:36:09 +02:00
if ((argc - optind) < 2)
{
fprintf(stderr, "Not enough directories.\n");
exit(-1);
2023-04-13 23:48:16 +02:00
}
2023-04-14 22:36:09 +02:00
else if ((argc - optind) > 2)
{
fprintf(stderr, "Too many directories.\n");
exit(-1);
2023-04-13 23:48:16 +02:00
}
2023-04-14 22:36:09 +02:00
provided_arguments.directory1 = malloc((strlen(argv[optind]) * sizeof(char)) + 1);
2023-04-13 23:48:16 +02:00
strcpy(provided_arguments.directory1, argv[optind]);
2023-04-14 22:36:09 +02:00
provided_arguments.directory2 = malloc((strlen(argv[optind + 1]) * sizeof(char)) + 1);
2023-04-13 23:48:16 +02:00
strcpy(provided_arguments.directory2, argv[optind + 1]);
return provided_arguments;
2023-04-13 09:46:28 +02:00
}
2023-04-21 20:50:39 +02:00
/// @brief Checks if two directories are equivalent.
/// @param directory_to_analyze_1 Name of the first directory. Will be free-d before the call terminates
/// @param directory_to_analyze_2 Name of the second directory. Will be free-d before the call terminates
/// @param arguments Arguments passed to dircomp
/// @return true if the directories are equivalent, false otherwise
2023-04-14 22:36:09 +02:00
bool analyze_directories(char* directory_to_analyze_1, char* directory_to_analyze_2, struct arguments *arguments)
{
2023-04-14 18:20:44 +02:00
bool is_directory_equal = true;
int stat_result, file_equality_result;
2023-04-16 16:35:03 +02:00
char* fullpath_file_helper;
char* fullpath_file_helper2;
2023-04-14 22:36:09 +02:00
char* subdirectory1;
char* subdirectory2;
struct dirent *element;
2023-04-21 20:50:39 +02:00
DIR *directory1, *directory2;
directory1 = opendir(directory_to_analyze_1);
directory2 = opendir(directory_to_analyze_2);
2023-04-14 18:20:44 +02:00
2023-04-21 20:50:39 +02:00
if(directory1 == NULL || directory2 == NULL)
{
printf("Couldn't open %s, %s, or both. The directories will be considered to be not equal.\n",
directory_to_analyze_1, directory_to_analyze_2);
free(directory_to_analyze_1);
free(directory_to_analyze_2);
return false;
}
while ((element = readdir(directory1)) != NULL)
2023-04-14 22:36:09 +02:00
{
2023-04-21 20:50:39 +02:00
// Is file
if (element->d_type == DT_REG)
2023-04-14 22:36:09 +02:00
{
2023-04-21 20:50:39 +02:00
// Check whether it exists in directory2
fullpath_file_helper = combine_path(directory_to_analyze_2, element->d_name);
if (access(fullpath_file_helper, F_OK) == -1)
2023-04-13 10:14:32 +02:00
{
2023-04-21 20:50:39 +02:00
is_directory_equal = false;
if ( arguments->v == true )
printf("File %s exists in %s but does not in %s\n" , element->d_name , directory_to_analyze_1
, directory_to_analyze_2);
2023-04-17 09:26:28 +02:00
2023-04-21 20:50:39 +02:00
if( arguments->f == true ){
free(fullpath_file_helper);
free(directory_to_analyze_1);
free(directory_to_analyze_2);
closedir(directory1);
return false;
2023-04-14 18:20:44 +02:00
}
2023-04-13 10:14:32 +02:00
}
2023-04-13 09:49:03 +02:00
2023-04-21 20:50:39 +02:00
// If the file exists, check if they are the same
else
2023-04-13 10:14:32 +02:00
{
2023-04-21 20:50:39 +02:00
fullpath_file_helper = combine_path(directory_to_analyze_1, element->d_name);
fullpath_file_helper2 = combine_path(directory_to_analyze_2, element->d_name);
if(arguments->b == true)
file_equality_result = byte_by_byte_file_comparison(fullpath_file_helper, fullpath_file_helper2);
else
file_equality_result = hash_by_hash_file_comparison(fullpath_file_helper, fullpath_file_helper2);
free( fullpath_file_helper );
free( fullpath_file_helper2 );
if (file_equality_result != 1)
2023-04-14 22:36:09 +02:00
{
is_directory_equal = false;
2023-04-21 20:50:39 +02:00
if(file_equality_result == 0 && arguments->v == true)
{
printf("File %s in %s is different in %s\n" , element->d_name
, directory_to_analyze_1
, directory_to_analyze_2);
}
else if(file_equality_result == -1)
{
printf("An error occurred while comparing file %s in the directories %s, %s\n" , element->d_name
, directory_to_analyze_1
, directory_to_analyze_2);
}
2023-04-16 17:02:23 +02:00
if( arguments->f == true )
{
free( directory_to_analyze_1 );
free( directory_to_analyze_2 );
2023-04-21 20:50:39 +02:00
closedir( directory1 );
2023-04-16 17:02:23 +02:00
return false;
}
2023-04-14 22:36:09 +02:00
}
2023-04-21 20:50:39 +02:00
}
}
// Is directory
else if (element->d_type == DT_DIR)
{
if (strcmp(element->d_name, ".") == 0 || strcmp(element->d_name, "..") == 0)
{
continue;
}
// Check whether a folder with the same name exists in directory2
fullpath_file_helper = combine_path(directory_to_analyze_2, element->d_name);
// Allocate heap memory in order to be able to free it before a potential recursive call starts
struct stat *dummy_structure = malloc(sizeof(struct stat));
stat_result = stat(fullpath_file_helper, dummy_structure);
free(dummy_structure);
free(fullpath_file_helper);
if (stat_result == -1) // directory does not exist
{
is_directory_equal = false;
if (arguments->v == true)
printf("Sub-directory %s exists in %s but does not in %s\n" , element->d_name
, directory_to_analyze_1
, directory_to_analyze_2);
if( arguments->f == true )
{
free( directory_to_analyze_1 );
free( directory_to_analyze_2 );
closedir(directory1);
return false;
}
}
else
{
2023-04-14 18:20:44 +02:00
// Analyze recursively
2023-04-21 20:50:39 +02:00
if (arguments->r == true)
2023-04-17 09:26:28 +02:00
{
2023-04-21 20:50:39 +02:00
subdirectory1 = combine_path(directory_to_analyze_1, element->d_name);
subdirectory2 = combine_path(directory_to_analyze_2, element->d_name);
is_directory_equal = analyze_directories(subdirectory1, subdirectory2, arguments) && is_directory_equal;
// Interrupt recursion if -f option is set
if(arguments->f == true && is_directory_equal == false){
free( directory_to_analyze_1 );
free( directory_to_analyze_2 );
closedir( directory1 );
return false;
2023-04-14 18:20:44 +02:00
}
}
2023-04-13 10:14:32 +02:00
}
2023-04-14 18:20:44 +02:00
}
}
2023-04-21 20:50:39 +02:00
closedir(directory1);
2023-04-13 09:49:03 +02:00
2023-04-21 20:50:39 +02:00
while ((element = readdir(directory2)) != NULL)
2023-04-14 18:20:44 +02:00
{
2023-04-21 20:50:39 +02:00
if (element->d_type == DT_REG || element->d_type == DT_DIR)
2023-04-14 18:20:44 +02:00
{
2023-04-21 20:50:39 +02:00
fullpath_file_helper = combine_path(directory_to_analyze_1, element->d_name);
if(element->d_type == DT_REG){
if (access(fullpath_file_helper, F_OK) == -1)
{
is_directory_equal = false;
if (arguments->v == true)
2023-04-17 00:06:26 +02:00
{
2023-04-21 20:50:39 +02:00
printf("File %s exists in %s but does not in %s\n" , element->d_name
2023-04-14 22:36:09 +02:00
, directory_to_analyze_2
, directory_to_analyze_1);
2023-04-16 17:02:23 +02:00
}
2023-04-14 18:20:44 +02:00
}
2023-04-21 20:50:39 +02:00
}
else if(element->d_type == DT_DIR)
{
struct stat dummy_structure; // no need to be malloc-ed, as it'll be automatically free-d as the call ends
if (stat(fullpath_file_helper, &dummy_structure) == -1)
{
is_directory_equal = false;
if (arguments->v == true)
2023-04-17 00:06:26 +02:00
{
2023-04-21 20:50:39 +02:00
printf("Sub-directory %s exists in %s but does not in %s\n" , element->d_name
2023-04-14 22:36:09 +02:00
, directory_to_analyze_2
, directory_to_analyze_1);
2023-04-16 17:02:23 +02:00
}
2023-04-14 18:20:44 +02:00
}
2023-04-21 20:50:39 +02:00
}
free(fullpath_file_helper);
if( arguments->f == true && is_directory_equal == false)
{
free( directory_to_analyze_1 );
free( directory_to_analyze_2 );
closedir( directory2 );
return false;
2023-04-14 18:20:44 +02:00
}
2023-04-13 10:14:32 +02:00
}
}
2023-04-21 20:50:39 +02:00
closedir(directory2);
2023-04-14 22:36:09 +02:00
free(directory_to_analyze_1);
free(directory_to_analyze_2);
2023-04-21 20:50:39 +02:00
2023-04-14 18:20:44 +02:00
return is_directory_equal;
2023-04-13 09:46:28 +02:00
}
2023-04-13 23:48:16 +02:00
2023-04-21 20:50:39 +02:00
/// @brief Checks if two files contain the same data (by comparing their content, one chunk at a time)
/// @param filename1 Name of the first file
/// @param filename2 Name of the second file
/// @return Returns 1 if the files are the same, 0 otherwise, -1 if an error occurred
///         (a file cannot be stat-ed, opened or read, or memory cannot be allocated)
int byte_by_byte_file_comparison(char* filename1, char* filename2)
{
    if (strcmp(filename1, filename2) == 0)
    {
        return 1; // it's the same path, so it's the same file
    }

    struct stat stat1, stat2;
    if (stat(filename1, &stat1) != 0 || stat(filename2, &stat2) != 0)
    {
        return -1; // error accessing the files
    }
    if (stat1.st_size != stat2.st_size)
    {
        return 0; // files are not the same as they have a different dimension
    }

    #define BYTES_TO_READ_AT_ONCE 512000
    int result = -1; // stays -1 unless the comparison runs to a verdict
    size_t bytes;
    // The buffers are heap-allocated: two of them on the stack would take about 1 MB
    unsigned char *databuffer1 = NULL;
    unsigned char *databuffer2 = NULL;
    FILE *file1 = fopen(filename1, "rb");
    FILE *file2 = fopen(filename2, "rb");
    if (file1 == NULL || file2 == NULL)
    {
        goto cleanup; // error opening files; closes the one that did open
    }

    databuffer1 = malloc(BYTES_TO_READ_AT_ONCE);
    databuffer2 = malloc(BYTES_TO_READ_AT_ONCE);
    if (databuffer1 == NULL || databuffer2 == NULL)
    {
        goto cleanup;
    }

    while ((bytes = fread(databuffer1, 1, BYTES_TO_READ_AT_ONCE, file1)) != 0)
    {
        if (fread(databuffer2, 1, bytes, file2) != bytes)
        {
            goto cleanup; // error while reading the file(s)
        }
        if (memcmp(databuffer1, databuffer2, bytes) != 0)
        {
            result = 0; // files are not the same
            goto cleanup;
        }
    }
    // fread() returns 0 both at end-of-file and on error: tell the two apart
    result = ferror(file1) ? -1 : 1;

cleanup:
    free(databuffer1);
    free(databuffer2);
    if (file1 != NULL)
    {
        fclose(file1);
    }
    if (file2 != NULL)
    {
        fclose(file2);
    }
    return result;
}
2023-04-21 20:50:39 +02:00
/// @brief Checks if two files contain the same data (by comparing the resulting SHA-1 hash)
/// @param filename1
/// @param filename2
/// @return Returns 1 if the files are the same, 0 otherwise, -1 if an error occurred
2023-04-17 15:51:04 +02:00
int hash_by_hash_file_comparison(char* filename1, char* filename2)
{
char* hash1 = sha1(filename1);
char* hash2 = sha1(filename2);
2023-04-21 20:50:39 +02:00
if(hash1 == NULL || hash2 == NULL)
{
return -1;
}
2023-04-19 11:54:00 +02:00
int ret = (memcmp(hash1, hash2, SHA_DIGEST_LENGTH) == 0);
2023-04-17 15:51:04 +02:00
free(hash1);
free(hash2);
return ret;
}
2023-04-21 20:50:39 +02:00
/// @brief Generates the SHA-1 hash of a file
/// @param filename Name of the file
/// @return Pointer to the digest
2023-04-17 15:51:04 +02:00
unsigned char* sha1(char *filename)
{
FILE *f = fopen(filename, "rb");
if (f == NULL)
{
fprintf(stderr, "Couldn't open %s\n", filename);
2023-04-21 20:50:39 +02:00
return NULL;
2023-04-17 15:51:04 +02:00
}
// For a matter of efficiency, we do not read
// the whole file at once. It'd be heavy on RAM.
// Instead, we read BYTES_TO_READ_AT_ONCE at time
#define BYTES_TO_READ_AT_ONCE 512000 // 500KiB
unsigned int bytes; // how many bytes we have actually read from fread
#if BYTES_TO_READ_AT_ONCE > UINT_MAX
#error Trying to read more bytes than what is possible to handle. Recompile using unsigned long or reduce BYTES_TO_READ_AT_ONCE
#endif
SHA_CTX context;
unsigned char *hash = malloc(SHA_DIGEST_LENGTH * sizeof(unsigned char)); // result will be here
unsigned char databuffer[BYTES_TO_READ_AT_ONCE];
SHA1_Init(&context);
while ((bytes = fread(databuffer, 1, BYTES_TO_READ_AT_ONCE, f)) != 0)
{
SHA1_Update(&context, databuffer, bytes);
}
SHA1_Final(hash, &context);
fclose(f);
return hash;
}
2023-04-21 20:50:39 +02:00
/// @brief Combines two paths by joining them with a '/'
/// @param path1 First part of the path (e.g. a directory)
/// @param path2 Second part of the path (e.g. an entry of that directory)
/// @return Pointer to the new, malloc-ed path "path1/path2"; the caller must free it.
///         Never returns NULL: the program is terminated with exit(-1) if memory cannot be allocated.
char* combine_path(char* path1, char* path2){
    size_t length1 = strlen(path1);
    size_t length2 = strlen(path2);

    // + 2: one byte for the '/' separator, one for the terminating '\0'
    char *path = malloc(length1 + length2 + 2);
    if (path == NULL)
    {
        fprintf(stderr, "Out of memory.\n");
        exit(-1);
    }

    memcpy(path, path1, length1);
    path[length1] = '/';
    memcpy(path + length1 + 1, path2, length2 + 1); // copies the '\0' of path2 as well
    return path;
}
2023-04-21 20:50:39 +02:00
/// @brief Writes the usage text (the output of 'dircomp -h') to standard output
void print_help(void)
{
    // One entry per output line, emitted in order
    static const char *const help_lines[] = {
        "usage: dircomp directory1 directory2 [-rvfbh]\n",
        "  -r \t\t Recursive\n",
        "  -v \t\t Verbose\n",
        "  -f \t\t Fast. Halt as soon as the directories are found to be not equal\n",
        "  -b \t\t Byte-by-byte file comparison (default compares their hashes)\n",
        "  -h \t\t Print this help and quit\n",
    };
    const size_t line_count = sizeof help_lines / sizeof help_lines[0];

    for (size_t index = 0; index < line_count; index++)
    {
        fputs(help_lines[index], stdout);
    }
}