dircomp/dircomp.c

420 lines
15 KiB
C
Raw Normal View History

2023-04-13 09:46:28 +02:00
/*
2023-04-21 20:50:39 +02:00
dircomp - A directory comparison tool
2023-04-13 09:46:28 +02:00
2023-04-21 20:50:39 +02:00
THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
2023-04-13 09:46:28 +02:00
*/
2023-04-14 22:36:09 +02:00
#include "dircomp.h"
2023-04-13 09:46:28 +02:00
2023-04-21 20:50:39 +02:00
/// @brief Entry function
2023-04-14 22:36:09 +02:00
int main(int argc, char *argv[])
{
2023-04-13 10:14:32 +02:00
struct arguments arguments = get_arguments(argc, argv);
2023-04-14 22:36:09 +02:00
if (arguments.h == true)
{
print_help();
2023-04-13 10:14:32 +02:00
return 0;
}
2023-04-14 22:36:09 +02:00
2023-04-29 15:26:03 +02:00
char* directory_to_analyze1 = malloc(strlen(arguments.directory1) * sizeof(char) + 1);
char* directory_to_analyze2 = malloc(strlen(arguments.directory2) * sizeof(char) + 1);
2023-04-14 22:36:09 +02:00
strcpy(directory_to_analyze1, arguments.directory1);
strcpy(directory_to_analyze2, arguments.directory2);
2023-04-21 20:50:39 +02:00
free(arguments.directory1);
free(arguments.directory2);
arguments.directory1 = NULL;
arguments.directory2 = NULL;
2023-04-14 22:36:09 +02:00
if (analyze_directories(directory_to_analyze1, directory_to_analyze2, &arguments))
{
2023-04-14 18:20:44 +02:00
printf("Directories are equal\n");
}
2023-04-14 22:36:09 +02:00
else
{
2023-04-14 18:20:44 +02:00
printf("Directories are not equal\n");
}
2023-04-24 10:35:46 +02:00
2023-04-13 10:14:32 +02:00
return 0;
2023-04-13 09:46:28 +02:00
}
2023-04-21 20:50:39 +02:00
/// @brief Given the arguments passed to the program, construct a "struct arguments" representing its options
2023-04-14 22:36:09 +02:00
struct arguments get_arguments(int argc, char **argv)
{
2023-04-17 15:51:04 +02:00
struct arguments provided_arguments = {"", "", false, false, false, false, false};
2023-04-14 22:51:43 +02:00
if(argc == 1){
provided_arguments.h = true;
return provided_arguments;
}
2023-04-13 09:46:28 +02:00
char option;
2023-04-28 22:10:25 +02:00
while ((option = getopt(argc, argv, "rvhfd")) != -1)
2023-04-14 22:36:09 +02:00
{
switch (option)
{
case 'r':
provided_arguments.r = true;
break;
case 'v':
provided_arguments.v = true;
break;
case 'h':
provided_arguments.h = true;
break;
2023-04-16 17:02:23 +02:00
case 'f':
provided_arguments.f = true;
break;
2023-04-28 22:10:25 +02:00
case 'd':
provided_arguments.d = true;
2023-04-17 15:51:04 +02:00
break;
2023-04-13 10:14:32 +02:00
}
}
2023-04-13 09:46:28 +02:00
2023-04-13 23:48:16 +02:00
// Get directories
2023-04-14 22:36:09 +02:00
if ((argc - optind) < 2)
{
2023-04-26 23:21:22 +02:00
printf("Not enough directories.\n");
2023-04-14 22:36:09 +02:00
exit(-1);
2023-04-13 23:48:16 +02:00
}
2023-04-14 22:36:09 +02:00
else if ((argc - optind) > 2)
{
2023-04-26 23:21:22 +02:00
printf("Too many directories.\n");
2023-04-14 22:36:09 +02:00
exit(-1);
2023-04-13 23:48:16 +02:00
}
2023-04-14 22:36:09 +02:00
provided_arguments.directory1 = malloc((strlen(argv[optind]) * sizeof(char)) + 1);
2023-04-13 23:48:16 +02:00
strcpy(provided_arguments.directory1, argv[optind]);
2023-04-14 22:36:09 +02:00
provided_arguments.directory2 = malloc((strlen(argv[optind + 1]) * sizeof(char)) + 1);
2023-04-13 23:48:16 +02:00
strcpy(provided_arguments.directory2, argv[optind + 1]);
return provided_arguments;
2023-04-13 09:46:28 +02:00
}
2023-04-21 20:50:39 +02:00
/// @brief Checks if two directories are equivalent.
/// @param directory_to_analyze_1 Name of the first directory. Will be free-d before the call terminates
2023-04-21 20:50:39 +02:00
/// @param directory_to_analyze_2 Name of the second directory. Will be free-d before the call terminates
/// @param arguments Arguments passed to dircomp
/// @return true if the directories are equivalent, false otherwise
2023-04-14 22:36:09 +02:00
bool analyze_directories(char* directory_to_analyze_1, char* directory_to_analyze_2, struct arguments *arguments)
{
2023-04-14 18:20:44 +02:00
bool is_directory_equal = true;
int stat_result, file_equality_result;
2023-04-26 23:21:22 +02:00
char *fullpath_helper, *fullpath_helper2;
2023-04-14 22:36:09 +02:00
struct dirent *element;
2023-04-21 20:50:39 +02:00
DIR *directory1, *directory2;
2023-04-26 21:50:16 +02:00
if(strcmp(directory_to_analyze_1, directory_to_analyze_2) == 0){
return true;
}
2023-04-21 20:50:39 +02:00
directory1 = opendir(directory_to_analyze_1);
directory2 = opendir(directory_to_analyze_2);
2023-04-14 18:20:44 +02:00
2023-04-21 20:50:39 +02:00
if(directory1 == NULL || directory2 == NULL)
{
printf("Couldn't open %s, %s, or both. The directories will be considered to be not equal.\n",
directory_to_analyze_1, directory_to_analyze_2);
free(directory_to_analyze_1);
free(directory_to_analyze_2);
return false;
}
2023-04-21 20:50:39 +02:00
while ((element = readdir(directory1)) != NULL)
2023-04-14 22:36:09 +02:00
{
2023-04-21 20:50:39 +02:00
// Is file
if (element->d_type == DT_REG)
2023-04-14 22:36:09 +02:00
{
2023-04-21 20:50:39 +02:00
// Check whether it exists in directory2
2023-04-26 23:21:22 +02:00
fullpath_helper = combine_path(directory_to_analyze_2, element->d_name);
if (access(fullpath_helper, F_OK) == -1)
2023-04-13 10:14:32 +02:00
{
2023-04-21 20:50:39 +02:00
is_directory_equal = false;
2023-04-26 23:21:22 +02:00
if (arguments->v)
printf("File %s exists in %s but does not in %s\n" , element->d_name
2023-04-21 20:53:14 +02:00
, directory_to_analyze_1
, directory_to_analyze_2);
2023-04-26 23:21:22 +02:00
free(fullpath_helper);
2023-04-13 10:14:32 +02:00
}
2023-04-13 09:49:03 +02:00
2023-04-21 20:50:39 +02:00
// If the file exists, check if they are the same
else
2023-04-13 10:14:32 +02:00
{
2023-04-26 23:21:22 +02:00
fullpath_helper2 = combine_path(directory_to_analyze_1, element->d_name);
2023-04-21 20:50:39 +02:00
2023-04-28 22:10:25 +02:00
if(arguments->d)
2023-04-26 23:21:22 +02:00
file_equality_result = hash_by_hash_file_comparison(fullpath_helper, fullpath_helper2);
2023-04-28 22:10:25 +02:00
else
file_equality_result = byte_by_byte_file_comparison(fullpath_helper, fullpath_helper2);
2023-04-21 20:50:39 +02:00
2023-04-21 20:50:39 +02:00
if (file_equality_result != 1)
2023-04-14 22:36:09 +02:00
{
is_directory_equal = false;
2023-04-26 23:21:22 +02:00
if(file_equality_result == 0 && arguments->v)
2023-04-21 20:50:39 +02:00
{
2023-04-28 22:27:23 +02:00
printf("The files %s in %s and %s are different\n" , element->d_name
, directory_to_analyze_1
, directory_to_analyze_2);
2023-04-21 20:50:39 +02:00
}
else if(file_equality_result == -1)
{
printf("An error occurred while comparing file %s in the directories %s, %s\n" , element->d_name
, directory_to_analyze_1
, directory_to_analyze_2);
}
2023-04-14 22:36:09 +02:00
}
2023-04-26 23:21:22 +02:00
free( fullpath_helper );
free( fullpath_helper2 );
2023-04-21 20:50:39 +02:00
}
}
// Is directory
else if (element->d_type == DT_DIR)
{
if (strcmp(element->d_name, ".") == 0 || strcmp(element->d_name, "..") == 0)
{
continue;
}
// Check whether a folder with the same name exists in directory2
2023-04-26 23:21:22 +02:00
fullpath_helper = combine_path(directory_to_analyze_2, element->d_name);
2023-04-21 20:50:39 +02:00
// Allocate heap memory in order to be able to free it before a potential recursive call starts
struct stat *dummy_structure = malloc(sizeof(struct stat));
2023-04-26 23:21:22 +02:00
stat_result = stat(fullpath_helper, dummy_structure);
2023-04-21 20:50:39 +02:00
free(dummy_structure);
2023-04-26 23:21:22 +02:00
free(fullpath_helper);
2023-04-21 20:50:39 +02:00
if (stat_result == -1) // directory does not exist
{
is_directory_equal = false;
2023-04-26 23:21:22 +02:00
if (arguments->v)
2023-04-21 20:50:39 +02:00
printf("Sub-directory %s exists in %s but does not in %s\n" , element->d_name
, directory_to_analyze_1
, directory_to_analyze_2);
}
else
{
2023-04-14 18:20:44 +02:00
// Analyze recursively
2023-04-26 23:21:22 +02:00
if (arguments->r)
2023-04-17 09:26:28 +02:00
{
2023-04-26 23:21:22 +02:00
fullpath_helper = combine_path(directory_to_analyze_1, element->d_name);
fullpath_helper2 = combine_path(directory_to_analyze_2, element->d_name);
is_directory_equal = analyze_directories(fullpath_helper, fullpath_helper2, arguments) && is_directory_equal;
2023-04-14 18:20:44 +02:00
}
2023-04-13 10:14:32 +02:00
}
2023-04-14 18:20:44 +02:00
}
2023-04-26 23:21:22 +02:00
if(arguments->f && !is_directory_equal){
free(directory_to_analyze_1);
free(directory_to_analyze_2);
closedir(directory1);
2023-04-29 15:41:09 +02:00
closedir(directory2);
2023-04-26 23:21:22 +02:00
return false;
}
2023-04-14 18:20:44 +02:00
}
2023-04-21 20:50:39 +02:00
closedir(directory1);
2023-04-13 09:49:03 +02:00
2023-04-21 20:50:39 +02:00
while ((element = readdir(directory2)) != NULL)
2023-04-14 18:20:44 +02:00
{
2023-04-21 20:50:39 +02:00
if (element->d_type == DT_REG || element->d_type == DT_DIR)
2023-04-14 18:20:44 +02:00
{
2023-04-26 23:21:22 +02:00
fullpath_helper = combine_path(directory_to_analyze_1, element->d_name);
2023-04-21 20:50:39 +02:00
if(element->d_type == DT_REG){
2023-04-26 23:21:22 +02:00
if (access(fullpath_helper, F_OK) == -1)
2023-04-21 20:50:39 +02:00
{
is_directory_equal = false;
2023-04-26 23:21:22 +02:00
if (arguments->v)
2023-04-17 00:06:26 +02:00
{
2023-04-21 20:50:39 +02:00
printf("File %s exists in %s but does not in %s\n" , element->d_name
2023-04-14 22:36:09 +02:00
, directory_to_analyze_2
, directory_to_analyze_1);
2023-04-16 17:02:23 +02:00
}
2023-04-14 18:20:44 +02:00
}
2023-04-21 20:50:39 +02:00
}
else if(element->d_type == DT_DIR)
{
struct stat dummy_structure; // no need to be malloc-ed, as it'll be automatically free-d as the call ends
2023-04-26 23:21:22 +02:00
if (stat(fullpath_helper, &dummy_structure) == -1)
2023-04-21 20:50:39 +02:00
{
is_directory_equal = false;
2023-04-26 23:21:22 +02:00
if (arguments->v)
2023-04-17 00:06:26 +02:00
{
2023-04-21 20:50:39 +02:00
printf("Sub-directory %s exists in %s but does not in %s\n" , element->d_name
2023-04-14 22:36:09 +02:00
, directory_to_analyze_2
, directory_to_analyze_1);
2023-04-16 17:02:23 +02:00
}
2023-04-14 18:20:44 +02:00
}
2023-04-21 20:50:39 +02:00
}
2023-04-26 23:21:22 +02:00
free(fullpath_helper);
}
if(arguments->f && !is_directory_equal)
{
free(directory_to_analyze_1);
free(directory_to_analyze_2);
closedir(directory2);
return false;
2023-04-13 10:14:32 +02:00
}
}
2023-04-21 20:50:39 +02:00
closedir(directory2);
2023-04-14 22:36:09 +02:00
free(directory_to_analyze_1);
free(directory_to_analyze_2);
2023-04-14 18:20:44 +02:00
return is_directory_equal;
2023-04-13 09:46:28 +02:00
}
2023-04-13 23:48:16 +02:00
2023-04-21 20:50:39 +02:00
/// @brief Checks if two files contain the same data (by comparing their content, one byte a time)
/// @param filename1 Name of the first file
/// @param filename2 Name of the second file
/// @return Returns 1 if the files are the same, 0 otherwise, -1 if an error occurred
2023-04-17 15:51:04 +02:00
int byte_by_byte_file_comparison(char* filename1, char* filename2)
{
2023-04-16 13:03:41 +02:00
if(strcmp(filename1, filename2) == 0)
return 1; // it's the same path, so it's the same file
struct stat stat1, stat2;
if ( stat(filename1, &stat1) != 0 || stat(filename2, &stat2) != 0)
return -1; // error opening files
if(stat1.st_size != stat2.st_size)
return 0; // files are not the same as they have a different dimension
2023-04-17 09:26:28 +02:00
FILE *file1 = fopen(filename1, "rb");
FILE *file2 = fopen(filename2, "rb");
if (file1 == NULL || file2 == NULL)
2023-04-14 22:36:09 +02:00
{
return -1; // error opening files
2023-04-14 09:53:56 +02:00
}
2023-04-28 15:21:37 +02:00
2023-04-16 13:03:41 +02:00
unsigned char databuffer1[BYTES_TO_READ_AT_ONCE] = "";
unsigned char databuffer2[BYTES_TO_READ_AT_ONCE] = "";
size_t bytes;
while ((bytes = fread(databuffer1, 1, BYTES_TO_READ_AT_ONCE, file1)) != 0)
2023-04-14 22:36:09 +02:00
{
2023-04-16 13:03:41 +02:00
if(fread(databuffer2, 1, bytes, file2) != bytes){
fclose(file1);
fclose(file2);
return -1; // error while reading the file(s)
}
if(memcmp(databuffer1, databuffer2, bytes) != 0){
fclose(file1);
fclose(file2);
return 0; // files are not the same
}
2023-04-14 09:53:56 +02:00
}
fclose(file1);
fclose(file2);
return 1;
2023-04-13 23:48:16 +02:00
}
2023-04-21 20:50:39 +02:00
/// @brief Checks if two files contain the same data (by comparing the resulting SHA-1 hash)
/// @param filename1
/// @param filename2
2023-04-21 20:50:39 +02:00
/// @return Returns 1 if the files are the same, 0 otherwise, -1 if an error occurred
2023-04-17 15:51:04 +02:00
int hash_by_hash_file_comparison(char* filename1, char* filename2)
{
unsigned char* hash1 = sha1(filename1);
unsigned char* hash2 = sha1(filename2);
2023-04-21 20:50:39 +02:00
if(hash1 == NULL || hash2 == NULL)
{
return -1;
}
2023-04-19 11:54:00 +02:00
int ret = (memcmp(hash1, hash2, SHA_DIGEST_LENGTH) == 0);
2023-04-17 15:51:04 +02:00
free(hash1);
free(hash2);
return ret;
}
/// @brief Generates the SHA-1 hash of a file. Deprecated since OpenSSL >= 3.0
2023-04-21 20:50:39 +02:00
/// @param filename Name of the file
/// @return Pointer to the digest
unsigned char* sha1_legacy(char *filename)
2023-04-17 15:51:04 +02:00
{
FILE *f = fopen(filename, "rb");
if (f == NULL)
{
fprintf(stderr, "Couldn't open %s\n", filename);
2023-04-21 20:50:39 +02:00
return NULL;
2023-04-17 15:51:04 +02:00
}
// For a matter of efficiency, we do not read
// the whole file at once. It'd be heavy on RAM.
// Instead, we read BYTES_TO_READ_AT_ONCE at time
2023-04-28 15:21:37 +02:00
size_t bytes; // how many bytes we have actually read from fread
2023-04-17 15:51:04 +02:00
SHA_CTX context;
unsigned char *hash = malloc(SHA_DIGEST_LENGTH * sizeof(unsigned char)); // result will be here
unsigned char databuffer[BYTES_TO_READ_AT_ONCE];
SHA1_Init(&context);
while ((bytes = fread(databuffer, 1, BYTES_TO_READ_AT_ONCE, f)) != 0)
{
SHA1_Update(&context, databuffer, bytes);
}
SHA1_Final(hash, &context);
fclose(f);
return hash;
}
/// @brief Generates the SHA-1 hash of a file.
/// @param filename Name of the file
/// @return Pointer to the digest
2023-04-28 15:21:37 +02:00
unsigned char* sha1(char *filename)
{
EVP_MD_CTX *mdctx; // envelope context
const EVP_MD *md; // envelope mode (SHA1)
unsigned char *hash = malloc(EVP_MAX_MD_SIZE * sizeof(unsigned char)); // result will be here
2023-04-28 22:10:25 +02:00
unsigned int digest_len;
2023-04-28 15:21:37 +02:00
size_t bytes; // how many bytes we have actually read from fread
unsigned char databuffer[BYTES_TO_READ_AT_ONCE];
FILE *f = fopen(filename, "rb");
if (f == NULL)
{
fprintf(stderr, "Couldn't open %s\n", filename);
return NULL;
}
mdctx = EVP_MD_CTX_new();
if (mdctx == NULL) {
return NULL;
}
md = EVP_sha1();
if (!EVP_DigestInit_ex(mdctx, md, NULL)) {
return NULL;
}
while ((bytes = fread(databuffer, 1, BYTES_TO_READ_AT_ONCE, f)) != 0)
{
if (!EVP_DigestUpdate(mdctx, databuffer, bytes)) {
return NULL;
}
}
if (!EVP_DigestFinal_ex(mdctx, hash, &digest_len)) {
return NULL;
}
EVP_MD_CTX_free(mdctx);
fclose(f);
return hash;
}
2023-04-21 20:50:39 +02:00
/// @brief Combines two paths by joining them with a '/' separator
/// @param path1 Leading part of the path (NUL-terminated)
/// @param path2 Trailing part of the path (NUL-terminated)
/// @return Pointer to the newly allocated path, to be free-d by the caller.
///         Never NULL: the process exits with -1 if memory cannot be allocated.
char* combine_path(char* path1, char* path2){
    size_t length1 = strlen(path1);
    size_t length2 = strlen(path2);

    // +2: one byte for the separator, one for the terminating NUL
    char *path = malloc(length1 + length2 + 2);
    if (path == NULL)
    {
        fprintf(stderr, "Out of memory while building a path\n");
        exit(-1);
    }

    memcpy(path, path1, length1);
    path[length1] = '/';
    memcpy(path + length1 + 1, path2, length2 + 1); // copies the NUL as well
    return path;
}
2023-04-21 20:50:39 +02:00
/// @brief Output of the command 'dircomp -h': prints the usage line and one line per option on stdout
void print_help(void)
{
    // The option letters listed here must match the ones accepted by getopt ("rvhfd"):
    // the digest option is -d, not -b.
    printf("usage: dircomp directory1 directory2 [-rvfdh]\n");
    printf(" -r \t\t Recursive\n");
    printf(" -v \t\t Verbose\n");
    printf(" -f \t\t Fast. Halt as soon as the directories are found to be not equal\n");
    printf(" -d \t\t Comparison by digest\n");
    printf(" -h \t\t Print this help and quit\n");
}