"""Brute-force a partial SHA-256 collision by toggling trailing spaces.

Given a "real" file and a "fake" file, search for a variant of the fake file
(differing only in trailing whitespace) whose SHA-256 hex digest ends in the
same ``num_chars`` characters as the real file's digest.
"""

import sys
import time
from hashlib import sha256
from multiprocessing import Pool


def calculate_hash(file_lines_with_newline, num_chars):
    """Return the last ``num_chars`` hex digits of the SHA-256 of the lines.

    Args:
        file_lines_with_newline: Iterable of strings that already carry their
            own line terminators; they are concatenated without a separator.
        num_chars: Number of trailing hex characters of the digest to keep.

    Returns:
        The digest suffix as a lowercase hex string.
    """
    file_content = "".join(file_lines_with_newline)
    # str.encode() defaults to UTF-8, so the digest is taken over UTF-8 bytes.
    return sha256(file_content.encode()).hexdigest()[-num_chars:]


def modify_and_hash(args):
    """Build one whitespace variant of the fake file and hash it.

    Takes a single tuple so it can be used directly with ``Pool.map``.

    Args:
        args: ``(fake_og, num_chars, bit_pattern, real_hash)`` where
            ``fake_og`` is the list of fake-file lines, ``num_chars`` is the
            digest suffix length, ``bit_pattern`` is an integer whose bit
            ``idx`` decides whether line ``idx`` gets one trailing space, and
            ``real_hash`` is the digest suffix to match.

    Returns:
        ``(matched, fake_hash, fake_modified)``: whether the suffixes are
        equal, the variant's digest suffix, and the variant's lines.
    """
    fake_og, num_chars, bit_pattern, real_hash = args

    # Each line is stripped of trailing whitespace (including its newline),
    # then gets zero or one space followed by a single "\n". Only the low
    # len(fake_og) bits of bit_pattern therefore influence the result.
    fake_modified = [
        line.rstrip() + " " * ((bit_pattern >> idx) & 1) + "\n"
        for idx, line in enumerate(fake_og)
    ]

    fake_hash = calculate_hash(fake_modified, num_chars)
    return (fake_hash == real_hash, fake_hash, fake_modified)


def main(real_file, fake_file, num_chars):
    """Search for a trailing-whitespace variant of ``fake_file`` that collides.

    On success the colliding variant is written to ``<fake_file>.out``.

    Args:
        real_file: Path of the file whose digest suffix must be matched.
        fake_file: Path of the file to be modified.
        num_chars: Number of trailing hex digest characters that must match.

    Returns:
        True if a collision was found and written, False if every distinct
        whitespace pattern was tried without success.

    Raises:
        ValueError: If ``num_chars`` is not positive.
    """
    if num_chars < 1:
        # A slice of [-0:] would silently compare the whole digest, and a
        # negative value would compare an arbitrary slice.
        raise ValueError("num_chars must be a positive integer")

    # newline="" disables newline translation and UTF-8 matches the encoding
    # used when hashing, so the digest corresponds to the bytes on disk.
    with open(real_file, "r", encoding="utf-8", newline="") as f:
        real_og = f.readlines()
    with open(fake_file, "r", encoding="utf-8", newline="") as f:
        fake_og = f.readlines()

    real_hash = calculate_hash(real_og, num_chars)
    print(f"Real file hash: {real_hash}")

    # One bit per line: beyond 2**len(fake_og) the variants simply repeat,
    # so the search must stop there instead of looping forever.
    total_patterns = 1 << len(fake_og)
    batch_size = 100
    hash_counter = 0
    start_time = time.time()

    with Pool() as pool:
        next_pattern = 0
        while next_pattern < total_patterns:
            batch_end = min(next_pattern + batch_size, total_patterns)
            tasks = [
                (fake_og, num_chars, pattern, real_hash)
                for pattern in range(next_pattern, batch_end)
            ]
            results = pool.map(modify_and_hash, tasks)

            # Count what was actually hashed; the last batch may be short.
            hash_counter += len(tasks)

            # "\r" rewrites the same terminal line for a live progress display.
            elapsed_time = time.time() - start_time
            sys.stdout.write(
                f"\rProcessed {hash_counter} hashes in {elapsed_time:.2f} seconds."
            )
            sys.stdout.flush()

            for match, _fake_hash, fake_modified in results:
                if match:
                    elapsed_time = time.time() - start_time
                    print(
                        "\nCollision found! The fake file's hash matches "
                        f"the real file's hash: {real_hash}"
                    )
                    print(
                        f"Total hashes processed: {hash_counter} "
                        f"in {elapsed_time:.2f} seconds."
                    )
                    # Write without newline translation so the file on disk
                    # has exactly the bytes that were hashed.
                    with open(
                        f"{fake_file}.out", "w", encoding="utf-8", newline=""
                    ) as f_out:
                        f_out.writelines(fake_modified)
                    return True

            next_pattern = batch_end

    print(
        f"\nNo collision found: all 2**{len(fake_og)} trailing-space "
        "patterns were tried."
    )
    return False


if __name__ == "__main__":
    if len(sys.argv) != 4:
        print(f"Usage: {sys.argv[0]} <real_file> <fake_file> <num_chars>")
        sys.exit(1)

    real_file = sys.argv[1]
    fake_file = sys.argv[2]
    num_chars = int(sys.argv[3])

    sys.exit(0 if main(real_file, fake_file, num_chars) else 1)