"""Search for a partial SHA-256 collision between two text files.

Each candidate is derived from an integer bit pattern: bit ``idx`` of the
pattern decides whether a single trailing space is appended to line ``idx``.
The same pattern sequence is applied to both files until a variant of the
first file and a variant of the second file share the same last
``num_chars`` hex characters of their SHA-256 digest.
"""

import sys
from hashlib import sha256
from multiprocessing import Pool

# Number of bit patterns handed to the worker pool per round.
_BATCH_SIZE = 100
# Length of a SHA-256 hex digest; the largest meaningful ``num_chars``.
_DIGEST_HEX_LEN = 64


def calculate_hash(file_lines, num_chars):
    """Return the last ``num_chars`` hex characters of the content's SHA-256.

    The lines are joined with ``"\\n"`` (no trailing newline) and encoded as
    UTF-8 before hashing.

    Args:
        file_lines: Iterable of strings, one per line, without line endings.
        num_chars: How many trailing hex characters to keep. Values above 64
            yield the whole digest; non-positive values yield ``""``.

    Returns:
        The requested suffix of the hex digest.
    """
    # Guard explicitly: ``digest[-0:]`` would return the *entire* digest.
    if num_chars <= 0:
        return ""
    digest = sha256("\n".join(file_lines).encode()).hexdigest()
    return digest[-num_chars:]


def _apply_pattern(lines, bit_pattern):
    """Return a copy of ``lines`` with a space appended where the bit is set."""
    return [
        line + " " * ((bit_pattern >> idx) & 1)
        for idx, line in enumerate(lines)
    ]


def modify_and_hash(args):
    """Build the variants of both files for one bit pattern and hash them.

    Args:
        args: Tuple ``(real_og, fake_og, num_chars, bit_pattern)``; packed
            into one argument so the function can be used with ``Pool.map``.

    Returns:
        Tuple ``(real_hash, fake_hash, real_modified, fake_modified)``.
    """
    real_og, fake_og, num_chars, bit_pattern = args

    real_modified = _apply_pattern(real_og, bit_pattern)
    fake_modified = _apply_pattern(fake_og, bit_pattern)

    real_hash = calculate_hash(real_modified, num_chars)
    fake_hash = calculate_hash(fake_modified, num_chars)

    return (real_hash, fake_hash, real_modified, fake_modified)


def _hash_pattern(args):
    """Pool worker: like ``modify_and_hash`` but returns only the two hashes.

    Dropping the modified line lists keeps the data sent back from the worker
    processes small; the winning variants are rebuilt from their pattern.
    """
    real_hash, fake_hash, _, _ = modify_and_hash(args)
    return real_hash, fake_hash


def _write_lines(path, lines):
    """Write ``lines`` joined by ``"\\n"`` exactly as ``calculate_hash`` sees them."""
    # UTF-8 and newline="\n" (no platform translation) keep the bytes on disk
    # identical to the bytes that were hashed.
    with open(path, "w", encoding="utf-8", newline="\n") as f_out:
        f_out.write("\n".join(lines))


def main(real_file, fake_file, num_chars):
    """Search for variants of two files whose hash suffixes collide.

    On success the variants are written to ``{real_file}.out`` and
    ``{fake_file}.out`` and a message is printed.

    Args:
        real_file: Path of the first input file (read as UTF-8 text).
        fake_file: Path of the second input file (read as UTF-8 text).
        num_chars: Number of trailing hex digest characters that must match;
            must be between 1 and 64.

    Returns:
        ``True`` if a collision was found and written, ``False`` if every
        distinct bit pattern was tried without finding one.

    Raises:
        ValueError: If ``num_chars`` is outside ``1..64``.
    """
    if not 1 <= num_chars <= _DIGEST_HEX_LEN:
        raise ValueError(
            f"num_chars must be between 1 and {_DIGEST_HEX_LEN}, "
            f"got {num_chars}"
        )

    with open(real_file, encoding="utf-8") as f:
        real_og = f.read().splitlines()
    with open(fake_file, encoding="utf-8") as f:
        fake_og = f.read().splitlines()

    # Only the low len(lines) bits of a pattern affect a file, so beyond this
    # bound both files merely repeat variants that were already hashed.
    search_space = 1 << max(len(real_og), len(fake_og))

    # hash suffix -> most recent bit pattern that produced it
    real_patterns = {}
    fake_patterns = {}

    with Pool() as pool:
        for start in range(0, search_space, _BATCH_SIZE):
            patterns = range(start, min(start + _BATCH_SIZE, search_space))
            tasks = [
                (real_og, fake_og, num_chars, pattern) for pattern in patterns
            ]

            # Pool.map returns results in task order, so they can be paired
            # with the patterns that produced them.
            results = pool.map(_hash_pattern, tasks)

            for pattern, (real_hash, fake_hash) in zip(patterns, results):
                real_patterns[real_hash] = pattern
                fake_patterns[fake_hash] = pattern

                if real_hash in fake_patterns:
                    collision_hash = real_hash
                elif fake_hash in real_patterns:
                    collision_hash = fake_hash
                else:
                    continue

                print(
                    f"Collision found! \n{real_file}.out and {fake_file}.out "
                    f"have the same hash: {collision_hash}"
                )
                _write_lines(
                    f"{real_file}.out",
                    _apply_pattern(real_og, real_patterns[collision_hash]),
                )
                _write_lines(
                    f"{fake_file}.out",
                    _apply_pattern(fake_og, fake_patterns[collision_hash]),
                )
                return True

    print(f"No collision found after trying all {search_space} bit patterns.")
    return False


if __name__ == "__main__":
    usage = f"Usage: {sys.argv[0]} <real_file> <fake_file> <num_chars>"
    if len(sys.argv) != 4:
        print(usage)
        sys.exit(1)

    real_file = sys.argv[1]
    fake_file = sys.argv[2]
    try:
        num_chars = int(sys.argv[3])
    except ValueError:
        print(usage)
        sys.exit(1)

    sys.exit(0 if main(real_file, fake_file, num_chars) else 1)