# Files
# COMP6441-birthday-attack/enforcer.py
# 2024-11-18 17:58:13 +11:00
#
# 91 lines
# 3.3 KiB
# Python

from multiprocessing import Pool
from hashlib import sha256
import time
import sys
def calculate_hash(file_lines_with_newline, num_chars):
    """Return the last ``num_chars`` hex digits of the SHA-256 of the lines.

    The lines are concatenated exactly as given (no separator is inserted,
    so each line must already carry its own line terminator), encoded with
    ``str.encode()``'s default UTF-8, and hashed.

    Args:
        file_lines_with_newline: Iterable of strings making up the content.
        num_chars: Number of trailing hex digits to keep. Values larger
            than the digest length return the full digest.

    Returns:
        The lowercase hex suffix of the digest; ``""`` when ``num_chars``
        is 0.

    Raises:
        ValueError: If ``num_chars`` is negative.
    """
    if num_chars < 0:
        raise ValueError(f"num_chars must be non-negative, got {num_chars}")
    digest = sha256("".join(file_lines_with_newline).encode()).hexdigest()
    # Slice from an explicit start index: ``digest[-num_chars:]`` would
    # return the whole digest for num_chars == 0 because -0 == 0.
    start = max(len(digest) - num_chars, 0)
    return digest[start:]
def modify_and_hash(args):
    """Build one whitespace variant of the fake file and compare its hash.

    The inputs arrive as a single tuple so the function can be used with
    a one-argument mapping call.

    Args:
        args: A 4-tuple ``(fake_og, num_chars, bit_pattern, real_hash)``.
            ``fake_og`` is the list of fake-file lines, ``num_chars`` the
            hash suffix length, ``bit_pattern`` an integer whose bit ``k``
            decides whether line ``k`` gets one trailing space, and
            ``real_hash`` the suffix to match.

    Returns:
        A tuple ``(matched, variant_hash, variant_lines)`` where
        ``matched`` is True when the variant's hash equals ``real_hash``.
    """
    original_lines, num_chars, bit_pattern, target_hash = args

    variant_lines = []
    for position, text in enumerate(original_lines):
        # Existing trailing whitespace (including the line terminator) is
        # removed first, so the only trailing space is the one chosen here.
        padding = " " if (bit_pattern >> position) & 1 else ""
        variant_lines.append(text.rstrip() + padding + "\n")

    variant_hash = calculate_hash(variant_lines, num_chars)
    matched = variant_hash == target_hash
    return (matched, variant_hash, variant_lines)
def main(real_file, fake_file, num_chars):
    """Search for a variant of ``fake_file`` whose hash suffix matches ``real_file``.

    Variants are produced by ``modify_and_hash``: bit ``k`` of an integer
    pattern toggles one trailing space on line ``k`` of the fake file.
    Patterns are tried in increasing order, in parallel batches. On the
    first match the variant is written to ``<fake_file>.out``. If every
    distinct pattern has been tried without a match, a message is printed
    and the function returns.

    Args:
        real_file: Path of the file whose hash suffix is the target.
        fake_file: Path of the file to be modified.
        num_chars: Number of trailing hex digits of the hash to match.
    """
    # Pin the encoding and disable newline translation so the text that is
    # hashed (calculate_hash encodes it as UTF-8) corresponds byte-for-byte
    # to what is on disk, independent of platform and locale.
    with open(real_file, 'r', encoding="utf-8", newline="") as f:
        real_og = f.readlines()  # line terminators are kept
    with open(fake_file, 'r', encoding="utf-8", newline="") as f:
        fake_og = f.readlines()  # line terminators are kept

    # Hash of the unmodified real file is the target.
    real_hash = calculate_hash(real_og, num_chars)
    print(f"Real file hash: {real_hash}")

    # One bit per line: only 2**len(fake_og) distinct variants exist, and
    # any higher pattern would just repeat an earlier variant.
    total_patterns = 1 << len(fake_og)
    batch_size = 100
    hash_counter = 0  # number of hashes actually calculated
    start_time = time.time()

    with Pool() as pool:
        for batch_start in range(0, total_patterns, batch_size):
            batch_end = min(batch_start + batch_size, total_patterns)
            tasks = [
                (fake_og, num_chars, pattern, real_hash)
                for pattern in range(batch_start, batch_end)
            ]
            results = pool.map(modify_and_hash, tasks)
            hash_counter += len(results)

            # Progress is rewritten on the same terminal line.
            elapsed_time = time.time() - start_time
            sys.stdout.write(f"\rProcessed {hash_counter} hashes in {elapsed_time:.2f} seconds.")
            sys.stdout.flush()

            for match, _, fake_modified in results:
                if not match:
                    continue
                elapsed_time = time.time() - start_time
                print(f"\nCollision found! The fake file's hash matches the real file's hash: {real_hash}")
                print(f"Total hashes processed: {hash_counter} in {elapsed_time:.2f} seconds.")
                # Write without newline translation so the bytes on disk
                # are exactly the bytes that were hashed.
                with open(f"{fake_file}.out", 'w', encoding="utf-8", newline="") as f_out:
                    f_out.writelines(fake_modified)
                return

    elapsed_time = time.time() - start_time
    print(f"\nNo collision found after trying all {hash_counter} variants in {elapsed_time:.2f} seconds.")
if __name__ == "__main__":
    # Exactly three positional arguments are expected after the script name.
    if len(sys.argv) != 4:
        print(f"Usage: {sys.argv[0]} <real_file> <fake_file> <num_chars>")
        sys.exit(1)
    real_file = sys.argv[1]
    fake_file = sys.argv[2]
    # Reject a non-numeric suffix length with a message instead of a traceback.
    try:
        num_chars = int(sys.argv[3])
    except ValueError:
        print(f"Error: <num_chars> must be an integer, got {sys.argv[3]!r}")
        sys.exit(1)
    # A length below 1 cannot describe a hash suffix to match.
    if num_chars < 1:
        print(f"Error: <num_chars> must be at least 1, got {num_chars}")
        sys.exit(1)
    main(real_file, fake_file, num_chars)