"""Brute-force a partial SHA-256 collision by toggling trailing spaces on lines."""
from multiprocessing import Pool
from hashlib import sha256
import time
import sys
def calculate_hash(file_lines_with_newline, num_chars):
    """Return the last ``num_chars`` hex digits of the SHA-256 of the content.

    The lines are expected to already carry their trailing newlines (as
    produced by ``readlines``), so they are concatenated with no separator.
    """
    digest = sha256("".join(file_lines_with_newline).encode()).hexdigest()
    return digest[-num_chars:]

def modify_and_hash(args):
    """Build one candidate fake file from a bit pattern and hash it.

    ``args`` is a tuple ``(fake_og, num_chars, bit_pattern, real_hash)``.
    Bit ``idx`` of ``bit_pattern`` decides whether a single trailing space is
    appended to line ``idx``; any pre-existing trailing whitespace is stripped
    first so each bit has exactly two states.

    Returns ``(matched, candidate_hash, candidate_lines)``.
    """
    fake_og, num_chars, bit_pattern, real_hash = args

    candidate = []
    for idx, line in enumerate(fake_og):
        pad = " " if (bit_pattern >> idx) & 1 else ""
        candidate.append(line.rstrip() + pad + "\n")

    candidate_hash = calculate_hash(candidate, num_chars)
    return (candidate_hash == real_hash, candidate_hash, candidate)

def main(real_file, fake_file, num_chars):
    """Search for a fake file whose truncated SHA-256 matches the real file's.

    Reads both files, then brute-forces bit patterns where bit ``idx`` decides
    whether a trailing space is appended to line ``idx`` of the fake file.
    Candidates are hashed in parallel via a process pool in fixed-size
    batches.  On success the matching fake file is written to
    ``<fake_file>.out``.

    Args:
        real_file: path of the file whose (truncated) hash must be matched.
        fake_file: path of the file to mutate.
        num_chars: number of trailing hex digits of the SHA-256 to compare.
    """
    # readlines() keeps the trailing '\n' on each line; the hashing helpers
    # rely on that.
    with open(real_file, 'r') as f:
        real_og = f.readlines()
    with open(fake_file, 'r') as f:
        fake_og = f.readlines()

    # Calculate the hash of the real file (unmodified).
    real_hash = calculate_hash(real_og, num_chars)
    print(f"Real file hash: {real_hash}")

    hash_counter = 0  # total candidate hashes computed so far
    start_time = time.time()

    # Each line contributes one togglable bit, so only 2 ** len(fake_og)
    # patterns are distinct.  Beyond that, patterns repeat (higher bits are
    # ignored by modify_and_hash) and the original loop would run forever —
    # guaranteed for an empty fake file.  Bound the search instead.
    max_patterns = 1 << len(fake_og)
    batch_size = 100
    found_collision = False
    i = 0

    with Pool() as pool:
        while not found_collision:
            if i >= max_patterns:
                print("\nSearch space exhausted without finding a collision.")
                return

            # Prepare a batch of bit patterns to process in parallel.
            bit_patterns = [
                (fake_og, num_chars, pattern, real_hash)
                for pattern in range(i, i + batch_size)
            ]
            results = pool.map(modify_and_hash, bit_patterns)
            hash_counter += len(bit_patterns)

            # Display progress on the same line (\r rewinds to column 0).
            elapsed_time = time.time() - start_time
            sys.stdout.write(
                f"\rProcessed {hash_counter} hashes in {elapsed_time:.2f} seconds."
            )
            sys.stdout.flush()

            # Check the results to see if a collision was found.
            for match, fake_hash, fake_modified in results:
                if match:
                    elapsed_time = time.time() - start_time
                    print(f"\nCollision found! The fake file's hash matches the real file's hash: {real_hash}")
                    print(f"Total hashes processed: {hash_counter} in {elapsed_time:.2f} seconds.")

                    # Write the modified fake file to the output.
                    with open(f"{fake_file}.out", 'w') as f_out:
                        f_out.writelines(fake_modified)
                    found_collision = True
                    break
            i += batch_size

if __name__ == "__main__":
    # Require exactly three positional arguments.
    if len(sys.argv) != 4:
        print(f"Usage: {sys.argv[0]} <real_file> <fake_file> <num_chars>")
        sys.exit(1)
    main(sys.argv[1], sys.argv[2], int(sys.argv[3]))