from multiprocessing import Pool
from hashlib import sha256
import time
import sys


# Function to calculate the (truncated) hash for the given file content
def calculate_hash(file_lines_with_newline, num_chars):
    # Lines already end with '\n', so joining on '' reproduces the original file content
    file_content = "".join(file_lines_with_newline)
    return sha256(file_content.encode()).hexdigest()[-num_chars:]
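
# Quick sanity check (a minimal sketch; "example.txt" is a hypothetical file name):
#
#   with open("example.txt") as f:
#       print(calculate_hash(f.readlines(), 8))  # last 8 hex chars of the SHA-256 digest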


# Function to generate a modified fake file by adding spaces based on the bit pattern
def modify_and_hash(args):
    fake_og, num_chars, bit_pattern, real_hash = args

    # Modify the fake file based on the bit pattern
    fake_modified = [
        line.rstrip() + " " * ((bit_pattern >> idx) & 1) + "\n"
        for idx, line in enumerate(fake_og)
    ]

    # Calculate the hash for the modified fake file
    fake_hash = calculate_hash(fake_modified, num_chars)

    # Return whether the hashes match
    return (fake_hash == real_hash, fake_hash, fake_modified)
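
# Illustration (assumed example, not part of the original script): for a 3-line fake
# file and bit_pattern = 0b101, one trailing space is appended to lines 0 and 2 while
# line 1 is left unchanged, so an N-line file yields 2**N distinct candidate variants.
# (Patterns of 2**N and above only repeat earlier variants, since higher bits fall
# past the last line.)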


def main(real_file, fake_file, num_chars):
    # Read the original real file and retain newline characters
    with open(real_file, 'r') as f:
        real_og = f.readlines()  # This keeps newlines intact

    # Read the original fake file and retain newline characters
    with open(fake_file, 'r') as f:
        fake_og = f.readlines()  # This keeps newlines intact

    # Calculate the hash of the real file (unmodified)
    real_hash = calculate_hash(real_og, num_chars)
    print(f"Real file hash: {real_hash}")

    hash_counter = 0  # To track the number of hashes calculated
    start_time = time.time()  # Start the timer

    # Use multiprocessing Pool
    with Pool() as pool:
        i = 0
        batch_size = 100
        found_collision = False

        while not found_collision:
            # Prepare a batch of bit patterns to process in parallel
            bit_patterns = [(fake_og, num_chars, pattern, real_hash) for pattern in range(i, i + batch_size)]

            # Process the batch in parallel
            results = pool.map(modify_and_hash, bit_patterns)

            # Update the hash counter
            hash_counter += batch_size

            # Display progress on the same line
            elapsed_time = time.time() - start_time
            sys.stdout.write(f"\rProcessed {hash_counter} hashes in {elapsed_time:.2f} seconds.")
            sys.stdout.flush()

            # Check the results to see if a collision was found
            for match, fake_hash, fake_modified in results:
                if match:
                    elapsed_time = time.time() - start_time
                    print(f"\nCollision found! The fake file's hash matches the real file's hash: {real_hash}")
                    print(f"Total hashes processed: {hash_counter} in {elapsed_time:.2f} seconds.")

                    # Write the modified fake file to the output
                    with open(f"{fake_file}.out", 'w') as f_out:
                        f_out.writelines(fake_modified)

                    found_collision = True
                    break

            i += batch_size


if __name__ == "__main__":
    if len(sys.argv) != 4:
        print(f"Usage: {sys.argv[0]} <real_file> <fake_file> <num_chars>")
        sys.exit(1)

    real_file = sys.argv[1]
    fake_file = sys.argv[2]
    num_chars = int(sys.argv[3])

    main(real_file, fake_file, num_chars)
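
# Example invocation (the script name "collide.py" and file names are hypothetical):
#
#   python3 collide.py real.txt fake.txt 4
#
# Matching 4 hex characters (16 bits) typically takes on the order of 2**16
# attempts; each additional character multiplies the expected work by 16.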