This commit is contained in:
2024-11-18 17:58:13 +11:00
parent 279664fc84
commit 702447b8d1
6 changed files with 223 additions and 171 deletions

View File

@@ -1,83 +1,81 @@
import sys
import time
from multiprocessing import Pool
from hashlib import sha256
import time
import sys
# Function to calculate the hash for the given file content with proper newline handling
def calculate_hash(file_lines_with_newline, num_chars):
    """Return the last *num_chars* hex digits of the SHA-256 of the file content.

    Parameters:
        file_lines_with_newline: list of lines that already carry their
            trailing newline characters (as produced by ``f.readlines()``).
        num_chars: how many trailing hex characters of the digest to keep.
    """
    # Concatenate directly: the lines already end in '\n', so no separator
    # is needed and the exact on-disk byte layout is preserved.
    file_content = "".join(file_lines_with_newline)
    return sha256(file_content.encode()).hexdigest()[-num_chars:]
# Function to generate a modified fake file by adding spaces based on the bit pattern
def modify_and_hash(args):
    """Apply a bit pattern of trailing spaces to the fake file and hash it.

    ``args`` is one tuple (kept as a single argument so it works with
    ``Pool.map``):
        fake_og:     list of fake-file lines with trailing newlines
        num_chars:   length of the hash suffix being matched
        bit_pattern: int — bit *i* set means append one space to line *i*
        real_hash:   target hash suffix of the real file

    Returns ``(match, fake_hash, fake_modified)`` where *match* is True
    when the modified fake file's hash suffix equals *real_hash*.
    """
    fake_og, num_chars, bit_pattern, real_hash = args
    # Append a space to line i when bit i of the pattern is set.  rstrip()
    # first so repeated runs cannot stack spaces, then restore the newline
    # so the hashed bytes keep a valid file structure.
    fake_modified = [
        line.rstrip() + " " * ((bit_pattern >> idx) & 1) + "\n"
        for idx, line in enumerate(fake_og)
    ]
    # Calculate the hash for the modified fake file and report whether it
    # collides with the real file's hash suffix.
    fake_hash = calculate_hash(fake_modified, num_chars)
    return (fake_hash == real_hash, fake_hash, fake_modified)
def main(real_file, fake_file, num_chars):
    """Brute-force a partial SHA-256 collision between two text files.

    Repeatedly appends trailing spaces to lines of *fake_file* (one bit of
    an incrementing counter per line) until the last *num_chars* hex digits
    of its SHA-256 match those of the unmodified *real_file*, then writes
    the winning variant to ``<fake_file>.out``.
    """
    # Read the original real file and retain newline characters so the
    # hashed bytes match the on-disk content exactly.
    with open(real_file, 'r') as f:
        real_og = f.readlines()  # This keeps newlines intact
    # Read the original fake file and retain newline characters
    with open(fake_file, 'r') as f:
        fake_og = f.readlines()  # This keeps newlines intact

    # Calculate the hash of the real file (unmodified) — the target suffix.
    real_hash = calculate_hash(real_og, num_chars)
    print(f"Real file hash: {real_hash}")

    hash_counter = 0          # To track the number of hashes calculated
    start_time = time.time()  # Start the timer

    # Use multiprocessing Pool to fan candidate bit patterns across cores.
    with Pool() as pool:
        i = 0
        batch_size = 100  # Number of combinations to process in parallel
        found_collision = False
        while not found_collision:
            # Prepare a batch of bit patterns to process in parallel.
            # NOTE(review): patterns beyond 2**len(fake_og) repeat earlier
            # space layouts — the loop would then run forever; acceptable
            # for a brute-force demo but worth confirming.
            bit_patterns = [(fake_og, num_chars, pattern, real_hash)
                            for pattern in range(i, i + batch_size)]
            results = pool.map(modify_and_hash, bit_patterns)
            # Update the hash counter
            hash_counter += batch_size

            # Display progress on the same line.
            elapsed_time = time.time() - start_time
            sys.stdout.write(f"\rProcessed {hash_counter} hashes in {elapsed_time:.2f} seconds.")
            sys.stdout.flush()

            # Check the results to see if a collision was found.
            for match, fake_hash, fake_modified in results:
                if match:
                    elapsed_time = time.time() - start_time
                    print(f"\nCollision found! The fake file's hash matches the real file's hash: {real_hash}")
                    print(f"Total hashes processed: {hash_counter} in {elapsed_time:.2f} seconds.")
                    # Lines already carry newlines, so writelines reproduces
                    # the modified fake file verbatim.
                    with open(f"{fake_file}.out", 'w') as f_out:
                        f_out.writelines(fake_modified)
                    found_collision = True
                    break
            # Increment the bit pattern range for the next batch.
            i += batch_size
if __name__ == "__main__":