Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
232 views
in Technique[技术] by (71.8m points)

python - numba.njit code runs faster than numba.cuda.jit

I am new to CUDA and Numba. My first goal was to write a hash cracker for my GPU (RTX 2070S). That didn't work for me at first, so I wrote it for my CPU (Intel i7-10700KF). Later I read some NumPy documentation and rewrote the code for my GPU. Now when I let the two scripts compete, the CPU code generates the password lists much faster, probably because of the transformation from hash to array. Can someone who is more advanced help me optimize the code for the GPU?

# cpu.py

import gc
import hashlib
import itertools
import time
from concurrent.futures import ThreadPoolExecutor
import numpy as np
from numba import njit


@njit
def cpu_kernel(hashed: np.ndarray, un_hashed: np.ndarray, md5: str):
    """Return the plain-text entry whose hash equals ``md5``, or ``None``.

    ``hashed`` and ``un_hashed`` are read in parallel: the entry at index
    ``k`` of ``un_hashed`` is returned for the first ``k`` at which
    ``hashed[k] == md5``.
    """
    for idx in range(hashed.size):
        if hashed[idx] != md5:
            continue
        # First match wins; later entries are never inspected.
        return un_hashed[idx]
    return None


def get_password_possibilities(signs_length, min_length, max_length):
    """Count the strings that can be built from an alphabet of a given size.

    Sums ``signs_length ** length`` for every ``length`` from ``min_length``
    to ``max_length`` (both inclusive). An empty range yields 0.
    """
    # A non-positive length contributes exactly one combination, hence the
    # clamp on the exponent.
    return sum(
        signs_length ** max(length, 0)
        for length in range(min_length, max_length + 1)
    )


def generate_password_list(signs, min_len, max_len):
    """Build one batch of candidate passwords together with their MD5 digests.

    Candidates are all strings of exactly ``signs`` characters drawn from the
    module-level ``LETTERS``, enumerated in ``itertools.product`` order.

    :param signs: length of every generated password.
    :param min_len: index of the first candidate to include (inclusive).
    :param max_len: index of the last candidate to include (inclusive).
    :return: ``(passwords, hashes)`` - two lists of equal length where
        ``hashes[k]`` is the MD5 hex digest of ``passwords[k]``.
    """
    # Clamp so that negative bounds behave like the counting loop they
    # replace (islice itself rejects negative indices).
    first = max(min_len, 0)
    stop = max(max_len + 1, 0)

    # One pass over the product instead of two, and only the candidates that
    # fall inside the window are joined into strings and hashed.
    window = itertools.islice(itertools.product(LETTERS, repeat=signs), first, stop)

    normal_pwds, hashed_pwds = [], []
    for pwd in map(''.join, window):
        normal_pwds.append(pwd)
        hashed_pwds.append(hashlib.md5(pwd.encode("utf-8")).hexdigest())
    return normal_pwds, hashed_pwds


def try_crack(passwords, hashs, search):
    """Search one batch for ``search`` and terminate the program on a hit.

    :param passwords: plain-text candidates of the batch.
    :param hashs: digests of ``passwords``, in the same order.
    :param search: the digest to look for.
    :raises SystemExit: when a candidate whose digest equals ``search`` is
        found, after the result has been printed.

    Reads the module-level ``find_timer`` to report the elapsed time.
    """
    passwords_hashed = np.array(hashs)
    passwords = np.array(passwords)

    result = cpu_kernel(passwords_hashed, passwords, search)
    # Drop the batch arrays before the next batch is generated.
    del passwords_hashed, passwords
    gc.collect()
    if result is not None:
        # Report the hash that was actually searched for rather than a global.
        print(f"Successfully cracked {search}, {result} in {time.time() - find_timer} seconds")
        # exit() is a helper injected by the site module for interactive use;
        # raising SystemExit has the same effect without depending on it.
        raise SystemExit

# Driver: try every password length in turn, in batches of ``pwd_amount``
# candidates, until try_crack() finds the target hash and ends the program.
LETTERS = "abcdefghijklmnopqrstuvwxyz"
hash_to_crack = hashlib.md5("sabine".encode("utf-8")).hexdigest()
print(hash_to_crack)
current_length = 1
pwd_amount = 10750000
find_timer = time.time()
while True:
    pwd_idx = 0
    possibilities = get_password_possibilities(len(LETTERS), current_length, current_length)
    continue_to_next = False
    print(f"Checking {current_length} signs, with {round(possibilities / 1000000, 2)} mio possible passwords")
    sign_start = time.time()
    while not continue_to_next:
        start = time.time()
        # Called directly: submitting a single task to a ThreadPoolExecutor
        # and leaving its ``with`` block already waits for the task to finish,
        # so the pool and the polling loop added no concurrency.
        passwords, hashes = generate_password_list(current_length, pwd_idx * pwd_amount,
                                                   (pwd_idx + 1) * pwd_amount)
        length = len(passwords)
        print(f"Generated {length} passwords in {time.time() - start} seconds")

        try_crack(passwords, hashes, hash_to_crack)
        # Release the batch before the next one is generated.
        del passwords, hashes
        gc.collect()
        if length < pwd_amount:
            # A short batch means this length is exhausted.
            continue_to_next = True
        else:
            pwd_idx += 1
            print(f"Running batch {pwd_idx + 1}")
    print(f"Checking {current_length} signs took    {time.time() - sign_start} seconds\n")
    current_length += 1
# gpu.py

import numpy as np
from numba import cuda
import hashlib
import itertools
import time
import gc
from concurrent.futures import ThreadPoolExecutor


@cuda.jit
def gpu_kernel(target: np.ndarray, hashes: np.ndarray, res: np.ndarray):
    """Compare one row of ``hashes`` per thread against ``target``.

    :param target: 1-D array holding the hash being searched for.
    :param hashes: 2-D array; row ``pos`` is the candidate hash checked by
        the thread whose 1-D grid index is ``pos``.
    :param res: output array; ``res[0]`` is set to the row index of a
        candidate whose first ``target.shape[0]`` elements equal ``target``.
    """
    pos = cuda.grid(1)
    # The caller rounds the grid size up, so the last block can contain
    # threads with no row to check; they must not index into ``hashes``.
    if pos >= hashes.shape[0]:
        return
    last = target.shape[0] - 1
    for i in range(target.shape[0]):
        if target[i] != hashes[pos, i]:
            return
        if i == last:
            res[0] = pos

def generate_password_list(signs, min_len, max_len):
    """Build one batch of candidate passwords and their MD5 digests as byte arrays.

    Candidates are all strings of exactly ``signs`` characters drawn from the
    module-level ``LETTERS``, enumerated in ``itertools.product`` order.

    :param signs: length of every generated password.
    :param min_len: index of the first candidate to include (inclusive).
    :param max_len: index of the last candidate to include (inclusive).
    :return: ``(passwords, hashes)`` - two lists of equal length. Each entry
        is a 1-D ``uint8`` array: the encoded password, and the characters of
        its MD5 hex digest, respectively. The arrays are read-only views over
        ``bytes`` objects; stack them with ``np.array`` to get a writable copy.
    """
    # Clamp so that negative bounds behave like the counting loop they
    # replace (islice itself rejects negative indices).
    first = max(min_len, 0)
    stop = max(max_len + 1, 0)

    # One pass over the product instead of two, and only the candidates that
    # fall inside the window are joined, encoded and hashed.
    window = itertools.islice(itertools.product(LETTERS, repeat=signs), first, stop)

    normal_passwords, hashed_passwords = [], []
    for candidate in map(''.join, window):
        raw = candidate.encode("utf-8")
        digest = hashlib.md5(raw).hexdigest().encode("ascii")
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        normal_passwords.append(np.frombuffer(raw, dtype=np.uint8))
        hashed_passwords.append(np.frombuffer(digest, dtype=np.uint8))
    return normal_passwords, hashed_passwords


def get_password_possibilities(signs_length, min_length, max_length):
    """Return how many strings exist for the given alphabet size and lengths.

    Adds up ``signs_length ** length`` over every ``length`` in the inclusive
    range ``min_length`` .. ``max_length``; the result is 0 for an empty range.
    """
    total = 0
    for length in range(min_length, max_length + 1):
        # Lengths below one count as a single combination.
        total += signs_length ** max(length, 0)
    return total


def main():
    """Brute-force a fixed MD5 hash on the GPU, one password length at a time.

    For each length, candidates are generated in batches of ``pwd_amount``
    and handed to ``gpu_kernel``. When the kernel reports a match, the
    password and the elapsed time are printed and the program exits via
    ``SystemExit``. Otherwise the loop moves on to the next length forever.
    """
    hash_to_crack = hashlib.md5("sabine".encode("utf-8")).hexdigest()
    # np.frombuffer replaces the deprecated binary mode of np.fromstring; the
    # copy gives an ordinary writable array instead of a read-only view.
    hash_arr = np.frombuffer(hash_to_crack.encode("ascii"), dtype=np.uint8).copy()
    current_length = 1
    pwd_amount = 10000000
    threads_per_block = 32
    find_timer = time.time()
    while True:
        pwd_idx = 0
        possibilities = get_password_possibilities(len(LETTERS), current_length, current_length)
        continue_to_next = False
        print(f"Checking {current_length} signs, with {round(possibilities / 1000000, 2)} mio possible passwords")
        sign_start = time.time()
        while not continue_to_next:
            start = time.time()

            # Called directly: submitting a single task to a
            # ThreadPoolExecutor and leaving its ``with`` block already waits
            # for the task, so the pool and polling loop added no concurrency.
            passwords, hashes = generate_password_list(current_length, pwd_idx * pwd_amount,
                                                       (pwd_idx + 1) * pwd_amount)
            length = len(passwords)
            print(f"Generated {length} passwords in {time.time() - start} seconds")

            passwords_array = np.array(passwords)  # one row per password
            hashes_array = np.array(hashes)  # one row per digest
            del passwords, hashes  # free the Python lists before the launch
            gc.collect()
            print("Started cracking")
            # An empty batch has no rows to check, so skip the launch.
            if length:
                match = np.array([-1])  # stays -1 unless a thread finds the hash
                # Ceiling division: enough blocks to give every row a thread.
                blocks_per_grid = (length + threads_per_block - 1) // threads_per_block
                gpu_kernel[blocks_per_grid, threads_per_block](hash_arr, hashes_array, match)
                if match[0] != -1:
                    print("Hash is", passwords_array[match[0]].tobytes())
                    print(f"Cracking took {time.time() - find_timer}")
                    # exit() is a site-module helper meant for interactive
                    # use; raising SystemExit is equivalent.
                    raise SystemExit
            if length < pwd_amount:
                # A short batch means this length is exhausted.
                continue_to_next = True
            else:
                pwd_idx += 1
                print(f"Running batch {pwd_idx + 1}")
        print(f"Checking {current_length} signs took    {time.time() - sign_start} seconds\n")
        current_length += 1


# Alphabet the candidate passwords are built from. Defined at module level so
# that generate_password_list() and main() also work when this module is
# imported rather than run as a script.
LETTERS = "abcdefghijklmnopqrstuvwxyz"

if __name__ == '__main__':
    main()

I think the main problems with this code are that it generates the whole list even if it only reads the last 10 million passwords, and that it runs on only one or two threads.

question from:https://stackoverflow.com/questions/66062347/numba-njit-code-runs-faster-than-numba-cuda-jit

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Answer

0 votes
by (71.8m points)
Waiting for answers

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...