from pathlib import Path

# Root folder of the datasets; relative, so it is resolved against the current working directory.
data_dir = Path("data")
# Sub-folder of data_dir holding the per-sample files that are opened through smp_dir.
smp_dir = data_dir / "samples"


class ReadFile:
    """
    Collection of static readers for the per-sample files of the datasets.

    Every public method receives the sample id (the file name without extension) and opens the matching
    file below the module-level ``smp_dir`` or ``data_dir`` paths. A missing file raises whatever
    ``Path.open`` raises (``FileNotFoundError``).
    """

    @staticmethod
    def _read_hex_bytes(path):
        """
        Collects the two-character tokens of a whitespace-separated .bytes file
        :param path: Path object of the .bytes file
        :return: List with every token that is exactly two characters long, except the "??" placeholder
        """
        with path.open() as file:
            tokens = file.read().split()
        # Tokens of any other length are discarded, and so is the "??" placeholder.
        return [token for token in tokens if len(token) == 2 and token != "??"]

    @staticmethod
    def bytes_reader(id):
        """
        Reads the bytes within a Microsoft dataset .bytes file
        :param id: file id
        :return: Byte sequence as a list of two-character strings ("??" entries are skipped)
        """
        return ReadFile._read_hex_bytes(smp_dir.joinpath(id + ".bytes"))

    @staticmethod
    def asm(id):
        """
        Returns the entire .asm file in string format
        :param id: file id
        :return: .asm file in string format
        """
        # errors="ignore" silently drops byte sequences that are not valid UTF-8 instead of raising.
        with smp_dir.joinpath(id + ".asm").open(encoding="utf-8", errors="ignore") as file:
            return file.read()

    @staticmethod
    def asm_lines(id):
        """
        It reads an .asm file from the Microsoft dataset and returns the contents in the form of a list of strings,
        each containing a line from the asm file
        :param id: file id
        :return: List with lines in the file (each line keeps its trailing newline character)
        """
        # errors="ignore" silently drops byte sequences that are not valid UTF-8 instead of raising.
        with smp_dir.joinpath(id + ".asm").open(encoding="utf-8", errors="ignore") as file:
            return file.readlines()

    @staticmethod
    def apis(id):
        """
        Reads the api functions present in a sample of the ST-WinMal dataset
        :param id: file id
        :return: List of api functions found, one entry per non-empty line (empty list for an empty file)
        """
        with data_dir.joinpath("Andrea_Microsoft", id + ".apis").open() as file:
            data = file.read()
        # A plain split("\n") yields a phantom "" entry after the final newline (and [""] for an empty
        # file); empty lines are not api names, so they are left out.
        return [line for line in data.split("\n") if line]

    @staticmethod
    def entropy(id):
        """
        Returns a list in which each element contains the name of a section and its entropy
        :param id: file id
        :return: Entropy per section, as a list of two-element lists of strings
        """
        with data_dir.joinpath("Andrea_Microsoft", id + ".entropy").open() as file:
            # Strip the "Section" keyword, the b'...' quoting and the colons, then tokenize on whitespace.
            # NOTE(review): the "b'" replacement fires anywhere in the text, so a quoted name ending in
            # "b" would lose that last character - confirm against the real file format.
            data = file.read().replace("Section", ":").replace("b'", " ").replace("'", " ").replace(":", " ").split()
        # Consecutive tokens are paired up (assumes they alternate name, entropy - TODO confirm);
        # a trailing token without a partner is dropped. The values are kept as strings.
        return [[name, value] for name, value in zip(data[::2], data[1::2])]

    @staticmethod
    def bytes_microsoft(id):
        """
        Reads the bytes within a ST-WinMal dataset .bytes file
        :param id: file id
        :return: Byte sequence as a list of two-character strings ("??" entries are skipped)
        """
        return ReadFile._read_hex_bytes(data_dir.joinpath("Andrea_Microsoft", id + ".bytes"))
