"""
The objective of this script is to take samples from the dataset, submit them to Cuckoo, and then parse the results.
"""

from clientAPI import *
from extractFeatures import *
from os import listdir, remove, mkdir
from time import sleep
import sys
import conf
from android_static import runAndroidStatic, prepareStatic

# Machine name handed to submitSample for every submission.
MACHINE = conf.MACHINE

# Dataset root; family names are appended to it directly to build paths.
DATASET = conf.D_SOURCE

# Family tags to process and, per tag, the samples already collected.
families = conf.FAMILIES
collected = {family: [] for family in families}

def log(msg):
    """Append ``msg`` followed by a newline to ``logfile.log``.

    The file is opened in append mode relative to the current working
    directory, so it is created when it does not exist yet.
    """
    # The context manager closes the handle even if the write raises.
    with open("logfile.log", "a") as logfile:
        logfile.write(msg + "\n")

def getAvailable(tag):
    """Return the number of entries in the dataset folder of family ``tag``."""
    family_dir = DATASET + tag + "/"
    entries = listdir(family_dir)
    return len(entries)


def waitForReport(taskId):
    """Block until the task ``taskId`` reaches the "reported" state.

    ``getTaskState`` is polled every 10 seconds; there is no timeout, so the
    call only returns once the state equals "reported".

    Returns ``taskId`` unchanged, which lets callers nest this call inside
    the one that consumes the report.
    """
    while getTaskState(taskId) != "reported":
        sleep(10)
    # str() keeps the message from raising TypeError when the id is an int,
    # matching how parseReportDynamic formats task ids.
    print("Report for task " + str(taskId) + " is now ready.")
    return taskId

"""
This function extracts the dynamic APIs from the report and returns them as a dictionary.
"""
def parseReportDynamic(taskId):
    """Return the dynamic API data found in the report of task ``taskId``.

    When ``conf.PLATFORM`` is "WINDOWS" the report's 'behavior' section is
    passed to ``extractDynamicAPIs`` (only if it contains 'apistats').
    For any other platform the 'api' entry of the 'droidmon' section is
    returned as is.

    Returns an empty dict, after printing a notice, when the expected
    section is missing from the report.
    """
    report = retrieveReport(taskId)
    if conf.PLATFORM == "WINDOWS":
        if "behavior" in report and "apistats" in report['behavior']:
            return extractDynamicAPIs(report['behavior'])
        print("Report of task " + str(taskId) + " doesn't have a 'behavior' section")
        return {}

    # The whole 'droidmon' section may be absent, not only its 'api' key;
    # treat both cases alike instead of failing with KeyError.
    droidmon = report['droidmon'] if 'droidmon' in report else {}
    if 'api' in droidmon:
        return droidmon['api']
    print("Report of task " + str(taskId) + " doesn't have a 'api' section")
    return {}


def saveResults(res, tag, sample, type):
    """Write ``str(res)`` to ``results/<tag>/dict_<type>_<stem>``.

    ``<stem>`` is the part of ``sample`` before its first ".". The folder
    ``results/<tag>`` must already exist; an existing file is overwritten.

    The parameter name ``type`` shadows the builtin but is kept so that
    existing callers are unaffected.
    """
    target = "results/" + tag + "/" + "dict_" + type + "_" + sample.split(".")[0]
    # The context manager closes the handle even if the write raises.
    with open(target, "w") as f:
        f.write(str(res))

def sendToCuckoo(source, tag, analyses, samples, limit=50, start=0, ext=""):
    """Submit one slice of the archives found in ``source``.

    The directory listing is sorted and the entries from index ``start`` up
    to ``start + limit`` are each passed to ``extractArchive``. Every
    extracted name that is non-empty and not yet in ``collected[tag]`` is
    submitted with ``submitSample``; the returned task id is appended to
    ``analyses[tag]`` and the extracted name to ``samples[tag]``.

    Returns the number of submissions made.
    """
    folder = source if source[-1] == "/" else source + "/"
    batch = sorted(listdir(folder))[start:start + limit]
    submitted = 0
    for archive in batch:
        extracted = extractArchive(folder + archive, ext=ext)
        # Skip failed extractions and samples that already have results.
        if extracted == "" or extracted in collected[tag]:
            continue
        submitted += 1
        taskId = submitSample(extracted, MACHINE)
        analyses[tag].append(taskId)
        samples[tag].append(extracted)

    print(str(submitted) + " analyses for sample " + tag + " have been submitted")
    return submitted

# this function starts analysis for samples of a given tag
def analyzeWithCuckoo(tag, limit, start, analyses, samples):
    """Analyse up to ``limit`` samples of family ``tag``, starting at ``start``.

    The batch is submitted through ``sendToCuckoo``; after a 120 second
    pause each submitted task is awaited, its report parsed with
    ``parseReportDynamic`` and any non-empty result stored via
    ``saveResults`` (type "dynamicAPIs") and recorded in ``collected[tag]``.
    Every task is then cleaned with ``cleanData`` and its sample file
    removed. Samples that yield no features are flagged for a manual check,
    and the function calls itself to analyse as many further samples,
    starting right after the current batch.

    ``analyses`` and ``samples`` are dicts keyed by tag; their ``tag``
    entries are filled during submission and reset to empty lists once the
    batch has been processed.

    If fewer than ``limit`` samples remain after ``start`` the remaining
    ones are analysed; if none remain the function reports that it ran out
    of samples and returns.
    """
    # this path should be a folder, named as a family, containing zip files
    path = DATASET + tag + "/"
    if limit <= 0:
        log("Analysis completed for " + tag)
        print("Analysis completed for " + tag)
        return
    available = getAvailable(tag)
    if start >= available:
        log("Ran out of samples for " + tag)
        print("Ran out of samples for " + tag)
        return
    # Fewer samples left than requested: analyse the remainder instead of
    # abandoning the whole batch.
    limit = min(limit, available - start)
    count = sendToCuckoo(path, tag, analyses, samples, limit=limit, start=start)
    log(str(count) + " tasks for " + tag + " family have been sumbitted. Taking a little nap..")
    print(str(count) + " tasks for " + tag + " family have been sumbitted. Taking a little nap..")
    sleep(120)
    for taskId, sample in zip(analyses[tag], samples[tag]):
        # collect result
        print("Parsing report for " + sample)
        try:
            features = parseReportDynamic(waitForReport(str(taskId)))
        except (requests.exceptions.ChunkedEncodingError,
                requests.exceptions.JSONDecodeError) as e:
            # A broken or unparsable report counts as "no features".
            log("Got : " + str(e))
            print("Got : " + str(e))
            features = {}
        if len(features) > 0:
            log("Saving results for " + tag + " - " + str(taskId) + " : " + sample)
            print("Saving results...")
            saveResults(features, tag, sample, "dynamicAPIs")
            collected[tag].append(sample)
        else:
            count -= 1
            print("Need a manual check on sample : " + sample)
            # Logged as well, as analyzeWithCuckooDroid does, so the failure
            # is still visible after the console output is gone.
            log("Need a manual check on sample : " + sample)
        # clean up the task and the extracted sample in both cases
        cleanData(str(taskId))
        remove(sample)
    print(str(count) + " tasks have performed correctly")
    log(str(count) + " tasks have performed correctly")

    analyses[tag] = []
    samples[tag] = []
    if count < limit:
        log("Retrying with other " + str(limit - count) + " tasks")
        print("Retrying with other " + str(limit - count) + " tasks")
        analyzeWithCuckoo(tag, limit - count, limit + start, analyses, samples)
    return

def analyzeWithCuckooDroid(tag, analyses, samples):
    """Submit every sample of family ``tag`` (extension "apk") and save the results.

    All entries of the family folder are submitted in one batch through
    ``sendToCuckoo``. After a 120 second pause each task is awaited, its
    report parsed with ``parseReportDynamic`` and non-empty results stored
    via ``saveResults`` (type "dynamicAPIs"). Samples without features are
    printed and logged for a manual check. Each task is cleaned with
    ``cleanData`` and its sample file removed in either case.

    ``analyses`` and ``samples`` are dicts keyed by tag that ``sendToCuckoo``
    fills with task ids and sample names.
    """
    # Folder named after the family that holds its sample archives.
    familyDir = DATASET + tag + "/"
    count = sendToCuckoo(familyDir, tag, analyses, samples, limit=getAvailable(tag), ext="apk")
    submittedMsg = str(count) + " tasks for " + tag + " family have been sumbitted. Taking a little nap.."
    log(submittedMsg)
    print(submittedMsg)
    sleep(120)
    for taskId, sample in zip(analyses[tag], samples[tag]):
        # Collect the result of this task.
        print("Parsing report for " + sample)
        try:
            features = parseReportDynamic(waitForReport(str(taskId)))
        except requests.exceptions.ChunkedEncodingError as err:
            failure = "Got : " + str(err)
            log(failure)
            print(failure)
            features = {}
        except requests.exceptions.JSONDecodeError as err:
            failure = "Got : " + str(err)
            log(failure)
            print(failure)
            features = {}
        if len(features) == 0:
            # Nothing usable came back: one fewer successful task.
            count -= 1
            manualCheck = "Need a manual check on sample : " + sample
            print(manualCheck)
            log(manualCheck)
        else:
            log("Saving results for " + tag + " - " + str(taskId) + " : " + sample)
            print("Saving results...")
            saveResults(features, tag, sample, "dynamicAPIs")
        # Clean up the task and the extracted sample in both cases.
        cleanData(str(taskId))
        remove(sample)
    summary = str(count) + " tasks have performed correctly"
    print(summary)
    log(summary)

    return

#this extracts static features without using cuckoo
def analyzeStatic(tag):
    """Compute section entropy and static APIs for the samples of family ``tag``.

    Each entry of the family folder is passed to ``extractArchive``. When
    that returns a non-empty name, ``extractSectionEntropy`` and
    ``extractStaticAPIs`` are run on that name inside the family folder,
    their results are stored under the archive's name, and the extracted
    file is removed. Both result dicts are then written with
    ``saveResults`` (types "entropy" and "staticAPIs").
    """
    familyDir = DATASET + tag + "/"
    entropyByArchive = {}
    apisByArchive = {}
    for archive in listdir(familyDir):
        sname = extractArchive(familyDir + archive)
        if sname != "":
            extracted = familyDir + sname
            entropyByArchive[archive] = extractSectionEntropy(extracted)
            apisByArchive[archive] = extractStaticAPIs(extracted)
            remove(extracted)
        # NOTE(review): both dicts accumulate across iterations and are saved
        # again on every pass, also when extraction returned "" -- confirm
        # that one cumulative file per sample is really intended.
        saveResults(entropyByArchive, tag, sname, "entropy")
        saveResults(apisByArchive, tag, sname, "staticAPIs")
    return

def dumpData(last, analyses, samples):
    """Dump the pending state to ``dump.out`` and delete leftover sample files.

    ``str(analyses)`` and ``str(samples)`` are written on one line each,
    overwriting any previous ``dump.out``. Afterwards every name in
    ``samples[last]`` that exists in the current working directory is
    removed.
    """
    # The context manager closes the handle even if a write raises.
    with open("dump.out", "w") as f:
        f.write(str(analyses) + "\n")
        f.write(str(samples) + "\n")
    # One directory listing, kept as a set for cheap membership tests.
    present = set(listdir())
    for name in samples[last]:
        if name in present:
            remove(name)
            # Forget the name so a duplicate entry is not removed twice.
            present.discard(name)

def getCollected(tag):
    """Return the sample names that already have a file in ``results/<tag>``.

    File names are expected to look like ``dict_<type>_<name>``, which is
    the pattern ``saveResults`` writes; the ``<name>`` part is returned for
    each of them. Names that do not contain two underscores are skipped.
    """
    hashes = []
    for entry in listdir("results/" + tag):
        # Split at most twice so a name that itself contains "_" stays whole.
        parts = entry.split("_", 2)
        if len(parts) == 3:
            hashes.append(parts[2])
    return hashes

def analyzeW():
    """Run the Cuckoo analysis for every family in ``families``.

    ``logfile.log`` is recreated first. For each family the already saved
    results are read with ``getCollected`` and ``analyzeWithCuckoo`` is
    asked for the samples still missing to reach ``conf.MAX_SAMPLES_DW``
    (100 when that setting is -1), starting at the number already
    collected. On KeyboardInterrupt the state is dumped with ``dumpData``
    and the loop stops.
    """
    print("Creating logfile")
    with open("logfile.log", "w") as logfile:
        logfile.write("--- LOGS ---\n")
    # Task ids and sample names per family, both filled on submission.
    analyses = {}
    samples = {}
    for tag in families:
        analyses[tag] = []
        samples[tag] = []
        collected[tag] = getCollected(tag)
        startMsg = "Starting analyses of " + tag
        print(startMsg)
        log(startMsg)
        try:
            wanted = 100 if conf.MAX_SAMPLES_DW == -1 else conf.MAX_SAMPLES_DW
            done = len(collected[tag])
            analyzeWithCuckoo(tag, wanted - done, done, analyses, samples)
        except KeyboardInterrupt:
            dumpData(tag, analyses, samples)
            break
    return

def analyzeA():
    """Run ``analyzeWithCuckooDroid`` for every family in ``families``.

    ``logfile.log`` is recreated first. If the analysis of a family raises,
    the exception type is printed and the state is dumped with
    ``dumpData``; the run then continues with the next family. A
    KeyboardInterrupt is reported and dumped the same way but stops the
    run.
    """
    # Task ids and sample names per family, both filled on submission.
    analyses = {}
    samples = {}
    for tag in families:
        analyses[tag] = []
        samples[tag] = []
    print("Creating logfile")
    with open("logfile.log", "w") as f:
        f.write("--- LOGS ---\n")
    for tag in analyses:
        print("Starting analyses of " + tag)
        log("Starting analyses of " + tag)
        try:
            analyzeWithCuckooDroid(tag, analyses, samples)
        except KeyboardInterrupt:
            # A user interrupt must end the whole run, as analyzeW does,
            # instead of silently moving on to the next family.
            print("Got : " + str(sys.exc_info()[0]))
            dumpData(tag, analyses, samples)
            break
        except Exception:
            # Best effort: report, dump what is pending, try the next family.
            print("Got : " + str(sys.exc_info()[0]))
            dumpData(tag, analyses, samples)
    return

def prepareDyn():
    """Create ``results/`` and one sub-folder per family in ``families``.

    Folders that already exist are left untouched, so a later run still
    creates the folders of families that were added in the meantime.
    """
    base = "results/"
    # (label shown in the message, folder to create)
    targets = [(base, base)] + [(tag, base + tag) for tag in families]
    for label, folder in targets:
        print("Creating folder " + label + "...")
        try:
            mkdir(folder)
        except FileExistsError:
            # Left over from an earlier run: keep going with the rest.
            pass
    return

# Dispatch tables indexed by conf.PLATFORM and then conf.MODE:
# a_d holds the analysis entry points, prepare_d the folder preparation steps.
a_d = {
    "ANDROID": {"STATIC": runAndroidStatic, "DYNAMIC": analyzeA},
    "WINDOWS": {"STATIC": analyzeStatic, "DYNAMIC": analyzeW},
}
prepare_d = {
    "ANDROID": {"STATIC": prepareStatic, "DYNAMIC": prepareDyn},
    "WINDOWS": {"STATIC": prepareStaticW, "DYNAMIC": prepareDyn},
}

try:
    prepare_d[conf.PLATFORM][conf.MODE]()
    print("Result folders have been prepared")
except Exception:
    # Preparation is best effort; catching Exception rather than everything
    # lets Ctrl-C and SystemExit still abort the script at this point.
    print("Skipping result folder creation...")

a_d[conf.PLATFORM][conf.MODE]()

