This repository has been archived by the owner on Apr 26, 2021. It is now read-only.

Azure Machinery for Cuckoo #3120

Open · wants to merge 64 commits into base: master

Commits (64)
a7c288f
Initial commit for Azure machinery
cccs-kevin Feb 4, 2020
e3cd8ff
NICs weren't being deleted when instance creation failed
cccs-kevin May 29, 2020
5e69fa6
Moved API calls to the beginning, since they were being made more oft…
cccs-kevin Jul 3, 2020
534fa13
Adding ability to specify devops environment, such that multiple Cuck…
cccs-kevin Jul 22, 2020
d01b16e
Added ability to use multiple guest os versions
cccs-kevin Jul 24, 2020
b865fff
Removing files deleted by AV
cccs-kevin Oct 8, 2020
00d7e9b
Refactoring az.py so that scaling is in tune with DB, also edited som…
cccs-kevin Oct 8, 2020
f7376d7
Added more comments, updated test conf files
cccs-kevin Oct 15, 2020
07ded61
Some renaming of methods, added handling of snapshot id for non-reque…
cccs-kevin Oct 16, 2020
4ee39d0
Correcting method name
cccs-kevin Oct 16, 2020
1874a29
Mistake on naming methods addressed
cccs-kevin Oct 16, 2020
783f41d
Added collision avoidance on instances
cccs-kevin Oct 16, 2020
2f63291
Adding platform for Ubuntu
cccs-kevin Oct 19, 2020
d830b73
Adding better logging regarding acquiring machines
cccs-kevin Oct 28, 2020
889c031
Moving acquire logging to machinery
cccs-kevin Oct 28, 2020
8393407
Tweaking what tags contains when being sent to acquire in abstracts
cccs-kevin Oct 28, 2020
2147975
Adding case if no machine exists in DB
cccs-kevin Oct 29, 2020
c5dc312
Fine-tuning the acquire method
cccs-kevin Oct 29, 2020
84b4698
Adding method to tag disk, adding class constants
cccs-kevin Oct 30, 2020
52408ed
Small logic tweak
cccs-kevin Oct 30, 2020
f0b565b
Correcting platform value for Ubuntu
cccs-kevin Oct 30, 2020
7e2966d
Revised marking disks for deletion
cccs-kevin Nov 5, 2020
d8631d7
Adding logging for api limits
cccs-kevin Dec 2, 2020
0b2d21d
Reworking Azure API errors, resizing machines being created when vm f…
cccs-kevin Dec 2, 2020
0779544
Fixing logic bug in peak throughput limiting
cccs-kevin Dec 3, 2020
abc39a8
Replaced API calls that would mark nic/disk for deletion with set
cccs-kevin Dec 4, 2020
3af045e
Adding polling thread that deletes leftover resources
cccs-kevin Dec 4, 2020
24475f3
Giving the people what they want!
cccs-kevin Dec 4, 2020
bb79317
Have patience!
cccs-kevin Dec 4, 2020
e2afa6e
Added clause that will delete hanging VMs that were created by Cuckoo
cccs-kevin Dec 7, 2020
f22ffd5
Fixed logical bug where in some cases the availables weren't relevant…
cccs-kevin Dec 7, 2020
4e770c1
Adding ability to use Azure Spot Instances by checking for VM status …
cccs-kevin Dec 9, 2020
3893201
Tweaked logic in scheduler such that a pending task will only be fetc…
cccs-kevin Dec 10, 2020
c610585
Additional machinery for Azure using virtual machine scale sets
cccs-kevin Jan 4, 2021
bb43937
Updating VMSS machinery
cccs-kevin Jan 4, 2021
6e45832
Small tweaks
cccs-kevin Jan 11, 2021
ba84673
Adding additional checks to ensure up-time
cccs-kevin Jan 15, 2021
f3a1418
Improved error-handling and logging
cccs-kevin Jan 19, 2021
960f315
More thorough error-handling
cccs-kevin Jan 25, 2021
a7c28ce
Adding logic to handle Azure error states, improved error-handling an…
cccs-kevin Jan 27, 2021
2a4c142
Wrapping _thr_scale_machine_pool in try-catch to find errors, improvi…
cccs-kevin Jan 28, 2021
0d5d672
Adding more logging, additional logic in scaling machine pools, and f…
cccs-kevin Feb 2, 2021
462b726
Removed bottleneck when dealing with multiple tasks directed for diff…
cccs-kevin Feb 3, 2021
85c7b68
Improving exception handling, adding method to recreate failed VMSS
cccs-kevin Mar 15, 2021
6e3df73
Adding default tag value
cccs-kevin Mar 16, 2021
ec0dd19
Optimizing speed at which a task comes off the queue and is assigned …
cccs-kevin May 5, 2021
13af923
Ordering tasks by FIFO
cccs-kevin May 13, 2021
3e60bc0
Adding catch for psycopg UniqueViolation when adding sample
cccs-kevin May 13, 2021
25c7ecc
Adding try catch for better logging
cccs-kevin May 14, 2021
0923f0c
Adding platform support
cccs-kevin May 17, 2021
564a166
Scaling based on platform
cccs-kevin May 25, 2021
ff31944
Preventing IP collision
cccs-kevin May 28, 2021
a5cb11b
Adding logging if machine doesn't exist upon delete
cccs-kevin May 28, 2021
59edd91
Cannot use Manual
cccs-kevin May 31, 2021
a3d7afc
Revamp stop() and delete_vm_from_vmss() to avoid errors in Azure
cccs-kevin Jun 4, 2021
b3f174a
This polling is required if the Azure VM agent is not installed in VMs
cccs-kevin Jun 10, 2021
0165f37
New information about how Azure VMSSs work caused more reliance on ba…
cccs-kevin Jun 18, 2021
060e850
Adding missing global variable
cccs-kevin Jun 29, 2021
03f50f2
Removing VMs in delete phase if error occurs
cccs-kevin Jul 6, 2021
b4a41f1
Making program threadsafe, adding handling to spot instance deletion …
cccs-kevin Jul 14, 2021
95eacd2
Adding handling for 'badrequest' Azure error
cccs-kevin Jul 16, 2021
43455aa
Bug fix if tag doesn't exist
cccs-kevin Aug 3, 2021
00f318e
Adding a scaling ceiling based on quota availability
cccs-kevin Apr 25, 2022
74ad92c
Fixing bug in usage quota logic
cccs-kevin May 3, 2022
cuckoo/common/config.py (57 additions, 0 deletions)

@@ -267,6 +267,63 @@ class Config(object):
                 "guacd_port": Int(4822),
             },
         },
+        "az": {
+            "az": {
+                "region_name": String("earth"),
+                "group": String("malware_fighters"),
+                "subscription_id": String(
+                    "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"),
+                "client_id": String("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"),
+                "secret": String("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"),
+                "tenant": String("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"),
+                "machines": List(String, "cuckoo1"),
+                "interface": String("eth1"),
+                "running_machines_gap": Int(110),
+                "vnet": String("cuckoo-vnet"),
+                "cuckoo_subnet": String("cuckoo-subnet"),
+                "environment": String("staging"),
+                "dynamic_machines_limit": Int(10),
+                "instance_type": String("average"),
+                "options": List(String, None, ",\\s"),
+                "tags": String(),
+                "resultserver_ip": String("192.168.54.111"),
+                "resultserver_port": Int(2042),
+                "guest_snapshot": List(String, None, ",\\s"),
+                "storage_account_type": String("sample-type"),
+                "initial_pool_size": Int(1),
+            },
+        },
+        "az_with_vmss": {
+            "az_with_vmss": {
+                "region_name": String("earth"),
+                "group": String("malware_fighters"),
+                "subscription_id": String(
+                    "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"),
+                "client_id": String("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"),
+                "secret": String("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"),
+                "tenant": String("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"),
+                "machines": List(String, "cuckoo1"),
+                "interface": String("eth1"),
+                "vnet": String("cuckoo-vnet"),
+                "subnet": String("cuckoo-subnet"),
+                "environment": String("staging"),
+                "machine_pool_limit": Int(10),
+                "total_machines_limit": Int(50),
+                "instance_type": String("average"),
+                "options": List(String, None, ",\\s"),
+                "tags": String(),
+                "resultserver_ip": String("192.168.54.111"),
+                "resultserver_port": Int(2042),
+                "gallery_name": String("Louvre"),
+                "gallery_image_names": List(String, None, ",\\s"),
+                "storage_account_type": String("sample-type"),
+                "initial_pool_size": Int(1),
+                "supported_os_tags": List(String, None, ",\\s"),
+                "overprovision": Int(0),
+                "wait_time_to_reimage": Int(30),
+                "spot_instances": Boolean(False),
+            },
+        },
         "virtualbox": {
             "virtualbox": {
                 "mode": String("headless"),
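
These defaults back the new az and az_with_vmss machinery sections. At runtime they are resolved through Cuckoo's config() helper with a section:subsection:key path, the same pattern the guest.py change below uses for cuckoo:timeouts:vm_state. A minimal sketch of how a machinery module would read them, assuming the defaults above are merged and values in $CWD/conf may override them (the variable names are illustrative):

# Sketch: resolving the new az defaults via Cuckoo's config() helper.
# Keys mirror the block above; any value set in the corresponding conf
# file wins over these defaults.
from cuckoo.common.config import config

region = config("az:az:region_name")                    # "earth" by default
machine_limit = config("az:az:dynamic_machines_limit")  # Int, default 10
snapshots = config("az:az:guest_snapshot")              # List split on ",\s"
use_spot = config("az_with_vmss:az_with_vmss:spot_instances")  # Boolean, default False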
cuckoo/core/database.py (2 additions, 1 deletion)

@@ -19,6 +19,7 @@
 from cuckoo.common.utils import Singleton, classlock, json_encode, parse_bool
 from cuckoo.misc import cwd, format_command
 
+from psycopg2.errors import UniqueViolation
 from sqlalchemy import create_engine, Column, not_, func
 from sqlalchemy import Integer, String, Boolean, DateTime, Enum
 from sqlalchemy import ForeignKey, Text, Index, Table, TypeDecorator

@@ -1063,7 +1064,7 @@ def add(self, obj, timeout=0, package="", options="", priority=1,
 
         try:
             session.commit()
-        except IntegrityError:
+        except (IntegrityError, UniqueViolation):
             session.rollback()
             try:
                 sample = session.query(Sample).filter_by(md5=obj.get_md5()).first()
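
The widened except clause matters under the reworked scheduler, where concurrent submissions can race to insert the same Sample row. SQLAlchemy normally wraps the driver error as sqlalchemy.exc.IntegrityError, but also catching psycopg2's UniqueViolation covers paths where the raw driver error surfaces. A minimal standalone sketch of the same pattern, assuming a PostgreSQL backend with psycopg2 (the helper name is illustrative, not part of the PR):

# Sketch of the dual-catch pattern used in Database.add(), assuming
# PostgreSQL with psycopg2 as the active driver.
from psycopg2.errors import UniqueViolation
from sqlalchemy.exc import IntegrityError

def commit_or_rollback(session):
    try:
        session.commit()
        return True
    except (IntegrityError, UniqueViolation):
        # Another worker inserted the same unique row first; roll back so
        # the caller can re-query the existing record instead of failing.
        session.rollback()
        return False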
cuckoo/core/guest.py (13 additions, 4 deletions)

@@ -341,7 +341,7 @@ def post(self, method, *args, **kwargs):
 
     def wait_available(self):
        """Wait until the Virtual Machine is available for usage."""
-        end = time.time() + self.timeout
+        end = time.time() + config("cuckoo:timeouts:vm_state")
 
         while db.guest_get_status(self.task_id) == "starting" and self.do_run:
             try:

@@ -351,7 +351,7 @@
                 log.debug("%s: not ready yet", self.vmid)
             except socket.error:
                 log.debug("%s: not ready yet", self.vmid)
-            time.sleep(1)
+            time.sleep(10)
 
             if time.time() > end:
                 raise CuckooGuestCriticalTimeout(

@@ -485,7 +485,16 @@ def start_analysis(self, options, monitor):
         # Pin the Agent to our IP address so that it is not accessible by
         # other Virtual Machines etc.
         if "pinning" in features:
-            self.get("/pinning")
+            strikes = 5
+            for strike in range(strikes):
+                try:
+                    self.get("/pinning")
+                    break
+                except Exception:
+                    if strike == strikes-1:
+                        raise
+                    log.warning("Attempt #%s to pin machine %s %s" % (strike+1, self.vmid, self.ipaddr))
+                    time.sleep(30)
 
         # Obtain the environment variables.
         self.query_environ()

@@ -560,7 +569,7 @@ def wait_for_completion(self):
                # wait for things to recover
                log.warning(
                    "Virtual Machine /status failed. This can indicate the "
-                   "guest losing network connectivity"
+                   "guest losing network connectivity with %s" % self.vmid
                )
                continue
            except Exception as e:
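
Two themes in this file: polling is relaxed for cloud guests (the agent can take longer to come up, and the VM-state timeout now comes from cuckoo.conf), and the /pinning call is retried instead of failing the task on one transient error. The retry loop generalizes to the shape below, shown as a hedged standalone helper rather than code from the PR:

import logging
import time

log = logging.getLogger(__name__)

def retry_call(func, strikes=5, delay=30):
    """Retry a flaky call, re-raising only after the final strike.

    Generalized from the /pinning loop above; illustrative only.
    """
    for strike in range(strikes):
        try:
            return func()
        except Exception:
            if strike == strikes - 1:
                raise
            log.warning("Attempt #%s failed, retrying in %ss", strike + 1, delay)
            time.sleep(delay)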
cuckoo/core/scheduler.py (55 additions, 48 deletions)

@@ -20,7 +20,7 @@
 )
 from cuckoo.common.objects import File
 from cuckoo.common.files import Folders
-from cuckoo.core.database import Database, TASK_COMPLETED, TASK_REPORTED
+from cuckoo.core.database import Database, TASK_COMPLETED, TASK_REPORTED, TASK_RUNNING, TASK_PENDING
 from cuckoo.core.guest import GuestManager
 from cuckoo.core.plugins import RunAuxiliary, RunProcessing
 from cuckoo.core.plugins import RunSignatures, RunReporting

@@ -162,7 +162,7 @@ def acquire_machine(self):
             # In some cases it's possible that we enter this loop without
             # having any available machines. We should make sure this is not
             # such case, or the analysis task will fail completely.
-            if not machinery.availables():
+            if not machinery.availables(label=self.task.machine, platform=self.task.platform, tags=self.task.tags):
                 machine_lock.release()
                 time.sleep(1)
                 continue

@@ -224,13 +224,6 @@ def build_options(self):
             options["timeout"] = self.cfg.timeouts.default
         else:
             options["timeout"] = self.task.timeout
-
-        # copy in other analyzer specific options, TEMPORARY (most likely)
-        vm_options = getattr(machinery.options, self.machine.name)
-        for k in vm_options:
-            if k.startswith("analyzer_"):
-                options[k] = vm_options[k]
-
         return options
 
     def route_network(self):

@@ -965,11 +958,18 @@ def _cleanup_managers(self):
                 cleaned.add(am)
         return cleaned
 
+    def _thr_periodic_log(self):
+        log.debug("# Tasks: %d; # Available Machines: %d; # Locked Machines: %d; # Total Machines: %d;",
+                  self.db.count_tasks(status=TASK_PENDING), self.db.count_machines_available(),
+                  len(self.db.list_machines(locked=True)), len(self.db.list_machines()))
+        threading.Timer(10, self._thr_periodic_log).start()
+
     def start(self):
         """Start scheduler."""
         self.initialize()
 
         log.info("Waiting for analysis tasks.")
+        self._thr_periodic_log()
 
         # Message queue with threads to transmit exceptions (used as IPC).
         errors = Queue.Queue()

@@ -978,27 +978,12 @@
         if self.maxcount is None:
             self.maxcount = self.cfg.cuckoo.max_analysis_count
 
+        launched_analysis = True
         # This loop runs forever.
         while self.running:
-            time.sleep(1)
-
-            # Run cleanup on finished analysis managers and untrack them
-            for am in self._cleanup_managers():
-                self.analysis_managers.discard(am)
-
-            # Wait until the machine lock is not locked. This is only the case
-            # when all machines are fully running, rather that about to start
-            # or still busy starting. This way we won't have race conditions
-            # with finding out there are no available machines in the analysis
-            # manager or having two analyses pick the same machine.
-            if not machine_lock.acquire(False):
-                logger(
-                    "Could not acquire machine lock",
-                    action="scheduler.machine_lock", status="busy"
-                )
-                continue
-
-            machine_lock.release()
+            if not launched_analysis:
+                time.sleep(1)
+            launched_analysis = False
 
             # If not enough free disk space is available, then we print an
             # error message and wait another round (this check is ignored

@@ -1064,28 +1049,50 @@
                 )
                 continue
 
-            # Fetch a pending analysis task.
-            # TODO This fixes only submissions by --machine, need to add
-            # other attributes (tags etc).
-            # TODO We should probably move the entire "acquire machine" logic
-            # from the Analysis Manager to the Scheduler and then pass the
-            # selected machine onto the Analysis Manager instance.
-            task, available = None, False
-            for machine in self.db.get_available_machines():
-                task = self.db.fetch(machine=machine.name)
-                if task:
-                    break
-
-                if machine.is_analysis():
-                    available = True
-
+            # Get all tasks in the queue
+            tasks = self.db.list_tasks(status=TASK_PENDING, details=True, order_by="added_on")
+            if not tasks:
+                continue
+            for task in tasks:
+                # Run cleanup on finished analysis managers and untrack them
+                for am in self._cleanup_managers():
+                    self.analysis_managers.discard(am)
+
+                # Wait until the machine lock is not locked. This is only the case
+                # when all machines are fully running, rather that about to start
+                # or still busy starting. This way we won't have race conditions
+                # with finding out there are no available machines in the analysis
+                # manager or having two analyses pick the same machine.
+                if not machine_lock.acquire(False):
+                    logger(
+                        "Could not acquire machine lock",
+                        action="scheduler.machine_lock", status="busy"
+                    )
+                    continue
+                machine_lock.release()
+
+                available = False
+                # Note that label > platform > tags
+                if task.machine:
+                    if machinery.availables(label=task.machine):
+                        available = True
+                elif task.platform:
+                    if machinery.availables(platform=task.platform):
+                        available = True
+                elif task.tags:
+                    tag_names = [tag.name for tag in task.tags]
+                    if machinery.availables(tags=tag_names):
+                        available = True
+                else:
+                    available = True
+
             # We only fetch a new task if at least one of the available
             # machines is not a "service" machine (again, please refer to the
             # services auxiliary module for more information on service VMs).
-            if not task and available:
-                task = self.db.fetch(service=False)
-
-            if task:
+                if not available:
+                    continue
+
+                self.db.set_status(task.id, TASK_RUNNING)
                 log.debug("Processing task #%s", task.id)
                 self.total_analysis_count += 1

@@ -1094,7 +1101,7 @@ def start(self):
                 analysis.daemon = True
                 analysis.start()
                 self.analysis_managers.add(analysis)
-
+                launched_analysis = True
             # Deal with errors.
             try:
                 raise errors.get(block=False)
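
The reworked loop replaces "pick a machine, then fetch a matching task" with "walk the pending queue in FIFO order and start any task a machine can serve", which removes the head-of-line blocking between platforms noted in the commit log. The availability check follows a strict precedence, label > platform > tags; condensed into one function for clarity, as a sketch against the machinery.availables() keyword interface shown above (the function name is illustrative):

def can_serve(task, machinery):
    # Precedence mirrors the diff: an explicit machine label wins, then
    # the requested platform, then the tag set; an unconstrained task can
    # run on any available machine.
    if task.machine:
        return bool(machinery.availables(label=task.machine))
    if task.platform:
        return bool(machinery.availables(platform=task.platform))
    if task.tags:
        return bool(machinery.availables(tags=[tag.name for tag in task.tags]))
    return True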