Skip to content

Commit

Permalink
Merge branch 'main' into dependabot/npm_and_yarn/gh-pages-6.1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
andeplane authored Oct 5, 2024
2 parents 1d470b5 + f5fad73 commit 3e3e982
Show file tree
Hide file tree
Showing 4 changed files with 221 additions and 0 deletions.
125 changes: 125 additions & 0 deletions jupyterlite/content/lammps_logfile/File.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import numpy as np
import pandas as pd
from io import BytesIO, StringIO

class File:
    """Parser for LAMMPS log files.

    Reads every thermo-output section of the log and exposes the data both
    concatenated across all runs (``data_dict``) and per run
    (``partial_logs``).

    Parameters
    ----------------------
    :param ifile: path to a LAMMPS log file, or an already-open file-like object
    :type ifile: str or file
    """

    def __init__(self, ifile):
        # Lines that mark the start of a thermo section in the log.
        self.start_thermo_strings = ["Memory usage per processor", "Per MPI rank memory allocation"]
        # Lines that mark the end of a thermo section.
        self.stop_thermo_strings = ["Loop time", "ERROR"]
        self.data_dict = {}                 # column name -> data concatenated over all runs
        self.keywords = []                  # column names of the most recent thermo header
        self.output_before_first_run = ""   # raw log text preceding the first run
        self.partial_logs = []              # one {name: array} dict per run
        if hasattr(ifile, "read"):
            # Caller supplied an open file-like object; the caller owns it.
            self.logfile = ifile
            opened_here = False
        else:
            self.logfile = open(ifile, 'r')
            opened_here = True
        try:
            self.read_file_to_dict()
        finally:
            # Close only handles we opened ourselves (fixes a file-handle
            # leak); caller-supplied objects are left open.
            if opened_here:
                self.logfile.close()

    def read_file_to_dict(self):
        """Scan the whole log file and populate ``data_dict`` and ``partial_logs``."""
        contents = self.logfile.readlines()
        keyword_flag = False            # True while the next lines are a thermo table
        before_first_run_flag = True    # True until the first thermo section starts
        i = 0
        while i < len(contents):
            line = contents[i]
            if before_first_run_flag:
                self.output_before_first_run += line

            if keyword_flag:
                # Current line is the thermo header; collect the table body
                # until one of the stop strings appears in a line.
                keywords = line.split()
                tmp_string = ""
                while not any(stop in line for stop in self.stop_thermo_strings):
                    # Only complete (newline-terminated) lines are kept.
                    if "\n" in line:
                        tmp_string += line
                    i += 1
                    if i < len(contents):
                        line = contents[i]
                    else:
                        break
                # pd.read_table is deprecated; read_csv with the same sep is equivalent.
                partial_log_contents = pd.read_csv(StringIO(tmp_string), sep=r'\s+')

                if self.keywords != keywords:
                    # If the thermo header changes between runs, all previous
                    # log data is flushed. This is a limitation of this
                    # implementation.
                    self.flush_dict_and_set_new_keyword(keywords)

                self.partial_dict = {}
                for name in keywords:
                    self.data_dict[name] = np.append(self.data_dict[name], partial_log_contents[name])
                    self.partial_dict[name] = np.append(np.asarray([]), partial_log_contents[name])
                self.partial_logs.append(self.partial_dict)
                keyword_flag = False

            # Check whether the line matches any of the start-string identifiers.
            if any(line.startswith(start) for start in self.start_thermo_strings):
                keyword_flag = True
                before_first_run_flag = False
            i += 1

    def flush_dict_and_set_new_keyword(self, keywords):
        """Reset the concatenated data store for a new set of thermo columns."""
        self.data_dict = {}
        for entry in keywords:
            self.data_dict[entry] = np.asarray([])
        self.keywords = keywords

    def get(self, entry_name, run_num=-1):
        """Get a time-series from the log file by column name.

        Parameters
        --------------------
        :param entry_name: Name of the entry, for example "Temp"
        :type entry_name: str
        :param run_num: LAMMPS simulations commonly involve several run
            commands. Here you may choose which run you want the log data
            from. The default of :code:`-1` returns data from all runs
            concatenated.
        :type run_num: int
        :returns: numpy array of values, or ``None`` if the entry/run does not exist.

        If the columns in the log file change between runs, the logs are flushed.
        """
        if run_num == -1:
            if entry_name in self.data_dict:
                return self.data_dict[entry_name]
            return None
        if len(self.partial_logs) > run_num:
            partial_log = self.partial_logs[run_num]
            if entry_name in partial_log:
                return partial_log[entry_name]
            return None
        return None

    def get_keywords(self, run_num=-1):
        """Return a sorted list of available data columns in the log file.

        :param run_num: run index, or ``-1`` for the most recent header.
        :returns: sorted list of column names, or ``None`` for an invalid run.
        """
        if run_num == -1:
            return sorted(self.keywords)
        if len(self.partial_logs) > run_num:
            return sorted(list(self.partial_logs[run_num].keys()))
        return None

    def to_exdir_group(self, name, exdirfile):
        """Write each partial log as a subgroup of datasets in an exdir file."""
        group = exdirfile.require_group(name)
        for i, log in enumerate(self.partial_logs):
            subgroup = group.require_group(str(i))
            for key, value in log.items():
                # "/" is a path separator in exdir; replace it in dataset names.
                key = key.replace("/", ".")
                subgroup.create_dataset(key, data=value)

    def get_num_partial_logs(self):
        """Return the number of runs (partial logs) found in the file."""
        return len(self.partial_logs)
3 changes: 3 additions & 0 deletions jupyterlite/content/lammps_logfile/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .File import File
from .utils import running_mean, get_color_value, get_matlab_color
from .cmd_interface import run
30 changes: 30 additions & 0 deletions jupyterlite/content/lammps_logfile/cmd_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from lammps_logfile import File
from lammps_logfile import running_mean
import argparse
import matplotlib.pyplot as plt

def get_parser():
    """Build the argument parser for the ``lammps_logfile`` plotting CLI.

    :returns: configured :class:`argparse.ArgumentParser`.
    """
    parser = argparse.ArgumentParser(description="Plot contents from lammps log files")
    parser.add_argument("input_file", type=str, help="Lammps log file containing thermo output from lammps simulation.")
    parser.add_argument("-x", type=str, default="Time", help="Data to plot on the first axis")
    parser.add_argument("-y", type=str, nargs="+", help="Data to plot on the second axis. You can supply several names to get several plot lines in the same figure.")
    parser.add_argument("-a", "--running_average", type=int, default=1, help="Optionally average over this many log entries with a running average. Some thermo properties fluctuate wildly, and often we are interested in the running average of properties like temperature and pressure.")
    return parser

def run():
    """Command-line entry point: parse arguments, load the log, and plot.

    Plots each requested ``-y`` column against the ``-x`` column, optionally
    smoothing both with a running average of width ``--running_average``.
    """
    args = get_parser().parse_args()
    log = File(args.input_file)

    x = log.get(args.x)
    if args.running_average >= 2:
        x = running_mean(x, args.running_average)

    for name in args.y:
        data = log.get(name)
        if args.running_average >= 2:
            data = running_mean(data, args.running_average)
        # One labelled line per requested column.
        plt.plot(x, data, label=name)

    plt.legend()
    plt.show()

63 changes: 63 additions & 0 deletions jupyterlite/content/lammps_logfile/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import numpy as np
import matplotlib

def running_mean(data, N):
"""Calculate running mean of an array-like dataset.
Parameters
--------------------
:param data: The array
:type data: 1d array-like
:param N: Width of the averaging window
:type N: int
"""

data = np.asarray(data)
if N == 1:
return data
else:
retArray = np.zeros(data.size)*np.nan
padL = int(N/2)
padR = N-padL-1
retArray[padL:-padR] = np.convolve(data, np.ones((N,))/N, mode='valid')
return retArray

def get_color_value(value, minValue, maxValue, cmap='viridis'):
    """Get a color from a colormap, scaled between a minimum and maximum value.

    Parameters
    -----------------
    :param value: Value used to get a color from the colormap
    :param minValue: Minimum value in the colormap. Values below this value will saturate on the lower color of the colormap.
    :param maxValue: Maximum value in the colormap. Values above this value will saturate on the upper color of the colormap.
    :param cmap: Name of the matplotlib colormap to use.
    :returns: 4-vector containing RGBA colormap values.

    This is useful if you are plotting data from several simulations, and want
    to color them based on some parameter changing between the simulations.
    For example, you may want the color to gradually change along a colormap
    as the temperature increases.
    """
    diff = maxValue - minValue
    try:
        # matplotlib >= 3.5; matplotlib.cm.get_cmap was removed in 3.9.
        colormap = matplotlib.colormaps[cmap]
    except AttributeError:
        # Fallback for older matplotlib versions without the registry.
        colormap = matplotlib.cm.get_cmap(cmap)
    return colormap((value - minValue) / diff)

def get_matlab_color(i):
    """Get colors from MATLAB's standard color order.

    Parameters
    -------------
    :param i: Index. Cycles with a period of 7, so calling with 1 returns the same color as calling with 8.
    :type i: int
    :returns: color as 3-vector
    """
    # MATLAB's default plot color order (R2014b and later).
    color_table = np.asarray([
        [0.000, 0.447, 0.741],
        [0.850, 0.325, 0.098],
        [0.929, 0.694, 0.125],
        [0.494, 0.184, 0.556],
        [0.466, 0.674, 0.188],
        [0.301, 0.745, 0.933],
        [0.635, 0.078, 0.184],
    ])
    return color_table[i % len(color_table)]

0 comments on commit 3e3e982

Please sign in to comment.