Source code for runNMTF

import argparse

import DataLoader
import NMTF as fact
import os


def runNMTF(args):
    """
    Run Non-negative Matrix Tri-Factorization (NMTF) on an input dataset and save the results.

    This function builds an ``NMTF`` model from the provided arguments, loads
    the input data (from a PyTorch ``.pt`` file when ``in_file`` ends in
    ``.pt``, otherwise through the text loader), fits the model, and writes
    the output to ``args.out_dir``.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command line arguments with the following attributes:

        - **in_file** (str): Path to the input file (tab-delimited matrix or .pt file).
        - **k1** (int, optional): Dimension of the row factors. Default is -999.
        - **k2** (int, optional): Dimension of the column factors. Default is -999.
        - **lU** (float, optional): Orthogonal regularization for the U factor. Default is 0.
        - **lV** (float, optional): Orthogonal regularization for the V factor. Default is 0.
        - **aU** (float, optional): Sparsity (L1) regularization for the U factor. Default is 0.
        - **aV** (float, optional): Sparsity (L1) regularization for the V factor. Default is 0.
        - **verbose** (bool, optional): If True, print progress to the terminal. Default is False.
        - **seed** (int, optional): Random seed for reproducibility. Default is 1010.
        - **max_iter** (int, optional): Maximum number of iterations. Default is 100.
        - **term_tol** (float, optional): Termination tolerance for relative error change.
          Default is 1e-25.
        - **out_dir** (str, optional): Directory for saving output files. Default is '.'.
        - **save_clust** (bool, optional): Save cluster assignments for each iteration.
          Default is False.
        - **kill_factors** (bool, optional): Option to kill unused factors. Default is False.
        - **track_objective** (bool, optional): Track objective function values during training.
          Default is False.
        - **save_USV** (bool, optional): Save factorization components (U, S, V) at each
          iteration. When set, ``out_dir`` is also handed to the model as ``out_path``.
          Default is False.
        - **device** (str, optional): Compute device for PyTorch ('cuda:0', 'cuda:1', 'cpu').
          Default is 'cuda:0'.
        - **legacy** (bool, optional): Use legacy update method for factorization.
          Default is False. NOTE(review): this attribute is not read anywhere in this
          function, so it currently has no effect here -- confirm whether it should be
          forwarded to the model.

    Returns
    -------
    None
    """
    # Keyword arguments shared by every model configuration.
    model_kwargs = dict(
        verbose=args.verbose,
        max_iter=args.max_iter,
        seed=args.seed,
        term_tol=args.term_tol,
        max_l_u=args.lU,
        max_l_v=args.lV,
        max_a_u=args.aU,
        max_a_v=args.aV,
        k1=args.k1,
        k2=args.k2,
        save_clust=args.save_clust,
        track_objective=args.track_objective,
        kill_factors=args.kill_factors,
        device=args.device,
    )
    # The model is only told where to write when per-iteration U/S/V saving
    # was requested; otherwise out_path is left at the model's own default.
    if args.save_USV:
        model_kwargs['out_path'] = args.out_dir
    mod = fact.NMTF(**model_kwargs)

    dl = DataLoader.DataLoader(verbose=args.verbose)
    # Pick the loader from the file extension; anything other than '.pt'
    # goes through the text loader.
    extension = os.path.splitext(args.in_file)[1]
    if extension == '.pt':
        X, _ = dl.from_pt(datafile=args.in_file)
    else:
        X, _ = dl.from_text(datafile=args.in_file)

    mod.assign_X_data(X)
    # Called unconditionally, regardless of args.device.
    mod.send_to_gpu()
    mod.fit()
    mod.print_output(args.out_dir)

if __name__ == "__main__":
    # Command line entry point: parse the options and hand them to runNMTF.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--in_file',
                        help='tab-delimited input matrix. Or pytorch .pt file',
                        required=True)
    # Numeric options declare an explicit type so that user-supplied values
    # have the same type as the defaults and malformed input is rejected by
    # argparse with a usage error instead of failing later.
    parser.add_argument('--k1',
                        help='lower dimension of the row factors (U).',
                        required=False, type=int, default=-999)
    parser.add_argument('--k2',
                        help='lower dimension of the column factors (V).',
                        required=False, type=int, default=-999)
    # NOTE(review): --test_multiple is parsed but runNMTF never reads it.
    parser.add_argument('--test_multiple',
                        help="file containing test k1 and k2 in two tab delimited columns.",
                        required=False, default='')
    parser.add_argument('--lU',
                        help='Ortho regularization of U term.',
                        required=False, type=float, default=0)
    parser.add_argument('--lV',
                        help='Ortho regularization of V term.',
                        required=False, type=float, default=0)
    parser.add_argument('--aU',
                        help='Sparsity (L1) regularization of U term.',
                        required=False, type=float, default=0)
    parser.add_argument('--aV',
                        help='Sparsity (L1) regularization of V term.',
                        required=False, type=float, default=0)
    parser.add_argument('--verbose',
                        help="Print current status to terminal. (True/False).",
                        required=False, action="store_true")
    parser.add_argument('--seed',
                        help="Random seed.",
                        required=False, type=int, default=1010)
    parser.add_argument('--max_iter',
                        help="Maximum number of iterations.",
                        required=False, type=int, default=100)
    parser.add_argument('--term_tol',
                        help="Relative change in error before finish.",
                        required=False, type=float, default=1e-25)
    parser.add_argument('--out_dir',
                        help="Path to output directory",
                        required=False, default='.')
    parser.add_argument('--save_clust',
                        help="Save cluster assignments for each interation to an assignment matrix",
                        required=False, action="store_true")
    # The help text of the next two flags used to be a copy of --save_clust's.
    parser.add_argument('--kill_factors',
                        help="Option to kill unused factors.",
                        required=False, action="store_true")
    parser.add_argument('--track_objective',
                        help="Track objective function values during training.",
                        required=False, action="store_true")
    parser.add_argument('--save_USV',
                        help="Save lower dimensional matrices at every iteration",
                        required=False, action="store_true")
    parser.add_argument('--device',
                        help="Select Device. Default is cuda:0. Options are cuda:0/cuda:1/cpu",
                        required=False, default="cuda:0")
    # NOTE(review): --legacy is parsed but runNMTF never reads it.
    parser.add_argument('--legacy',
                        help="Use the legacy update. The new update improves selection of lU and lV.",
                        required=False, action='store_true')
    args = parser.parse_args()
    runNMTF(args)