# Source code for convertTextToPT

import torch
import pandas as pd
import numpy as np
import argparse 
import os


def convert_text_to_pt(args):
    """
    Convert a delimited text file to a PyTorch tensor (``.pt``) file.

    The file is read with ``pandas.read_csv``, converted to a NumPy array,
    wrapped as a tensor with ``torch.from_numpy`` and written with
    ``torch.save``. The output path is the input path with its final
    extension replaced by ``.pt``.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command line arguments (any object exposing these attributes):

        - **in_file** (str): Path to the input delimited text file. Required.
        - **delimiter** (str, optional): Delimiter used in the text file.
          Default is tab (``'\\t'``).
        - **header** (int or None, optional): Forwarded unchanged to
          ``pandas.read_csv(header=...)``: the 0-based row index of the line
          holding column names, or None when the file has no header row.
          Default is None.
        - **dtype** (numpy.dtype, optional): Data type used when parsing the
          file, and therefore of the saved tensor. Default is ``np.float32``.

    Returns
    -------
    None
    """
    # Optional attributes fall back to the documented defaults so the function
    # can also be called with a hand-built namespace that only sets in_file.
    delimiter = getattr(args, "delimiter", "\t")
    header = getattr(args, "header", None)
    dtype = getattr(args, "dtype", np.float32)

    out_path = os.path.splitext(args.in_file)[0] + ".pt"

    frame = pd.read_csv(args.in_file, sep=delimiter, header=header, dtype=dtype)
    tensor = torch.from_numpy(frame.to_numpy())
    torch.save(tensor, out_path)
# Command-line entry point: parse the options and run the conversion.
# The guard must compare against "__main__"; the previous "__name__" literal
# never matched, so running the script did nothing.
if __name__ == "__main__":

    def _header_arg(value):
        """Parse --header: 'none' (any case) means no header row, else an int row index."""
        return None if value.lower() == "none" else int(value)

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--in_file',
                        help="Deliminated file to convert to a .pt file",
                        required=True)
    parser.add_argument('--delimiter',
                        help="Delimiter of .txt file. Default is tab",
                        required=False,
                        default='\t')
    # argparse delivers strings by default; pandas needs an int (or None) here.
    parser.add_argument('--header',
                        help="0-based row index of the header line. "
                             "Default is None (no header row)",
                        required=False,
                        type=_header_arg,
                        default=None)
    # np.dtype validates names such as "float64" at parse time; the non-string
    # default is passed through untouched by argparse.
    parser.add_argument('--dtype',
                        help="Data type for .pt file. Default is np.float32",
                        required=False,
                        type=np.dtype,
                        default=np.float32)
    args = parser.parse_args()

    convert_text_to_pt(args)