I have this really convenient high-level pd.DataFrame saving function that I want to add to pandas.
How can I add this method to the pd.DataFrame class?
def to_file(df, path, sep="\t", compression="infer", pickled="infer", verbose=False, **args):
    """Write ``df`` to ``path`` as delimited text or as a pickle.

    The output format and compression are inferred from the file name unless
    they are given explicitly.

    Parameters
    ----------
    df
        Object exposing ``to_csv`` and ``to_pickle`` (e.g. a pandas DataFrame).
    path
        Destination path; a ``str`` or any ``os.PathLike`` (``pathlib.Path``).
    sep
        Field delimiter, used only for delimited-text output.
    compression
        ``"infer"`` (default) derives the compression from the file name:
        pickles map ``.pkl`` -> none, ``.pgz`` -> gzip, ``.pbz2`` -> bz2;
        text files map a ``.gz`` suffix -> gzip, ``.bz2`` -> bz2, anything
        else -> none. Any other value is forwarded to pandas unchanged.
    pickled
        ``"infer"`` (default) pickles when the extension is ``.pkl``,
        ``.pgz`` or ``.pbz2``. Otherwise any truthy value selects pickle
        output and any falsy value selects delimited text.
    verbose
        When true, print the resolved settings to ``sys.stderr``.
    **args
        Extra keyword arguments forwarded to ``df.to_csv`` or
        ``df.to_pickle``.

    Returns
    -------
    None
    """
    # Extension -> compression for the pickle flavours recognised here.
    pickle_compression = {".pkl": None, ".pgz": "gzip", ".pbz2": "bz2"}

    # Normalise once so the str methods below also work for pathlib.Path.
    path = os.fspath(path)
    _, ext = os.path.splitext(path)

    # Serialization
    if pickled == "infer":
        pickled = ext in pickle_compression

    # Compression
    if compression == "infer":
        if pickled:
            # Unrecognised extensions keep "infer" so that pandas applies its
            # own extension-based detection in to_pickle.
            compression = pickle_compression.get(ext, "infer")
        elif path.endswith(".bz2"):
            compression = "bz2"
        elif path.endswith(".gz"):
            compression = "gzip"
        else:
            compression = None

    if verbose:
        print(
            f"path:\t{path}",
            f"sep:\t{repr(sep)}",
            f"compression:\t{compression}",
            f"pickled:\t{pickled}",
            sep="\n",
            file=sys.stderr,
        )

    # Exactly one writer always runs; the previous ``== True`` / ``== False``
    # pair silently wrote nothing for values such as None.
    if pickled:
        df.to_pickle(path, compression=compression, **args)
    else:
        df.to_csv(path, sep=sep, compression=compression, **args)