1、使用numpy.savez()实现
相关文档:numpy.savez()
import numpy as np

# Save a NumPy array together with a plain dict in one .npz archive,
# then load both back.
a = np.array([[2, 4], [6, 8], [10, 12]])
d = {"first": 1, "second": "two", "third": 3}

# The file name must be a string (or a file object).
np.savez("whatever_name.npz", a=a, d=d)

# The dict is stored as a pickled 0-d object array, so loading it requires
# allow_pickle=True. Only enable this for files you trust: unpickling
# untrusted data can execute arbitrary code.
with np.load("whatever_name.npz", allow_pickle=True) as data:
    arr = data["a"]
    # .item() unwraps the 0-d object array back into the original dict.
    dic = data["d"].item()
2、使用h5py实现
相关文档:h5py
import h5py
import numpy as np

arr = np.random.randint(0, 10, (1000, 1000))
# Example dictionary to store next to the array.
d = {"first": 1, "second": "two", "third": 3}

# Open the file in a `with` block so it is closed even if a write fails.
# libver='latest' is used for performance.
with h5py.File('file.h5', 'w', libver='latest') as f:
    dset = f.create_dataset(
        'array',
        shape=(1000, 1000),
        data=arr,
        chunks=(100, 100),
        compression='gzip',
        compression_opts=9,
    )

    # Attach some attributes to the dataset.
    dset.attrs['Description'] = 'Some text snippet'
    dset.attrs['RowIndexArray'] = np.arange(1000)

    # Store the dictionary: one dataset per key under the "dictgroup" group.
    for k, v in d.items():
        f.create_dataset('dictgroup/' + str(k), data=v)

    # Access pattern for when memory is limited: look up a single entry
    # through the group instead of loading everything at once.
    dictionary = f['dictgroup']
    res = dictionary['first']
    # `res` is a dataset handle tied to the open file; index it with [()]
    # to read the stored value before the file is closed.
    value = res[()]
3、使用pyarrow实现
1) 安装依赖
pip install pyarrow
2) 实现代码
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

df = pd.DataFrame(np.random.normal(size=(1000, 10)))
tab = pa.Table.from_pandas(df)

# replace_schema_metadata() replaces the whole metadata mapping, so merge the
# custom entry into the existing metadata (which holds the pandas information
# written by from_pandas) instead of discarding it.
metadata = dict(tab.schema.metadata or {})
metadata[b'here'] = b'it is'
tab = tab.replace_schema_metadata(metadata)

pq.write_table(tab, 'where_is_it.parq')

# Read the table back; the custom entry is available via schema.metadata.
loaded = pq.read_table('where_is_it.parq')
custom_value = loaded.schema.metadata[b'here']
相关文档: