commit 2f56f4c23ea840f9d15c43801368cf4a089efa84
Author: sophgo-forum-service <forum_service@sophgo.com>
Date:   Sun May 19 23:03:28 2024 +0800

    [feat] cnpy opensource for cv18xx soc.

    - e9d84e
This commit is contained in:
carbon
2024-05-31 11:58:25 +08:00
parent 83dc4914fe
commit 3e2ce5494f
10 changed files with 1162 additions and 0 deletions

View File

@ -20,3 +20,4 @@
| cvikernel | cvikernel | https://github.com/sophgo/cvikernel.git | sg200x-dev | 9f1f57a |
| cviruntime | cviruntime | https://github.com/sophgo/cviruntime.git | sg200x-dev | 3f49386 |
| cvimath | cvimath | https://github.com/sophgo/cvimath.git | sg200x-dev | ce8705f |
| cnpy | cnpy | https://github.com/sophgo/cnpy.git | sg200x-dev | 2f56f4c |

65
cnpy/CMakeLists.txt Normal file
View File

@ -0,0 +1,65 @@
CMAKE_MINIMUM_REQUIRED(VERSION 3.0 FATAL_ERROR)
if(COMMAND cmake_policy)
cmake_policy(SET CMP0003 NEW)
endif(COMMAND cmake_policy)
if (NOT DEFINED LLVM_MAIN_SRC_DIR)
project(CNPY)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
message(STATUS "CMAKE_SYSROOT ${CMAKE_SYSROOT}")
include_directories(${CMAKE_SYSROOT}/include)
include_directories(${CMAKE_INSTALL_PREFIX}/include)
link_directories(${CMAKE_INSTALL_PREFIX}/lib)
set(ENV{PKG_CONFIG_DIR} "")
set(ENV{PKG_CONFIG_LIBDIR} "${CMAKE_SYSROOT}/usr/lib/pkgconfig:${CMAKE_SYSROOT}/usr/share/pkgconfig")
set(ENV{PKG_CONFIG_SYSROOT_DIR} ${CMAKE_SYSROOT})
find_package(ZLIB)
option(ENABLE_STATIC "Build static (.a) library" ON)
add_library(cnpy SHARED "cnpy.cpp")
target_link_libraries(cnpy z)
install(TARGETS "cnpy" LIBRARY DESTINATION lib PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
if(ENABLE_STATIC)
add_library(cnpy-static STATIC "cnpy.cpp")
set_target_properties(cnpy-static PROPERTIES OUTPUT_NAME "cnpy")
install(TARGETS "cnpy-static" ARCHIVE DESTINATION lib)
endif(ENABLE_STATIC)
install(FILES "cnpy.h" DESTINATION include)
install(FILES "mat2npz" "npy2mat" "npz2mat" DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
add_executable(example1 example1.cpp)
target_link_libraries(example1 cnpy)
else()
message(STATUS "CNPY LLVM Tree Build")
add_custom_target(CNPY)
set_target_properties(CNPY PROPERTIES FOLDER Third_party)
add_dependencies(CNPY
cnpy
cnpy_example1
)
set(LLVM_OPTIONAL_SOURCES
cnpy.cpp
example1.cpp
)
find_package(ZLIB REQUIRED)
add_llvm_library(cnpy SHARED
cnpy.cpp)
target_link_libraries(cnpy ${ZLIB_LIBRARIES})
install(FILES "cnpy.h" DESTINATION include)
add_llvm_executable(cnpy_example1
example1.cpp)
target_link_libraries(cnpy_example1 PRIVATE
cnpy)
endif()

21
cnpy/LICENSE Normal file
View File

@ -0,0 +1,21 @@
The MIT License
Copyright (c) Carl Rogers, 2011
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

55
cnpy/README.md Normal file
View File

@ -0,0 +1,55 @@
# Purpose:
NumPy offers the `save` method for easy saving of arrays into .npy and `savez` for zipping multiple .npy arrays together into a .npz file.
`cnpy` lets you read and write to these formats in C++.
The motivation comes from scientific programming where large amounts of data are generated in C++ and analyzed in Python.
Writing to .npy has the advantage of using low-level C++ I/O (fread and fwrite) for speed and binary format for size.
The .npy file header takes care of specifying the size, shape, and data type of the array, so specifying the format of the data is unnecessary.
Loading data written in numpy formats into C++ is equally simple, but requires you to type-cast the loaded data to the type of your choice.
# Installation:
Default installation directory is /usr/local.
To specify a different directory, add `-DCMAKE_INSTALL_PREFIX=/path/to/install/dir` to the cmake invocation in step 4.
1. get [cmake](www.cmake.org)
2. create a build directory, say $HOME/build
3. cd $HOME/build
4. cmake /path/to/cnpy
5. make
6. make install
# Using:
To use, `#include"cnpy.h"` in your source code. Compile the source code mycode.cpp as
```bash
g++ -o mycode mycode.cpp -L/path/to/install/dir -lcnpy -lz --std=c++11
```
# Description:
There are two functions for writing data: `npy_save` and `npz_save`.
There are 3 functions for reading:
- `npy_load` will load a .npy file.
- `npz_load(fname)` will load a .npz and return a dictionary of NpyArray structues.
- `npz_load(fname,varname)` will load and return the NpyArray for data varname from the specified .npz file.
The data structure for loaded data is below.
Data is accessed via the `data<T>()`-method, which returns a pointer of the specified type (which must match the underlying datatype of the data).
The array shape and word size are read from the npy header.
```c++
struct NpyArray {
std::vector<size_t> shape;
size_t word_size;
template<typename T> T* data();
};
```
See [example1.cpp](example1.cpp) for examples of how to use the library. example1 will also be build during cmake installation.

785
cnpy/cnpy.cpp Normal file
View File

@ -0,0 +1,785 @@
//Copyright (C) 2011 Carl Rogers
//Released under MIT License
//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
#define _FILE_OFFSET_BITS 64
#define __USE_FILE_OFFSET64
#define __USE_LARGEFILE64
#define _LARGEFILE64_SOURCE
#include"cnpy.h"
#include<complex>
#include<cstdlib>
#include<algorithm>
#include<cstring>
#include<iomanip>
#include<stdint.h>
#include<stdexcept>
#include <regex>
#define ZIP64_LIMIT ((((size_t)1) << 31) - 1)
namespace cnpy {
static char BigEndianTest() {
int x = 1;
return (((char *)&x)[0]) ? '<' : '>';
}
static char map_type(const std::type_info& t)
{
if( t == typeid(float) ) return 'f';
if( t == typeid(double) ) return 'f';
if( t == typeid(long double) ) return 'f';
if( t == typeid(int) ) return 'i';
if( t == typeid(char) ) return 'i';
if( t == typeid(signed char) ) return 'i';
if( t == typeid(short) ) return 'i';
if( t == typeid(long) ) return 'i';
if( t == typeid(long long) ) return 'i';
if( t == typeid(unsigned char) ) return 'u';
if( t == typeid(unsigned short) ) return 'u';
if( t == typeid(unsigned long) ) return 'u';
if( t == typeid(unsigned long long) ) return 'u';
if( t == typeid(unsigned int) ) return 'u';
if( t == typeid(bool) ) return 'b';
if( t == typeid(std::complex<float>) ) return 'c';
if( t == typeid(std::complex<double>) ) return 'c';
if( t == typeid(std::complex<long double>) ) return 'c';
std::cout << "libcnpy error: unknown type_id "
<< t.name() << "\n";
// ref: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-builtin
assert(0);
return '?';
}
template<typename T>
std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs) {
//write in little endian
for(size_t byte = 0; byte < sizeof(T); byte++) {
char val = *((const char*)&rhs+byte);
lhs.push_back(val);
}
return lhs;
}
template<>
std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs) {
lhs.insert(lhs.end(),rhs.begin(),rhs.end());
return lhs;
}
template<>
std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs) {
//write in little endian
size_t len = strlen(rhs);
lhs.reserve(len);
for(size_t byte = 0; byte < len; byte++) {
lhs.push_back(rhs[byte]);
}
return lhs;
}
std::vector<char> create_npy_header(const std::vector<size_t>& shape,
size_t word_size, char type) {
std::vector<char> dict;
dict += "{'descr': '";
dict += BigEndianTest();
dict += type;
dict += std::to_string(word_size);
dict += "', 'fortran_order': False, 'shape': (";
dict += std::to_string(shape[0]);
for(size_t i = 1;i < shape.size();i++) {
dict += ", ";
dict += std::to_string(shape[i]);
}
if(shape.size() == 1) dict += ",";
dict += "), }";
//pad with spaces so that preamble+dict is modulo 16 bytes.
//preamble is 10 bytes. dict needs to end with \n
int remainder = 16 - (10 + dict.size()) % 16;
dict.insert(dict.end(),remainder,' ');
dict.back() = '\n';
std::vector<char> header;
header += (char) 0x93;
header += "NUMPY";
header += (char) 0x01; //major version of numpy format
header += (char) 0x00; //minor version of numpy format
header += (uint16_t) dict.size();
header.insert(header.end(),dict.begin(),dict.end());
return header;
}
void parse_npy_header(unsigned char* buffer, size_t& word_size, char& type,
std::vector<size_t>& shape, bool& fortran_order) {
//std::string magic_string(buffer,6);
//uint8_t major_version = *reinterpret_cast<uint8_t*>(buffer+6);
//uint8_t minor_version = *reinterpret_cast<uint8_t*>(buffer+7);
uint16_t header_len = *reinterpret_cast<uint16_t*>(buffer+8);
std::string header(reinterpret_cast<char*>(buffer+9),header_len);
size_t loc1, loc2;
//fortran order
loc1 = header.find("fortran_order")+16;
fortran_order = (header.substr(loc1,4) == "True" ? true : false);
//shape
loc1 = header.find("(");
loc2 = header.find(")");
std::regex num_regex("[0-9][0-9]*");
std::smatch sm;
shape.clear();
std::string str_shape = header.substr(loc1+1,loc2-loc1-1);
while(std::regex_search(str_shape, sm, num_regex)) {
shape.push_back(std::stoi(sm[0].str()));
str_shape = sm.suffix().str();
}
//endian, word size, data type
//byte order code | stands for not applicable.
//not sure when this applies except for byte array
loc1 = header.find("descr")+9;
bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
assert(littleEndian);
type = header[loc1+1];
//assert(type == map_type(T));
std::string str_ws = header.substr(loc1+2);
loc2 = str_ws.find("'");
word_size = atoi(str_ws.substr(0,loc2).c_str());
}
void parse_npy_header(FILE* fp, size_t& word_size, char& type,
std::vector<size_t>& shape, bool& fortran_order) {
char buffer[256];
size_t res = fread(buffer,sizeof(char),11,fp);
if(res != 11)
throw std::runtime_error("parse_npy_header: failed fread");
std::string header = fgets(buffer,256,fp);
assert(header[header.size()-1] == '\n');
size_t loc1, loc2;
//fortran order
loc1 = header.find("fortran_order");
if (loc1 == std::string::npos)
throw std::runtime_error("parse_npy_header: "
"failed to find header keyword: 'fortran_order'");
loc1 += 16;
fortran_order = (header.substr(loc1,4) == "True" ? true : false);
//shape
loc1 = header.find("(");
loc2 = header.find(")");
if (loc1 == std::string::npos || loc2 == std::string::npos)
throw std::runtime_error("parse_npy_header: "
"failed to find header keyword: '(' or ')'");
std::regex num_regex("[0-9][0-9]*");
std::smatch sm;
shape.clear();
std::string str_shape = header.substr(loc1+1,loc2-loc1-1);
while(std::regex_search(str_shape, sm, num_regex)) {
shape.push_back(std::stoi(sm[0].str()));
str_shape = sm.suffix().str();
}
//endian, word size, data type
//byte order code | stands for not applicable.
//not sure when this applies except for byte array
loc1 = header.find("descr");
if (loc1 == std::string::npos)
throw std::runtime_error("parse_npy_header: "
"failed to find header keyword: 'descr'");
loc1 += 9;
bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
assert(littleEndian);
type = header[loc1+1];
//assert(type == map_type(T));
std::string str_ws = header.substr(loc1+2);
loc2 = str_ws.find("'");
word_size = atoi(str_ws.substr(0,loc2).c_str());
}
void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size,
size_t& global_header_offset) {
std::vector<char> footer(22);
fseek(fp,-22,SEEK_END);
size_t res = fread(&footer[0],sizeof(char),22,fp);
if(res != 22)
throw std::runtime_error("parse_zip_footer: failed fread");
uint16_t disk_no, disk_start, nrecs_on_disk, comment_len;
disk_no = *(uint16_t*) &footer[4];
disk_start = *(uint16_t*) &footer[6];
nrecs_on_disk = *(uint16_t*) &footer[8];
nrecs = *(uint16_t*) &footer[10];
global_header_size = *(uint32_t*) &footer[12];
global_header_offset = *(uint32_t*) &footer[16];
comment_len = *(uint16_t*) &footer[20];
assert(disk_no == 0);
assert(disk_start == 0);
assert(nrecs_on_disk == nrecs);
assert(comment_len == 0);
if (global_header_offset >= 0xFFFFFFFF) {
//get global header offset from extra data
std::vector<char> zip64endrec_header(56);
fseek(fp,-98,SEEK_END);
size_t res = fread(&zip64endrec_header[0],sizeof(char),56,fp);
global_header_offset = *(uint64_t*) &zip64endrec_header[48];
}
}
template<typename T>
void npy_save(std::string fname, const T* data,
const std::vector<size_t> shape, std::string mode) {
FILE* fp = NULL;
//if appending, the shape of existing + new data
std::vector<size_t> true_data_shape;
if(mode == "a") fp = fopen(fname.c_str(),"r+b");
if(fp) {
//file exists. we need to append to it. read the header, modify the array size
size_t word_size;
char type;
bool fortran_order;
parse_npy_header(fp,word_size,type,true_data_shape,fortran_order);
assert(!fortran_order);
if(word_size != sizeof(T)) {
std::cout << "libnpy error: " << fname << " has word size "
<< word_size << " but npy_save appending data sized "
<< sizeof(T) << "\n";
assert( word_size == sizeof(T) );
}
if(true_data_shape.size() != shape.size()) {
std::cout << "libnpy error: npy_save attempting to append "
<< "misdimensioned data to " << fname << "\n";
assert(true_data_shape.size() != shape.size());
}
for(size_t i = 1; i < shape.size(); i++) {
if(shape[i] != true_data_shape[i]) {
std::cout << "libnpy error: npy_save attempting to append "
<< "misshaped data to " << fname << "\n";
assert(shape[i] == true_data_shape[i]);
}
}
true_data_shape[0] += shape[0];
}
else {
fp = fopen(fname.c_str(),"wb");
true_data_shape = shape;
}
size_t word_size = sizeof(T);
char type = map_type(typeid(T));
std::vector<char> header = create_npy_header(true_data_shape, word_size, type);
size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies<size_t>());
fseek(fp,0,SEEK_SET);
fwrite(&header[0],sizeof(char),header.size(),fp);
fseek(fp,0,SEEK_END);
fwrite(data,sizeof(T),nels,fp);
fclose(fp);
}
template void npy_save<std::complex<double> >(std::string,
const std::complex<double>*,
const std::vector<size_t>, std::string);
template void npy_save<double>(std::string, const double*,
const std::vector<size_t>, std::string);
template void npy_save<char>(std::string, const char*,
const std::vector<size_t>, std::string);
template<typename T>
void npy_save(std::string fname, const std::vector<T> data,
std::string mode) {
std::vector<size_t> shape;
shape.push_back(data.size());
npy_save<T>(fname, &data[0], shape, mode);
}
template<typename T>
void npz_save(std::string zipname, std::string fname,
const T* data, const std::vector<size_t>& shape,
std::string mode) {
//first, append a .npy to the fname
fname += ".npy";
//now, on with the show
FILE* fp = NULL;
uint16_t nrecs = 0;
size_t global_header_offset = 0;
std::vector<char> global_header;
if(mode == "a") fp = fopen(zipname.c_str(),"r+b");
if(fp) {
//zip file exists. we need to add a new npy file to it.
//first read the footer.
//this gives us the offset and size of the global header
//then read and store the global header.
//below, we will write the the new data at the start of the global
//header then append the global header and footer below it
size_t global_header_size;
parse_zip_footer(fp,nrecs,global_header_size,global_header_offset);
fseek(fp,global_header_offset,SEEK_SET);
global_header.resize(global_header_size);
size_t res = fread(&global_header[0],sizeof(char),global_header_size,fp);
if(res != global_header_size){
throw std::runtime_error("npz_save: "
"header read error while adding to existing zip");
}
fseek(fp,global_header_offset,SEEK_SET);
}
else {
fp = fopen(zipname.c_str(),"wb");
}
size_t word_size = sizeof(T);
char type = map_type(typeid(T));
std::vector<char> npy_header;
if(shape.size() != 0){
npy_header = create_npy_header(shape, word_size, type);
}else{
std::cerr << "[Warning] zip name: " << fname <<" npz shape size is 0, skip it\n";
fclose(fp);
return;
}
size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies<size_t>());
size_t nbytes = nels*sizeof(T) + npy_header.size();
//get the CRC of the data to be added
uint32_t crc = crc32(0L,(uint8_t*)&npy_header[0],npy_header.size());
crc = crc32(crc,(const uint8_t*)data,nels*sizeof(T));
//build the local header
std::vector<char> local_header;
local_header += "PK"; //first part of sig
local_header += (uint16_t) 0x0403; //second part of sig
local_header += (uint16_t) 20; //min version to extract
local_header += (uint16_t) 0; //general purpose bit flag
local_header += (uint16_t) 0; //compression method
local_header += (uint16_t) 0; //file last mod time
local_header += (uint16_t) 0; //file last mod date
local_header += (uint32_t) crc; //crc
local_header += (uint32_t) nbytes; //compressed size
local_header += (uint32_t) nbytes; //uncompressed size
local_header += (uint16_t) fname.size(); //fname length
local_header += (uint16_t) 0; //extra field length
local_header += fname;
fwrite(&local_header[0],sizeof(char),local_header.size(),fp);
fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp);
fwrite(data,sizeof(T),nels,fp);
/*
Only support global_header_offset is larger than ZIP64_LIMIT.
Not support size is larger than ZIP64_LIMIT now.
*/
if (global_header_offset + nbytes + local_header.size() >= ZIP64_LIMIT) {
//structCentralDir = "<4s4B4HL2L5H2L"
//centdir = struct.pack(structCentralDir,
//stringCentralDir, create_version,
//zinfo.create_system, extract_version, zinfo.reserved,
//flag_bits, zinfo.compress_type, dostime, dosdate,
//zinfo.CRC, compress_size, file_size,
//len(filename), len(extra_data), len(zinfo.comment),
//0, zinfo.internal_attr, zinfo.external_attr,
//header_offset)
//build global header
global_header += "PK"; //first part of sig
global_header += (uint16_t) 0x0201; //second part of sig
global_header += (uint8_t) 45; //create_version
global_header += (uint8_t) 3; //zinfo.create_system
global_header += (uint8_t) 45; //extract_version
global_header += (uint8_t) 0; //zinfo.reserved
global_header.insert(global_header.end(),local_header.begin()+6,
local_header.begin()+28);
global_header += (uint16_t) 12; //extran data length
global_header += (uint16_t) 0; //file comment length
global_header += (uint16_t) 0; //disk number where file starts
global_header += (uint16_t) 0; //internal file attributes
global_header += (uint32_t) 0; //external file attributes
//relative offset of local file header
//since it begins where the global header used to begin
global_header += (uint32_t) 0xFFFFFFFF ; //global_header_offset;
global_header += fname;
// Append a ZIP64 field to the extra's
// extra_data = struct.pack(
// '<HH' + 'Q'*len(extra),
// 1, 8*len(extra), *extra) + extra_data
// extract_version = max(45, zinfo.extract_version)
// create_version = max(45, zinfo.create_version)
global_header += (uint16_t) 0x01;
global_header += (uint16_t) 0x08;
global_header += (uint64_t) global_header_offset;
} else {
//build global header
global_header += "PK"; //first part of sig
global_header += (uint16_t) 0x0201; //second part of sig
global_header += (uint16_t) 20; //version made by
global_header.insert(global_header.end(),local_header.begin()+4,
local_header.begin()+30);
global_header += (uint16_t) 0; //file comment length
global_header += (uint16_t) 0; //disk number where file starts
global_header += (uint16_t) 0; //internal file attributes
global_header += (uint32_t) 0; //external file attributes
//relative offset of local file header
//since it begins where the global header used to begin
global_header += (uint32_t) global_header_offset;
global_header += fname;
}
fwrite(&global_header[0],sizeof(char),global_header.size(),fp);
if (global_header_offset >= ZIP64_LIMIT) {
//structEndArchive64 = "<4sQ2H2L4Q"
//zip64endrec = struct.pack(
// structEndArchive64, stringEndArchive64,
// 44, 45, 45, 0, 0, centDirCount, centDirCount,
// centDirSize, centDirOffset)
//self.fp.write(zip64endrec)
std::vector<char> zip64endrec_header;
zip64endrec_header += "PK";
zip64endrec_header += (uint16_t) 0x0606;
zip64endrec_header += (uint64_t) 0x44;
zip64endrec_header += (uint16_t) 0x45;
zip64endrec_header += (uint16_t) 0x45;
zip64endrec_header += (uint32_t) 0x0;
zip64endrec_header += (uint32_t) 0x0;
zip64endrec_header += (uint64_t) (nrecs+1); //centDirCount
zip64endrec_header += (uint64_t) (nrecs+1); //centDirCount
zip64endrec_header += (uint64_t) global_header.size(); //centDirSize
zip64endrec_header += (uint64_t) global_header_offset + nbytes + local_header.size(); //centDirOffset
fwrite(&zip64endrec_header[0],sizeof(char),zip64endrec_header.size(),fp);
//structEndArchive64Locator = "<4sLQL"
//zip64locrec = struct.pack(
// structEndArchive64Locator,
// stringEndArchive64Locator, 0, pos2, 1)
//self.fp.write(zip64locrec)
std::vector<char> zip64locrec_header;
zip64locrec_header += "PK";
zip64locrec_header += (uint16_t) 0x0706;
zip64locrec_header += (uint32_t) 0x0;
zip64locrec_header += (uint64_t) global_header_offset + nbytes + local_header.size() +
zip64endrec_header.size(); // zip64endrec_header offset
zip64locrec_header += (uint32_t) 0x1;
fwrite(&zip64locrec_header[0],sizeof(char),zip64locrec_header.size(),fp);
}
//build footer
std::vector<char> footer;
footer += "PK"; //first part of sig
footer += (uint16_t) 0x0605; //second part of sig
footer += (uint16_t) 0; //number of this disk
footer += (uint16_t) 0; //disk where footer starts
footer += (uint16_t) (nrecs+1); //number of records on this disk
footer += (uint16_t) (nrecs+1); //total number of records
footer += (uint32_t) global_header.size(); //nbytes of global headers
//offset of start of global headers
//since global header now starts after newly written array
footer += (global_header_offset >= ZIP64_LIMIT) ?
(uint32_t) 0xFFFFFFFF : (uint32_t) (global_header_offset + nbytes + local_header.size());
footer += (uint16_t) 0; //zip file comment length
fwrite(&footer[0],sizeof(char),footer.size(),fp);
fclose(fp);
}
template void npz_save<std::complex<double> >(std::string, std::string,
const std::complex<double>*, const std::vector<size_t>&,
std::string);
template void npz_save<double>(std::string, std::string,
const double*, const std::vector<size_t>&, std::string);
template void npz_save<char>(std::string, std::string,
const char*, const std::vector<size_t>&, std::string);
template<typename T>
void npz_save(std::string zipname, std::string fname,
const std::vector<T> &data, std::string mode) {
std::vector<size_t> shape;
shape.push_back(data.size());
npz_save(zipname, fname, &data[0], shape, mode);
}
template<typename T>
void npz_save(std::string zipname, std::string fname,
NpyArray &array, std::string mode) {
npz_save<T>(zipname, fname, array.data<T>(), array.shape, mode);
}
template<typename T>
void npz_add_array(npz_t &map, std::string fname,
const T* data, const std::vector<size_t> shape) {
size_t word_size = sizeof(T);
char type = map_type(typeid(T));
bool fortran_order = false;
NpyArray array(shape, word_size, type, fortran_order);
memcpy(array.data<unsigned char>(), data, array.num_bytes());
map[fname] = array;
}
template void npz_add_array<std::complex<double> >(npz_t &, std::string,
const std::complex<double>*, const std::vector<size_t>);
template void npz_add_array<float>(npz_t &, std::string,
const float*, const std::vector<size_t>);
template void npz_add_array<int8_t>(npz_t &, std::string,
const int8_t*, const std::vector<size_t>);
template void npz_add_array<uint8_t>(npz_t &, std::string,
const uint8_t*, const std::vector<size_t>);
template void npz_add_array<int16_t>(npz_t &, std::string,
const int16_t*, const std::vector<size_t>);
template void npz_add_array<uint16_t>(npz_t &, std::string,
const uint16_t*, const std::vector<size_t>);
template void npz_add_array<uint32_t>(npz_t &, std::string,
const uint32_t*, const std::vector<size_t>);
template<typename T>
void npz_add_array(npz_t &map, std::string fname,
const std::vector<T> &data) {
std::vector<size_t> shape;
shape.push_back(data.size());
npz_add_array(map, fname, &data[0], shape);
}
template void npz_add_array<std::complex<double> >(npz_t &, std::string,
const std::vector<std::complex<double> > &);
template void npz_add_array<float>(npz_t &, std::string,
const std::vector<float> &);
template void npz_add_array<int8_t>(npz_t &, std::string,
const std::vector<int8_t> &);
template void npz_add_array<int16_t>(npz_t &, std::string,
const std::vector<int16_t> &);
template void npz_add_array<uint16_t>(npz_t &, std::string,
const std::vector<uint16_t> &);
void npz_save_all(std::string zipname, npz_t &map) {
for (auto it = map.begin(); it != map.end(); it++) {
std::string mode = (it == map.begin()) ? "w" : "a";
NpyArray &arr = it->second;
if (arr.type == 'f') {
// support float only for now
assert(arr.word_size = sizeof(float));
npz_save<float>(zipname, it->first, it->second, mode);
} else if (arr.type == 'i') {
// support int8/int16 only
if (arr.word_size == sizeof(int8_t)) {
npz_save<int8_t>(zipname, it->first, it->second, mode);
} else if (arr.word_size == sizeof(int16_t)) {
npz_save<int16_t>(zipname, it->first, it->second, mode);
} else {
assert(0);
}
} else if (arr.type == 'u') {
// support uint8/uint16/uint32
if (arr.word_size == sizeof(uint8_t)) {
npz_save<uint8_t>(zipname, it->first, it->second, mode);
} else if (arr.word_size == sizeof(uint16_t)) {
npz_save<uint16_t>(zipname, it->first, it->second, mode);
} else if (arr.word_size == sizeof(uint32_t)) {
npz_save<uint32_t>(zipname, it->first, it->second, mode);
} else {
assert(0);
}
} else if (arr.type == 'b') {
// not support yet
assert(0);
} else if (arr.type == 'c') {
// not support yet
assert(0);
} else {
// invalid type
std::cout << "libcnpy error: invalid array type "
<< arr.type << ", for " << it->first << "\n";
assert(0);
}
}
}
static NpyArray load_the_npy_file(FILE* fp) {
std::vector<size_t> shape;
size_t word_size;
char type;
bool fortran_order;
parse_npy_header(fp,word_size,type,shape,fortran_order);
NpyArray arr(shape, word_size, type, fortran_order);
size_t nread = fread(arr.data<char>(),1,arr.num_bytes(),fp);
if(nread != arr.num_bytes())
throw std::runtime_error("load_the_npy_file: failed fread");
return arr;
}
static NpyArray load_the_npz_array(FILE* fp, uint32_t compr_bytes,
uint32_t uncompr_bytes) {
std::vector<unsigned char> buffer_compr(compr_bytes);
std::vector<unsigned char> buffer_uncompr(uncompr_bytes);
size_t nread = fread(&buffer_compr[0],1,compr_bytes,fp);
if(nread != compr_bytes)
throw std::runtime_error("load_the_npy_file: failed fread");
int err;
z_stream d_stream;
d_stream.zalloc = Z_NULL;
d_stream.zfree = Z_NULL;
d_stream.opaque = Z_NULL;
d_stream.avail_in = 0;
d_stream.next_in = Z_NULL;
err = inflateInit2(&d_stream, -MAX_WBITS);
assert(err = 0);
d_stream.avail_in = compr_bytes;
d_stream.next_in = &buffer_compr[0];
d_stream.avail_out = uncompr_bytes;
d_stream.next_out = &buffer_uncompr[0];
err = inflate(&d_stream, Z_FINISH);
assert(err = 0);
err = inflateEnd(&d_stream);
assert(err = 0);
std::vector<size_t> shape;
size_t word_size;
char type;
bool fortran_order;
parse_npy_header(&buffer_uncompr[0],word_size,type,shape,fortran_order);
NpyArray array(shape, word_size, type, fortran_order);
size_t offset = uncompr_bytes - array.num_bytes();
memcpy(array.data<unsigned char>(),&buffer_uncompr[0]+offset,array.num_bytes());
return array;
}
npz_t npz_load(std::string fname) {
npz_t arrays;
arrays.clear();
FILE* fp = fopen(fname.c_str(),"rb");
if(!fp) {
//throw std::runtime_error("npz_load: Error! Unable to open file "+fname+"!");
return arrays;
}
while(1) {
std::vector<char> local_header(30);
size_t headerres = fread(&local_header[0],sizeof(char),30,fp);
if(headerres != 30)
break;
//if we've reached the global header, stop reading
if(local_header[2] != 0x03 || local_header[3] != 0x04) break;
//read in the variable name
uint16_t name_len = *(uint16_t*) &local_header[26];
std::string varname(name_len,' ');
size_t vname_res = fread(&varname[0],sizeof(char),name_len,fp);
if(vname_res != name_len)
throw std::runtime_error("npz_load: failed fread");
//erase the lagging .npy
varname.erase(varname.end()-4,varname.end());
//read in the extra field
uint16_t extra_field_len = *(uint16_t*) &local_header[28];
if(extra_field_len > 0) {
std::vector<char> buff(extra_field_len);
size_t efield_res = fread(&buff[0],sizeof(char),extra_field_len,fp);
if(efield_res != extra_field_len)
throw std::runtime_error("npz_load: failed fread");
}
uint16_t compr_method = *reinterpret_cast<uint16_t*>(&local_header[0]+8);
uint32_t compr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0]+18);
uint32_t uncompr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0]+22);
if(compr_method == 0) {arrays[varname] = load_the_npy_file(fp);}
else {arrays[varname] = load_the_npz_array(fp,compr_bytes,uncompr_bytes);}
}
fclose(fp);
return arrays;
}
NpyArray npz_load(std::string fname, std::string varname) {
FILE* fp = fopen(fname.c_str(),"rb");
if(!fp) throw std::runtime_error("npz_load: Unable to open file "+fname);
while(1) {
std::vector<char> local_header(30);
size_t header_res = fread(&local_header[0],sizeof(char),30,fp);
if(header_res != 30)
throw std::runtime_error("npz_load: failed fread");
//if we've reached the global header, stop reading
if(local_header[2] != 0x03 || local_header[3] != 0x04) break;
//read in the variable name
uint16_t name_len = *(uint16_t*) &local_header[26];
std::string vname(name_len,' ');
size_t vname_res = fread(&vname[0],sizeof(char),name_len,fp);
if(vname_res != name_len)
throw std::runtime_error("npz_load: failed fread");
vname.erase(vname.end()-4,vname.end()); //erase the lagging .npy
//read in the extra field
uint16_t extra_field_len = *(uint16_t*) &local_header[28];
fseek(fp,extra_field_len,SEEK_CUR); //skip past the extra field
uint16_t compr_method = *reinterpret_cast<uint16_t*>(&local_header[0]+8);
uint32_t compr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0]+18);
uint32_t uncompr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0]+22);
if(vname == varname) {
NpyArray array = (compr_method == 0) ? load_the_npy_file(fp)
: load_the_npz_array(fp,compr_bytes,uncompr_bytes);
fclose(fp);
return array;
}
else {
//skip past the data
uint32_t size = *(uint32_t*) &local_header[22];
fseek(fp,size,SEEK_CUR);
}
}
fclose(fp);
//if we get here, we haven't found the variable in the file
throw std::runtime_error("npz_load: Variable name "+varname+" not found in "+fname);
}
NpyArray npy_load(std::string fname) {
FILE* fp = fopen(fname.c_str(), "rb");
if(!fp) throw std::runtime_error("npy_load: Unable to open file "+fname);
NpyArray arr = load_the_npy_file(fp);
fclose(fp);
return arr;
}
} // namespace cnpy

117
cnpy/cnpy.h Normal file
View File

@ -0,0 +1,117 @@
//Copyright (C) 2011 Carl Rogers
//Released under MIT License
//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
#ifndef LIBCNPY_H_
#define LIBCNPY_H_
#include<string>
#include<cstring>
#include<stdexcept>
#include<sstream>
#include<vector>
#include<cstdio>
#include<typeinfo>
#include<iostream>
#include<cassert>
#include<zlib.h>
#include<map>
#include<memory>
#include<stdint.h>
#include<numeric>
namespace cnpy {
struct NpyArray {
NpyArray(const std::vector<size_t>& _shape, size_t _word_size,
char _type, bool _fortran_order)
: shape(_shape), word_size(_word_size),
type(_type), fortran_order(_fortran_order) {
num_vals = 1;
for(size_t i = 0;i < shape.size();i++) num_vals *= shape[i];
data_holder = std::shared_ptr<std::vector<char>>(
new std::vector<char>(num_vals * word_size));
}
NpyArray() : shape(0), word_size(0), type(0), fortran_order(0), num_vals(0) {}
template<typename T>
T* data() {
return reinterpret_cast<T*>(&(*data_holder)[0]);
}
template<typename T>
const T* data() const {
return reinterpret_cast<T*>(&(*data_holder)[0]);
}
template<typename T>
std::vector<T> as_vec() const {
const T* p = data<T>();
return std::vector<T>(p, p+num_vals);
}
size_t num_bytes() const {
return data_holder->size();
}
std::shared_ptr<std::vector<char>> data_holder;
std::vector<size_t> shape;
size_t word_size;
char type;
bool fortran_order;
size_t num_vals;
};
using npz_t = std::map<std::string, NpyArray>;
std::vector<char> create_npy_header(const std::vector<size_t>& shape,
size_t word_size, char type);
void parse_npy_header(FILE* fp,size_t& word_size, char& type,
std::vector<size_t>& shape, bool& fortran_order);
void parse_npy_header(unsigned char* buffer, size_t& word_size, char& type,
std::vector<size_t>& shape, bool& fortran_order);
void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size,
size_t& global_header_offset);
npz_t npz_load(std::string fname);
NpyArray npz_load(std::string fname, std::string varname);
NpyArray npy_load(std::string fname);
template<typename T>
std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs);
template<>
std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);
template<>
std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);
template<typename T>
void npy_save(std::string fname, const T* data,
const std::vector<size_t> shape, std::string mode = "w");
template<typename T>
void npy_save(std::string fname, const std::vector<T> data,
std::string mode = "w");
template<typename T>
void npz_save(std::string zipname, std::string fname,
const T* data, const std::vector<size_t>& shape,
std::string mode = "w");
template<typename T>
void npz_save(std::string zipname, std::string fname,
const std::vector<T> &data, std::string mode = "w");
template<typename T>
void npz_save(std::string zipname, std::string fname,
NpyArray &array, std::string mode = "w");
template<typename T>
void npz_add_array(npz_t &map, std::string fname,
const T* data, const std::vector<size_t> shape);
template<typename T>
void npz_add_array(npz_t &map, std::string fname,
const std::vector<T> &data);
void npz_save_all(std::string zipname, npz_t &map);
} // namespace cnpy
#endif

70
cnpy/example1.cpp Normal file
View File

@ -0,0 +1,70 @@
#include"cnpy.h"
#include<complex>
#include<cstdlib>
#include<iostream>
#include<map>
#include<string>
const int Nx = 128;
const int Ny = 64;
const int Nz = 32;
int main()
{
//set random seed so that result is reproducible (for testing)
srand(0);
//create random data
std::vector<std::complex<double>> data(Nx*Ny*Nz);
for(int i = 0;i < Nx*Ny*Nz;i++) data[i] = std::complex<double>(rand(),rand());
//save it to file
cnpy::npy_save("arr1.npy",&data[0],{Nz,Ny,Nx},"w");
//load it into a new array
cnpy::NpyArray arr = cnpy::npy_load("arr1.npy");
std::complex<double>* loaded_data = arr.data<std::complex<double>>();
//make sure the loaded data matches the saved data
assert(arr.word_size == sizeof(std::complex<double>));
assert(arr.shape.size() == 3 && arr.shape[0] == Nz && arr.shape[1] == Ny && arr.shape[2] == Nx);
for(int i = 0; i < Nx*Ny*Nz;i++) assert(data[i] == loaded_data[i]);
//append the same data to file
//npy array on file now has shape (Nz+Nz,Ny,Nx)
cnpy::npy_save("arr1.npy",&data[0],{Nz,Ny,Nx},"a");
//now write to an npz file
//non-array variables are treated as 1D arrays with 1 element
double myVar1 = 1.2;
char myVar2 = 'a';
cnpy::npz_save("out.npz","myVar1",&myVar1,{1},"w"); //"w" overwrites any existing file
cnpy::npz_save("out.npz","myVar2",&myVar2,{1},"a"); //"a" appends to the file we created above
cnpy::npz_save("out.npz","arr1",&data[0],{Nz,Ny,Nx},"a"); //"a" appends to the file we created above
//load a single var from the npz file
cnpy::NpyArray arr2 = cnpy::npz_load("out.npz","arr1");
//load the entire npz file
cnpy::npz_t my_npz = cnpy::npz_load("out.npz");
// add a new array
std::vector<std::complex<double>> new_data(20);
for(int i = 0;i < 20;i++)
new_data[i] = std::complex<double>(i, 20 - i);
cnpy::npz_add_array<std::complex<double> >(my_npz, "new_arr", new_data);
//save the entire npz file back
// TODO: this has some problem, because npz_save_all() is assuming
// all array in same type, this is because NPArray struct does not
// convey a type info.
cnpy::npz_save_all("out_new.npz", my_npz);
//check that the loaded myVar1 matches myVar1
cnpy::NpyArray arr_mv1 = my_npz["myVar1"];
double* mv1 = arr_mv1.data<double>();
assert(arr_mv1.shape.size() == 1 && arr_mv1.shape[0] == 1);
assert(mv1[0] == myVar1);
return 0;
}

18
cnpy/mat2npz Normal file
View File

@ -0,0 +1,18 @@
#!/usr/bin/env python
import sys
from numpy import savez
from scipy.io import loadmat
assert len(sys.argv) > 1
files = sys.argv[1:]
for f in files:
mat_vars = loadmat(f)
mat_vars.pop('__version__')
mat_vars.pop('__header__')
mat_vars.pop('__globals__')
fn = f.replace('.mat','.npz')
savez(fn,**mat_vars)

15
cnpy/npy2mat Normal file
View File

@ -0,0 +1,15 @@
#!/usr/bin/env python
import sys
from numpy import load
from scipy.io import savemat
assert len(sys.argv) > 1
files = sys.argv[1:]
for f in files:
data = load(f)
fn = f.replace('.npy','')
fn = fn.replace('.','_')
savemat(fn,{fn : data})

15
cnpy/npz2mat Executable file
View File

@ -0,0 +1,15 @@
#!/usr/bin/env python
import sys
from numpy import load
from scipy.io import savemat
assert len(sys.argv) > 1
files = sys.argv[1:]
for f in files:
data = load(f)
fn = f.replace('.npz','')
fn = fn.replace('.','_') #matlab cant handle dots
savemat(fn,data)