9012 lines
370 KiB
Plaintext
9012 lines
370 KiB
Plaintext
This is
|
||
/ldhome/software/toolsbuild/slave2/workspace/Toolchain/release-riscv-0/build-riscv-gcc-riscv64-unknown-linux-gnu/build-riscv64-linux-x86_64/build-glibc-linux-rv64imafdcvxtheadc-lp64dv/manual/libc.info,
|
||
produced by makeinfo version 4.9 from libc.texinfo.
|
||
|
||
INFO-DIR-SECTION Software libraries
|
||
START-INFO-DIR-ENTRY
|
||
* Libc: (libc). C library.
|
||
END-INFO-DIR-ENTRY
|
||
|
||
INFO-DIR-SECTION GNU C library functions and macros
|
||
START-INFO-DIR-ENTRY
|
||
* ALTWERASE: (libc)Local Modes.
|
||
* ARGP_ERR_UNKNOWN: (libc)Argp Parser Functions.
|
||
* ARG_MAX: (libc)General Limits.
|
||
* BC_BASE_MAX: (libc)Utility Limits.
|
||
* BC_DIM_MAX: (libc)Utility Limits.
|
||
* BC_SCALE_MAX: (libc)Utility Limits.
|
||
* BC_STRING_MAX: (libc)Utility Limits.
|
||
* BRKINT: (libc)Input Modes.
|
||
* BUFSIZ: (libc)Controlling Buffering.
|
||
* CCTS_OFLOW: (libc)Control Modes.
|
||
* CHAR_BIT: (libc)Width of Type.
|
||
* CHILD_MAX: (libc)General Limits.
|
||
* CIGNORE: (libc)Control Modes.
|
||
* CLK_TCK: (libc)Processor Time.
|
||
* CLOCAL: (libc)Control Modes.
|
||
* CLOCKS_PER_SEC: (libc)CPU Time.
|
||
* COLL_WEIGHTS_MAX: (libc)Utility Limits.
|
||
* CPU_CLR: (libc)CPU Affinity.
|
||
* CPU_ISSET: (libc)CPU Affinity.
|
||
* CPU_SET: (libc)CPU Affinity.
|
||
* CPU_SETSIZE: (libc)CPU Affinity.
|
||
* CPU_ZERO: (libc)CPU Affinity.
|
||
* CREAD: (libc)Control Modes.
|
||
* CRTS_IFLOW: (libc)Control Modes.
|
||
* CS5: (libc)Control Modes.
|
||
* CS6: (libc)Control Modes.
|
||
* CS7: (libc)Control Modes.
|
||
* CS8: (libc)Control Modes.
|
||
* CSIZE: (libc)Control Modes.
|
||
* CSTOPB: (libc)Control Modes.
|
||
* DTTOIF: (libc)Directory Entries.
|
||
* E2BIG: (libc)Error Codes.
|
||
* EACCES: (libc)Error Codes.
|
||
* EADDRINUSE: (libc)Error Codes.
|
||
* EADDRNOTAVAIL: (libc)Error Codes.
|
||
* EADV: (libc)Error Codes.
|
||
* EAFNOSUPPORT: (libc)Error Codes.
|
||
* EAGAIN: (libc)Error Codes.
|
||
* EALREADY: (libc)Error Codes.
|
||
* EAUTH: (libc)Error Codes.
|
||
* EBACKGROUND: (libc)Error Codes.
|
||
* EBADE: (libc)Error Codes.
|
||
* EBADF: (libc)Error Codes.
|
||
* EBADFD: (libc)Error Codes.
|
||
* EBADMSG: (libc)Error Codes.
|
||
* EBADR: (libc)Error Codes.
|
||
* EBADRPC: (libc)Error Codes.
|
||
* EBADRQC: (libc)Error Codes.
|
||
* EBADSLT: (libc)Error Codes.
|
||
* EBFONT: (libc)Error Codes.
|
||
* EBUSY: (libc)Error Codes.
|
||
* ECANCELED: (libc)Error Codes.
|
||
* ECHILD: (libc)Error Codes.
|
||
* ECHO: (libc)Local Modes.
|
||
* ECHOCTL: (libc)Local Modes.
|
||
* ECHOE: (libc)Local Modes.
|
||
* ECHOK: (libc)Local Modes.
|
||
* ECHOKE: (libc)Local Modes.
|
||
* ECHONL: (libc)Local Modes.
|
||
* ECHOPRT: (libc)Local Modes.
|
||
* ECHRNG: (libc)Error Codes.
|
||
* ECOMM: (libc)Error Codes.
|
||
* ECONNABORTED: (libc)Error Codes.
|
||
* ECONNREFUSED: (libc)Error Codes.
|
||
* ECONNRESET: (libc)Error Codes.
|
||
* ED: (libc)Error Codes.
|
||
* EDEADLK: (libc)Error Codes.
|
||
* EDEADLOCK: (libc)Error Codes.
|
||
* EDESTADDRREQ: (libc)Error Codes.
|
||
* EDIED: (libc)Error Codes.
|
||
* EDOM: (libc)Error Codes.
|
||
* EDOTDOT: (libc)Error Codes.
|
||
* EDQUOT: (libc)Error Codes.
|
||
* EEXIST: (libc)Error Codes.
|
||
* EFAULT: (libc)Error Codes.
|
||
* EFBIG: (libc)Error Codes.
|
||
* EFTYPE: (libc)Error Codes.
|
||
* EGRATUITOUS: (libc)Error Codes.
|
||
* EGREGIOUS: (libc)Error Codes.
|
||
* EHOSTDOWN: (libc)Error Codes.
|
||
* EHOSTUNREACH: (libc)Error Codes.
|
||
* EHWPOISON: (libc)Error Codes.
|
||
* EIDRM: (libc)Error Codes.
|
||
* EIEIO: (libc)Error Codes.
|
||
* EILSEQ: (libc)Error Codes.
|
||
* EINPROGRESS: (libc)Error Codes.
|
||
* EINTR: (libc)Error Codes.
|
||
* EINVAL: (libc)Error Codes.
|
||
* EIO: (libc)Error Codes.
|
||
* EISCONN: (libc)Error Codes.
|
||
* EISDIR: (libc)Error Codes.
|
||
* EISNAM: (libc)Error Codes.
|
||
* EKEYEXPIRED: (libc)Error Codes.
|
||
* EKEYREJECTED: (libc)Error Codes.
|
||
* EKEYREVOKED: (libc)Error Codes.
|
||
* EL2HLT: (libc)Error Codes.
|
||
* EL2NSYNC: (libc)Error Codes.
|
||
* EL3HLT: (libc)Error Codes.
|
||
* EL3RST: (libc)Error Codes.
|
||
* ELIBACC: (libc)Error Codes.
|
||
* ELIBBAD: (libc)Error Codes.
|
||
* ELIBEXEC: (libc)Error Codes.
|
||
* ELIBMAX: (libc)Error Codes.
|
||
* ELIBSCN: (libc)Error Codes.
|
||
* ELNRNG: (libc)Error Codes.
|
||
* ELOOP: (libc)Error Codes.
|
||
* EMEDIUMTYPE: (libc)Error Codes.
|
||
* EMFILE: (libc)Error Codes.
|
||
* EMLINK: (libc)Error Codes.
|
||
* EMSGSIZE: (libc)Error Codes.
|
||
* EMULTIHOP: (libc)Error Codes.
|
||
* ENAMETOOLONG: (libc)Error Codes.
|
||
* ENAVAIL: (libc)Error Codes.
|
||
* ENEEDAUTH: (libc)Error Codes.
|
||
* ENETDOWN: (libc)Error Codes.
|
||
* ENETRESET: (libc)Error Codes.
|
||
* ENETUNREACH: (libc)Error Codes.
|
||
* ENFILE: (libc)Error Codes.
|
||
* ENOANO: (libc)Error Codes.
|
||
* ENOBUFS: (libc)Error Codes.
|
||
* ENOCSI: (libc)Error Codes.
|
||
* ENODATA: (libc)Error Codes.
|
||
* ENODEV: (libc)Error Codes.
|
||
* ENOENT: (libc)Error Codes.
|
||
* ENOEXEC: (libc)Error Codes.
|
||
* ENOKEY: (libc)Error Codes.
|
||
* ENOLCK: (libc)Error Codes.
|
||
* ENOLINK: (libc)Error Codes.
|
||
* ENOMEDIUM: (libc)Error Codes.
|
||
* ENOMEM: (libc)Error Codes.
|
||
* ENOMSG: (libc)Error Codes.
|
||
* ENONET: (libc)Error Codes.
|
||
* ENOPKG: (libc)Error Codes.
|
||
* ENOPROTOOPT: (libc)Error Codes.
|
||
* ENOSPC: (libc)Error Codes.
|
||
* ENOSR: (libc)Error Codes.
|
||
* ENOSTR: (libc)Error Codes.
|
||
* ENOSYS: (libc)Error Codes.
|
||
* ENOTBLK: (libc)Error Codes.
|
||
* ENOTCONN: (libc)Error Codes.
|
||
* ENOTDIR: (libc)Error Codes.
|
||
* ENOTEMPTY: (libc)Error Codes.
|
||
* ENOTNAM: (libc)Error Codes.
|
||
* ENOTRECOVERABLE: (libc)Error Codes.
|
||
* ENOTSOCK: (libc)Error Codes.
|
||
* ENOTSUP: (libc)Error Codes.
|
||
* ENOTTY: (libc)Error Codes.
|
||
* ENOTUNIQ: (libc)Error Codes.
|
||
* ENXIO: (libc)Error Codes.
|
||
* EOF: (libc)EOF and Errors.
|
||
* EOPNOTSUPP: (libc)Error Codes.
|
||
* EOVERFLOW: (libc)Error Codes.
|
||
* EOWNERDEAD: (libc)Error Codes.
|
||
* EPERM: (libc)Error Codes.
|
||
* EPFNOSUPPORT: (libc)Error Codes.
|
||
* EPIPE: (libc)Error Codes.
|
||
* EPROCLIM: (libc)Error Codes.
|
||
* EPROCUNAVAIL: (libc)Error Codes.
|
||
* EPROGMISMATCH: (libc)Error Codes.
|
||
* EPROGUNAVAIL: (libc)Error Codes.
|
||
* EPROTO: (libc)Error Codes.
|
||
* EPROTONOSUPPORT: (libc)Error Codes.
|
||
* EPROTOTYPE: (libc)Error Codes.
|
||
* EQUIV_CLASS_MAX: (libc)Utility Limits.
|
||
* ERANGE: (libc)Error Codes.
|
||
* EREMCHG: (libc)Error Codes.
|
||
* EREMOTE: (libc)Error Codes.
|
||
* EREMOTEIO: (libc)Error Codes.
|
||
* ERESTART: (libc)Error Codes.
|
||
* ERFKILL: (libc)Error Codes.
|
||
* EROFS: (libc)Error Codes.
|
||
* ERPCMISMATCH: (libc)Error Codes.
|
||
* ESHUTDOWN: (libc)Error Codes.
|
||
* ESOCKTNOSUPPORT: (libc)Error Codes.
|
||
* ESPIPE: (libc)Error Codes.
|
||
* ESRCH: (libc)Error Codes.
|
||
* ESRMNT: (libc)Error Codes.
|
||
* ESTALE: (libc)Error Codes.
|
||
* ESTRPIPE: (libc)Error Codes.
|
||
* ETIME: (libc)Error Codes.
|
||
* ETIMEDOUT: (libc)Error Codes.
|
||
* ETOOMANYREFS: (libc)Error Codes.
|
||
* ETXTBSY: (libc)Error Codes.
|
||
* EUCLEAN: (libc)Error Codes.
|
||
* EUNATCH: (libc)Error Codes.
|
||
* EUSERS: (libc)Error Codes.
|
||
* EWOULDBLOCK: (libc)Error Codes.
|
||
* EXDEV: (libc)Error Codes.
|
||
* EXFULL: (libc)Error Codes.
|
||
* EXIT_FAILURE: (libc)Exit Status.
|
||
* EXIT_SUCCESS: (libc)Exit Status.
|
||
* EXPR_NEST_MAX: (libc)Utility Limits.
|
||
* FD_CLOEXEC: (libc)Descriptor Flags.
|
||
* FD_CLR: (libc)Waiting for I/O.
|
||
* FD_ISSET: (libc)Waiting for I/O.
|
||
* FD_SET: (libc)Waiting for I/O.
|
||
* FD_SETSIZE: (libc)Waiting for I/O.
|
||
* FD_ZERO: (libc)Waiting for I/O.
|
||
* FE_SNANS_ALWAYS_SIGNAL: (libc)Infinity and NaN.
|
||
* FILENAME_MAX: (libc)Limits for Files.
|
||
* FLUSHO: (libc)Local Modes.
|
||
* FOPEN_MAX: (libc)Opening Streams.
|
||
* FP_ILOGB0: (libc)Exponents and Logarithms.
|
||
* FP_ILOGBNAN: (libc)Exponents and Logarithms.
|
||
* FP_LLOGB0: (libc)Exponents and Logarithms.
|
||
* FP_LLOGBNAN: (libc)Exponents and Logarithms.
|
||
* F_DUPFD: (libc)Duplicating Descriptors.
|
||
* F_GETFD: (libc)Descriptor Flags.
|
||
* F_GETFL: (libc)Getting File Status Flags.
|
||
* F_GETLK: (libc)File Locks.
|
||
* F_GETOWN: (libc)Interrupt Input.
|
||
* F_OFD_GETLK: (libc)Open File Description Locks.
|
||
* F_OFD_SETLK: (libc)Open File Description Locks.
|
||
* F_OFD_SETLKW: (libc)Open File Description Locks.
|
||
* F_OK: (libc)Testing File Access.
|
||
* F_SETFD: (libc)Descriptor Flags.
|
||
* F_SETFL: (libc)Getting File Status Flags.
|
||
* F_SETLK: (libc)File Locks.
|
||
* F_SETLKW: (libc)File Locks.
|
||
* F_SETOWN: (libc)Interrupt Input.
|
||
* HUGE_VAL: (libc)Math Error Reporting.
|
||
* HUGE_VALF: (libc)Math Error Reporting.
|
||
* HUGE_VALL: (libc)Math Error Reporting.
|
||
* HUGE_VAL_FN: (libc)Math Error Reporting.
|
||
* HUGE_VAL_FNx: (libc)Math Error Reporting.
|
||
* HUPCL: (libc)Control Modes.
|
||
* I: (libc)Complex Numbers.
|
||
* ICANON: (libc)Local Modes.
|
||
* ICRNL: (libc)Input Modes.
|
||
* IEXTEN: (libc)Local Modes.
|
||
* IFNAMSIZ: (libc)Interface Naming.
|
||
* IFTODT: (libc)Directory Entries.
|
||
* IGNBRK: (libc)Input Modes.
|
||
* IGNCR: (libc)Input Modes.
|
||
* IGNPAR: (libc)Input Modes.
|
||
* IMAXBEL: (libc)Input Modes.
|
||
* INADDR_ANY: (libc)Host Address Data Type.
|
||
* INADDR_BROADCAST: (libc)Host Address Data Type.
|
||
* INADDR_LOOPBACK: (libc)Host Address Data Type.
|
||
* INADDR_NONE: (libc)Host Address Data Type.
|
||
* INFINITY: (libc)Infinity and NaN.
|
||
* INLCR: (libc)Input Modes.
|
||
* INPCK: (libc)Input Modes.
|
||
* IPPORT_RESERVED: (libc)Ports.
|
||
* IPPORT_USERRESERVED: (libc)Ports.
|
||
* ISIG: (libc)Local Modes.
|
||
* ISTRIP: (libc)Input Modes.
|
||
* IXANY: (libc)Input Modes.
|
||
* IXOFF: (libc)Input Modes.
|
||
* IXON: (libc)Input Modes.
|
||
* LINE_MAX: (libc)Utility Limits.
|
||
* LINK_MAX: (libc)Limits for Files.
|
||
* L_ctermid: (libc)Identifying the Terminal.
|
||
* L_cuserid: (libc)Who Logged In.
|
||
* L_tmpnam: (libc)Temporary Files.
|
||
* MAXNAMLEN: (libc)Limits for Files.
|
||
* MAXSYMLINKS: (libc)Symbolic Links.
|
||
* MAX_CANON: (libc)Limits for Files.
|
||
* MAX_INPUT: (libc)Limits for Files.
|
||
* MB_CUR_MAX: (libc)Selecting the Conversion.
|
||
* MB_LEN_MAX: (libc)Selecting the Conversion.
|
||
* MDMBUF: (libc)Control Modes.
|
||
* MSG_DONTROUTE: (libc)Socket Data Options.
|
||
* MSG_OOB: (libc)Socket Data Options.
|
||
* MSG_PEEK: (libc)Socket Data Options.
|
||
* NAME_MAX: (libc)Limits for Files.
|
||
* NAN: (libc)Infinity and NaN.
|
||
* NCCS: (libc)Mode Data Types.
|
||
* NGROUPS_MAX: (libc)General Limits.
|
||
* NOFLSH: (libc)Local Modes.
|
||
* NOKERNINFO: (libc)Local Modes.
|
||
* NSIG: (libc)Standard Signals.
|
||
* NULL: (libc)Null Pointer Constant.
|
||
* ONLCR: (libc)Output Modes.
|
||
* ONOEOT: (libc)Output Modes.
|
||
* OPEN_MAX: (libc)General Limits.
|
||
* OPOST: (libc)Output Modes.
|
||
* OXTABS: (libc)Output Modes.
|
||
* O_ACCMODE: (libc)Access Modes.
|
||
* O_APPEND: (libc)Operating Modes.
|
||
* O_ASYNC: (libc)Operating Modes.
|
||
* O_CREAT: (libc)Open-time Flags.
|
||
* O_EXCL: (libc)Open-time Flags.
|
||
* O_EXEC: (libc)Access Modes.
|
||
* O_EXLOCK: (libc)Open-time Flags.
|
||
* O_FSYNC: (libc)Operating Modes.
|
||
* O_IGNORE_CTTY: (libc)Open-time Flags.
|
||
* O_NDELAY: (libc)Operating Modes.
|
||
* O_NOATIME: (libc)Operating Modes.
|
||
* O_NOCTTY: (libc)Open-time Flags.
|
||
* O_NOLINK: (libc)Open-time Flags.
|
||
* O_NONBLOCK: (libc)Open-time Flags.
|
||
* O_NONBLOCK: (libc)Operating Modes.
|
||
* O_NOTRANS: (libc)Open-time Flags.
|
||
* O_RDONLY: (libc)Access Modes.
|
||
* O_RDWR: (libc)Access Modes.
|
||
* O_READ: (libc)Access Modes.
|
||
* O_SHLOCK: (libc)Open-time Flags.
|
||
* O_SYNC: (libc)Operating Modes.
|
||
* O_TMPFILE: (libc)Open-time Flags.
|
||
* O_TRUNC: (libc)Open-time Flags.
|
||
* O_WRITE: (libc)Access Modes.
|
||
* O_WRONLY: (libc)Access Modes.
|
||
* PARENB: (libc)Control Modes.
|
||
* PARMRK: (libc)Input Modes.
|
||
* PARODD: (libc)Control Modes.
|
||
* PATH_MAX: (libc)Limits for Files.
|
||
* PA_FLAG_MASK: (libc)Parsing a Template String.
|
||
* PENDIN: (libc)Local Modes.
|
||
* PF_FILE: (libc)Local Namespace Details.
|
||
* PF_INET6: (libc)Internet Namespace.
|
||
* PF_INET: (libc)Internet Namespace.
|
||
* PF_LOCAL: (libc)Local Namespace Details.
|
||
* PF_UNIX: (libc)Local Namespace Details.
|
||
* PIPE_BUF: (libc)Limits for Files.
|
||
* P_tmpdir: (libc)Temporary Files.
|
||
* RAND_MAX: (libc)ISO Random.
|
||
* RE_DUP_MAX: (libc)General Limits.
|
||
* RLIM_INFINITY: (libc)Limits on Resources.
|
||
* R_OK: (libc)Testing File Access.
|
||
* SA_NOCLDSTOP: (libc)Flags for Sigaction.
|
||
* SA_ONSTACK: (libc)Flags for Sigaction.
|
||
* SA_RESTART: (libc)Flags for Sigaction.
|
||
* SEEK_CUR: (libc)File Positioning.
|
||
* SEEK_END: (libc)File Positioning.
|
||
* SEEK_SET: (libc)File Positioning.
|
||
* SIGABRT: (libc)Program Error Signals.
|
||
* SIGALRM: (libc)Alarm Signals.
|
||
* SIGBUS: (libc)Program Error Signals.
|
||
* SIGCHLD: (libc)Job Control Signals.
|
||
* SIGCLD: (libc)Job Control Signals.
|
||
* SIGCONT: (libc)Job Control Signals.
|
||
* SIGEMT: (libc)Program Error Signals.
|
||
* SIGFPE: (libc)Program Error Signals.
|
||
* SIGHUP: (libc)Termination Signals.
|
||
* SIGILL: (libc)Program Error Signals.
|
||
* SIGINFO: (libc)Miscellaneous Signals.
|
||
* SIGINT: (libc)Termination Signals.
|
||
* SIGIO: (libc)Asynchronous I/O Signals.
|
||
* SIGIOT: (libc)Program Error Signals.
|
||
* SIGKILL: (libc)Termination Signals.
|
||
* SIGLOST: (libc)Operation Error Signals.
|
||
* SIGPIPE: (libc)Operation Error Signals.
|
||
* SIGPOLL: (libc)Asynchronous I/O Signals.
|
||
* SIGPROF: (libc)Alarm Signals.
|
||
* SIGQUIT: (libc)Termination Signals.
|
||
* SIGSEGV: (libc)Program Error Signals.
|
||
* SIGSTOP: (libc)Job Control Signals.
|
||
* SIGSYS: (libc)Program Error Signals.
|
||
* SIGTERM: (libc)Termination Signals.
|
||
* SIGTRAP: (libc)Program Error Signals.
|
||
* SIGTSTP: (libc)Job Control Signals.
|
||
* SIGTTIN: (libc)Job Control Signals.
|
||
* SIGTTOU: (libc)Job Control Signals.
|
||
* SIGURG: (libc)Asynchronous I/O Signals.
|
||
* SIGUSR1: (libc)Miscellaneous Signals.
|
||
* SIGUSR2: (libc)Miscellaneous Signals.
|
||
* SIGVTALRM: (libc)Alarm Signals.
|
||
* SIGWINCH: (libc)Miscellaneous Signals.
|
||
* SIGXCPU: (libc)Operation Error Signals.
|
||
* SIGXFSZ: (libc)Operation Error Signals.
|
||
* SIG_ERR: (libc)Basic Signal Handling.
|
||
* SNAN: (libc)Infinity and NaN.
|
||
* SNANF: (libc)Infinity and NaN.
|
||
* SNANFN: (libc)Infinity and NaN.
|
||
* SNANFNx: (libc)Infinity and NaN.
|
||
* SNANL: (libc)Infinity and NaN.
|
||
* SOCK_DGRAM: (libc)Communication Styles.
|
||
* SOCK_RAW: (libc)Communication Styles.
|
||
* SOCK_RDM: (libc)Communication Styles.
|
||
* SOCK_SEQPACKET: (libc)Communication Styles.
|
||
* SOCK_STREAM: (libc)Communication Styles.
|
||
* SOL_SOCKET: (libc)Socket-Level Options.
|
||
* SSIZE_MAX: (libc)General Limits.
|
||
* STREAM_MAX: (libc)General Limits.
|
||
* SUN_LEN: (libc)Local Namespace Details.
|
||
* S_IFMT: (libc)Testing File Type.
|
||
* S_ISBLK: (libc)Testing File Type.
|
||
* S_ISCHR: (libc)Testing File Type.
|
||
* S_ISDIR: (libc)Testing File Type.
|
||
* S_ISFIFO: (libc)Testing File Type.
|
||
* S_ISLNK: (libc)Testing File Type.
|
||
* S_ISREG: (libc)Testing File Type.
|
||
* S_ISSOCK: (libc)Testing File Type.
|
||
* S_TYPEISMQ: (libc)Testing File Type.
|
||
* S_TYPEISSEM: (libc)Testing File Type.
|
||
* S_TYPEISSHM: (libc)Testing File Type.
|
||
* TMP_MAX: (libc)Temporary Files.
|
||
* TOSTOP: (libc)Local Modes.
|
||
* TZNAME_MAX: (libc)General Limits.
|
||
* VDISCARD: (libc)Other Special.
|
||
* VDSUSP: (libc)Signal Characters.
|
||
* VEOF: (libc)Editing Characters.
|
||
* VEOL2: (libc)Editing Characters.
|
||
* VEOL: (libc)Editing Characters.
|
||
* VERASE: (libc)Editing Characters.
|
||
* VINTR: (libc)Signal Characters.
|
||
* VKILL: (libc)Editing Characters.
|
||
* VLNEXT: (libc)Other Special.
|
||
* VMIN: (libc)Noncanonical Input.
|
||
* VQUIT: (libc)Signal Characters.
|
||
* VREPRINT: (libc)Editing Characters.
|
||
* VSTART: (libc)Start/Stop Characters.
|
||
* VSTATUS: (libc)Other Special.
|
||
* VSTOP: (libc)Start/Stop Characters.
|
||
* VSUSP: (libc)Signal Characters.
|
||
* VTIME: (libc)Noncanonical Input.
|
||
* VWERASE: (libc)Editing Characters.
|
||
* WCHAR_MAX: (libc)Extended Char Intro.
|
||
* WCHAR_MIN: (libc)Extended Char Intro.
|
||
* WCOREDUMP: (libc)Process Completion Status.
|
||
* WEOF: (libc)EOF and Errors.
|
||
* WEOF: (libc)Extended Char Intro.
|
||
* WEXITSTATUS: (libc)Process Completion Status.
|
||
* WIFEXITED: (libc)Process Completion Status.
|
||
* WIFSIGNALED: (libc)Process Completion Status.
|
||
* WIFSTOPPED: (libc)Process Completion Status.
|
||
* WSTOPSIG: (libc)Process Completion Status.
|
||
* WTERMSIG: (libc)Process Completion Status.
|
||
* W_OK: (libc)Testing File Access.
|
||
* X_OK: (libc)Testing File Access.
|
||
* _Complex_I: (libc)Complex Numbers.
|
||
* _Exit: (libc)Termination Internals.
|
||
* _IOFBF: (libc)Controlling Buffering.
|
||
* _IOLBF: (libc)Controlling Buffering.
|
||
* _IONBF: (libc)Controlling Buffering.
|
||
* _Imaginary_I: (libc)Complex Numbers.
|
||
* _PATH_UTMP: (libc)Manipulating the Database.
|
||
* _PATH_WTMP: (libc)Manipulating the Database.
|
||
* _POSIX2_C_DEV: (libc)System Options.
|
||
* _POSIX2_C_VERSION: (libc)Version Supported.
|
||
* _POSIX2_FORT_DEV: (libc)System Options.
|
||
* _POSIX2_FORT_RUN: (libc)System Options.
|
||
* _POSIX2_LOCALEDEF: (libc)System Options.
|
||
* _POSIX2_SW_DEV: (libc)System Options.
|
||
* _POSIX_CHOWN_RESTRICTED: (libc)Options for Files.
|
||
* _POSIX_JOB_CONTROL: (libc)System Options.
|
||
* _POSIX_NO_TRUNC: (libc)Options for Files.
|
||
* _POSIX_SAVED_IDS: (libc)System Options.
|
||
* _POSIX_VDISABLE: (libc)Options for Files.
|
||
* _POSIX_VERSION: (libc)Version Supported.
|
||
* __fbufsize: (libc)Controlling Buffering.
|
||
* __flbf: (libc)Controlling Buffering.
|
||
* __fpending: (libc)Controlling Buffering.
|
||
* __fpurge: (libc)Flushing Buffers.
|
||
* __freadable: (libc)Opening Streams.
|
||
* __freading: (libc)Opening Streams.
|
||
* __fsetlocking: (libc)Streams and Threads.
|
||
* __fwritable: (libc)Opening Streams.
|
||
* __fwriting: (libc)Opening Streams.
|
||
* __gconv_end_fct: (libc)glibc iconv Implementation.
|
||
* __gconv_fct: (libc)glibc iconv Implementation.
|
||
* __gconv_init_fct: (libc)glibc iconv Implementation.
|
||
* __ppc_get_timebase: (libc)PowerPC.
|
||
* __ppc_get_timebase_freq: (libc)PowerPC.
|
||
* __ppc_mdoio: (libc)PowerPC.
|
||
* __ppc_mdoom: (libc)PowerPC.
|
||
* __ppc_set_ppr_low: (libc)PowerPC.
|
||
* __ppc_set_ppr_med: (libc)PowerPC.
|
||
* __ppc_set_ppr_med_high: (libc)PowerPC.
|
||
* __ppc_set_ppr_med_low: (libc)PowerPC.
|
||
* __ppc_set_ppr_very_low: (libc)PowerPC.
|
||
* __ppc_yield: (libc)PowerPC.
|
||
* __riscv_flush_icache: (libc)RISC-V.
|
||
* __va_copy: (libc)Argument Macros.
|
||
* _exit: (libc)Termination Internals.
|
||
* _flushlbf: (libc)Flushing Buffers.
|
||
* _tolower: (libc)Case Conversion.
|
||
* _toupper: (libc)Case Conversion.
|
||
* a64l: (libc)Encode Binary Data.
|
||
* abort: (libc)Aborting a Program.
|
||
* abs: (libc)Absolute Value.
|
||
* accept: (libc)Accepting Connections.
|
||
* access: (libc)Testing File Access.
|
||
* acos: (libc)Inverse Trig Functions.
|
||
* acosf: (libc)Inverse Trig Functions.
|
||
* acosfN: (libc)Inverse Trig Functions.
|
||
* acosfNx: (libc)Inverse Trig Functions.
|
||
* acosh: (libc)Hyperbolic Functions.
|
||
* acoshf: (libc)Hyperbolic Functions.
|
||
* acoshfN: (libc)Hyperbolic Functions.
|
||
* acoshfNx: (libc)Hyperbolic Functions.
|
||
* acoshl: (libc)Hyperbolic Functions.
|
||
* acosl: (libc)Inverse Trig Functions.
|
||
* addmntent: (libc)mtab.
|
||
* addseverity: (libc)Adding Severity Classes.
|
||
* adjtime: (libc)High-Resolution Calendar.
|
||
* adjtimex: (libc)High-Resolution Calendar.
|
||
* aio_cancel64: (libc)Cancel AIO Operations.
|
||
* aio_cancel: (libc)Cancel AIO Operations.
|
||
* aio_error64: (libc)Status of AIO Operations.
|
||
* aio_error: (libc)Status of AIO Operations.
|
||
* aio_fsync64: (libc)Synchronizing AIO Operations.
|
||
* aio_fsync: (libc)Synchronizing AIO Operations.
|
||
* aio_init: (libc)Configuration of AIO.
|
||
* aio_read64: (libc)Asynchronous Reads/Writes.
|
||
* aio_read: (libc)Asynchronous Reads/Writes.
|
||
* aio_return64: (libc)Status of AIO Operations.
|
||
* aio_return: (libc)Status of AIO Operations.
|
||
* aio_suspend64: (libc)Synchronizing AIO Operations.
|
||
* aio_suspend: (libc)Synchronizing AIO Operations.
|
||
* aio_write64: (libc)Asynchronous Reads/Writes.
|
||
* aio_write: (libc)Asynchronous Reads/Writes.
|
||
* alarm: (libc)Setting an Alarm.
|
||
* aligned_alloc: (libc)Aligned Memory Blocks.
|
||
* alloca: (libc)Variable Size Automatic.
|
||
* alphasort64: (libc)Scanning Directory Content.
|
||
* alphasort: (libc)Scanning Directory Content.
|
||
* argp_error: (libc)Argp Helper Functions.
|
||
* argp_failure: (libc)Argp Helper Functions.
|
||
* argp_help: (libc)Argp Help.
|
||
* argp_parse: (libc)Argp.
|
||
* argp_state_help: (libc)Argp Helper Functions.
|
||
* argp_usage: (libc)Argp Helper Functions.
|
||
* argz_add: (libc)Argz Functions.
|
||
* argz_add_sep: (libc)Argz Functions.
|
||
* argz_append: (libc)Argz Functions.
|
||
* argz_count: (libc)Argz Functions.
|
||
* argz_create: (libc)Argz Functions.
|
||
* argz_create_sep: (libc)Argz Functions.
|
||
* argz_delete: (libc)Argz Functions.
|
||
* argz_extract: (libc)Argz Functions.
|
||
* argz_insert: (libc)Argz Functions.
|
||
* argz_next: (libc)Argz Functions.
|
||
* argz_replace: (libc)Argz Functions.
|
||
* argz_stringify: (libc)Argz Functions.
|
||
* asctime: (libc)Formatting Calendar Time.
|
||
* asctime_r: (libc)Formatting Calendar Time.
|
||
* asin: (libc)Inverse Trig Functions.
|
||
* asinf: (libc)Inverse Trig Functions.
|
||
* asinfN: (libc)Inverse Trig Functions.
|
||
* asinfNx: (libc)Inverse Trig Functions.
|
||
* asinh: (libc)Hyperbolic Functions.
|
||
* asinhf: (libc)Hyperbolic Functions.
|
||
* asinhfN: (libc)Hyperbolic Functions.
|
||
* asinhfNx: (libc)Hyperbolic Functions.
|
||
* asinhl: (libc)Hyperbolic Functions.
|
||
* asinl: (libc)Inverse Trig Functions.
|
||
* asprintf: (libc)Dynamic Output.
|
||
* assert: (libc)Consistency Checking.
|
||
* assert_perror: (libc)Consistency Checking.
|
||
* atan2: (libc)Inverse Trig Functions.
|
||
* atan2f: (libc)Inverse Trig Functions.
|
||
* atan2fN: (libc)Inverse Trig Functions.
|
||
* atan2fNx: (libc)Inverse Trig Functions.
|
||
* atan2l: (libc)Inverse Trig Functions.
|
||
* atan: (libc)Inverse Trig Functions.
|
||
* atanf: (libc)Inverse Trig Functions.
|
||
* atanfN: (libc)Inverse Trig Functions.
|
||
* atanfNx: (libc)Inverse Trig Functions.
|
||
* atanh: (libc)Hyperbolic Functions.
|
||
* atanhf: (libc)Hyperbolic Functions.
|
||
* atanhfN: (libc)Hyperbolic Functions.
|
||
* atanhfNx: (libc)Hyperbolic Functions.
|
||
* atanhl: (libc)Hyperbolic Functions.
|
||
* atanl: (libc)Inverse Trig Functions.
|
||
* atexit: (libc)Cleanups on Exit.
|
||
* atof: (libc)Parsing of Floats.
|
||
* atoi: (libc)Parsing of Integers.
|
||
* atol: (libc)Parsing of Integers.
|
||
* atoll: (libc)Parsing of Integers.
|
||
* backtrace: (libc)Backtraces.
|
||
* backtrace_symbols: (libc)Backtraces.
|
||
* backtrace_symbols_fd: (libc)Backtraces.
|
||
* basename: (libc)Finding Tokens in a String.
|
||
* basename: (libc)Finding Tokens in a String.
|
||
* bcmp: (libc)String/Array Comparison.
|
||
* bcopy: (libc)Copying Strings and Arrays.
|
||
* bind: (libc)Setting Address.
|
||
* bind_textdomain_codeset: (libc)Charset conversion in gettext.
|
||
* bindtextdomain: (libc)Locating gettext catalog.
|
||
* brk: (libc)Resizing the Data Segment.
|
||
* bsearch: (libc)Array Search Function.
|
||
* btowc: (libc)Converting a Character.
|
||
* bzero: (libc)Copying Strings and Arrays.
|
||
* cabs: (libc)Absolute Value.
|
||
* cabsf: (libc)Absolute Value.
|
||
* cabsfN: (libc)Absolute Value.
|
||
* cabsfNx: (libc)Absolute Value.
|
||
* cabsl: (libc)Absolute Value.
|
||
* cacos: (libc)Inverse Trig Functions.
|
||
* cacosf: (libc)Inverse Trig Functions.
|
||
* cacosfN: (libc)Inverse Trig Functions.
|
||
* cacosfNx: (libc)Inverse Trig Functions.
|
||
* cacosh: (libc)Hyperbolic Functions.
|
||
* cacoshf: (libc)Hyperbolic Functions.
|
||
* cacoshfN: (libc)Hyperbolic Functions.
|
||
* cacoshfNx: (libc)Hyperbolic Functions.
|
||
* cacoshl: (libc)Hyperbolic Functions.
|
||
* cacosl: (libc)Inverse Trig Functions.
|
||
* call_once: (libc)Call Once.
|
||
* calloc: (libc)Allocating Cleared Space.
|
||
* canonicalize: (libc)FP Bit Twiddling.
|
||
* canonicalize_file_name: (libc)Symbolic Links.
|
||
* canonicalizef: (libc)FP Bit Twiddling.
|
||
* canonicalizefN: (libc)FP Bit Twiddling.
|
||
* canonicalizefNx: (libc)FP Bit Twiddling.
|
||
* canonicalizel: (libc)FP Bit Twiddling.
|
||
* carg: (libc)Operations on Complex.
|
||
* cargf: (libc)Operations on Complex.
|
||
* cargfN: (libc)Operations on Complex.
|
||
* cargfNx: (libc)Operations on Complex.
|
||
* cargl: (libc)Operations on Complex.
|
||
* casin: (libc)Inverse Trig Functions.
|
||
* casinf: (libc)Inverse Trig Functions.
|
||
* casinfN: (libc)Inverse Trig Functions.
|
||
* casinfNx: (libc)Inverse Trig Functions.
|
||
* casinh: (libc)Hyperbolic Functions.
|
||
* casinhf: (libc)Hyperbolic Functions.
|
||
* casinhfN: (libc)Hyperbolic Functions.
|
||
* casinhfNx: (libc)Hyperbolic Functions.
|
||
* casinhl: (libc)Hyperbolic Functions.
|
||
* casinl: (libc)Inverse Trig Functions.
|
||
* catan: (libc)Inverse Trig Functions.
|
||
* catanf: (libc)Inverse Trig Functions.
|
||
* catanfN: (libc)Inverse Trig Functions.
|
||
* catanfNx: (libc)Inverse Trig Functions.
|
||
* catanh: (libc)Hyperbolic Functions.
|
||
* catanhf: (libc)Hyperbolic Functions.
|
||
* catanhfN: (libc)Hyperbolic Functions.
|
||
* catanhfNx: (libc)Hyperbolic Functions.
|
||
* catanhl: (libc)Hyperbolic Functions.
|
||
* catanl: (libc)Inverse Trig Functions.
|
||
* catclose: (libc)The catgets Functions.
|
||
* catgets: (libc)The catgets Functions.
|
||
* catopen: (libc)The catgets Functions.
|
||
* cbrt: (libc)Exponents and Logarithms.
|
||
* cbrtf: (libc)Exponents and Logarithms.
|
||
* cbrtfN: (libc)Exponents and Logarithms.
|
||
* cbrtfNx: (libc)Exponents and Logarithms.
|
||
* cbrtl: (libc)Exponents and Logarithms.
|
||
* ccos: (libc)Trig Functions.
|
||
* ccosf: (libc)Trig Functions.
|
||
* ccosfN: (libc)Trig Functions.
|
||
* ccosfNx: (libc)Trig Functions.
|
||
* ccosh: (libc)Hyperbolic Functions.
|
||
* ccoshf: (libc)Hyperbolic Functions.
|
||
* ccoshfN: (libc)Hyperbolic Functions.
|
||
* ccoshfNx: (libc)Hyperbolic Functions.
|
||
* ccoshl: (libc)Hyperbolic Functions.
|
||
* ccosl: (libc)Trig Functions.
|
||
* ceil: (libc)Rounding Functions.
|
||
* ceilf: (libc)Rounding Functions.
|
||
* ceilfN: (libc)Rounding Functions.
|
||
* ceilfNx: (libc)Rounding Functions.
|
||
* ceill: (libc)Rounding Functions.
|
||
* cexp: (libc)Exponents and Logarithms.
|
||
* cexpf: (libc)Exponents and Logarithms.
|
||
* cexpfN: (libc)Exponents and Logarithms.
|
||
* cexpfNx: (libc)Exponents and Logarithms.
|
||
* cexpl: (libc)Exponents and Logarithms.
|
||
* cfgetispeed: (libc)Line Speed.
|
||
* cfgetospeed: (libc)Line Speed.
|
||
* cfmakeraw: (libc)Noncanonical Input.
|
||
* cfsetispeed: (libc)Line Speed.
|
||
* cfsetospeed: (libc)Line Speed.
|
||
* cfsetspeed: (libc)Line Speed.
|
||
* chdir: (libc)Working Directory.
|
||
* chmod: (libc)Setting Permissions.
|
||
* chown: (libc)File Owner.
|
||
* cimag: (libc)Operations on Complex.
|
||
* cimagf: (libc)Operations on Complex.
|
||
* cimagfN: (libc)Operations on Complex.
|
||
* cimagfNx: (libc)Operations on Complex.
|
||
* cimagl: (libc)Operations on Complex.
|
||
* clearenv: (libc)Environment Access.
|
||
* clearerr: (libc)Error Recovery.
|
||
* clearerr_unlocked: (libc)Error Recovery.
|
||
* clock: (libc)CPU Time.
|
||
* clog10: (libc)Exponents and Logarithms.
|
||
* clog10f: (libc)Exponents and Logarithms.
|
||
* clog10fN: (libc)Exponents and Logarithms.
|
||
* clog10fNx: (libc)Exponents and Logarithms.
|
||
* clog10l: (libc)Exponents and Logarithms.
|
||
* clog: (libc)Exponents and Logarithms.
|
||
* clogf: (libc)Exponents and Logarithms.
|
||
* clogfN: (libc)Exponents and Logarithms.
|
||
* clogfNx: (libc)Exponents and Logarithms.
|
||
* clogl: (libc)Exponents and Logarithms.
|
||
* close: (libc)Opening and Closing Files.
|
||
* closedir: (libc)Reading/Closing Directory.
|
||
* closelog: (libc)closelog.
|
||
* cnd_broadcast: (libc)ISO C Condition Variables.
|
||
* cnd_destroy: (libc)ISO C Condition Variables.
|
||
* cnd_init: (libc)ISO C Condition Variables.
|
||
* cnd_signal: (libc)ISO C Condition Variables.
|
||
* cnd_timedwait: (libc)ISO C Condition Variables.
|
||
* cnd_wait: (libc)ISO C Condition Variables.
|
||
* confstr: (libc)String Parameters.
|
||
* conj: (libc)Operations on Complex.
|
||
* conjf: (libc)Operations on Complex.
|
||
* conjfN: (libc)Operations on Complex.
|
||
* conjfNx: (libc)Operations on Complex.
|
||
* conjl: (libc)Operations on Complex.
|
||
* connect: (libc)Connecting.
|
||
* copy_file_range: (libc)Copying File Data.
|
||
* copysign: (libc)FP Bit Twiddling.
|
||
* copysignf: (libc)FP Bit Twiddling.
|
||
* copysignfN: (libc)FP Bit Twiddling.
|
||
* copysignfNx: (libc)FP Bit Twiddling.
|
||
* copysignl: (libc)FP Bit Twiddling.
|
||
* cos: (libc)Trig Functions.
|
||
* cosf: (libc)Trig Functions.
|
||
* cosfN: (libc)Trig Functions.
|
||
* cosfNx: (libc)Trig Functions.
|
||
* cosh: (libc)Hyperbolic Functions.
|
||
* coshf: (libc)Hyperbolic Functions.
|
||
* coshfN: (libc)Hyperbolic Functions.
|
||
* coshfNx: (libc)Hyperbolic Functions.
|
||
* coshl: (libc)Hyperbolic Functions.
|
||
* cosl: (libc)Trig Functions.
|
||
* cpow: (libc)Exponents and Logarithms.
|
||
* cpowf: (libc)Exponents and Logarithms.
|
||
* cpowfN: (libc)Exponents and Logarithms.
|
||
* cpowfNx: (libc)Exponents and Logarithms.
|
||
* cpowl: (libc)Exponents and Logarithms.
|
||
* cproj: (libc)Operations on Complex.
|
||
* cprojf: (libc)Operations on Complex.
|
||
* cprojfN: (libc)Operations on Complex.
|
||
* cprojfNx: (libc)Operations on Complex.
|
||
* cprojl: (libc)Operations on Complex.
|
||
* creal: (libc)Operations on Complex.
|
||
* crealf: (libc)Operations on Complex.
|
||
* crealfN: (libc)Operations on Complex.
|
||
* crealfNx: (libc)Operations on Complex.
|
||
* creall: (libc)Operations on Complex.
|
||
* creat64: (libc)Opening and Closing Files.
|
||
* creat: (libc)Opening and Closing Files.
|
||
* crypt: (libc)Passphrase Storage.
|
||
* crypt_r: (libc)Passphrase Storage.
|
||
* csin: (libc)Trig Functions.
|
||
* csinf: (libc)Trig Functions.
|
||
* csinfN: (libc)Trig Functions.
|
||
* csinfNx: (libc)Trig Functions.
|
||
* csinh: (libc)Hyperbolic Functions.
|
||
* csinhf: (libc)Hyperbolic Functions.
|
||
* csinhfN: (libc)Hyperbolic Functions.
|
||
* csinhfNx: (libc)Hyperbolic Functions.
|
||
* csinhl: (libc)Hyperbolic Functions.
|
||
* csinl: (libc)Trig Functions.
|
||
* csqrt: (libc)Exponents and Logarithms.
|
||
* csqrtf: (libc)Exponents and Logarithms.
|
||
* csqrtfN: (libc)Exponents and Logarithms.
|
||
* csqrtfNx: (libc)Exponents and Logarithms.
|
||
* csqrtl: (libc)Exponents and Logarithms.
|
||
* ctan: (libc)Trig Functions.
|
||
* ctanf: (libc)Trig Functions.
|
||
* ctanfN: (libc)Trig Functions.
|
||
* ctanfNx: (libc)Trig Functions.
|
||
* ctanh: (libc)Hyperbolic Functions.
|
||
* ctanhf: (libc)Hyperbolic Functions.
|
||
* ctanhfN: (libc)Hyperbolic Functions.
|
||
* ctanhfNx: (libc)Hyperbolic Functions.
|
||
* ctanhl: (libc)Hyperbolic Functions.
|
||
* ctanl: (libc)Trig Functions.
|
||
* ctermid: (libc)Identifying the Terminal.
|
||
* ctime: (libc)Formatting Calendar Time.
|
||
* ctime_r: (libc)Formatting Calendar Time.
|
||
* cuserid: (libc)Who Logged In.
|
||
* daddl: (libc)Misc FP Arithmetic.
|
||
* dcgettext: (libc)Translation with gettext.
|
||
* dcngettext: (libc)Advanced gettext functions.
|
||
* ddivl: (libc)Misc FP Arithmetic.
|
||
* dgettext: (libc)Translation with gettext.
|
||
* difftime: (libc)Elapsed Time.
|
||
* dirfd: (libc)Opening a Directory.
|
||
* dirname: (libc)Finding Tokens in a String.
|
||
* div: (libc)Integer Division.
|
||
* dmull: (libc)Misc FP Arithmetic.
|
||
* dngettext: (libc)Advanced gettext functions.
|
||
* drand48: (libc)SVID Random.
|
||
* drand48_r: (libc)SVID Random.
|
||
* drem: (libc)Remainder Functions.
|
||
* dremf: (libc)Remainder Functions.
|
||
* dreml: (libc)Remainder Functions.
|
||
* dsubl: (libc)Misc FP Arithmetic.
|
||
* dup2: (libc)Duplicating Descriptors.
|
||
* dup: (libc)Duplicating Descriptors.
|
||
* ecvt: (libc)System V Number Conversion.
|
||
* ecvt_r: (libc)System V Number Conversion.
|
||
* endfsent: (libc)fstab.
|
||
* endgrent: (libc)Scanning All Groups.
|
||
* endhostent: (libc)Host Names.
|
||
* endmntent: (libc)mtab.
|
||
* endnetent: (libc)Networks Database.
|
||
* endnetgrent: (libc)Lookup Netgroup.
|
||
* endprotoent: (libc)Protocols Database.
|
||
* endpwent: (libc)Scanning All Users.
|
||
* endservent: (libc)Services Database.
|
||
* endutent: (libc)Manipulating the Database.
|
||
* endutxent: (libc)XPG Functions.
|
||
* envz_add: (libc)Envz Functions.
|
||
* envz_entry: (libc)Envz Functions.
|
||
* envz_get: (libc)Envz Functions.
|
||
* envz_merge: (libc)Envz Functions.
|
||
* envz_remove: (libc)Envz Functions.
|
||
* envz_strip: (libc)Envz Functions.
|
||
* erand48: (libc)SVID Random.
|
||
* erand48_r: (libc)SVID Random.
|
||
* erf: (libc)Special Functions.
|
||
* erfc: (libc)Special Functions.
|
||
* erfcf: (libc)Special Functions.
|
||
* erfcfN: (libc)Special Functions.
|
||
* erfcfNx: (libc)Special Functions.
|
||
* erfcl: (libc)Special Functions.
|
||
* erff: (libc)Special Functions.
|
||
* erffN: (libc)Special Functions.
|
||
* erffNx: (libc)Special Functions.
|
||
* erfl: (libc)Special Functions.
|
||
* err: (libc)Error Messages.
|
||
* errno: (libc)Checking for Errors.
|
||
* error: (libc)Error Messages.
|
||
* error_at_line: (libc)Error Messages.
|
||
* errx: (libc)Error Messages.
|
||
* execl: (libc)Executing a File.
|
||
* execle: (libc)Executing a File.
|
||
* execlp: (libc)Executing a File.
|
||
* execv: (libc)Executing a File.
|
||
* execve: (libc)Executing a File.
|
||
* execvp: (libc)Executing a File.
|
||
* exit: (libc)Normal Termination.
|
||
* exp10: (libc)Exponents and Logarithms.
|
||
* exp10f: (libc)Exponents and Logarithms.
|
||
* exp10fN: (libc)Exponents and Logarithms.
|
||
* exp10fNx: (libc)Exponents and Logarithms.
|
||
* exp10l: (libc)Exponents and Logarithms.
|
||
* exp2: (libc)Exponents and Logarithms.
|
||
* exp2f: (libc)Exponents and Logarithms.
|
||
* exp2fN: (libc)Exponents and Logarithms.
|
||
* exp2fNx: (libc)Exponents and Logarithms.
|
||
* exp2l: (libc)Exponents and Logarithms.
|
||
* exp: (libc)Exponents and Logarithms.
|
||
* expf: (libc)Exponents and Logarithms.
|
||
* expfN: (libc)Exponents and Logarithms.
|
||
* expfNx: (libc)Exponents and Logarithms.
|
||
* expl: (libc)Exponents and Logarithms.
|
||
* explicit_bzero: (libc)Erasing Sensitive Data.
|
||
* expm1: (libc)Exponents and Logarithms.
|
||
* expm1f: (libc)Exponents and Logarithms.
|
||
* expm1fN: (libc)Exponents and Logarithms.
|
||
* expm1fNx: (libc)Exponents and Logarithms.
|
||
* expm1l: (libc)Exponents and Logarithms.
|
||
* fMaddfN: (libc)Misc FP Arithmetic.
|
||
* fMaddfNx: (libc)Misc FP Arithmetic.
|
||
* fMdivfN: (libc)Misc FP Arithmetic.
|
||
* fMdivfNx: (libc)Misc FP Arithmetic.
|
||
* fMmulfN: (libc)Misc FP Arithmetic.
|
||
* fMmulfNx: (libc)Misc FP Arithmetic.
|
||
* fMsubfN: (libc)Misc FP Arithmetic.
|
||
* fMsubfNx: (libc)Misc FP Arithmetic.
|
||
* fMxaddfN: (libc)Misc FP Arithmetic.
|
||
* fMxaddfNx: (libc)Misc FP Arithmetic.
|
||
* fMxdivfN: (libc)Misc FP Arithmetic.
|
||
* fMxdivfNx: (libc)Misc FP Arithmetic.
|
||
* fMxmulfN: (libc)Misc FP Arithmetic.
|
||
* fMxmulfNx: (libc)Misc FP Arithmetic.
|
||
* fMxsubfN: (libc)Misc FP Arithmetic.
|
||
* fMxsubfNx: (libc)Misc FP Arithmetic.
|
||
* fabs: (libc)Absolute Value.
|
||
* fabsf: (libc)Absolute Value.
|
||
* fabsfN: (libc)Absolute Value.
|
||
* fabsfNx: (libc)Absolute Value.
|
||
* fabsl: (libc)Absolute Value.
|
||
* fadd: (libc)Misc FP Arithmetic.
|
||
* faddl: (libc)Misc FP Arithmetic.
|
||
* fchdir: (libc)Working Directory.
|
||
* fchmod: (libc)Setting Permissions.
|
||
* fchown: (libc)File Owner.
|
||
* fclose: (libc)Closing Streams.
|
||
* fcloseall: (libc)Closing Streams.
|
||
* fcntl: (libc)Control Operations.
|
||
* fcvt: (libc)System V Number Conversion.
|
||
* fcvt_r: (libc)System V Number Conversion.
|
||
* fdatasync: (libc)Synchronizing I/O.
|
||
* fdim: (libc)Misc FP Arithmetic.
|
||
* fdimf: (libc)Misc FP Arithmetic.
|
||
* fdimfN: (libc)Misc FP Arithmetic.
|
||
* fdimfNx: (libc)Misc FP Arithmetic.
|
||
* fdiml: (libc)Misc FP Arithmetic.
|
||
* fdiv: (libc)Misc FP Arithmetic.
|
||
* fdivl: (libc)Misc FP Arithmetic.
|
||
* fdopen: (libc)Descriptors and Streams.
|
||
* fdopendir: (libc)Opening a Directory.
|
||
* feclearexcept: (libc)Status bit operations.
|
||
* fedisableexcept: (libc)Control Functions.
|
||
* feenableexcept: (libc)Control Functions.
|
||
* fegetenv: (libc)Control Functions.
|
||
* fegetexcept: (libc)Control Functions.
|
||
* fegetexceptflag: (libc)Status bit operations.
|
||
* fegetmode: (libc)Control Functions.
|
||
* fegetround: (libc)Rounding.
|
||
* feholdexcept: (libc)Control Functions.
|
||
* feof: (libc)EOF and Errors.
|
||
* feof_unlocked: (libc)EOF and Errors.
|
||
* feraiseexcept: (libc)Status bit operations.
|
||
* ferror: (libc)EOF and Errors.
|
||
* ferror_unlocked: (libc)EOF and Errors.
|
||
* fesetenv: (libc)Control Functions.
|
||
* fesetexcept: (libc)Status bit operations.
|
||
* fesetexceptflag: (libc)Status bit operations.
|
||
* fesetmode: (libc)Control Functions.
|
||
* fesetround: (libc)Rounding.
|
||
* fetestexcept: (libc)Status bit operations.
|
||
* fetestexceptflag: (libc)Status bit operations.
|
||
* feupdateenv: (libc)Control Functions.
|
||
* fflush: (libc)Flushing Buffers.
|
||
* fflush_unlocked: (libc)Flushing Buffers.
|
||
* fgetc: (libc)Character Input.
|
||
* fgetc_unlocked: (libc)Character Input.
|
||
* fgetgrent: (libc)Scanning All Groups.
|
||
* fgetgrent_r: (libc)Scanning All Groups.
|
||
* fgetpos64: (libc)Portable Positioning.
|
||
* fgetpos: (libc)Portable Positioning.
|
||
* fgetpwent: (libc)Scanning All Users.
|
||
* fgetpwent_r: (libc)Scanning All Users.
|
||
* fgets: (libc)Line Input.
|
||
* fgets_unlocked: (libc)Line Input.
|
||
* fgetwc: (libc)Character Input.
|
||
* fgetwc_unlocked: (libc)Character Input.
|
||
* fgetws: (libc)Line Input.
|
||
* fgetws_unlocked: (libc)Line Input.
|
||
* fileno: (libc)Descriptors and Streams.
|
||
* fileno_unlocked: (libc)Descriptors and Streams.
|
||
* finite: (libc)Floating Point Classes.
|
||
* finitef: (libc)Floating Point Classes.
|
||
* finitel: (libc)Floating Point Classes.
|
||
* flockfile: (libc)Streams and Threads.
|
||
* floor: (libc)Rounding Functions.
|
||
* floorf: (libc)Rounding Functions.
|
||
* floorfN: (libc)Rounding Functions.
|
||
* floorfNx: (libc)Rounding Functions.
|
||
* floorl: (libc)Rounding Functions.
|
||
* fma: (libc)Misc FP Arithmetic.
|
||
* fmaf: (libc)Misc FP Arithmetic.
|
||
* fmafN: (libc)Misc FP Arithmetic.
|
||
* fmafNx: (libc)Misc FP Arithmetic.
|
||
* fmal: (libc)Misc FP Arithmetic.
|
||
* fmax: (libc)Misc FP Arithmetic.
|
||
* fmaxf: (libc)Misc FP Arithmetic.
|
||
* fmaxfN: (libc)Misc FP Arithmetic.
|
||
* fmaxfNx: (libc)Misc FP Arithmetic.
|
||
* fmaxl: (libc)Misc FP Arithmetic.
|
||
* fmaxmag: (libc)Misc FP Arithmetic.
|
||
* fmaxmagf: (libc)Misc FP Arithmetic.
|
||
* fmaxmagfN: (libc)Misc FP Arithmetic.
|
||
* fmaxmagfNx: (libc)Misc FP Arithmetic.
|
||
* fmaxmagl: (libc)Misc FP Arithmetic.
|
||
* fmemopen: (libc)String Streams.
|
||
* fmin: (libc)Misc FP Arithmetic.
|
||
* fminf: (libc)Misc FP Arithmetic.
|
||
* fminfN: (libc)Misc FP Arithmetic.
|
||
* fminfNx: (libc)Misc FP Arithmetic.
|
||
* fminl: (libc)Misc FP Arithmetic.
|
||
* fminmag: (libc)Misc FP Arithmetic.
|
||
* fminmagf: (libc)Misc FP Arithmetic.
|
||
* fminmagfN: (libc)Misc FP Arithmetic.
|
||
* fminmagfNx: (libc)Misc FP Arithmetic.
|
||
* fminmagl: (libc)Misc FP Arithmetic.
|
||
* fmod: (libc)Remainder Functions.
|
||
* fmodf: (libc)Remainder Functions.
|
||
* fmodfN: (libc)Remainder Functions.
|
||
* fmodfNx: (libc)Remainder Functions.
|
||
* fmodl: (libc)Remainder Functions.
|
||
* fmtmsg: (libc)Printing Formatted Messages.
|
||
* fmul: (libc)Misc FP Arithmetic.
|
||
* fmull: (libc)Misc FP Arithmetic.
|
||
* fnmatch: (libc)Wildcard Matching.
|
||
* fopen64: (libc)Opening Streams.
|
||
* fopen: (libc)Opening Streams.
|
||
* fopencookie: (libc)Streams and Cookies.
|
||
* fork: (libc)Creating a Process.
|
||
* forkpty: (libc)Pseudo-Terminal Pairs.
|
||
* fpathconf: (libc)Pathconf.
|
||
* fpclassify: (libc)Floating Point Classes.
|
||
* fprintf: (libc)Formatted Output Functions.
|
||
* fputc: (libc)Simple Output.
|
||
* fputc_unlocked: (libc)Simple Output.
|
||
* fputs: (libc)Simple Output.
|
||
* fputs_unlocked: (libc)Simple Output.
|
||
* fputwc: (libc)Simple Output.
|
||
* fputwc_unlocked: (libc)Simple Output.
|
||
* fputws: (libc)Simple Output.
|
||
* fputws_unlocked: (libc)Simple Output.
|
||
* fread: (libc)Block Input/Output.
|
||
* fread_unlocked: (libc)Block Input/Output.
|
||
* free: (libc)Freeing after Malloc.
|
||
* freopen64: (libc)Opening Streams.
|
||
* freopen: (libc)Opening Streams.
|
||
* frexp: (libc)Normalization Functions.
|
||
* frexpf: (libc)Normalization Functions.
|
||
* frexpfN: (libc)Normalization Functions.
|
||
* frexpfNx: (libc)Normalization Functions.
|
||
* frexpl: (libc)Normalization Functions.
|
||
* fromfp: (libc)Rounding Functions.
|
||
* fromfpf: (libc)Rounding Functions.
|
||
* fromfpfN: (libc)Rounding Functions.
|
||
* fromfpfNx: (libc)Rounding Functions.
|
||
* fromfpl: (libc)Rounding Functions.
|
||
* fromfpx: (libc)Rounding Functions.
|
||
* fromfpxf: (libc)Rounding Functions.
|
||
* fromfpxfN: (libc)Rounding Functions.
|
||
* fromfpxfNx: (libc)Rounding Functions.
|
||
* fromfpxl: (libc)Rounding Functions.
|
||
* fscanf: (libc)Formatted Input Functions.
|
||
* fseek: (libc)File Positioning.
|
||
* fseeko64: (libc)File Positioning.
|
||
* fseeko: (libc)File Positioning.
|
||
* fsetpos64: (libc)Portable Positioning.
|
||
* fsetpos: (libc)Portable Positioning.
|
||
* fstat64: (libc)Reading Attributes.
|
||
* fstat: (libc)Reading Attributes.
|
||
* fsub: (libc)Misc FP Arithmetic.
|
||
* fsubl: (libc)Misc FP Arithmetic.
|
||
* fsync: (libc)Synchronizing I/O.
|
||
* ftell: (libc)File Positioning.
|
||
* ftello64: (libc)File Positioning.
|
||
* ftello: (libc)File Positioning.
|
||
* ftruncate64: (libc)File Size.
|
||
* ftruncate: (libc)File Size.
|
||
* ftrylockfile: (libc)Streams and Threads.
|
||
* ftw64: (libc)Working with Directory Trees.
|
||
* ftw: (libc)Working with Directory Trees.
|
||
* funlockfile: (libc)Streams and Threads.
|
||
* futimes: (libc)File Times.
|
||
* fwide: (libc)Streams and I18N.
|
||
* fwprintf: (libc)Formatted Output Functions.
|
||
* fwrite: (libc)Block Input/Output.
|
||
* fwrite_unlocked: (libc)Block Input/Output.
|
||
* fwscanf: (libc)Formatted Input Functions.
|
||
* gamma: (libc)Special Functions.
|
||
* gammaf: (libc)Special Functions.
|
||
* gammal: (libc)Special Functions.
|
||
* gcvt: (libc)System V Number Conversion.
|
||
* get_avphys_pages: (libc)Query Memory Parameters.
|
||
* get_current_dir_name: (libc)Working Directory.
|
||
* get_nprocs: (libc)Processor Resources.
|
||
* get_nprocs_conf: (libc)Processor Resources.
|
||
* get_phys_pages: (libc)Query Memory Parameters.
|
||
* getauxval: (libc)Auxiliary Vector.
|
||
* getc: (libc)Character Input.
|
||
* getc_unlocked: (libc)Character Input.
|
||
* getchar: (libc)Character Input.
|
||
* getchar_unlocked: (libc)Character Input.
|
||
* getcontext: (libc)System V contexts.
|
||
* getcpu: (libc)CPU Affinity.
|
||
* getcwd: (libc)Working Directory.
|
||
* getdate: (libc)General Time String Parsing.
|
||
* getdate_r: (libc)General Time String Parsing.
|
||
* getdelim: (libc)Line Input.
|
||
* getdomainname: (libc)Host Identification.
|
||
* getegid: (libc)Reading Persona.
|
||
* getentropy: (libc)Unpredictable Bytes.
|
||
* getenv: (libc)Environment Access.
|
||
* geteuid: (libc)Reading Persona.
|
||
* getfsent: (libc)fstab.
|
||
* getfsfile: (libc)fstab.
|
||
* getfsspec: (libc)fstab.
|
||
* getgid: (libc)Reading Persona.
|
||
* getgrent: (libc)Scanning All Groups.
|
||
* getgrent_r: (libc)Scanning All Groups.
|
||
* getgrgid: (libc)Lookup Group.
|
||
* getgrgid_r: (libc)Lookup Group.
|
||
* getgrnam: (libc)Lookup Group.
|
||
* getgrnam_r: (libc)Lookup Group.
|
||
* getgrouplist: (libc)Setting Groups.
|
||
* getgroups: (libc)Reading Persona.
|
||
* gethostbyaddr: (libc)Host Names.
|
||
* gethostbyaddr_r: (libc)Host Names.
|
||
* gethostbyname2: (libc)Host Names.
|
||
* gethostbyname2_r: (libc)Host Names.
|
||
* gethostbyname: (libc)Host Names.
|
||
* gethostbyname_r: (libc)Host Names.
|
||
* gethostent: (libc)Host Names.
|
||
* gethostid: (libc)Host Identification.
|
||
* gethostname: (libc)Host Identification.
|
||
* getitimer: (libc)Setting an Alarm.
|
||
* getline: (libc)Line Input.
|
||
* getloadavg: (libc)Processor Resources.
|
||
* getlogin: (libc)Who Logged In.
|
||
* getmntent: (libc)mtab.
|
||
* getmntent_r: (libc)mtab.
|
||
* getnetbyaddr: (libc)Networks Database.
|
||
* getnetbyname: (libc)Networks Database.
|
||
* getnetent: (libc)Networks Database.
|
||
* getnetgrent: (libc)Lookup Netgroup.
|
||
* getnetgrent_r: (libc)Lookup Netgroup.
|
||
* getopt: (libc)Using Getopt.
|
||
* getopt_long: (libc)Getopt Long Options.
|
||
* getopt_long_only: (libc)Getopt Long Options.
|
||
* getpagesize: (libc)Query Memory Parameters.
|
||
* getpass: (libc)getpass.
|
||
* getpayload: (libc)FP Bit Twiddling.
|
||
* getpayloadf: (libc)FP Bit Twiddling.
|
||
* getpayloadfN: (libc)FP Bit Twiddling.
|
||
* getpayloadfNx: (libc)FP Bit Twiddling.
|
||
* getpayloadl: (libc)FP Bit Twiddling.
|
||
* getpeername: (libc)Who is Connected.
|
||
* getpgid: (libc)Process Group Functions.
|
||
* getpgrp: (libc)Process Group Functions.
|
||
* getpid: (libc)Process Identification.
|
||
* getppid: (libc)Process Identification.
|
||
* getpriority: (libc)Traditional Scheduling Functions.
|
||
* getprotobyname: (libc)Protocols Database.
|
||
* getprotobynumber: (libc)Protocols Database.
|
||
* getprotoent: (libc)Protocols Database.
|
||
* getpt: (libc)Allocation.
|
||
* getpwent: (libc)Scanning All Users.
|
||
* getpwent_r: (libc)Scanning All Users.
|
||
* getpwnam: (libc)Lookup User.
|
||
* getpwnam_r: (libc)Lookup User.
|
||
* getpwuid: (libc)Lookup User.
|
||
* getpwuid_r: (libc)Lookup User.
|
||
* getrandom: (libc)Unpredictable Bytes.
|
||
* getrlimit64: (libc)Limits on Resources.
|
||
* getrlimit: (libc)Limits on Resources.
|
||
* getrusage: (libc)Resource Usage.
|
||
* gets: (libc)Line Input.
|
||
* getservbyname: (libc)Services Database.
|
||
* getservbyport: (libc)Services Database.
|
||
* getservent: (libc)Services Database.
|
||
* getsid: (libc)Process Group Functions.
|
||
* getsockname: (libc)Reading Address.
|
||
* getsockopt: (libc)Socket Option Functions.
|
||
* getsubopt: (libc)Suboptions.
|
||
* gettext: (libc)Translation with gettext.
|
||
* gettimeofday: (libc)High-Resolution Calendar.
|
||
* getuid: (libc)Reading Persona.
|
||
* getumask: (libc)Setting Permissions.
|
||
* getutent: (libc)Manipulating the Database.
|
||
* getutent_r: (libc)Manipulating the Database.
|
||
* getutid: (libc)Manipulating the Database.
|
||
* getutid_r: (libc)Manipulating the Database.
|
||
* getutline: (libc)Manipulating the Database.
|
||
* getutline_r: (libc)Manipulating the Database.
|
||
* getutmp: (libc)XPG Functions.
|
||
* getutmpx: (libc)XPG Functions.
|
||
* getutxent: (libc)XPG Functions.
|
||
* getutxid: (libc)XPG Functions.
|
||
* getutxline: (libc)XPG Functions.
|
||
* getw: (libc)Character Input.
|
||
* getwc: (libc)Character Input.
|
||
* getwc_unlocked: (libc)Character Input.
|
||
* getwchar: (libc)Character Input.
|
||
* getwchar_unlocked: (libc)Character Input.
|
||
* getwd: (libc)Working Directory.
|
||
* glob64: (libc)Calling Glob.
|
||
* glob: (libc)Calling Glob.
|
||
* globfree64: (libc)More Flags for Globbing.
|
||
* globfree: (libc)More Flags for Globbing.
|
||
* gmtime: (libc)Broken-down Time.
|
||
* gmtime_r: (libc)Broken-down Time.
|
||
* grantpt: (libc)Allocation.
|
||
* gsignal: (libc)Signaling Yourself.
|
||
* gtty: (libc)BSD Terminal Modes.
|
||
* hasmntopt: (libc)mtab.
|
||
* hcreate: (libc)Hash Search Function.
|
||
* hcreate_r: (libc)Hash Search Function.
|
||
* hdestroy: (libc)Hash Search Function.
|
||
* hdestroy_r: (libc)Hash Search Function.
|
||
* hsearch: (libc)Hash Search Function.
|
||
* hsearch_r: (libc)Hash Search Function.
|
||
* htonl: (libc)Byte Order.
|
||
* htons: (libc)Byte Order.
|
||
* hypot: (libc)Exponents and Logarithms.
|
||
* hypotf: (libc)Exponents and Logarithms.
|
||
* hypotfN: (libc)Exponents and Logarithms.
|
||
* hypotfNx: (libc)Exponents and Logarithms.
|
||
* hypotl: (libc)Exponents and Logarithms.
|
||
* iconv: (libc)Generic Conversion Interface.
|
||
* iconv_close: (libc)Generic Conversion Interface.
|
||
* iconv_open: (libc)Generic Conversion Interface.
|
||
* if_freenameindex: (libc)Interface Naming.
|
||
* if_indextoname: (libc)Interface Naming.
|
||
* if_nameindex: (libc)Interface Naming.
|
||
* if_nametoindex: (libc)Interface Naming.
|
||
* ilogb: (libc)Exponents and Logarithms.
|
||
* ilogbf: (libc)Exponents and Logarithms.
|
||
* ilogbfN: (libc)Exponents and Logarithms.
|
||
* ilogbfNx: (libc)Exponents and Logarithms.
|
||
* ilogbl: (libc)Exponents and Logarithms.
|
||
* imaxabs: (libc)Absolute Value.
|
||
* imaxdiv: (libc)Integer Division.
|
||
* in6addr_any: (libc)Host Address Data Type.
|
||
* in6addr_loopback: (libc)Host Address Data Type.
|
||
* index: (libc)Search Functions.
|
||
* inet_addr: (libc)Host Address Functions.
|
||
* inet_aton: (libc)Host Address Functions.
|
||
* inet_lnaof: (libc)Host Address Functions.
|
||
* inet_makeaddr: (libc)Host Address Functions.
|
||
* inet_netof: (libc)Host Address Functions.
|
||
* inet_network: (libc)Host Address Functions.
|
||
* inet_ntoa: (libc)Host Address Functions.
|
||
* inet_ntop: (libc)Host Address Functions.
|
||
* inet_pton: (libc)Host Address Functions.
|
||
* initgroups: (libc)Setting Groups.
|
||
* initstate: (libc)BSD Random.
|
||
* initstate_r: (libc)BSD Random.
|
||
* innetgr: (libc)Netgroup Membership.
|
||
* ioctl: (libc)IOCTLs.
|
||
* isalnum: (libc)Classification of Characters.
|
||
* isalpha: (libc)Classification of Characters.
|
||
* isascii: (libc)Classification of Characters.
|
||
* isatty: (libc)Is It a Terminal.
|
||
* isblank: (libc)Classification of Characters.
|
||
* iscanonical: (libc)Floating Point Classes.
|
||
* iscntrl: (libc)Classification of Characters.
|
||
* isdigit: (libc)Classification of Characters.
|
||
* iseqsig: (libc)FP Comparison Functions.
|
||
* isfinite: (libc)Floating Point Classes.
|
||
* isgraph: (libc)Classification of Characters.
|
||
* isgreater: (libc)FP Comparison Functions.
|
||
* isgreaterequal: (libc)FP Comparison Functions.
|
||
* isinf: (libc)Floating Point Classes.
|
||
* isinff: (libc)Floating Point Classes.
|
||
* isinfl: (libc)Floating Point Classes.
|
||
* isless: (libc)FP Comparison Functions.
|
||
* islessequal: (libc)FP Comparison Functions.
|
||
* islessgreater: (libc)FP Comparison Functions.
|
||
* islower: (libc)Classification of Characters.
|
||
* isnan: (libc)Floating Point Classes.
|
||
* isnan: (libc)Floating Point Classes.
|
||
* isnanf: (libc)Floating Point Classes.
|
||
* isnanl: (libc)Floating Point Classes.
|
||
* isnormal: (libc)Floating Point Classes.
|
||
* isprint: (libc)Classification of Characters.
|
||
* ispunct: (libc)Classification of Characters.
|
||
* issignaling: (libc)Floating Point Classes.
|
||
* isspace: (libc)Classification of Characters.
|
||
* issubnormal: (libc)Floating Point Classes.
|
||
* isunordered: (libc)FP Comparison Functions.
|
||
* isupper: (libc)Classification of Characters.
|
||
* iswalnum: (libc)Classification of Wide Characters.
|
||
* iswalpha: (libc)Classification of Wide Characters.
|
||
* iswblank: (libc)Classification of Wide Characters.
|
||
* iswcntrl: (libc)Classification of Wide Characters.
|
||
* iswctype: (libc)Classification of Wide Characters.
|
||
* iswdigit: (libc)Classification of Wide Characters.
|
||
* iswgraph: (libc)Classification of Wide Characters.
|
||
* iswlower: (libc)Classification of Wide Characters.
|
||
* iswprint: (libc)Classification of Wide Characters.
|
||
* iswpunct: (libc)Classification of Wide Characters.
|
||
* iswspace: (libc)Classification of Wide Characters.
|
||
* iswupper: (libc)Classification of Wide Characters.
|
||
* iswxdigit: (libc)Classification of Wide Characters.
|
||
* isxdigit: (libc)Classification of Characters.
|
||
* iszero: (libc)Floating Point Classes.
|
||
* j0: (libc)Special Functions.
|
||
* j0f: (libc)Special Functions.
|
||
* j0fN: (libc)Special Functions.
|
||
* j0fNx: (libc)Special Functions.
|
||
* j0l: (libc)Special Functions.
|
||
* j1: (libc)Special Functions.
|
||
* j1f: (libc)Special Functions.
|
||
* j1fN: (libc)Special Functions.
|
||
* j1fNx: (libc)Special Functions.
|
||
* j1l: (libc)Special Functions.
|
||
* jn: (libc)Special Functions.
|
||
* jnf: (libc)Special Functions.
|
||
* jnfN: (libc)Special Functions.
|
||
* jnfNx: (libc)Special Functions.
|
||
* jnl: (libc)Special Functions.
|
||
* jrand48: (libc)SVID Random.
|
||
* jrand48_r: (libc)SVID Random.
|
||
* kill: (libc)Signaling Another Process.
|
||
* killpg: (libc)Signaling Another Process.
|
||
* l64a: (libc)Encode Binary Data.
|
||
* labs: (libc)Absolute Value.
|
||
* lcong48: (libc)SVID Random.
|
||
* lcong48_r: (libc)SVID Random.
|
||
* ldexp: (libc)Normalization Functions.
|
||
* ldexpf: (libc)Normalization Functions.
|
||
* ldexpfN: (libc)Normalization Functions.
|
||
* ldexpfNx: (libc)Normalization Functions.
|
||
* ldexpl: (libc)Normalization Functions.
|
||
* ldiv: (libc)Integer Division.
|
||
* lfind: (libc)Array Search Function.
|
||
* lgamma: (libc)Special Functions.
|
||
* lgamma_r: (libc)Special Functions.
|
||
* lgammaf: (libc)Special Functions.
|
||
* lgammafN: (libc)Special Functions.
|
||
* lgammafN_r: (libc)Special Functions.
|
||
* lgammafNx: (libc)Special Functions.
|
||
* lgammafNx_r: (libc)Special Functions.
|
||
* lgammaf_r: (libc)Special Functions.
|
||
* lgammal: (libc)Special Functions.
|
||
* lgammal_r: (libc)Special Functions.
|
||
* link: (libc)Hard Links.
|
||
* linkat: (libc)Hard Links.
|
||
* lio_listio64: (libc)Asynchronous Reads/Writes.
|
||
* lio_listio: (libc)Asynchronous Reads/Writes.
|
||
* listen: (libc)Listening.
|
||
* llabs: (libc)Absolute Value.
|
||
* lldiv: (libc)Integer Division.
|
||
* llogb: (libc)Exponents and Logarithms.
|
||
* llogbf: (libc)Exponents and Logarithms.
|
||
* llogbfN: (libc)Exponents and Logarithms.
|
||
* llogbfNx: (libc)Exponents and Logarithms.
|
||
* llogbl: (libc)Exponents and Logarithms.
|
||
* llrint: (libc)Rounding Functions.
|
||
* llrintf: (libc)Rounding Functions.
|
||
* llrintfN: (libc)Rounding Functions.
|
||
* llrintfNx: (libc)Rounding Functions.
|
||
* llrintl: (libc)Rounding Functions.
|
||
* llround: (libc)Rounding Functions.
|
||
* llroundf: (libc)Rounding Functions.
|
||
* llroundfN: (libc)Rounding Functions.
|
||
* llroundfNx: (libc)Rounding Functions.
|
||
* llroundl: (libc)Rounding Functions.
|
||
* localeconv: (libc)The Lame Way to Locale Data.
|
||
* localtime: (libc)Broken-down Time.
|
||
* localtime_r: (libc)Broken-down Time.
|
||
* log10: (libc)Exponents and Logarithms.
|
||
* log10f: (libc)Exponents and Logarithms.
|
||
* log10fN: (libc)Exponents and Logarithms.
|
||
* log10fNx: (libc)Exponents and Logarithms.
|
||
* log10l: (libc)Exponents and Logarithms.
|
||
* log1p: (libc)Exponents and Logarithms.
|
||
* log1pf: (libc)Exponents and Logarithms.
|
||
* log1pfN: (libc)Exponents and Logarithms.
|
||
* log1pfNx: (libc)Exponents and Logarithms.
|
||
* log1pl: (libc)Exponents and Logarithms.
|
||
* log2: (libc)Exponents and Logarithms.
|
||
* log2f: (libc)Exponents and Logarithms.
|
||
* log2fN: (libc)Exponents and Logarithms.
|
||
* log2fNx: (libc)Exponents and Logarithms.
|
||
* log2l: (libc)Exponents and Logarithms.
|
||
* log: (libc)Exponents and Logarithms.
|
||
* logb: (libc)Exponents and Logarithms.
|
||
* logbf: (libc)Exponents and Logarithms.
|
||
* logbfN: (libc)Exponents and Logarithms.
|
||
* logbfNx: (libc)Exponents and Logarithms.
|
||
* logbl: (libc)Exponents and Logarithms.
|
||
* logf: (libc)Exponents and Logarithms.
|
||
* logfN: (libc)Exponents and Logarithms.
|
||
* logfNx: (libc)Exponents and Logarithms.
|
||
* login: (libc)Logging In and Out.
|
||
* login_tty: (libc)Logging In and Out.
|
||
* logl: (libc)Exponents and Logarithms.
|
||
* logout: (libc)Logging In and Out.
|
||
* logwtmp: (libc)Logging In and Out.
|
||
* longjmp: (libc)Non-Local Details.
|
||
* lrand48: (libc)SVID Random.
|
||
* lrand48_r: (libc)SVID Random.
|
||
* lrint: (libc)Rounding Functions.
|
||
* lrintf: (libc)Rounding Functions.
|
||
* lrintfN: (libc)Rounding Functions.
|
||
* lrintfNx: (libc)Rounding Functions.
|
||
* lrintl: (libc)Rounding Functions.
|
||
* lround: (libc)Rounding Functions.
|
||
* lroundf: (libc)Rounding Functions.
|
||
* lroundfN: (libc)Rounding Functions.
|
||
* lroundfNx: (libc)Rounding Functions.
|
||
* lroundl: (libc)Rounding Functions.
|
||
* lsearch: (libc)Array Search Function.
|
||
* lseek64: (libc)File Position Primitive.
|
||
* lseek: (libc)File Position Primitive.
|
||
* lstat64: (libc)Reading Attributes.
|
||
* lstat: (libc)Reading Attributes.
|
||
* lutimes: (libc)File Times.
|
||
* madvise: (libc)Memory-mapped I/O.
|
||
* makecontext: (libc)System V contexts.
|
||
* mallinfo: (libc)Statistics of Malloc.
|
||
* malloc: (libc)Basic Allocation.
|
||
* mallopt: (libc)Malloc Tunable Parameters.
|
||
* mblen: (libc)Non-reentrant Character Conversion.
|
||
* mbrlen: (libc)Converting a Character.
|
||
* mbrtowc: (libc)Converting a Character.
|
||
* mbsinit: (libc)Keeping the state.
|
||
* mbsnrtowcs: (libc)Converting Strings.
|
||
* mbsrtowcs: (libc)Converting Strings.
|
||
* mbstowcs: (libc)Non-reentrant String Conversion.
|
||
* mbtowc: (libc)Non-reentrant Character Conversion.
|
||
* mcheck: (libc)Heap Consistency Checking.
|
||
* memalign: (libc)Aligned Memory Blocks.
|
||
* memccpy: (libc)Copying Strings and Arrays.
|
||
* memchr: (libc)Search Functions.
|
||
* memcmp: (libc)String/Array Comparison.
|
||
* memcpy: (libc)Copying Strings and Arrays.
|
||
* memfd_create: (libc)Memory-mapped I/O.
|
||
* memfrob: (libc)Obfuscating Data.
|
||
* memmem: (libc)Search Functions.
|
||
* memmove: (libc)Copying Strings and Arrays.
|
||
* mempcpy: (libc)Copying Strings and Arrays.
|
||
* memrchr: (libc)Search Functions.
|
||
* memset: (libc)Copying Strings and Arrays.
|
||
* mkdir: (libc)Creating Directories.
|
||
* mkdtemp: (libc)Temporary Files.
|
||
* mkfifo: (libc)FIFO Special Files.
|
||
* mknod: (libc)Making Special Files.
|
||
* mkstemp: (libc)Temporary Files.
|
||
* mktemp: (libc)Temporary Files.
|
||
* mktime: (libc)Broken-down Time.
|
||
* mlock2: (libc)Page Lock Functions.
|
||
* mlock: (libc)Page Lock Functions.
|
||
* mlockall: (libc)Page Lock Functions.
|
||
* mmap64: (libc)Memory-mapped I/O.
|
||
* mmap: (libc)Memory-mapped I/O.
|
||
* modf: (libc)Rounding Functions.
|
||
* modff: (libc)Rounding Functions.
|
||
* modffN: (libc)Rounding Functions.
|
||
* modffNx: (libc)Rounding Functions.
|
||
* modfl: (libc)Rounding Functions.
|
||
* mount: (libc)Mount-Unmount-Remount.
|
||
* mprobe: (libc)Heap Consistency Checking.
|
||
* mprotect: (libc)Memory Protection.
|
||
* mrand48: (libc)SVID Random.
|
||
* mrand48_r: (libc)SVID Random.
|
||
* mremap: (libc)Memory-mapped I/O.
|
||
* msync: (libc)Memory-mapped I/O.
|
||
* mtrace: (libc)Tracing malloc.
|
||
* mtx_destroy: (libc)ISO C Mutexes.
|
||
* mtx_init: (libc)ISO C Mutexes.
|
||
* mtx_lock: (libc)ISO C Mutexes.
|
||
* mtx_timedlock: (libc)ISO C Mutexes.
|
||
* mtx_trylock: (libc)ISO C Mutexes.
|
||
* mtx_unlock: (libc)ISO C Mutexes.
|
||
* munlock: (libc)Page Lock Functions.
|
||
* munlockall: (libc)Page Lock Functions.
|
||
* munmap: (libc)Memory-mapped I/O.
|
||
* muntrace: (libc)Tracing malloc.
|
||
* nan: (libc)FP Bit Twiddling.
|
||
* nanf: (libc)FP Bit Twiddling.
|
||
* nanfN: (libc)FP Bit Twiddling.
|
||
* nanfNx: (libc)FP Bit Twiddling.
|
||
* nanl: (libc)FP Bit Twiddling.
|
||
* nanosleep: (libc)Sleeping.
|
||
* nearbyint: (libc)Rounding Functions.
|
||
* nearbyintf: (libc)Rounding Functions.
|
||
* nearbyintfN: (libc)Rounding Functions.
|
||
* nearbyintfNx: (libc)Rounding Functions.
|
||
* nearbyintl: (libc)Rounding Functions.
|
||
* nextafter: (libc)FP Bit Twiddling.
|
||
* nextafterf: (libc)FP Bit Twiddling.
|
||
* nextafterfN: (libc)FP Bit Twiddling.
|
||
* nextafterfNx: (libc)FP Bit Twiddling.
|
||
* nextafterl: (libc)FP Bit Twiddling.
|
||
* nextdown: (libc)FP Bit Twiddling.
|
||
* nextdownf: (libc)FP Bit Twiddling.
|
||
* nextdownfN: (libc)FP Bit Twiddling.
|
||
* nextdownfNx: (libc)FP Bit Twiddling.
|
||
* nextdownl: (libc)FP Bit Twiddling.
|
||
* nexttoward: (libc)FP Bit Twiddling.
|
||
* nexttowardf: (libc)FP Bit Twiddling.
|
||
* nexttowardl: (libc)FP Bit Twiddling.
|
||
* nextup: (libc)FP Bit Twiddling.
|
||
* nextupf: (libc)FP Bit Twiddling.
|
||
* nextupfN: (libc)FP Bit Twiddling.
|
||
* nextupfNx: (libc)FP Bit Twiddling.
|
||
* nextupl: (libc)FP Bit Twiddling.
|
||
* nftw64: (libc)Working with Directory Trees.
|
||
* nftw: (libc)Working with Directory Trees.
|
||
* ngettext: (libc)Advanced gettext functions.
|
||
* nice: (libc)Traditional Scheduling Functions.
|
||
* nl_langinfo: (libc)The Elegant and Fast Way.
|
||
* nrand48: (libc)SVID Random.
|
||
* nrand48_r: (libc)SVID Random.
|
||
* ntohl: (libc)Byte Order.
|
||
* ntohs: (libc)Byte Order.
|
||
* ntp_adjtime: (libc)High Accuracy Clock.
|
||
* ntp_gettime: (libc)High Accuracy Clock.
|
||
* obstack_1grow: (libc)Growing Objects.
|
||
* obstack_1grow_fast: (libc)Extra Fast Growing.
|
||
* obstack_alignment_mask: (libc)Obstacks Data Alignment.
|
||
* obstack_alloc: (libc)Allocation in an Obstack.
|
||
* obstack_base: (libc)Status of an Obstack.
|
||
* obstack_blank: (libc)Growing Objects.
|
||
* obstack_blank_fast: (libc)Extra Fast Growing.
|
||
* obstack_chunk_size: (libc)Obstack Chunks.
|
||
* obstack_copy0: (libc)Allocation in an Obstack.
|
||
* obstack_copy: (libc)Allocation in an Obstack.
|
||
* obstack_finish: (libc)Growing Objects.
|
||
* obstack_free: (libc)Freeing Obstack Objects.
|
||
* obstack_grow0: (libc)Growing Objects.
|
||
* obstack_grow: (libc)Growing Objects.
|
||
* obstack_init: (libc)Preparing for Obstacks.
|
||
* obstack_int_grow: (libc)Growing Objects.
|
||
* obstack_int_grow_fast: (libc)Extra Fast Growing.
|
||
* obstack_next_free: (libc)Status of an Obstack.
|
||
* obstack_object_size: (libc)Growing Objects.
|
||
* obstack_object_size: (libc)Status of an Obstack.
|
||
* obstack_printf: (libc)Dynamic Output.
|
||
* obstack_ptr_grow: (libc)Growing Objects.
|
||
* obstack_ptr_grow_fast: (libc)Extra Fast Growing.
|
||
* obstack_room: (libc)Extra Fast Growing.
|
||
* obstack_vprintf: (libc)Variable Arguments Output.
|
||
* offsetof: (libc)Structure Measurement.
|
||
* on_exit: (libc)Cleanups on Exit.
|
||
* open64: (libc)Opening and Closing Files.
|
||
* open: (libc)Opening and Closing Files.
|
||
* open_memstream: (libc)String Streams.
|
||
* opendir: (libc)Opening a Directory.
|
||
* openlog: (libc)openlog.
|
||
* openpty: (libc)Pseudo-Terminal Pairs.
|
||
* parse_printf_format: (libc)Parsing a Template String.
|
||
* pathconf: (libc)Pathconf.
|
||
* pause: (libc)Using Pause.
|
||
* pclose: (libc)Pipe to a Subprocess.
|
||
* perror: (libc)Error Messages.
|
||
* pipe: (libc)Creating a Pipe.
|
||
* pkey_alloc: (libc)Memory Protection.
|
||
* pkey_free: (libc)Memory Protection.
|
||
* pkey_get: (libc)Memory Protection.
|
||
* pkey_mprotect: (libc)Memory Protection.
|
||
* pkey_set: (libc)Memory Protection.
|
||
* popen: (libc)Pipe to a Subprocess.
|
||
* posix_fallocate64: (libc)Storage Allocation.
|
||
* posix_fallocate: (libc)Storage Allocation.
|
||
* posix_memalign: (libc)Aligned Memory Blocks.
|
||
* pow: (libc)Exponents and Logarithms.
|
||
* powf: (libc)Exponents and Logarithms.
|
||
* powfN: (libc)Exponents and Logarithms.
|
||
* powfNx: (libc)Exponents and Logarithms.
|
||
* powl: (libc)Exponents and Logarithms.
|
||
* pread64: (libc)I/O Primitives.
|
||
* pread: (libc)I/O Primitives.
|
||
* preadv2: (libc)Scatter-Gather.
|
||
* preadv64: (libc)Scatter-Gather.
|
||
* preadv64v2: (libc)Scatter-Gather.
|
||
* preadv: (libc)Scatter-Gather.
|
||
* printf: (libc)Formatted Output Functions.
|
||
* printf_size: (libc)Predefined Printf Handlers.
|
||
* printf_size_info: (libc)Predefined Printf Handlers.
|
||
* psignal: (libc)Signal Messages.
|
||
* pthread_getattr_default_np: (libc)Default Thread Attributes.
|
||
* pthread_getspecific: (libc)Thread-specific Data.
|
||
* pthread_key_create: (libc)Thread-specific Data.
|
||
* pthread_key_delete: (libc)Thread-specific Data.
|
||
* pthread_setattr_default_np: (libc)Default Thread Attributes.
|
||
* pthread_setspecific: (libc)Thread-specific Data.
|
||
* ptsname: (libc)Allocation.
|
||
* ptsname_r: (libc)Allocation.
|
||
* putc: (libc)Simple Output.
|
||
* putc_unlocked: (libc)Simple Output.
|
||
* putchar: (libc)Simple Output.
|
||
* putchar_unlocked: (libc)Simple Output.
|
||
* putenv: (libc)Environment Access.
|
||
* putpwent: (libc)Writing a User Entry.
|
||
* puts: (libc)Simple Output.
|
||
* pututline: (libc)Manipulating the Database.
|
||
* pututxline: (libc)XPG Functions.
|
||
* putw: (libc)Simple Output.
|
||
* putwc: (libc)Simple Output.
|
||
* putwc_unlocked: (libc)Simple Output.
|
||
* putwchar: (libc)Simple Output.
|
||
* putwchar_unlocked: (libc)Simple Output.
|
||
* pwrite64: (libc)I/O Primitives.
|
||
* pwrite: (libc)I/O Primitives.
|
||
* pwritev2: (libc)Scatter-Gather.
|
||
* pwritev64: (libc)Scatter-Gather.
|
||
* pwritev64v2: (libc)Scatter-Gather.
|
||
* pwritev: (libc)Scatter-Gather.
|
||
* qecvt: (libc)System V Number Conversion.
|
||
* qecvt_r: (libc)System V Number Conversion.
|
||
* qfcvt: (libc)System V Number Conversion.
|
||
* qfcvt_r: (libc)System V Number Conversion.
|
||
* qgcvt: (libc)System V Number Conversion.
|
||
* qsort: (libc)Array Sort Function.
|
||
* raise: (libc)Signaling Yourself.
|
||
* rand: (libc)ISO Random.
|
||
* rand_r: (libc)ISO Random.
|
||
* random: (libc)BSD Random.
|
||
* random_r: (libc)BSD Random.
|
||
* rawmemchr: (libc)Search Functions.
|
||
* read: (libc)I/O Primitives.
|
||
* readdir64: (libc)Reading/Closing Directory.
|
||
* readdir64_r: (libc)Reading/Closing Directory.
|
||
* readdir: (libc)Reading/Closing Directory.
|
||
* readdir_r: (libc)Reading/Closing Directory.
|
||
* readlink: (libc)Symbolic Links.
|
||
* readv: (libc)Scatter-Gather.
|
||
* realloc: (libc)Changing Block Size.
|
||
* reallocarray: (libc)Changing Block Size.
|
||
* realpath: (libc)Symbolic Links.
|
||
* recv: (libc)Receiving Data.
|
||
* recvfrom: (libc)Receiving Datagrams.
|
||
* recvmsg: (libc)Receiving Datagrams.
|
||
* regcomp: (libc)POSIX Regexp Compilation.
|
||
* regerror: (libc)Regexp Cleanup.
|
||
* regexec: (libc)Matching POSIX Regexps.
|
||
* regfree: (libc)Regexp Cleanup.
|
||
* register_printf_function: (libc)Registering New Conversions.
|
||
* remainder: (libc)Remainder Functions.
|
||
* remainderf: (libc)Remainder Functions.
|
||
* remainderfN: (libc)Remainder Functions.
|
||
* remainderfNx: (libc)Remainder Functions.
|
||
* remainderl: (libc)Remainder Functions.
|
||
* remove: (libc)Deleting Files.
|
||
* rename: (libc)Renaming Files.
|
||
* rewind: (libc)File Positioning.
|
||
* rewinddir: (libc)Random Access Directory.
|
||
* rindex: (libc)Search Functions.
|
||
* rint: (libc)Rounding Functions.
|
||
* rintf: (libc)Rounding Functions.
|
||
* rintfN: (libc)Rounding Functions.
|
||
* rintfNx: (libc)Rounding Functions.
|
||
* rintl: (libc)Rounding Functions.
|
||
* rmdir: (libc)Deleting Files.
|
||
* round: (libc)Rounding Functions.
|
||
* roundeven: (libc)Rounding Functions.
|
||
* roundevenf: (libc)Rounding Functions.
|
||
* roundevenfN: (libc)Rounding Functions.
|
||
* roundevenfNx: (libc)Rounding Functions.
|
||
* roundevenl: (libc)Rounding Functions.
|
||
* roundf: (libc)Rounding Functions.
|
||
* roundfN: (libc)Rounding Functions.
|
||
* roundfNx: (libc)Rounding Functions.
|
||
* roundl: (libc)Rounding Functions.
|
||
* rpmatch: (libc)Yes-or-No Questions.
|
||
* sbrk: (libc)Resizing the Data Segment.
|
||
* scalb: (libc)Normalization Functions.
|
||
* scalbf: (libc)Normalization Functions.
|
||
* scalbl: (libc)Normalization Functions.
|
||
* scalbln: (libc)Normalization Functions.
|
||
* scalblnf: (libc)Normalization Functions.
|
||
* scalblnfN: (libc)Normalization Functions.
|
||
* scalblnfNx: (libc)Normalization Functions.
|
||
* scalblnl: (libc)Normalization Functions.
|
||
* scalbn: (libc)Normalization Functions.
|
||
* scalbnf: (libc)Normalization Functions.
|
||
* scalbnfN: (libc)Normalization Functions.
|
||
* scalbnfNx: (libc)Normalization Functions.
|
||
* scalbnl: (libc)Normalization Functions.
|
||
* scandir64: (libc)Scanning Directory Content.
|
||
* scandir: (libc)Scanning Directory Content.
|
||
* scanf: (libc)Formatted Input Functions.
|
||
* sched_get_priority_max: (libc)Basic Scheduling Functions.
|
||
* sched_get_priority_min: (libc)Basic Scheduling Functions.
|
||
* sched_getaffinity: (libc)CPU Affinity.
|
||
* sched_getparam: (libc)Basic Scheduling Functions.
|
||
* sched_getscheduler: (libc)Basic Scheduling Functions.
|
||
* sched_rr_get_interval: (libc)Basic Scheduling Functions.
|
||
* sched_setaffinity: (libc)CPU Affinity.
|
||
* sched_setparam: (libc)Basic Scheduling Functions.
|
||
* sched_setscheduler: (libc)Basic Scheduling Functions.
|
||
* sched_yield: (libc)Basic Scheduling Functions.
|
||
* secure_getenv: (libc)Environment Access.
|
||
* seed48: (libc)SVID Random.
|
||
* seed48_r: (libc)SVID Random.
|
||
* seekdir: (libc)Random Access Directory.
|
||
* select: (libc)Waiting for I/O.
|
||
* sem_close: (libc)Semaphores.
|
||
* sem_destroy: (libc)Semaphores.
|
||
* sem_getvalue: (libc)Semaphores.
|
||
* sem_init: (libc)Semaphores.
|
||
* sem_open: (libc)Semaphores.
|
||
* sem_post: (libc)Semaphores.
|
||
* sem_timedwait: (libc)Semaphores.
|
||
* sem_trywait: (libc)Semaphores.
|
||
* sem_unlink: (libc)Semaphores.
|
||
* sem_wait: (libc)Semaphores.
|
||
* semctl: (libc)Semaphores.
|
||
* semget: (libc)Semaphores.
|
||
* semop: (libc)Semaphores.
|
||
* semtimedop: (libc)Semaphores.
|
||
* send: (libc)Sending Data.
|
||
* sendmsg: (libc)Receiving Datagrams.
|
||
* sendto: (libc)Sending Datagrams.
|
||
* setbuf: (libc)Controlling Buffering.
|
||
* setbuffer: (libc)Controlling Buffering.
|
||
* setcontext: (libc)System V contexts.
|
||
* setdomainname: (libc)Host Identification.
|
||
* setegid: (libc)Setting Groups.
|
||
* setenv: (libc)Environment Access.
|
||
* seteuid: (libc)Setting User ID.
|
||
* setfsent: (libc)fstab.
|
||
* setgid: (libc)Setting Groups.
|
||
* setgrent: (libc)Scanning All Groups.
|
||
* setgroups: (libc)Setting Groups.
|
||
* sethostent: (libc)Host Names.
|
||
* sethostid: (libc)Host Identification.
|
||
* sethostname: (libc)Host Identification.
|
||
* setitimer: (libc)Setting an Alarm.
|
||
* setjmp: (libc)Non-Local Details.
|
||
* setlinebuf: (libc)Controlling Buffering.
|
||
* setlocale: (libc)Setting the Locale.
|
||
* setlogmask: (libc)setlogmask.
|
||
* setmntent: (libc)mtab.
|
||
* setnetent: (libc)Networks Database.
|
||
* setnetgrent: (libc)Lookup Netgroup.
|
||
* setpayload: (libc)FP Bit Twiddling.
|
||
* setpayloadf: (libc)FP Bit Twiddling.
|
||
* setpayloadfN: (libc)FP Bit Twiddling.
|
||
* setpayloadfNx: (libc)FP Bit Twiddling.
|
||
* setpayloadl: (libc)FP Bit Twiddling.
|
||
* setpayloadsig: (libc)FP Bit Twiddling.
|
||
* setpayloadsigf: (libc)FP Bit Twiddling.
|
||
* setpayloadsigfN: (libc)FP Bit Twiddling.
|
||
* setpayloadsigfNx: (libc)FP Bit Twiddling.
|
||
* setpayloadsigl: (libc)FP Bit Twiddling.
|
||
* setpgid: (libc)Process Group Functions.
|
||
* setpgrp: (libc)Process Group Functions.
|
||
* setpriority: (libc)Traditional Scheduling Functions.
|
||
* setprotoent: (libc)Protocols Database.
|
||
* setpwent: (libc)Scanning All Users.
|
||
* setregid: (libc)Setting Groups.
|
||
* setreuid: (libc)Setting User ID.
|
||
* setrlimit64: (libc)Limits on Resources.
|
||
* setrlimit: (libc)Limits on Resources.
|
||
* setservent: (libc)Services Database.
|
||
* setsid: (libc)Process Group Functions.
|
||
* setsockopt: (libc)Socket Option Functions.
|
||
* setstate: (libc)BSD Random.
|
||
* setstate_r: (libc)BSD Random.
|
||
* settimeofday: (libc)High-Resolution Calendar.
|
||
* setuid: (libc)Setting User ID.
|
||
* setutent: (libc)Manipulating the Database.
|
||
* setutxent: (libc)XPG Functions.
|
||
* setvbuf: (libc)Controlling Buffering.
|
||
* shm_open: (libc)Memory-mapped I/O.
|
||
* shm_unlink: (libc)Memory-mapped I/O.
|
||
* shutdown: (libc)Closing a Socket.
|
||
* sigaction: (libc)Advanced Signal Handling.
|
||
* sigaddset: (libc)Signal Sets.
|
||
* sigaltstack: (libc)Signal Stack.
|
||
* sigblock: (libc)BSD Signal Handling.
|
||
* sigdelset: (libc)Signal Sets.
|
||
* sigemptyset: (libc)Signal Sets.
|
||
* sigfillset: (libc)Signal Sets.
|
||
* siginterrupt: (libc)BSD Signal Handling.
|
||
* sigismember: (libc)Signal Sets.
|
||
* siglongjmp: (libc)Non-Local Exits and Signals.
|
||
* sigmask: (libc)BSD Signal Handling.
|
||
* signal: (libc)Basic Signal Handling.
|
||
* signbit: (libc)FP Bit Twiddling.
|
||
* significand: (libc)Normalization Functions.
|
||
* significandf: (libc)Normalization Functions.
|
||
* significandl: (libc)Normalization Functions.
|
||
* sigpause: (libc)BSD Signal Handling.
|
||
* sigpending: (libc)Checking for Pending Signals.
|
||
* sigprocmask: (libc)Process Signal Mask.
|
||
* sigsetjmp: (libc)Non-Local Exits and Signals.
|
||
* sigsetmask: (libc)BSD Signal Handling.
|
||
* sigstack: (libc)Signal Stack.
|
||
* sigsuspend: (libc)Sigsuspend.
|
||
* sin: (libc)Trig Functions.
|
||
* sincos: (libc)Trig Functions.
|
||
* sincosf: (libc)Trig Functions.
|
||
* sincosfN: (libc)Trig Functions.
|
||
* sincosfNx: (libc)Trig Functions.
|
||
* sincosl: (libc)Trig Functions.
|
||
* sinf: (libc)Trig Functions.
|
||
* sinfN: (libc)Trig Functions.
|
||
* sinfNx: (libc)Trig Functions.
|
||
* sinh: (libc)Hyperbolic Functions.
|
||
* sinhf: (libc)Hyperbolic Functions.
|
||
* sinhfN: (libc)Hyperbolic Functions.
|
||
* sinhfNx: (libc)Hyperbolic Functions.
|
||
* sinhl: (libc)Hyperbolic Functions.
|
||
* sinl: (libc)Trig Functions.
|
||
* sleep: (libc)Sleeping.
|
||
* snprintf: (libc)Formatted Output Functions.
|
||
* socket: (libc)Creating a Socket.
|
||
* socketpair: (libc)Socket Pairs.
|
||
* sprintf: (libc)Formatted Output Functions.
|
||
* sqrt: (libc)Exponents and Logarithms.
|
||
* sqrtf: (libc)Exponents and Logarithms.
|
||
* sqrtfN: (libc)Exponents and Logarithms.
|
||
* sqrtfNx: (libc)Exponents and Logarithms.
|
||
* sqrtl: (libc)Exponents and Logarithms.
|
||
* srand48: (libc)SVID Random.
|
||
* srand48_r: (libc)SVID Random.
|
||
* srand: (libc)ISO Random.
|
||
* srandom: (libc)BSD Random.
|
||
* srandom_r: (libc)BSD Random.
|
||
* sscanf: (libc)Formatted Input Functions.
|
||
* ssignal: (libc)Basic Signal Handling.
|
||
* stat64: (libc)Reading Attributes.
|
||
* stat: (libc)Reading Attributes.
|
||
* stime: (libc)Simple Calendar Time.
|
||
* stpcpy: (libc)Copying Strings and Arrays.
|
||
* stpncpy: (libc)Truncating Strings.
|
||
* strcasecmp: (libc)String/Array Comparison.
|
||
* strcasestr: (libc)Search Functions.
|
||
* strcat: (libc)Concatenating Strings.
|
||
* strchr: (libc)Search Functions.
|
||
* strchrnul: (libc)Search Functions.
|
||
* strcmp: (libc)String/Array Comparison.
|
||
* strcoll: (libc)Collation Functions.
|
||
* strcpy: (libc)Copying Strings and Arrays.
|
||
* strcspn: (libc)Search Functions.
|
||
* strdup: (libc)Copying Strings and Arrays.
|
||
* strdupa: (libc)Copying Strings and Arrays.
|
||
* strerror: (libc)Error Messages.
|
||
* strerror_r: (libc)Error Messages.
|
||
* strfmon: (libc)Formatting Numbers.
|
||
* strfromd: (libc)Printing of Floats.
|
||
* strfromf: (libc)Printing of Floats.
|
||
* strfromfN: (libc)Printing of Floats.
|
||
* strfromfNx: (libc)Printing of Floats.
|
||
* strfroml: (libc)Printing of Floats.
|
||
* strfry: (libc)Shuffling Bytes.
|
||
* strftime: (libc)Formatting Calendar Time.
|
||
* strlen: (libc)String Length.
|
||
* strncasecmp: (libc)String/Array Comparison.
|
||
* strncat: (libc)Truncating Strings.
|
||
* strncmp: (libc)String/Array Comparison.
|
||
* strncpy: (libc)Truncating Strings.
|
||
* strndup: (libc)Truncating Strings.
|
||
* strndupa: (libc)Truncating Strings.
|
||
* strnlen: (libc)String Length.
|
||
* strpbrk: (libc)Search Functions.
|
||
* strptime: (libc)Low-Level Time String Parsing.
|
||
* strrchr: (libc)Search Functions.
|
||
* strsep: (libc)Finding Tokens in a String.
|
||
* strsignal: (libc)Signal Messages.
|
||
* strspn: (libc)Search Functions.
|
||
* strstr: (libc)Search Functions.
|
||
* strtod: (libc)Parsing of Floats.
|
||
* strtof: (libc)Parsing of Floats.
|
||
* strtofN: (libc)Parsing of Floats.
|
||
* strtofNx: (libc)Parsing of Floats.
|
||
* strtoimax: (libc)Parsing of Integers.
|
||
* strtok: (libc)Finding Tokens in a String.
|
||
* strtok_r: (libc)Finding Tokens in a String.
|
||
* strtol: (libc)Parsing of Integers.
|
||
* strtold: (libc)Parsing of Floats.
|
||
* strtoll: (libc)Parsing of Integers.
|
||
* strtoq: (libc)Parsing of Integers.
|
||
* strtoul: (libc)Parsing of Integers.
|
||
* strtoull: (libc)Parsing of Integers.
|
||
* strtoumax: (libc)Parsing of Integers.
|
||
* strtouq: (libc)Parsing of Integers.
|
||
* strverscmp: (libc)String/Array Comparison.
|
||
* strxfrm: (libc)Collation Functions.
|
||
* stty: (libc)BSD Terminal Modes.
|
||
* swapcontext: (libc)System V contexts.
|
||
* swprintf: (libc)Formatted Output Functions.
|
||
* swscanf: (libc)Formatted Input Functions.
|
||
* symlink: (libc)Symbolic Links.
|
||
* sync: (libc)Synchronizing I/O.
|
||
* syscall: (libc)System Calls.
|
||
* sysconf: (libc)Sysconf Definition.
|
||
* sysctl: (libc)System Parameters.
|
||
* syslog: (libc)syslog; vsyslog.
|
||
* system: (libc)Running a Command.
|
||
* sysv_signal: (libc)Basic Signal Handling.
|
||
* tan: (libc)Trig Functions.
|
||
* tanf: (libc)Trig Functions.
|
||
* tanfN: (libc)Trig Functions.
|
||
* tanfNx: (libc)Trig Functions.
|
||
* tanh: (libc)Hyperbolic Functions.
|
||
* tanhf: (libc)Hyperbolic Functions.
|
||
* tanhfN: (libc)Hyperbolic Functions.
|
||
* tanhfNx: (libc)Hyperbolic Functions.
|
||
* tanhl: (libc)Hyperbolic Functions.
|
||
* tanl: (libc)Trig Functions.
|
||
* tcdrain: (libc)Line Control.
|
||
* tcflow: (libc)Line Control.
|
||
* tcflush: (libc)Line Control.
|
||
* tcgetattr: (libc)Mode Functions.
|
||
* tcgetpgrp: (libc)Terminal Access Functions.
|
||
* tcgetsid: (libc)Terminal Access Functions.
|
||
* tcsendbreak: (libc)Line Control.
|
||
* tcsetattr: (libc)Mode Functions.
|
||
* tcsetpgrp: (libc)Terminal Access Functions.
|
||
* tdelete: (libc)Tree Search Function.
|
||
* tdestroy: (libc)Tree Search Function.
|
||
* telldir: (libc)Random Access Directory.
|
||
* tempnam: (libc)Temporary Files.
|
||
* textdomain: (libc)Locating gettext catalog.
|
||
* tfind: (libc)Tree Search Function.
|
||
* tgamma: (libc)Special Functions.
|
||
* tgammaf: (libc)Special Functions.
|
||
* tgammafN: (libc)Special Functions.
|
||
* tgammafNx: (libc)Special Functions.
|
||
* tgammal: (libc)Special Functions.
|
||
* thrd_create: (libc)ISO C Thread Management.
|
||
* thrd_current: (libc)ISO C Thread Management.
|
||
* thrd_detach: (libc)ISO C Thread Management.
|
||
* thrd_equal: (libc)ISO C Thread Management.
|
||
* thrd_exit: (libc)ISO C Thread Management.
|
||
* thrd_join: (libc)ISO C Thread Management.
|
||
* thrd_sleep: (libc)ISO C Thread Management.
|
||
* thrd_yield: (libc)ISO C Thread Management.
|
||
* time: (libc)Simple Calendar Time.
|
||
* timegm: (libc)Broken-down Time.
|
||
* timelocal: (libc)Broken-down Time.
|
||
* times: (libc)Processor Time.
|
||
* tmpfile64: (libc)Temporary Files.
|
||
* tmpfile: (libc)Temporary Files.
|
||
* tmpnam: (libc)Temporary Files.
|
||
* tmpnam_r: (libc)Temporary Files.
|
||
* toascii: (libc)Case Conversion.
|
||
* tolower: (libc)Case Conversion.
|
||
* totalorder: (libc)FP Comparison Functions.
|
||
* totalorderf: (libc)FP Comparison Functions.
|
||
* totalorderfN: (libc)FP Comparison Functions.
|
||
* totalorderfNx: (libc)FP Comparison Functions.
|
||
* totalorderl: (libc)FP Comparison Functions.
|
||
* totalordermag: (libc)FP Comparison Functions.
|
||
* totalordermagf: (libc)FP Comparison Functions.
|
||
* totalordermagfN: (libc)FP Comparison Functions.
|
||
* totalordermagfNx: (libc)FP Comparison Functions.
|
||
* totalordermagl: (libc)FP Comparison Functions.
|
||
* toupper: (libc)Case Conversion.
|
||
* towctrans: (libc)Wide Character Case Conversion.
|
||
* towlower: (libc)Wide Character Case Conversion.
|
||
* towupper: (libc)Wide Character Case Conversion.
|
||
* trunc: (libc)Rounding Functions.
|
||
* truncate64: (libc)File Size.
|
||
* truncate: (libc)File Size.
|
||
* truncf: (libc)Rounding Functions.
|
||
* truncfN: (libc)Rounding Functions.
|
||
* truncfNx: (libc)Rounding Functions.
|
||
* truncl: (libc)Rounding Functions.
|
||
* tsearch: (libc)Tree Search Function.
|
||
* tss_create: (libc)ISO C Thread-local Storage.
|
||
* tss_delete: (libc)ISO C Thread-local Storage.
|
||
* tss_get: (libc)ISO C Thread-local Storage.
|
||
* tss_set: (libc)ISO C Thread-local Storage.
|
||
* ttyname: (libc)Is It a Terminal.
|
||
* ttyname_r: (libc)Is It a Terminal.
|
||
* twalk: (libc)Tree Search Function.
|
||
* tzset: (libc)Time Zone Functions.
|
||
* ufromfp: (libc)Rounding Functions.
|
||
* ufromfpf: (libc)Rounding Functions.
|
||
* ufromfpfN: (libc)Rounding Functions.
|
||
* ufromfpfNx: (libc)Rounding Functions.
|
||
* ufromfpl: (libc)Rounding Functions.
|
||
* ufromfpx: (libc)Rounding Functions.
|
||
* ufromfpxf: (libc)Rounding Functions.
|
||
* ufromfpxfN: (libc)Rounding Functions.
|
||
* ufromfpxfNx: (libc)Rounding Functions.
|
||
* ufromfpxl: (libc)Rounding Functions.
|
||
* ulimit: (libc)Limits on Resources.
|
||
* umask: (libc)Setting Permissions.
|
||
* umount2: (libc)Mount-Unmount-Remount.
|
||
* umount: (libc)Mount-Unmount-Remount.
|
||
* uname: (libc)Platform Type.
|
||
* ungetc: (libc)How Unread.
|
||
* ungetwc: (libc)How Unread.
|
||
* unlink: (libc)Deleting Files.
|
||
* unlockpt: (libc)Allocation.
|
||
* unsetenv: (libc)Environment Access.
|
||
* updwtmp: (libc)Manipulating the Database.
|
||
* utime: (libc)File Times.
|
||
* utimes: (libc)File Times.
|
||
* utmpname: (libc)Manipulating the Database.
|
||
* utmpxname: (libc)XPG Functions.
|
||
* va_arg: (libc)Argument Macros.
|
||
* va_copy: (libc)Argument Macros.
|
||
* va_end: (libc)Argument Macros.
|
||
* va_start: (libc)Argument Macros.
|
||
* valloc: (libc)Aligned Memory Blocks.
|
||
* vasprintf: (libc)Variable Arguments Output.
|
||
* verr: (libc)Error Messages.
|
||
* verrx: (libc)Error Messages.
|
||
* versionsort64: (libc)Scanning Directory Content.
|
||
* versionsort: (libc)Scanning Directory Content.
|
||
* vfork: (libc)Creating a Process.
|
||
* vfprintf: (libc)Variable Arguments Output.
|
||
* vfscanf: (libc)Variable Arguments Input.
|
||
* vfwprintf: (libc)Variable Arguments Output.
|
||
* vfwscanf: (libc)Variable Arguments Input.
|
||
* vlimit: (libc)Limits on Resources.
|
||
* vprintf: (libc)Variable Arguments Output.
|
||
* vscanf: (libc)Variable Arguments Input.
|
||
* vsnprintf: (libc)Variable Arguments Output.
|
||
* vsprintf: (libc)Variable Arguments Output.
|
||
* vsscanf: (libc)Variable Arguments Input.
|
||
* vswprintf: (libc)Variable Arguments Output.
|
||
* vswscanf: (libc)Variable Arguments Input.
|
||
* vsyslog: (libc)syslog; vsyslog.
|
||
* vtimes: (libc)Resource Usage.
|
||
* vwarn: (libc)Error Messages.
|
||
* vwarnx: (libc)Error Messages.
|
||
* vwprintf: (libc)Variable Arguments Output.
|
||
* vwscanf: (libc)Variable Arguments Input.
|
||
* wait3: (libc)BSD Wait Functions.
|
||
* wait4: (libc)Process Completion.
|
||
* wait: (libc)Process Completion.
|
||
* waitpid: (libc)Process Completion.
|
||
* warn: (libc)Error Messages.
|
||
* warnx: (libc)Error Messages.
|
||
* wcpcpy: (libc)Copying Strings and Arrays.
|
||
* wcpncpy: (libc)Truncating Strings.
|
||
* wcrtomb: (libc)Converting a Character.
|
||
* wcscasecmp: (libc)String/Array Comparison.
|
||
* wcscat: (libc)Concatenating Strings.
|
||
* wcschr: (libc)Search Functions.
|
||
* wcschrnul: (libc)Search Functions.
|
||
* wcscmp: (libc)String/Array Comparison.
|
||
* wcscoll: (libc)Collation Functions.
|
||
* wcscpy: (libc)Copying Strings and Arrays.
|
||
* wcscspn: (libc)Search Functions.
|
||
* wcsdup: (libc)Copying Strings and Arrays.
|
||
* wcsftime: (libc)Formatting Calendar Time.
|
||
* wcslen: (libc)String Length.
|
||
* wcsncasecmp: (libc)String/Array Comparison.
|
||
* wcsncat: (libc)Truncating Strings.
|
||
* wcsncmp: (libc)String/Array Comparison.
|
||
* wcsncpy: (libc)Truncating Strings.
|
||
* wcsnlen: (libc)String Length.
|
||
* wcsnrtombs: (libc)Converting Strings.
|
||
* wcspbrk: (libc)Search Functions.
|
||
* wcsrchr: (libc)Search Functions.
|
||
* wcsrtombs: (libc)Converting Strings.
|
||
* wcsspn: (libc)Search Functions.
|
||
* wcsstr: (libc)Search Functions.
|
||
* wcstod: (libc)Parsing of Floats.
|
||
* wcstof: (libc)Parsing of Floats.
|
||
* wcstofN: (libc)Parsing of Floats.
|
||
* wcstofNx: (libc)Parsing of Floats.
|
||
* wcstoimax: (libc)Parsing of Integers.
|
||
* wcstok: (libc)Finding Tokens in a String.
|
||
* wcstol: (libc)Parsing of Integers.
|
||
* wcstold: (libc)Parsing of Floats.
|
||
* wcstoll: (libc)Parsing of Integers.
|
||
* wcstombs: (libc)Non-reentrant String Conversion.
|
||
* wcstoq: (libc)Parsing of Integers.
|
||
* wcstoul: (libc)Parsing of Integers.
|
||
* wcstoull: (libc)Parsing of Integers.
|
||
* wcstoumax: (libc)Parsing of Integers.
|
||
* wcstouq: (libc)Parsing of Integers.
|
||
* wcswcs: (libc)Search Functions.
|
||
* wcsxfrm: (libc)Collation Functions.
|
||
* wctob: (libc)Converting a Character.
|
||
* wctomb: (libc)Non-reentrant Character Conversion.
|
||
* wctrans: (libc)Wide Character Case Conversion.
|
||
* wctype: (libc)Classification of Wide Characters.
|
||
* wmemchr: (libc)Search Functions.
|
||
* wmemcmp: (libc)String/Array Comparison.
|
||
* wmemcpy: (libc)Copying Strings and Arrays.
|
||
* wmemmove: (libc)Copying Strings and Arrays.
|
||
* wmempcpy: (libc)Copying Strings and Arrays.
|
||
* wmemset: (libc)Copying Strings and Arrays.
|
||
* wordexp: (libc)Calling Wordexp.
|
||
* wordfree: (libc)Calling Wordexp.
|
||
* wprintf: (libc)Formatted Output Functions.
|
||
* write: (libc)I/O Primitives.
|
||
* writev: (libc)Scatter-Gather.
|
||
* wscanf: (libc)Formatted Input Functions.
|
||
* y0: (libc)Special Functions.
|
||
* y0f: (libc)Special Functions.
|
||
* y0fN: (libc)Special Functions.
|
||
* y0fNx: (libc)Special Functions.
|
||
* y0l: (libc)Special Functions.
|
||
* y1: (libc)Special Functions.
|
||
* y1f: (libc)Special Functions.
|
||
* y1fN: (libc)Special Functions.
|
||
* y1fNx: (libc)Special Functions.
|
||
* y1l: (libc)Special Functions.
|
||
* yn: (libc)Special Functions.
|
||
* ynf: (libc)Special Functions.
|
||
* ynfN: (libc)Special Functions.
|
||
* ynfNx: (libc)Special Functions.
|
||
* ynl: (libc)Special Functions.
|
||
END-INFO-DIR-ENTRY
|
||
|
||
This file documents the GNU C Library.
|
||
|
||
This is `The GNU C Library Reference Manual', for version 2.29.
|
||
|
||
Copyright (C) 1993-2019 Free Software Foundation, Inc.
|
||
|
||
Permission is granted to copy, distribute and/or modify this document
|
||
under the terms of the GNU Free Documentation License, Version
|
||
1.3 or any later version published by the Free Software Foundation;
|
||
with the Invariant Sections being "Free Software Needs Free
|
||
Documentation" and "GNU Lesser General Public License", the Front-Cover
|
||
texts being "A GNU Manual", and with the Back-Cover Texts as in (a)
|
||
below. A copy of the license is included in the section entitled "GNU
|
||
Free Documentation License".
|
||
|
||
(a) The FSF's Back-Cover Text is: "You have the freedom to copy and
|
||
modify this GNU manual. Buying copies from the FSF supports it in
|
||
developing GNU and promoting software freedom."
|
||
|
||
|
||
File: libc.info, Node: Locked Memory Details, Next: Page Lock Functions, Prev: Why Lock Pages, Up: Locking Pages
|
||
|
||
3.5.2 Locked Memory Details
|
||
---------------------------
|
||
|
||
A memory lock is associated with a virtual page, not a real frame. The
|
||
paging rule is: If a frame backs at least one locked page, don't page it
|
||
out.
|
||
|
||
Memory locks do not stack. I.e., you can't lock a particular page
|
||
twice so that it has to be unlocked twice before it is truly unlocked.
|
||
It is either locked or it isn't.
|
||
|
||
A memory lock persists until the process that owns the memory
|
||
explicitly unlocks it. (But process termination and exec cause the
|
||
virtual memory to cease to exist, which you might say means it isn't
|
||
locked any more).
|
||
|
||
Memory locks are not inherited by child processes. (But note that
|
||
on a modern Unix system, immediately after a fork, the parent's and the
|
||
child's virtual address space are backed by the same real page frames,
|
||
so the child enjoys the parent's locks). *Note Creating a Process::.
|
||
|
||
Because of its ability to impact other processes, only the superuser
|
||
can lock a page. Any process can unlock its own page.
|
||
|
||
The system sets limits on the amount of memory a process can have
|
||
locked and the amount of real memory it can have dedicated to it.
|
||
*Note Limits on Resources::.
|
||
|
||
In Linux, locked pages aren't as locked as you might think. Two
|
||
virtual pages that are not shared memory can nonetheless be backed by
|
||
the same real frame. The kernel does this in the name of efficiency
|
||
when it knows both virtual pages contain identical data, and does it
|
||
even if one or both of the virtual pages are locked.
|
||
|
||
But when a process modifies one of those pages, the kernel must get
|
||
it a separate frame and fill it with the page's data. This is known as
|
||
a "copy-on-write page fault". It takes a small amount of time and, in a
|
||
pathological case, getting that frame may require I/O.
|
||
|
||
To make sure this doesn't happen to your program, don't just lock the
|
||
pages. Write to them as well, unless you know you won't write to them
|
||
ever. And to make sure you have pre-allocated frames for your stack,
|
||
enter a scope that declares a C automatic variable larger than the
|
||
maximum stack size you will need, set it to something, then return from
|
||
its scope.
|
||
|
||
|
||
File: libc.info, Node: Page Lock Functions, Prev: Locked Memory Details, Up: Locking Pages
|
||
|
||
3.5.3 Functions To Lock And Unlock Pages
|
||
----------------------------------------
|
||
|
||
The symbols in this section are declared in `sys/mman.h'. These
|
||
functions are defined by POSIX.1b, but their availability depends on
|
||
your kernel. If your kernel doesn't allow these functions, they exist
|
||
but always fail. They _are_ available with a Linux kernel.
|
||
|
||
*Portability Note:* POSIX.1b requires that when the `mlock' and
|
||
`munlock' functions are available, the file `unistd.h' define the macro
|
||
`_POSIX_MEMLOCK_RANGE' and the file `limits.h' define the macro
|
||
`PAGESIZE' to be the size of a memory page in bytes. It requires that
|
||
when the `mlockall' and `munlockall' functions are available, the
|
||
`unistd.h' file define the macro `_POSIX_MEMLOCK'. The GNU C Library
|
||
conforms to this requirement.
|
||
|
||
-- Function: int mlock (const void *ADDR, size_t LEN)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`mlock' locks a range of the calling process' virtual pages.
|
||
|
||
The range of memory starts at address ADDR and is LEN bytes long.
|
||
Actually, since you must lock whole pages, it is the range of
|
||
pages that include any part of the specified range.
|
||
|
||
When the function returns successfully, each of those pages is
|
||
backed by (connected to) a real frame (is resident) and is marked
|
||
to stay that way. This means the function may cause page-ins and
|
||
have to wait for them.
|
||
|
||
When the function fails, it does not affect the lock status of any
|
||
pages.
|
||
|
||
The return value is zero if the function succeeds. Otherwise, it
|
||
is `-1' and `errno' is set accordingly. `errno' values specific
|
||
to this function are:
|
||
|
||
`ENOMEM'
|
||
* At least some of the specified address range does not
|
||
exist in the calling process' virtual address space.
|
||
|
||
* The locking would cause the process to exceed its locked
|
||
page limit.
|
||
|
||
`EPERM'
|
||
The calling process is not superuser.
|
||
|
||
`EINVAL'
|
||
LEN is not positive.
|
||
|
||
`ENOSYS'
|
||
The kernel does not provide `mlock' capability.
|
||
|
||
|
||
-- Function: int mlock2 (const void *ADDR, size_t LEN, unsigned int
|
||
FLAGS)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function is similar to `mlock'. If FLAGS is zero, a call to
|
||
`mlock2' behaves exactly as the equivalent call to `mlock'.
|
||
|
||
The FLAGS argument must be a combination of zero or more of the
|
||
following flags:
|
||
|
||
`MLOCK_ONFAULT'
|
||
Only those pages in the specified address range which are
|
||
already in memory are locked immediately. Additional pages
|
||
in the range are automatically locked in case of a page fault
|
||
and allocation of memory.
|
||
|
||
Like `mlock', `mlock2' returns zero on success and `-1' on
|
||
failure, setting `errno' accordingly. Additional `errno' values
|
||
defined for `mlock2' are:
|
||
|
||
`EINVAL'
|
||
The specified (non-zero) FLAGS argument is not supported by
|
||
this system.
|
||
|
||
You can lock _all_ of a process' memory with `mlockall'. You unlock
|
||
memory with `munlock' or `munlockall'.
|
||
|
||
To avoid all page faults in a C program, you have to use `mlockall',
|
||
because some of the memory a program uses is hidden from the C code,
|
||
e.g. the stack and automatic variables, and you wouldn't know what
|
||
address to tell `mlock'.
|
||
|
||
-- Function: int munlock (const void *ADDR, size_t LEN)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`munlock' unlocks a range of the calling process' virtual pages.
|
||
|
||
`munlock' is the inverse of `mlock' and functions completely
|
||
analogously to `mlock', except that there is no `EPERM' failure.
|
||
|
||
|
||
-- Function: int mlockall (int FLAGS)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`mlockall' locks all the pages in a process' virtual memory address
|
||
space, and/or any that are added to it in the future. This
|
||
includes the pages of the code, data and stack segment, as well as
|
||
shared libraries, user space kernel data, shared memory, and
|
||
memory mapped files.
|
||
|
||
FLAGS is a set of single-bit flags represented by the following
|
||
macros. They tell `mlockall' which of its functions you want. All
|
||
other bits must be zero.
|
||
|
||
`MCL_CURRENT'
|
||
Lock all pages which currently exist in the calling process'
|
||
virtual address space.
|
||
|
||
`MCL_FUTURE'
|
||
Set a mode such that any pages added to the process' virtual
|
||
address space in the future will be locked from birth. This
|
||
mode does not affect future address spaces owned by the same
|
||
process, so exec, which replaces a process' address space,
|
||
wipes out `MCL_FUTURE'. *Note Executing a File::.
|
||
|
||
|
||
When the function returns successfully, and you specified
|
||
`MCL_CURRENT', all of the process' pages are backed by (connected
|
||
to) real frames (they are resident) and are marked to stay that
|
||
way. This means the function may cause page-ins and have to wait
|
||
for them.
|
||
|
||
When the process is in `MCL_FUTURE' mode because it successfully
|
||
executed this function and specified `MCL_FUTURE', any system call
|
||
by the process that requires space be added to its virtual address
|
||
space fails with `errno' = `ENOMEM' if locking the additional space
|
||
would cause the process to exceed its locked page limit. In the
|
||
case that the address space addition that can't be accommodated is
|
||
stack expansion, the stack expansion fails and the kernel sends a
|
||
`SIGSEGV' signal to the process.
|
||
|
||
When the function fails, it does not affect the lock status of any
|
||
pages or the future locking mode.
|
||
|
||
The return value is zero if the function succeeds. Otherwise, it
|
||
is `-1' and `errno' is set accordingly. `errno' values specific
|
||
to this function are:
|
||
|
||
`ENOMEM'
|
||
* At least some of the specified address range does not
|
||
exist in the calling process' virtual address space.
|
||
|
||
* The locking would cause the process to exceed its locked
|
||
page limit.
|
||
|
||
`EPERM'
|
||
The calling process is not superuser.
|
||
|
||
`EINVAL'
|
||
Undefined bits in FLAGS are not zero.
|
||
|
||
`ENOSYS'
|
||
The kernel does not provide `mlockall' capability.
|
||
|
||
|
||
You can lock just specific pages with `mlock'. You unlock pages
|
||
with `munlockall' and `munlock'.
|
||
|
||
|
||
-- Function: int munlockall (void)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`munlockall' unlocks every page in the calling process' virtual
|
||
address space and turns off `MCL_FUTURE' future locking mode.
|
||
|
||
The return value is zero if the function succeeds. Otherwise, it
|
||
is `-1' and `errno' is set accordingly. The only way this
|
||
function can fail is for generic reasons that all functions and
|
||
system calls can fail, so there are no specific `errno' values.
|
||
|
||
|
||
|
||
File: libc.info, Node: Character Handling, Next: String and Array Utilities, Prev: Memory, Up: Top
|
||
|
||
4 Character Handling
|
||
********************
|
||
|
||
Programs that work with characters and strings often need to classify a
|
||
character--is it alphabetic, is it a digit, is it whitespace, and so
|
||
on--and perform case conversion operations on characters. The
|
||
functions in the header file `ctype.h' are provided for this purpose.
|
||
|
||
Since the choice of locale and character set can alter the
|
||
classifications of particular character codes, all of these functions
|
||
are affected by the current locale. (More precisely, they are affected
|
||
by the locale currently selected for character classification--the
|
||
`LC_CTYPE' category; see *Note Locale Categories::.)
|
||
|
||
The ISO C standard specifies two different sets of functions. The
|
||
one set works on `char' type characters, the other one on `wchar_t'
|
||
wide characters (*note Extended Char Intro::).
|
||
|
||
* Menu:
|
||
|
||
* Classification of Characters:: Testing whether characters are
|
||
letters, digits, punctuation, etc.
|
||
|
||
* Case Conversion:: Case mapping, and the like.
|
||
* Classification of Wide Characters:: Character class determination for
|
||
wide characters.
|
||
* Using Wide Char Classes:: Notes on using the wide character
|
||
classes.
|
||
* Wide Character Case Conversion:: Mapping of wide characters.
|
||
|
||
|
||
File: libc.info, Node: Classification of Characters, Next: Case Conversion, Up: Character Handling
|
||
|
||
4.1 Classification of Characters
|
||
================================
|
||
|
||
This section explains the library functions for classifying characters.
|
||
For example, `isalpha' is the function to test for an alphabetic
|
||
character. It takes one argument, the character to test, and returns a
|
||
nonzero integer if the character is alphabetic, and zero otherwise. You
|
||
would use it like this:
|
||
|
||
if (isalpha (c))
|
||
printf ("The character `%c' is alphabetic.\n", c);
|
||
|
||
Each of the functions in this section tests for membership in a
|
||
particular class of characters; each has a name starting with `is'.
|
||
Each of them takes one argument, which is a character to test, and
|
||
returns an `int' which is treated as a boolean value. The character
|
||
argument is passed as an `int', and it may be the constant value `EOF'
|
||
instead of a real character.
|
||
|
||
The attributes of any given character can vary between locales.
|
||
*Note Locales::, for more information on locales.
|
||
|
||
These functions are declared in the header file `ctype.h'.
|
||
|
||
-- Function: int islower (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Returns true if C is a lower-case letter. The letter need not be
|
||
from the Latin alphabet; any representable alphabet is valid.
|
||
|
||
-- Function: int isupper (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Returns true if C is an upper-case letter. The letter need not be
|
||
from the Latin alphabet; any representable alphabet is valid.
|
||
|
||
-- Function: int isalpha (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Returns true if C is an alphabetic character (a letter). If
|
||
`islower' or `isupper' is true of a character, then `isalpha' is
|
||
also true.
|
||
|
||
In some locales, there may be additional characters for which
|
||
`isalpha' is true--letters which are neither upper case nor lower
|
||
case. But in the standard `"C"' locale, there are no such
|
||
additional characters.
|
||
|
||
-- Function: int isdigit (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Returns true if C is a decimal digit (`0' through `9').
|
||
|
||
-- Function: int isalnum (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Returns true if C is an alphanumeric character (a letter or
|
||
number); in other words, if either `isalpha' or `isdigit' is true
|
||
of a character, then `isalnum' is also true.
|
||
|
||
-- Function: int isxdigit (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Returns true if C is a hexadecimal digit. Hexadecimal digits
|
||
include the normal decimal digits `0' through `9' and the letters
|
||
`A' through `F' and `a' through `f'.
|
||
|
||
-- Function: int ispunct (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Returns true if C is a punctuation character. This means any
|
||
printing character that is not alphanumeric or a space character.
|
||
|
||
-- Function: int isspace (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Returns true if C is a "whitespace" character. In the standard
|
||
`"C"' locale, `isspace' returns true for only the standard
|
||
whitespace characters:
|
||
|
||
`' ''
|
||
space
|
||
|
||
`'\f''
|
||
formfeed
|
||
|
||
`'\n''
|
||
newline
|
||
|
||
`'\r''
|
||
carriage return
|
||
|
||
`'\t''
|
||
horizontal tab
|
||
|
||
`'\v''
|
||
vertical tab
|
||
|
||
-- Function: int isblank (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Returns true if C is a blank character; that is, a space or a tab.
|
||
This function was originally a GNU extension, but was added in
|
||
ISO C99.
|
||
|
||
-- Function: int isgraph (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Returns true if C is a graphic character; that is, a character
|
||
that has a glyph associated with it. The whitespace characters
|
||
are not considered graphic.
|
||
|
||
-- Function: int isprint (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Returns true if C is a printing character. Printing characters
|
||
include all the graphic characters, plus the space (` ') character.
|
||
|
||
-- Function: int iscntrl (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Returns true if C is a control character (that is, a character that
|
||
is not a printing character).
|
||
|
||
-- Function: int isascii (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Returns true if C is a 7-bit `unsigned char' value that fits into
|
||
the US/UK ASCII character set. This function is a BSD extension
|
||
and is also an SVID extension.
|
||
|
||
|
||
File: libc.info, Node: Case Conversion, Next: Classification of Wide Characters, Prev: Classification of Characters, Up: Character Handling
|
||
|
||
4.2 Case Conversion
|
||
===================
|
||
|
||
This section explains the library functions for performing conversions
|
||
such as case mappings on characters. For example, `toupper' converts
|
||
any character to upper case if possible. If the character can't be
|
||
converted, `toupper' returns it unchanged.
|
||
|
||
These functions take one argument of type `int', which is the
|
||
character to convert, and return the converted character as an `int'.
|
||
If the conversion is not applicable to the argument given, the argument
|
||
is returned unchanged.
|
||
|
||
*Compatibility Note:* In pre-ISO C dialects, instead of returning
|
||
the argument unchanged, these functions may fail when the argument is
|
||
not suitable for the conversion. Thus for portability, you may need to
|
||
write `islower(c) ? toupper(c) : c' rather than just `toupper(c)'.
|
||
|
||
These functions are declared in the header file `ctype.h'.
|
||
|
||
-- Function: int tolower (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
If C is an upper-case letter, `tolower' returns the corresponding
|
||
lower-case letter. If C is not an upper-case letter, C is
|
||
returned unchanged.
|
||
|
||
-- Function: int toupper (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
If C is a lower-case letter, `toupper' returns the corresponding
|
||
upper-case letter. Otherwise C is returned unchanged.
|
||
|
||
-- Function: int toascii (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function converts C to a 7-bit `unsigned char' value that
|
||
fits into the US/UK ASCII character set, by clearing the high-order
|
||
bits. This function is a BSD extension and is also an SVID
|
||
extension.
|
||
|
||
-- Function: int _tolower (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This is identical to `tolower', and is provided for compatibility
|
||
with the SVID. *Note SVID::.
|
||
|
||
-- Function: int _toupper (int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This is identical to `toupper', and is provided for compatibility
|
||
with the SVID.
|
||
|
||
|
||
File: libc.info, Node: Classification of Wide Characters, Next: Using Wide Char Classes, Prev: Case Conversion, Up: Character Handling
|
||
|
||
4.3 Character class determination for wide characters
|
||
=====================================================
|
||
|
||
Amendment 1 to ISO C90 defines functions to classify wide characters.
|
||
Although the original ISO C90 standard already defined the type
|
||
`wchar_t', no functions operating on them were defined.
|
||
|
||
The design of the classification functions for wide
|
||
characters is more general. It allows extensions to the set of
|
||
available classifications, beyond those which are always available.
|
||
The POSIX standard specifies how extensions can be made, and this is
|
||
already implemented in the GNU C Library implementation of the
|
||
`localedef' program.
|
||
|
||
The character class functions are normally implemented with bitsets,
|
||
with a bitset per character. For a given character, the appropriate
|
||
bitset is read from a table and a test is performed as to whether a
|
||
certain bit is set. Which bit is tested for is determined by the class.
|
||
|
||
For the wide character classification functions this is made visible.
|
||
There is a classification type defined, a function to retrieve this
|
||
value for a given class, and a function to test whether a given
|
||
character is in this class, using the classification value. On top of
|
||
this the normal character classification functions as used for `char'
|
||
objects can be defined.
|
||
|
||
-- Data type: wctype_t
|
||
The `wctype_t' can hold a value which represents a character class.
|
||
The only defined way to generate such a value is by using the
|
||
`wctype' function.
|
||
|
||
This type is defined in `wctype.h'.
|
||
|
||
-- Function: wctype_t wctype (const char *PROPERTY)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
`wctype' returns a value representing a class of wide characters
|
||
which is identified by the string PROPERTY. Besides some standard
|
||
properties each locale can define its own. In case no
|
||
property with the given name is known for the current locale
|
||
selected for the `LC_CTYPE' category, the function returns zero.
|
||
|
||
The properties known in every locale are:
|
||
|
||
`"alnum"' `"alpha"' `"cntrl"' `"digit"'
|
||
`"graph"' `"lower"' `"print"' `"punct"'
|
||
`"space"' `"upper"' `"xdigit"'
|
||
|
||
This function is declared in `wctype.h'.
|
||
|
||
To test the membership of a character in one of the non-standard
|
||
classes the ISO C standard defines a completely new function.
|
||
|
||
-- Function: int iswctype (wint_t WC, wctype_t DESC)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function returns a nonzero value if WC is in the character
|
||
class specified by DESC. DESC must previously have been returned by a
|
||
successful call to `wctype'.
|
||
|
||
This function is declared in `wctype.h'.
|
||
|
||
To make it easier to use the commonly-used classification functions,
|
||
they are defined in the C library. There is no need to use `wctype' if
|
||
the property string is one of the known character classes. In some
|
||
situations it is desirable to construct the property strings, and then
|
||
it is important that `wctype' can also handle the standard classes.
|
||
|
||
-- Function: int iswalnum (wint_t WC)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
This function returns a nonzero value if WC is an alphanumeric
|
||
character (a letter or number); in other words, if either
|
||
`iswalpha' or `iswdigit' is true of a character, then `iswalnum'
|
||
is also true.
|
||
|
||
This function can be implemented using
|
||
|
||
iswctype (wc, wctype ("alnum"))
|
||
|
||
It is declared in `wctype.h'.
|
||
|
||
-- Function: int iswalpha (wint_t WC)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
Returns true if WC is an alphabetic character (a letter). If
|
||
`iswlower' or `iswupper' is true of a character, then `iswalpha'
|
||
is also true.
|
||
|
||
In some locales, there may be additional characters for which
|
||
`iswalpha' is true--letters which are neither upper case nor lower
|
||
case. But in the standard `"C"' locale, there are no such
|
||
additional characters.
|
||
|
||
This function can be implemented using
|
||
|
||
iswctype (wc, wctype ("alpha"))
|
||
|
||
It is declared in `wctype.h'.
|
||
|
||
-- Function: int iswcntrl (wint_t WC)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
Returns true if WC is a control character (that is, a character
|
||
that is not a printing character).
|
||
|
||
This function can be implemented using
|
||
|
||
iswctype (wc, wctype ("cntrl"))
|
||
|
||
It is declared in `wctype.h'.
|
||
|
||
-- Function: int iswdigit (wint_t WC)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
Returns true if WC is a digit (e.g., `0' through `9'). Please
|
||
note that this function does not only return a nonzero value for
|
||
_decimal_ digits, but for all kinds of digits. A consequence is
|
||
that code like the following will *not* work unconditionally for
|
||
wide characters:
|
||
|
||
n = 0;
|
||
while (iswdigit (*wc))
|
||
{
|
||
n *= 10;
|
||
n += *wc++ - L'0';
|
||
}
|
||
|
||
This function can be implemented using
|
||
|
||
iswctype (wc, wctype ("digit"))
|
||
|
||
It is declared in `wctype.h'.
|
||
|
||
-- Function: int iswgraph (wint_t WC)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
Returns true if WC is a graphic character; that is, a character
|
||
that has a glyph associated with it. The whitespace characters
|
||
are not considered graphic.
|
||
|
||
This function can be implemented using
|
||
|
||
iswctype (wc, wctype ("graph"))
|
||
|
||
It is declared in `wctype.h'.
|
||
|
||
-- Function: int iswlower (wint_t WC)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
Returns true if WC is a lower-case letter. The letter need not be
|
||
from the Latin alphabet; any representable alphabet is valid.
|
||
|
||
This function can be implemented using
|
||
|
||
iswctype (wc, wctype ("lower"))
|
||
|
||
It is declared in `wctype.h'.
|
||
|
||
-- Function: int iswprint (wint_t WC)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
Returns true if WC is a printing character. Printing characters
|
||
include all the graphic characters, plus the space (` ') character.
|
||
|
||
This function can be implemented using
|
||
|
||
iswctype (wc, wctype ("print"))
|
||
|
||
It is declared in `wctype.h'.
|
||
|
||
-- Function: int iswpunct (wint_t WC)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
Returns true if WC is a punctuation character. This means any
|
||
printing character that is not alphanumeric or a space character.
|
||
|
||
This function can be implemented using
|
||
|
||
iswctype (wc, wctype ("punct"))
|
||
|
||
It is declared in `wctype.h'.
|
||
|
||
-- Function: int iswspace (wint_t WC)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
Returns true if WC is a "whitespace" character. In the standard
|
||
`"C"' locale, `iswspace' returns true for only the standard
|
||
whitespace characters:
|
||
|
||
`L' ''
|
||
space
|
||
|
||
`L'\f''
|
||
formfeed
|
||
|
||
`L'\n''
|
||
newline
|
||
|
||
`L'\r''
|
||
carriage return
|
||
|
||
`L'\t''
|
||
horizontal tab
|
||
|
||
`L'\v''
|
||
vertical tab
|
||
|
||
This function can be implemented using
|
||
|
||
iswctype (wc, wctype ("space"))
|
||
|
||
It is declared in `wctype.h'.
|
||
|
||
-- Function: int iswupper (wint_t WC)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
Returns true if WC is an upper-case letter. The letter need not be
|
||
from the Latin alphabet; any representable alphabet is valid.
|
||
|
||
This function can be implemented using
|
||
|
||
iswctype (wc, wctype ("upper"))
|
||
|
||
It is declared in `wctype.h'.
|
||
|
||
-- Function: int iswxdigit (wint_t WC)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
Returns true if WC is a hexadecimal digit. Hexadecimal digits
|
||
include the normal decimal digits `0' through `9' and the letters
|
||
`A' through `F' and `a' through `f'.
|
||
|
||
This function can be implemented using
|
||
|
||
iswctype (wc, wctype ("xdigit"))
|
||
|
||
It is declared in `wctype.h'.
|
||
|
||
The GNU C Library also provides a function which is not defined in
|
||
the ISO C standard but which is available as a version for single byte
|
||
characters as well.
|
||
|
||
-- Function: int iswblank (wint_t WC)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
Returns true if WC is a blank character; that is, a space or a tab.
|
||
This function was originally a GNU extension, but was added in
|
||
ISO C99. It is declared in `wctype.h'.
|
||
|
||
|
||
File: libc.info, Node: Using Wide Char Classes, Next: Wide Character Case Conversion, Prev: Classification of Wide Characters, Up: Character Handling
|
||
|
||
4.4 Notes on using the wide character classes
|
||
=============================================
|
||
|
||
The first note is probably not astonishing but still occasionally a
|
||
cause of problems. The `iswXXX' functions can be implemented using
|
||
macros and in fact, the GNU C Library does this. They are still
|
||
available as real functions but when the `wctype.h' header is included
|
||
the macros will be used. This is the same as the `char' type versions
|
||
of these functions.
|
||
|
||
The second note covers something new. It can be best illustrated by
|
||
a (real-world) example. The first piece of code is an excerpt from the
|
||
original code. It is truncated a bit but the intention should be clear.
|
||
|
||
int
|
||
is_in_class (int c, const char *class)
|
||
{
|
||
if (strcmp (class, "alnum") == 0)
|
||
return isalnum (c);
|
||
if (strcmp (class, "alpha") == 0)
|
||
return isalpha (c);
|
||
if (strcmp (class, "cntrl") == 0)
|
||
return iscntrl (c);
|
||
...
|
||
return 0;
|
||
}
|
||
|
||
Now, with the `wctype' and `iswctype' functions you can avoid the `if'
|
||
cascades, but rewriting the code as follows is wrong:
|
||
|
||
int
|
||
is_in_class (int c, const char *class)
|
||
{
|
||
wctype_t desc = wctype (class);
|
||
return desc ? iswctype ((wint_t) c, desc) : 0;
|
||
}
|
||
|
||
The problem is that it is not guaranteed that the wide character
|
||
representation of a single-byte character can be found using casting.
|
||
In fact, usually this fails miserably. The correct solution to this
|
||
problem is to write the code as follows:
|
||
|
||
int
|
||
is_in_class (int c, const char *class)
|
||
{
|
||
wctype_t desc = wctype (class);
|
||
return desc ? iswctype (btowc (c), desc) : 0;
|
||
}
|
||
|
||
*Note Converting a Character::, for more information on `btowc'.
|
||
Note that this change probably does not improve the performance of the
|
||
program a lot since the `wctype' function still has to make the string
|
||
comparisons. It gets really interesting if the `is_in_class' function
|
||
is called more than once for the same class name. In this case the
|
||
variable DESC could be computed once and reused for all the calls.
|
||
Therefore the above form of the function is probably not the final one.
|
||
|
||
|
||
File: libc.info, Node: Wide Character Case Conversion, Prev: Using Wide Char Classes, Up: Character Handling
|
||
|
||
4.5 Mapping of wide characters.
|
||
===============================
|
||
|
||
The mapping functions are also generalized by the ISO C
|
||
standard. Instead of just allowing the two standard mappings, a locale
|
||
can contain others. Again, the `localedef' program already supports
|
||
generating such locale data files.
|
||
|
||
-- Data Type: wctrans_t
|
||
This data type is defined as a scalar type which can hold a value
|
||
representing the locale-dependent character mapping. There is no
|
||
way to construct such a value apart from using the return value of
|
||
the `wctrans' function.
|
||
|
||
This type is defined in `wctype.h'.
|
||
|
||
-- Function: wctrans_t wctrans (const char *PROPERTY)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
The `wctrans' function has to be used to find out whether a named
|
||
mapping is defined in the current locale selected for the
|
||
`LC_CTYPE' category. If the returned value is non-zero, you can
|
||
use it afterwards in calls to `towctrans'. If the return value is
|
||
zero no such mapping is known in the current locale.
|
||
|
||
Besides locale-specific mappings there are two mappings which are
|
||
guaranteed to be available in every locale:
|
||
|
||
`"tolower"' `"toupper"'
|
||
|
||
This function is declared in `wctype.h'.
|
||
|
||
-- Function: wint_t towctrans (wint_t WC, wctrans_t DESC)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`towctrans' maps the input character WC according to the rules of
|
||
the mapping for which DESC is a descriptor, and returns the value
|
||
it finds. DESC must be obtained by a successful call to `wctrans'.
|
||
|
||
This function is declared in `wctype.h'.
|
||
|
||
For the generally available mappings, the ISO C standard defines
|
||
convenient shortcuts so that it is not necessary to call `wctrans' for
|
||
them.
|
||
|
||
-- Function: wint_t towlower (wint_t WC)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
If WC is an upper-case letter, `towlower' returns the corresponding
|
||
lower-case letter. If WC is not an upper-case letter, WC is
|
||
returned unchanged.
|
||
|
||
`towlower' can be implemented using
|
||
|
||
towctrans (wc, wctrans ("tolower"))
|
||
|
||
This function is declared in `wctype.h'.
|
||
|
||
-- Function: wint_t towupper (wint_t WC)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
If WC is a lower-case letter, `towupper' returns the corresponding
|
||
upper-case letter. Otherwise WC is returned unchanged.
|
||
|
||
`towupper' can be implemented using
|
||
|
||
towctrans (wc, wctrans ("toupper"))
|
||
|
||
This function is declared in `wctype.h'.
|
||
|
||
The same warnings given in the last section for the use of the wide
|
||
character classification functions apply here. It is not possible to
|
||
simply cast a `char' type value to a `wint_t' and use it as an argument
|
||
to `towctrans' calls.
|
||
|
||
|
||
File: libc.info, Node: String and Array Utilities, Next: Character Set Handling, Prev: Character Handling, Up: Top
|
||
|
||
5 String and Array Utilities
|
||
****************************
|
||
|
||
Operations on strings (null-terminated byte sequences) are an important
|
||
part of many programs. The GNU C Library provides an extensive set of
|
||
string utility functions, including functions for copying,
|
||
concatenating, comparing, and searching strings. Many of these
|
||
functions can also operate on arbitrary regions of storage; for
|
||
example, the `memcpy' function can be used to copy the contents of any
|
||
kind of array.
|
||
|
||
It's fairly common for beginning C programmers to "reinvent the
|
||
wheel" by duplicating this functionality in their own code, but it pays
|
||
to become familiar with the library functions and to make use of them,
|
||
since this offers benefits in maintenance, efficiency, and portability.
|
||
|
||
For instance, you could easily compare one string to another in two
|
||
lines of C code, but if you use the built-in `strcmp' function, you're
|
||
less likely to make a mistake. And, since these library functions are
|
||
typically highly optimized, your program may run faster too.
|
||
|
||
* Menu:
|
||
|
||
* Representation of Strings:: Introduction to basic concepts.
|
||
* String/Array Conventions:: Whether to use a string function or an
|
||
arbitrary array function.
|
||
* String Length:: Determining the length of a string.
|
||
* Copying Strings and Arrays:: Functions to copy strings and arrays.
|
||
* Concatenating Strings:: Functions to concatenate strings while copying.
|
||
* Truncating Strings:: Functions to truncate strings while copying.
|
||
* String/Array Comparison:: Functions for byte-wise and character-wise
|
||
comparison.
|
||
* Collation Functions:: Functions for collating strings.
|
||
* Search Functions:: Searching for a specific element or substring.
|
||
* Finding Tokens in a String:: Splitting a string into tokens by looking
|
||
for delimiters.
|
||
* Erasing Sensitive Data:: Clearing memory which contains sensitive
|
||
data, after it's no longer needed.
|
||
* Shuffling Bytes:: Or how to flash-cook a string.
|
||
* Obfuscating Data:: Reversibly obscuring data from casual view.
|
||
* Encode Binary Data:: Encoding and Decoding of Binary Data.
|
||
* Argz and Envz Vectors:: Null-separated string vectors.
|
||
|
||
|
||
File: libc.info, Node: Representation of Strings, Next: String/Array Conventions, Up: String and Array Utilities
|
||
|
||
5.1 Representation of Strings
|
||
=============================
|
||
|
||
This section is a quick summary of string concepts for beginning C
|
||
programmers. It describes how strings are represented in C and some
|
||
common pitfalls. If you are already familiar with this material, you
|
||
can skip this section.
|
||
|
||
A "string" is a null-terminated array of bytes of type `char',
|
||
including the terminating null byte. String-valued variables are
|
||
usually declared to be pointers of type `char *'. Such variables do
|
||
not include space for the text of a string; that has to be stored
|
||
somewhere else--in an array variable, a string constant, or dynamically
|
||
allocated memory (*note Memory Allocation::). It's up to you to store
|
||
the address of the chosen memory space into the pointer variable.
|
||
Alternatively you can store a "null pointer" in the pointer variable.
|
||
The null pointer does not point anywhere, so attempting to reference
|
||
the string it points to gets an error.
|
||
|
||
A "multibyte character" is a sequence of one or more bytes that
|
||
represents a single character using the locale's encoding scheme; a
|
||
null byte always represents the null character. A "multibyte string"
|
||
is a string that consists entirely of multibyte characters. In
|
||
contrast, a "wide string" is a null-terminated sequence of `wchar_t'
|
||
objects. A wide-string variable is usually declared to be a pointer of
|
||
type `wchar_t *', by analogy with string variables and `char *'. *Note
|
||
Extended Char Intro::.
|
||
|
||
By convention, the "null byte", `'\0'', marks the end of a string
|
||
and the "null wide character", `L'\0'', marks the end of a wide string.
|
||
For example, in testing to see whether the `char *' variable P points
|
||
to a null byte marking the end of a string, you can write `!*P' or `*P
|
||
== '\0''.
|
||
|
||
A null byte is quite different conceptually from a null pointer,
|
||
although both are represented by the integer constant `0'.
|
||
|
||
A "string literal" appears in C program source as a multibyte string
|
||
between double-quote characters (`"'). If the initial double-quote
|
||
character is immediately preceded by a capital `L' (ell) character (as
|
||
in `L"foo"'), it is a wide string literal. String literals can also
|
||
contribute to "string concatenation": `"a" "b"' is the same as `"ab"'.
|
||
For wide strings one can use either `L"a" L"b"' or `L"a" "b"'.
|
||
Modification of string literals is not allowed by the GNU C compiler,
|
||
because literals are placed in read-only storage.
|
||
|
||
Arrays that are declared `const' cannot be modified either. It's
|
||
generally good style to declare non-modifiable string pointers to be of
|
||
type `const char *', since this often allows the C compiler to detect
|
||
accidental modifications as well as providing some amount of
|
||
documentation about what your program intends to do with the string.
|
||
|
||
The amount of memory allocated for a byte array may extend past the
|
||
null byte that marks the end of the string that the array contains. In
|
||
this document, the term "allocated size" is always used to refer to the
|
||
total amount of memory allocated for an array, while the term "length"
|
||
refers to the number of bytes up to (but not including) the terminating
|
||
null byte. Wide strings are similar, except their sizes and lengths
|
||
count wide characters, not bytes.
|
||
|
||
A notorious source of program bugs is trying to put more bytes into a
|
||
string than fit in its allocated size. When writing code that extends
|
||
strings or moves bytes into a pre-allocated array, you should be very
|
||
careful to keep track of the length of the text and make explicit
|
||
checks for overflowing the array. Many of the library functions _do
|
||
not_ do this for you! Remember also that you need to allocate an extra
|
||
byte to hold the null byte that marks the end of the string.
|
||
|
||
Originally strings were sequences of bytes where each byte
|
||
represented a single character. This is still true today if the
|
||
strings are encoded using a single-byte character encoding. Things are
|
||
different if the strings are encoded using a multibyte encoding (for
|
||
more information on encodings see *Note Extended Char Intro::). There
|
||
is no difference in the programming interface for these two kinds of
|
||
strings; the programmer has to be aware of this and interpret the byte
|
||
sequences accordingly.
|
||
|
||
But since there is no separate interface taking care of these
|
||
differences the byte-based string functions are sometimes hard to use.
|
||
Since the count parameters of these functions specify bytes a call to
|
||
`memcpy' could cut a multibyte character in the middle and put an
|
||
incomplete (and therefore unusable) byte sequence in the target buffer.
|
||
|
||
To avoid these problems later versions of the ISO C standard
|
||
introduce a second set of functions which operate on "wide
|
||
characters" (*note Extended Char Intro::). These functions don't have
|
||
the problems the single-byte versions have since every wide character is
|
||
a legal, interpretable value. This does not mean that cutting wide
|
||
strings at arbitrary points is without problems. It normally is for
|
||
alphabet-based languages (except for non-normalized text) but languages
|
||
based on syllables still have the problem that more than one wide
|
||
character is necessary to complete a logical unit. This is a higher
|
||
level problem which the C library functions are not designed to solve.
|
||
But it is at least good that no invalid byte sequences can be created.
|
||
Also, the higher level functions can much more easily operate on
|
||
wide characters than on multibyte characters so that a common strategy
|
||
is to use wide characters internally whenever text is more than simply
|
||
copied.
|
||
|
||
The remainder of this chapter will discuss the functions for handling
|
||
wide strings in parallel with the discussion of strings since there is
|
||
almost always an exact equivalent available.
|
||
|
||
|
||
File: libc.info, Node: String/Array Conventions, Next: String Length, Prev: Representation of Strings, Up: String and Array Utilities
|
||
|
||
5.2 String and Array Conventions
|
||
================================
|
||
|
||
This chapter describes both functions that work on arbitrary arrays or
|
||
blocks of memory, and functions that are specific to strings and wide
|
||
strings.
|
||
|
||
Functions that operate on arbitrary blocks of memory have names
|
||
beginning with `mem' and `wmem' (such as `memcpy' and `wmemcpy') and
|
||
invariably take an argument which specifies the size (in bytes and wide
|
||
characters respectively) of the block of memory to operate on. The
|
||
array arguments and return values for these functions have type `void
|
||
*' or `wchar_t *'. As a matter of style, the elements of the arrays used
|
||
with the `mem' functions are referred to as "bytes". You can pass any
|
||
kind of pointer to these functions, and the `sizeof' operator is useful
|
||
in computing the value for the size argument. Parameters to the `wmem'
|
||
functions must be of type `wchar_t *'. These functions are not really
|
||
usable with anything but arrays of this type.
|
||
|
||
In contrast, functions that operate specifically on strings and wide
|
||
strings have names beginning with `str' and `wcs' respectively (such as
|
||
`strcpy' and `wcscpy') and look for a terminating null byte or null
|
||
wide character instead of requiring an explicit size argument to be
|
||
passed. (Some of these functions accept a specified maximum length,
|
||
but they also check for premature termination.) The array arguments
|
||
and return values for these functions have type `char *' and `wchar_t
|
||
*' respectively, and the array elements are referred to as "bytes" and
|
||
"wide characters".
|
||
|
||
In many cases, there are both `mem' and `str'/`wcs' versions of a
|
||
function. The one that is more appropriate to use depends on the exact
|
||
situation. When your program is manipulating arbitrary arrays or
|
||
blocks of storage, then you should always use the `mem' functions. On
|
||
the other hand, when you are manipulating strings it is usually more
|
||
convenient to use the `str'/`wcs' functions, unless you already know
|
||
the length of the string in advance. The `wmem' functions should be
|
||
used for wide character arrays with known size.
|
||
|
||
Some of the memory and string functions take single characters as
|
||
arguments. Since a value of type `char' is automatically promoted into
|
||
a value of type `int' when used as a parameter, the functions are
|
||
declared with `int' as the type of the parameter in question. In case
|
||
of the wide character functions the situation is similar: the parameter
|
||
type for a single wide character is `wint_t' and not `wchar_t'. This
|
||
would for many implementations not be necessary since `wchar_t' is
|
||
large enough to not be automatically promoted, but since the ISO C
|
||
standard does not require such a choice of types the `wint_t' type is
|
||
used.
|
||
|
||
|
||
File: libc.info, Node: String Length, Next: Copying Strings and Arrays, Prev: String/Array Conventions, Up: String and Array Utilities
|
||
|
||
5.3 String Length
|
||
=================
|
||
|
||
You can get the length of a string using the `strlen' function. This
|
||
function is declared in the header file `string.h'.
|
||
|
||
-- Function: size_t strlen (const char *S)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `strlen' function returns the length of the string S in bytes.
|
||
(In other words, it returns the offset of the terminating null
|
||
byte within the array.)
|
||
|
||
For example,
|
||
strlen ("hello, world")
|
||
=> 12
|
||
|
||
When applied to an array, the `strlen' function returns the length
|
||
of the string stored there, not its allocated size. You can get
|
||
the allocated size of the array that holds a string using the
|
||
`sizeof' operator:
|
||
|
||
char string[32] = "hello, world";
|
||
sizeof (string)
|
||
=> 32
|
||
strlen (string)
|
||
=> 12
|
||
|
||
But beware, this will not work unless STRING is the array itself,
|
||
not a pointer to it. For example:
|
||
|
||
char string[32] = "hello, world";
|
||
char *ptr = string;
|
||
sizeof (string)
|
||
=> 32
|
||
sizeof (ptr)
|
||
=> 4 /* (on a machine with 4 byte pointers) */
|
||
|
||
This is an easy mistake to make when you are working with
|
||
functions that take string arguments; those arguments are always
|
||
pointers, not arrays.
|
||
|
||
It must also be noted that for multibyte encoded strings the return
|
||
value does not have to correspond to the number of characters in
|
||
the string. To get this value the string can be converted to wide
|
||
characters and `wcslen' can be used or something like the following
|
||
code can be used:
|
||
|
||
/* The input is in `string'.
|
||
The length is expected in `n'. */
|
||
{
|
||
mbstate_t t;
|
||
char *scopy = string;
|
||
/* In initial state. */
|
||
memset (&t, '\0', sizeof (t));
|
||
/* Determine number of characters. */
|
||
n = mbsrtowcs (NULL, &scopy, strlen (scopy), &t);
|
||
}
|
||
|
||
This is cumbersome to do, so if the number of characters (as
|
||
opposed to bytes) is needed often it is better to work with wide
|
||
characters.
|
||
|
||
The wide character equivalent is declared in `wchar.h'.
|
||
|
||
-- Function: size_t wcslen (const wchar_t *WS)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `wcslen' function is the wide character equivalent to
|
||
`strlen'. The return value is the number of wide characters in the
|
||
wide string pointed to by WS (this is also the offset of the
|
||
terminating null wide character of WS).
|
||
|
||
Since there are no multi wide character sequences making up one
|
||
wide character the return value is not only the offset in the
|
||
array, it is also the number of wide characters.
|
||
|
||
This function was introduced in Amendment 1 to ISO C90.
|
||
|
||
-- Function: size_t strnlen (const char *S, size_t MAXLEN)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
If the array S of size MAXLEN contains a null byte, the `strnlen'
|
||
function returns the length of the string S in bytes. Otherwise it
|
||
returns MAXLEN. Therefore this function is equivalent to `(strlen
|
||
(S) < MAXLEN ? strlen (S) : MAXLEN)' but it is more efficient and
|
||
works even if S is not null-terminated so long as MAXLEN does not
|
||
exceed the size of S's array.
|
||
|
||
char string[32] = "hello, world";
|
||
strnlen (string, 32)
|
||
=> 12
|
||
strnlen (string, 5)
|
||
=> 5
|
||
|
||
This function is a GNU extension and is declared in `string.h'.
|
||
|
||
-- Function: size_t wcsnlen (const wchar_t *WS, size_t MAXLEN)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`wcsnlen' is the wide character equivalent to `strnlen'. The
|
||
MAXLEN parameter specifies the maximum number of wide characters.
|
||
|
||
This function is a GNU extension and is declared in `wchar.h'.
|
||
|
||
|
||
File: libc.info, Node: Copying Strings and Arrays, Next: Concatenating Strings, Prev: String Length, Up: String and Array Utilities
|
||
|
||
5.4 Copying Strings and Arrays
|
||
==============================
|
||
|
||
You can use the functions described in this section to copy the contents
|
||
of strings, wide strings, and arrays. The `str' and `mem' functions
|
||
are declared in `string.h' while the `w' functions are declared in
|
||
`wchar.h'.
|
||
|
||
A helpful way to remember the ordering of the arguments to the
|
||
functions in this section is that it corresponds to an assignment
|
||
expression, with the destination array specified to the left of the
|
||
source array. Most of these functions return the address of the
|
||
destination array; a few return the address of the destination's
|
||
terminating null, or of just past the destination.
|
||
|
||
Most of these functions do not work properly if the source and
|
||
destination arrays overlap. For example, if the beginning of the
|
||
destination array overlaps the end of the source array, the original
|
||
contents of that part of the source array may get overwritten before it
|
||
is copied. Even worse, in the case of the string functions, the null
|
||
byte marking the end of the string may be lost, and the copy function
|
||
might get stuck in a loop trashing all the memory allocated to your
|
||
program.
|
||
|
||
All functions that have problems copying between overlapping arrays
|
||
are explicitly identified in this manual. In addition to functions in
|
||
this section, there are a few others like `sprintf' (*note Formatted
|
||
Output Functions::) and `scanf' (*note Formatted Input Functions::).
|
||
|
||
-- Function: void * memcpy (void *restrict TO, const void *restrict
|
||
FROM, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `memcpy' function copies SIZE bytes from the object beginning
|
||
at FROM into the object beginning at TO. The behavior of this
|
||
function is undefined if the two arrays TO and FROM overlap; use
|
||
`memmove' instead if overlapping is possible.
|
||
|
||
The value returned by `memcpy' is the value of TO.
|
||
|
||
Here is an example of how you might use `memcpy' to copy the
|
||
contents of an array:
|
||
|
||
struct foo *oldarray, *newarray;
|
||
int arraysize;
|
||
...
|
||
memcpy (newarray, oldarray, arraysize * sizeof (struct foo));
|
||
|
||
-- Function: wchar_t * wmemcpy (wchar_t *restrict WTO, const wchar_t
|
||
*restrict WFROM, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `wmemcpy' function copies SIZE wide characters from the object
|
||
beginning at WFROM into the object beginning at WTO. The behavior
|
||
of this function is undefined if the two arrays WTO and WFROM
|
||
overlap; use `wmemmove' instead if overlapping is possible.
|
||
|
||
The following is a possible implementation of `wmemcpy' but there
|
||
are more optimizations possible.
|
||
|
||
wchar_t *
|
||
wmemcpy (wchar_t *restrict wto, const wchar_t *restrict wfrom,
|
||
size_t size)
|
||
{
|
||
return (wchar_t *) memcpy (wto, wfrom, size * sizeof (wchar_t));
|
||
}
|
||
|
||
The value returned by `wmemcpy' is the value of WTO.
|
||
|
||
This function was introduced in Amendment 1 to ISO C90.
|
||
|
||
-- Function: void * mempcpy (void *restrict TO, const void *restrict
|
||
FROM, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `mempcpy' function is nearly identical to the `memcpy'
|
||
function. It copies SIZE bytes from the object beginning at
|
||
FROM into the object pointed to by TO. But instead of returning
|
||
the value of TO it returns a pointer to the byte following the
|
||
last written byte in the object beginning at TO. I.e., the value
|
||
is `((void *) ((char *) TO + SIZE))'.
|
||
|
||
This function is useful in situations where a number of objects
|
||
shall be copied to consecutive memory positions.
|
||
|
||
void *
|
||
combine (void *o1, size_t s1, void *o2, size_t s2)
|
||
{
|
||
void *result = malloc (s1 + s2);
|
||
if (result != NULL)
|
||
mempcpy (mempcpy (result, o1, s1), o2, s2);
|
||
return result;
|
||
}
|
||
|
||
This function is a GNU extension.
|
||
|
||
-- Function: wchar_t * wmempcpy (wchar_t *restrict WTO, const wchar_t
|
||
*restrict WFROM, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `wmempcpy' function is nearly identical to the `wmemcpy'
|
||
function. It copies SIZE wide characters from the object
|
||
beginning at WFROM into the object pointed to by WTO. But
|
||
instead of returning the value of WTO it returns a pointer to the
|
||
wide character following the last written wide character in the
|
||
object beginning at WTO. I.e., the value is `WTO + SIZE'.
|
||
|
||
This function is useful in situations where a number of objects
|
||
shall be copied to consecutive memory positions.
|
||
|
||
The following is a possible implementation of `wmempcpy' but there
|
||
are more optimizations possible.
|
||
|
||
wchar_t *
|
||
wmempcpy (wchar_t *restrict wto, const wchar_t *restrict wfrom,
|
||
size_t size)
|
||
{
|
||
return (wchar_t *) mempcpy (wto, wfrom, size * sizeof (wchar_t));
|
||
}
|
||
|
||
This function is a GNU extension.
|
||
|
||
-- Function: void * memmove (void *TO, const void *FROM, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`memmove' copies the SIZE bytes at FROM into the SIZE bytes at TO,
|
||
even if those two blocks of space overlap. In the case of
|
||
overlap, `memmove' is careful to copy the original values of the
|
||
bytes in the block at FROM, including those bytes which also
|
||
belong to the block at TO.
|
||
|
||
The value returned by `memmove' is the value of TO.
|
||
|
||
-- Function: wchar_t * wmemmove (wchar_t *WTO, const wchar_t *WFROM,
|
||
size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`wmemmove' copies the SIZE wide characters at WFROM into the SIZE
|
||
wide characters at WTO, even if those two blocks of space overlap.
|
||
In the case of overlap, `wmemmove' is careful to copy the
|
||
original values of the wide characters in the block at WFROM,
|
||
including those wide characters which also belong to the block at
|
||
WTO.
|
||
|
||
The following is a possible implementation of `wmemmove' but there
|
||
are more optimizations possible.
|
||
|
||
wchar_t *
|
||
wmemmove (wchar_t *wto, const wchar_t *wfrom,
|
||
size_t size)
|
||
{
|
||
return (wchar_t *) memmove (wto, wfrom, size * sizeof (wchar_t));
|
||
}
|
||
|
||
The value returned by `wmemmove' is the value of WTO.
|
||
|
||
This function was introduced in Amendment 1 to ISO C90.
|
||
|
||
-- Function: void * memccpy (void *restrict TO, const void *restrict
|
||
FROM, int C, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function copies no more than SIZE bytes from FROM to TO,
|
||
stopping if a byte matching C is found. The return value is a
|
||
pointer into TO one byte past where C was copied, or a null
|
||
pointer if no byte matching C appeared in the first SIZE bytes of
|
||
FROM.
|
||
|
||
-- Function: void * memset (void *BLOCK, int C, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function copies the value of C (converted to an `unsigned
|
||
char') into each of the first SIZE bytes of the object beginning
|
||
at BLOCK. It returns the value of BLOCK.
|
||
|
||
-- Function: wchar_t * wmemset (wchar_t *BLOCK, wchar_t WC, size_t
|
||
SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function copies the value of WC into each of the first SIZE
|
||
wide characters of the object beginning at BLOCK. It returns the
|
||
value of BLOCK.
|
||
|
||
-- Function: char * strcpy (char *restrict TO, const char *restrict
|
||
FROM)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This copies bytes from the string FROM (up to and including the
|
||
terminating null byte) into the string TO. Like `memcpy', this
|
||
function has undefined results if the strings overlap. The return
|
||
value is the value of TO.
|
||
|
||
-- Function: wchar_t * wcscpy (wchar_t *restrict WTO, const wchar_t
|
||
*restrict WFROM)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This copies wide characters from the wide string WFROM (up to and
|
||
including the terminating null wide character) into the string
|
||
WTO. Like `wmemcpy', this function has undefined results if the
|
||
strings overlap. The return value is the value of WTO.
|
||
|
||
-- Function: char * strdup (const char *S)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
This function copies the string S into a newly allocated string.
|
||
The string is allocated using `malloc'; see *Note Unconstrained
|
||
Allocation::. If `malloc' cannot allocate space for the new
|
||
string, `strdup' returns a null pointer. Otherwise it returns a
|
||
pointer to the new string.
|
||
|
||
-- Function: wchar_t * wcsdup (const wchar_t *WS)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
This function copies the wide string WS into a newly allocated
|
||
string. The string is allocated using `malloc'; see *Note
|
||
Unconstrained Allocation::. If `malloc' cannot allocate space for
|
||
the new string, `wcsdup' returns a null pointer. Otherwise it
|
||
returns a pointer to the new wide string.
|
||
|
||
This function is a GNU extension.
|
||
|
||
-- Function: char * stpcpy (char *restrict TO, const char *restrict
|
||
FROM)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function is like `strcpy', except that it returns a pointer to
|
||
the end of the string TO (that is, the address of the terminating
|
||
null byte `to + strlen (from)') rather than the beginning.
|
||
|
||
For example, this program uses `stpcpy' to concatenate `foo' and
|
||
`bar' to produce `foobar', which it then prints.
|
||
|
||
|
||
#include <string.h>
|
||
#include <stdio.h>
|
||
|
||
int
|
||
main (void)
|
||
{
|
||
char buffer[10];
|
||
char *to = buffer;
|
||
to = stpcpy (to, "foo");
|
||
to = stpcpy (to, "bar");
|
||
puts (buffer);
|
||
return 0;
|
||
}
|
||
|
||
This function is part of POSIX.1-2008 and later editions, but was
|
||
available in the GNU C Library and other systems as an extension
|
||
long before it was standardized.
|
||
|
||
Its behavior is undefined if the strings overlap. The function is
|
||
declared in `string.h'.
|
||
|
||
-- Function: wchar_t * wcpcpy (wchar_t *restrict WTO, const wchar_t
|
||
*restrict WFROM)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function is like `wcscpy', except that it returns a pointer to
|
||
the end of the string WTO (that is, the address of the terminating
|
||
null wide character `wto + wcslen (wfrom)') rather than the
|
||
beginning.
|
||
|
||
This function is not part of ISO or POSIX but was found useful
|
||
while developing the GNU C Library itself.
|
||
|
||
The behavior of `wcpcpy' is undefined if the strings overlap.
|
||
|
||
`wcpcpy' is a GNU extension and is declared in `wchar.h'.
|
||
|
||
-- Macro: char * strdupa (const char *S)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This macro is similar to `strdup' but allocates the new string
|
||
using `alloca' instead of `malloc' (*note Variable Size
|
||
Automatic::). This means of course the returned string has the
|
||
same limitations as any block of memory allocated using `alloca'.
|
||
|
||
For obvious reasons `strdupa' is implemented only as a macro; you
|
||
cannot get the address of this function. Despite this limitation
|
||
it is a useful function. The following code shows a situation
|
||
where using `malloc' would be a lot more expensive.
|
||
|
||
|
||
#include <paths.h>
|
||
#include <string.h>
|
||
#include <stdio.h>
|
||
|
||
const char path[] = _PATH_STDPATH;
|
||
|
||
int
|
||
main (void)
|
||
{
|
||
char *wr_path = strdupa (path);
|
||
char *cp = strtok (wr_path, ":");
|
||
|
||
while (cp != NULL)
|
||
{
|
||
puts (cp);
|
||
cp = strtok (NULL, ":");
|
||
}
|
||
return 0;
|
||
}
|
||
|
||
Please note that calling `strtok' using PATH directly is invalid.
|
||
It is also not allowed to call `strdupa' in the argument list of
|
||
`strtok' since `strdupa' uses `alloca' (*note Variable Size
|
||
Automatic::), which can interfere with the parameter passing.
|
||
|
||
This function is only available if GNU CC is used.
|
||
|
||
-- Function: void bcopy (const void *FROM, void *TO, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This is a partially obsolete alternative for `memmove', derived
|
||
from BSD. Note that it is not quite equivalent to `memmove',
|
||
because the arguments are not in the same order and there is no
|
||
return value.
|
||
|
||
-- Function: void bzero (void *BLOCK, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This is a partially obsolete alternative for `memset', derived from
|
||
BSD. Note that it is not as general as `memset', because the only
|
||
value it can store is zero.
|
||
|
||
|
||
File: libc.info, Node: Concatenating Strings, Next: Truncating Strings, Prev: Copying Strings and Arrays, Up: String and Array Utilities
|
||
|
||
5.5 Concatenating Strings
|
||
=========================
|
||
|
||
The functions described in this section concatenate the contents of a
|
||
string or wide string to another. They follow the string-copying
|
||
functions in their conventions. *Note Copying Strings and Arrays::.
|
||
`strcat' is declared in the header file `string.h' while `wcscat' is
|
||
declared in `wchar.h'.
|
||
|
||
-- Function: char * strcat (char *restrict TO, const char *restrict
|
||
FROM)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `strcat' function is similar to `strcpy', except that the
|
||
bytes from FROM are concatenated or appended to the end of TO,
|
||
instead of overwriting it. That is, the first byte from FROM
|
||
overwrites the null byte marking the end of TO.
|
||
|
||
An equivalent definition for `strcat' would be:
|
||
|
||
char *
|
||
strcat (char *restrict to, const char *restrict from)
|
||
{
|
||
strcpy (to + strlen (to), from);
|
||
return to;
|
||
}
|
||
|
||
This function has undefined results if the strings overlap.
|
||
|
||
As noted below, this function has significant performance issues.
|
||
|
||
-- Function: wchar_t * wcscat (wchar_t *restrict WTO, const wchar_t
|
||
*restrict WFROM)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `wcscat' function is similar to `wcscpy', except that the wide
|
||
characters from WFROM are concatenated or appended to the end of
|
||
WTO, instead of overwriting it. That is, the first wide character
|
||
from WFROM overwrites the null wide character marking the end of
|
||
WTO.
|
||
|
||
An equivalent definition for `wcscat' would be:
|
||
|
||
wchar_t *
|
||
wcscat (wchar_t *wto, const wchar_t *wfrom)
|
||
{
|
||
wcscpy (wto + wcslen (wto), wfrom);
|
||
return wto;
|
||
}
|
||
|
||
This function has undefined results if the strings overlap.
|
||
|
||
As noted below, this function has significant performance issues.
|
||
|
||
Programmers using the `strcat' or `wcscat' function (or the
|
||
`strncat' or `wcsncat' functions defined in a later section, for that
|
||
matter) can easily be recognized as lazy and reckless. In almost all
|
||
situations the lengths of the participating strings are known (they
|
||
had better be, since how can one otherwise ensure the allocated size
|
||
of the buffer is sufficient?) Or at least, one could know them if one
|
||
keeps track of the results of the various function calls. But then it
|
||
is very inefficient to use `strcat'/`wcscat'. A lot of time is wasted
|
||
finding the end of the destination string so that the actual copying
|
||
can start. This is a common example:
|
||
|
||
/* This function concatenates arbitrarily many strings. The last
|
||
parameter must be `NULL'. */
|
||
char *
|
||
concat (const char *str, ...)
|
||
{
|
||
va_list ap, ap2;
|
||
size_t total = 1;
|
||
const char *s;
|
||
char *result;
|
||
|
||
va_start (ap, str);
|
||
va_copy (ap2, ap);
|
||
|
||
/* Determine how much space we need. */
|
||
for (s = str; s != NULL; s = va_arg (ap, const char *))
|
||
total += strlen (s);
|
||
|
||
va_end (ap);
|
||
|
||
result = (char *) malloc (total);
|
||
if (result != NULL)
|
||
{
|
||
result[0] = '\0';
|
||
|
||
/* Copy the strings. */
|
||
for (s = str; s != NULL; s = va_arg (ap2, const char *))
|
||
strcat (result, s);
|
||
}
|
||
|
||
va_end (ap2);
|
||
|
||
return result;
|
||
}
|
||
|
||
This looks quite simple, especially the second loop where the strings
|
||
are actually copied. But these innocent lines hide a major performance
|
||
penalty. Just imagine that ten strings of 100 bytes each have to be
|
||
concatenated. For the second string we search the already stored 100
|
||
bytes for the end of the string so that we can append the next string.
|
||
For all strings in total the comparisons necessary to find the end of
|
||
the intermediate results sum up to 5500! If we combine the copying
|
||
with the search for the allocation we can write this function more
|
||
efficiently:
|
||
|
||
char *
|
||
concat (const char *str, ...)
|
||
{
|
||
va_list ap;
|
||
size_t allocated = 100;
|
||
char *result = (char *) malloc (allocated);
|
||
|
||
if (result != NULL)
|
||
{
|
||
char *newp;
|
||
char *wp;
|
||
const char *s;
|
||
|
||
va_start (ap, str);
|
||
|
||
wp = result;
|
||
for (s = str; s != NULL; s = va_arg (ap, const char *))
|
||
{
|
||
size_t len = strlen (s);
|
||
|
||
/* Resize the allocated memory if necessary. */
|
||
if (wp + len + 1 > result + allocated)
|
||
{
|
||
allocated = (allocated + len) * 2;
|
||
newp = (char *) realloc (result, allocated);
|
||
if (newp == NULL)
|
||
{
|
||
free (result);
|
||
return NULL;
|
||
}
|
||
wp = newp + (wp - result);
|
||
result = newp;
|
||
}
|
||
|
||
wp = mempcpy (wp, s, len);
|
||
}
|
||
|
||
/* Terminate the result string. */
|
||
*wp++ = '\0';
|
||
|
||
/* Resize memory to the optimal size. */
|
||
newp = realloc (result, wp - result);
|
||
if (newp != NULL)
|
||
result = newp;
|
||
|
||
va_end (ap);
|
||
}
|
||
|
||
return result;
|
||
}
|
||
|
||
With a bit more knowledge about the input strings one could fine-tune
|
||
the memory allocation. The difference we are pointing to here is that
|
||
we don't use `strcat' anymore. We always keep track of the length of
|
||
the current intermediate result so we can save ourselves the search for
|
||
the end of the string and use `mempcpy'. Please note that we also
|
||
don't use `stpcpy' which might seem more natural since we are handling
|
||
strings. But this is not necessary since we already know the length of
|
||
the string and therefore can use the faster memory copying function.
|
||
The example would work for wide characters the same way.
|
||
|
||
Whenever a programmer feels the need to use `strcat' she or he
|
||
should think twice and look through the program to see whether the code
|
||
cannot be rewritten to take advantage of already calculated results.
|
||
Again: it is almost always unnecessary to use `strcat'.
|
||
|
||
|
||
File: libc.info, Node: Truncating Strings, Next: String/Array Comparison, Prev: Concatenating Strings, Up: String and Array Utilities
|
||
|
||
5.6 Truncating Strings while Copying
|
||
====================================
|
||
|
||
The functions described in this section copy or concatenate the
|
||
possibly-truncated contents of a string or array to another, and
|
||
similarly for wide strings. They follow the string-copying functions
|
||
in their header conventions. *Note Copying Strings and Arrays::. The
|
||
`str' functions are declared in the header file `string.h' and the `wc'
|
||
functions are declared in the file `wchar.h'.
|
||
|
||
-- Function: char * strncpy (char *restrict TO, const char *restrict
|
||
FROM, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function is similar to `strcpy' but always copies exactly
|
||
SIZE bytes into TO.
|
||
|
||
If FROM does not contain a null byte in its first SIZE bytes,
|
||
`strncpy' copies just the first SIZE bytes. In this case no null
|
||
terminator is written into TO.
|
||
|
||
Otherwise FROM must be a string with length less than SIZE. In
|
||
this case `strncpy' copies all of FROM, followed by enough null
|
||
bytes to add up to SIZE bytes in all.
|
||
|
||
The behavior of `strncpy' is undefined if the strings overlap.
|
||
|
||
This function was designed for now-rarely-used arrays consisting of
|
||
non-null bytes followed by zero or more null bytes. It needs to
|
||
set all SIZE bytes of the destination, even when SIZE is much
|
||
greater than the length of FROM. As noted below, this function is
|
||
generally a poor choice for processing text.
|
||
|
||
-- Function: wchar_t * wcsncpy (wchar_t *restrict WTO, const wchar_t
|
||
*restrict WFROM, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function is similar to `wcscpy' but always copies exactly
|
||
SIZE wide characters into WTO.
|
||
|
||
If WFROM does not contain a null wide character in its first SIZE
|
||
wide characters, then `wcsncpy' copies just the first SIZE wide
|
||
characters. In this case no null terminator is written into WTO.
|
||
|
||
Otherwise WFROM must be a wide string with length less than SIZE.
|
||
In this case `wcsncpy' copies all of WFROM, followed by enough
|
||
null wide characters to add up to SIZE wide characters in all.
|
||
|
||
The behavior of `wcsncpy' is undefined if the strings overlap.
|
||
|
||
This function is the wide-character counterpart of `strncpy' and
|
||
suffers from most of the problems that `strncpy' does. For
|
||
example, as noted below, this function is generally a poor choice
|
||
for processing text.
|
||
|
||
-- Function: char * strndup (const char *S, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
This function is similar to `strdup' but always copies at most
|
||
SIZE bytes into the newly allocated string.
|
||
|
||
If the length of S is more than SIZE, then `strndup' copies just
|
||
the first SIZE bytes and adds a closing null byte. Otherwise all
|
||
bytes are copied and the string is terminated.
|
||
|
||
This function differs from `strncpy' in that it always terminates
|
||
the destination string.
|
||
|
||
As noted below, this function is generally a poor choice for
|
||
processing text.
|
||
|
||
`strndup' is a GNU extension.
|
||
|
||
-- Macro: char * strndupa (const char *S, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function is similar to `strndup' but like `strdupa' it
|
||
allocates the new string using `alloca' *note Variable Size
|
||
Automatic::. The same advantages and limitations of `strdupa' are
|
||
valid for `strndupa', too.
|
||
|
||
This function is implemented only as a macro, just like `strdupa'.
|
||
Just as `strdupa' this macro also must not be used inside the
|
||
parameter list in a function call.
|
||
|
||
As noted below, this function is generally a poor choice for
|
||
processing text.
|
||
|
||
`strndupa' is only available if GNU CC is used.
|
||
|
||
-- Function: char * stpncpy (char *restrict TO, const char *restrict
|
||
FROM, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function is similar to `stpcpy' but always copies exactly
|
||
SIZE bytes into TO.
|
||
|
||
If the length of FROM is more than SIZE, then `stpncpy' copies
|
||
just the first SIZE bytes and returns a pointer to the byte
|
||
directly following the one which was copied last. Note that in
|
||
this case there is no null terminator written into TO.
|
||
|
||
If the length of FROM is less than SIZE, then `stpncpy' copies all
|
||
of FROM, followed by enough null bytes to add up to SIZE bytes in
|
||
all. This behavior is rarely useful, but it is implemented to be
|
||
useful in contexts where this behavior of `strncpy' is used.
|
||
`stpncpy' returns a pointer to the _first_ written null byte.
|
||
|
||
This function is not part of ISO or POSIX but was found useful
|
||
while developing the GNU C Library itself.
|
||
|
||
Its behavior is undefined if the strings overlap. The function is
|
||
declared in `string.h'.
|
||
|
||
As noted below, this function is generally a poor choice for
|
||
processing text.
|
||
|
||
-- Function: wchar_t * wcpncpy (wchar_t *restrict WTO, const wchar_t
|
||
*restrict WFROM, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function is similar to `wcpcpy' but always copies exactly
|
||
SIZE wide characters into WTO.
|
||
|
||
If the length of WFROM is more than SIZE, then `wcpncpy' copies
|
||
just the first SIZE wide characters and returns a pointer to the
|
||
wide character directly following the last non-null wide character
|
||
which was copied. Note that in this case there is no null
|
||
terminator written into WTO.
|
||
|
||
If the length of WFROM is less than SIZE, then `wcpncpy' copies
|
||
all of WFROM, followed by enough null wide characters to add up to
|
||
SIZE wide characters in all. This behavior is rarely useful, but
|
||
it is implemented to be useful in contexts where this behavior of
|
||
`wcsncpy' is used. `wcpncpy' returns a pointer to the _first_
|
||
written null wide character.
|
||
|
||
This function is not part of ISO or POSIX but was found useful
|
||
while developing the GNU C Library itself.
|
||
|
||
Its behavior is undefined if the strings overlap.
|
||
|
||
As noted below, this function is generally a poor choice for
|
||
processing text.
|
||
|
||
`wcpncpy' is a GNU extension.
|
||
|
||
-- Function: char * strncat (char *restrict TO, const char *restrict
|
||
FROM, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function is like `strcat' except that not more than SIZE
|
||
bytes from FROM are appended to the end of TO, and FROM need not
|
||
be null-terminated. A single null byte is also always appended to
|
||
TO, so the total allocated size of TO must be at least `SIZE + 1'
|
||
bytes longer than its initial length.
|
||
|
||
The `strncat' function could be implemented like this:
|
||
|
||
char *
|
||
strncat (char *to, const char *from, size_t size)
|
||
{
|
||
size_t len = strlen (to);
|
||
memcpy (to + len, from, strnlen (from, size));
|
||
to[len + strnlen (from, size)] = '\0';
|
||
return to;
|
||
}
|
||
|
||
The behavior of `strncat' is undefined if the strings overlap.
|
||
|
||
As a companion to `strncpy', `strncat' was designed for
|
||
now-rarely-used arrays consisting of non-null bytes followed by
|
||
zero or more null bytes. As noted below, this function is
|
||
generally a poor choice for processing text. Also, this function
|
||
has significant performance issues. *Note Concatenating Strings::.
|
||
|
||
-- Function: wchar_t * wcsncat (wchar_t *restrict WTO, const wchar_t
|
||
*restrict WFROM, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function is like `wcscat' except that not more than SIZE wide
|
||
characters from WFROM are appended to the end of WTO, and WFROM need
|
||
not be null-terminated. A single null wide character is also
|
||
always appended to WTO, so the total allocated size of WTO must be
|
||
at least `wcsnlen (WFROM, SIZE) + 1' wide characters longer than
|
||
its initial length.
|
||
|
||
The `wcsncat' function could be implemented like this:
|
||
|
||
wchar_t *
|
||
wcsncat (wchar_t *restrict wto, const wchar_t *restrict wfrom,
|
||
size_t size)
|
||
{
|
||
size_t len = wcslen (wto);
|
||
memcpy (wto + len, wfrom, wcsnlen (wfrom, size) * sizeof (wchar_t));
|
||
wto[len + wcsnlen (wfrom, size)] = L'\0';
|
||
return wto;
|
||
}
|
||
|
||
The behavior of `wcsncat' is undefined if the strings overlap.
|
||
|
||
As noted below, this function is generally a poor choice for
|
||
processing text. Also, this function has significant performance
|
||
issues. *Note Concatenating Strings::.
|
||
|
||
Because these functions can abruptly truncate strings or wide
|
||
strings, they are generally poor choices for processing text. When
|
||
copying or concatenating multibyte strings, they can truncate within a
|
||
multibyte character so that the result is not a valid multibyte string.
|
||
When combining or concatenating multibyte or wide strings, they may
|
||
truncate the output after a combining character, resulting in a
|
||
corrupted grapheme. They can cause bugs even when processing
|
||
single-byte strings: for example, when calculating an ASCII-only user
|
||
name, a truncated name can identify the wrong user.
|
||
|
||
Although some buffer overruns can be prevented by manually replacing
|
||
calls to copying functions with calls to truncation functions, there
|
||
are often easier and safer automatic techniques that cause buffer
|
||
overruns to reliably terminate a program, such as GCC's
|
||
`-fcheck-pointer-bounds' and `-fsanitize=address' options. *Note
|
||
Options for Debugging Your Program or GCC: (gcc)Debugging Options.
|
||
Because truncation functions can mask application bugs that would
|
||
otherwise be caught by the automatic techniques, these functions should
|
||
be used only when the application's underlying logic requires
|
||
truncation.
|
||
|
||
*Note_* GNU programs should not truncate strings or wide strings to
|
||
fit arbitrary size limits. *Note Writing Robust Programs:
|
||
(standards)Semantics. Instead of string-truncation functions, it is
|
||
usually better to use dynamic memory allocation (*note Unconstrained
|
||
Allocation::) and functions such as `strdup' or `asprintf' to construct
|
||
strings.
|
||
|
||
|
||
File: libc.info, Node: String/Array Comparison, Next: Collation Functions, Prev: Truncating Strings, Up: String and Array Utilities
|
||
|
||
5.7 String/Array Comparison
|
||
===========================
|
||
|
||
You can use the functions in this section to perform comparisons on the
|
||
contents of strings and arrays. As well as checking for equality, these
|
||
functions can also be used as the ordering functions for sorting
|
||
operations. *Note Searching and Sorting::, for an example of this.
|
||
|
||
Unlike most comparison operations in C, the string comparison
|
||
functions return a nonzero value if the strings are _not_ equivalent
|
||
rather than if they are. The sign of the value indicates the relative
|
||
ordering of the first part of the strings that are not equivalent: a
|
||
negative value indicates that the first string is "less" than the
|
||
second, while a positive value indicates that the first string is
|
||
"greater".
|
||
|
||
The most common use of these functions is to check only for equality.
|
||
This is canonically done with an expression like `! strcmp (s1, s2)'.
|
||
|
||
All of these functions are declared in the header file `string.h'.
|
||
|
||
-- Function: int memcmp (const void *A1, const void *A2, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The function `memcmp' compares the SIZE bytes of memory beginning
|
||
at A1 against the SIZE bytes of memory beginning at A2. The value
|
||
returned has the same sign as the difference between the first
|
||
differing pair of bytes (interpreted as `unsigned char' objects,
|
||
then promoted to `int').
|
||
|
||
If the contents of the two blocks are equal, `memcmp' returns `0'.
|
||
|
||
-- Function: int wmemcmp (const wchar_t *A1, const wchar_t *A2, size_t
|
||
SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The function `wmemcmp' compares the SIZE wide characters beginning
|
||
at A1 against the SIZE wide characters beginning at A2. The value
|
||
returned is smaller than or larger than zero depending on whether
|
||
the first differing wide character in A1 is smaller or larger than
|
||
the corresponding wide character in A2.
|
||
|
||
If the contents of the two blocks are equal, `wmemcmp' returns `0'.
|
||
|
||
On arbitrary arrays, the `memcmp' function is mostly useful for
|
||
testing equality. It usually isn't meaningful to do byte-wise ordering
|
||
comparisons on arrays of things other than bytes. For example, a
|
||
byte-wise comparison on the bytes that make up floating-point numbers
|
||
isn't likely to tell you anything about the relationship between the
|
||
values of the floating-point numbers.
|
||
|
||
`wmemcmp' is really only useful to compare arrays of type `wchar_t'
|
||
since the function looks at `sizeof (wchar_t)' bytes at a time and this
|
||
number of bytes is system dependent.
|
||
|
||
You should also be careful about using `memcmp' to compare objects
|
||
that can contain "holes", such as the padding inserted into structure
|
||
objects to enforce alignment requirements, extra space at the end of
|
||
unions, and extra bytes at the ends of strings whose length is less
|
||
than their allocated size. The contents of these "holes" are
|
||
indeterminate and may cause strange behavior when performing byte-wise
|
||
comparisons. For more predictable results, perform an explicit
|
||
component-wise comparison.
|
||
|
||
For example, given a structure type definition like:
|
||
|
||
struct foo
|
||
{
|
||
unsigned char tag;
|
||
union
|
||
{
|
||
double f;
|
||
long i;
|
||
char *p;
|
||
} value;
|
||
};
|
||
|
||
you are better off writing a specialized comparison function to compare
|
||
`struct foo' objects instead of comparing them with `memcmp'.
|
||
|
||
-- Function: int strcmp (const char *S1, const char *S2)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `strcmp' function compares the string S1 against S2, returning
|
||
a value that has the same sign as the difference between the first
|
||
differing pair of bytes (interpreted as `unsigned char' objects,
|
||
then promoted to `int').
|
||
|
||
If the two strings are equal, `strcmp' returns `0'.
|
||
|
||
A consequence of the ordering used by `strcmp' is that if S1 is an
|
||
initial substring of S2, then S1 is considered to be "less than"
|
||
S2.
|
||
|
||
`strcmp' does not take sorting conventions of the language the
|
||
strings are written in into account. To get that one has to use
|
||
`strcoll'.
|
||
|
||
-- Function: int wcscmp (const wchar_t *WS1, const wchar_t *WS2)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `wcscmp' function compares the wide string WS1 against WS2.
|
||
The value returned is smaller than or larger than zero depending
|
||
on whether the first differing wide character in WS1 is smaller or
|
||
larger than the corresponding wide character in WS2.
|
||
|
||
If the two strings are equal, `wcscmp' returns `0'.
|
||
|
||
A consequence of the ordering used by `wcscmp' is that if WS1 is
|
||
an initial substring of WS2, then WS1 is considered to be "less
|
||
than" WS2.
|
||
|
||
`wcscmp' does not take sorting conventions of the language the
|
||
strings are written in into account. To get that one has to use
|
||
`wcscoll'.
|
||
|
||
-- Function: int strcasecmp (const char *S1, const char *S2)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
This function is like `strcmp', except that differences in case are
|
||
ignored, and its arguments must be multibyte strings. How
|
||
uppercase and lowercase characters are related is determined by
|
||
the currently selected locale. In the standard `"C"' locale the
|
||
characters Ä and ä do not match but in a locale which regards
|
||
these characters as parts of the alphabet they do match.
|
||
|
||
`strcasecmp' is derived from BSD.
|
||
|
||
-- Function: int wcscasecmp (const wchar_t *WS1, const wchar_t *WS2)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
This function is like `wcscmp', except that differences in case are
|
||
ignored. How uppercase and lowercase characters are related is
|
||
determined by the currently selected locale. In the standard `"C"'
|
||
locale the characters Ä and ä do not match but in a locale which
|
||
regards these characters as parts of the alphabet they do match.
|
||
|
||
`wcscasecmp' is a GNU extension.
|
||
|
||
-- Function: int strncmp (const char *S1, const char *S2, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function is similar to `strcmp', except that no more than
|
||
SIZE bytes are compared. In other words, if the two strings are
|
||
the same in their first SIZE bytes, the return value is zero.
|
||
|
||
-- Function: int wcsncmp (const wchar_t *WS1, const wchar_t *WS2,
|
||
size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function is similar to `wcscmp', except that no more than
|
||
SIZE wide characters are compared. In other words, if the two
|
||
strings are the same in their first SIZE wide characters, the
|
||
return value is zero.
|
||
|
||
-- Function: int strncasecmp (const char *S1, const char *S2, size_t N)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
This function is like `strncmp', except that differences in case
|
||
are ignored, and the compared parts of the arguments should
|
||
consist of valid multibyte characters. Like `strcasecmp', it is
|
||
locale dependent how uppercase and lowercase characters are
|
||
related.
|
||
|
||
`strncasecmp' is a GNU extension.
|
||
|
||
-- Function: int wcsncasecmp (const wchar_t *WS1, const wchar_t *WS2,
|
||
size_t N)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
This function is like `wcsncmp', except that differences in case
|
||
are ignored. Like `wcscasecmp', it is locale dependent how
|
||
uppercase and lowercase characters are related.
|
||
|
||
`wcsncasecmp' is a GNU extension.
|
||
|
||
Here are some examples showing the use of `strcmp' and `strncmp'
|
||
(equivalent examples can be constructed for the wide character
|
||
functions). These examples assume the use of the ASCII character set.
|
||
(If some other character set--say, EBCDIC--is used instead, then the
|
||
glyphs are associated with different numeric codes, and the return
|
||
values and ordering may differ.)
|
||
|
||
strcmp ("hello", "hello")
|
||
=> 0 /* These two strings are the same. */
|
||
strcmp ("hello", "Hello")
|
||
=> 32 /* Comparisons are case-sensitive. */
|
||
strcmp ("hello", "world")
|
||
=> -15 /* The byte `'h'' comes before `'w''. */
|
||
strcmp ("hello", "hello, world")
|
||
=> -44 /* Comparing a null byte against a comma. */
|
||
strncmp ("hello", "hello, world", 5)
|
||
=> 0 /* The initial 5 bytes are the same. */
|
||
strncmp ("hello, world", "hello, stupid world!!!", 5)
|
||
=> 0 /* The initial 5 bytes are the same. */
|
||
|
||
-- Function: int strverscmp (const char *S1, const char *S2)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
The `strverscmp' function compares the string S1 against S2,
|
||
considering them as holding indices/version numbers. The return
|
||
value follows the same conventions as found in the `strcmp'
|
||
function. In fact, if S1 and S2 contain no digits, `strverscmp'
|
||
behaves like `strcmp' (in the sense that the sign of the result is
|
||
the same).
|
||
|
||
The comparison algorithm which the `strverscmp' function implements
|
||
differs slightly from other version-comparison algorithms. The
|
||
implementation is based on a finite-state machine, whose behavior
|
||
is approximated below.
|
||
|
||
* The input strings are each split into sequences of non-digits
|
||
and digits. These sequences can be empty at the beginning
|
||
and end of the string. Digits are determined by the
|
||
`isdigit' function and are thus subject to the current locale.
|
||
|
||
* Comparison starts with a (possibly empty) non-digit sequence.
|
||
The first non-equal sequences of non-digits or digits
|
||
determine the outcome of the comparison.
|
||
|
||
* Corresponding non-digit sequences in both strings are compared
|
||
lexicographically if their lengths are equal. If the lengths
|
||
differ, the shorter non-digit sequence is extended with the
|
||
input string character immediately following it (which may be
|
||
the null terminator), the other sequence is truncated to be
|
||
of the same (extended) length, and these two sequences are
|
||
compared lexicographically. In the last case, the sequence
|
||
comparison determines the result of the function because the
|
||
extension character (or some character before it) is
|
||
necessarily different from the character at the same offset
|
||
in the other input string.
|
||
|
||
* For two sequences of digits, the number of leading zeros is
|
||
counted (which can be zero). If the count differs, the
|
||
string with more leading zeros in the digit sequence is
|
||
considered smaller than the other string.
|
||
|
||
* If the two sequences of digits have no leading zeros, they
|
||
are compared as integers, that is, the string with the longer
|
||
digit sequence is deemed larger, and if both sequences are of
|
||
equal length, they are compared lexicographically.
|
||
|
||
* If both digit sequences start with a zero and have an equal
|
||
number of leading zeros, they are compared lexicographically
|
||
if their lengths are the same. If the lengths differ, the
|
||
shorter sequence is extended with the following character in
|
||
its input string, and the other sequence is truncated to the
|
||
same length, and both sequences are compared
|
||
lexicographically (similar to the non-digit sequence case
|
||
above).
|
||
|
||
The treatment of leading zeros and the tie-breaking extension
|
||
characters (which in effect propagate across non-digit/digit
|
||
sequence boundaries) differs from other version-comparison
|
||
algorithms.
|
||
|
||
strverscmp ("no digit", "no digit")
|
||
=> 0 /* same behavior as strcmp. */
|
||
strverscmp ("item#99", "item#100")
|
||
=> <0 /* same prefix, but 99 < 100. */
|
||
strverscmp ("alpha1", "alpha001")
|
||
=> >0 /* different number of leading zeros (0 and 2). */
|
||
strverscmp ("part1_f012", "part1_f01")
|
||
=> >0 /* lexicographical comparison with leading zeros. */
|
||
strverscmp ("foo.009", "foo.0")
|
||
=> <0 /* different number of leading zeros (2 and 1). */
|
||
|
||
`strverscmp' is a GNU extension.
|
||
|
||
-- Function: int bcmp (const void *A1, const void *A2, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This is an obsolete alias for `memcmp', derived from BSD.
|
||
|
||
|
||
File: libc.info, Node: Collation Functions, Next: Search Functions, Prev: String/Array Comparison, Up: String and Array Utilities
|
||
|
||
5.8 Collation Functions
|
||
=======================
|
||
|
||
In some locales, the conventions for lexicographic ordering differ from
|
||
the strict numeric ordering of character codes. For example, in Spanish
|
||
most glyphs with diacritical marks such as accents are not considered
|
||
distinct letters for the purposes of collation. On the other hand, the
|
||
two-character sequence `ll' is treated as a single letter that is
|
||
collated immediately after `l'.
|
||
|
||
You can use the functions `strcoll' and `strxfrm' (declared in the
|
||
header file `string.h') and `wcscoll' and `wcsxfrm' (declared in the
|
||
header file `wchar.h') to compare strings using a collation ordering
|
||
appropriate for the current locale. The locale used by these functions
|
||
in particular can be specified by setting the locale for the
|
||
`LC_COLLATE' category; see *Note Locales::.
|
||
|
||
In the standard C locale, the collation sequence for `strcoll' is
|
||
the same as that for `strcmp'. Similarly, `wcscoll' and `wcscmp' are
|
||
the same in this situation.
|
||
|
||
Effectively, the way these functions work is by applying a mapping to
|
||
transform the characters in a multibyte string to a byte sequence that
|
||
represents the string's position in the collating sequence of the
|
||
current locale. Comparing two such byte sequences in a simple fashion
|
||
is equivalent to comparing the strings with the locale's collating
|
||
sequence.
|
||
|
||
The functions `strcoll' and `wcscoll' perform this translation
|
||
implicitly, in order to do one comparison. By contrast, `strxfrm' and
|
||
`wcsxfrm' perform the mapping explicitly. If you are making multiple
|
||
comparisons using the same string or set of strings, it is likely to be
|
||
more efficient to use `strxfrm' or `wcsxfrm' to transform all the
|
||
strings just once, and subsequently compare the transformed strings
|
||
with `strcmp' or `wcscmp'.
|
||
|
||
-- Function: int strcoll (const char *S1, const char *S2)
|
||
Preliminary: | MT-Safe locale | AS-Unsafe heap | AC-Unsafe mem |
|
||
*Note POSIX Safety Concepts::.
|
||
|
||
The `strcoll' function is similar to `strcmp' but uses the
|
||
collating sequence of the current locale for collation (the
|
||
`LC_COLLATE' locale). The arguments are multibyte strings.
|
||
|
||
-- Function: int wcscoll (const wchar_t *WS1, const wchar_t *WS2)
|
||
Preliminary: | MT-Safe locale | AS-Unsafe heap | AC-Unsafe mem |
|
||
*Note POSIX Safety Concepts::.
|
||
|
||
The `wcscoll' function is similar to `wcscmp' but uses the
|
||
collating sequence of the current locale for collation (the
|
||
`LC_COLLATE' locale).
|
||
|
||
Here is an example of sorting an array of strings, using `strcoll'
|
||
to compare them. The actual sort algorithm is not written here; it
|
||
comes from `qsort' (*note Array Sort Function::). The job of the code
|
||
shown here is to say how to compare the strings while sorting them.
|
||
(Later on in this section, we will show a way to do this more
|
||
efficiently using `strxfrm'.)
|
||
|
||
/* This is the comparison function used with `qsort'. */
|
||
|
||
int
|
||
compare_elements (const void *v1, const void *v2)
|
||
{
|
||
char * const *p1 = v1;
|
||
char * const *p2 = v2;
|
||
|
||
return strcoll (*p1, *p2);
|
||
}
|
||
|
||
/* This is the entry point--the function to sort
|
||
strings using the locale's collating sequence. */
|
||
|
||
void
|
||
sort_strings (char **array, int nstrings)
|
||
{
|
||
/* Sort `array' by comparing the strings. */
|
||
qsort (array, nstrings,
|
||
sizeof (char *), compare_elements);
|
||
}
|
||
|
||
-- Function: size_t strxfrm (char *restrict TO, const char *restrict
|
||
FROM, size_t SIZE)
|
||
Preliminary: | MT-Safe locale | AS-Unsafe heap | AC-Unsafe mem |
|
||
*Note POSIX Safety Concepts::.
|
||
|
||
The function `strxfrm' transforms the multibyte string FROM using
|
||
the collation transformation determined by the locale currently
|
||
selected for collation, and stores the transformed string in the
|
||
array TO. Up to SIZE bytes (including a terminating null byte) are
|
||
stored.
|
||
|
||
The behavior is undefined if the strings TO and FROM overlap; see
|
||
*Note Copying Strings and Arrays::.
|
||
|
||
The return value is the length of the entire transformed string.
|
||
This value is not affected by the value of SIZE, but if it is
|
||
greater than or equal to SIZE, it means that the transformed string
|
||
did not entirely fit in the array TO. In this case, only as much
|
||
of the string as actually fits was stored. To get the whole
|
||
transformed string, call `strxfrm' again with a bigger output
|
||
array.
|
||
|
||
The transformed string may be longer than the original string, and
|
||
it may also be shorter.
|
||
|
||
If SIZE is zero, no bytes are stored in TO. In this case,
|
||
`strxfrm' simply returns the number of bytes that would be the
|
||
length of the transformed string. This is useful for determining
|
||
what size the allocated array should be. It does not matter what
|
||
TO is if SIZE is zero; TO may even be a null pointer.
|
||
|
||
-- Function: size_t wcsxfrm (wchar_t *restrict WTO, const wchar_t
|
||
*WFROM, size_t SIZE)
|
||
Preliminary: | MT-Safe locale | AS-Unsafe heap | AC-Unsafe mem |
|
||
*Note POSIX Safety Concepts::.
|
||
|
||
The function `wcsxfrm' transforms wide string WFROM using the
|
||
collation transformation determined by the locale currently
|
||
selected for collation, and stores the transformed string in the
|
||
array WTO. Up to SIZE wide characters (including a terminating
|
||
null wide character) are stored.
|
||
|
||
The behavior is undefined if the strings WTO and WFROM overlap;
|
||
see *Note Copying Strings and Arrays::.
|
||
|
||
The return value is the length of the entire transformed wide
|
||
string. This value is not affected by the value of SIZE, but if
|
||
it is greater than or equal to SIZE, it means that the transformed
|
||
wide string did not entirely fit in the array WTO. In this case,
|
||
only as much of the wide string as actually fits was stored. To
|
||
get the whole transformed wide string, call `wcsxfrm' again with a
|
||
bigger output array.
|
||
|
||
The transformed wide string may be longer than the original wide
|
||
string, and it may also be shorter.
|
||
|
||
If SIZE is zero, no wide characters are stored in WTO. In this
|
||
case, `wcsxfrm' simply returns the number of wide characters that
|
||
would be the length of the transformed wide string. This is
|
||
useful for determining what size the allocated array should be
|
||
(remember to multiply by `sizeof (wchar_t)'). It does not
|
||
matter what WTO is if SIZE is zero; WTO may even be a null pointer.
|
||
|
||
Here is an example of how you can use `strxfrm' when you plan to do
|
||
many comparisons. It does the same thing as the previous example, but
|
||
much faster, because it has to transform each string only once, no
|
||
matter how many times it is compared with other strings. Even the time
|
||
needed to allocate and free storage is much less than the time we save,
|
||
when there are many strings.
|
||
|
||
struct sorter { char *input; char *transformed; };
|
||
|
||
/* This is the comparison function used with `qsort'
|
||
to sort an array of `struct sorter'. */
|
||
|
||
int
|
||
compare_elements (const void *v1, const void *v2)
|
||
{
|
||
const struct sorter *p1 = v1;
|
||
const struct sorter *p2 = v2;
|
||
|
||
return strcmp (p1->transformed, p2->transformed);
|
||
}
|
||
|
||
/* This is the entry point--the function to sort
|
||
strings using the locale's collating sequence. */
|
||
|
||
void
|
||
sort_strings_fast (char **array, int nstrings)
|
||
{
|
||
struct sorter temp_array[nstrings];
|
||
int i;
|
||
|
||
/* Set up `temp_array'. Each element contains
|
||
one input string and its transformed string. */
|
||
for (i = 0; i < nstrings; i++)
|
||
{
|
||
size_t length = strlen (array[i]) * 2;
|
||
char *transformed;
|
||
size_t transformed_length;
|
||
|
||
temp_array[i].input = array[i];
|
||
|
||
/* First try a buffer perhaps big enough. */
|
||
transformed = (char *) xmalloc (length);
|
||
|
||
/* Transform `array[i]'. */
|
||
transformed_length = strxfrm (transformed, array[i], length);
|
||
|
||
/* If the buffer was not large enough, resize it
|
||
and try again. */
|
||
if (transformed_length >= length)
|
||
{
|
||
/* Allocate the needed space. +1 for terminating
|
||
`'\0'' byte. */
|
||
transformed = (char *) xrealloc (transformed,
|
||
transformed_length + 1);
|
||
|
||
/* The return value is not interesting because we know
|
||
how long the transformed string is. */
|
||
(void) strxfrm (transformed, array[i],
|
||
transformed_length + 1);
|
||
}
|
||
|
||
temp_array[i].transformed = transformed;
|
||
}
|
||
|
||
/* Sort `temp_array' by comparing transformed strings. */
|
||
qsort (temp_array, nstrings,
|
||
sizeof (struct sorter), compare_elements);
|
||
|
||
/* Put the elements back in the permanent array
|
||
in their sorted order. */
|
||
for (i = 0; i < nstrings; i++)
|
||
array[i] = temp_array[i].input;
|
||
|
||
/* Free the strings we allocated. */
|
||
for (i = 0; i < nstrings; i++)
|
||
free (temp_array[i].transformed);
|
||
}
|
||
|
||
The interesting part of this code for the wide character version
|
||
would look like this:
|
||
|
||
void
|
||
sort_strings_fast (wchar_t **array, int nstrings)
|
||
{
|
||
...
|
||
/* Transform `array[i]'. */
|
||
transformed_length = wcsxfrm (transformed, array[i], length);
|
||
|
||
/* If the buffer was not large enough, resize it
|
||
and try again. */
|
||
if (transformed_length >= length)
|
||
{
|
||
/* Allocate the needed space. +1 for terminating
|
||
`L'\0'' wide character. */
|
||
transformed = (wchar_t *) xrealloc (transformed,
|
||
(transformed_length + 1)
|
||
* sizeof (wchar_t));
|
||
|
||
/* The return value is not interesting because we know
|
||
how long the transformed string is. */
|
||
(void) wcsxfrm (transformed, array[i],
|
||
transformed_length + 1);
|
||
}
|
||
...
|
||
|
||
Note the additional multiplication by `sizeof (wchar_t)' in the
|
||
`realloc' call.
|
||
|
||
*Compatibility Note:* The string collation functions are a new
|
||
feature of ISO C90. Older C dialects have no equivalent feature. The
|
||
wide character versions were introduced in Amendment 1 to ISO C90.
|
||
|
||
|
||
File: libc.info, Node: Search Functions, Next: Finding Tokens in a String, Prev: Collation Functions, Up: String and Array Utilities
|
||
|
||
5.9 Search Functions
|
||
====================
|
||
|
||
This section describes library functions which perform various kinds of
|
||
searching operations on strings and arrays. These functions are
|
||
declared in the header file `string.h'.
|
||
|
||
-- Function: void * memchr (const void *BLOCK, int C, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function finds the first occurrence of the byte C (converted
|
||
to an `unsigned char') in the initial SIZE bytes of the object
|
||
beginning at BLOCK. The return value is a pointer to the located
|
||
byte, or a null pointer if no match was found.
|
||
|
||
-- Function: wchar_t * wmemchr (const wchar_t *BLOCK, wchar_t WC,
|
||
size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function finds the first occurrence of the wide character WC
|
||
in the initial SIZE wide characters of the object beginning at
|
||
BLOCK. The return value is a pointer to the located wide
|
||
character, or a null pointer if no match was found.
|
||
|
||
-- Function: void * rawmemchr (const void *BLOCK, int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Often the `memchr' function is used with the knowledge that the
|
||
byte C is available in the memory block specified by the
|
||
parameters. But this means that the SIZE parameter is not really
|
||
needed and that the tests performed with it at runtime (to check
|
||
whether the end of the block is reached) are not needed.
|
||
|
||
The `rawmemchr' function exists for just this situation which is
|
||
surprisingly frequent. The interface is similar to `memchr' except
|
||
that the SIZE parameter is missing. The function will look beyond
|
||
the end of the block pointed to by BLOCK in case the programmer
|
||
made an error in assuming that the byte C is present in the block.
|
||
In this case the result is unspecified. Otherwise the return
|
||
value is a pointer to the located byte.
|
||
|
||
This function is of special interest when looking for the end of a
|
||
string. Since all strings are terminated by a null byte a call
|
||
like
|
||
|
||
rawmemchr (str, '\0')
|
||
|
||
will never go beyond the end of the string.
|
||
|
||
This function is a GNU extension.
|
||
|
||
-- Function: void * memrchr (const void *BLOCK, int C, size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The function `memrchr' is like `memchr', except that it searches
|
||
backwards from the end of the block defined by BLOCK and SIZE
|
||
(instead of forwards from the front).
|
||
|
||
This function is a GNU extension.
|
||
|
||
-- Function: char * strchr (const char *STRING, int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `strchr' function finds the first occurrence of the byte C
|
||
(converted to a `char') in the string beginning at STRING. The
|
||
return value is a pointer to the located byte, or a null pointer
|
||
if no match was found.
|
||
|
||
For example,
|
||
strchr ("hello, world", 'l')
|
||
=> "llo, world"
|
||
strchr ("hello, world", '?')
|
||
=> NULL
|
||
|
||
The terminating null byte is considered to be part of the string,
|
||
so you can use this function to get a pointer to the end of a string
|
||
by specifying zero as the value of the C argument.
|
||
|
||
When `strchr' returns a null pointer, it does not let you know the
|
||
position of the terminating null byte it has found. If you need
|
||
that information, it is better (but less portable) to use
|
||
`strchrnul' than to search for it a second time.
|
||
|
||
-- Function: wchar_t * wcschr (const wchar_t *WSTRING, wchar_t WC)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `wcschr' function finds the first occurrence of the wide
|
||
character WC in the wide string beginning at WSTRING. The return
|
||
value is a pointer to the located wide character, or a null
|
||
pointer if no match was found.
|
||
|
||
The terminating null wide character is considered to be part of
|
||
the wide string, so you can use this function to get a pointer to the
|
||
end of a wide string by specifying a null wide character as the
|
||
value of the WC argument. It would be better (but less portable)
|
||
to use `wcschrnul' in this case, though.
|
||
|
||
-- Function: char * strchrnul (const char *STRING, int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`strchrnul' is the same as `strchr' except that if it does not
|
||
find the byte, it returns a pointer to the string's terminating null
|
||
byte rather than a null pointer.
|
||
|
||
This function is a GNU extension.
|
||
|
||
-- Function: wchar_t * wcschrnul (const wchar_t *WSTRING, wchar_t WC)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`wcschrnul' is the same as `wcschr' except that if it does not
|
||
find the wide character, it returns a pointer to the wide string's
|
||
terminating null wide character rather than a null pointer.
|
||
|
||
This function is a GNU extension.
|
||
|
||
One useful, but unusual, use of the `strchr' function is when one
|
||
wants to have a pointer pointing to the null byte terminating a string.
|
||
This is often written in this way:
|
||
|
||
s += strlen (s);
|
||
|
||
This is almost optimal but the addition operation duplicates a bit of
|
||
the work already done in the `strlen' function. A better solution is
|
||
this:
|
||
|
||
s = strchr (s, '\0');
|
||
|
||
There is no restriction on the second parameter of `strchr' so it
|
||
could very well also be zero. Those readers thinking very hard about
|
||
this might now point out that the `strchr' function is more expensive
|
||
than the `strlen' function since we have two abort criteria. This is
|
||
right. But in the GNU C Library the implementation of `strchr' is
|
||
optimized in a special way so that `strchr' actually is faster.
|
||
|
||
-- Function: char * strrchr (const char *STRING, int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The function `strrchr' is like `strchr', except that it searches
|
||
backwards from the end of the string STRING (instead of forwards
|
||
from the front).
|
||
|
||
For example,
|
||
strrchr ("hello, world", 'l')
|
||
=> "ld"
|
||
|
||
-- Function: wchar_t * wcsrchr (const wchar_t *WSTRING, wchar_t C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The function `wcsrchr' is like `wcschr', except that it searches
|
||
backwards from the end of the string WSTRING (instead of forwards
|
||
from the front).
|
||
|
||
-- Function: char * strstr (const char *HAYSTACK, const char *NEEDLE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This is like `strchr', except that it searches HAYSTACK for a
|
||
substring NEEDLE rather than just a single byte. It returns a
|
||
pointer into the string HAYSTACK that is the first byte of the
|
||
substring, or a null pointer if no match was found. If NEEDLE is
|
||
an empty string, the function returns HAYSTACK.
|
||
|
||
For example,
|
||
strstr ("hello, world", "l")
|
||
=> "llo, world"
|
||
strstr ("hello, world", "wo")
|
||
=> "world"
|
||
|
||
-- Function: wchar_t * wcsstr (const wchar_t *HAYSTACK, const wchar_t
|
||
*NEEDLE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This is like `wcschr', except that it searches HAYSTACK for a
|
||
substring NEEDLE rather than just a single wide character. It
|
||
returns a pointer into the string HAYSTACK that is the first wide
|
||
character of the substring, or a null pointer if no match was
|
||
found. If NEEDLE is an empty string, the function returns
|
||
HAYSTACK.
|
||
|
||
-- Function: wchar_t * wcswcs (const wchar_t *HAYSTACK, const wchar_t
|
||
*NEEDLE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`wcswcs' is a deprecated alias for `wcsstr'. This is the name
|
||
originally used in the X/Open Portability Guide before the
|
||
Amendment 1 to ISO C90 was published.
|
||
|
||
-- Function: char * strcasestr (const char *HAYSTACK, const char
|
||
*NEEDLE)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
This is like `strstr', except that it ignores case in searching for
|
||
the substring. Like `strcasecmp', it is locale dependent how
|
||
uppercase and lowercase characters are related, and arguments are
|
||
multibyte strings.
|
||
|
||
For example,
|
||
strcasestr ("hello, world", "L")
|
||
=> "llo, world"
|
||
strcasestr ("hello, World", "wo")
|
||
=> "World"
|
||
|
||
-- Function: void * memmem (const void *HAYSTACK, size_t HAYSTACK-LEN,
|
||
const void *NEEDLE, size_t NEEDLE-LEN)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This is like `strstr', but NEEDLE and HAYSTACK are byte arrays
|
||
rather than strings. NEEDLE-LEN is the length of NEEDLE and
|
||
HAYSTACK-LEN is the length of HAYSTACK.
|
||
|
||
This function is a GNU extension.
|
||
|
||
-- Function: size_t strspn (const char *STRING, const char *SKIPSET)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `strspn' ("string span") function returns the length of the
|
||
initial substring of STRING that consists entirely of bytes that
|
||
are members of the set specified by the string SKIPSET. The order
|
||
of the bytes in SKIPSET is not important.
|
||
|
||
For example,
|
||
strspn ("hello, world", "abcdefghijklmnopqrstuvwxyz")
|
||
=> 5
|
||
|
||
In a multibyte string, characters consisting of more than one byte
|
||
are not treated as single entities. Each byte is treated
|
||
separately. The function is not locale-dependent.
|
||
|
||
-- Function: size_t wcsspn (const wchar_t *WSTRING, const wchar_t
|
||
*SKIPSET)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `wcsspn' ("wide character string span") function returns the
|
||
length of the initial substring of WSTRING that consists entirely
|
||
of wide characters that are members of the set specified by the
|
||
string SKIPSET. The order of the wide characters in SKIPSET is not
|
||
important.
|
||
|
||
-- Function: size_t strcspn (const char *STRING, const char *STOPSET)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `strcspn' ("string complement span") function returns the
|
||
length of the initial substring of STRING that consists entirely
|
||
of bytes that are _not_ members of the set specified by the string
|
||
STOPSET. (In other words, it returns the offset of the first byte
|
||
in STRING that is a member of the set STOPSET.)
|
||
|
||
For example,
|
||
strcspn ("hello, world", " \t\n,.;!?")
|
||
=> 5
|
||
|
||
In a multibyte string, characters consisting of more than one byte
|
||
are not treated as single entities. Each byte is treated
|
||
separately. The function is not locale-dependent.
|
||
|
||
-- Function: size_t wcscspn (const wchar_t *WSTRING, const wchar_t
|
||
*STOPSET)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `wcscspn' ("wide character string complement span") function
|
||
returns the length of the initial substring of WSTRING that
|
||
consists entirely of wide characters that are _not_ members of the
|
||
set specified by the string STOPSET. (In other words, it returns
|
||
the offset of the first wide character in WSTRING that is a member
|
||
of the set STOPSET.)
|
||
|
||
-- Function: char * strpbrk (const char *STRING, const char *STOPSET)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `strpbrk' ("string pointer break") function is related to
|
||
`strcspn', except that it returns a pointer to the first byte in
|
||
STRING that is a member of the set STOPSET instead of the length
|
||
of the initial substring. It returns a null pointer if no such
|
||
byte from STOPSET is found.
|
||
|
||
For example,
|
||
|
||
strpbrk ("hello, world", " \t\n,.;!?")
|
||
=> ", world"
|
||
|
||
In a multibyte string, characters consisting of more than one byte
|
||
are not treated as single entities. Each byte is treated
|
||
separately. The function is not locale-dependent.
|
||
|
||
-- Function: wchar_t * wcspbrk (const wchar_t *WSTRING, const wchar_t
|
||
*STOPSET)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `wcspbrk' ("wide character string pointer break") function is
|
||
related to `wcscspn', except that it returns a pointer to the first
|
||
wide character in WSTRING that is a member of the set STOPSET
|
||
instead of the length of the initial substring. It returns a null
|
||
pointer if no such wide character from STOPSET is found.
|
||
|
||
5.9.1 Compatibility String Search Functions
|
||
-------------------------------------------
|
||
|
||
-- Function: char * index (const char *STRING, int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`index' is another name for `strchr'; they are exactly the same.
|
||
New code should always use `strchr' since this name is defined in
|
||
ISO C while `index' is a BSD invention which never was available
|
||
on System V derived systems.
|
||
|
||
-- Function: char * rindex (const char *STRING, int C)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`rindex' is another name for `strrchr'; they are exactly the same.
|
||
New code should always use `strrchr' since this name is defined in
|
||
ISO C while `rindex' is a BSD invention which never was available
|
||
on System V derived systems.
|
||
|
||
|
||
File: libc.info, Node: Finding Tokens in a String, Next: Erasing Sensitive Data, Prev: Search Functions, Up: String and Array Utilities
|
||
|
||
5.10 Finding Tokens in a String
|
||
===============================
|
||
|
||
It's fairly common for programs to have a need to do some simple kinds
|
||
of lexical analysis and parsing, such as splitting a command string up
|
||
into tokens. You can do this with the `strtok' function, declared in
|
||
the header file `string.h'.
|
||
|
||
-- Function: char * strtok (char *restrict NEWSTRING, const char
|
||
*restrict DELIMITERS)
|
||
Preliminary: | MT-Unsafe race:strtok | AS-Unsafe | AC-Safe | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
A string can be split into tokens by making a series of calls to
|
||
the function `strtok'.
|
||
|
||
The string to be split up is passed as the NEWSTRING argument on
|
||
the first call only. The `strtok' function uses this to set up
|
||
some internal state information. Subsequent calls to get
|
||
additional tokens from the same string are indicated by passing a
|
||
null pointer as the NEWSTRING argument. Calling `strtok' with
|
||
another non-null NEWSTRING argument reinitializes the state
|
||
information. It is guaranteed that no other library function ever
|
||
calls `strtok' behind your back (which would mess up this internal
|
||
state information).
|
||
|
||
The DELIMITERS argument is a string that specifies a set of
|
||
delimiters that may surround the token being extracted. All the
|
||
initial bytes that are members of this set are discarded. The
|
||
first byte that is _not_ a member of this set of delimiters marks
|
||
the beginning of the next token. The end of the token is found by
|
||
looking for the next byte that is a member of the delimiter set.
|
||
This byte in the original string NEWSTRING is overwritten by a
|
||
null byte, and the pointer to the beginning of the token in
|
||
NEWSTRING is returned.
|
||
|
||
On the next call to `strtok', the searching begins at the next
|
||
byte beyond the one that marked the end of the previous token.
|
||
Note that the set of delimiters DELIMITERS does not have to be the
|
||
same on every call in a series of calls to `strtok'.
|
||
|
||
If the end of the string NEWSTRING is reached, or if the remainder
|
||
of the string consists only of delimiter bytes, `strtok' returns a
|
||
null pointer.
|
||
|
||
In a multibyte string, characters consisting of more than one byte
|
||
are not treated as single entities. Each byte is treated
|
||
separately. The function is not locale-dependent.
|
||
|
||
-- Function: wchar_t * wcstok (wchar_t *NEWSTRING, const wchar_t
|
||
*DELIMITERS, wchar_t **SAVE_PTR)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
A string can be split into tokens by making a series of calls to
|
||
the function `wcstok'.
|
||
|
||
The string to be split up is passed as the NEWSTRING argument on
|
||
the first call only. The `wcstok' function uses this to set up
|
||
some internal state information. Subsequent calls to get
|
||
additional tokens from the same wide string are indicated by
|
||
passing a null pointer as the NEWSTRING argument, which causes the
|
||
pointer previously stored in SAVE_PTR to be used instead.
|
||
|
||
The DELIMITERS argument is a wide string that specifies a set of
|
||
delimiters that may surround the token being extracted. All the
|
||
initial wide characters that are members of this set are discarded.
|
||
The first wide character that is _not_ a member of this set of
|
||
delimiters marks the beginning of the next token. The end of the
|
||
token is found by looking for the next wide character that is a
|
||
member of the delimiter set. This wide character in the original
|
||
wide string NEWSTRING is overwritten by a null wide character, the
|
||
pointer past the overwritten wide character is saved in SAVE_PTR,
|
||
and the pointer to the beginning of the token in NEWSTRING is
|
||
returned.
|
||
|
||
On the next call to `wcstok', the searching begins at the next
|
||
wide character beyond the one that marked the end of the previous
|
||
token. Note that the set of delimiters DELIMITERS does not have to
|
||
be the same on every call in a series of calls to `wcstok'.
|
||
|
||
If the end of the wide string NEWSTRING is reached, or if the
|
||
remainder of the string consists only of delimiter wide characters,
|
||
`wcstok' returns a null pointer.
|
||
|
||
*Warning:* Since `strtok' and `wcstok' alter the string they are
|
||
parsing, you should always copy the string to a temporary buffer before
|
||
parsing it with `strtok'/`wcstok' (*note Copying Strings and Arrays::).
|
||
If you allow `strtok' or `wcstok' to modify a string that came from
|
||
another part of your program, you are asking for trouble; that string
|
||
might be used for other purposes after `strtok' or `wcstok' has
|
||
modified it, and it would not have the expected value.
|
||
|
||
The string that you are operating on might even be a constant. Then
|
||
when `strtok' or `wcstok' tries to modify it, your program will get a
|
||
fatal signal for writing in read-only memory. *Note Program Error
|
||
Signals::. Even if the operation of `strtok' or `wcstok' would not
|
||
require a modification of the string (e.g., if there is exactly one
|
||
token) the string can (and in the GNU C Library case will) be modified.
|
||
|
||
This is a special case of a general principle: if a part of a program
|
||
does not have as its purpose the modification of a certain data
|
||
structure, then it is error-prone to modify the data structure
|
||
temporarily.
|
||
|
||
The function `strtok' is not reentrant, whereas `wcstok' is. *Note
|
||
Nonreentrancy::, for a discussion of where and why reentrancy is
|
||
important.
|
||
|
||
Here is a simple example showing the use of `strtok'.
|
||
|
||
#include <string.h>
|
||
#include <stddef.h>
|
||
|
||
...
|
||
|
||
const char string[] = "words separated by spaces -- and, punctuation!";
|
||
const char delimiters[] = " .,;:!-";
|
||
char *token, *cp;
|
||
|
||
...
|
||
|
||
cp = strdupa (string); /* Make writable copy. */
|
||
token = strtok (cp, delimiters); /* token => "words" */
|
||
token = strtok (NULL, delimiters); /* token => "separated" */
|
||
token = strtok (NULL, delimiters); /* token => "by" */
|
||
token = strtok (NULL, delimiters); /* token => "spaces" */
|
||
token = strtok (NULL, delimiters); /* token => "and" */
|
||
token = strtok (NULL, delimiters); /* token => "punctuation" */
|
||
token = strtok (NULL, delimiters); /* token => NULL */
|
||
|
||
The GNU C Library contains two more functions for tokenizing a string
|
||
which overcome the limitation of non-reentrancy. They are not
|
||
available for wide strings.
|
||
|
||
-- Function: char * strtok_r (char *NEWSTRING, const char *DELIMITERS,
|
||
char **SAVE_PTR)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Just like `strtok', this function splits the string into several
|
||
tokens which can be accessed by successive calls to `strtok_r'.
|
||
The difference is that, as in `wcstok', the information about the
|
||
next token is stored in the space pointed to by the third argument,
|
||
SAVE_PTR, which is a pointer to a string pointer. Calling
|
||
`strtok_r' with a null pointer for NEWSTRING and leaving SAVE_PTR
|
||
between the calls unchanged does the job without hindering
|
||
reentrancy.
|
||
|
||
This function is defined in POSIX.1 and can be found on many
|
||
systems which support multi-threading.
|
||
|
||
-- Function: char * strsep (char **STRING_PTR, const char *DELIMITER)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This function has functionality similar to `strtok_r' with the
|
||
NEWSTRING argument replaced by the SAVE_PTR argument. The
|
||
initialization of the moving pointer has to be done by the user.
|
||
Successive calls to `strsep' move the pointer along the tokens
|
||
separated by DELIMITER, returning the address of the next token
|
||
and updating STRING_PTR to point to the beginning of the next
|
||
token.
|
||
|
||
One difference between `strsep' and `strtok_r' is that if the
|
||
input string contains more than one byte from DELIMITER in a row
|
||
`strsep' returns an empty string for each pair of bytes from
|
||
DELIMITER. This means that a program normally should test for
|
||
`strsep' returning an empty string before processing it.
|
||
|
||
This function was introduced in 4.3BSD and therefore is widely
|
||
available.
|
||
|
||
Here is what the above example looks like when `strsep' is used.
|
||
|
||
#include <string.h>
|
||
#include <stddef.h>
|
||
|
||
...
|
||
|
||
const char string[] = "words separated by spaces -- and, punctuation!";
|
||
const char delimiters[] = " .,;:!-";
|
||
char *running;
|
||
char *token;
|
||
|
||
...
|
||
|
||
running = strdupa (string);
|
||
token = strsep (&running, delimiters); /* token => "words" */
|
||
token = strsep (&running, delimiters); /* token => "separated" */
|
||
token = strsep (&running, delimiters); /* token => "by" */
|
||
token = strsep (&running, delimiters); /* token => "spaces" */
|
||
token = strsep (&running, delimiters); /* token => "" */
|
||
token = strsep (&running, delimiters); /* token => "" */
|
||
token = strsep (&running, delimiters); /* token => "" */
|
||
token = strsep (&running, delimiters); /* token => "and" */
|
||
token = strsep (&running, delimiters); /* token => "" */
|
||
token = strsep (&running, delimiters); /* token => "punctuation" */
|
||
token = strsep (&running, delimiters); /* token => "" */
|
||
token = strsep (&running, delimiters); /* token => NULL */
|
||
|
||
-- Function: char * basename (const char *FILENAME)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The GNU version of the `basename' function returns the last
|
||
component of the path in FILENAME. This function is the preferred
|
||
usage, since it does not modify the argument, FILENAME, and
|
||
respects trailing slashes. The prototype for `basename' can be
|
||
found in `string.h'. Note, this function is overridden by the XPG
|
||
version, if `libgen.h' is included.
|
||
|
||
Example of using GNU `basename':
|
||
|
||
#include <string.h>
|
||
|
||
int
|
||
main (int argc, char *argv[])
|
||
{
|
||
char *prog = basename (argv[0]);
|
||
|
||
if (argc < 2)
|
||
{
|
||
fprintf (stderr, "Usage %s <arg>\n", prog);
|
||
exit (1);
|
||
}
|
||
|
||
...
|
||
}
|
||
|
||
*Portability Note:* This function may produce different results on
|
||
different systems.
|
||
|
||
|
||
-- Function: char * basename (char *PATH)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
This is the standard XPG defined `basename'. It is similar in
|
||
spirit to the GNU version, but may modify the PATH by removing
|
||
trailing '/' bytes. If the PATH is made up entirely of '/' bytes,
|
||
then "/" will be returned. Also, if PATH is `NULL' or an empty
|
||
string, then "." is returned. The prototype for the XPG version
|
||
can be found in `libgen.h'.
|
||
|
||
Example of using XPG `basename':
|
||
|
||
#include <libgen.h>
|
||
|
||
int
|
||
main (int argc, char *argv[])
|
||
{
|
||
char *prog;
|
||
char *path = strdupa (argv[0]);
|
||
|
||
prog = basename (path);
|
||
|
||
if (argc < 2)
|
||
{
|
||
fprintf (stderr, "Usage %s <arg>\n", prog);
|
||
exit (1);
|
||
}
|
||
|
||
...
|
||
|
||
}
|
||
|
||
-- Function: char * dirname (char *PATH)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `dirname' function is the complement to the XPG version of
|
||
`basename'. It returns the parent directory of the file specified
|
||
by PATH. If PATH is `NULL', an empty string, or contains no '/'
|
||
bytes, then "." is returned. The prototype for this function can
|
||
be found in `libgen.h'.
|
||
|
||
|
||
File: libc.info, Node: Erasing Sensitive Data, Next: Shuffling Bytes, Prev: Finding Tokens in a String, Up: String and Array Utilities
|
||
|
||
5.11 Erasing Sensitive Data
|
||
===========================
|
||
|
||
Sensitive data, such as cryptographic keys, should be erased from
|
||
memory after use, to reduce the risk that a bug will expose it to the
|
||
outside world. However, compiler optimizations may determine that an
|
||
erasure operation is "unnecessary," and remove it from the generated
|
||
code, because no _correct_ program could access the variable or heap
|
||
object containing the sensitive data after it's deallocated. Since
|
||
erasure is a precaution against bugs, this optimization is
|
||
inappropriate.
|
||
|
||
The function `explicit_bzero' erases a block of memory, and
|
||
guarantees that the compiler will not remove the erasure as
|
||
"unnecessary."
|
||
|
||
#include <string.h>
|
||
|
||
extern void encrypt (const char *key, const char *in,
|
||
char *out, size_t n);
|
||
extern void genkey (const char *phrase, char *key);
|
||
|
||
void encrypt_with_phrase (const char *phrase, const char *in,
|
||
char *out, size_t n)
|
||
{
|
||
char key[16];
|
||
genkey (phrase, key);
|
||
encrypt (key, in, out, n);
|
||
explicit_bzero (key, 16);
|
||
}
|
||
|
||
In this example, if `memset', `bzero', or a hand-written loop had been
|
||
used, the compiler might remove them as "unnecessary."
|
||
|
||
*Warning:* `explicit_bzero' does not guarantee that sensitive data
|
||
is _completely_ erased from the computer's memory. There may be copies
|
||
in temporary storage areas, such as registers and "scratch" stack
|
||
space; since these are invisible to the source code, a library function
|
||
cannot erase them.
|
||
|
||
Also, `explicit_bzero' only operates on RAM. If a sensitive data
|
||
object never needs to have its address taken other than to call
|
||
`explicit_bzero', it might be stored entirely in CPU registers _until_
|
||
the call to `explicit_bzero'. Then it will be copied into RAM, the
|
||
copy will be erased, and the original will remain intact. Data in RAM
|
||
is more likely to be exposed by a bug than data in registers, so this
|
||
creates a brief window where the data is at greater risk of exposure
|
||
than it would have been if the program didn't try to erase it at all.
|
||
|
||
Declaring sensitive variables as `volatile' will make both the above
|
||
problems _worse_; a `volatile' variable will be stored in memory for
|
||
its entire lifetime, and the compiler will make _more_ copies of it
|
||
than it would otherwise have. Attempting to erase a normal variable
|
||
"by hand" through a `volatile'-qualified pointer doesn't work at
|
||
all--because the variable itself is not `volatile', some compilers will
|
||
ignore the qualification on the pointer and remove the erasure anyway.
|
||
|
||
Having said all that, in most situations, using `explicit_bzero' is
|
||
better than not using it. At present, the only way to do a more
|
||
thorough job is to write the entire sensitive operation in assembly
|
||
language. We anticipate that future compilers will recognize calls to
|
||
`explicit_bzero' and take appropriate steps to erase all the copies of
|
||
the affected data, wherever they may be.
|
||
|
||
-- Function: void explicit_bzero (void *BLOCK, size_t LEN)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`explicit_bzero' writes zero into LEN bytes of memory beginning at
|
||
BLOCK, just as `bzero' would. The zeroes are always written, even
|
||
if the compiler could determine that this is "unnecessary" because
|
||
no correct program could read them back.
|
||
|
||
*Note_* The _only_ optimization that `explicit_bzero' disables is
|
||
removal of "unnecessary" writes to memory. The compiler can
|
||
perform all the other optimizations that it could for a call to
|
||
`memset'. For instance, it may replace the function call with
|
||
inline memory writes, and it may assume that BLOCK cannot be a
|
||
null pointer.
|
||
|
||
*Portability Note:* This function first appeared in OpenBSD 5.5
|
||
and has not been standardized. Other systems may provide the same
|
||
functionality under a different name, such as `explicit_memset',
|
||
`memset_s', or `SecureZeroMemory'.
|
||
|
||
The GNU C Library declares this function in `string.h', but on
|
||
other systems it may be in `strings.h' instead.
|
||
|
||
|
||
File: libc.info, Node: Shuffling Bytes, Next: Obfuscating Data, Prev: Erasing Sensitive Data, Up: String and Array Utilities
|
||
|
||
5.12 Shuffling Bytes
|
||
====================
|
||
|
||
The function below addresses the perennial programming quandary: "How do
|
||
I take good data in string form and painlessly turn it into garbage?"
|
||
This is not a difficult thing to code for oneself, but the authors of
|
||
the GNU C Library wish to make it as convenient as possible.
|
||
|
||
To _erase_ data, use `explicit_bzero' (*note Erasing Sensitive
|
||
Data::); to obfuscate it reversibly, use `memfrob' (*note Obfuscating
|
||
Data::).
|
||
|
||
-- Function: char * strfry (char *STRING)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
`strfry' performs an in-place shuffle on STRING. Each character
|
||
is swapped to a position selected at random, within the portion of
|
||
the string starting with the character's original position. (This
|
||
is the Fisher-Yates algorithm for unbiased shuffling.)
|
||
|
||
Calling `strfry' will not disturb any of the random number
|
||
generators that have global state (*note Pseudo-Random Numbers::).
|
||
|
||
The return value of `strfry' is always STRING.
|
||
|
||
*Portability Note:* This function is unique to the GNU C Library.
|
||
It is declared in `string.h'.
|
||
|
||
|
||
File: libc.info, Node: Obfuscating Data, Next: Encode Binary Data, Prev: Shuffling Bytes, Up: String and Array Utilities
|
||
|
||
5.13 Obfuscating Data
|
||
=====================
|
||
|
||
The `memfrob' function reversibly obfuscates an array of binary data.
|
||
This is not true encryption; the obfuscated data still bears a clear
|
||
relationship to the original, and no secret key is required to undo the
|
||
obfuscation. It is analogous to the "Rot13" cipher used on Usenet for
|
||
obscuring offensive jokes, spoilers for works of fiction, and so on,
|
||
but it can be applied to arbitrary binary data.
|
||
|
||
Programs that need true encryption--a transformation that completely
|
||
obscures the original and cannot be reversed without knowledge of a
|
||
secret key--should use a dedicated cryptography library, such as
|
||
libgcrypt.
|
||
|
||
Programs that need to _destroy_ data should use `explicit_bzero'
|
||
(*note Erasing Sensitive Data::), or possibly `strfry' (*note Shuffling
|
||
Bytes::).
|
||
|
||
-- Function: void * memfrob (void *MEM, size_t LENGTH)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The function `memfrob' obfuscates LENGTH bytes of data beginning
|
||
at MEM, in place. Each byte is bitwise xor-ed with the binary
|
||
pattern 00101010 (hexadecimal 0x2A). The return value is always
|
||
MEM.
|
||
|
||
Calling `memfrob' a second time on the same data returns it to its
|
||
original state.
|
||
|
||
*Portability Note:* This function is unique to the GNU C Library.
|
||
It is declared in `string.h'.
|
||
|
||
|
||
File: libc.info, Node: Encode Binary Data, Next: Argz and Envz Vectors, Prev: Obfuscating Data, Up: String and Array Utilities
|
||
|
||
5.14 Encode Binary Data
|
||
=======================
|
||
|
||
To store or transfer binary data in environments which only support text
|
||
one has to encode the binary data by mapping the input bytes to bytes
|
||
in the range allowed for storing or transferring. SVID systems (and
|
||
nowadays XPG compliant systems) provide minimal support for this task.
|
||
|
||
-- Function: char * l64a (long int N)
|
||
Preliminary: | MT-Unsafe race:l64a | AS-Unsafe | AC-Safe | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
This function encodes a 32-bit input value using bytes from the
|
||
basic character set. It returns a pointer to a 7 byte buffer which
|
||
contains an encoded version of N. To encode a series of bytes the
|
||
user must copy the returned string to a destination buffer. It
|
||
returns the empty string if N is zero, which is somewhat bizarre
|
||
but mandated by the standard.
|
||
*Warning:* Since a static buffer is used this function should not
|
||
be used in multi-threaded programs. There is no thread-safe
|
||
alternative to this function in the C library.
|
||
*Compatibility Note:* The XPG standard states that the return
|
||
value of `l64a' is undefined if N is negative. In the GNU
|
||
implementation, `l64a' treats its argument as unsigned, so it will
|
||
return a sensible encoding for any nonzero N; however, portable
|
||
programs should not rely on this.
|
||
|
||
To encode a large buffer `l64a' must be called in a loop, once for
|
||
each 32-bit word of the buffer. For example, one could do
|
||
something like this:
|
||
|
||
char *
|
||
encode (const void *buf, size_t len)
|
||
{
|
||
/* We know in advance how long the buffer has to be. */
|
||
unsigned char *in = (unsigned char *) buf;
|
||
char *out = malloc (6 + ((len + 3) / 4) * 6 + 1);
|
||
char *cp = out, *p;
|
||
|
||
/* Encode the length. */
|
||
/* Using `htonl' is necessary so that the data can be
|
||
decoded even on machines with different byte order.
|
||
`l64a' can return a string shorter than 6 bytes, so
|
||
we pad it with encoding of 0 ('.') at the end by
|
||
hand. */
|
||
|
||
p = stpcpy (cp, l64a (htonl (len)));
|
||
cp = mempcpy (p, "......", 6 - (p - cp));
|
||
|
||
while (len > 3)
|
||
{
|
||
unsigned long int n = *in++;
|
||
n = (n << 8) | *in++;
|
||
n = (n << 8) | *in++;
|
||
n = (n << 8) | *in++;
|
||
len -= 4;
|
||
p = stpcpy (cp, l64a (htonl (n)));
|
||
cp = mempcpy (p, "......", 6 - (p - cp));
|
||
}
|
||
if (len > 0)
|
||
{
|
||
unsigned long int n = *in++;
|
||
if (--len > 0)
|
||
{
|
||
n = (n << 8) | *in++;
|
||
if (--len > 0)
|
||
n = (n << 8) | *in;
|
||
}
|
||
cp = stpcpy (cp, l64a (htonl (n)));
|
||
}
|
||
*cp = '\0';
|
||
return out;
|
||
}
|
||
|
||
It is strange that the library does not provide the complete
|
||
functionality needed but so be it.
|
||
|
||
|
||
To decode data produced with `l64a' the following function should be
|
||
used.
|
||
|
||
-- Function: long int a64l (const char *STRING)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The parameter STRING should contain a string which was produced by
|
||
a call to `l64a'. The function processes at most 6 bytes of this
|
||
string, and decodes the bytes it finds according to the table
|
||
below. It stops decoding when it finds a byte not in the table,
|
||
rather like `atoi'; if you have a buffer which has been broken into
|
||
lines, you must be careful to skip over the end-of-line bytes.
|
||
|
||
The decoded number is returned as a `long int' value.
|
||
|
||
The `l64a' and `a64l' functions use a base 64 encoding, in which
|
||
each byte of an encoded string represents six bits of an input word.
|
||
These symbols are used for the base 64 digits:
|
||
|
||
0 1 2 3 4 5 6 7
|
||
0 `.' `/' `0' `1' `2' `3' `4' `5'
|
||
8 `6' `7' `8' `9' `A' `B' `C' `D'
|
||
16 `E' `F' `G' `H' `I' `J' `K' `L'
|
||
24 `M' `N' `O' `P' `Q' `R' `S' `T'
|
||
32 `U' `V' `W' `X' `Y' `Z' `a' `b'
|
||
40 `c' `d' `e' `f' `g' `h' `i' `j'
|
||
48 `k' `l' `m' `n' `o' `p' `q' `r'
|
||
56 `s' `t' `u' `v' `w' `x' `y' `z'
|
||
|
||
This encoding scheme is not standard. There are some other encoding
|
||
methods which are much more widely used (UU encoding, MIME encoding).
|
||
Generally, it is better to use one of these encodings.
|
||
|
||
|
||
File: libc.info, Node: Argz and Envz Vectors, Prev: Encode Binary Data, Up: String and Array Utilities
|
||
|
||
5.15 Argz and Envz Vectors
|
||
==========================
|
||
|
||
"argz vectors" are vectors of strings in a contiguous block of memory,
|
||
each element separated from its neighbors by null bytes (`'\0'').
|
||
|
||
"Envz vectors" are an extension of argz vectors where each element
|
||
is a name-value pair, separated by a `'='' byte (as in a Unix
|
||
environment).
|
||
|
||
* Menu:
|
||
|
||
* Argz Functions:: Operations on argz vectors.
|
||
* Envz Functions:: Additional operations on environment vectors.
|
||
|
||
|
||
File: libc.info, Node: Argz Functions, Next: Envz Functions, Up: Argz and Envz Vectors
|
||
|
||
5.15.1 Argz Functions
|
||
---------------------
|
||
|
||
Each argz vector is represented by a pointer to the first element, of
|
||
type `char *', and a size, of type `size_t', both of which can be
|
||
initialized to `0' to represent an empty argz vector. All argz
|
||
functions accept either a pointer and a size argument, or pointers to
|
||
them, if they will be modified.
|
||
|
||
The argz functions use `malloc'/`realloc' to allocate/grow argz
|
||
vectors, and so any argz vector created using these functions may be
|
||
freed by using `free'; conversely, any argz function that may grow a
|
||
string expects that string to have been allocated using `malloc' (those
|
||
argz functions that only examine their arguments or modify them in
|
||
place will work on any sort of memory). *Note Unconstrained
|
||
Allocation::.
|
||
|
||
All argz functions that do memory allocation have a return type of
|
||
`error_t', and return `0' for success, and `ENOMEM' if an allocation
|
||
error occurs.
|
||
|
||
These functions are declared in the standard include file `argz.h'.
|
||
|
||
-- Function: error_t argz_create (char *const ARGV[], char **ARGZ,
|
||
size_t *ARGZ_LEN)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
The `argz_create' function converts the Unix-style argument vector
|
||
ARGV (a vector of pointers to normal C strings, terminated by
|
||
`(char *)0'; *note Program Arguments::) into an argz vector with
|
||
the same elements, which is returned in ARGZ and ARGZ_LEN.
|
||
|
||
-- Function: error_t argz_create_sep (const char *STRING, int SEP,
|
||
char **ARGZ, size_t *ARGZ_LEN)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
The `argz_create_sep' function converts the string STRING into an
|
||
argz vector (returned in ARGZ and ARGZ_LEN) by splitting it into
|
||
elements at every occurrence of the byte SEP.
|
||
|
||
-- Function: size_t argz_count (const char *ARGZ, size_t ARGZ_LEN)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
Returns the number of elements in the argz vector ARGZ and
|
||
ARGZ_LEN.
|
||
|
||
-- Function: void argz_extract (const char *ARGZ, size_t ARGZ_LEN,
|
||
char **ARGV)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `argz_extract' function converts the argz vector ARGZ and
|
||
ARGZ_LEN into a Unix-style argument vector stored in ARGV, by
|
||
putting pointers to every element in ARGZ into successive
|
||
positions in ARGV, followed by a terminator of `0'. ARGV must be
|
||
pre-allocated with enough space to hold all the elements in ARGZ
|
||
plus the terminating `(char *)0' (`(argz_count (ARGZ, ARGZ_LEN) +
|
||
1) * sizeof (char *)' bytes should be enough). Note that the
|
||
string pointers stored into ARGV point into ARGZ--they are not
|
||
copies--and so ARGZ must be copied if it will be changed while
|
||
ARGV is still active. This function is useful for passing the
|
||
elements in ARGZ to an exec function (*note Executing a File::).
|
||
|
||
-- Function: void argz_stringify (char *ARGZ, size_t LEN, int SEP)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `argz_stringify' function converts ARGZ into a normal string with the
|
||
elements separated by the byte SEP, by replacing each `'\0''
|
||
inside ARGZ (except the last one, which terminates the string)
|
||
with SEP. This is handy for printing ARGZ in a readable manner.
|
||
|
||
-- Function: error_t argz_add (char **ARGZ, size_t *ARGZ_LEN, const
|
||
char *STR)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
The `argz_add' function adds the string STR to the end of the argz
|
||
vector `*ARGZ', and updates `*ARGZ' and `*ARGZ_LEN' accordingly.
|
||
|
||
-- Function: error_t argz_add_sep (char **ARGZ, size_t *ARGZ_LEN,
|
||
const char *STR, int DELIM)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
The `argz_add_sep' function is similar to `argz_add', but STR is
|
||
split into separate elements in the result at occurrences of the
|
||
byte DELIM. This is useful, for instance, for adding the
|
||
components of a Unix search path to an argz vector, by using a
|
||
value of `':'' for DELIM.
|
||
|
||
-- Function: error_t argz_append (char **ARGZ, size_t *ARGZ_LEN, const
|
||
char *BUF, size_t BUF_LEN)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
The `argz_append' function appends BUF_LEN bytes starting at BUF
|
||
to the argz vector `*ARGZ', reallocating `*ARGZ' to accommodate
|
||
it, and adding BUF_LEN to `*ARGZ_LEN'.
|
||
|
||
-- Function: void argz_delete (char **ARGZ, size_t *ARGZ_LEN, char
|
||
*ENTRY)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
If ENTRY points to the beginning of one of the elements in the
|
||
argz vector `*ARGZ', the `argz_delete' function will remove this
|
||
entry and reallocate `*ARGZ', modifying `*ARGZ' and `*ARGZ_LEN'
|
||
accordingly. Note that as destructive argz functions usually
|
||
reallocate their argz argument, pointers into argz vectors such as
|
||
ENTRY will then become invalid.
|
||
|
||
-- Function: error_t argz_insert (char **ARGZ, size_t *ARGZ_LEN, char
|
||
*BEFORE, const char *ENTRY)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
The `argz_insert' function inserts the string ENTRY into the argz
|
||
vector `*ARGZ' at a point just before the existing element pointed
|
||
to by BEFORE, reallocating `*ARGZ' and updating `*ARGZ' and
|
||
`*ARGZ_LEN'. If BEFORE is `0', ENTRY is added to the end instead
|
||
(as if by `argz_add'). Since the first element is in fact the
|
||
same as `*ARGZ', passing in `*ARGZ' as the value of BEFORE will
|
||
result in ENTRY being inserted at the beginning.
|
||
|
||
-- Function: char * argz_next (const char *ARGZ, size_t ARGZ_LEN,
|
||
const char *ENTRY)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `argz_next' function provides a convenient way of iterating
|
||
over the elements in the argz vector ARGZ. It returns a pointer
|
||
to the next element in ARGZ after the element ENTRY, or `0' if
|
||
there are no elements following ENTRY. If ENTRY is `0', the first
|
||
element of ARGZ is returned.
|
||
|
||
This behavior suggests two styles of iteration:
|
||
|
||
char *entry = 0;
|
||
while ((entry = argz_next (ARGZ, ARGZ_LEN, entry)))
|
||
ACTION;
|
||
|
||
(the double parentheses are necessary to make some C compilers
|
||
shut up about what they consider a questionable `while'-test) and:
|
||
|
||
char *entry;
|
||
for (entry = ARGZ;
|
||
entry;
|
||
entry = argz_next (ARGZ, ARGZ_LEN, entry))
|
||
ACTION;
|
||
|
||
Note that the latter depends on ARGZ having a value of `0' if it
|
||
is empty (rather than a pointer to an empty block of memory); this
|
||
invariant is maintained for argz vectors created by the functions
|
||
here.
|
||
|
||
-- Function: error_t argz_replace (char **ARGZ, size_t *ARGZ_LEN,
|
||
const char *STR, const char *WITH, unsigned *REPLACE_COUNT)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
Replace any occurrences of the string STR in ARGZ with WITH,
|
||
reallocating ARGZ as necessary. If REPLACE_COUNT is non-zero,
|
||
`*REPLACE_COUNT' will be incremented by the number of replacements
|
||
performed.
|
||
|
||
|
||
File: libc.info, Node: Envz Functions, Prev: Argz Functions, Up: Argz and Envz Vectors
|
||
|
||
5.15.2 Envz Functions
|
||
---------------------
|
||
|
||
Envz vectors are just argz vectors with additional constraints on the
|
||
form of each element; as such, argz functions can also be used on them,
|
||
where it makes sense.
|
||
|
||
Each element in an envz vector is a name-value pair, separated by a
|
||
`'='' byte; if multiple `'='' bytes are present in an element, those
|
||
after the first are considered part of the value, and treated like all
|
||
other non-`'\0'' bytes.
|
||
|
||
If _no_ `'='' bytes are present in an element, that element is
|
||
considered the name of a "null" entry, as distinct from an entry with an
|
||
empty value: `envz_get' will return `0' if given the name of a null
|
||
entry, whereas an entry with an empty value would result in a value of
|
||
`""'; `envz_entry' will still find such entries, however. Null entries
|
||
can be removed with the `envz_strip' function.
|
||
|
||
As with argz functions, envz functions that may allocate memory (and
|
||
thus fail) have a return type of `error_t', and return either `0' or
|
||
`ENOMEM'.
|
||
|
||
These functions are declared in the standard include file `envz.h'.
|
||
|
||
-- Function: char * envz_entry (const char *ENVZ, size_t ENVZ_LEN,
|
||
const char *NAME)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `envz_entry' function finds the entry in ENVZ with the name
|
||
NAME, and returns a pointer to the whole entry--that is, the argz
|
||
element which begins with NAME followed by a `'='' byte. If there
|
||
is no entry with that name, `0' is returned.
|
||
|
||
-- Function: char * envz_get (const char *ENVZ, size_t ENVZ_LEN, const
|
||
char *NAME)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `envz_get' function finds the entry in ENVZ with the name NAME
|
||
(like `envz_entry'), and returns a pointer to the value portion of
|
||
that entry (following the `'=''). If there is no entry with that
|
||
name (or only a null entry), `0' is returned.
|
||
|
||
-- Function: error_t envz_add (char **ENVZ, size_t *ENVZ_LEN, const
|
||
char *NAME, const char *VALUE)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
The `envz_add' function adds an entry to `*ENVZ' (updating `*ENVZ'
|
||
and `*ENVZ_LEN') with the name NAME, and value VALUE. If an entry
|
||
with the same name already exists in ENVZ, it is removed first.
|
||
If VALUE is `0', then the new entry will be the special null type
|
||
of entry (mentioned above).
|
||
|
||
-- Function: error_t envz_merge (char **ENVZ, size_t *ENVZ_LEN, const
|
||
char *ENVZ2, size_t ENVZ2_LEN, int OVERRIDE)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
The `envz_merge' function adds each entry in ENVZ2 to ENVZ, as if
|
||
with `envz_add', updating `*ENVZ' and `*ENVZ_LEN'. If OVERRIDE is
|
||
true, then values in ENVZ2 will supersede those with the same name
|
||
in ENVZ, otherwise not.
|
||
|
||
Null entries are treated just like other entries in this respect,
|
||
so a null entry in ENVZ can prevent an entry of the same name in
|
||
ENVZ2 from being added to ENVZ, if OVERRIDE is false.
|
||
|
||
-- Function: void envz_strip (char **ENVZ, size_t *ENVZ_LEN)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `envz_strip' function removes any null entries from ENVZ,
|
||
updating `*ENVZ' and `*ENVZ_LEN'.
|
||
|
||
-- Function: void envz_remove (char **ENVZ, size_t *ENVZ_LEN, const
|
||
char *NAME)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
The `envz_remove' function removes an entry named NAME from ENVZ,
|
||
updating `*ENVZ' and `*ENVZ_LEN'.
|
||
|
||
|
||
File: libc.info, Node: Character Set Handling, Next: Locales, Prev: String and Array Utilities, Up: Top
|
||
|
||
6 Character Set Handling
|
||
************************
|
||
|
||
Character sets used in the early days of computing had only six, seven,
|
||
or eight bits for each character: there was never a case where more than
|
||
eight bits (one byte) were used to represent a single character. The
|
||
limitations of this approach became more apparent as more people
|
||
grappled with non-Roman character sets, where not all the characters
|
||
that make up a language's character set can be represented by 2^8
|
||
choices. This chapter shows the functionality that was added to the C
|
||
library to support multiple character sets.
|
||
|
||
* Menu:
|
||
|
||
* Extended Char Intro:: Introduction to Extended Characters.
|
||
* Charset Function Overview:: Overview about Character Handling
|
||
Functions.
|
||
* Restartable multibyte conversion:: Restartable multibyte conversion
|
||
Functions.
|
||
* Non-reentrant Conversion:: Non-reentrant Conversion Function.
|
||
* Generic Charset Conversion:: Generic Charset Conversion.
|
||
|
||
|
||
File: libc.info, Node: Extended Char Intro, Next: Charset Function Overview, Up: Character Set Handling
|
||
|
||
6.1 Introduction to Extended Characters
|
||
=======================================
|
||
|
||
A variety of solutions are available to overcome the differences between
|
||
character sets with a 1:1 relation between bytes and characters and
|
||
character sets with ratios of 2:1 or 4:1. The remainder of this
|
||
section gives a few examples to help understand the design decisions
|
||
made while developing the functionality of the C library.
|
||
|
||
A distinction we have to make right away is between internal and
|
||
external representation. "Internal representation" means the
|
||
representation used by a program while keeping the text in memory.
|
||
External representations are used when text is stored or transmitted
|
||
through some communication channel. Examples of external
|
||
representations include files waiting in a directory to be read and
|
||
parsed.
|
||
|
||
Traditionally there has been no difference between the two
|
||
representations. It was equally comfortable and useful to use the same
|
||
single-byte representation internally and externally. This comfort
|
||
level decreases with more and larger character sets.
|
||
|
||
One of the problems to overcome with the internal representation is
|
||
handling text that is externally encoded using different character
|
||
sets. Assume a program that reads two texts and compares them using
|
||
some metric. The comparison can be usefully done only if the texts are
|
||
internally kept in a common format.
|
||
|
||
For such a common format (= character set) eight bits are certainly
|
||
no longer enough. So the smallest entity will have to grow: "wide
|
||
characters" will now be used. Instead of one byte per character, two or
|
||
four will be used. (Three are not good to address in memory and
|
||
more than four bytes seem not to be necessary).
|
||
|
||
As shown in some other part of this manual, a completely new family
|
||
has been created of functions that can handle wide character texts in
|
||
memory. The most commonly used character sets for such internal wide
|
||
character representations are Unicode and ISO 10646 (also known as UCS
|
||
for Universal Character Set). Unicode was originally planned as a
|
||
16-bit character set; whereas, ISO 10646 was designed to be a 31-bit
|
||
large code space. The two standards are practically identical. They
|
||
have the same character repertoire and code table, but Unicode specifies
|
||
added semantics. At the moment, only characters in the first `0x10000'
|
||
code positions (the so-called Basic Multilingual Plane, BMP) have been
|
||
assigned, but the assignment of more specialized characters outside this
|
||
16-bit space is already in progress. A number of encodings have been
|
||
defined for Unicode and ISO 10646 characters: UCS-2 is a 16-bit word
|
||
that can only represent characters from the BMP, UCS-4 is a 32-bit word
|
||
that can represent any Unicode and ISO 10646 character, UTF-8 is an
|
||
ASCII compatible encoding where ASCII characters are represented by
|
||
ASCII bytes and non-ASCII characters by sequences of 2-6 non-ASCII
|
||
bytes, and finally UTF-16 is an extension of UCS-2 in which pairs of
|
||
certain UCS-2 words can be used to encode non-BMP characters up to
|
||
`0x10ffff'.
|
||
|
||
To represent wide characters the `char' type is not suitable. For
|
||
this reason the ISO C standard introduces a new type that is designed
|
||
to keep one character of a wide character string. To maintain the
|
||
similarity there is also a type corresponding to `int' for those
|
||
functions that take a single wide character.
|
||
|
||
-- Data type: wchar_t
|
||
This data type is used as the base type for wide character strings.
|
||
In other words, arrays of objects of this type are the equivalent
|
||
of `char[]' for multibyte character strings. The type is defined
|
||
in `stddef.h'.
|
||
|
||
The ISO C90 standard, where `wchar_t' was introduced, does not say
|
||
anything specific about the representation. It only requires that
|
||
this type is capable of storing all elements of the basic
|
||
character set. Therefore it would be legitimate to define
|
||
`wchar_t' as `char', which might make sense for embedded systems.
|
||
|
||
But in the GNU C Library `wchar_t' is always 32 bits wide and,
|
||
therefore, capable of representing all UCS-4 values and,
|
||
therefore, covering all of ISO 10646. Some Unix systems define
|
||
`wchar_t' as a 16-bit type and thereby follow Unicode very
|
||
strictly. This definition is perfectly fine with the standard,
|
||
but it also means that to represent all characters from Unicode
|
||
and ISO 10646 one has to use UTF-16 surrogate characters, which is
|
||
in fact a multi-wide-character encoding. But resorting to
|
||
multi-wide-character encoding contradicts the purpose of the
|
||
`wchar_t' type.
|
||
|
||
-- Data type: wint_t
|
||
`wint_t' is a data type used for parameters and variables that
|
||
contain a single wide character. As the name suggests this type
|
||
is the equivalent of `int' when using the normal `char' strings.
|
||
The types `wchar_t' and `wint_t' often have the same
|
||
representation if their size is 32 bits wide but if `wchar_t' is
|
||
defined as `char' the type `wint_t' must be defined as `int' due
|
||
to the parameter promotion.
|
||
|
||
This type is defined in `wchar.h' and was introduced in
|
||
Amendment 1 to ISO C90.
|
||
|
||
As for the `char' data type, macros are available for
|
||
specifying the minimum and maximum value representable in an object of
|
||
type `wchar_t'.
|
||
|
||
-- Macro: wint_t WCHAR_MIN
|
||
The macro `WCHAR_MIN' evaluates to the minimum value representable
|
||
by an object of type `wchar_t'.
|
||
|
||
This macro was introduced in Amendment 1 to ISO C90.
|
||
|
||
-- Macro: wint_t WCHAR_MAX
|
||
The macro `WCHAR_MAX' evaluates to the maximum value representable
|
||
by an object of type `wchar_t'.
|
||
|
||
This macro was introduced in Amendment 1 to ISO C90.
|
||
|
||
Another special wide character value is the equivalent to `EOF'.
|
||
|
||
-- Macro: wint_t WEOF
|
||
The macro `WEOF' evaluates to a constant expression of type
|
||
`wint_t' whose value is different from any member of the extended
|
||
character set.
|
||
|
||
`WEOF' need not be the same value as `EOF' and unlike `EOF' it
|
||
also need _not_ be negative. In other words, sloppy code like
|
||
|
||
{
|
||
int c;
|
||
...
|
||
while ((c = getc (fp)) < 0)
|
||
...
|
||
}
|
||
|
||
has to be rewritten to use `WEOF' explicitly when wide characters
|
||
are used:
|
||
|
||
{
|
||
wint_t c;
|
||
...
|
||
while ((c = getwc (fp)) != WEOF)
|
||
...
|
||
}
|
||
|
||
This macro was introduced in Amendment 1 to ISO C90 and is defined
|
||
in `wchar.h'.
|
||
|
||
These internal representations present problems when it comes to
|
||
storage and transmittal. Because each single wide character consists
|
||
of more than one byte, they are affected by byte-ordering. Thus,
|
||
machines with different endianness would see different values when
|
||
accessing the same data. This byte ordering concern also applies for
|
||
communication protocols that are all byte-based and therefore require
|
||
that the sender has to decide about splitting the wide character in
|
||
bytes. A last (but not least important) point is that wide characters
|
||
often require more storage space than a customized byte-oriented
|
||
character set.
|
||
|
||
For all the above reasons, an external encoding that is different
|
||
from the internal encoding is often used if the latter is UCS-2 or
|
||
UCS-4. The external encoding is byte-based and can be chosen
|
||
appropriately for the environment and for the texts to be handled. A
|
||
variety of different character sets can be used for this external
|
||
encoding (information that will not be exhaustively presented
|
||
here--instead, a description of the major groups will suffice). All of
|
||
the ASCII-based character sets fulfill one requirement: they are
|
||
"filesystem safe." This means that the character `'/'' is used in the
|
||
encoding _only_ to represent itself. Things are a bit different for
|
||
character sets like EBCDIC (Extended Binary Coded Decimal Interchange
|
||
Code, a character set family used by IBM), but if the operating system
|
||
does not understand EBCDIC directly the parameters to system calls have
|
||
to be converted first anyhow.
|
||
|
||
* The simplest character sets are single-byte character sets. There
|
||
can be only up to 256 characters (for 8 bit character sets), which
|
||
is not sufficient to cover all languages but might be sufficient
|
||
to handle a specific text. Handling of 8 bit character sets is
|
||
simple. This is not true for other kinds presented later, and
|
||
therefore, the application one uses might require the use of 8 bit
|
||
character sets.
|
||
|
||
* The ISO 2022 standard defines a mechanism for extended character
|
||
sets where one character _can_ be represented by more than one
|
||
byte. This is achieved by associating a state with the text.
|
||
Characters that can be used to change the state can be embedded in
|
||
the text. Each byte in the text might have a different
|
||
interpretation in each state. The state might even influence
|
||
whether a given byte stands for a character on its own or whether
|
||
it has to be combined with some more bytes.
|
||
|
||
In most uses of ISO 2022 the defined character sets do not allow
|
||
state changes that cover more than the next character. This has
|
||
the big advantage that whenever one can identify the beginning of
|
||
the byte sequence of a character one can interpret a text
|
||
correctly. Examples of character sets using this policy are the
|
||
various EUC character sets (used by Sun's operating systems,
|
||
EUC-JP, EUC-KR, EUC-TW, and EUC-CN) or Shift_JIS (SJIS, a Japanese
|
||
encoding).
|
||
|
||
But there are also character sets using a state that is valid for
|
||
more than one character and has to be changed by another byte
|
||
sequence. Examples for this are ISO-2022-JP, ISO-2022-KR, and
|
||
ISO-2022-CN.
|
||
|
||
* Early attempts to fix 8 bit character sets for other languages
|
||
using the Roman alphabet led to character sets like ISO 6937.
|
||
Here bytes representing characters like the acute accent do not
|
||
produce output themselves: one has to combine them with other
|
||
characters to get the desired result. For example, use the byte
|
||
sequence `0xc2 0x61' (non-spacing acute accent, followed by
|
||
lower-case `a') to get the "small a with acute" character. To
|
||
get the acute accent character on its own, one has to write `0xc2
|
||
0x20' (the non-spacing acute followed by a space).
|
||
|
||
Character sets like ISO 6937 are used in some embedded systems such
|
||
as teletex.
|
||
|
||
* Instead of converting the Unicode or ISO 10646 text used
|
||
internally, it is often also sufficient to simply use an encoding
|
||
different than UCS-2/UCS-4. The Unicode and ISO 10646 standards
|
||
even specify such an encoding: UTF-8. This encoding is able to
|
||
represent all of ISO 10646 31 bits in a byte string of length one
|
||
to six.
|
||
|
||
There were a few other attempts to encode ISO 10646 such as UTF-7,
|
||
but UTF-8 is today the only encoding that should be used. In
|
||
fact, with any luck UTF-8 will soon be the only external encoding
|
||
that has to be supported. It proves to be universally usable and
|
||
its only disadvantage is that it favors Roman languages by making
|
||
the byte string representation of other scripts (Cyrillic, Greek,
|
||
Asian scripts) longer than necessary if using a specific character
|
||
set for these scripts. Methods like the Unicode compression
|
||
scheme can alleviate these problems.
|
||
|
||
The question remaining is: how to select the character set or
|
||
encoding to use. The answer: you cannot decide about it yourself, it
|
||
is decided by the developers of the system or the majority of the
|
||
users. Since the goal is interoperability one has to use whatever the
|
||
other people one works with use. If there are no constraints, the
|
||
selection is based on the requirements the expected circle of users
|
||
will have. In other words, if a project is expected to be used in
|
||
only, say, Russia it is fine to use KOI8-R or a similar character set.
|
||
But if at the same time people from, say, Greece are participating one
|
||
should use a character set that allows all people to collaborate.
|
||
|
||
The most widely useful solution seems to be: go with the most general
|
||
character set, namely ISO 10646. Use UTF-8 as the external encoding
|
||
and problems about users not being able to use their own language
|
||
adequately are a thing of the past.
|
||
|
||
One final comment about the choice of the wide character
|
||
representation is necessary at this point. We have said above that the
|
||
natural choice is using Unicode or ISO 10646. This is not required,
|
||
but at least encouraged, by the ISO C standard. The standard defines
|
||
at least a macro `__STDC_ISO_10646__' that is only defined on systems
|
||
where the `wchar_t' type encodes ISO 10646 characters. If this symbol
|
||
is not defined one should avoid making assumptions about the wide
|
||
character representation. If the programmer uses only the functions
|
||
provided by the C library to handle wide character strings there should
|
||
be no compatibility problems with other systems.
|
||
|
||
|
||
File: libc.info, Node: Charset Function Overview, Next: Restartable multibyte conversion, Prev: Extended Char Intro, Up: Character Set Handling
|
||
|
||
6.2 Overview about Character Handling Functions
|
||
===============================================
|
||
|
||
A Unix C library contains three different sets of functions in two
|
||
families to handle character set conversion. One of the function
|
||
families (the most commonly used) is specified in the ISO C90 standard
|
||
and, therefore, is portable even beyond the Unix world. Unfortunately
|
||
this family is the least useful one. These functions should be avoided
|
||
whenever possible, especially when developing libraries (as opposed to
|
||
applications).
|
||
|
||
The second family of functions got introduced in the early Unix
|
||
standards (XPG2) and is still part of the latest and greatest Unix
|
||
standard: Unix 98. It is also the most powerful and useful set of
|
||
functions. But we will start with the functions defined in Amendment 1
|
||
to ISO C90.
|
||
|
||
|
||
File: libc.info, Node: Restartable multibyte conversion, Next: Non-reentrant Conversion, Prev: Charset Function Overview, Up: Character Set Handling
|
||
|
||
6.3 Restartable Multibyte Conversion Functions
|
||
==============================================
|
||
|
||
The ISO C standard defines functions to convert strings from a
|
||
multibyte representation to wide character strings. There are a number
|
||
of peculiarities:
|
||
|
||
* The character set assumed for the multibyte encoding is not
|
||
specified as an argument to the functions. Instead the character
|
||
set specified by the `LC_CTYPE' category of the current locale is
|
||
used; see *Note Locale Categories::.
|
||
|
||
* The functions handling more than one character at a time require
|
||
NUL terminated strings as the argument (i.e., converting blocks of
|
||
text does not work unless one can add a NUL byte at an appropriate
|
||
place). The GNU C Library contains some extensions to the
|
||
standard that allow specifying a size, but basically they also
|
||
expect terminated strings.
|
||
|
||
Despite these limitations the ISO C functions can be used in many
|
||
contexts. In graphical user interfaces, for instance, it is not
|
||
uncommon to have functions that require text to be displayed in a wide
|
||
character string if the text is not simple ASCII. The text itself might
|
||
come from a file with translations and the user should decide about the
|
||
current locale, which determines the translation and therefore also the
|
||
external encoding used. In such a situation (and many others) the
|
||
functions described here are perfect. If more freedom while performing
|
||
the conversion is necessary take a look at the `iconv' functions (*note
|
||
Generic Charset Conversion::).
|
||
|
||
* Menu:
|
||
|
||
* Selecting the Conversion:: Selecting the conversion and its properties.
|
||
* Keeping the state:: Representing the state of the conversion.
|
||
* Converting a Character:: Converting Single Characters.
|
||
* Converting Strings:: Converting Multibyte and Wide Character
|
||
Strings.
|
||
* Multibyte Conversion Example:: A Complete Multibyte Conversion Example.
|
||
|
||
|
||
File: libc.info, Node: Selecting the Conversion, Next: Keeping the state, Up: Restartable multibyte conversion
|
||
|
||
6.3.1 Selecting the conversion and its properties
|
||
-------------------------------------------------
|
||
|
||
We already said above that the currently selected locale for the
|
||
`LC_CTYPE' category decides the conversion that is performed by the
|
||
functions we are about to describe. Each locale uses its own character
|
||
set (given as an argument to `localedef') and this is the one assumed
|
||
as the external multibyte encoding. The wide character set is always
|
||
UCS-4 in the GNU C Library.
|
||
|
||
A characteristic of each multibyte character set is the maximum
|
||
number of bytes that can be necessary to represent one character. This
|
||
information is quite important when writing code that uses the
|
||
conversion functions (as shown in the examples below). The ISO C
|
||
standard defines two macros that provide this information.
|
||
|
||
-- Macro: int MB_LEN_MAX
|
||
`MB_LEN_MAX' specifies the maximum number of bytes in the multibyte
|
||
sequence for a single character in any of the supported locales.
|
||
It is a compile-time constant and is defined in `limits.h'.
|
||
|
||
-- Macro: int MB_CUR_MAX
|
||
`MB_CUR_MAX' expands into a positive integer expression that is the
|
||
maximum number of bytes in a multibyte character in the current
|
||
locale. The value is never greater than `MB_LEN_MAX'. Unlike
|
||
`MB_LEN_MAX' this macro need not be a compile-time constant, and in
|
||
the GNU C Library it is not.
|
||
|
||
`MB_CUR_MAX' is defined in `stdlib.h'.
|
||
|
||
Two different macros are necessary since strictly ISO C90 compilers
|
||
do not allow variable length array definitions, but still it is
|
||
desirable to avoid dynamic allocation. This incomplete piece of code
|
||
shows the problem:
|
||
|
||
{
|
||
char buf[MB_LEN_MAX];
|
||
ssize_t len = 0;
|
||
|
||
while (! feof (fp))
|
||
{
|
||
fread (&buf[len], 1, MB_CUR_MAX - len, fp);
|
||
/* ... process buf */
|
||
len -= used;
|
||
}
|
||
}
|
||
|
||
The code in the inner loop is expected to have always enough bytes in
|
||
the array BUF to convert one multibyte character. The array BUF has to
|
||
be sized statically since many compilers do not allow a variable size.
|
||
The `fread' call makes sure that `MB_CUR_MAX' bytes are always
|
||
available in BUF. Note that it isn't a problem if `MB_CUR_MAX' is not
|
||
a compile-time constant.
|
||
|
||
|
||
File: libc.info, Node: Keeping the state, Next: Converting a Character, Prev: Selecting the Conversion, Up: Restartable multibyte conversion
|
||
|
||
6.3.2 Representing the state of the conversion
|
||
----------------------------------------------
|
||
|
||
In the introduction of this chapter it was said that certain character
|
||
sets use a "stateful" encoding. That is, the encoded values depend in
|
||
some way on the previous bytes in the text.
|
||
|
||
Since the conversion functions allow converting a text in more than
|
||
one step we must have a way to pass this information from one call of
|
||
the functions to another.
|
||
|
||
-- Data type: mbstate_t
|
||
A variable of type `mbstate_t' can contain all the information
|
||
about the "shift state" needed from one call to a conversion
|
||
function to another.
|
||
|
||
`mbstate_t' is defined in `wchar.h'. It was introduced in
|
||
Amendment 1 to ISO C90.
|
||
|
||
To use objects of type `mbstate_t' the programmer has to define such
|
||
objects (normally as local variables on the stack) and pass a pointer to
|
||
the object to the conversion functions. This way the conversion
|
||
function can update the object if the current multibyte character set
|
||
is stateful.
|
||
|
||
There is no specific function or initializer to put the state object
|
||
in any specific state. The rules are that the object should always
|
||
represent the initial state before the first use, and this is achieved
|
||
by clearing the whole variable with code such as follows:
|
||
|
||
{
|
||
mbstate_t state;
|
||
memset (&state, '\0', sizeof (state));
|
||
/* from now on STATE can be used. */
|
||
...
|
||
}
|
||
|
||
When using the conversion functions to generate output it is often
|
||
necessary to test whether the current state corresponds to the initial
|
||
state. This is necessary, for example, to decide whether to emit
|
||
escape sequences to set the state to the initial state at certain
|
||
sequence points. Communication protocols often require this.
|
||
|
||
-- Function: int mbsinit (const mbstate_t *PS)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `mbsinit' function determines whether the state object pointed
|
||
to by PS is in the initial state. If PS is a null pointer or the
|
||
object is in the initial state the return value is nonzero.
|
||
Otherwise it is zero.
|
||
|
||
`mbsinit' was introduced in Amendment 1 to ISO C90 and is declared
|
||
in `wchar.h'.
|
||
|
||
Code using `mbsinit' often looks similar to this:
|
||
|
||
{
|
||
mbstate_t state;
|
||
memset (&state, '\0', sizeof (state));
|
||
/* Use STATE. */
|
||
...
|
||
if (! mbsinit (&state))
|
||
{
|
||
/* Emit code to return to initial state. */
|
||
const wchar_t empty[] = L"";
|
||
const wchar_t *srcp = empty;
|
||
wcsrtombs (outbuf, &srcp, outbuflen, &state);
|
||
}
|
||
...
|
||
}
|
||
|
||
The code to emit the escape sequence to get back to the initial
|
||
state is interesting. The `wcsrtombs' function can be used to
|
||
determine the necessary output code (*note Converting Strings::).
|
||
Please note that with the GNU C Library it is not necessary to perform
|
||
this extra action for the conversion from multibyte text to wide
|
||
character text since the wide character encoding is not stateful. But
|
||
there is nothing mentioned in any standard that prohibits making
|
||
`wchar_t' use a stateful encoding.
|
||
|
||
|
||
File: libc.info, Node: Converting a Character, Next: Converting Strings, Prev: Keeping the state, Up: Restartable multibyte conversion
|
||
|
||
6.3.3 Converting Single Characters
|
||
----------------------------------
|
||
|
||
The most fundamental of the conversion functions are those dealing with
|
||
single characters. Please note that this does not always mean single
|
||
bytes. But since there is very often a subset of the multibyte
|
||
character set that consists of single byte sequences, there are
|
||
functions to help with converting bytes. Frequently, ASCII is a subset
|
||
of the multibyte character set. In such a scenario, each ASCII
|
||
character stands for itself, and all other characters have at least a
|
||
first byte that is beyond the range 0 to 127.
|
||
|
||
-- Function: wint_t btowc (int C)
|
||
Preliminary: | MT-Safe | AS-Unsafe corrupt heap lock dlopen |
|
||
AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The `btowc' function ("byte to wide character") converts a valid
|
||
single byte character C in the initial shift state into the wide
|
||
character equivalent using the conversion rules from the currently
|
||
selected locale of the `LC_CTYPE' category.
|
||
|
||
If `(unsigned char) C' is no valid single byte multibyte character
|
||
or if C is `EOF', the function returns `WEOF'.
|
||
|
||
Please note the restriction of C being tested for validity only in
|
||
the initial shift state. No `mbstate_t' object is used from which
|
||
the state information is taken, and the function also does not use
|
||
any static state.
|
||
|
||
The `btowc' function was introduced in Amendment 1 to ISO C90 and
|
||
is declared in `wchar.h'.
|
||
|
||
Despite the limitation that the single byte value is always
|
||
interpreted in the initial state, this function is actually useful most
|
||
of the time. Most character sets are either entirely single-byte character
|
||
sets or they are extensions to ASCII. But then it is possible to write
|
||
code like this (not that this specific example is very useful):
|
||
|
||
wchar_t *
|
||
itow (unsigned long int val)
|
||
{
|
||
static wchar_t buf[30];
|
||
wchar_t *wcp = &buf[29];
|
||
*wcp = L'\0';
|
||
while (val != 0)
|
||
{
|
||
*--wcp = btowc ('0' + val % 10);
|
||
val /= 10;
|
||
}
|
||
if (wcp == &buf[29])
|
||
*--wcp = L'0';
|
||
return wcp;
|
||
}
|
||
|
||
Why is it necessary to use such a complicated implementation and not
|
||
simply cast `'0' + val % 10' to a wide character? The answer is that
|
||
there is no guarantee that one can perform this kind of arithmetic on
|
||
the characters of the character set used for `wchar_t' representation.
|
||
In other situations the bytes are not constant at compile time and so
|
||
the compiler cannot do the work. In situations like this, using
|
||
`btowc' is required.
|
||
|
||
There is also a function for the conversion in the other direction.
|
||
|
||
-- Function: int wctob (wint_t C)
|
||
Preliminary: | MT-Safe | AS-Unsafe corrupt heap lock dlopen |
|
||
AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The `wctob' function ("wide character to byte") takes as the
|
||
parameter a valid wide character. If the multibyte representation
|
||
for this character in the initial state is exactly one byte long,
|
||
the return value of this function is this character. Otherwise
|
||
the return value is `EOF'.
|
||
|
||
`wctob' was introduced in Amendment 1 to ISO C90 and is declared
|
||
in `wchar.h'.
|
||
|
||
There are more general functions to convert single characters from
|
||
multibyte representation to wide characters and vice versa. These
|
||
functions pose no limit on the length of the multibyte representation
|
||
and they also do not require it to be in the initial state.
|
||
|
||
-- Function: size_t mbrtowc (wchar_t *restrict PWC, const char
|
||
*restrict S, size_t N, mbstate_t *restrict PS)
|
||
Preliminary: | MT-Unsafe race:mbrtowc/!ps | AS-Unsafe corrupt heap
|
||
lock dlopen | AC-Unsafe corrupt lock mem fd | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `mbrtowc' function ("multibyte restartable to wide character")
|
||
converts the next multibyte character in the string pointed to by
|
||
S into a wide character and stores it in the location pointed to
|
||
by PWC. The conversion is performed according to the locale
|
||
currently selected for the `LC_CTYPE' category. If the conversion
|
||
for the character set used in the locale requires a state, the
|
||
multibyte string is interpreted in the state represented by the
|
||
object pointed to by PS. If PS is a null pointer, a static,
|
||
internal state variable used only by the `mbrtowc' function is
|
||
used.
|
||
|
||
If the next multibyte character corresponds to the null wide
|
||
character, the return value of the function is 0 and the state
|
||
object is afterwards in the initial state. If the next N or fewer
|
||
bytes form a correct multibyte character, the return value is the
|
||
number of bytes starting from S that form the multibyte character.
|
||
The conversion state is updated according to the bytes consumed
|
||
in the conversion. In both cases the wide character (either the
|
||
`L'\0'' or the one found in the conversion) is stored in the
|
||
string pointed to by PWC if PWC is not null.
|
||
|
||
If the first N bytes of the multibyte string possibly form a valid
|
||
multibyte character but there are more than N bytes needed to
|
||
complete it, the return value of the function is `(size_t) -2' and
|
||
no value is stored in `*PWC'. The conversion state is updated and
|
||
all N input bytes are consumed and should not be submitted again.
|
||
Please note that this can happen even if N has a value greater
|
||
than or equal to `MB_CUR_MAX' since the input might contain
|
||
redundant shift sequences.
|
||
|
||
If the first N bytes of the multibyte string cannot possibly form
|
||
a valid multibyte character, no value is stored, the global
|
||
variable `errno' is set to the value `EILSEQ', and the function
|
||
returns `(size_t) -1'. The conversion state is afterwards
|
||
undefined.
|
||
|
||
As specified, the `mbrtowc' function could deal with multibyte
|
||
sequences which contain embedded null bytes (which happens in
|
||
Unicode encodings such as UTF-16), but the GNU C Library does not
|
||
support such multibyte encodings. When encountering a null input
|
||
byte, the function will either return zero, or return `(size_t)
|
||
-1' and report an `EILSEQ' error. The `iconv' function can be
|
||
used for converting between arbitrary encodings. *Note Generic
|
||
Conversion Interface::.
|
||
|
||
`mbrtowc' was introduced in Amendment 1 to ISO C90 and is declared
|
||
in `wchar.h'.
|
||
|
||
A function that copies a multibyte string into a wide character
|
||
string while at the same time converting all lowercase characters into
|
||
uppercase could look like this:
|
||
|
||
wchar_t *
|
||
mbstouwcs (const char *s)
|
||
{
|
||
/* Include the null terminator in the conversion. */
|
||
size_t len = strlen (s) + 1;
|
||
wchar_t *result = reallocarray (NULL, len, sizeof (wchar_t));
|
||
if (result == NULL)
|
||
return NULL;
|
||
|
||
wchar_t *wcp = result;
|
||
mbstate_t state;
|
||
memset (&state, '\0', sizeof (state));
|
||
|
||
while (true)
|
||
{
|
||
wchar_t wc;
|
||
size_t nbytes = mbrtowc (&wc, s, len, &state);
|
||
if (nbytes == 0)
|
||
{
|
||
/* Terminate the result string. */
|
||
*wcp = L'\0';
|
||
break;
|
||
}
|
||
else if (nbytes == (size_t) -2)
|
||
{
|
||
/* Truncated input string. */
|
||
errno = EILSEQ;
|
||
free (result);
|
||
return NULL;
|
||
}
|
||
else if (nbytes == (size_t) -1)
|
||
{
|
||
/* Some other error (including EILSEQ). */
|
||
free (result);
|
||
return NULL;
|
||
}
|
||
else
|
||
{
|
||
/* A character was converted. */
|
||
*wcp++ = towupper (wc);
|
||
len -= nbytes;
|
||
s += nbytes;
|
||
}
|
||
}
|
||
return result;
|
||
}
|
||
|
||
In the inner loop, a single wide character is stored in `wc', and
|
||
the number of consumed bytes is stored in the variable `nbytes'. If
|
||
the conversion is successful, the uppercase variant of the wide
|
||
character is stored in the `result' array and the pointer to the input
|
||
string and the number of available bytes are adjusted. If the `mbrtowc'
|
||
function returns zero, the null input byte has not been converted, so
|
||
it must be stored explicitly in the result.
|
||
|
||
The above code uses the fact that there can never be more wide
|
||
characters in the converted result than there are bytes in the multibyte
|
||
input string. This method yields a pessimistic guess about the size of
|
||
the result, and if many wide character strings have to be constructed
|
||
this way or if the strings are long, the extra memory required to be
|
||
allocated because the input string contains multibyte characters might
|
||
be significant. The allocated memory block can be resized to the
|
||
correct size before returning it, but a better solution might be to
|
||
allocate just the right amount of space for the result right away.
|
||
Unfortunately there is no function to compute the length of the wide
|
||
character string directly from the multibyte string. There is, however,
|
||
a function that does part of the work.
|
||
|
||
-- Function: size_t mbrlen (const char *restrict S, size_t N,
|
||
mbstate_t *PS)
|
||
Preliminary: | MT-Unsafe race:mbrlen/!ps | AS-Unsafe corrupt heap
|
||
lock dlopen | AC-Unsafe corrupt lock mem fd | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `mbrlen' function ("multibyte restartable length") computes
|
||
the number of at most N bytes starting at S, which form the next
|
||
valid and complete multibyte character.
|
||
|
||
If the next multibyte character corresponds to the NUL wide
|
||
character, the return value is 0. If the next N bytes form a valid
|
||
multibyte character, the number of bytes belonging to this
|
||
multibyte character byte sequence is returned.
|
||
|
||
If the first N bytes possibly form a valid multibyte character but
|
||
the character is incomplete, the return value is `(size_t) -2'.
|
||
Otherwise the multibyte character sequence is invalid and the
|
||
return value is `(size_t) -1'.
|
||
|
||
The multibyte sequence is interpreted in the state represented by
|
||
the object pointed to by PS. If PS is a null pointer, a state
|
||
object local to `mbrlen' is used.
|
||
|
||
`mbrlen' was introduced in Amendment 1 to ISO C90 and is declared
|
||
in `wchar.h'.
|
||
|
||
The attentive reader now will note that `mbrlen' can be implemented
|
||
as
|
||
|
||
mbrtowc (NULL, s, n, ps != NULL ? ps : &internal)
|
||
|
||
This is true and in fact is mentioned in the official specification.
|
||
How can this function be used to determine the length of the wide
|
||
character string created from a multibyte character string? It is not
|
||
directly usable, but we can define a function `mbslen' using it:
|
||
|
||
size_t
|
||
mbslen (const char *s)
|
||
{
|
||
mbstate_t state;
|
||
size_t result = 0;
|
||
size_t nbytes;
|
||
memset (&state, '\0', sizeof (state));
|
||
while ((nbytes = mbrlen (s, MB_LEN_MAX, &state)) > 0)
|
||
{
|
||
if (nbytes >= (size_t) -2)
|
||
/* Something is wrong. */
|
||
return (size_t) -1;
|
||
s += nbytes;
|
||
++result;
|
||
}
|
||
return result;
|
||
}
|
||
|
||
This function simply calls `mbrlen' for each multibyte character in
|
||
the string and counts the number of function calls. Please note that
|
||
we here use `MB_LEN_MAX' as the size argument in the `mbrlen' call.
|
||
This is acceptable since a) this value is larger than the length of the
|
||
longest multibyte character sequence and b) we know that the string S
|
||
ends with a NUL byte, which cannot be part of any other multibyte
|
||
character sequence but the one representing the NUL wide character.
|
||
Therefore, the `mbrlen' function will never read invalid memory.
|
||
|
||
Now that this function is available (just to make this clear, this
|
||
function is _not_ part of the GNU C Library) we can compute the number
|
||
of bytes needed to store the wide characters converted from the multibyte
|
||
string S using
|
||
|
||
wcs_bytes = (mbslen (s) + 1) * sizeof (wchar_t);
|
||
|
||
Please note that the `mbslen' function is quite inefficient. The
|
||
implementation of `mbstouwcs' with `mbslen' would have to perform the
|
||
conversion of the multibyte character input string twice, and this
|
||
conversion might be quite expensive. So it is necessary to think about
|
||
the consequences of using the easier but imprecise method before doing
|
||
the work twice.
|
||
|
||
-- Function: size_t wcrtomb (char *restrict S, wchar_t WC, mbstate_t
|
||
*restrict PS)
|
||
Preliminary: | MT-Unsafe race:wcrtomb/!ps | AS-Unsafe corrupt heap
|
||
lock dlopen | AC-Unsafe corrupt lock mem fd | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The `wcrtomb' function ("wide character restartable to multibyte")
|
||
converts a single wide character into a multibyte string
|
||
corresponding to that wide character.
|
||
|
||
If S is a null pointer, the function resets the state stored in
|
||
the object pointed to by PS (or the internal `mbstate_t' object)
|
||
to the initial state. This can also be achieved by a call like
|
||
this:
|
||
|
||
wcrtomb (temp_buf, L'\0', ps)
|
||
|
||
since, if S is a null pointer, `wcrtomb' performs as if it writes
|
||
into an internal buffer, which is guaranteed to be large enough.
|
||
|
||
If WC is the NUL wide character, `wcrtomb' emits, if necessary, a
|
||
shift sequence to get the state PS into the initial state followed
|
||
by a single NUL byte, which is stored in the string S.
|
||
|
||
Otherwise a byte sequence (possibly including shift sequences) is
|
||
written into the string S. This only happens if WC is a valid wide
|
||
character (i.e., it has a multibyte representation in the
|
||
character set selected by locale of the `LC_CTYPE' category). If
|
||
WC is no valid wide character, nothing is stored in the string S,
|
||
`errno' is set to `EILSEQ', the conversion state in PS is
|
||
undefined and the return value is `(size_t) -1'.
|
||
|
||
If no error occurred the function returns the number of bytes
|
||
stored in the string S. This includes all bytes representing shift
|
||
sequences.
|
||
|
||
One word about the interface of the function: there is no parameter
|
||
specifying the length of the array S. Instead the function
|
||
assumes that there are at least `MB_CUR_MAX' bytes available since
|
||
this is the maximum length of any byte sequence representing a
|
||
single character. So the caller has to make sure that there is
|
||
enough space available, otherwise buffer overruns can occur.
|
||
|
||
`wcrtomb' was introduced in Amendment 1 to ISO C90 and is declared
|
||
in `wchar.h'.
|
||
|
||
Using `wcrtomb' is as easy as using `mbrtowc'. The following
|
||
example appends a wide character string to a multibyte character string.
|
||
Again, the code is not really useful (or correct), it is simply here to
|
||
demonstrate the use and some problems.
|
||
|
||
char *
|
||
mbscatwcs (char *s, size_t len, const wchar_t *ws)
|
||
{
|
||
mbstate_t state;
|
||
/* Find the end of the existing string. */
|
||
char *wp = strchr (s, '\0');
|
||
len -= wp - s;
|
||
memset (&state, '\0', sizeof (state));
|
||
do
|
||
{
|
||
size_t nbytes;
|
||
if (len < MB_CUR_MAX)
|
||
{
|
||
/* We cannot guarantee that the next
|
||
character fits into the buffer, so
|
||
return an error. */
|
||
errno = E2BIG;
|
||
return NULL;
|
||
}
|
||
nbytes = wcrtomb (wp, *ws, &state);
|
||
if (nbytes == (size_t) -1)
|
||
/* Error in the conversion. */
|
||
return NULL;
|
||
len -= nbytes;
|
||
wp += nbytes;
|
||
}
|
||
while (*ws++ != L'\0');
|
||
return s;
|
||
}
|
||
|
||
First the function has to find the end of the string currently in the
|
||
array S. The `strchr' call does this very efficiently since a
|
||
requirement for multibyte character representations is that the NUL byte
|
||
is never used except to represent itself (and in this context, the end
|
||
of the string).
|
||
|
||
After initializing the state object the loop is entered where the
|
||
first task is to make sure there is enough room in the array S. We
|
||
abort if there are not at least `MB_CUR_MAX' bytes available. This is
|
||
not always optimal but we have no other choice. We might have less
|
||
than `MB_CUR_MAX' bytes available but the next multibyte character
|
||
might also be only one byte long. At the time the `wcrtomb' call
|
||
returns it is too late to decide whether the buffer was large enough.
|
||
If this solution is unsuitable, there is a very slow but more accurate
|
||
solution.
|
||
|
||
...
|
||
if (len < MB_CUR_MAX)
|
||
{
|
||
mbstate_t temp_state;
|
||
memcpy (&temp_state, &state, sizeof (state));
|
||
if (wcrtomb (NULL, *ws, &temp_state) > len)
|
||
{
|
||
/* We cannot guarantee that the next
|
||
character fits into the buffer, so
|
||
return an error. */
|
||
errno = E2BIG;
|
||
return NULL;
|
||
}
|
||
}
|
||
...
|
||
|
||
Here we perform the conversion that might overflow the buffer so that
|
||
we are afterwards in the position to make an exact decision about the
|
||
buffer size. Please note the `NULL' argument for the destination
|
||
buffer in the new `wcrtomb' call; since we are not interested in the
|
||
converted text at this point, this is a nice way to express this. The
|
||
most unusual thing about this piece of code certainly is the duplication
|
||
of the conversion state object, but if a change of the state is
|
||
necessary to emit the next multibyte character, we want to have the
|
||
same shift state change performed in the real conversion. Therefore,
|
||
we have to preserve the initial shift state information.
|
||
|
||
There are certainly many more and even better solutions to this
|
||
problem. This example is only provided for educational purposes.
|
||
|
||
|
||
File: libc.info, Node: Converting Strings, Next: Multibyte Conversion Example, Prev: Converting a Character, Up: Restartable multibyte conversion
|
||
|
||
6.3.4 Converting Multibyte and Wide Character Strings
|
||
-----------------------------------------------------
|
||
|
||
The functions described in the previous section only convert a single
|
||
character at a time. Most operations to be performed in real-world
|
||
programs include strings and therefore the ISO C standard also defines
|
||
conversions on entire strings. However, the defined set of functions
|
||
is quite limited; therefore, the GNU C Library contains a few
|
||
extensions that can help in some important situations.
|
||
|
||
-- Function: size_t mbsrtowcs (wchar_t *restrict DST, const char
|
||
**restrict SRC, size_t LEN, mbstate_t *restrict PS)
|
||
Preliminary: | MT-Unsafe race:mbsrtowcs/!ps | AS-Unsafe corrupt
|
||
heap lock dlopen | AC-Unsafe corrupt lock mem fd | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
The `mbsrtowcs' function ("multibyte string restartable to wide
|
||
character string") converts the NUL-terminated multibyte character
|
||
string at `*SRC' into an equivalent wide character string,
|
||
including the NUL wide character at the end. The conversion is
|
||
started using the state information from the object pointed to by
|
||
PS or from an internal object of `mbsrtowcs' if PS is a null
|
||
pointer. Before returning, the state object is updated to match
|
||
the state after the last converted character. The state is the
|
||
initial state if the terminating NUL byte is reached and converted.
|
||
|
||
If DST is not a null pointer, the result is stored in the array
|
||
pointed to by DST; otherwise, the conversion result is not
|
||
available since it is stored in an internal buffer.
|
||
|
||
If LEN wide characters are stored in the array DST before reaching
|
||
the end of the input string, the conversion stops and LEN is
|
||
returned. If DST is a null pointer, LEN is never checked.
|
||
|
||
Another reason for a premature return from the function call is if
|
||
the input string contains an invalid multibyte sequence. In this
|
||
case the global variable `errno' is set to `EILSEQ' and the
|
||
function returns `(size_t) -1'.
|
||
|
||
In all other cases the function returns the number of wide
|
||
characters converted during this call. If DST is not null,
|
||
`mbsrtowcs' stores in the pointer pointed to by SRC either a null
|
||
pointer (if the NUL byte in the input string was reached) or the
|
||
address of the byte following the last converted multibyte
|
||
character.
|
||
|
||
`mbsrtowcs' was introduced in Amendment 1 to ISO C90 and is
|
||
declared in `wchar.h'.
|
||
|
||
The definition of the `mbsrtowcs' function has one important
|
||
limitation. The requirement that `*SRC' has to be a NUL-terminated string
|
||
causes problems if one wants to convert buffers with text. A buffer
|
||
is not normally a collection of NUL-terminated strings but instead a
|
||
continuous collection of lines, separated by newline characters. Now
|
||
assume that a function to convert one line from a buffer is needed.
|
||
Since the line is not NUL-terminated, the source pointer cannot
|
||
directly point into the unmodified text buffer. This means, either one
|
||
inserts the NUL byte at the appropriate place for the time of the
|
||
`mbsrtowcs' function call (which is not doable for a read-only buffer
|
||
or in a multi-threaded application) or one copies the line in an extra
|
||
buffer where it can be terminated by a NUL byte. Note that it is not
|
||
in general possible to limit the number of characters to convert by
|
||
setting the parameter LEN to any specific value. Since it is not known
|
||
how many bytes each multibyte character sequence is in length, one can
|
||
only guess.
|
||
|
||
There is still a problem with the method of NUL-terminating a line
|
||
right after the newline character, which could lead to very strange
|
||
results. As said in the description of the `mbsrtowcs' function above,
|
||
the conversion state is guaranteed to be in the initial shift state
|
||
after processing the NUL byte at the end of the input string. But this
|
||
NUL byte is not really part of the text (i.e., the conversion state
|
||
after the newline in the original text could be something different
|
||
than the initial shift state and therefore the first character of the
|
||
next line is encoded using this state). But the state in question is
|
||
never accessible to the user since the conversion stops after the NUL
|
||
byte (which resets the state). Most stateful character sets in use
|
||
today require that the shift state after a newline be the initial
|
||
state--but this is not a strict guarantee. Therefore, simply
|
||
NUL-terminating a piece of a running text is not always an adequate
|
||
solution and, therefore, should never be used in generally used code.
|
||
|
||
The generic conversion interface (*note Generic Charset Conversion::)
|
||
does not have this limitation (it simply works on buffers, not
|
||
strings), and the GNU C Library contains a set of functions that take
|
||
additional parameters specifying the maximal number of bytes that are
|
||
consumed from the input string. This way the problem of `mbsrtowcs''s
|
||
example above could be solved by determining the line length and
|
||
passing this length to the function.
|
||
|
||
-- Function: size_t wcsrtombs (char *restrict DST, const wchar_t
|
||
**restrict SRC, size_t LEN, mbstate_t *restrict PS)
|
||
Preliminary: | MT-Unsafe race:wcsrtombs/!ps | AS-Unsafe corrupt
|
||
heap lock dlopen | AC-Unsafe corrupt lock mem fd | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
The `wcsrtombs' function ("wide character string restartable to
|
||
multibyte string") converts the NUL-terminated wide character
|
||
string at `*SRC' into an equivalent multibyte character string and
|
||
stores the result in the array pointed to by DST. The NUL wide
|
||
character is also converted. The conversion starts in the state
|
||
described in the object pointed to by PS or by a state object
|
||
local to `wcsrtombs' in case PS is a null pointer. If DST is a
|
||
null pointer, the conversion is performed as usual but the result
|
||
is not available. If all characters of the input string were
|
||
successfully converted and if DST is not a null pointer, the
|
||
pointer pointed to by SRC gets assigned a null pointer.
|
||
|
||
If one of the wide characters in the input string has no valid
|
||
multibyte character equivalent, the conversion stops early, sets
|
||
the global variable `errno' to `EILSEQ', and returns `(size_t) -1'.
|
||
|
||
Another reason for a premature stop is if DST is not a null
|
||
pointer and the next converted character would require more than
|
||
LEN bytes in total to the array DST. In this case (and if DST is
|
||
not a null pointer) the pointer pointed to by SRC is assigned a
|
||
value pointing to the wide character right after the last one
|
||
successfully converted.
|
||
|
||
Except in the case of an encoding error the return value of the
|
||
`wcsrtombs' function is the number of bytes in all the multibyte
|
||
character sequences stored in DST. Before returning, the state in
|
||
the object pointed to by PS (or the internal object in case PS is
|
||
a null pointer) is updated to reflect the state after the last
|
||
conversion. The state is the initial shift state in case the
|
||
terminating NUL wide character was converted.
|
||
|
||
The `wcsrtombs' function was introduced in Amendment 1 to ISO C90
|
||
and is declared in `wchar.h'.
|
||
|
||
The restriction mentioned above for the `mbsrtowcs' function applies
|
||
here also. There is no possibility of directly controlling the number
|
||
of input characters. One has to place the NUL wide character at the
|
||
correct place or control the consumed input indirectly via the
|
||
available output array size (the LEN parameter).
|
||
|
||
-- Function: size_t mbsnrtowcs (wchar_t *restrict DST, const char
|
||
**restrict SRC, size_t NMC, size_t LEN, mbstate_t *restrict
|
||
PS)
|
||
Preliminary: | MT-Unsafe race:mbsnrtowcs/!ps | AS-Unsafe corrupt
|
||
heap lock dlopen | AC-Unsafe corrupt lock mem fd | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
The `mbsnrtowcs' function is very similar to the `mbsrtowcs'
|
||
function. All the parameters are the same except for NMC, which is
|
||
new. The return value is the same as for `mbsrtowcs'.
|
||
|
||
This new parameter specifies how many bytes at most can be used
|
||
from the multibyte character string. In other words, the
|
||
multibyte character string `*SRC' need not be NUL-terminated. But
|
||
if a NUL byte is found within the NMC first bytes of the string,
|
||
the conversion stops there.
|
||
|
||
This function is a GNU extension. It is meant to work around the
|
||
problems mentioned above. Now it is possible to convert a buffer
|
||
with multibyte character text piece by piece without having to
|
||
care about inserting NUL bytes and the effect of NUL bytes on the
|
||
conversion state.
|
||
|
||
A function to convert a multibyte string into a wide character string
|
||
and display it could be written like this (this is not a really useful
|
||
example):
|
||
|
||
void
|
||
showmbs (const char *src, FILE *fp)
|
||
{
|
||
mbstate_t state;
|
||
int cnt = 0;
|
||
memset (&state, '\0', sizeof (state));
|
||
while (1)
|
||
{
|
||
wchar_t linebuf[100];
|
||
const char *endp = strchr (src, '\n');
|
||
size_t n;
|
||
|
||
/* Exit if there is no more line. */
|
||
if (endp == NULL)
|
||
break;
|
||
|
||
n = mbsnrtowcs (linebuf, &src, endp - src, 99, &state);
|
||
linebuf[n] = L'\0';
|
||
fprintf (fp, "line %d: \"%S\"\n", ++cnt, linebuf);
|
||
}
|
||
}
|
||
|
||
There is no problem with the state after a call to `mbsnrtowcs'.
|
||
Since we don't insert characters in the strings that were not in there
|
||
right from the beginning and we use STATE only for the conversion of
|
||
the given buffer, there is no problem with altering the state.
|
||
|
||
-- Function: size_t wcsnrtombs (char *restrict DST, const wchar_t
|
||
**restrict SRC, size_t NWC, size_t LEN, mbstate_t *restrict
|
||
PS)
|
||
Preliminary: | MT-Unsafe race:wcsnrtombs/!ps | AS-Unsafe corrupt
|
||
heap lock dlopen | AC-Unsafe corrupt lock mem fd | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
The `wcsnrtombs' function implements the conversion from wide
|
||
character strings to multibyte character strings. It is similar to
|
||
`wcsrtombs' but, just like `mbsnrtowcs', it takes an extra
|
||
parameter, which specifies the length of the input string.
|
||
|
||
No more than NWC wide characters from the input string `*SRC' are
|
||
converted. If the input string contains a NUL wide character in
|
||
the first NWC characters, the conversion stops at this place.
|
||
|
||
The `wcsnrtombs' function is a GNU extension and just like
|
||
`mbsnrtowcs' helps in situations where no NUL-terminated input
|
||
strings are available.
|
||
|
||
|
||
File: libc.info, Node: Multibyte Conversion Example, Prev: Converting Strings, Up: Restartable multibyte conversion
|
||
|
||
6.3.5 A Complete Multibyte Conversion Example
|
||
---------------------------------------------
|
||
|
||
The example programs given in the last sections are only brief and do
|
||
not contain all the error checking, etc. Presented here is a complete
|
||
and documented example. It features the `mbrtowc' function but it
|
||
should be easy to derive versions using the other functions.
|
||
|
||
int
|
||
file_mbsrtowcs (int input, int output)
|
||
{
|
||
/* Note the use of `MB_LEN_MAX'.
|
||
`MB_CUR_MAX' cannot portably be used here. */
|
||
char buffer[BUFSIZ + MB_LEN_MAX];
|
||
mbstate_t state;
|
||
int filled = 0;
|
||
int eof = 0;
|
||
|
||
/* Initialize the state. */
|
||
memset (&state, '\0', sizeof (state));
|
||
|
||
while (!eof)
|
||
{
|
||
ssize_t nread;
|
||
ssize_t nwrite;
|
||
char *inp = buffer;
|
||
wchar_t outbuf[BUFSIZ];
|
||
wchar_t *outp = outbuf;
|
||
|
||
/* Fill up the buffer from the input file. */
|
||
nread = read (input, buffer + filled, BUFSIZ);
|
||
if (nread < 0)
|
||
{
|
||
perror ("read");
|
||
return 0;
|
||
}
|
||
/* If we reach end of file, make a note to read no more. */
|
||
if (nread == 0)
|
||
eof = 1;
|
||
|
||
/* `filled' is now the number of bytes in `buffer'. */
|
||
filled += nread;
|
||
|
||
/* Convert those bytes to wide characters--as many as we can. */
|
||
while (1)
|
||
{
|
||
size_t thislen = mbrtowc (outp, inp, filled, &state);
|
||
/* Stop converting at invalid character;
|
||
this can mean we have read just the first part
|
||
of a valid character. */
|
||
if (thislen == (size_t) -1)
|
||
break;
|
||
/* We want to handle embedded NUL bytes
|
||
but the return value is 0. Correct this. */
|
||
if (thislen == 0)
|
||
thislen = 1;
|
||
/* Advance past this character. */
|
||
inp += thislen;
|
||
filled -= thislen;
|
||
++outp;
|
||
}
|
||
|
||
/* Write the wide characters we just made. */
|
||
nwrite = write (output, outbuf,
|
||
(outp - outbuf) * sizeof (wchar_t));
|
||
if (nwrite < 0)
|
||
{
|
||
perror ("write");
|
||
return 0;
|
||
}
|
||
|
||
/* See if we have a _real_ invalid character. */
|
||
if ((eof && filled > 0) || filled >= MB_CUR_MAX)
|
||
{
|
||
error (0, 0, "invalid multibyte character");
|
||
return 0;
|
||
}
|
||
|
||
/* If any characters must be carried forward,
|
||
put them at the beginning of `buffer'. */
|
||
if (filled > 0)
|
||
memmove (buffer, inp, filled);
|
||
}
|
||
|
||
return 1;
|
||
}
|
||
|
||
|
||
File: libc.info, Node: Non-reentrant Conversion, Next: Generic Charset Conversion, Prev: Restartable multibyte conversion, Up: Character Set Handling
|
||
|
||
6.4 Non-reentrant Conversion Function
|
||
=====================================
|
||
|
||
The functions described in the previous section are defined in
|
||
Amendment 1 to ISO C90, but the original ISO C90 standard also
|
||
contained functions for character set conversion. The reason that
|
||
these original functions are not described first is that they are almost
|
||
entirely useless.
|
||
|
||
The problem is that all the conversion functions described in the
|
||
original ISO C90 use a local state. Using a local state implies that
|
||
multiple conversions at the same time (not only when using threads)
|
||
cannot be done, and that you cannot first convert single characters and
|
||
then strings since you cannot tell the conversion functions which state
|
||
to use.
|
||
|
||
These original functions are therefore usable only in a very limited
|
||
set of situations. One must complete converting the entire string
|
||
before starting a new one, and each string/text must be converted with
|
||
the same function (there is no problem with the library itself; it is
|
||
guaranteed that no library function changes the state of any of these
|
||
functions). *For the above reasons it is highly recommended that the
|
||
functions described in the previous section be used in place of
|
||
non-reentrant conversion functions.*
|
||
|
||
* Menu:
|
||
|
||
* Non-reentrant Character Conversion:: Non-reentrant Conversion of Single
|
||
Characters.
|
||
* Non-reentrant String Conversion:: Non-reentrant Conversion of Strings.
|
||
* Shift State:: States in Non-reentrant Functions.
|
||
|
||
|
||
File: libc.info, Node: Non-reentrant Character Conversion, Next: Non-reentrant String Conversion, Up: Non-reentrant Conversion
|
||
|
||
6.4.1 Non-reentrant Conversion of Single Characters
|
||
---------------------------------------------------
|
||
|
||
-- Function: int mbtowc (wchar_t *restrict RESULT, const char
|
||
*restrict STRING, size_t SIZE)
|
||
Preliminary: | MT-Unsafe race | AS-Unsafe corrupt heap lock dlopen
|
||
| AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The `mbtowc' ("multibyte to wide character") function when called
|
||
with non-null STRING converts the first multibyte character
|
||
beginning at STRING to its corresponding wide character code. It
|
||
stores the result in `*RESULT'.
|
||
|
||
`mbtowc' never examines more than SIZE bytes. (The idea is to
|
||
supply for SIZE the number of bytes of data you have in hand.)
|
||
|
||
`mbtowc' with non-null STRING distinguishes three possibilities:
|
||
the first SIZE bytes at STRING start with valid multibyte
|
||
characters, they start with an invalid byte sequence or just part
|
||
of a character, or STRING points to an empty string (a null
|
||
character).
|
||
|
||
For a valid multibyte character, `mbtowc' converts it to a wide
|
||
character and stores that in `*RESULT', and returns the number of
|
||
bytes in that character (always at least 1 and never more than
|
||
SIZE).
|
||
|
||
For an invalid byte sequence, `mbtowc' returns -1. For an empty
|
||
string, it returns 0, also storing `'\0'' in `*RESULT'.
|
||
|
||
If the multibyte character code uses shift characters, then
|
||
`mbtowc' maintains and updates a shift state as it scans. If you
|
||
call `mbtowc' with a null pointer for STRING, that initializes the
|
||
shift state to its standard initial value. It also returns
|
||
nonzero if the multibyte character code in use actually has a
|
||
shift state. *Note Shift State::.
|
||
|
||
-- Function: int wctomb (char *STRING, wchar_t WCHAR)
|
||
Preliminary: | MT-Unsafe race | AS-Unsafe corrupt heap lock dlopen
|
||
| AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The `wctomb' ("wide character to multibyte") function converts the
|
||
wide character code WCHAR to its corresponding multibyte character
|
||
sequence, and stores the result in bytes starting at STRING. At
|
||
most `MB_CUR_MAX' characters are stored.
|
||
|
||
`wctomb' with non-null STRING distinguishes three possibilities
|
||
for WCHAR: a valid wide character code (one that can be translated
|
||
to a multibyte character), an invalid code, and `L'\0''.
|
||
|
||
Given a valid code, `wctomb' converts it to a multibyte character,
|
||
storing the bytes starting at STRING. Then it returns the number
|
||
of bytes in that character (always at least 1 and never more than
|
||
`MB_CUR_MAX').
|
||
|
||
If WCHAR is an invalid wide character code, `wctomb' returns -1.
|
||
If WCHAR is `L'\0'', it returns `0', also storing `'\0'' in
|
||
`*STRING'.
|
||
|
||
If the multibyte character code uses shift characters, then
|
||
`wctomb' maintains and updates a shift state as it scans. If you
|
||
call `wctomb' with a null pointer for STRING, that initializes the
|
||
shift state to its standard initial value. It also returns
|
||
nonzero if the multibyte character code in use actually has a
|
||
shift state. *Note Shift State::.
|
||
|
||
Calling this function with a WCHAR argument of zero when STRING is
|
||
not null has the side-effect of reinitializing the stored shift
|
||
state _as well as_ storing the multibyte character `'\0'' and
|
||
returning 0.
|
||
|
||
Similar to `mbrlen' there is also a non-reentrant function that
|
||
computes the length of a multibyte character. It can be defined in
|
||
terms of `mbtowc'.
|
||
|
||
-- Function: int mblen (const char *STRING, size_t SIZE)
|
||
Preliminary: | MT-Unsafe race | AS-Unsafe corrupt heap lock dlopen
|
||
| AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The `mblen' function with a non-null STRING argument returns the
|
||
number of bytes that make up the multibyte character beginning at
|
||
STRING, never examining more than SIZE bytes. (The idea is to
|
||
supply for SIZE the number of bytes of data you have in hand.)
|
||
|
||
The return value of `mblen' distinguishes three possibilities: the
|
||
first SIZE bytes at STRING start with valid multibyte characters,
|
||
they start with an invalid byte sequence or just part of a
|
||
character, or STRING points to an empty string (a null character).
|
||
|
||
For a valid multibyte character, `mblen' returns the number of
|
||
bytes in that character (always at least `1' and never more than
|
||
SIZE). For an invalid byte sequence, `mblen' returns -1. For an
|
||
empty string, it returns 0.
|
||
|
||
If the multibyte character code uses shift characters, then `mblen'
|
||
maintains and updates a shift state as it scans. If you call
|
||
`mblen' with a null pointer for STRING, that initializes the shift
|
||
state to its standard initial value. It also returns a nonzero
|
||
value if the multibyte character code in use actually has a shift
|
||
state. *Note Shift State::.
|
||
|
||
The function `mblen' is declared in `stdlib.h'.
|
||
|
||
|
||
File: libc.info, Node: Non-reentrant String Conversion, Next: Shift State, Prev: Non-reentrant Character Conversion, Up: Non-reentrant Conversion
|
||
|
||
6.4.2 Non-reentrant Conversion of Strings
|
||
-----------------------------------------
|
||
|
||
For convenience the ISO C90 standard also defines functions to convert
|
||
entire strings instead of single characters. These functions suffer
|
||
from the same problems as their reentrant counterparts from Amendment 1
|
||
to ISO C90; see *Note Converting Strings::.
|
||
|
||
-- Function: size_t mbstowcs (wchar_t *WSTRING, const char *STRING,
|
||
size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Unsafe corrupt heap lock dlopen |
|
||
AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The `mbstowcs' ("multibyte string to wide character string")
|
||
function converts the null-terminated string of multibyte
|
||
characters STRING to an array of wide character codes, storing not
|
||
more than SIZE wide characters into the array beginning at WSTRING.
|
||
The terminating null character counts towards the size, so if SIZE
|
||
is less than the actual number of wide characters resulting from
|
||
STRING, no terminating null character is stored.
|
||
|
||
The conversion of characters from STRING begins in the initial
|
||
shift state.
|
||
|
||
If an invalid multibyte character sequence is found, the `mbstowcs'
|
||
function returns a value of -1. Otherwise, it returns the number
|
||
of wide characters stored in the array WSTRING. This number does
|
||
not include the terminating null character, which is present if the
|
||
number is less than SIZE.
|
||
|
||
Here is an example showing how to convert a string of multibyte
|
||
characters, allocating enough space for the result.
|
||
|
||
wchar_t *
|
||
mbstowcs_alloc (const char *string)
|
||
{
|
||
size_t size = strlen (string) + 1;
|
||
wchar_t *buf = xmalloc (size * sizeof (wchar_t));
|
||
|
||
size = mbstowcs (buf, string, size);
|
||
if (size == (size_t) -1)
|
||
return NULL;
|
||
buf = xrealloc (buf, (size + 1) * sizeof (wchar_t));
|
||
return buf;
|
||
}
|
||
|
||
|
||
-- Function: size_t wcstombs (char *STRING, const wchar_t *WSTRING,
|
||
size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Unsafe corrupt heap lock dlopen |
|
||
AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The `wcstombs' ("wide character string to multibyte string")
|
||
function converts the null-terminated wide character array WSTRING
|
||
into a string containing multibyte characters, storing not more
|
||
than SIZE bytes starting at STRING, followed by a terminating null
|
||
character if there is room. The conversion of characters begins in
|
||
the initial shift state.
|
||
|
||
The terminating null character counts towards the size, so if SIZE
|
||
is less than or equal to the number of bytes needed in WSTRING, no
|
||
terminating null character is stored.
|
||
|
||
If a code that does not correspond to a valid multibyte character
|
||
is found, the `wcstombs' function returns a value of -1.
|
||
Otherwise, the return value is the number of bytes stored in the
|
||
array STRING. This number does not include the terminating null
|
||
character, which is present if the number is less than SIZE.
|
||
|
||
|
||
File: libc.info, Node: Shift State, Prev: Non-reentrant String Conversion, Up: Non-reentrant Conversion
|
||
|
||
6.4.3 States in Non-reentrant Functions
|
||
---------------------------------------
|
||
|
||
In some multibyte character codes, the _meaning_ of any particular byte
|
||
sequence is not fixed; it depends on what other sequences have come
|
||
earlier in the same string. Typically there are just a few sequences
|
||
that can change the meaning of other sequences; these few are called
|
||
"shift sequences" and we say that they set the "shift state" for other
|
||
sequences that follow.
|
||
|
||
To illustrate shift state and shift sequences, suppose we decide that
|
||
the sequence `0200' (just one byte) enters Japanese mode, in which
|
||
pairs of bytes in the range from `0240' to `0377' are single
|
||
characters, while `0201' enters Latin-1 mode, in which single bytes in
|
||
the range from `0240' to `0377' are characters, and interpreted
|
||
according to the ISO Latin-1 character set. This is a multibyte code
|
||
that has two alternative shift states ("Japanese mode" and "Latin-1
|
||
mode"), and two shift sequences that specify particular shift states.
|
||
|
||
When the multibyte character code in use has shift states, then
|
||
`mblen', `mbtowc', and `wctomb' must maintain and update the current
|
||
shift state as they scan the string. To make this work properly, you
|
||
must follow these rules:
|
||
|
||
* Before starting to scan a string, call the function with a null
|
||
pointer for the multibyte character address--for example, `mblen
|
||
(NULL, 0)'. This initializes the shift state to its standard
|
||
initial value.
|
||
|
||
* Scan the string one character at a time, in order. Do not "back
|
||
up" and rescan characters already scanned, and do not intersperse
|
||
the processing of different strings.
|
||
|
||
Here is an example of using `mblen' following these rules:
|
||
|
||
void
|
||
scan_string (char *s)
|
||
{
|
||
int length = strlen (s);
|
||
|
||
/* Initialize shift state. */
|
||
mblen (NULL, 0);
|
||
|
||
while (1)
|
||
{
|
||
int thischar = mblen (s, length);
|
||
/* Deal with end of string and invalid characters. */
|
||
if (thischar == 0)
|
||
break;
|
||
if (thischar == -1)
|
||
{
|
||
error (0, 0, "invalid multibyte character");
|
||
break;
|
||
}
|
||
/* Advance past this character. */
|
||
s += thischar;
|
||
length -= thischar;
|
||
}
|
||
}
|
||
|
||
The functions `mblen', `mbtowc' and `wctomb' are not reentrant when
|
||
using a multibyte code that uses a shift state. However, no other
|
||
library functions call these functions, so you don't have to worry that
|
||
the shift state will be changed mysteriously.
|
||
|
||
|
||
File: libc.info, Node: Generic Charset Conversion, Prev: Non-reentrant Conversion, Up: Character Set Handling
|
||
|
||
6.5 Generic Charset Conversion
|
||
==============================
|
||
|
||
The conversion functions mentioned so far in this chapter all had in
|
||
common that they operate on character sets that are not directly
|
||
specified by the functions. The multibyte encoding used is specified by
|
||
the currently selected locale for the `LC_CTYPE' category. The wide
|
||
character set is fixed by the implementation (in the case of the GNU C
|
||
Library it is always UCS-4 encoded ISO 10646).
|
||
|
||
This has of course several problems when it comes to general
|
||
character conversion:
|
||
|
||
* For every conversion where neither the source nor the destination
|
||
character set is the character set of the locale for the `LC_CTYPE'
|
||
category, one has to change the `LC_CTYPE' locale using
|
||
`setlocale'.
|
||
|
||
Changing the `LC_CTYPE' locale introduces major problems for the
|
||
rest of the programs since several more functions (e.g., the
|
||
character classification functions, *note Classification of
|
||
Characters::) use the `LC_CTYPE' category.
|
||
|
||
* Parallel conversions to and from different character sets are not
|
||
possible since the `LC_CTYPE' selection is global and shared by all
|
||
threads.
|
||
|
||
* If neither the source nor the destination character set is the
|
||
character set used for `wchar_t' representation, there is at least
|
||
a two-step process necessary to convert a text using the functions
|
||
above. One would have to select the source character set as the
|
||
multibyte encoding, convert the text into a `wchar_t' text, select
|
||
the destination character set as the multibyte encoding, and
|
||
convert the wide character text to the multibyte (= destination)
|
||
character set.
|
||
|
||
Even if this is possible (which is not guaranteed) it is a very
|
||
tiring task. Plus it suffers from the other two raised points
|
||
even more due to the steady changing of the locale.
|
||
|
||
The XPG2 standard defines a completely new set of functions, which
|
||
has none of these limitations. They are not at all coupled to the
|
||
selected locales, and they have no constraints on the character sets
|
||
selected for source and destination. Only the set of available
|
||
conversions limits them. The standard does not specify that any
|
||
conversion at all must be available. Such availability is a measure of
|
||
the quality of the implementation.
|
||
|
||
In the following text first the interface to `iconv' and then the
|
||
conversion function, will be described. Comparisons with other
|
||
implementations will show what obstacles stand in the way of portable
|
||
applications. Finally, the implementation is described in so far as
|
||
might interest the advanced user who wants to extend conversion
|
||
capabilities.
|
||
|
||
* Menu:
|
||
|
||
* Generic Conversion Interface:: Generic Character Set Conversion Interface.
|
||
* iconv Examples:: A complete `iconv' example.
|
||
* Other iconv Implementations:: Some Details about other `iconv'
|
||
Implementations.
|
||
* glibc iconv Implementation:: The `iconv' Implementation in the GNU C
|
||
library.
|
||
|
||
|
||
File: libc.info, Node: Generic Conversion Interface, Next: iconv Examples, Up: Generic Charset Conversion
|
||
|
||
6.5.1 Generic Character Set Conversion Interface
|
||
------------------------------------------------
|
||
|
||
This set of functions follows the traditional cycle of using a resource:
|
||
open-use-close. The interface consists of three functions, each of
|
||
which implements one step.
|
||
|
||
Before the interfaces are described it is necessary to introduce a
|
||
data type. Just like other open-use-close interfaces the functions
|
||
introduced here work using handles and the `iconv.h' header defines a
|
||
special type for the handles used.
|
||
|
||
-- Data Type: iconv_t
|
||
This data type is an abstract type defined in `iconv.h'. The user
|
||
must not assume anything about the definition of this type; it
|
||
must be completely opaque.
|
||
|
||
Objects of this type can be assigned handles for the conversions
|
||
using the `iconv' functions. The objects themselves need not be
|
||
freed, but the conversions for which the handles stand have to be.
|
||
|
||
The first step is the function to create a handle.
|
||
|
||
-- Function: iconv_t iconv_open (const char *TOCODE, const char
|
||
*FROMCODE)
|
||
Preliminary: | MT-Safe locale | AS-Unsafe corrupt heap lock dlopen
|
||
| AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The `iconv_open' function has to be used before starting a
|
||
conversion. The two parameters this function takes determine the
|
||
source and destination character set for the conversion, and if the
|
||
implementation has the possibility to perform such a conversion,
|
||
the function returns a handle.
|
||
|
||
If the wanted conversion is not available, the `iconv_open'
|
||
function returns `(iconv_t) -1'. In this case the global variable
|
||
`errno' can have the following values:
|
||
|
||
`EMFILE'
|
||
The process already has `OPEN_MAX' file descriptors open.
|
||
|
||
`ENFILE'
|
||
The system limit of open files is reached.
|
||
|
||
`ENOMEM'
|
||
Not enough memory to carry out the operation.
|
||
|
||
`EINVAL'
|
||
The conversion from FROMCODE to TOCODE is not supported.
|
||
|
||
It is not possible to use the same descriptor in different threads
|
||
to perform independent conversions. The data structures associated
|
||
with the descriptor include information about the conversion state.
|
||
This must not be messed up by using it in different conversions.
|
||
|
||
An `iconv' descriptor is like a file descriptor as for every use a
|
||
new descriptor must be created. The descriptor does not stand for
|
||
all of the conversions from FROMCODE to TOCODE.
|
||
|
||
The GNU C Library implementation of `iconv_open' has one
|
||
significant extension to other implementations. To ease the
|
||
extension of the set of available conversions, the implementation
|
||
allows storing the necessary files with data and code in an
|
||
arbitrary number of directories. How this extension must be
|
||
written will be explained below (*note glibc iconv
|
||
Implementation::). Here it is only important to say that all
|
||
directories mentioned in the `GCONV_PATH' environment variable are
|
||
considered only if they contain a file `gconv-modules'. These
|
||
directories need not necessarily be created by the system
|
||
administrator. In fact, this extension is introduced to help users
|
||
writing and using their own, new conversions. Of course, this
|
||
does not work for security reasons in SUID binaries; in this case
|
||
only the system directory is considered and this normally is
|
||
`PREFIX/lib/gconv'. The `GCONV_PATH' environment variable is
|
||
examined exactly once at the first call of the `iconv_open'
|
||
function. Later modifications of the variable have no effect.
|
||
|
||
The `iconv_open' function was introduced early in the X/Open
|
||
Portability Guide, version 2. It is supported by all commercial
|
||
Unices as it is required for the Unix branding. However, the
|
||
quality and completeness of the implementation varies widely. The
|
||
`iconv_open' function is declared in `iconv.h'.
|
||
|
||
The `iconv' implementation can associate large data structures with
|
||
the handle returned by `iconv_open'. Therefore, it is crucial to free
|
||
all the resources once all conversions are carried out and the
|
||
conversion is not needed anymore.
|
||
|
||
-- Function: int iconv_close (iconv_t CD)
|
||
Preliminary: | MT-Safe | AS-Unsafe corrupt heap lock dlopen |
|
||
AC-Unsafe corrupt lock mem | *Note POSIX Safety Concepts::.
|
||
|
||
The `iconv_close' function frees all resources associated with the
|
||
handle CD, which must have been returned by a successful call to
|
||
the `iconv_open' function.
|
||
|
||
If the function call was successful the return value is 0.
|
||
Otherwise it is -1 and `errno' is set appropriately. Defined
|
||
errors are:
|
||
|
||
`EBADF'
|
||
The conversion descriptor is invalid.
|
||
|
||
The `iconv_close' function was introduced together with the rest
|
||
of the `iconv' functions in XPG2 and is declared in `iconv.h'.
|
||
|
||
The standard defines only one actual conversion function. This has,
|
||
therefore, the most general interface: it allows conversion from one
|
||
buffer to another. Conversion from a file to a buffer, vice versa, or
|
||
even file to file can be implemented on top of it.
|
||
|
||
-- Function: size_t iconv (iconv_t CD, char **INBUF, size_t
|
||
*INBYTESLEFT, char **OUTBUF, size_t *OUTBYTESLEFT)
|
||
Preliminary: | MT-Safe race:cd | AS-Safe | AC-Unsafe corrupt |
|
||
*Note POSIX Safety Concepts::.
|
||
|
||
The `iconv' function converts the text in the input buffer
|
||
according to the rules associated with the descriptor CD and
|
||
stores the result in the output buffer. It is possible to call the
|
||
function for the same text several times in a row since for
|
||
stateful character sets the necessary state information is kept in
|
||
the data structures associated with the descriptor.
|
||
|
||
The input buffer is specified by `*INBUF' and it contains
|
||
`*INBYTESLEFT' bytes. The extra indirection is necessary for
|
||
communicating the used input back to the caller (see below). It is
|
||
important to note that the buffer pointer is of type `char' and the
|
||
length is measured in bytes even if the input text is encoded in
|
||
wide characters.
|
||
|
||
The output buffer is specified in a similar way. `*OUTBUF' points
|
||
to the beginning of the buffer with at least `*OUTBYTESLEFT' bytes
|
||
room for the result. The buffer pointer again is of type `char'
|
||
and the length is measured in bytes. If OUTBUF or `*OUTBUF' is a
|
||
null pointer, the conversion is performed but no output is
|
||
available.
|
||
|
||
If INBUF is a null pointer, the `iconv' function performs the
|
||
necessary action to put the state of the conversion into the
|
||
initial state. This is obviously a no-op for non-stateful
|
||
encodings, but if the encoding has a state, such a function call
|
||
might put some byte sequences in the output buffer, which perform
|
||
the necessary state changes. The next call with INBUF not being a
|
||
null pointer then simply goes on from the initial state. It is
|
||
important that the programmer never makes any assumption as to
|
||
whether the conversion has to deal with states. Even if the input
|
||
and output character sets are not stateful, the implementation
|
||
might still have to keep states. This is due to the
|
||
implementation chosen for the GNU C Library as it is described
|
||
below. Therefore an `iconv' call to reset the state should always
|
||
be performed if some protocol requires this for the output text.
|
||
|
||
The conversion stops for one of three reasons. The first is that
|
||
all characters from the input buffer are converted. This actually
|
||
can mean two things: either all bytes from the input buffer are
|
||
consumed or there are some bytes at the end of the buffer that
|
||
possibly can form a complete character but the input is
|
||
incomplete. The second reason for a stop is that the output
|
||
buffer is full. And the third reason is that the input contains
|
||
invalid characters.
|
||
|
||
In all of these cases the buffer pointers after the last successful
|
||
conversion, for the input and output buffers, are stored in INBUF
|
||
and OUTBUF, and the available room in each buffer is stored in
|
||
INBYTESLEFT and OUTBYTESLEFT.
|
||
|
||
Since the character sets selected in the `iconv_open' call can be
|
||
almost arbitrary, there can be situations where the input buffer
|
||
contains valid characters, which have no identical representation
|
||
in the output character set. The behavior in this situation is
|
||
undefined. The _current_ behavior of the GNU C Library in this
|
||
situation is to return with an error immediately. This certainly
|
||
is not the most desirable solution; therefore, future versions
|
||
will provide better ones, but they are not yet finished.
|
||
|
||
If all input from the input buffer is successfully converted and
|
||
stored in the output buffer, the function returns the number of
|
||
non-reversible conversions performed. In all other cases the
|
||
return value is `(size_t) -1' and `errno' is set appropriately.
|
||
In such cases the value pointed to by INBYTESLEFT is nonzero.
|
||
|
||
`EILSEQ'
|
||
The conversion stopped because of an invalid byte sequence in
|
||
the input. After the call, `*INBUF' points at the first byte
|
||
of the invalid byte sequence.
|
||
|
||
`E2BIG'
|
||
The conversion stopped because it ran out of space in the
|
||
output buffer.
|
||
|
||
`EINVAL'
|
||
The conversion stopped because of an incomplete byte sequence
|
||
at the end of the input buffer.
|
||
|
||
`EBADF'
|
||
The CD argument is invalid.
|
||
|
||
The `iconv' function was introduced in the XPG2 standard and is
|
||
declared in the `iconv.h' header.
|
||
|
||
The definition of the `iconv' function is quite good overall. It
|
||
provides quite flexible functionality. The only problems lie in the
|
||
boundary cases, which are incomplete byte sequences at the end of the
|
||
input buffer and invalid input. A third problem, which is not really a
|
||
design problem, is the way conversions are selected. The standard does
|
||
not say anything about the legitimate names or a minimal set of available
|
||
conversions. We will see how this negatively impacts other
|
||
implementations, as demonstrated below.
|
||
|
||
|
||
File: libc.info, Node: iconv Examples, Next: Other iconv Implementations, Prev: Generic Conversion Interface, Up: Generic Charset Conversion
|
||
|
||
6.5.2 A complete `iconv' example
|
||
--------------------------------
|
||
|
||
The example below features a solution for a common problem. Given that
|
||
one knows the internal encoding used by the system for `wchar_t'
|
||
strings, one often is in the position to read text from a file and store
|
||
it in wide character buffers. One can do this using `mbsrtowcs', but
|
||
then we run into the problems discussed above.
|
||
|
||
int
|
||
file2wcs (int fd, const char *charset, wchar_t *outbuf, size_t avail)
|
||
{
|
||
char inbuf[BUFSIZ];
|
||
size_t insize = 0;
|
||
char *wrptr = (char *) outbuf;
|
||
int result = 0;
|
||
iconv_t cd;
|
||
|
||
cd = iconv_open ("WCHAR_T", charset);
|
||
if (cd == (iconv_t) -1)
|
||
{
|
||
/* Something went wrong. */
|
||
if (errno == EINVAL)
|
||
error (0, 0, "conversion from '%s' to wchar_t not available",
|
||
charset);
|
||
else
|
||
perror ("iconv_open");
|
||
|
||
/* Terminate the output string. */
|
||
*outbuf = L'\0';
|
||
|
||
return -1;
|
||
}
|
||
|
||
while (avail > 0)
|
||
{
|
||
size_t nread;
|
||
size_t nconv;
|
||
char *inptr = inbuf;
|
||
|
||
/* Read more input. */
|
||
nread = read (fd, inbuf + insize, sizeof (inbuf) - insize);
|
||
if (nread == 0)
|
||
{
|
||
/* When we come here the file is completely read.
|
||
This still could mean there are some unused
|
||
characters in the `inbuf'. Put them back. */
|
||
if (lseek (fd, -insize, SEEK_CUR) == -1)
|
||
result = -1;
|
||
|
||
/* Now write out the byte sequence to get into the
|
||
initial state if this is necessary. */
|
||
iconv (cd, NULL, NULL, &wrptr, &avail);
|
||
|
||
break;
|
||
}
|
||
insize += nread;
|
||
|
||
/* Do the conversion. */
|
||
nconv = iconv (cd, &inptr, &insize, &wrptr, &avail);
|
||
if (nconv == (size_t) -1)
|
||
{
|
||
/* Not everything went right. It might only be
|
||
an unfinished byte sequence at the end of the
|
||
buffer. Or it is a real problem. */
|
||
if (errno == EINVAL)
|
||
/* This is harmless. Simply move the unused
|
||
bytes to the beginning of the buffer so that
|
||
they can be used in the next round. */
|
||
memmove (inbuf, inptr, insize);
|
||
else
|
||
{
|
||
/* It is a real problem. Maybe we ran out of
|
||
space in the output buffer or we have invalid
|
||
input. In any case back the file pointer to
|
||
the position of the last processed byte. */
|
||
lseek (fd, -insize, SEEK_CUR);
|
||
result = -1;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
/* Terminate the output string. */
|
||
if (avail >= sizeof (wchar_t))
|
||
*((wchar_t *) wrptr) = L'\0';
|
||
|
||
if (iconv_close (cd) != 0)
|
||
perror ("iconv_close");
|
||
|
||
return (wchar_t *) wrptr - outbuf;
|
||
}
|
||
|
||
This example shows the most important aspects of using the `iconv'
|
||
functions. It shows how successive calls to `iconv' can be used to
|
||
convert large amounts of text. The user does not have to care about
|
||
stateful encodings as the functions take care of everything.
|
||
|
||
An interesting point is the case where `iconv' returns an error and
|
||
`errno' is set to `EINVAL'. This is not really an error in the
|
||
transformation. It can happen whenever the input character set contains
|
||
byte sequences of more than one byte for some character and texts are
|
||
not processed in one piece. In this case there is a chance that a
|
||
multibyte sequence is cut. The caller can then simply read the
|
||
remainder of the text and feed the offending bytes together with new
|
||
characters from the input to `iconv' and continue the work. The
|
||
internal state kept in the descriptor is _not_ unspecified after such
|
||
an event as is the case with the conversion functions from the ISO C
|
||
standard.
|
||
|
||
The example also shows the problem of using wide character strings
|
||
with `iconv'. As explained in the description of the `iconv' function
|
||
above, the function always takes a pointer to a `char' array and the
|
||
available space is measured in bytes. In the example, the output
|
||
buffer is a wide character buffer; therefore, we use a local variable
|
||
WRPTR of type `char *', which is used in the `iconv' calls.
|
||
|
||
This looks rather innocent but can lead to problems on platforms that
|
||
have tight restrictions on alignment. Therefore the caller of `iconv'
|
||
has to make sure that the pointers passed are suitable for access of
|
||
characters from the appropriate character set. Since, in the above
|
||
case, the input parameter to the function is a `wchar_t' pointer, this
|
||
is the case (unless the user violates alignment when computing the
|
||
parameter). But in other situations, especially when writing generic
|
||
functions where one does not know what type of character set one uses
|
||
and, therefore, treats text as a sequence of bytes, it might become
|
||
tricky.
|
||
|
||
|
||
File: libc.info, Node: Other iconv Implementations, Next: glibc iconv Implementation, Prev: iconv Examples, Up: Generic Charset Conversion
|
||
|
||
6.5.3 Some Details about other `iconv' Implementations
|
||
------------------------------------------------------
|
||
|
||
This is not really the place to discuss the `iconv' implementation of
|
||
other systems but it is necessary to know a bit about them to write
|
||
portable programs. The above mentioned problems with the specification
|
||
of the `iconv' functions can lead to portability issues.
|
||
|
||
The first thing to notice is that, due to the large number of
|
||
character sets in use, it is certainly not practical to encode the
|
||
conversions directly in the C library. Therefore, the conversion
|
||
information must come from files outside the C library. This is
|
||
usually done in one or both of the following ways:
|
||
|
||
* The C library contains a set of generic conversion functions that
|
||
can read the needed conversion tables and other information from
|
||
data files. These files get loaded when necessary.
|
||
|
||
This solution is problematic as it requires a great deal of effort
|
||
to apply to all character sets (potentially an infinite set). The
|
||
differences in the structure of the different character sets are so
|
||
large that many different variants of the table-processing
|
||
functions must be developed. In addition, the generic nature of
|
||
these functions makes them slower than specifically implemented
|
||
functions.
|
||
|
||
* The C library only contains a framework that can dynamically load
|
||
object files and execute the conversion functions contained
|
||
therein.
|
||
|
||
This solution provides much more flexibility. The C library itself
|
||
contains only very little code and therefore reduces the general
|
||
memory footprint. Also, with a documented interface between the C
|
||
library and the loadable modules it is possible for third parties
|
||
to extend the set of available conversion modules. A drawback of
|
||
this solution is that dynamic loading must be available.
|
||
|
||
Some implementations in commercial Unices implement a mixture of
|
||
these possibilities; the majority implement only the second solution.
|
||
Using loadable modules moves the code out of the library itself and
|
||
keeps the door open for extensions and improvements, but this design is
|
||
also limiting on some platforms since not many platforms support dynamic
|
||
loading in statically linked programs. On platforms without this
|
||
capability it is therefore not possible to use this interface in
|
||
statically linked programs. The GNU C Library has, on ELF platforms, no
|
||
problems with dynamic loading in these situations; therefore, this
|
||
point is moot. The danger is that one gets acquainted with this
|
||
situation and forgets about the restrictions on other systems.
|
||
|
||
A second thing to know about other `iconv' implementations is that
|
||
the number of available conversions is often very limited. Some
|
||
implementations provide, in the standard release (not special
|
||
international or developer releases), at most 100 to 200 conversion
|
||
possibilities. This does not mean 200 different character sets are
|
||
supported; for example, conversions from one character set to a set of
|
||
10 others might count as 10 conversions. Together with the other
|
||
direction this makes 20 conversion possibilities used up by one
|
||
character set. One can imagine the thin coverage these platforms
|
||
provide. Some Unix vendors even provide only a handful of conversions,
|
||
which renders them useless for almost all uses.
|
||
|
||
This directly leads to a third and probably the most problematic
|
||
point. The way the `iconv' conversion functions are implemented on all
|
||
known Unix systems and the availability of the conversion functions from
|
||
character set A to B and the conversion from B to C does _not_ imply
|
||
that the conversion from A to C is available.
|
||
|
||
This might not seem unreasonable or problematic at first, but it is
|
||
a quite big problem as one will notice shortly after hitting it. To
|
||
show the problem, assume we write a program that has to convert from
|
||
A to C. A call like
|
||
|
||
cd = iconv_open ("C", "A");
|
||
|
||
fails according to the assumption above. But what does the program do
|
||
now? The conversion is necessary; therefore, simply giving up is not
|
||
an option.
|
||
|
||
This is a nuisance. The `iconv' function should take care of this.
|
||
But how should the program proceed from here on? If it tries to convert
|
||
to character set B first, the two `iconv_open' calls
|
||
|
||
cd1 = iconv_open ("B", "A");
|
||
|
||
and
|
||
|
||
cd2 = iconv_open ("C", "B");
|
||
|
||
will succeed, but how to find B?
|
||
|
||
Unfortunately, the answer is: there is no general solution. On some
|
||
systems guessing might help. On those systems most character sets can
|
||
convert to and from UTF-8 encoded ISO 10646 or Unicode text. Besides
|
||
this only some very system-specific methods can help. Since the
|
||
conversion functions come from loadable modules and these modules must
|
||
be stored somewhere in the filesystem, one _could_ try to find them and
|
||
determine from the available file which conversions are available and
|
||
whether there is an indirect route from A to C.
|
||
|
||
This example shows one of the design errors of `iconv' mentioned
|
||
above. It should at least be possible to determine the list of
|
||
available conversions programmatically so that if `iconv_open' says
|
||
there is no such conversion, one could make sure this also is true for
|
||
indirect routes.
|
||
|
||
|
||
File: libc.info, Node: glibc iconv Implementation, Prev: Other iconv Implementations, Up: Generic Charset Conversion
|
||
|
||
6.5.4 The `iconv' Implementation in the GNU C Library
|
||
-----------------------------------------------------
|
||
|
||
After reading about the problems of `iconv' implementations in the last
|
||
section it is certainly good to note that the implementation in the GNU
|
||
C Library has none of the problems mentioned above. What follows is a
|
||
step-by-step analysis of the points raised above. The evaluation is
|
||
based on the current state of the development (as of January 1999).
|
||
The development of the `iconv' functions is not complete, but basic
|
||
functionality has solidified.
|
||
|
||
The GNU C Library's `iconv' implementation uses shared loadable
|
||
modules to implement the conversions. A very small number of
|
||
conversions are built into the library itself but these are only rather
|
||
trivial conversions.
|
||
|
||
All the benefits of loadable modules are available in the GNU C
|
||
Library implementation. This is especially appealing since the
|
||
interface is well documented (see below), and it, therefore, is easy to
|
||
write new conversion modules. The drawback of using loadable objects
|
||
is not a problem in the GNU C Library, at least on ELF systems. Since
|
||
the library is able to load shared objects even in statically linked
|
||
binaries, static linking need not be forbidden in case one wants to use
|
||
`iconv'.
|
||
|
||
The second mentioned problem is the number of supported conversions.
|
||
Currently, the GNU C Library supports more than 150 character sets. The
|
||
way the implementation is designed the number of supported conversions
|
||
is greater than 22350 (150 times 149). If any conversion from or to a
|
||
character set is missing, it can be added easily.
|
||
|
||
Particularly impressive as it may be, this high number is due to the
|
||
fact that the GNU C Library implementation of `iconv' does not have the
|
||
third problem mentioned above (i.e., whenever there is a conversion
|
||
from a character set A to B and from B to C it is always possible to
|
||
convert from A to C directly). If the `iconv_open' returns an error
|
||
and sets `errno' to `EINVAL', there is no known way, directly or
|
||
indirectly, to perform the wanted conversion.
|
||
|
||
Triangulation is achieved by providing for each character set a
|
||
conversion from and to UCS-4 encoded ISO 10646. Using ISO 10646 as an
|
||
intermediate representation it is possible to "triangulate" (i.e.,
|
||
convert with an intermediate representation).
|
||
|
||
There is no inherent requirement to provide a conversion to
|
||
ISO 10646 for a new character set, and it is also possible to provide
|
||
other conversions where neither source nor destination character set is
|
||
ISO 10646. The existing set of conversions is simply meant to cover all
|
||
conversions that might be of interest.
|
||
|
||
All currently available conversions use the triangulation method
|
||
above, making conversion run unnecessarily slow. If, for example,
|
||
somebody often needs the conversion from ISO-2022-JP to EUC-JP, a
|
||
quicker solution would involve direct conversion between the two
|
||
character sets, skipping the input to ISO 10646 first. The two
|
||
character sets of interest are much more similar to each other than to
|
||
ISO 10646.
|
||
|
||
In such a situation one easily can write a new conversion and
|
||
provide it as a better alternative. The GNU C Library `iconv'
|
||
implementation would automatically use the module implementing the
|
||
conversion if it is specified to be more efficient.
|
||
|
||
6.5.4.1 Format of `gconv-modules' files
|
||
.......................................
|
||
|
||
All information about the available conversions comes from a file named
|
||
`gconv-modules', which can be found in any of the directories along the
|
||
`GCONV_PATH'. The `gconv-modules' files are line-oriented text files,
|
||
where each of the lines has one of the following formats:
|
||
|
||
* If the first non-whitespace character is a `#' the line contains
|
||
only comments and is ignored.
|
||
|
||
* Lines starting with `alias' define an alias name for a character
|
||
set. Two more words are expected on the line. The first word
|
||
defines the alias name, and the second defines the original name
|
||
of the character set. The effect is that it is possible to use
|
||
the alias name in the FROMCODE or TOCODE parameters of `iconv_open'
|
||
and achieve the same result as when using the real character set
|
||
name.
|
||
|
||
This is quite important as a character set often has many different
|
||
names. There is normally an official name but this need not
|
||
correspond to the most popular name. Besides this many character
|
||
sets have special names that are somehow constructed. For
|
||
example, all character sets specified by the ISO have an alias of
|
||
the form `ISO-IR-NNN' where NNN is the registration number. This
|
||
allows programs that know about the registration number to
|
||
construct character set names and use them in `iconv_open' calls.
|
||
More on the available names and aliases follows below.
|
||
|
||
* Lines starting with `module' introduce an available conversion
|
||
module. These lines must contain three or four more words.
|
||
|
||
The first word specifies the source character set, the second word
|
||
the destination character set of conversion implemented in this
|
||
module, and the third word is the name of the loadable module.
|
||
The filename is constructed by appending the usual shared object
|
||
suffix (normally `.so') and this file is then supposed to be found
|
||
in the same directory the `gconv-modules' file is in. The last
|
||
word on the line, which is optional, is a numeric value
|
||
representing the cost of the conversion. If this word is missing,
|
||
a cost of 1 is assumed. The numeric value itself does not matter
|
||
that much; what counts are the relative values of the sums of
|
||
costs for all possible conversion paths. Below is a more precise
|
||
description of the use of the cost value.
|
||
|
||
Returning to the example above where one has written a module to
|
||
directly convert from ISO-2022-JP to EUC-JP and back. All that has to
|
||
be done is to put the new module, let its name be ISO2022JP-EUCJP.so,
|
||
in a directory and add a file `gconv-modules' with the following
|
||
content in the same directory:
|
||
|
||
module ISO-2022-JP// EUC-JP// ISO2022JP-EUCJP 1
|
||
module EUC-JP// ISO-2022-JP// ISO2022JP-EUCJP 1
|
||
|
||
To see why this is sufficient, it is necessary to understand how the
|
||
conversion used by `iconv' (and described in the descriptor) is
|
||
selected. The approach to this problem is quite simple.
|
||
|
||
At the first call of the `iconv_open' function the program reads all
|
||
available `gconv-modules' files and builds up two tables: one
|
||
containing all the known aliases and another that contains the
|
||
information about the conversions and which shared object implements
|
||
them.
|
||
|
||
6.5.4.2 Finding the conversion path in `iconv'
|
||
..............................................
|
||
|
||
The set of available conversions forms a directed graph with weighted
|
||
edges. The weights on the edges are the costs specified in the
|
||
`gconv-modules' files. The `iconv_open' function uses an algorithm
|
||
suitable for searching for the best path in such a graph and so constructs
|
||
a list of conversions that must be performed in succession to get the
|
||
transformation from the source to the destination character set.
|
||
|
||
Explaining why the above `gconv-modules' file allows the `iconv'
|
||
implementation to resolve the specific ISO-2022-JP to EUC-JP conversion
|
||
module instead of the conversion coming with the library itself is
|
||
straightforward. Since the latter conversion takes two steps (from
|
||
ISO-2022-JP to ISO 10646 and then from ISO 10646 to EUC-JP), the cost
|
||
is 1+1 = 2. The above `gconv-modules' file, however, specifies that
|
||
the new conversion module can perform this conversion with only the
|
||
cost of 1.
|
||
|
||
A mysterious item about the `gconv-modules' file above (and also the
|
||
file coming with the GNU C Library) are the names of the character sets
|
||
specified in the `module' lines. Why do almost all the names end in
|
||
`//'? And this is not all: the names can actually be regular
|
||
expressions. At this point in time this mystery should not be
|
||
revealed, unless you have the relevant spell-casting materials: ashes
|
||
from an original DOS 6.2 boot disk burnt in effigy, a crucifix blessed
|
||
by St. Emacs, assorted herbal roots from Central America, sand from
|
||
Cebu, etc. Sorry! *The part of the implementation where this is used
|
||
is not yet finished. For now please simply follow the existing
|
||
examples. It'll become clearer once it is. -drepper*
|
||
|
||
A last remark about the `gconv-modules' is about the names not
|
||
ending with `//'. A character set named `INTERNAL' is often mentioned.
|
||
From the discussion above and the chosen name it should have become
|
||
clear that this is the name for the representation used in the
|
||
intermediate step of the triangulation. We have said that this is UCS-4
|
||
but actually that is not quite right. The UCS-4 specification also
|
||
includes the specification of the byte ordering used. Since a UCS-4
|
||
value consists of four bytes, a stored value is affected by byte
|
||
ordering. The internal representation is _not_ the same as UCS-4 in
|
||
case the byte ordering of the processor (or at least the running
|
||
process) is not the same as the one required for UCS-4. This is done
|
||
for performance reasons as one does not want to perform unnecessary
|
||
byte-swapping operations if one is not interested in actually seeing
|
||
the result in UCS-4. To avoid trouble with endianness, the internal
|
||
representation consistently is named `INTERNAL' even on big-endian
|
||
systems where the representations are identical.
|
||
|
||
6.5.4.3 `iconv' module data structures
|
||
......................................
|
||
|
||
So far this section has described how modules are located and considered
|
||
to be used. What remains to be described is the interface of the
|
||
modules so that one can write new ones. This section describes the
|
||
interface as it is in use in January 1999. The interface will change a
|
||
bit in the future but, with luck, only in an upwardly compatible way.
|
||
|
||
The definitions necessary to write new modules are publicly available
|
||
in the non-standard header `gconv.h'. The following text, therefore,
|
||
describes the definitions from this header file. First, however, it is
|
||
necessary to get an overview.
|
||
|
||
From the perspective of the user of `iconv' the interface is quite
|
||
simple: the `iconv_open' function returns a handle that can be used in
|
||
calls to `iconv', and finally the handle is freed with a call to
|
||
`iconv_close'. The problem is that the handle has to be able to
|
||
represent the possibly long sequences of conversion steps and also the
|
||
state of each conversion since the handle is all that is passed to the
|
||
`iconv' function. Therefore, the data structures are really the
|
||
elements necessary for understanding the implementation.
|
||
|
||
We need two different kinds of data structures. The first describes
|
||
the conversion and the second describes the state etc. There are
|
||
really two type definitions like this in `gconv.h'.
|
||
|
||
-- Data type: struct __gconv_step
|
||
This data structure describes one conversion a module can perform.
|
||
For each function in a loaded module with conversion functions
|
||
there is exactly one object of this type. This object is shared
|
||
by all users of the conversion (i.e., this object does not contain
|
||
any information corresponding to an actual conversion; it only
|
||
describes the conversion itself).
|
||
|
||
`struct __gconv_loaded_object *__shlib_handle'
|
||
`const char *__modname'
|
||
`int __counter'
|
||
All these elements of the structure are used internally in
|
||
the C library to coordinate loading and unloading the shared
|
||
object. One must not expect any of the other elements to be
|
||
available or initialized.
|
||
|
||
`const char *__from_name'
|
||
`const char *__to_name'
|
||
`__from_name' and `__to_name' contain the names of the source
|
||
and destination character sets. They can be used to identify
|
||
the actual conversion to be carried out since one module
|
||
might implement conversions for more than one character set
|
||
and/or direction.
|
||
|
||
`gconv_fct __fct'
|
||
`gconv_init_fct __init_fct'
|
||
`gconv_end_fct __end_fct'
|
||
These elements contain pointers to the functions in the
|
||
loadable module. The interface will be explained below.
|
||
|
||
`int __min_needed_from'
|
||
`int __max_needed_from'
|
||
`int __min_needed_to'
|
||
`int __max_needed_to'
|
||
These values have to be supplied in the init function of the
|
||
module. The `__min_needed_from' value specifies how many
|
||
bytes a character of the source character set at least needs.
|
||
The `__max_needed_from' specifies the maximum value that
|
||
also includes possible shift sequences.
|
||
|
||
The `__min_needed_to' and `__max_needed_to' values serve the
|
||
same purpose as `__min_needed_from' and `__max_needed_from'
|
||
but this time for the destination character set.
|
||
|
||
It is crucial that these values be accurate since otherwise
|
||
the conversion functions will have problems or not work at
|
||
all.
|
||
|
||
`int __stateful'
|
||
This element must also be initialized by the init function.
|
||
`int __stateful' is nonzero if the source character set is
|
||
stateful. Otherwise it is zero.
|
||
|
||
`void *__data'
|
||
This element can be used freely by the conversion functions
|
||
in the module. `void *__data' can be used to communicate
|
||
extra information from one call to another. `void *__data'
|
||
need not be initialized if not needed at all. If the `void
|
||
*__data' element is assigned a pointer to dynamically
|
||
allocated memory (presumably in the init function) it has to
|
||
be made sure that the end function deallocates the memory.
|
||
Otherwise the application will leak memory.
|
||
|
||
It is important to be aware that this data structure is
|
||
shared by all users of this specific conversion and
|
||
therefore the `__data' element must not contain data specific
|
||
to one specific use of the conversion function.
|
||
|
||
-- Data type: struct __gconv_step_data
|
||
This is the data structure that contains the information specific
|
||
to each use of the conversion functions.
|
||
|
||
`char *__outbuf'
|
||
`char *__outbufend'
|
||
These elements specify the output buffer for the conversion
|
||
step. The `__outbuf' element points to the beginning of the
|
||
buffer, and `__outbufend' points to the byte following the
|
||
last byte in the buffer. The conversion function must not
|
||
assume anything about the size of the buffer but it can be
|
||
safely assumed there is room for at least one complete
|
||
character in the output buffer.
|
||
|
||
Once the conversion is finished, if the conversion is the
|
||
last step, the `__outbuf' element must be modified to point
|
||
after the last byte written into the buffer to signal how
|
||
much output is available. If this conversion step is not the
|
||
last one, the element must not be modified. The
|
||
`__outbufend' element must not be modified.
|
||
|
||
`int __is_last'
|
||
This element is nonzero if this conversion step is the last
|
||
one. This information is necessary for the recursion. See
|
||
the description of the conversion function internals below.
|
||
This element must never be modified.
|
||
|
||
`int __invocation_counter'
|
||
The conversion function can use this element to see how many
|
||
calls of the conversion function already happened. Some
|
||
character sets require a certain prolog when generating
|
||
output, and by comparing this value with zero, one can find
|
||
out whether it is the first call and whether, therefore, the
|
||
prolog should be emitted. This element must never be
|
||
modified.
|
||
|
||
`int __internal_use'
|
||
This element is another one rarely used but needed in certain
|
||
situations. It is assigned a nonzero value in case the
|
||
conversion functions are used to implement `mbsrtowcs' et al.
|
||
(i.e., the function is not used directly through the `iconv'
|
||
interface).
|
||
|
||
This sometimes makes a difference as it is expected that the
|
||
`iconv' functions are used to translate entire texts while the
|
||
`mbsrtowcs' functions are normally used only to convert single
|
||
strings and might be used multiple times to convert entire
|
||
texts.
|
||
|
||
But in this situation we would have problems complying with
|
||
some rules of the character set specification. Some
|
||
character sets require a prolog, which must appear exactly
|
||
once for an entire text. If a number of `mbsrtowcs' calls
|
||
are used to convert the text, only the first call must add
|
||
the prolog. However, because there is no communication
|
||
between the different calls of `mbsrtowcs', the conversion
|
||
functions have no possibility to find this out. The
|
||
situation is different for sequences of `iconv' calls since
|
||
the handle allows access to the needed information.
|
||
|
||
The `int __internal_use' element is mostly used together with
|
||
`__invocation_counter' as follows:
|
||
|
||
if (!data->__internal_use
|
||
&& data->__invocation_counter == 0)
|
||
/* Emit prolog. */
|
||
...
|
||
|
||
This element must never be modified.
|
||
|
||
`mbstate_t *__statep'
|
||
The `__statep' element points to an object of type `mbstate_t'
|
||
(*note Keeping the state::). The conversion of a stateful
|
||
character set must use the object pointed to by `__statep' to
|
||
store information about the conversion state. The `__statep'
|
||
element itself must never be modified.
|
||
|
||
`mbstate_t __state'
|
||
This element must _never_ be used directly. It is only part
|
||
of this structure to have the needed space allocated.
|
||
|
||
6.5.4.4 `iconv' module interfaces
|
||
.................................
|
||
|
||
With the knowledge about the data structures we now can describe the
|
||
conversion function itself. To understand the interface a bit of
|
||
knowledge is necessary about the functionality in the C library that
|
||
loads the objects with the conversions.
|
||
|
||
It is often the case that one conversion is used more than once
|
||
(i.e., there are several `iconv_open' calls for the same set of
|
||
character sets during one program run). The `mbsrtowcs' et al.
|
||
functions in the GNU C Library also use the `iconv' functionality, which
|
||
increases the number of uses of the same functions even more.
|
||
|
||
Because of this multiple use of conversions, the modules do not get
|
||
loaded exclusively for one conversion. Instead a module once loaded can
|
||
be used by an arbitrary number of `iconv' or `mbsrtowcs' calls at the
|
||
same time. The splitting of the information between conversion-
|
||
function-specific information and conversion data makes this possible.
|
||
The last section showed the two data structures used to do this.
|
||
|
||
This is of course also reflected in the interface and semantics of
|
||
the functions that the modules must provide. There are three functions
|
||
that must have the following names:
|
||
|
||
`gconv_init'
|
||
The `gconv_init' function initializes the conversion function
|
||
specific data structure. This very same object is shared by all
|
||
conversions that use this conversion and, therefore, no state
|
||
information about the conversion itself must be stored in here.
|
||
If a module implements more than one conversion, the `gconv_init'
|
||
function will be called multiple times.
|
||
|
||
`gconv_end'
|
||
The `gconv_end' function is responsible for freeing all resources
|
||
allocated by the `gconv_init' function. If there is nothing to do,
|
||
this function can be missing. Special care must be taken if the
|
||
module implements more than one conversion and the `gconv_init'
|
||
function does not allocate the same resources for all conversions.
|
||
|
||
`gconv'
|
||
This is the actual conversion function. It is called to convert
|
||
one block of text. It gets passed the conversion step information
|
||
initialized by `gconv_init' and the conversion data, specific to
|
||
this use of the conversion functions.
|
||
|
||
There are three data types defined for the three module interface
|
||
functions and these define the interface.
|
||
|
||
-- Data type: int (*__gconv_init_fct) (struct __gconv_step *)
|
||
This specifies the interface of the initialization function of the
|
||
module. It is called exactly once for each conversion the module
|
||
implements.
|
||
|
||
As explained in the description of the `struct __gconv_step' data
|
||
structure above the initialization function has to initialize
|
||
parts of it.
|
||
|
||
`__min_needed_from'
|
||
`__max_needed_from'
|
||
`__min_needed_to'
|
||
`__max_needed_to'
|
||
These elements must be initialized to the exact numbers of
|
||
the minimum and maximum number of bytes used by one character
|
||
in the source and destination character sets, respectively.
|
||
If the characters all have the same size, the minimum and
|
||
maximum values are the same.
|
||
|
||
`__stateful'
|
||
This element must be initialized to a nonzero value if the
|
||
source character set is stateful. Otherwise it must be zero.
|
||
|
||
If the initialization function needs to communicate some
|
||
information to the conversion function, this communication can
|
||
happen using the `__data' element of the `__gconv_step' structure.
|
||
But since this data is shared by all the conversions, it must not
|
||
be modified by the conversion function. The example below shows
|
||
how this can be used.
|
||
|
||
#define MIN_NEEDED_FROM 1
|
||
#define MAX_NEEDED_FROM 4
|
||
#define MIN_NEEDED_TO 4
|
||
#define MAX_NEEDED_TO 4
|
||
|
||
int
|
||
gconv_init (struct __gconv_step *step)
|
||
{
|
||
/* Determine which direction. */
|
||
struct iso2022jp_data *new_data;
|
||
enum direction dir = illegal_dir;
|
||
enum variant var = illegal_var;
|
||
int result;
|
||
|
||
if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0)
|
||
{
|
||
dir = from_iso2022jp;
|
||
var = iso2022jp;
|
||
}
|
||
else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0)
|
||
{
|
||
dir = to_iso2022jp;
|
||
var = iso2022jp;
|
||
}
|
||
else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0)
|
||
{
|
||
dir = from_iso2022jp;
|
||
var = iso2022jp2;
|
||
}
|
||
else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0)
|
||
{
|
||
dir = to_iso2022jp;
|
||
var = iso2022jp2;
|
||
}
|
||
|
||
result = __GCONV_NOCONV;
|
||
if (dir != illegal_dir)
|
||
{
|
||
new_data = (struct iso2022jp_data *)
|
||
malloc (sizeof (struct iso2022jp_data));
|
||
|
||
result = __GCONV_NOMEM;
|
||
if (new_data != NULL)
|
||
{
|
||
new_data->dir = dir;
|
||
new_data->var = var;
|
||
step->__data = new_data;
|
||
|
||
if (dir == from_iso2022jp)
|
||
{
|
||
step->__min_needed_from = MIN_NEEDED_FROM;
|
||
step->__max_needed_from = MAX_NEEDED_FROM;
|
||
step->__min_needed_to = MIN_NEEDED_TO;
|
||
step->__max_needed_to = MAX_NEEDED_TO;
|
||
}
|
||
else
|
||
{
|
||
step->__min_needed_from = MIN_NEEDED_TO;
|
||
step->__max_needed_from = MAX_NEEDED_TO;
|
||
step->__min_needed_to = MIN_NEEDED_FROM;
|
||
step->__max_needed_to = MAX_NEEDED_FROM + 2;
|
||
}
|
||
|
||
/* Yes, this is a stateful encoding. */
|
||
step->__stateful = 1;
|
||
|
||
result = __GCONV_OK;
|
||
}
|
||
}
|
||
|
||
return result;
|
||
}
|
||
|
||
The function first checks which conversion is wanted. The module
|
||
from which this function is taken implements four different
|
||
conversions; which one is selected can be determined by comparing
|
||
the names. The comparison should always be done without paying
|
||
attention to the case.
|
||
|
||
Next, a data structure, which contains the necessary information
|
||
about which conversion is selected, is allocated. The data
|
||
structure `struct iso2022jp_data' is locally defined since,
|
||
outside the module, this data is not used at all. Please note
|
||
that if all four conversions this module supports are requested
|
||
there are four data blocks.
|
||
|
||
One interesting thing is the initialization of the `__min_' and
|
||
`__max_' elements of the step data object. A single ISO-2022-JP
|
||
character can consist of one to four bytes. Therefore the
|
||
`MIN_NEEDED_FROM' and `MAX_NEEDED_FROM' macros are defined this
|
||
way. The output is always the `INTERNAL' character set (aka
|
||
UCS-4) and therefore each character consists of exactly four
|
||
bytes. For the conversion from `INTERNAL' to ISO-2022-JP we have
|
||
to take into account that escape sequences might be necessary to
|
||
switch the character sets. Therefore the `__max_needed_to'
|
||
element for this direction gets assigned `MAX_NEEDED_FROM + 2'.
|
||
This takes into account the two bytes needed for the escape
|
||
sequences to signal the switching. The asymmetry in the maximum
|
||
values for the two directions can be explained easily: when
|
||
reading ISO-2022-JP text, escape sequences can be handled alone
|
||
(i.e., it is not necessary to process a real character since the
|
||
effect of the escape sequence can be recorded in the state
|
||
information). The situation is different for the other direction.
|
||
Since it is in general not known which character comes next, one
|
||
cannot emit escape sequences to change the state in advance. This
|
||
means the escape sequences have to be emitted together with the
|
||
next character. Therefore one needs more room than only for the
|
||
character itself.
|
||
|
||
The possible return values of the initialization function are:
|
||
|
||
`__GCONV_OK'
|
||
The initialization succeeded.
|
||
|
||
`__GCONV_NOCONV'
|
||
The requested conversion is not supported in the module.
|
||
This can happen if the `gconv-modules' file has errors.
|
||
|
||
`__GCONV_NOMEM'
|
||
Memory required to store additional information could not be
|
||
allocated.
|
||
|
||
The function called before the module is unloaded is significantly
|
||
easier. It often has nothing at all to do; in which case it can be left
|
||
out completely.
|
||
|
||
-- Data type: void (*__gconv_end_fct) (struct __gconv_step *)
|
||
The task of this function is to free all resources allocated in the
|
||
initialization function. Therefore only the `__data' element of
|
||
the object pointed to by the argument is of interest. Continuing
|
||
the example from the initialization function, the finalization
|
||
function looks like this:
|
||
|
||
void
|
||
gconv_end (struct __gconv_step *data)
|
||
{
|
||
free (data->__data);
|
||
}
|
||
|
||
The most important function is the conversion function itself, which
|
||
can get quite complicated for complex character sets. But since this
|
||
is not of interest here, we will only describe a possible skeleton for
|
||
the conversion function.
|
||
|
||
-- Data type: int (*__gconv_fct) (struct __gconv_step *, struct
|
||
__gconv_step_data *, const char **, const char *, size_t *,
|
||
int)
|
||
The conversion function can be called for two basic reasons: to
|
||
convert text or to reset the state. From the description of the
|
||
`iconv' function it can be seen why the flushing mode is
|
||
necessary. What mode is selected is determined by the sixth
|
||
argument, an integer. This argument being nonzero means that
|
||
flushing is selected.
|
||
|
||
Common to both modes is where the output buffer can be found. The
|
||
information about this buffer is stored in the conversion step
|
||
data. A pointer to this information is passed as the second
|
||
argument to this function. The description of the `struct
|
||
__gconv_step_data' structure has more information on the
|
||
conversion step data.
|
||
|
||
What has to be done for flushing depends on the source character
|
||
set. If the source character set is not stateful, nothing has to
|
||
be done. Otherwise the function has to emit a byte sequence to
|
||
bring the state object into the initial state. Once this all
|
||
happened the other conversion modules in the chain of conversions
|
||
have to get the same chance. Whether another step follows can be
|
||
determined from the `__is_last' element of the step data structure
|
||
to which the second parameter points.
|
||
|
||
The more interesting mode is when actual text has to be converted.
|
||
The first step in this case is to convert as much text as
|
||
possible from the input buffer and store the result in the output
|
||
buffer. The start of the input buffer is determined by the third
|
||
argument, which is a pointer to a pointer variable referencing the
|
||
beginning of the buffer. The fourth argument is a pointer to the
|
||
byte right after the last byte in the buffer.
|
||
|
||
The conversion has to be performed according to the current state
|
||
if the character set is stateful. The state is stored in an
|
||
object pointed to by the `__statep' element of the step data
|
||
(second argument). Once either the input buffer is empty or the
|
||
output buffer is full the conversion stops. At this point, the
|
||
pointer variable referenced by the third parameter must point to
|
||
the byte following the last processed byte (i.e., if all of the
|
||
input is consumed, this pointer and the fourth parameter have the
|
||
same value).
|
||
|
||
What now happens depends on whether this step is the last one. If
|
||
it is the last step, the only thing that has to be done is to
|
||
update the `__outbuf' element of the step data structure to point
|
||
after the last written byte. This update gives the caller the
|
||
information on how much text is available in the output buffer.
|
||
In addition, the variable pointed to by the fifth parameter, which
|
||
is of type `size_t', must be incremented by the number of
|
||
characters (_not bytes_) that were converted in a non-reversible
|
||
way. Then, the function can return.
|
||
|
||
In case the step is not the last one, the later conversion
|
||
functions have to get a chance to do their work. Therefore, the
|
||
appropriate conversion function has to be called. The information
|
||
about the functions is stored in the conversion data structures,
|
||
passed as the first parameter. This information and the step data
|
||
are stored in arrays, so the next element in both cases can be
|
||
found by simple pointer arithmetic:
|
||
|
||
int
|
||
gconv (struct __gconv_step *step, struct __gconv_step_data *data,
|
||
const char **inbuf, const char *inbufend, size_t *written,
|
||
int do_flush)
|
||
{
|
||
struct __gconv_step *next_step = step + 1;
|
||
struct __gconv_step_data *next_data = data + 1;
|
||
...
|
||
|
||
The `next_step' pointer references the next step information and
|
||
`next_data' the next data record. The call of the next function
|
||
therefore will look similar to this:
|
||
|
||
next_step->__fct (next_step, next_data, &outerr, outbuf,
|
||
written, 0)
|
||
|
||
But this is not yet all. Once the function call returns the
|
||
conversion function might have some more to do. If the return
|
||
value of the function is `__GCONV_EMPTY_INPUT', more room is
|
||
available in the output buffer. Unless the input buffer is empty,
|
||
the conversion functions start all over again and process the rest
|
||
of the input buffer. If the return value is not
|
||
`__GCONV_EMPTY_INPUT', something went wrong and we have to recover
|
||
from this.
|
||
|
||
A requirement for the conversion function is that the input buffer
|
||
pointer (the third argument) always point to the last character
|
||
that was put in converted form into the output buffer. This is
|
||
trivially true after the conversion performed in the current step,
|
||
but if the conversion functions deeper downstream stop
|
||
prematurely, not all characters from the output buffer are
|
||
consumed and, therefore, the input buffer pointers must be backed
|
||
off to the right position.
|
||
|
||
Correcting the input buffers is easy to do if the input and output
|
||
character sets have a fixed width for all characters. In this
|
||
situation we can compute how many characters are left in the
|
||
output buffer and, therefore, can correct the input buffer pointer
|
||
appropriately with a similar computation. Things are getting
|
||
tricky if either character set has characters represented with
|
||
variable length byte sequences, and it gets even more complicated
|
||
if the conversion has to take care of the state. In these cases
|
||
the conversion has to be performed once again, from the known
|
||
state before the initial conversion (i.e., if necessary the state
|
||
of the conversion has to be reset and the conversion loop has to be
|
||
executed again). The difference now is that it is known how much
|
||
input must be created, and the conversion can stop before
|
||
converting the first unused character. Once this is done the
|
||
input buffer pointers must be updated again and the function can
|
||
return.
|
||
|
||
One final thing should be mentioned. If it is necessary for the
|
||
conversion to know whether it is the first invocation (in case a
|
||
prolog has to be emitted), the conversion function should
|
||
increment the `__invocation_counter' element of the step data
|
||
structure just before returning to the caller. See the
|
||
description of the `struct __gconv_step_data' structure above for
|
||
more information on how this can be used.
|
||
|
||
The return value must be one of the following values:
|
||
|
||
`__GCONV_EMPTY_INPUT'
|
||
All input was consumed and there is room left in the output
|
||
buffer.
|
||
|
||
`__GCONV_FULL_OUTPUT'
|
||
No more room in the output buffer. In case this is not the
|
||
last step this value is propagated down from the call of the
|
||
next conversion function in the chain.
|
||
|
||
`__GCONV_INCOMPLETE_INPUT'
|
||
The input buffer is not entirely empty since it contains an
|
||
incomplete character sequence.
|
||
|
||
The following example provides a framework for a conversion
|
||
function. In case a new conversion has to be written the holes in
|
||
this implementation have to be filled and that is it.
|
||
|
||
int
|
||
gconv (struct __gconv_step *step, struct __gconv_step_data *data,
|
||
const char **inbuf, const char *inbufend, size_t *written,
|
||
int do_flush)
|
||
{
|
||
struct __gconv_step *next_step = step + 1;
|
||
struct __gconv_step_data *next_data = data + 1;
|
||
gconv_fct fct = next_step->__fct;
|
||
int status;
|
||
|
||
/* If the function is called with no input this means we have
|
||
to reset to the initial state. The possibly partly
|
||
converted input is dropped. */
|
||
if (do_flush)
|
||
{
|
||
status = __GCONV_OK;
|
||
|
||
/* Possibly emit a byte sequence which puts the state object
|
||
into the initial state. */
|
||
|
||
/* Call the steps down the chain if there are any but only
|
||
if we successfully emitted the escape sequence. */
|
||
if (status == __GCONV_OK && ! data->__is_last)
|
||
status = fct (next_step, next_data, NULL, NULL,
|
||
written, 1);
|
||
}
|
||
else
|
||
{
|
||
/* We preserve the initial values of the pointer variables. */
|
||
const char *inptr = *inbuf;
|
||
char *outbuf = data->__outbuf;
|
||
char *outend = data->__outbufend;
|
||
char *outptr;
|
||
|
||
do
|
||
{
|
||
/* Remember the start value for this round. */
|
||
inptr = *inbuf;
|
||
/* The outbuf buffer is empty. */
|
||
outptr = outbuf;
|
||
|
||
/* For stateful encodings the state must be saved here. */
|
||
|
||
/* Run the conversion loop. `status' is set
|
||
appropriately afterwards. */
|
||
|
||
/* If this is the last step, leave the loop. There is
|
||
nothing we can do. */
|
||
if (data->__is_last)
|
||
{
|
||
/* Store information about how many bytes are
|
||
available. */
|
||
data->__outbuf = outbuf;
|
||
|
||
/* If any non-reversible conversions were performed,
|
||
add the number to `*written'. */
|
||
|
||
break;
|
||
}
|
||
|
||
/* Write out all output that was produced. */
|
||
if (outbuf > outptr)
|
||
{
|
||
const char *outerr = data->__outbuf;
|
||
int result;
|
||
|
||
result = fct (next_step, next_data, &outerr,
|
||
outbuf, written, 0);
|
||
|
||
if (result != __GCONV_EMPTY_INPUT)
|
||
{
|
||
if (outerr != outbuf)
|
||
{
|
||
/* Reset the input buffer pointer. We
|
||
document here the complex case. */
|
||
size_t nstatus;
|
||
|
||
/* Reload the pointers. */
|
||
*inbuf = inptr;
|
||
outbuf = outptr;
|
||
|
||
/* Possibly reset the state. */
|
||
|
||
/* Redo the conversion, but this time
|
||
the end of the output buffer is at
|
||
`outerr'. */
|
||
}
|
||
|
||
/* Change the status. */
|
||
status = result;
|
||
}
|
||
else
|
||
/* All the output is consumed, we can make
|
||
another run if everything was ok. */
|
||
if (status == __GCONV_FULL_OUTPUT)
|
||
status = __GCONV_OK;
|
||
}
|
||
}
|
||
while (status == __GCONV_OK);
|
||
|
||
/* We finished one use of this step. */
|
||
++data->__invocation_counter;
|
||
}
|
||
|
||
return status;
|
||
}
|
||
|
||
This information should be sufficient to write new modules. Anybody
|
||
doing so should also take a look at the available source code in the
|
||
GNU C Library sources. It contains many examples of working and
|
||
optimized modules.
|
||
|
||
|
||
File: libc.info, Node: Locales, Next: Message Translation, Prev: Character Set Handling, Up: Top
|
||
|
||
7 Locales and Internationalization
|
||
**********************************
|
||
|
||
Different countries and cultures have varying conventions for how to
|
||
communicate. These conventions range from very simple ones, such as the
|
||
format for representing dates and times, to very complex ones, such as
|
||
the language spoken.
|
||
|
||
"Internationalization" of software means programming it to be able
|
||
to adapt to the user's favorite conventions. In ISO C,
|
||
internationalization works by means of "locales". Each locale
|
||
specifies a collection of conventions, one convention for each purpose.
|
||
The user chooses a set of conventions by specifying a locale (via
|
||
environment variables).
|
||
|
||
All programs inherit the chosen locale as part of their environment.
|
||
Provided the programs are written to obey the choice of locale, they
|
||
will follow the conventions preferred by the user.
|
||
|
||
* Menu:
|
||
|
||
* Effects of Locale:: Actions affected by the choice of
|
||
locale.
|
||
* Choosing Locale:: How the user specifies a locale.
|
||
* Locale Categories:: Different purposes for which you can
|
||
select a locale.
|
||
* Setting the Locale:: How a program specifies the locale
|
||
with library functions.
|
||
* Standard Locales:: Locale names available on all systems.
|
||
* Locale Names:: Format of system-specific locale names.
|
||
* Locale Information:: How to access the information for the locale.
|
||
* Formatting Numbers:: A dedicated function to format numbers.
|
||
* Yes-or-No Questions:: Check a Response against the locale.
|
||
|
||
|
||
File: libc.info, Node: Effects of Locale, Next: Choosing Locale, Up: Locales
|
||
|
||
7.1 What Effects a Locale Has
|
||
=============================
|
||
|
||
Each locale specifies conventions for several purposes, including the
|
||
following:
|
||
|
||
* What multibyte character sequences are valid, and how they are
|
||
interpreted (*note Character Set Handling::).
|
||
|
||
* Classification of which characters in the local character set are
|
||
considered alphabetic, and upper- and lower-case conversion
|
||
conventions (*note Character Handling::).
|
||
|
||
* The collating sequence for the local language and character set
|
||
(*note Collation Functions::).
|
||
|
||
* Formatting of numbers and currency amounts (*note General
|
||
Numeric::).
|
||
|
||
* Formatting of dates and times (*note Formatting Calendar Time::).
|
||
|
||
* What language to use for output, including error messages (*note
|
||
Message Translation::).
|
||
|
||
* What language to use for user answers to yes-or-no questions
|
||
(*note Yes-or-No Questions::).
|
||
|
||
* What language to use for more complex user input. (The C library
|
||
doesn't yet help you implement this.)
|
||
|
||
Some aspects of adapting to the specified locale are handled
|
||
automatically by the library subroutines. For example, all your program
|
||
needs to do in order to use the collating sequence of the chosen locale
|
||
is to use `strcoll' or `strxfrm' to compare strings.
|
||
|
||
Other aspects of locales are beyond the comprehension of the library.
|
||
For example, the library can't automatically translate your program's
|
||
output messages into other languages. The only way you can support
|
||
output in the user's favorite language is to program this more or less
|
||
by hand. The C library provides functions to handle translations for
|
||
multiple languages easily.
|
||
|
||
This chapter discusses the mechanism by which you can modify the
|
||
current locale. The effects of the current locale on specific library
|
||
functions are discussed in more detail in the descriptions of those
|
||
functions.
|
||
|
||
|
||
File: libc.info, Node: Choosing Locale, Next: Locale Categories, Prev: Effects of Locale, Up: Locales
|
||
|
||
7.2 Choosing a Locale
|
||
=====================
|
||
|
||
The simplest way for the user to choose a locale is to set the
|
||
environment variable `LANG'. This specifies a single locale to use for
|
||
all purposes. For example, a user could specify a hypothetical locale
|
||
named `espana-castellano' to use the standard conventions of most of
|
||
Spain.
|
||
|
||
The set of locales supported depends on the operating system you are
|
||
using, and so do their names, except that the standard locale called
|
||
`C' or `POSIX' always exists. *Note Locale Names::.
|
||
|
||
In order to force the system to always use the default locale, the
|
||
user can set the `LC_ALL' environment variable to `C'.
|
||
|
||
A user also has the option of specifying different locales for
|
||
different purposes--in effect, choosing a mixture of multiple locales.
|
||
*Note Locale Categories::.
|
||
|
||
For example, the user might specify the locale `espana-castellano'
|
||
for most purposes, but specify the locale `usa-english' for currency
|
||
formatting. This might make sense if the user is a Spanish-speaking
|
||
American, working in Spanish, but representing monetary amounts in US
|
||
dollars.
|
||
|
||
Note that both locales `espana-castellano' and `usa-english', like
|
||
all locales, would include conventions for all of the purposes to which
|
||
locales apply. However, the user can choose to use each locale for a
|
||
particular subset of those purposes.
|
||
|
||
|
||
File: libc.info, Node: Locale Categories, Next: Setting the Locale, Prev: Choosing Locale, Up: Locales
|
||
|
||
7.3 Locale Categories
|
||
=====================
|
||
|
||
The purposes that locales serve are grouped into "categories", so that
|
||
a user or a program can choose the locale for each category
|
||
independently. Here is a table of categories; each name is both an
|
||
environment variable that a user can set, and a macro name that you can
|
||
use as the first argument to `setlocale'.
|
||
|
||
The value of the environment variable (or the string in the second
|
||
argument to `setlocale') has to be a valid locale name. *Note Locale
|
||
Names::.
|
||
|
||
`LC_COLLATE'
|
||
This category applies to collation of strings (functions `strcoll'
|
||
and `strxfrm'); see *Note Collation Functions::.
|
||
|
||
`LC_CTYPE'
|
||
This category applies to classification and conversion of
|
||
characters, and to multibyte and wide characters; see *Note
|
||
Character Handling::, and *Note Character Set Handling::.
|
||
|
||
`LC_MONETARY'
|
||
This category applies to formatting monetary values; see *Note
|
||
General Numeric::.
|
||
|
||
`LC_NUMERIC'
|
||
This category applies to formatting numeric values that are not
|
||
monetary; see *Note General Numeric::.
|
||
|
||
`LC_TIME'
|
||
This category applies to formatting date and time values; see
|
||
*Note Formatting Calendar Time::.
|
||
|
||
`LC_MESSAGES'
|
||
This category applies to selecting the language used in the user
|
||
interface for message translation (*note The Uniforum approach::;
|
||
*note Message catalogs a la X/Open::) and contains regular
|
||
expressions for affirmative and negative responses.
|
||
|
||
`LC_ALL'
|
||
This is not a category; it is only a macro that you can use with
|
||
`setlocale' to set a single locale for all purposes. Setting this
|
||
environment variable overrides all selections made by the other `LC_*'
|
||
variables or `LANG'.
|
||
|
||
`LANG'
|
||
If this environment variable is defined, its value specifies the
|
||
locale to use for all purposes except as overridden by the
|
||
variables above.
|
||
|
||
When developing the message translation functions it was felt that
|
||
the functionality provided by the variables above is not sufficient.
|
||
For example, it should be possible to specify more than one locale name.
|
||
Take a Swedish user who speaks German better than English, and a program
|
||
whose messages are output in English by default. It should be possible
|
||
to specify that the first choice of language is Swedish, the second
|
||
German, and, if this also fails, to use English. This is possible with
|
||
the variable `LANGUAGE'. For further description of this GNU extension
|
||
see *Note Using gettextized software::.
|
||
|
||
|
||
File: libc.info, Node: Setting the Locale, Next: Standard Locales, Prev: Locale Categories, Up: Locales
|
||
|
||
7.4 How Programs Set the Locale
|
||
===============================
|
||
|
||
A C program inherits its locale environment variables when it starts up.
|
||
This happens automatically. However, these variables do not
|
||
automatically control the locale used by the library functions, because
|
||
ISO C says that all programs start by default in the standard `C'
|
||
locale. To use the locales specified by the environment, you must call
|
||
`setlocale'. Call it as follows:
|
||
|
||
setlocale (LC_ALL, "");
|
||
|
||
to select a locale based on the user's choice of the appropriate
|
||
environment variables.
|
||
|
||
You can also use `setlocale' to specify a particular locale, for
|
||
general use or for a specific category.
|
||
|
||
The symbols in this section are defined in the header file
|
||
`locale.h'.
|
||
|
||
-- Function: char * setlocale (int CATEGORY, const char *LOCALE)
|
||
Preliminary: | MT-Unsafe const:locale env | AS-Unsafe init lock
|
||
heap corrupt | AC-Unsafe init corrupt lock mem fd | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
The function `setlocale' sets the current locale for category
|
||
CATEGORY to LOCALE.
|
||
|
||
If CATEGORY is `LC_ALL', this specifies the locale for all
|
||
purposes. The other possible values of CATEGORY specify a single
|
||
purpose (*note Locale Categories::).
|
||
|
||
You can also use this function to find out the current locale by
|
||
passing a null pointer as the LOCALE argument. In this case,
|
||
`setlocale' returns a string that is the name of the locale
|
||
currently selected for category CATEGORY.
|
||
|
||
The string returned by `setlocale' can be overwritten by subsequent
|
||
calls, so you should make a copy of the string (*note Copying
|
||
Strings and Arrays::) if you want to save it past any further
|
||
calls to `setlocale'. (The standard library is guaranteed never
|
||
to call `setlocale' itself.)
|
||
|
||
You should not modify the string returned by `setlocale'. It might
|
||
be the same string that was passed as an argument in a previous
|
||
call to `setlocale'. One requirement is that the CATEGORY must be
|
||
the same in the call that returned the string and the one in which the
|
||
string is passed in as the LOCALE parameter.
|
||
|
||
When you read the current locale for category `LC_ALL', the value
|
||
encodes the entire combination of selected locales for all
|
||
categories. If you specify the same "locale name" with `LC_ALL'
|
||
in a subsequent call to `setlocale', it restores the same
|
||
combination of locale selections.
|
||
|
||
To be sure you can use the returned string encoding the currently
|
||
selected locale at a later time, you must make a copy of the
|
||
string. It is not guaranteed that the returned pointer remains
|
||
valid over time.
|
||
|
||
When the LOCALE argument is not a null pointer, the string returned
|
||
by `setlocale' reflects the newly-modified locale.
|
||
|
||
If you specify an empty string for LOCALE, this means to read the
|
||
appropriate environment variable and use its value to select the
|
||
locale for CATEGORY.
|
||
|
||
If a nonempty string is given for LOCALE, then the locale of that
|
||
name is used if possible.
|
||
|
||
The effective locale name (either the second argument to
|
||
`setlocale', or if the argument is an empty string, the name
|
||
obtained from the process environment) must be a valid locale name.
|
||
*Note Locale Names::.
|
||
|
||
If you specify an invalid locale name, `setlocale' returns a null
|
||
pointer and leaves the current locale unchanged.
|
||
|
||
Here is an example showing how you might use `setlocale' to
|
||
temporarily switch to a new locale.
|
||
|
||
#include <stddef.h>
|
||
#include <locale.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
|
||
void
|
||
with_other_locale (char *new_locale,
|
||
void (*subroutine) (int),
|
||
int argument)
|
||
{
|
||
char *old_locale, *saved_locale;
|
||
|
||
/* Get the name of the current locale. */
|
||
old_locale = setlocale (LC_ALL, NULL);
|
||
|
||
/* Copy the name so it won't be clobbered by `setlocale'. */
|
||
saved_locale = strdup (old_locale);
|
||
if (saved_locale == NULL)
|
||
fatal ("Out of memory");
|
||
|
||
/* Now change the locale and do some stuff with it. */
|
||
setlocale (LC_ALL, new_locale);
|
||
(*subroutine) (argument);
|
||
|
||
/* Restore the original locale. */
|
||
setlocale (LC_ALL, saved_locale);
|
||
free (saved_locale);
|
||
}
|
||
|
||
*Portability Note:* Some ISO C systems may define additional locale
|
||
categories, and future versions of the library will do so. For
|
||
portability, assume that any symbol beginning with `LC_' might be
|
||
defined in `locale.h'.
|
||
|
||
|
||
File: libc.info, Node: Standard Locales, Next: Locale Names, Prev: Setting the Locale, Up: Locales
|
||
|
||
7.5 Standard Locales
|
||
====================
|
||
|
||
The only locale names you can count on finding on all operating systems
|
||
are these three standard ones:
|
||
|
||
`"C"'
|
||
This is the standard C locale. The attributes and behavior it
|
||
provides are specified in the ISO C standard. When your program
|
||
starts up, it initially uses this locale by default.
|
||
|
||
`"POSIX"'
|
||
This is the standard POSIX locale. Currently, it is an alias for
|
||
the standard C locale.
|
||
|
||
`""'
|
||
The empty name says to select a locale based on environment
|
||
variables. *Note Locale Categories::.
|
||
|
||
Defining and installing named locales is normally a responsibility of
|
||
the system administrator at your site (or the person who installed the
|
||
GNU C Library). It is also possible for the user to create private
|
||
locales. All this will be discussed later when describing the tool to
|
||
do so.
|
||
|
||
If your program needs to use something other than the `C' locale, it
|
||
will be more portable if you use whatever locale the user specifies
|
||
with the environment, rather than trying to specify some non-standard
|
||
locale explicitly by name. Remember, different machines might have
|
||
different sets of locales installed.
|
||
|
||
|
||
File: libc.info, Node: Locale Names, Next: Locale Information, Prev: Standard Locales, Up: Locales
|
||
|
||
7.6 Locale Names
|
||
================
|
||
|
||
The following command prints a list of locales supported by the system:
|
||
|
||
locale -a
|
||
|
||
*Portability Note:* With the notable exception of the standard
|
||
locale names `C' and `POSIX', locale names are system-specific.
|
||
|
||
Most locale names follow XPG syntax and consist of up to four parts:
|
||
|
||
LANGUAGE[_TERRITORY[.CODESET]][@MODIFIER]
|
||
|
||
Except for the first part, all of them are allowed to be missing. If the
|
||
fully specified locale is not found, less specific ones are looked for.
|
||
The various parts will be stripped off, in the following order:
|
||
|
||
1. codeset
|
||
|
||
2. normalized codeset
|
||
|
||
3. territory
|
||
|
||
4. modifier
|
||
|
||
For example, the locale name `de_AT.iso885915@euro' denotes a
|
||
German-language locale for use in Austria, using the ISO-8859-15
|
||
(Latin-9) character set, and with the Euro as the currency symbol.
|
||
|
||
In addition to locale names which follow XPG syntax, systems may
|
||
provide aliases such as `german'. Names of either category must not
|
||
contain the slash character `/'.
|
||
|
||
If the locale name starts with a slash `/', it is treated as a path
|
||
relative to the configured locale directories; see `LOCPATH' below.
|
||
The specified path must not contain a component `..', or the name is
|
||
invalid, and `setlocale' will fail.
|
||
|
||
*Portability Note:* POSIX suggests that if a locale name starts with
|
||
a slash `/', it is resolved as an absolute path. However, the GNU C
|
||
Library treats it as a relative path under the directories listed in
|
||
`LOCPATH' (or the default locale directory if `LOCPATH' is unset).
|
||
|
||
Locale names which are longer than an implementation-defined limit
|
||
are invalid and cause `setlocale' to fail.
|
||
|
||
As a special case, locale names used with `LC_ALL' can combine
|
||
several locales, reflecting different locale settings for different
|
||
categories. For example, you might want to use a U.S. locale with ISO
|
||
A4 paper format, so you set `LANG' to `en_US.UTF-8', and `LC_PAPER' to
|
||
`de_DE.UTF-8'. In this case, the `LC_ALL'-style combined locale name is
|
||
|
||
LC_CTYPE=en_US.UTF-8;LC_TIME=en_US.UTF-8;LC_PAPER=de_DE.UTF-8;...
|
||
|
||
followed by other category settings not shown here.
|
||
|
||
The path used for finding locale data can be set using the `LOCPATH'
|
||
environment variable. This variable lists the directories in which to
|
||
search for locale definitions, separated by a colon `:'.
|
||
|
||
The default path for finding locale data is system-specific. A
|
||
typical value for the `LOCPATH' default is:
|
||
|
||
/usr/share/locale
|
||
|
||
The value of `LOCPATH' is ignored by privileged programs for
|
||
security reasons, and only the default directory is used.
|
||
|