commit d1edce71135cc6d98c0a4b5729774542b676e769 Author: sophgo-forum-service <forum_service@sophgo.com> Date: Fri Mar 15 16:07:33 2024 +0800 [fix] recommend using ssh method to clone repo. [fix] fix sensor driver repo branch name.
6860 lines
291 KiB
Plaintext
6860 lines
291 KiB
Plaintext
This is libc.info, produced by makeinfo version 5.2 from libc.texinfo.

This file documents the GNU C Library.

This is ‘The GNU C Library Reference Manual’, for version 2.23.

Copyright © 1993–2016 Free Software Foundation, Inc.
|
||
|
||
Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.3 or
any later version published by the Free Software Foundation; with the
Invariant Sections being “Free Software Needs Free Documentation” and
“GNU Lesser General Public License”, the Front-Cover texts being “A GNU
Manual”, and with the Back-Cover Texts as in (a) below. A copy of the
license is included in the section entitled "GNU Free Documentation
License".
|
||
|
||
(a) The FSF’s Back-Cover Text is: “You have the freedom to copy and
modify this GNU manual. Buying copies from the FSF supports it in
developing GNU and promoting software freedom.”
|
||
INFO-DIR-SECTION Software libraries
|
||
START-INFO-DIR-ENTRY
|
||
* Libc: (libc). C library.
|
||
END-INFO-DIR-ENTRY
|
||
|
||
INFO-DIR-SECTION GNU C library functions and macros
|
||
START-INFO-DIR-ENTRY
|
||
* ALTWERASE: (libc)Local Modes.
|
||
* ARGP_ERR_UNKNOWN: (libc)Argp Parser Functions.
|
||
* ARG_MAX: (libc)General Limits.
|
||
* BC_BASE_MAX: (libc)Utility Limits.
|
||
* BC_DIM_MAX: (libc)Utility Limits.
|
||
* BC_SCALE_MAX: (libc)Utility Limits.
|
||
* BC_STRING_MAX: (libc)Utility Limits.
|
||
* BRKINT: (libc)Input Modes.
|
||
* BUFSIZ: (libc)Controlling Buffering.
|
||
* CCTS_OFLOW: (libc)Control Modes.
|
||
* CHILD_MAX: (libc)General Limits.
|
||
* CIGNORE: (libc)Control Modes.
|
||
* CLK_TCK: (libc)Processor Time.
|
||
* CLOCAL: (libc)Control Modes.
|
||
* CLOCKS_PER_SEC: (libc)CPU Time.
|
||
* COLL_WEIGHTS_MAX: (libc)Utility Limits.
|
||
* CPU_CLR: (libc)CPU Affinity.
|
||
* CPU_ISSET: (libc)CPU Affinity.
|
||
* CPU_SET: (libc)CPU Affinity.
|
||
* CPU_SETSIZE: (libc)CPU Affinity.
|
||
* CPU_ZERO: (libc)CPU Affinity.
|
||
* CREAD: (libc)Control Modes.
|
||
* CRTS_IFLOW: (libc)Control Modes.
|
||
* CS5: (libc)Control Modes.
|
||
* CS6: (libc)Control Modes.
|
||
* CS7: (libc)Control Modes.
|
||
* CS8: (libc)Control Modes.
|
||
* CSIZE: (libc)Control Modes.
|
||
* CSTOPB: (libc)Control Modes.
|
||
* DES_FAILED: (libc)DES Encryption.
|
||
* DTTOIF: (libc)Directory Entries.
|
||
* E2BIG: (libc)Error Codes.
|
||
* EACCES: (libc)Error Codes.
|
||
* EADDRINUSE: (libc)Error Codes.
|
||
* EADDRNOTAVAIL: (libc)Error Codes.
|
||
* EADV: (libc)Error Codes.
|
||
* EAFNOSUPPORT: (libc)Error Codes.
|
||
* EAGAIN: (libc)Error Codes.
|
||
* EALREADY: (libc)Error Codes.
|
||
* EAUTH: (libc)Error Codes.
|
||
* EBACKGROUND: (libc)Error Codes.
|
||
* EBADE: (libc)Error Codes.
|
||
* EBADF: (libc)Error Codes.
|
||
* EBADFD: (libc)Error Codes.
|
||
* EBADMSG: (libc)Error Codes.
|
||
* EBADR: (libc)Error Codes.
|
||
* EBADRPC: (libc)Error Codes.
|
||
* EBADRQC: (libc)Error Codes.
|
||
* EBADSLT: (libc)Error Codes.
|
||
* EBFONT: (libc)Error Codes.
|
||
* EBUSY: (libc)Error Codes.
|
||
* ECANCELED: (libc)Error Codes.
|
||
* ECHILD: (libc)Error Codes.
|
||
* ECHO: (libc)Local Modes.
|
||
* ECHOCTL: (libc)Local Modes.
|
||
* ECHOE: (libc)Local Modes.
|
||
* ECHOK: (libc)Local Modes.
|
||
* ECHOKE: (libc)Local Modes.
|
||
* ECHONL: (libc)Local Modes.
|
||
* ECHOPRT: (libc)Local Modes.
|
||
* ECHRNG: (libc)Error Codes.
|
||
* ECOMM: (libc)Error Codes.
|
||
* ECONNABORTED: (libc)Error Codes.
|
||
* ECONNREFUSED: (libc)Error Codes.
|
||
* ECONNRESET: (libc)Error Codes.
|
||
* ED: (libc)Error Codes.
|
||
* EDEADLK: (libc)Error Codes.
|
||
* EDEADLOCK: (libc)Error Codes.
|
||
* EDESTADDRREQ: (libc)Error Codes.
|
||
* EDIED: (libc)Error Codes.
|
||
* EDOM: (libc)Error Codes.
|
||
* EDOTDOT: (libc)Error Codes.
|
||
* EDQUOT: (libc)Error Codes.
|
||
* EEXIST: (libc)Error Codes.
|
||
* EFAULT: (libc)Error Codes.
|
||
* EFBIG: (libc)Error Codes.
|
||
* EFTYPE: (libc)Error Codes.
|
||
* EGRATUITOUS: (libc)Error Codes.
|
||
* EGREGIOUS: (libc)Error Codes.
|
||
* EHOSTDOWN: (libc)Error Codes.
|
||
* EHOSTUNREACH: (libc)Error Codes.
|
||
* EHWPOISON: (libc)Error Codes.
|
||
* EIDRM: (libc)Error Codes.
|
||
* EIEIO: (libc)Error Codes.
|
||
* EILSEQ: (libc)Error Codes.
|
||
* EINPROGRESS: (libc)Error Codes.
|
||
* EINTR: (libc)Error Codes.
|
||
* EINVAL: (libc)Error Codes.
|
||
* EIO: (libc)Error Codes.
|
||
* EISCONN: (libc)Error Codes.
|
||
* EISDIR: (libc)Error Codes.
|
||
* EISNAM: (libc)Error Codes.
|
||
* EKEYEXPIRED: (libc)Error Codes.
|
||
* EKEYREJECTED: (libc)Error Codes.
|
||
* EKEYREVOKED: (libc)Error Codes.
|
||
* EL2HLT: (libc)Error Codes.
|
||
* EL2NSYNC: (libc)Error Codes.
|
||
* EL3HLT: (libc)Error Codes.
|
||
* EL3RST: (libc)Error Codes.
|
||
* ELIBACC: (libc)Error Codes.
|
||
* ELIBBAD: (libc)Error Codes.
|
||
* ELIBEXEC: (libc)Error Codes.
|
||
* ELIBMAX: (libc)Error Codes.
|
||
* ELIBSCN: (libc)Error Codes.
|
||
* ELNRNG: (libc)Error Codes.
|
||
* ELOOP: (libc)Error Codes.
|
||
* EMEDIUMTYPE: (libc)Error Codes.
|
||
* EMFILE: (libc)Error Codes.
|
||
* EMLINK: (libc)Error Codes.
|
||
* EMSGSIZE: (libc)Error Codes.
|
||
* EMULTIHOP: (libc)Error Codes.
|
||
* ENAMETOOLONG: (libc)Error Codes.
|
||
* ENAVAIL: (libc)Error Codes.
|
||
* ENEEDAUTH: (libc)Error Codes.
|
||
* ENETDOWN: (libc)Error Codes.
|
||
* ENETRESET: (libc)Error Codes.
|
||
* ENETUNREACH: (libc)Error Codes.
|
||
* ENFILE: (libc)Error Codes.
|
||
* ENOANO: (libc)Error Codes.
|
||
* ENOBUFS: (libc)Error Codes.
|
||
* ENOCSI: (libc)Error Codes.
|
||
* ENODATA: (libc)Error Codes.
|
||
* ENODEV: (libc)Error Codes.
|
||
* ENOENT: (libc)Error Codes.
|
||
* ENOEXEC: (libc)Error Codes.
|
||
* ENOKEY: (libc)Error Codes.
|
||
* ENOLCK: (libc)Error Codes.
|
||
* ENOLINK: (libc)Error Codes.
|
||
* ENOMEDIUM: (libc)Error Codes.
|
||
* ENOMEM: (libc)Error Codes.
|
||
* ENOMSG: (libc)Error Codes.
|
||
* ENONET: (libc)Error Codes.
|
||
* ENOPKG: (libc)Error Codes.
|
||
* ENOPROTOOPT: (libc)Error Codes.
|
||
* ENOSPC: (libc)Error Codes.
|
||
* ENOSR: (libc)Error Codes.
|
||
* ENOSTR: (libc)Error Codes.
|
||
* ENOSYS: (libc)Error Codes.
|
||
* ENOTBLK: (libc)Error Codes.
|
||
* ENOTCONN: (libc)Error Codes.
|
||
* ENOTDIR: (libc)Error Codes.
|
||
* ENOTEMPTY: (libc)Error Codes.
|
||
* ENOTNAM: (libc)Error Codes.
|
||
* ENOTRECOVERABLE: (libc)Error Codes.
|
||
* ENOTSOCK: (libc)Error Codes.
|
||
* ENOTSUP: (libc)Error Codes.
|
||
* ENOTTY: (libc)Error Codes.
|
||
* ENOTUNIQ: (libc)Error Codes.
|
||
* ENXIO: (libc)Error Codes.
|
||
* EOF: (libc)EOF and Errors.
|
||
* EOPNOTSUPP: (libc)Error Codes.
|
||
* EOVERFLOW: (libc)Error Codes.
|
||
* EOWNERDEAD: (libc)Error Codes.
|
||
* EPERM: (libc)Error Codes.
|
||
* EPFNOSUPPORT: (libc)Error Codes.
|
||
* EPIPE: (libc)Error Codes.
|
||
* EPROCLIM: (libc)Error Codes.
|
||
* EPROCUNAVAIL: (libc)Error Codes.
|
||
* EPROGMISMATCH: (libc)Error Codes.
|
||
* EPROGUNAVAIL: (libc)Error Codes.
|
||
* EPROTO: (libc)Error Codes.
|
||
* EPROTONOSUPPORT: (libc)Error Codes.
|
||
* EPROTOTYPE: (libc)Error Codes.
|
||
* EQUIV_CLASS_MAX: (libc)Utility Limits.
|
||
* ERANGE: (libc)Error Codes.
|
||
* EREMCHG: (libc)Error Codes.
|
||
* EREMOTE: (libc)Error Codes.
|
||
* EREMOTEIO: (libc)Error Codes.
|
||
* ERESTART: (libc)Error Codes.
|
||
* ERFKILL: (libc)Error Codes.
|
||
* EROFS: (libc)Error Codes.
|
||
* ERPCMISMATCH: (libc)Error Codes.
|
||
* ESHUTDOWN: (libc)Error Codes.
|
||
* ESOCKTNOSUPPORT: (libc)Error Codes.
|
||
* ESPIPE: (libc)Error Codes.
|
||
* ESRCH: (libc)Error Codes.
|
||
* ESRMNT: (libc)Error Codes.
|
||
* ESTALE: (libc)Error Codes.
|
||
* ESTRPIPE: (libc)Error Codes.
|
||
* ETIME: (libc)Error Codes.
|
||
* ETIMEDOUT: (libc)Error Codes.
|
||
* ETOOMANYREFS: (libc)Error Codes.
|
||
* ETXTBSY: (libc)Error Codes.
|
||
* EUCLEAN: (libc)Error Codes.
|
||
* EUNATCH: (libc)Error Codes.
|
||
* EUSERS: (libc)Error Codes.
|
||
* EWOULDBLOCK: (libc)Error Codes.
|
||
* EXDEV: (libc)Error Codes.
|
||
* EXFULL: (libc)Error Codes.
|
||
* EXIT_FAILURE: (libc)Exit Status.
|
||
* EXIT_SUCCESS: (libc)Exit Status.
|
||
* EXPR_NEST_MAX: (libc)Utility Limits.
|
||
* FD_CLOEXEC: (libc)Descriptor Flags.
|
||
* FD_CLR: (libc)Waiting for I/O.
|
||
* FD_ISSET: (libc)Waiting for I/O.
|
||
* FD_SET: (libc)Waiting for I/O.
|
||
* FD_SETSIZE: (libc)Waiting for I/O.
|
||
* FD_ZERO: (libc)Waiting for I/O.
|
||
* FILENAME_MAX: (libc)Limits for Files.
|
||
* FLUSHO: (libc)Local Modes.
|
||
* FOPEN_MAX: (libc)Opening Streams.
|
||
* FP_ILOGB0: (libc)Exponents and Logarithms.
|
||
* FP_ILOGBNAN: (libc)Exponents and Logarithms.
|
||
* F_DUPFD: (libc)Duplicating Descriptors.
|
||
* F_GETFD: (libc)Descriptor Flags.
|
||
* F_GETFL: (libc)Getting File Status Flags.
|
||
* F_GETLK: (libc)File Locks.
|
||
* F_GETOWN: (libc)Interrupt Input.
|
||
* F_OFD_GETLK: (libc)Open File Description Locks.
|
||
* F_OFD_SETLK: (libc)Open File Description Locks.
|
||
* F_OFD_SETLKW: (libc)Open File Description Locks.
|
||
* F_OK: (libc)Testing File Access.
|
||
* F_SETFD: (libc)Descriptor Flags.
|
||
* F_SETFL: (libc)Getting File Status Flags.
|
||
* F_SETLK: (libc)File Locks.
|
||
* F_SETLKW: (libc)File Locks.
|
||
* F_SETOWN: (libc)Interrupt Input.
|
||
* HUGE_VAL: (libc)Math Error Reporting.
|
||
* HUGE_VALF: (libc)Math Error Reporting.
|
||
* HUGE_VALL: (libc)Math Error Reporting.
|
||
* HUPCL: (libc)Control Modes.
|
||
* I: (libc)Complex Numbers.
|
||
* ICANON: (libc)Local Modes.
|
||
* ICRNL: (libc)Input Modes.
|
||
* IEXTEN: (libc)Local Modes.
|
||
* IFNAMSIZ: (libc)Interface Naming.
|
||
* IFTODT: (libc)Directory Entries.
|
||
* IGNBRK: (libc)Input Modes.
|
||
* IGNCR: (libc)Input Modes.
|
||
* IGNPAR: (libc)Input Modes.
|
||
* IMAXBEL: (libc)Input Modes.
|
||
* INADDR_ANY: (libc)Host Address Data Type.
|
||
* INADDR_BROADCAST: (libc)Host Address Data Type.
|
||
* INADDR_LOOPBACK: (libc)Host Address Data Type.
|
||
* INADDR_NONE: (libc)Host Address Data Type.
|
||
* INFINITY: (libc)Infinity and NaN.
|
||
* INLCR: (libc)Input Modes.
|
||
* INPCK: (libc)Input Modes.
|
||
* IPPORT_RESERVED: (libc)Ports.
|
||
* IPPORT_USERRESERVED: (libc)Ports.
|
||
* ISIG: (libc)Local Modes.
|
||
* ISTRIP: (libc)Input Modes.
|
||
* IXANY: (libc)Input Modes.
|
||
* IXOFF: (libc)Input Modes.
|
||
* IXON: (libc)Input Modes.
|
||
* LINE_MAX: (libc)Utility Limits.
|
||
* LINK_MAX: (libc)Limits for Files.
|
||
* L_ctermid: (libc)Identifying the Terminal.
|
||
* L_cuserid: (libc)Who Logged In.
|
||
* L_tmpnam: (libc)Temporary Files.
|
||
* MAXNAMLEN: (libc)Limits for Files.
|
||
* MAXSYMLINKS: (libc)Symbolic Links.
|
||
* MAX_CANON: (libc)Limits for Files.
|
||
* MAX_INPUT: (libc)Limits for Files.
|
||
* MB_CUR_MAX: (libc)Selecting the Conversion.
|
||
* MB_LEN_MAX: (libc)Selecting the Conversion.
|
||
* MDMBUF: (libc)Control Modes.
|
||
* MSG_DONTROUTE: (libc)Socket Data Options.
|
||
* MSG_OOB: (libc)Socket Data Options.
|
||
* MSG_PEEK: (libc)Socket Data Options.
|
||
* NAME_MAX: (libc)Limits for Files.
|
||
* NAN: (libc)Infinity and NaN.
|
||
* NCCS: (libc)Mode Data Types.
|
||
* NGROUPS_MAX: (libc)General Limits.
|
||
* NOFLSH: (libc)Local Modes.
|
||
* NOKERNINFO: (libc)Local Modes.
|
||
* NSIG: (libc)Standard Signals.
|
||
* NULL: (libc)Null Pointer Constant.
|
||
* ONLCR: (libc)Output Modes.
|
||
* ONOEOT: (libc)Output Modes.
|
||
* OPEN_MAX: (libc)General Limits.
|
||
* OPOST: (libc)Output Modes.
|
||
* OXTABS: (libc)Output Modes.
|
||
* O_ACCMODE: (libc)Access Modes.
|
||
* O_APPEND: (libc)Operating Modes.
|
||
* O_ASYNC: (libc)Operating Modes.
|
||
* O_CREAT: (libc)Open-time Flags.
|
||
* O_EXCL: (libc)Open-time Flags.
|
||
* O_EXEC: (libc)Access Modes.
|
||
* O_EXLOCK: (libc)Open-time Flags.
|
||
* O_FSYNC: (libc)Operating Modes.
|
||
* O_IGNORE_CTTY: (libc)Open-time Flags.
|
||
* O_NDELAY: (libc)Operating Modes.
|
||
* O_NOATIME: (libc)Operating Modes.
|
||
* O_NOCTTY: (libc)Open-time Flags.
|
||
* O_NOLINK: (libc)Open-time Flags.
|
||
* O_NONBLOCK: (libc)Open-time Flags.
|
||
* O_NONBLOCK: (libc)Operating Modes.
|
||
* O_NOTRANS: (libc)Open-time Flags.
|
||
* O_RDONLY: (libc)Access Modes.
|
||
* O_RDWR: (libc)Access Modes.
|
||
* O_READ: (libc)Access Modes.
|
||
* O_SHLOCK: (libc)Open-time Flags.
|
||
* O_SYNC: (libc)Operating Modes.
|
||
* O_TRUNC: (libc)Open-time Flags.
|
||
* O_WRITE: (libc)Access Modes.
|
||
* O_WRONLY: (libc)Access Modes.
|
||
* PARENB: (libc)Control Modes.
|
||
* PARMRK: (libc)Input Modes.
|
||
* PARODD: (libc)Control Modes.
|
||
* PATH_MAX: (libc)Limits for Files.
|
||
* PA_FLAG_MASK: (libc)Parsing a Template String.
|
||
* PENDIN: (libc)Local Modes.
|
||
* PF_FILE: (libc)Local Namespace Details.
|
||
* PF_INET6: (libc)Internet Namespace.
|
||
* PF_INET: (libc)Internet Namespace.
|
||
* PF_LOCAL: (libc)Local Namespace Details.
|
||
* PF_UNIX: (libc)Local Namespace Details.
|
||
* PIPE_BUF: (libc)Limits for Files.
|
||
* P_tmpdir: (libc)Temporary Files.
|
||
* RAND_MAX: (libc)ISO Random.
|
||
* RE_DUP_MAX: (libc)General Limits.
|
||
* RLIM_INFINITY: (libc)Limits on Resources.
|
||
* R_OK: (libc)Testing File Access.
|
||
* SA_NOCLDSTOP: (libc)Flags for Sigaction.
|
||
* SA_ONSTACK: (libc)Flags for Sigaction.
|
||
* SA_RESTART: (libc)Flags for Sigaction.
|
||
* SEEK_CUR: (libc)File Positioning.
|
||
* SEEK_END: (libc)File Positioning.
|
||
* SEEK_SET: (libc)File Positioning.
|
||
* SIGABRT: (libc)Program Error Signals.
|
||
* SIGALRM: (libc)Alarm Signals.
|
||
* SIGBUS: (libc)Program Error Signals.
|
||
* SIGCHLD: (libc)Job Control Signals.
|
||
* SIGCLD: (libc)Job Control Signals.
|
||
* SIGCONT: (libc)Job Control Signals.
|
||
* SIGEMT: (libc)Program Error Signals.
|
||
* SIGFPE: (libc)Program Error Signals.
|
||
* SIGHUP: (libc)Termination Signals.
|
||
* SIGILL: (libc)Program Error Signals.
|
||
* SIGINFO: (libc)Miscellaneous Signals.
|
||
* SIGINT: (libc)Termination Signals.
|
||
* SIGIO: (libc)Asynchronous I/O Signals.
|
||
* SIGIOT: (libc)Program Error Signals.
|
||
* SIGKILL: (libc)Termination Signals.
|
||
* SIGLOST: (libc)Operation Error Signals.
|
||
* SIGPIPE: (libc)Operation Error Signals.
|
||
* SIGPOLL: (libc)Asynchronous I/O Signals.
|
||
* SIGPROF: (libc)Alarm Signals.
|
||
* SIGQUIT: (libc)Termination Signals.
|
||
* SIGSEGV: (libc)Program Error Signals.
|
||
* SIGSTOP: (libc)Job Control Signals.
|
||
* SIGSYS: (libc)Program Error Signals.
|
||
* SIGTERM: (libc)Termination Signals.
|
||
* SIGTRAP: (libc)Program Error Signals.
|
||
* SIGTSTP: (libc)Job Control Signals.
|
||
* SIGTTIN: (libc)Job Control Signals.
|
||
* SIGTTOU: (libc)Job Control Signals.
|
||
* SIGURG: (libc)Asynchronous I/O Signals.
|
||
* SIGUSR1: (libc)Miscellaneous Signals.
|
||
* SIGUSR2: (libc)Miscellaneous Signals.
|
||
* SIGVTALRM: (libc)Alarm Signals.
|
||
* SIGWINCH: (libc)Miscellaneous Signals.
|
||
* SIGXCPU: (libc)Operation Error Signals.
|
||
* SIGXFSZ: (libc)Operation Error Signals.
|
||
* SIG_ERR: (libc)Basic Signal Handling.
|
||
* SOCK_DGRAM: (libc)Communication Styles.
|
||
* SOCK_RAW: (libc)Communication Styles.
|
||
* SOCK_RDM: (libc)Communication Styles.
|
||
* SOCK_SEQPACKET: (libc)Communication Styles.
|
||
* SOCK_STREAM: (libc)Communication Styles.
|
||
* SOL_SOCKET: (libc)Socket-Level Options.
|
||
* SSIZE_MAX: (libc)General Limits.
|
||
* STREAM_MAX: (libc)General Limits.
|
||
* SUN_LEN: (libc)Local Namespace Details.
|
||
* S_IFMT: (libc)Testing File Type.
|
||
* S_ISBLK: (libc)Testing File Type.
|
||
* S_ISCHR: (libc)Testing File Type.
|
||
* S_ISDIR: (libc)Testing File Type.
|
||
* S_ISFIFO: (libc)Testing File Type.
|
||
* S_ISLNK: (libc)Testing File Type.
|
||
* S_ISREG: (libc)Testing File Type.
|
||
* S_ISSOCK: (libc)Testing File Type.
|
||
* S_TYPEISMQ: (libc)Testing File Type.
|
||
* S_TYPEISSEM: (libc)Testing File Type.
|
||
* S_TYPEISSHM: (libc)Testing File Type.
|
||
* TMP_MAX: (libc)Temporary Files.
|
||
* TOSTOP: (libc)Local Modes.
|
||
* TZNAME_MAX: (libc)General Limits.
|
||
* VDISCARD: (libc)Other Special.
|
||
* VDSUSP: (libc)Signal Characters.
|
||
* VEOF: (libc)Editing Characters.
|
||
* VEOL2: (libc)Editing Characters.
|
||
* VEOL: (libc)Editing Characters.
|
||
* VERASE: (libc)Editing Characters.
|
||
* VINTR: (libc)Signal Characters.
|
||
* VKILL: (libc)Editing Characters.
|
||
* VLNEXT: (libc)Other Special.
|
||
* VMIN: (libc)Noncanonical Input.
|
||
* VQUIT: (libc)Signal Characters.
|
||
* VREPRINT: (libc)Editing Characters.
|
||
* VSTART: (libc)Start/Stop Characters.
|
||
* VSTATUS: (libc)Other Special.
|
||
* VSTOP: (libc)Start/Stop Characters.
|
||
* VSUSP: (libc)Signal Characters.
|
||
* VTIME: (libc)Noncanonical Input.
|
||
* VWERASE: (libc)Editing Characters.
|
||
* WCHAR_MAX: (libc)Extended Char Intro.
|
||
* WCHAR_MIN: (libc)Extended Char Intro.
|
||
* WCOREDUMP: (libc)Process Completion Status.
|
||
* WEOF: (libc)EOF and Errors.
|
||
* WEOF: (libc)Extended Char Intro.
|
||
* WEXITSTATUS: (libc)Process Completion Status.
|
||
* WIFEXITED: (libc)Process Completion Status.
|
||
* WIFSIGNALED: (libc)Process Completion Status.
|
||
* WIFSTOPPED: (libc)Process Completion Status.
|
||
* WSTOPSIG: (libc)Process Completion Status.
|
||
* WTERMSIG: (libc)Process Completion Status.
|
||
* W_OK: (libc)Testing File Access.
|
||
* X_OK: (libc)Testing File Access.
|
||
* _Complex_I: (libc)Complex Numbers.
|
||
* _Exit: (libc)Termination Internals.
|
||
* _IOFBF: (libc)Controlling Buffering.
|
||
* _IOLBF: (libc)Controlling Buffering.
|
||
* _IONBF: (libc)Controlling Buffering.
|
||
* _Imaginary_I: (libc)Complex Numbers.
|
||
* _PATH_UTMP: (libc)Manipulating the Database.
|
||
* _PATH_WTMP: (libc)Manipulating the Database.
|
||
* _POSIX2_C_DEV: (libc)System Options.
|
||
* _POSIX2_C_VERSION: (libc)Version Supported.
|
||
* _POSIX2_FORT_DEV: (libc)System Options.
|
||
* _POSIX2_FORT_RUN: (libc)System Options.
|
||
* _POSIX2_LOCALEDEF: (libc)System Options.
|
||
* _POSIX2_SW_DEV: (libc)System Options.
|
||
* _POSIX_CHOWN_RESTRICTED: (libc)Options for Files.
|
||
* _POSIX_JOB_CONTROL: (libc)System Options.
|
||
* _POSIX_NO_TRUNC: (libc)Options for Files.
|
||
* _POSIX_SAVED_IDS: (libc)System Options.
|
||
* _POSIX_VDISABLE: (libc)Options for Files.
|
||
* _POSIX_VERSION: (libc)Version Supported.
|
||
* __fbufsize: (libc)Controlling Buffering.
|
||
* __flbf: (libc)Controlling Buffering.
|
||
* __fpending: (libc)Controlling Buffering.
|
||
* __fpurge: (libc)Flushing Buffers.
|
||
* __freadable: (libc)Opening Streams.
|
||
* __freading: (libc)Opening Streams.
|
||
* __fsetlocking: (libc)Streams and Threads.
|
||
* __fwritable: (libc)Opening Streams.
|
||
* __fwriting: (libc)Opening Streams.
|
||
* __gconv_end_fct: (libc)glibc iconv Implementation.
|
||
* __gconv_fct: (libc)glibc iconv Implementation.
|
||
* __gconv_init_fct: (libc)glibc iconv Implementation.
|
||
* __ppc_get_timebase: (libc)PowerPC.
|
||
* __ppc_get_timebase_freq: (libc)PowerPC.
|
||
* __ppc_mdoio: (libc)PowerPC.
|
||
* __ppc_mdoom: (libc)PowerPC.
|
||
* __ppc_set_ppr_low: (libc)PowerPC.
|
||
* __ppc_set_ppr_med: (libc)PowerPC.
|
||
* __ppc_set_ppr_med_high: (libc)PowerPC.
|
||
* __ppc_set_ppr_med_low: (libc)PowerPC.
|
||
* __ppc_set_ppr_very_low: (libc)PowerPC.
|
||
* __ppc_yield: (libc)PowerPC.
|
||
* __va_copy: (libc)Argument Macros.
|
||
* _exit: (libc)Termination Internals.
|
||
* _flushlbf: (libc)Flushing Buffers.
|
||
* _tolower: (libc)Case Conversion.
|
||
* _toupper: (libc)Case Conversion.
|
||
* a64l: (libc)Encode Binary Data.
|
||
* abort: (libc)Aborting a Program.
|
||
* abs: (libc)Absolute Value.
|
||
* accept: (libc)Accepting Connections.
|
||
* access: (libc)Testing File Access.
|
||
* acos: (libc)Inverse Trig Functions.
|
||
* acosf: (libc)Inverse Trig Functions.
|
||
* acosh: (libc)Hyperbolic Functions.
|
||
* acoshf: (libc)Hyperbolic Functions.
|
||
* acoshl: (libc)Hyperbolic Functions.
|
||
* acosl: (libc)Inverse Trig Functions.
|
||
* addmntent: (libc)mtab.
|
||
* addseverity: (libc)Adding Severity Classes.
|
||
* adjtime: (libc)High-Resolution Calendar.
|
||
* adjtimex: (libc)High-Resolution Calendar.
|
||
* aio_cancel64: (libc)Cancel AIO Operations.
|
||
* aio_cancel: (libc)Cancel AIO Operations.
|
||
* aio_error64: (libc)Status of AIO Operations.
|
||
* aio_error: (libc)Status of AIO Operations.
|
||
* aio_fsync64: (libc)Synchronizing AIO Operations.
|
||
* aio_fsync: (libc)Synchronizing AIO Operations.
|
||
* aio_init: (libc)Configuration of AIO.
|
||
* aio_read64: (libc)Asynchronous Reads/Writes.
|
||
* aio_read: (libc)Asynchronous Reads/Writes.
|
||
* aio_return64: (libc)Status of AIO Operations.
|
||
* aio_return: (libc)Status of AIO Operations.
|
||
* aio_suspend64: (libc)Synchronizing AIO Operations.
|
||
* aio_suspend: (libc)Synchronizing AIO Operations.
|
||
* aio_write64: (libc)Asynchronous Reads/Writes.
|
||
* aio_write: (libc)Asynchronous Reads/Writes.
|
||
* alarm: (libc)Setting an Alarm.
|
||
* aligned_alloc: (libc)Aligned Memory Blocks.
|
||
* alloca: (libc)Variable Size Automatic.
|
||
* alphasort64: (libc)Scanning Directory Content.
|
||
* alphasort: (libc)Scanning Directory Content.
|
||
* argp_error: (libc)Argp Helper Functions.
|
||
* argp_failure: (libc)Argp Helper Functions.
|
||
* argp_help: (libc)Argp Help.
|
||
* argp_parse: (libc)Argp.
|
||
* argp_state_help: (libc)Argp Helper Functions.
|
||
* argp_usage: (libc)Argp Helper Functions.
|
||
* argz_add: (libc)Argz Functions.
|
||
* argz_add_sep: (libc)Argz Functions.
|
||
* argz_append: (libc)Argz Functions.
|
||
* argz_count: (libc)Argz Functions.
|
||
* argz_create: (libc)Argz Functions.
|
||
* argz_create_sep: (libc)Argz Functions.
|
||
* argz_delete: (libc)Argz Functions.
|
||
* argz_extract: (libc)Argz Functions.
|
||
* argz_insert: (libc)Argz Functions.
|
||
* argz_next: (libc)Argz Functions.
|
||
* argz_replace: (libc)Argz Functions.
|
||
* argz_stringify: (libc)Argz Functions.
|
||
* asctime: (libc)Formatting Calendar Time.
|
||
* asctime_r: (libc)Formatting Calendar Time.
|
||
* asin: (libc)Inverse Trig Functions.
|
||
* asinf: (libc)Inverse Trig Functions.
|
||
* asinh: (libc)Hyperbolic Functions.
|
||
* asinhf: (libc)Hyperbolic Functions.
|
||
* asinhl: (libc)Hyperbolic Functions.
|
||
* asinl: (libc)Inverse Trig Functions.
|
||
* asprintf: (libc)Dynamic Output.
|
||
* assert: (libc)Consistency Checking.
|
||
* assert_perror: (libc)Consistency Checking.
|
||
* atan2: (libc)Inverse Trig Functions.
|
||
* atan2f: (libc)Inverse Trig Functions.
|
||
* atan2l: (libc)Inverse Trig Functions.
|
||
* atan: (libc)Inverse Trig Functions.
|
||
* atanf: (libc)Inverse Trig Functions.
|
||
* atanh: (libc)Hyperbolic Functions.
|
||
* atanhf: (libc)Hyperbolic Functions.
|
||
* atanhl: (libc)Hyperbolic Functions.
|
||
* atanl: (libc)Inverse Trig Functions.
|
||
* atexit: (libc)Cleanups on Exit.
|
||
* atof: (libc)Parsing of Floats.
|
||
* atoi: (libc)Parsing of Integers.
|
||
* atol: (libc)Parsing of Integers.
|
||
* atoll: (libc)Parsing of Integers.
|
||
* backtrace: (libc)Backtraces.
|
||
* backtrace_symbols: (libc)Backtraces.
|
||
* backtrace_symbols_fd: (libc)Backtraces.
|
||
* basename: (libc)Finding Tokens in a String.
|
||
* basename: (libc)Finding Tokens in a String.
|
||
* bcmp: (libc)String/Array Comparison.
|
||
* bcopy: (libc)Copying Strings and Arrays.
|
||
* bind: (libc)Setting Address.
|
||
* bind_textdomain_codeset: (libc)Charset conversion in gettext.
|
||
* bindtextdomain: (libc)Locating gettext catalog.
|
||
* brk: (libc)Resizing the Data Segment.
|
||
* bsearch: (libc)Array Search Function.
|
||
* btowc: (libc)Converting a Character.
|
||
* bzero: (libc)Copying Strings and Arrays.
|
||
* cabs: (libc)Absolute Value.
|
||
* cabsf: (libc)Absolute Value.
|
||
* cabsl: (libc)Absolute Value.
|
||
* cacos: (libc)Inverse Trig Functions.
|
||
* cacosf: (libc)Inverse Trig Functions.
|
||
* cacosh: (libc)Hyperbolic Functions.
|
||
* cacoshf: (libc)Hyperbolic Functions.
|
||
* cacoshl: (libc)Hyperbolic Functions.
|
||
* cacosl: (libc)Inverse Trig Functions.
|
||
* calloc: (libc)Allocating Cleared Space.
|
||
* canonicalize_file_name: (libc)Symbolic Links.
|
||
* carg: (libc)Operations on Complex.
|
||
* cargf: (libc)Operations on Complex.
|
||
* cargl: (libc)Operations on Complex.
|
||
* casin: (libc)Inverse Trig Functions.
|
||
* casinf: (libc)Inverse Trig Functions.
|
||
* casinh: (libc)Hyperbolic Functions.
|
||
* casinhf: (libc)Hyperbolic Functions.
|
||
* casinhl: (libc)Hyperbolic Functions.
|
||
* casinl: (libc)Inverse Trig Functions.
|
||
* catan: (libc)Inverse Trig Functions.
|
||
* catanf: (libc)Inverse Trig Functions.
|
||
* catanh: (libc)Hyperbolic Functions.
|
||
* catanhf: (libc)Hyperbolic Functions.
|
||
* catanhl: (libc)Hyperbolic Functions.
|
||
* catanl: (libc)Inverse Trig Functions.
|
||
* catclose: (libc)The catgets Functions.
|
||
* catgets: (libc)The catgets Functions.
|
||
* catopen: (libc)The catgets Functions.
|
||
* cbc_crypt: (libc)DES Encryption.
|
||
* cbrt: (libc)Exponents and Logarithms.
|
||
* cbrtf: (libc)Exponents and Logarithms.
|
||
* cbrtl: (libc)Exponents and Logarithms.
|
||
* ccos: (libc)Trig Functions.
|
||
* ccosf: (libc)Trig Functions.
|
||
* ccosh: (libc)Hyperbolic Functions.
|
||
* ccoshf: (libc)Hyperbolic Functions.
|
||
* ccoshl: (libc)Hyperbolic Functions.
|
||
* ccosl: (libc)Trig Functions.
|
||
* ceil: (libc)Rounding Functions.
|
||
* ceilf: (libc)Rounding Functions.
|
||
* ceill: (libc)Rounding Functions.
|
||
* cexp: (libc)Exponents and Logarithms.
|
||
* cexpf: (libc)Exponents and Logarithms.
|
||
* cexpl: (libc)Exponents and Logarithms.
|
||
* cfgetispeed: (libc)Line Speed.
|
||
* cfgetospeed: (libc)Line Speed.
|
||
* cfmakeraw: (libc)Noncanonical Input.
|
||
* cfree: (libc)Freeing after Malloc.
|
||
* cfsetispeed: (libc)Line Speed.
|
||
* cfsetospeed: (libc)Line Speed.
|
||
* cfsetspeed: (libc)Line Speed.
|
||
* chdir: (libc)Working Directory.
|
||
* chmod: (libc)Setting Permissions.
|
||
* chown: (libc)File Owner.
|
||
* cimag: (libc)Operations on Complex.
|
||
* cimagf: (libc)Operations on Complex.
|
||
* cimagl: (libc)Operations on Complex.
|
||
* clearenv: (libc)Environment Access.
|
||
* clearerr: (libc)Error Recovery.
|
||
* clearerr_unlocked: (libc)Error Recovery.
|
||
* clock: (libc)CPU Time.
|
||
* clog10: (libc)Exponents and Logarithms.
|
||
* clog10f: (libc)Exponents and Logarithms.
|
||
* clog10l: (libc)Exponents and Logarithms.
|
||
* clog: (libc)Exponents and Logarithms.
|
||
* clogf: (libc)Exponents and Logarithms.
|
||
* clogl: (libc)Exponents and Logarithms.
|
||
* close: (libc)Opening and Closing Files.
|
||
* closedir: (libc)Reading/Closing Directory.
|
||
* closelog: (libc)closelog.
|
||
* confstr: (libc)String Parameters.
|
||
* conj: (libc)Operations on Complex.
|
||
* conjf: (libc)Operations on Complex.
|
||
* conjl: (libc)Operations on Complex.
|
||
* connect: (libc)Connecting.
|
||
* copysign: (libc)FP Bit Twiddling.
|
||
* copysignf: (libc)FP Bit Twiddling.
|
||
* copysignl: (libc)FP Bit Twiddling.
|
||
* cos: (libc)Trig Functions.
|
||
* cosf: (libc)Trig Functions.
|
||
* cosh: (libc)Hyperbolic Functions.
|
||
* coshf: (libc)Hyperbolic Functions.
|
||
* coshl: (libc)Hyperbolic Functions.
|
||
* cosl: (libc)Trig Functions.
|
||
* cpow: (libc)Exponents and Logarithms.
|
||
* cpowf: (libc)Exponents and Logarithms.
|
||
* cpowl: (libc)Exponents and Logarithms.
|
||
* cproj: (libc)Operations on Complex.
|
||
* cprojf: (libc)Operations on Complex.
|
||
* cprojl: (libc)Operations on Complex.
|
||
* creal: (libc)Operations on Complex.
|
||
* crealf: (libc)Operations on Complex.
|
||
* creall: (libc)Operations on Complex.
|
||
* creat64: (libc)Opening and Closing Files.
|
||
* creat: (libc)Opening and Closing Files.
|
||
* crypt: (libc)crypt.
|
||
* crypt_r: (libc)crypt.
|
||
* csin: (libc)Trig Functions.
|
||
* csinf: (libc)Trig Functions.
|
||
* csinh: (libc)Hyperbolic Functions.
|
||
* csinhf: (libc)Hyperbolic Functions.
|
||
* csinhl: (libc)Hyperbolic Functions.
|
||
* csinl: (libc)Trig Functions.
|
||
* csqrt: (libc)Exponents and Logarithms.
|
||
* csqrtf: (libc)Exponents and Logarithms.
|
||
* csqrtl: (libc)Exponents and Logarithms.
|
||
* ctan: (libc)Trig Functions.
|
||
* ctanf: (libc)Trig Functions.
|
||
* ctanh: (libc)Hyperbolic Functions.
|
||
* ctanhf: (libc)Hyperbolic Functions.
|
||
* ctanhl: (libc)Hyperbolic Functions.
|
||
* ctanl: (libc)Trig Functions.
|
||
* ctermid: (libc)Identifying the Terminal.
|
||
* ctime: (libc)Formatting Calendar Time.
|
||
* ctime_r: (libc)Formatting Calendar Time.
|
||
* cuserid: (libc)Who Logged In.
|
||
* dcgettext: (libc)Translation with gettext.
|
||
* dcngettext: (libc)Advanced gettext functions.
|
||
* des_setparity: (libc)DES Encryption.
|
||
* dgettext: (libc)Translation with gettext.
|
||
* difftime: (libc)Elapsed Time.
|
||
* dirfd: (libc)Opening a Directory.
|
||
* dirname: (libc)Finding Tokens in a String.
|
||
* div: (libc)Integer Division.
|
||
* dngettext: (libc)Advanced gettext functions.
|
||
* drand48: (libc)SVID Random.
|
||
* drand48_r: (libc)SVID Random.
|
||
* drem: (libc)Remainder Functions.
|
||
* dremf: (libc)Remainder Functions.
|
||
* dreml: (libc)Remainder Functions.
|
||
* dup2: (libc)Duplicating Descriptors.
|
||
* dup: (libc)Duplicating Descriptors.
|
||
* ecb_crypt: (libc)DES Encryption.
|
||
* ecvt: (libc)System V Number Conversion.
|
||
* ecvt_r: (libc)System V Number Conversion.
|
||
* encrypt: (libc)DES Encryption.
|
||
* encrypt_r: (libc)DES Encryption.
|
||
* endfsent: (libc)fstab.
|
||
* endgrent: (libc)Scanning All Groups.
|
||
* endhostent: (libc)Host Names.
|
||
* endmntent: (libc)mtab.
|
||
* endnetent: (libc)Networks Database.
|
||
* endnetgrent: (libc)Lookup Netgroup.
|
||
* endprotoent: (libc)Protocols Database.
|
||
* endpwent: (libc)Scanning All Users.
|
||
* endservent: (libc)Services Database.
|
||
* endutent: (libc)Manipulating the Database.
|
||
* endutxent: (libc)XPG Functions.
|
||
* envz_add: (libc)Envz Functions.
|
||
* envz_entry: (libc)Envz Functions.
|
||
* envz_get: (libc)Envz Functions.
|
||
* envz_merge: (libc)Envz Functions.
|
||
* envz_remove: (libc)Envz Functions.
|
||
* envz_strip: (libc)Envz Functions.
|
||
* erand48: (libc)SVID Random.
|
||
* erand48_r: (libc)SVID Random.
|
||
* erf: (libc)Special Functions.
|
||
* erfc: (libc)Special Functions.
|
||
* erfcf: (libc)Special Functions.
|
||
* erfcl: (libc)Special Functions.
|
||
* erff: (libc)Special Functions.
|
||
* erfl: (libc)Special Functions.
|
||
* err: (libc)Error Messages.
|
||
* errno: (libc)Checking for Errors.
|
||
* error: (libc)Error Messages.
|
||
* error_at_line: (libc)Error Messages.
|
||
* errx: (libc)Error Messages.
|
||
* execl: (libc)Executing a File.
|
||
* execle: (libc)Executing a File.
|
||
* execlp: (libc)Executing a File.
|
||
* execv: (libc)Executing a File.
|
||
* execve: (libc)Executing a File.
|
||
* execvp: (libc)Executing a File.
|
||
* exit: (libc)Normal Termination.
|
||
* exp10: (libc)Exponents and Logarithms.
|
||
* exp10f: (libc)Exponents and Logarithms.
|
||
* exp10l: (libc)Exponents and Logarithms.
|
||
* exp2: (libc)Exponents and Logarithms.
|
||
* exp2f: (libc)Exponents and Logarithms.
|
||
* exp2l: (libc)Exponents and Logarithms.
|
||
* exp: (libc)Exponents and Logarithms.
|
||
* expf: (libc)Exponents and Logarithms.
|
||
* expl: (libc)Exponents and Logarithms.
|
||
* expm1: (libc)Exponents and Logarithms.
|
||
* expm1f: (libc)Exponents and Logarithms.
|
||
* expm1l: (libc)Exponents and Logarithms.
|
||
* fabs: (libc)Absolute Value.
|
||
* fabsf: (libc)Absolute Value.
|
||
* fabsl: (libc)Absolute Value.
|
||
* fchdir: (libc)Working Directory.
|
||
* fchmod: (libc)Setting Permissions.
|
||
* fchown: (libc)File Owner.
|
||
* fclose: (libc)Closing Streams.
|
||
* fcloseall: (libc)Closing Streams.
|
||
* fcntl: (libc)Control Operations.
|
||
* fcvt: (libc)System V Number Conversion.
|
||
* fcvt_r: (libc)System V Number Conversion.
|
||
* fdatasync: (libc)Synchronizing I/O.
|
||
* fdim: (libc)Misc FP Arithmetic.
|
||
* fdimf: (libc)Misc FP Arithmetic.
|
||
* fdiml: (libc)Misc FP Arithmetic.
|
||
* fdopen: (libc)Descriptors and Streams.
|
||
* fdopendir: (libc)Opening a Directory.
|
||
* feclearexcept: (libc)Status bit operations.
|
||
* fedisableexcept: (libc)Control Functions.
|
||
* feenableexcept: (libc)Control Functions.
|
||
* fegetenv: (libc)Control Functions.
|
||
* fegetexcept: (libc)Control Functions.
|
||
* fegetexceptflag: (libc)Status bit operations.
|
||
* fegetround: (libc)Rounding.
|
||
* feholdexcept: (libc)Control Functions.
|
||
* feof: (libc)EOF and Errors.
|
||
* feof_unlocked: (libc)EOF and Errors.
|
||
* feraiseexcept: (libc)Status bit operations.
|
||
* ferror: (libc)EOF and Errors.
|
||
* ferror_unlocked: (libc)EOF and Errors.
|
||
* fesetenv: (libc)Control Functions.
|
||
* fesetexceptflag: (libc)Status bit operations.
|
||
* fesetround: (libc)Rounding.
|
||
* fetestexcept: (libc)Status bit operations.
|
||
* feupdateenv: (libc)Control Functions.
|
||
* fflush: (libc)Flushing Buffers.
|
||
* fflush_unlocked: (libc)Flushing Buffers.
|
||
* fgetc: (libc)Character Input.
|
||
* fgetc_unlocked: (libc)Character Input.
|
||
* fgetgrent: (libc)Scanning All Groups.
|
||
* fgetgrent_r: (libc)Scanning All Groups.
|
||
* fgetpos64: (libc)Portable Positioning.
|
||
* fgetpos: (libc)Portable Positioning.
|
||
* fgetpwent: (libc)Scanning All Users.
|
||
* fgetpwent_r: (libc)Scanning All Users.
|
||
* fgets: (libc)Line Input.
|
||
* fgets_unlocked: (libc)Line Input.
|
||
* fgetwc: (libc)Character Input.
|
||
* fgetwc_unlocked: (libc)Character Input.
|
||
* fgetws: (libc)Line Input.
|
||
* fgetws_unlocked: (libc)Line Input.
|
||
* fileno: (libc)Descriptors and Streams.
|
||
* fileno_unlocked: (libc)Descriptors and Streams.
|
||
* finite: (libc)Floating Point Classes.
|
||
* finitef: (libc)Floating Point Classes.
|
||
* finitel: (libc)Floating Point Classes.
|
||
* flockfile: (libc)Streams and Threads.
|
||
* floor: (libc)Rounding Functions.
|
||
* floorf: (libc)Rounding Functions.
|
||
* floorl: (libc)Rounding Functions.
|
||
* fma: (libc)Misc FP Arithmetic.
|
||
* fmaf: (libc)Misc FP Arithmetic.
|
||
* fmal: (libc)Misc FP Arithmetic.
|
||
* fmax: (libc)Misc FP Arithmetic.
|
||
* fmaxf: (libc)Misc FP Arithmetic.
|
||
* fmaxl: (libc)Misc FP Arithmetic.
|
||
* fmemopen: (libc)String Streams.
|
||
* fmin: (libc)Misc FP Arithmetic.
|
||
* fminf: (libc)Misc FP Arithmetic.
|
||
* fminl: (libc)Misc FP Arithmetic.
|
||
* fmod: (libc)Remainder Functions.
|
||
* fmodf: (libc)Remainder Functions.
|
||
* fmodl: (libc)Remainder Functions.
|
||
* fmtmsg: (libc)Printing Formatted Messages.
|
||
* fnmatch: (libc)Wildcard Matching.
|
||
* fopen64: (libc)Opening Streams.
|
||
* fopen: (libc)Opening Streams.
|
||
* fopencookie: (libc)Streams and Cookies.
|
||
* fork: (libc)Creating a Process.
|
||
* forkpty: (libc)Pseudo-Terminal Pairs.
|
||
* fpathconf: (libc)Pathconf.
|
||
* fpclassify: (libc)Floating Point Classes.
|
||
* fprintf: (libc)Formatted Output Functions.
|
||
* fputc: (libc)Simple Output.
|
||
* fputc_unlocked: (libc)Simple Output.
|
||
* fputs: (libc)Simple Output.
|
||
* fputs_unlocked: (libc)Simple Output.
|
||
* fputwc: (libc)Simple Output.
|
||
* fputwc_unlocked: (libc)Simple Output.
|
||
* fputws: (libc)Simple Output.
|
||
* fputws_unlocked: (libc)Simple Output.
|
||
* fread: (libc)Block Input/Output.
|
||
* fread_unlocked: (libc)Block Input/Output.
|
||
* free: (libc)Freeing after Malloc.
|
||
* freopen64: (libc)Opening Streams.
|
||
* freopen: (libc)Opening Streams.
|
||
* frexp: (libc)Normalization Functions.
|
||
* frexpf: (libc)Normalization Functions.
|
||
* frexpl: (libc)Normalization Functions.
|
||
* fscanf: (libc)Formatted Input Functions.
|
||
* fseek: (libc)File Positioning.
|
||
* fseeko64: (libc)File Positioning.
|
||
* fseeko: (libc)File Positioning.
|
||
* fsetpos64: (libc)Portable Positioning.
|
||
* fsetpos: (libc)Portable Positioning.
|
||
* fstat64: (libc)Reading Attributes.
|
||
* fstat: (libc)Reading Attributes.
|
||
* fsync: (libc)Synchronizing I/O.
|
||
* ftell: (libc)File Positioning.
|
||
* ftello64: (libc)File Positioning.
|
||
* ftello: (libc)File Positioning.
|
||
* ftruncate64: (libc)File Size.
|
||
* ftruncate: (libc)File Size.
|
||
* ftrylockfile: (libc)Streams and Threads.
|
||
* ftw64: (libc)Working with Directory Trees.
|
||
* ftw: (libc)Working with Directory Trees.
|
||
* funlockfile: (libc)Streams and Threads.
|
||
* futimes: (libc)File Times.
|
||
* fwide: (libc)Streams and I18N.
|
||
* fwprintf: (libc)Formatted Output Functions.
|
||
* fwrite: (libc)Block Input/Output.
|
||
* fwrite_unlocked: (libc)Block Input/Output.
|
||
* fwscanf: (libc)Formatted Input Functions.
|
||
* gamma: (libc)Special Functions.
|
||
* gammaf: (libc)Special Functions.
|
||
* gammal: (libc)Special Functions.
|
||
* gcvt: (libc)System V Number Conversion.
|
||
* get_avphys_pages: (libc)Query Memory Parameters.
|
||
* get_current_dir_name: (libc)Working Directory.
|
||
* get_nprocs: (libc)Processor Resources.
|
||
* get_nprocs_conf: (libc)Processor Resources.
|
||
* get_phys_pages: (libc)Query Memory Parameters.
|
||
* getauxval: (libc)Auxiliary Vector.
|
||
* getc: (libc)Character Input.
|
||
* getc_unlocked: (libc)Character Input.
|
||
* getchar: (libc)Character Input.
|
||
* getchar_unlocked: (libc)Character Input.
|
||
* getcontext: (libc)System V contexts.
|
||
* getcwd: (libc)Working Directory.
|
||
* getdate: (libc)General Time String Parsing.
|
||
* getdate_r: (libc)General Time String Parsing.
|
||
* getdelim: (libc)Line Input.
|
||
* getdomainname: (libc)Host Identification.
|
||
* getegid: (libc)Reading Persona.
|
||
* getenv: (libc)Environment Access.
|
||
* geteuid: (libc)Reading Persona.
|
||
* getfsent: (libc)fstab.
|
||
* getfsfile: (libc)fstab.
|
||
* getfsspec: (libc)fstab.
|
||
* getgid: (libc)Reading Persona.
|
||
* getgrent: (libc)Scanning All Groups.
|
||
* getgrent_r: (libc)Scanning All Groups.
|
||
* getgrgid: (libc)Lookup Group.
|
||
* getgrgid_r: (libc)Lookup Group.
|
||
* getgrnam: (libc)Lookup Group.
|
||
* getgrnam_r: (libc)Lookup Group.
|
||
* getgrouplist: (libc)Setting Groups.
|
||
* getgroups: (libc)Reading Persona.
|
||
* gethostbyaddr: (libc)Host Names.
|
||
* gethostbyaddr_r: (libc)Host Names.
|
||
* gethostbyname2: (libc)Host Names.
|
||
* gethostbyname2_r: (libc)Host Names.
|
||
* gethostbyname: (libc)Host Names.
|
||
* gethostbyname_r: (libc)Host Names.
|
||
* gethostent: (libc)Host Names.
|
||
* gethostid: (libc)Host Identification.
|
||
* gethostname: (libc)Host Identification.
|
||
* getitimer: (libc)Setting an Alarm.
|
||
* getline: (libc)Line Input.
|
||
* getloadavg: (libc)Processor Resources.
|
||
* getlogin: (libc)Who Logged In.
|
||
* getmntent: (libc)mtab.
|
||
* getmntent_r: (libc)mtab.
|
||
* getnetbyaddr: (libc)Networks Database.
|
||
* getnetbyname: (libc)Networks Database.
|
||
* getnetent: (libc)Networks Database.
|
||
* getnetgrent: (libc)Lookup Netgroup.
|
||
* getnetgrent_r: (libc)Lookup Netgroup.
|
||
* getopt: (libc)Using Getopt.
|
||
* getopt_long: (libc)Getopt Long Options.
|
||
* getopt_long_only: (libc)Getopt Long Options.
|
||
* getpagesize: (libc)Query Memory Parameters.
|
||
* getpass: (libc)getpass.
|
||
* getpeername: (libc)Who is Connected.
|
||
* getpgid: (libc)Process Group Functions.
|
||
* getpgrp: (libc)Process Group Functions.
|
||
* getpid: (libc)Process Identification.
|
||
* getppid: (libc)Process Identification.
|
||
* getpriority: (libc)Traditional Scheduling Functions.
|
||
* getprotobyname: (libc)Protocols Database.
|
||
* getprotobynumber: (libc)Protocols Database.
|
||
* getprotoent: (libc)Protocols Database.
|
||
* getpt: (libc)Allocation.
|
||
* getpwent: (libc)Scanning All Users.
|
||
* getpwent_r: (libc)Scanning All Users.
|
||
* getpwnam: (libc)Lookup User.
|
||
* getpwnam_r: (libc)Lookup User.
|
||
* getpwuid: (libc)Lookup User.
|
||
* getpwuid_r: (libc)Lookup User.
|
||
* getrlimit64: (libc)Limits on Resources.
|
||
* getrlimit: (libc)Limits on Resources.
|
||
* getrusage: (libc)Resource Usage.
|
||
* gets: (libc)Line Input.
|
||
* getservbyname: (libc)Services Database.
|
||
* getservbyport: (libc)Services Database.
|
||
* getservent: (libc)Services Database.
|
||
* getsid: (libc)Process Group Functions.
|
||
* getsockname: (libc)Reading Address.
|
||
* getsockopt: (libc)Socket Option Functions.
|
||
* getsubopt: (libc)Suboptions.
|
||
* gettext: (libc)Translation with gettext.
|
||
* gettimeofday: (libc)High-Resolution Calendar.
|
||
* getuid: (libc)Reading Persona.
|
||
* getumask: (libc)Setting Permissions.
|
||
* getutent: (libc)Manipulating the Database.
|
||
* getutent_r: (libc)Manipulating the Database.
|
||
* getutid: (libc)Manipulating the Database.
|
||
* getutid_r: (libc)Manipulating the Database.
|
||
* getutline: (libc)Manipulating the Database.
|
||
* getutline_r: (libc)Manipulating the Database.
|
||
* getutmp: (libc)XPG Functions.
|
||
* getutmpx: (libc)XPG Functions.
|
||
* getutxent: (libc)XPG Functions.
|
||
* getutxid: (libc)XPG Functions.
|
||
* getutxline: (libc)XPG Functions.
|
||
* getw: (libc)Character Input.
|
||
* getwc: (libc)Character Input.
|
||
* getwc_unlocked: (libc)Character Input.
|
||
* getwchar: (libc)Character Input.
|
||
* getwchar_unlocked: (libc)Character Input.
|
||
* getwd: (libc)Working Directory.
|
||
* glob64: (libc)Calling Glob.
|
||
* glob: (libc)Calling Glob.
|
||
* globfree64: (libc)More Flags for Globbing.
|
||
* globfree: (libc)More Flags for Globbing.
|
||
* gmtime: (libc)Broken-down Time.
|
||
* gmtime_r: (libc)Broken-down Time.
|
||
* grantpt: (libc)Allocation.
|
||
* gsignal: (libc)Signaling Yourself.
|
||
* gtty: (libc)BSD Terminal Modes.
|
||
* hasmntopt: (libc)mtab.
|
||
* hcreate: (libc)Hash Search Function.
|
||
* hcreate_r: (libc)Hash Search Function.
|
||
* hdestroy: (libc)Hash Search Function.
|
||
* hdestroy_r: (libc)Hash Search Function.
|
||
* hsearch: (libc)Hash Search Function.
|
||
* hsearch_r: (libc)Hash Search Function.
|
||
* htonl: (libc)Byte Order.
|
||
* htons: (libc)Byte Order.
|
||
* hypot: (libc)Exponents and Logarithms.
|
||
* hypotf: (libc)Exponents and Logarithms.
|
||
* hypotl: (libc)Exponents and Logarithms.
|
||
* iconv: (libc)Generic Conversion Interface.
|
||
* iconv_close: (libc)Generic Conversion Interface.
|
||
* iconv_open: (libc)Generic Conversion Interface.
|
||
* if_freenameindex: (libc)Interface Naming.
|
||
* if_indextoname: (libc)Interface Naming.
|
||
* if_nameindex: (libc)Interface Naming.
|
||
* if_nametoindex: (libc)Interface Naming.
|
||
* ilogb: (libc)Exponents and Logarithms.
|
||
* ilogbf: (libc)Exponents and Logarithms.
|
||
* ilogbl: (libc)Exponents and Logarithms.
|
||
* imaxabs: (libc)Absolute Value.
|
||
* imaxdiv: (libc)Integer Division.
|
||
* in6addr_any: (libc)Host Address Data Type.
|
||
* in6addr_loopback: (libc)Host Address Data Type.
|
||
* index: (libc)Search Functions.
|
||
* inet_addr: (libc)Host Address Functions.
|
||
* inet_aton: (libc)Host Address Functions.
|
||
* inet_lnaof: (libc)Host Address Functions.
|
||
* inet_makeaddr: (libc)Host Address Functions.
|
||
* inet_netof: (libc)Host Address Functions.
|
||
* inet_network: (libc)Host Address Functions.
|
||
* inet_ntoa: (libc)Host Address Functions.
|
||
* inet_ntop: (libc)Host Address Functions.
|
||
* inet_pton: (libc)Host Address Functions.
|
||
* initgroups: (libc)Setting Groups.
|
||
* initstate: (libc)BSD Random.
|
||
* initstate_r: (libc)BSD Random.
|
||
* innetgr: (libc)Netgroup Membership.
|
||
* ioctl: (libc)IOCTLs.
|
||
* isalnum: (libc)Classification of Characters.
|
||
* isalpha: (libc)Classification of Characters.
|
||
* isascii: (libc)Classification of Characters.
|
||
* isatty: (libc)Is It a Terminal.
|
||
* isblank: (libc)Classification of Characters.
|
||
* iscntrl: (libc)Classification of Characters.
|
||
* isdigit: (libc)Classification of Characters.
|
||
* isfinite: (libc)Floating Point Classes.
|
||
* isgraph: (libc)Classification of Characters.
|
||
* isgreater: (libc)FP Comparison Functions.
|
||
* isgreaterequal: (libc)FP Comparison Functions.
|
||
* isinf: (libc)Floating Point Classes.
|
||
* isinff: (libc)Floating Point Classes.
|
||
* isinfl: (libc)Floating Point Classes.
|
||
* isless: (libc)FP Comparison Functions.
|
||
* islessequal: (libc)FP Comparison Functions.
|
||
* islessgreater: (libc)FP Comparison Functions.
|
||
* islower: (libc)Classification of Characters.
|
||
* isnan: (libc)Floating Point Classes.
|
||
* isnan: (libc)Floating Point Classes.
|
||
* isnanf: (libc)Floating Point Classes.
|
||
* isnanl: (libc)Floating Point Classes.
|
||
* isnormal: (libc)Floating Point Classes.
|
||
* isprint: (libc)Classification of Characters.
|
||
* ispunct: (libc)Classification of Characters.
|
||
* issignaling: (libc)Floating Point Classes.
|
||
* isspace: (libc)Classification of Characters.
|
||
* isunordered: (libc)FP Comparison Functions.
|
||
* isupper: (libc)Classification of Characters.
|
||
* iswalnum: (libc)Classification of Wide Characters.
|
||
* iswalpha: (libc)Classification of Wide Characters.
|
||
* iswblank: (libc)Classification of Wide Characters.
|
||
* iswcntrl: (libc)Classification of Wide Characters.
|
||
* iswctype: (libc)Classification of Wide Characters.
|
||
* iswdigit: (libc)Classification of Wide Characters.
|
||
* iswgraph: (libc)Classification of Wide Characters.
|
||
* iswlower: (libc)Classification of Wide Characters.
|
||
* iswprint: (libc)Classification of Wide Characters.
|
||
* iswpunct: (libc)Classification of Wide Characters.
|
||
* iswspace: (libc)Classification of Wide Characters.
|
||
* iswupper: (libc)Classification of Wide Characters.
|
||
* iswxdigit: (libc)Classification of Wide Characters.
|
||
* isxdigit: (libc)Classification of Characters.
|
||
* j0: (libc)Special Functions.
|
||
* j0f: (libc)Special Functions.
|
||
* j0l: (libc)Special Functions.
|
||
* j1: (libc)Special Functions.
|
||
* j1f: (libc)Special Functions.
|
||
* j1l: (libc)Special Functions.
|
||
* jn: (libc)Special Functions.
|
||
* jnf: (libc)Special Functions.
|
||
* jnl: (libc)Special Functions.
|
||
* jrand48: (libc)SVID Random.
|
||
* jrand48_r: (libc)SVID Random.
|
||
* kill: (libc)Signaling Another Process.
|
||
* killpg: (libc)Signaling Another Process.
|
||
* l64a: (libc)Encode Binary Data.
|
||
* labs: (libc)Absolute Value.
|
||
* lcong48: (libc)SVID Random.
|
||
* lcong48_r: (libc)SVID Random.
|
||
* ldexp: (libc)Normalization Functions.
|
||
* ldexpf: (libc)Normalization Functions.
|
||
* ldexpl: (libc)Normalization Functions.
|
||
* ldiv: (libc)Integer Division.
|
||
* lfind: (libc)Array Search Function.
|
||
* lgamma: (libc)Special Functions.
|
||
* lgamma_r: (libc)Special Functions.
|
||
* lgammaf: (libc)Special Functions.
|
||
* lgammaf_r: (libc)Special Functions.
|
||
* lgammal: (libc)Special Functions.
|
||
* lgammal_r: (libc)Special Functions.
|
||
* link: (libc)Hard Links.
|
||
* lio_listio64: (libc)Asynchronous Reads/Writes.
|
||
* lio_listio: (libc)Asynchronous Reads/Writes.
|
||
* listen: (libc)Listening.
|
||
* llabs: (libc)Absolute Value.
|
||
* lldiv: (libc)Integer Division.
|
||
* llrint: (libc)Rounding Functions.
|
||
* llrintf: (libc)Rounding Functions.
|
||
* llrintl: (libc)Rounding Functions.
|
||
* llround: (libc)Rounding Functions.
|
||
* llroundf: (libc)Rounding Functions.
|
||
* llroundl: (libc)Rounding Functions.
|
||
* localeconv: (libc)The Lame Way to Locale Data.
|
||
* localtime: (libc)Broken-down Time.
|
||
* localtime_r: (libc)Broken-down Time.
|
||
* log10: (libc)Exponents and Logarithms.
|
||
* log10f: (libc)Exponents and Logarithms.
|
||
* log10l: (libc)Exponents and Logarithms.
|
||
* log1p: (libc)Exponents and Logarithms.
|
||
* log1pf: (libc)Exponents and Logarithms.
|
||
* log1pl: (libc)Exponents and Logarithms.
|
||
* log2: (libc)Exponents and Logarithms.
|
||
* log2f: (libc)Exponents and Logarithms.
|
||
* log2l: (libc)Exponents and Logarithms.
|
||
* log: (libc)Exponents and Logarithms.
|
||
* logb: (libc)Exponents and Logarithms.
|
||
* logbf: (libc)Exponents and Logarithms.
|
||
* logbl: (libc)Exponents and Logarithms.
|
||
* logf: (libc)Exponents and Logarithms.
|
||
* login: (libc)Logging In and Out.
|
||
* login_tty: (libc)Logging In and Out.
|
||
* logl: (libc)Exponents and Logarithms.
|
||
* logout: (libc)Logging In and Out.
|
||
* logwtmp: (libc)Logging In and Out.
|
||
* longjmp: (libc)Non-Local Details.
|
||
* lrand48: (libc)SVID Random.
|
||
* lrand48_r: (libc)SVID Random.
|
||
* lrint: (libc)Rounding Functions.
|
||
* lrintf: (libc)Rounding Functions.
|
||
* lrintl: (libc)Rounding Functions.
|
||
* lround: (libc)Rounding Functions.
|
||
* lroundf: (libc)Rounding Functions.
|
||
* lroundl: (libc)Rounding Functions.
|
||
* lsearch: (libc)Array Search Function.
|
||
* lseek64: (libc)File Position Primitive.
|
||
* lseek: (libc)File Position Primitive.
|
||
* lstat64: (libc)Reading Attributes.
|
||
* lstat: (libc)Reading Attributes.
|
||
* lutimes: (libc)File Times.
|
||
* madvise: (libc)Memory-mapped I/O.
|
||
* makecontext: (libc)System V contexts.
|
||
* mallinfo: (libc)Statistics of Malloc.
|
||
* malloc: (libc)Basic Allocation.
|
||
* mallopt: (libc)Malloc Tunable Parameters.
|
||
* mblen: (libc)Non-reentrant Character Conversion.
|
||
* mbrlen: (libc)Converting a Character.
|
||
* mbrtowc: (libc)Converting a Character.
|
||
* mbsinit: (libc)Keeping the state.
|
||
* mbsnrtowcs: (libc)Converting Strings.
|
||
* mbsrtowcs: (libc)Converting Strings.
|
||
* mbstowcs: (libc)Non-reentrant String Conversion.
|
||
* mbtowc: (libc)Non-reentrant Character Conversion.
|
||
* mcheck: (libc)Heap Consistency Checking.
|
||
* memalign: (libc)Aligned Memory Blocks.
|
||
* memccpy: (libc)Copying Strings and Arrays.
|
||
* memchr: (libc)Search Functions.
|
||
* memcmp: (libc)String/Array Comparison.
|
||
* memcpy: (libc)Copying Strings and Arrays.
|
||
* memfrob: (libc)Trivial Encryption.
|
||
* memmem: (libc)Search Functions.
|
||
* memmove: (libc)Copying Strings and Arrays.
|
||
* mempcpy: (libc)Copying Strings and Arrays.
|
||
* memrchr: (libc)Search Functions.
|
||
* memset: (libc)Copying Strings and Arrays.
|
||
* mkdir: (libc)Creating Directories.
|
||
* mkdtemp: (libc)Temporary Files.
|
||
* mkfifo: (libc)FIFO Special Files.
|
||
* mknod: (libc)Making Special Files.
|
||
* mkstemp: (libc)Temporary Files.
|
||
* mktemp: (libc)Temporary Files.
|
||
* mktime: (libc)Broken-down Time.
|
||
* mlock: (libc)Page Lock Functions.
|
||
* mlockall: (libc)Page Lock Functions.
|
||
* mmap64: (libc)Memory-mapped I/O.
|
||
* mmap: (libc)Memory-mapped I/O.
|
||
* modf: (libc)Rounding Functions.
|
||
* modff: (libc)Rounding Functions.
|
||
* modfl: (libc)Rounding Functions.
|
||
* mount: (libc)Mount-Unmount-Remount.
|
||
* mprobe: (libc)Heap Consistency Checking.
|
||
* mrand48: (libc)SVID Random.
|
||
* mrand48_r: (libc)SVID Random.
|
||
* mremap: (libc)Memory-mapped I/O.
|
||
* msync: (libc)Memory-mapped I/O.
|
||
* mtrace: (libc)Tracing malloc.
|
||
* munlock: (libc)Page Lock Functions.
|
||
* munlockall: (libc)Page Lock Functions.
|
||
* munmap: (libc)Memory-mapped I/O.
|
||
* muntrace: (libc)Tracing malloc.
|
||
* nan: (libc)FP Bit Twiddling.
|
||
* nanf: (libc)FP Bit Twiddling.
|
||
* nanl: (libc)FP Bit Twiddling.
|
||
* nanosleep: (libc)Sleeping.
|
||
* nearbyint: (libc)Rounding Functions.
|
||
* nearbyintf: (libc)Rounding Functions.
|
||
* nearbyintl: (libc)Rounding Functions.
|
||
* nextafter: (libc)FP Bit Twiddling.
|
||
* nextafterf: (libc)FP Bit Twiddling.
|
||
* nextafterl: (libc)FP Bit Twiddling.
|
||
* nexttoward: (libc)FP Bit Twiddling.
|
||
* nexttowardf: (libc)FP Bit Twiddling.
|
||
* nexttowardl: (libc)FP Bit Twiddling.
|
||
* nftw64: (libc)Working with Directory Trees.
|
||
* nftw: (libc)Working with Directory Trees.
|
||
* ngettext: (libc)Advanced gettext functions.
|
||
* nice: (libc)Traditional Scheduling Functions.
|
||
* nl_langinfo: (libc)The Elegant and Fast Way.
|
||
* nrand48: (libc)SVID Random.
|
||
* nrand48_r: (libc)SVID Random.
|
||
* ntohl: (libc)Byte Order.
|
||
* ntohs: (libc)Byte Order.
|
||
* ntp_adjtime: (libc)High Accuracy Clock.
|
||
* ntp_gettime: (libc)High Accuracy Clock.
|
||
* obstack_1grow: (libc)Growing Objects.
|
||
* obstack_1grow_fast: (libc)Extra Fast Growing.
|
||
* obstack_alignment_mask: (libc)Obstacks Data Alignment.
|
||
* obstack_alloc: (libc)Allocation in an Obstack.
|
||
* obstack_base: (libc)Status of an Obstack.
|
||
* obstack_blank: (libc)Growing Objects.
|
||
* obstack_blank_fast: (libc)Extra Fast Growing.
|
||
* obstack_chunk_size: (libc)Obstack Chunks.
|
||
* obstack_copy0: (libc)Allocation in an Obstack.
|
||
* obstack_copy: (libc)Allocation in an Obstack.
|
||
* obstack_finish: (libc)Growing Objects.
|
||
* obstack_free: (libc)Freeing Obstack Objects.
|
||
* obstack_grow0: (libc)Growing Objects.
|
||
* obstack_grow: (libc)Growing Objects.
|
||
* obstack_init: (libc)Preparing for Obstacks.
|
||
* obstack_int_grow: (libc)Growing Objects.
|
||
* obstack_int_grow_fast: (libc)Extra Fast Growing.
|
||
* obstack_next_free: (libc)Status of an Obstack.
|
||
* obstack_object_size: (libc)Growing Objects.
|
||
* obstack_object_size: (libc)Status of an Obstack.
|
||
* obstack_printf: (libc)Dynamic Output.
|
||
* obstack_ptr_grow: (libc)Growing Objects.
|
||
* obstack_ptr_grow_fast: (libc)Extra Fast Growing.
|
||
* obstack_room: (libc)Extra Fast Growing.
|
||
* obstack_vprintf: (libc)Variable Arguments Output.
|
||
* offsetof: (libc)Structure Measurement.
|
||
* on_exit: (libc)Cleanups on Exit.
|
||
* open64: (libc)Opening and Closing Files.
|
||
* open: (libc)Opening and Closing Files.
|
||
* open_memstream: (libc)String Streams.
|
||
* opendir: (libc)Opening a Directory.
|
||
* openlog: (libc)openlog.
|
||
* openpty: (libc)Pseudo-Terminal Pairs.
|
||
* parse_printf_format: (libc)Parsing a Template String.
|
||
* pathconf: (libc)Pathconf.
|
||
* pause: (libc)Using Pause.
|
||
* pclose: (libc)Pipe to a Subprocess.
|
||
* perror: (libc)Error Messages.
|
||
* pipe: (libc)Creating a Pipe.
|
||
* popen: (libc)Pipe to a Subprocess.
|
||
* posix_fallocate64: (libc)Storage Allocation.
|
||
* posix_fallocate: (libc)Storage Allocation.
|
||
* posix_memalign: (libc)Aligned Memory Blocks.
|
||
* pow10: (libc)Exponents and Logarithms.
|
||
* pow10f: (libc)Exponents and Logarithms.
|
||
* pow10l: (libc)Exponents and Logarithms.
|
||
* pow: (libc)Exponents and Logarithms.
|
||
* powf: (libc)Exponents and Logarithms.
|
||
* powl: (libc)Exponents and Logarithms.
|
||
* pread64: (libc)I/O Primitives.
|
||
* pread: (libc)I/O Primitives.
|
||
* printf: (libc)Formatted Output Functions.
|
||
* printf_size: (libc)Predefined Printf Handlers.
|
||
* printf_size_info: (libc)Predefined Printf Handlers.
|
||
* psignal: (libc)Signal Messages.
|
||
* pthread_getattr_default_np: (libc)Default Thread Attributes.
|
||
* pthread_getspecific: (libc)Thread-specific Data.
|
||
* pthread_key_create: (libc)Thread-specific Data.
|
||
* pthread_key_delete: (libc)Thread-specific Data.
|
||
* pthread_setattr_default_np: (libc)Default Thread Attributes.
|
||
* pthread_setspecific: (libc)Thread-specific Data.
|
||
* ptsname: (libc)Allocation.
|
||
* ptsname_r: (libc)Allocation.
|
||
* putc: (libc)Simple Output.
|
||
* putc_unlocked: (libc)Simple Output.
|
||
* putchar: (libc)Simple Output.
|
||
* putchar_unlocked: (libc)Simple Output.
|
||
* putenv: (libc)Environment Access.
|
||
* putpwent: (libc)Writing a User Entry.
|
||
* puts: (libc)Simple Output.
|
||
* pututline: (libc)Manipulating the Database.
|
||
* pututxline: (libc)XPG Functions.
|
||
* putw: (libc)Simple Output.
|
||
* putwc: (libc)Simple Output.
|
||
* putwc_unlocked: (libc)Simple Output.
|
||
* putwchar: (libc)Simple Output.
|
||
* putwchar_unlocked: (libc)Simple Output.
|
||
* pwrite64: (libc)I/O Primitives.
|
||
* pwrite: (libc)I/O Primitives.
|
||
* qecvt: (libc)System V Number Conversion.
|
||
* qecvt_r: (libc)System V Number Conversion.
|
||
* qfcvt: (libc)System V Number Conversion.
|
||
* qfcvt_r: (libc)System V Number Conversion.
|
||
* qgcvt: (libc)System V Number Conversion.
|
||
* qsort: (libc)Array Sort Function.
|
||
* raise: (libc)Signaling Yourself.
|
||
* rand: (libc)ISO Random.
|
||
* rand_r: (libc)ISO Random.
|
||
* random: (libc)BSD Random.
|
||
* random_r: (libc)BSD Random.
|
||
* rawmemchr: (libc)Search Functions.
|
||
* read: (libc)I/O Primitives.
|
||
* readdir64: (libc)Reading/Closing Directory.
|
||
* readdir64_r: (libc)Reading/Closing Directory.
|
||
* readdir: (libc)Reading/Closing Directory.
|
||
* readdir_r: (libc)Reading/Closing Directory.
|
||
* readlink: (libc)Symbolic Links.
|
||
* readv: (libc)Scatter-Gather.
|
||
* realloc: (libc)Changing Block Size.
|
||
* realpath: (libc)Symbolic Links.
|
||
* recv: (libc)Receiving Data.
|
||
* recvfrom: (libc)Receiving Datagrams.
|
||
* recvmsg: (libc)Receiving Datagrams.
|
||
* regcomp: (libc)POSIX Regexp Compilation.
|
||
* regerror: (libc)Regexp Cleanup.
|
||
* regexec: (libc)Matching POSIX Regexps.
|
||
* regfree: (libc)Regexp Cleanup.
|
||
* register_printf_function: (libc)Registering New Conversions.
|
||
* remainder: (libc)Remainder Functions.
|
||
* remainderf: (libc)Remainder Functions.
|
||
* remainderl: (libc)Remainder Functions.
|
||
* remove: (libc)Deleting Files.
|
||
* rename: (libc)Renaming Files.
|
||
* rewind: (libc)File Positioning.
|
||
* rewinddir: (libc)Random Access Directory.
|
||
* rindex: (libc)Search Functions.
|
||
* rint: (libc)Rounding Functions.
|
||
* rintf: (libc)Rounding Functions.
|
||
* rintl: (libc)Rounding Functions.
|
||
* rmdir: (libc)Deleting Files.
|
||
* round: (libc)Rounding Functions.
|
||
* roundf: (libc)Rounding Functions.
|
||
* roundl: (libc)Rounding Functions.
|
||
* rpmatch: (libc)Yes-or-No Questions.
|
||
* sbrk: (libc)Resizing the Data Segment.
|
||
* scalb: (libc)Normalization Functions.
|
||
* scalbf: (libc)Normalization Functions.
|
||
* scalbl: (libc)Normalization Functions.
|
||
* scalbln: (libc)Normalization Functions.
|
||
* scalblnf: (libc)Normalization Functions.
|
||
* scalblnl: (libc)Normalization Functions.
|
||
* scalbn: (libc)Normalization Functions.
|
||
* scalbnf: (libc)Normalization Functions.
|
||
* scalbnl: (libc)Normalization Functions.
|
||
* scandir64: (libc)Scanning Directory Content.
|
||
* scandir: (libc)Scanning Directory Content.
|
||
* scanf: (libc)Formatted Input Functions.
|
||
* sched_get_priority_max: (libc)Basic Scheduling Functions.
|
||
* sched_get_priority_min: (libc)Basic Scheduling Functions.
|
||
* sched_getaffinity: (libc)CPU Affinity.
|
||
* sched_getparam: (libc)Basic Scheduling Functions.
|
||
* sched_getscheduler: (libc)Basic Scheduling Functions.
|
||
* sched_rr_get_interval: (libc)Basic Scheduling Functions.
|
||
* sched_setaffinity: (libc)CPU Affinity.
|
||
* sched_setparam: (libc)Basic Scheduling Functions.
|
||
* sched_setscheduler: (libc)Basic Scheduling Functions.
|
||
* sched_yield: (libc)Basic Scheduling Functions.
|
||
* secure_getenv: (libc)Environment Access.
|
||
* seed48: (libc)SVID Random.
|
||
* seed48_r: (libc)SVID Random.
|
||
* seekdir: (libc)Random Access Directory.
|
||
* select: (libc)Waiting for I/O.
|
||
* sem_close: (libc)Semaphores.
|
||
* sem_destroy: (libc)Semaphores.
|
||
* sem_getvalue: (libc)Semaphores.
|
||
* sem_init: (libc)Semaphores.
|
||
* sem_open: (libc)Semaphores.
|
||
* sem_post: (libc)Semaphores.
|
||
* sem_timedwait: (libc)Semaphores.
|
||
* sem_trywait: (libc)Semaphores.
|
||
* sem_unlink: (libc)Semaphores.
|
||
* sem_wait: (libc)Semaphores.
|
||
* semctl: (libc)Semaphores.
|
||
* semget: (libc)Semaphores.
|
||
* semop: (libc)Semaphores.
|
||
* semtimedop: (libc)Semaphores.
|
||
* send: (libc)Sending Data.
|
||
* sendmsg: (libc)Receiving Datagrams.
|
||
* sendto: (libc)Sending Datagrams.
|
||
* setbuf: (libc)Controlling Buffering.
|
||
* setbuffer: (libc)Controlling Buffering.
|
||
* setcontext: (libc)System V contexts.
|
||
* setdomainname: (libc)Host Identification.
|
||
* setegid: (libc)Setting Groups.
|
||
* setenv: (libc)Environment Access.
|
||
* seteuid: (libc)Setting User ID.
|
||
* setfsent: (libc)fstab.
|
||
* setgid: (libc)Setting Groups.
|
||
* setgrent: (libc)Scanning All Groups.
|
||
* setgroups: (libc)Setting Groups.
|
||
* sethostent: (libc)Host Names.
|
||
* sethostid: (libc)Host Identification.
|
||
* sethostname: (libc)Host Identification.
|
||
* setitimer: (libc)Setting an Alarm.
|
||
* setjmp: (libc)Non-Local Details.
|
||
* setkey: (libc)DES Encryption.
|
||
* setkey_r: (libc)DES Encryption.
|
||
* setlinebuf: (libc)Controlling Buffering.
|
||
* setlocale: (libc)Setting the Locale.
|
||
* setlogmask: (libc)setlogmask.
|
||
* setmntent: (libc)mtab.
|
||
* setnetent: (libc)Networks Database.
|
||
* setnetgrent: (libc)Lookup Netgroup.
|
||
* setpgid: (libc)Process Group Functions.
|
||
* setpgrp: (libc)Process Group Functions.
|
||
* setpriority: (libc)Traditional Scheduling Functions.
|
||
* setprotoent: (libc)Protocols Database.
|
||
* setpwent: (libc)Scanning All Users.
|
||
* setregid: (libc)Setting Groups.
|
||
* setreuid: (libc)Setting User ID.
|
||
* setrlimit64: (libc)Limits on Resources.
|
||
* setrlimit: (libc)Limits on Resources.
|
||
* setservent: (libc)Services Database.
|
||
* setsid: (libc)Process Group Functions.
|
||
* setsockopt: (libc)Socket Option Functions.
|
||
* setstate: (libc)BSD Random.
|
||
* setstate_r: (libc)BSD Random.
|
||
* settimeofday: (libc)High-Resolution Calendar.
|
||
* setuid: (libc)Setting User ID.
|
||
* setutent: (libc)Manipulating the Database.
|
||
* setutxent: (libc)XPG Functions.
|
||
* setvbuf: (libc)Controlling Buffering.
|
||
* shm_open: (libc)Memory-mapped I/O.
|
||
* shm_unlink: (libc)Memory-mapped I/O.
|
||
* shutdown: (libc)Closing a Socket.
|
||
* sigaction: (libc)Advanced Signal Handling.
|
||
* sigaddset: (libc)Signal Sets.
|
||
* sigaltstack: (libc)Signal Stack.
|
||
* sigblock: (libc)BSD Signal Handling.
|
||
* sigdelset: (libc)Signal Sets.
|
||
* sigemptyset: (libc)Signal Sets.
|
||
* sigfillset: (libc)Signal Sets.
|
||
* siginterrupt: (libc)BSD Signal Handling.
|
||
* sigismember: (libc)Signal Sets.
|
||
* siglongjmp: (libc)Non-Local Exits and Signals.
|
||
* sigmask: (libc)BSD Signal Handling.
|
||
* signal: (libc)Basic Signal Handling.
|
||
* signbit: (libc)FP Bit Twiddling.
|
||
* significand: (libc)Normalization Functions.
|
||
* significandf: (libc)Normalization Functions.
|
||
* significandl: (libc)Normalization Functions.
|
||
* sigpause: (libc)BSD Signal Handling.
|
||
* sigpending: (libc)Checking for Pending Signals.
|
||
* sigprocmask: (libc)Process Signal Mask.
|
||
* sigsetjmp: (libc)Non-Local Exits and Signals.
|
||
* sigsetmask: (libc)BSD Signal Handling.
|
||
* sigstack: (libc)Signal Stack.
|
||
* sigsuspend: (libc)Sigsuspend.
|
||
* sin: (libc)Trig Functions.
|
||
* sincos: (libc)Trig Functions.
|
||
* sincosf: (libc)Trig Functions.
|
||
* sincosl: (libc)Trig Functions.
|
||
* sinf: (libc)Trig Functions.
|
||
* sinh: (libc)Hyperbolic Functions.
|
||
* sinhf: (libc)Hyperbolic Functions.
|
||
* sinhl: (libc)Hyperbolic Functions.
|
||
* sinl: (libc)Trig Functions.
|
||
* sleep: (libc)Sleeping.
|
||
* snprintf: (libc)Formatted Output Functions.
|
||
* socket: (libc)Creating a Socket.
|
||
* socketpair: (libc)Socket Pairs.
|
||
* sprintf: (libc)Formatted Output Functions.
|
||
* sqrt: (libc)Exponents and Logarithms.
|
||
* sqrtf: (libc)Exponents and Logarithms.
|
||
* sqrtl: (libc)Exponents and Logarithms.
|
||
* srand48: (libc)SVID Random.
|
||
* srand48_r: (libc)SVID Random.
|
||
* srand: (libc)ISO Random.
|
||
* srandom: (libc)BSD Random.
|
||
* srandom_r: (libc)BSD Random.
|
||
* sscanf: (libc)Formatted Input Functions.
|
||
* ssignal: (libc)Basic Signal Handling.
|
||
* stat64: (libc)Reading Attributes.
|
||
* stat: (libc)Reading Attributes.
|
||
* stime: (libc)Simple Calendar Time.
|
||
* stpcpy: (libc)Copying Strings and Arrays.
|
||
* stpncpy: (libc)Truncating Strings.
|
||
* strcasecmp: (libc)String/Array Comparison.
|
||
* strcasestr: (libc)Search Functions.
|
||
* strcat: (libc)Concatenating Strings.
|
||
* strchr: (libc)Search Functions.
|
||
* strchrnul: (libc)Search Functions.
|
||
* strcmp: (libc)String/Array Comparison.
|
||
* strcoll: (libc)Collation Functions.
|
||
* strcpy: (libc)Copying Strings and Arrays.
|
||
* strcspn: (libc)Search Functions.
|
||
* strdup: (libc)Copying Strings and Arrays.
|
||
* strdupa: (libc)Copying Strings and Arrays.
|
||
* strerror: (libc)Error Messages.
|
||
* strerror_r: (libc)Error Messages.
|
||
* strfmon: (libc)Formatting Numbers.
|
||
* strfry: (libc)strfry.
|
||
* strftime: (libc)Formatting Calendar Time.
|
||
* strlen: (libc)String Length.
|
||
* strncasecmp: (libc)String/Array Comparison.
|
||
* strncat: (libc)Truncating Strings.
|
||
* strncmp: (libc)String/Array Comparison.
|
||
* strncpy: (libc)Truncating Strings.
|
||
* strndup: (libc)Truncating Strings.
|
||
* strndupa: (libc)Truncating Strings.
|
||
* strnlen: (libc)String Length.
|
||
* strpbrk: (libc)Search Functions.
|
||
* strptime: (libc)Low-Level Time String Parsing.
|
||
* strrchr: (libc)Search Functions.
|
||
* strsep: (libc)Finding Tokens in a String.
|
||
* strsignal: (libc)Signal Messages.
|
||
* strspn: (libc)Search Functions.
|
||
* strstr: (libc)Search Functions.
|
||
* strtod: (libc)Parsing of Floats.
|
||
* strtof: (libc)Parsing of Floats.
|
||
* strtoimax: (libc)Parsing of Integers.
|
||
* strtok: (libc)Finding Tokens in a String.
|
||
* strtok_r: (libc)Finding Tokens in a String.
|
||
* strtol: (libc)Parsing of Integers.
|
||
* strtold: (libc)Parsing of Floats.
|
||
* strtoll: (libc)Parsing of Integers.
|
||
* strtoq: (libc)Parsing of Integers.
|
||
* strtoul: (libc)Parsing of Integers.
|
||
* strtoull: (libc)Parsing of Integers.
|
||
* strtoumax: (libc)Parsing of Integers.
|
||
* strtouq: (libc)Parsing of Integers.
|
||
* strverscmp: (libc)String/Array Comparison.
|
||
* strxfrm: (libc)Collation Functions.
|
||
* stty: (libc)BSD Terminal Modes.
|
||
* swapcontext: (libc)System V contexts.
|
||
* swprintf: (libc)Formatted Output Functions.
|
||
* swscanf: (libc)Formatted Input Functions.
|
||
* symlink: (libc)Symbolic Links.
|
||
* sync: (libc)Synchronizing I/O.
|
||
* syscall: (libc)System Calls.
|
||
* sysconf: (libc)Sysconf Definition.
|
||
* sysctl: (libc)System Parameters.
|
||
* syslog: (libc)syslog; vsyslog.
|
||
* system: (libc)Running a Command.
|
||
* sysv_signal: (libc)Basic Signal Handling.
|
||
* tan: (libc)Trig Functions.
|
||
* tanf: (libc)Trig Functions.
|
||
* tanh: (libc)Hyperbolic Functions.
|
||
* tanhf: (libc)Hyperbolic Functions.
|
||
* tanhl: (libc)Hyperbolic Functions.
|
||
* tanl: (libc)Trig Functions.
|
||
* tcdrain: (libc)Line Control.
|
||
* tcflow: (libc)Line Control.
|
||
* tcflush: (libc)Line Control.
|
||
* tcgetattr: (libc)Mode Functions.
|
||
* tcgetpgrp: (libc)Terminal Access Functions.
|
||
* tcgetsid: (libc)Terminal Access Functions.
|
||
* tcsendbreak: (libc)Line Control.
|
||
* tcsetattr: (libc)Mode Functions.
|
||
* tcsetpgrp: (libc)Terminal Access Functions.
|
||
* tdelete: (libc)Tree Search Function.
|
||
* tdestroy: (libc)Tree Search Function.
|
||
* telldir: (libc)Random Access Directory.
|
||
* tempnam: (libc)Temporary Files.
|
||
* textdomain: (libc)Locating gettext catalog.
|
||
* tfind: (libc)Tree Search Function.
|
||
* tgamma: (libc)Special Functions.
|
||
* tgammaf: (libc)Special Functions.
|
||
* tgammal: (libc)Special Functions.
|
||
* time: (libc)Simple Calendar Time.
|
||
* timegm: (libc)Broken-down Time.
|
||
* timelocal: (libc)Broken-down Time.
|
||
* times: (libc)Processor Time.
|
||
* tmpfile64: (libc)Temporary Files.
|
||
* tmpfile: (libc)Temporary Files.
|
||
* tmpnam: (libc)Temporary Files.
|
||
* tmpnam_r: (libc)Temporary Files.
|
||
* toascii: (libc)Case Conversion.
|
||
* tolower: (libc)Case Conversion.
|
||
* toupper: (libc)Case Conversion.
|
||
* towctrans: (libc)Wide Character Case Conversion.
|
||
* towlower: (libc)Wide Character Case Conversion.
|
||
* towupper: (libc)Wide Character Case Conversion.
|
||
* trunc: (libc)Rounding Functions.
|
||
* truncate64: (libc)File Size.
|
||
* truncate: (libc)File Size.
|
||
* truncf: (libc)Rounding Functions.
|
||
* truncl: (libc)Rounding Functions.
|
||
* tsearch: (libc)Tree Search Function.
|
||
* ttyname: (libc)Is It a Terminal.
|
||
* ttyname_r: (libc)Is It a Terminal.
|
||
* twalk: (libc)Tree Search Function.
|
||
* tzset: (libc)Time Zone Functions.
|
||
* ulimit: (libc)Limits on Resources.
|
||
* umask: (libc)Setting Permissions.
|
||
* umount2: (libc)Mount-Unmount-Remount.
|
||
* umount: (libc)Mount-Unmount-Remount.
|
||
* uname: (libc)Platform Type.
|
||
* ungetc: (libc)How Unread.
|
||
* ungetwc: (libc)How Unread.
|
||
* unlink: (libc)Deleting Files.
|
||
* unlockpt: (libc)Allocation.
|
||
* unsetenv: (libc)Environment Access.
|
||
* updwtmp: (libc)Manipulating the Database.
|
||
* utime: (libc)File Times.
|
||
* utimes: (libc)File Times.
|
||
* utmpname: (libc)Manipulating the Database.
|
||
* utmpxname: (libc)XPG Functions.
|
||
* va_arg: (libc)Argument Macros.
|
||
* va_copy: (libc)Argument Macros.
|
||
* va_end: (libc)Argument Macros.
|
||
* va_start: (libc)Argument Macros.
|
||
* valloc: (libc)Aligned Memory Blocks.
|
||
* vasprintf: (libc)Variable Arguments Output.
|
||
* verr: (libc)Error Messages.
|
||
* verrx: (libc)Error Messages.
|
||
* versionsort64: (libc)Scanning Directory Content.
|
||
* versionsort: (libc)Scanning Directory Content.
|
||
* vfork: (libc)Creating a Process.
|
||
* vfprintf: (libc)Variable Arguments Output.
|
||
* vfscanf: (libc)Variable Arguments Input.
|
||
* vfwprintf: (libc)Variable Arguments Output.
|
||
* vfwscanf: (libc)Variable Arguments Input.
|
||
* vlimit: (libc)Limits on Resources.
|
||
* vprintf: (libc)Variable Arguments Output.
|
||
* vscanf: (libc)Variable Arguments Input.
|
||
* vsnprintf: (libc)Variable Arguments Output.
|
||
* vsprintf: (libc)Variable Arguments Output.
|
||
* vsscanf: (libc)Variable Arguments Input.
|
||
* vswprintf: (libc)Variable Arguments Output.
|
||
* vswscanf: (libc)Variable Arguments Input.
|
||
* vsyslog: (libc)syslog; vsyslog.
|
||
* vtimes: (libc)Resource Usage.
|
||
* vwarn: (libc)Error Messages.
|
||
* vwarnx: (libc)Error Messages.
|
||
* vwprintf: (libc)Variable Arguments Output.
|
||
* vwscanf: (libc)Variable Arguments Input.
|
||
* wait3: (libc)BSD Wait Functions.
|
||
* wait4: (libc)Process Completion.
|
||
* wait: (libc)Process Completion.
|
||
* waitpid: (libc)Process Completion.
|
||
* warn: (libc)Error Messages.
|
||
* warnx: (libc)Error Messages.
|
||
* wcpcpy: (libc)Copying Strings and Arrays.
|
||
* wcpncpy: (libc)Truncating Strings.
|
||
* wcrtomb: (libc)Converting a Character.
|
||
* wcscasecmp: (libc)String/Array Comparison.
|
||
* wcscat: (libc)Concatenating Strings.
|
||
* wcschr: (libc)Search Functions.
|
||
* wcschrnul: (libc)Search Functions.
|
||
* wcscmp: (libc)String/Array Comparison.
|
||
* wcscoll: (libc)Collation Functions.
|
||
* wcscpy: (libc)Copying Strings and Arrays.
|
||
* wcscspn: (libc)Search Functions.
|
||
* wcsdup: (libc)Copying Strings and Arrays.
|
||
* wcsftime: (libc)Formatting Calendar Time.
|
||
* wcslen: (libc)String Length.
|
||
* wcsncasecmp: (libc)String/Array Comparison.
|
||
* wcsncat: (libc)Truncating Strings.
|
||
* wcsncmp: (libc)String/Array Comparison.
|
||
* wcsncpy: (libc)Truncating Strings.
|
||
* wcsnlen: (libc)String Length.
|
||
* wcsnrtombs: (libc)Converting Strings.
|
||
* wcspbrk: (libc)Search Functions.
|
||
* wcsrchr: (libc)Search Functions.
|
||
* wcsrtombs: (libc)Converting Strings.
|
||
* wcsspn: (libc)Search Functions.
|
||
* wcsstr: (libc)Search Functions.
|
||
* wcstod: (libc)Parsing of Floats.
|
||
* wcstof: (libc)Parsing of Floats.
|
||
* wcstoimax: (libc)Parsing of Integers.
|
||
* wcstok: (libc)Finding Tokens in a String.
|
||
* wcstol: (libc)Parsing of Integers.
|
||
* wcstold: (libc)Parsing of Floats.
|
||
* wcstoll: (libc)Parsing of Integers.
|
||
* wcstombs: (libc)Non-reentrant String Conversion.
|
||
* wcstoq: (libc)Parsing of Integers.
|
||
* wcstoul: (libc)Parsing of Integers.
|
||
* wcstoull: (libc)Parsing of Integers.
|
||
* wcstoumax: (libc)Parsing of Integers.
|
||
* wcstouq: (libc)Parsing of Integers.
|
||
* wcswcs: (libc)Search Functions.
|
||
* wcsxfrm: (libc)Collation Functions.
|
||
* wctob: (libc)Converting a Character.
|
||
* wctomb: (libc)Non-reentrant Character Conversion.
|
||
* wctrans: (libc)Wide Character Case Conversion.
|
||
* wctype: (libc)Classification of Wide Characters.
|
||
* wmemchr: (libc)Search Functions.
|
||
* wmemcmp: (libc)String/Array Comparison.
|
||
* wmemcpy: (libc)Copying Strings and Arrays.
|
||
* wmemmove: (libc)Copying Strings and Arrays.
|
||
* wmempcpy: (libc)Copying Strings and Arrays.
|
||
* wmemset: (libc)Copying Strings and Arrays.
|
||
* wordexp: (libc)Calling Wordexp.
|
||
* wordfree: (libc)Calling Wordexp.
|
||
* wprintf: (libc)Formatted Output Functions.
|
||
* write: (libc)I/O Primitives.
|
||
* writev: (libc)Scatter-Gather.
|
||
* wscanf: (libc)Formatted Input Functions.
|
||
* y0: (libc)Special Functions.
|
||
* y0f: (libc)Special Functions.
|
||
* y0l: (libc)Special Functions.
|
||
* y1: (libc)Special Functions.
|
||
* y1f: (libc)Special Functions.
|
||
* y1l: (libc)Special Functions.
|
||
* yn: (libc)Special Functions.
|
||
* ynf: (libc)Special Functions.
|
||
* ynl: (libc)Special Functions.
|
||
END-INFO-DIR-ENTRY
|
||
|
||
|
||
File: libc.info, Node: Multibyte Conversion Example, Prev: Converting Strings, Up: Restartable multibyte conversion
|
||
|
||
6.3.5 A Complete Multibyte Conversion Example
|
||
---------------------------------------------
|
||
|
||
The example programs given in the last sections are only brief and do
|
||
not contain all the error checking, etc. Presented here is a complete
|
||
and documented example. It features the ‘mbrtowc’ function but it
|
||
should be easy to derive versions using the other functions.
|
||
|
||
int
|
||
file_mbsrtowcs (int input, int output)
|
||
{
|
||
/* Note the use of ‘MB_LEN_MAX’.
|
||
‘MB_CUR_MAX’ cannot portably be used here. */
|
||
char buffer[BUFSIZ + MB_LEN_MAX];
|
||
mbstate_t state;
|
||
int filled = 0;
|
||
int eof = 0;
|
||
|
||
/* Initialize the state. */
|
||
memset (&state, '\0', sizeof (state));
|
||
|
||
while (!eof)
|
||
{
|
||
ssize_t nread;
|
||
ssize_t nwrite;
|
||
char *inp = buffer;
|
||
wchar_t outbuf[BUFSIZ];
|
||
wchar_t *outp = outbuf;
|
||
|
||
/* Fill up the buffer from the input file. */
|
||
nread = read (input, buffer + filled, BUFSIZ);
|
||
if (nread < 0)
|
||
{
|
||
perror ("read");
|
||
return 0;
|
||
}
|
||
/* If we reach end of file, make a note to read no more. */
|
||
if (nread == 0)
|
||
eof = 1;
|
||
|
||
/* ‘filled’ is now the number of bytes in ‘buffer’. */
|
||
filled += nread;
|
||
|
||
/* Convert those bytes to wide characters–as many as we can. */
|
||
while (1)
|
||
{
|
||
size_t thislen = mbrtowc (outp, inp, filled, &state);
|
||
/* Stop converting at invalid character;
|
||
this can mean we have read just the first part
|
||
of a valid character. */
|
||
if (thislen == (size_t) -1)
|
||
break;
|
||
/* We want to handle embedded NUL bytes
|
||
but the return value is 0. Correct this. */
|
||
if (thislen == 0)
|
||
thislen = 1;
|
||
/* Advance past this character. */
|
||
inp += thislen;
|
||
filled -= thislen;
|
||
++outp;
|
||
}
|
||
|
||
/* Write the wide characters we just made. */
|
||
nwrite = write (output, outbuf,
|
||
(outp - outbuf) * sizeof (wchar_t));
|
||
if (nwrite < 0)
|
||
{
|
||
perror ("write");
|
||
return 0;
|
||
}
|
||
|
||
/* See if we have a _real_ invalid character. */
|
||
if ((eof && filled > 0) || filled >= MB_CUR_MAX)
|
||
{
|
||
error (0, 0, "invalid multibyte character");
|
||
return 0;
|
||
}
|
||
|
||
/* If any characters must be carried forward,
|
||
put them at the beginning of ‘buffer’. */
|
||
if (filled > 0)
|
||
memmove (buffer, inp, filled);
|
||
}
|
||
|
||
return 1;
|
||
}
|
||
|
||
|
||
File: libc.info, Node: Non-reentrant Conversion, Next: Generic Charset Conversion, Prev: Restartable multibyte conversion, Up: Character Set Handling
|
||
|
||
6.4 Non-reentrant Conversion Function
|
||
=====================================
|
||
|
||
The functions described in the previous section are defined in Amendment 1
|
||
to ISO C90, but the original ISO C90 standard also contained functions
|
||
for character set conversion. The reason that these original functions
|
||
are not described first is that they are almost entirely useless.
|
||
|
||
The problem is that all the conversion functions described in the
|
||
original ISO C90 use a local state. Using a local state implies that
|
||
multiple conversions at the same time (not only when using threads)
|
||
cannot be done, and that you cannot first convert single characters and
|
||
then strings since you cannot tell the conversion functions which state
|
||
to use.
|
||
|
||
These original functions are therefore usable only in a very limited
|
||
set of situations. One must complete converting the entire string
|
||
before starting a new one, and each string/text must be converted with
|
||
the same function (there is no problem with the library itself; it is
|
||
guaranteed that no library function changes the state of any of these
|
||
functions). *For the above reasons it is strongly recommended that the
|
||
functions described in the previous section be used in place of
|
||
non-reentrant conversion functions.*
|
||
|
||
* Menu:
|
||
|
||
* Non-reentrant Character Conversion:: Non-reentrant Conversion of Single
|
||
Characters.
|
||
* Non-reentrant String Conversion:: Non-reentrant Conversion of Strings.
|
||
* Shift State:: States in Non-reentrant Functions.
|
||
|
||
|
||
File: libc.info, Node: Non-reentrant Character Conversion, Next: Non-reentrant String Conversion, Up: Non-reentrant Conversion
|
||
|
||
6.4.1 Non-reentrant Conversion of Single Characters
|
||
---------------------------------------------------
|
||
|
||
-- Function: int mbtowc (wchar_t *restrict RESULT, const char *restrict
|
||
STRING, size_t SIZE)
|
||
Preliminary: | MT-Unsafe race | AS-Unsafe corrupt heap lock dlopen
|
||
| AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘mbtowc’ (“multibyte to wide character”) function when called
|
||
with non-null STRING converts the first multibyte character
|
||
beginning at STRING to its corresponding wide character code. It
|
||
stores the result in ‘*RESULT’.
|
||
|
||
‘mbtowc’ never examines more than SIZE bytes. (The idea is to
|
||
supply for SIZE the number of bytes of data you have in hand.)
|
||
|
||
‘mbtowc’ with non-null STRING distinguishes three possibilities:
|
||
the first SIZE bytes at STRING start with valid multibyte
|
||
characters, they start with an invalid byte sequence or just part
|
||
of a character, or STRING points to an empty string (a null
|
||
character).
|
||
|
||
For a valid multibyte character, ‘mbtowc’ converts it to a wide
|
||
character and stores that in ‘*RESULT’, and returns the number of
|
||
bytes in that character (always at least 1 and never more than
|
||
SIZE).
|
||
|
||
For an invalid byte sequence, ‘mbtowc’ returns -1. For an empty
|
||
string, it returns 0, also storing ‘'\0'’ in ‘*RESULT’.
|
||
|
||
If the multibyte character code uses shift characters, then
|
||
‘mbtowc’ maintains and updates a shift state as it scans. If you
|
||
call ‘mbtowc’ with a null pointer for STRING, that initializes the
|
||
shift state to its standard initial value. It also returns nonzero
|
||
if the multibyte character code in use actually has a shift state.
|
||
*Note Shift State::.
|
||
|
||
-- Function: int wctomb (char *STRING, wchar_t WCHAR)
|
||
Preliminary: | MT-Unsafe race | AS-Unsafe corrupt heap lock dlopen
|
||
| AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘wctomb’ (“wide character to multibyte”) function converts the
|
||
wide character code WCHAR to its corresponding multibyte character
|
||
sequence, and stores the result in bytes starting at STRING. At
|
||
most ‘MB_CUR_MAX’ bytes are stored.
|
||
|
||
‘wctomb’ with non-null STRING distinguishes three possibilities for
|
||
WCHAR: a valid wide character code (one that can be translated to a
|
||
multibyte character), an invalid code, and ‘L'\0'’.
|
||
|
||
Given a valid code, ‘wctomb’ converts it to a multibyte character,
|
||
storing the bytes starting at STRING. Then it returns the number
|
||
of bytes in that character (always at least 1 and never more than
|
||
‘MB_CUR_MAX’).
|
||
|
||
If WCHAR is an invalid wide character code, ‘wctomb’ returns -1.
|
||
If WCHAR is ‘L'\0'’, it returns ‘0’, also storing ‘'\0'’ in
|
||
‘*STRING’.
|
||
|
||
If the multibyte character code uses shift characters, then
|
||
‘wctomb’ maintains and updates a shift state as it scans. If you
|
||
call ‘wctomb’ with a null pointer for STRING, that initializes the
|
||
shift state to its standard initial value. It also returns nonzero
|
||
if the multibyte character code in use actually has a shift state.
|
||
*Note Shift State::.
|
||
|
||
Calling this function with a WCHAR argument of zero when STRING is
|
||
not null has the side-effect of reinitializing the stored shift
|
||
state _as well as_ storing the multibyte character ‘'\0'’ and
|
||
returning 0.
|
||
|
||
Similar to ‘mbrlen’ there is also a non-reentrant function that
|
||
computes the length of a multibyte character. It can be defined in
|
||
terms of ‘mbtowc’.
|
||
|
||
-- Function: int mblen (const char *STRING, size_t SIZE)
|
||
Preliminary: | MT-Unsafe race | AS-Unsafe corrupt heap lock dlopen
|
||
| AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘mblen’ function with a non-null STRING argument returns the
|
||
number of bytes that make up the multibyte character beginning at
|
||
STRING, never examining more than SIZE bytes. (The idea is to
|
||
supply for SIZE the number of bytes of data you have in hand.)
|
||
|
||
The return value of ‘mblen’ distinguishes three possibilities: the
|
||
first SIZE bytes at STRING start with valid multibyte characters,
|
||
they start with an invalid byte sequence or just part of a
|
||
character, or STRING points to an empty string (a null character).
|
||
|
||
For a valid multibyte character, ‘mblen’ returns the number of
|
||
bytes in that character (always at least ‘1’ and never more than
|
||
SIZE). For an invalid byte sequence, ‘mblen’ returns -1. For an
|
||
empty string, it returns 0.
|
||
|
||
If the multibyte character code uses shift characters, then ‘mblen’
|
||
maintains and updates a shift state as it scans. If you call
|
||
‘mblen’ with a null pointer for STRING, that initializes the shift
|
||
state to its standard initial value. It also returns a nonzero
|
||
value if the multibyte character code in use actually has a shift
|
||
state. *Note Shift State::.
|
||
|
||
The function ‘mblen’ is declared in ‘stdlib.h’.
|
||
|
||
|
||
File: libc.info, Node: Non-reentrant String Conversion, Next: Shift State, Prev: Non-reentrant Character Conversion, Up: Non-reentrant Conversion
|
||
|
||
6.4.2 Non-reentrant Conversion of Strings
|
||
-----------------------------------------
|
||
|
||
For convenience the ISO C90 standard also defines functions to convert
|
||
entire strings instead of single characters. These functions suffer
|
||
from the same problems as their reentrant counterparts from Amendment 1
|
||
to ISO C90; see *note Converting Strings::.
|
||
|
||
-- Function: size_t mbstowcs (wchar_t *WSTRING, const char *STRING,
|
||
size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Unsafe corrupt heap lock dlopen |
|
||
AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘mbstowcs’ (“multibyte string to wide character string”)
|
||
function converts the null-terminated string of multibyte
|
||
characters STRING to an array of wide character codes, storing not
|
||
more than SIZE wide characters into the array beginning at WSTRING.
|
||
The terminating null character counts towards the size, so if SIZE
|
||
is less than the actual number of wide characters resulting from
|
||
STRING, no terminating null character is stored.
|
||
|
||
The conversion of characters from STRING begins in the initial
|
||
shift state.
|
||
|
||
If an invalid multibyte character sequence is found, the ‘mbstowcs’
|
||
function returns a value of -1. Otherwise, it returns the number
|
||
of wide characters stored in the array WSTRING. This number does
|
||
not include the terminating null character, which is present if the
|
||
number is less than SIZE.
|
||
|
||
Here is an example showing how to convert a string of multibyte
|
||
characters, allocating enough space for the result.
|
||
|
||
wchar_t *
|
||
mbstowcs_alloc (const char *string)
|
||
{
|
||
size_t size = strlen (string) + 1;
|
||
wchar_t *buf = xmalloc (size * sizeof (wchar_t));
|
||
|
||
size = mbstowcs (buf, string, size);
|
||
if (size == (size_t) -1)
|
||
return NULL;
|
||
buf = xrealloc (buf, (size + 1) * sizeof (wchar_t));
|
||
return buf;
|
||
}
|
||
|
||
-- Function: size_t wcstombs (char *STRING, const wchar_t *WSTRING,
|
||
size_t SIZE)
|
||
Preliminary: | MT-Safe | AS-Unsafe corrupt heap lock dlopen |
|
||
AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘wcstombs’ (“wide character string to multibyte string”)
|
||
function converts the null-terminated wide character array WSTRING
|
||
into a string containing multibyte characters, storing not more
|
||
than SIZE bytes starting at STRING, followed by a terminating null
|
||
character if there is room. The conversion of characters begins in
|
||
the initial shift state.
|
||
|
||
The terminating null character counts towards the size, so if SIZE
|
||
is less than or equal to the number of bytes needed for WSTRING, no
|
||
terminating null character is stored.
|
||
|
||
If a code that does not correspond to a valid multibyte character
|
||
is found, the ‘wcstombs’ function returns a value of -1.
|
||
Otherwise, the return value is the number of bytes stored in the
|
||
array STRING. This number does not include the terminating null
|
||
character, which is present if the number is less than SIZE.
|
||
|
||
|
||
File: libc.info, Node: Shift State, Prev: Non-reentrant String Conversion, Up: Non-reentrant Conversion
|
||
|
||
6.4.3 States in Non-reentrant Functions
|
||
---------------------------------------
|
||
|
||
In some multibyte character codes, the _meaning_ of any particular byte
|
||
sequence is not fixed; it depends on what other sequences have come
|
||
earlier in the same string. Typically there are just a few sequences
|
||
that can change the meaning of other sequences; these few are called
|
||
"shift sequences" and we say that they set the "shift state" for other
|
||
sequences that follow.
|
||
|
||
To illustrate shift state and shift sequences, suppose we decide that
|
||
the sequence ‘0200’ (just one byte) enters Japanese mode, in which pairs
|
||
of bytes in the range from ‘0240’ to ‘0377’ are single characters, while
|
||
‘0201’ enters Latin-1 mode, in which single bytes in the range from
|
||
‘0240’ to ‘0377’ are characters, and interpreted according to the ISO
|
||
Latin-1 character set. This is a multibyte code that has two
|
||
alternative shift states (“Japanese mode” and “Latin-1 mode”), and two
|
||
shift sequences that specify particular shift states.
|
||
|
||
When the multibyte character code in use has shift states, then
|
||
‘mblen’, ‘mbtowc’, and ‘wctomb’ must maintain and update the current
|
||
shift state as they scan the string. To make this work properly, you
|
||
must follow these rules:
|
||
|
||
• Before starting to scan a string, call the function with a null
|
||
pointer for the multibyte character address—for example, ‘mblen
|
||
(NULL, 0)’. This initializes the shift state to its standard
|
||
initial value.
|
||
|
||
• Scan the string one character at a time, in order. Do not “back
|
||
up” and rescan characters already scanned, and do not intersperse
|
||
the processing of different strings.
|
||
|
||
Here is an example of using ‘mblen’ following these rules:
|
||
|
||
void
|
||
scan_string (char *s)
|
||
{
|
||
int length = strlen (s);
|
||
|
||
/* Initialize shift state. */
|
||
mblen (NULL, 0);
|
||
|
||
while (1)
|
||
{
|
||
int thischar = mblen (s, length);
|
||
/* Deal with end of string and invalid characters. */
|
||
if (thischar == 0)
|
||
break;
|
||
if (thischar == -1)
|
||
{
|
||
error ("invalid multibyte character");
|
||
break;
|
||
}
|
||
/* Advance past this character. */
|
||
s += thischar;
|
||
length -= thischar;
|
||
}
|
||
}
|
||
|
||
The functions ‘mblen’, ‘mbtowc’ and ‘wctomb’ are not reentrant when
|
||
using a multibyte code that uses a shift state. However, no other
|
||
library functions call these functions, so you don’t have to worry that
|
||
the shift state will be changed mysteriously.
|
||
|
||
|
||
File: libc.info, Node: Generic Charset Conversion, Prev: Non-reentrant Conversion, Up: Character Set Handling
|
||
|
||
6.5 Generic Charset Conversion
|
||
==============================
|
||
|
||
The conversion functions mentioned so far in this chapter all had in
|
||
common that they operate on character sets that are not directly
|
||
specified by the functions. The multibyte encoding used is specified by
|
||
the currently selected locale for the ‘LC_CTYPE’ category. The wide
|
||
character set is fixed by the implementation (in the case of the GNU C
|
||
Library it is always UCS-4 encoded ISO 10646).
|
||
|
||
This has of course several problems when it comes to general
|
||
character conversion:
|
||
|
||
• For every conversion where neither the source nor the destination
|
||
character set is the character set of the locale for the ‘LC_CTYPE’
|
||
category, one has to change the ‘LC_CTYPE’ locale using
|
||
‘setlocale’.
|
||
|
||
Changing the ‘LC_CTYPE’ locale introduces major problems for the
|
||
rest of the programs since several more functions (e.g., the
|
||
character classification functions, *note Classification of
|
||
Characters::) use the ‘LC_CTYPE’ category.
|
||
|
||
• Parallel conversions to and from different character sets are not
|
||
possible since the ‘LC_CTYPE’ selection is global and shared by all
|
||
threads.
|
||
|
||
• If neither the source nor the destination character set is the
|
||
character set used for ‘wchar_t’ representation, there is at least
|
||
a two-step process necessary to convert a text using the functions
|
||
above. One would have to select the source character set as the
|
||
multibyte encoding, convert the text into a ‘wchar_t’ text, select
|
||
the destination character set as the multibyte encoding, and
|
||
convert the wide character text to the multibyte (= destination)
|
||
character set.
|
||
|
||
Even if this is possible (which is not guaranteed), it is very
|
||
tiring work. Plus it suffers from the other two raised points even
|
||
more due to the steady changing of the locale.
|
||
|
||
The XPG2 standard defines a completely new set of functions, which
|
||
has none of these limitations. They are not at all coupled to the
|
||
selected locales, and they have no constraints on the character sets
|
||
selected for source and destination. Only the set of available
|
||
conversions limits them. The standard does not specify that any
|
||
conversion at all must be available. Such availability is a measure of
|
||
the quality of the implementation.
|
||
|
||
In the following text first the interface to ‘iconv’ and then the
|
||
conversion function will be described. Comparisons with other
|
||
implementations will show what obstacles stand in the way of portable
|
||
applications. Finally, the implementation is described in so far as
|
||
might interest the advanced user who wants to extend conversion
|
||
capabilities.
|
||
|
||
* Menu:
|
||
|
||
* Generic Conversion Interface:: Generic Character Set Conversion Interface.
|
||
* iconv Examples:: A complete ‘iconv’ example.
|
||
* Other iconv Implementations:: Some Details about other ‘iconv’
|
||
Implementations.
|
||
* glibc iconv Implementation:: The ‘iconv’ Implementation in the GNU C
|
||
library.
|
||
|
||
|
||
File: libc.info, Node: Generic Conversion Interface, Next: iconv Examples, Up: Generic Charset Conversion
|
||
|
||
6.5.1 Generic Character Set Conversion Interface
|
||
------------------------------------------------
|
||
|
||
This set of functions follows the traditional cycle of using a resource:
|
||
open–use–close. The interface consists of three functions, each of
|
||
which implements one step.
|
||
|
||
Before the interfaces are described it is necessary to introduce a
|
||
data type. Just like other open–use–close interfaces the functions
|
||
introduced here work using handles and the ‘iconv.h’ header defines a
|
||
special type for the handles used.
|
||
|
||
-- Data Type: iconv_t
|
||
This data type is an abstract type defined in ‘iconv.h’. The user
|
||
must not assume anything about the definition of this type; it must
|
||
be completely opaque.
|
||
|
||
Objects of this type can get assigned handles for the conversions
|
||
using the ‘iconv’ functions. The objects themselves need not be
|
||
freed, but the conversions for which the handles stand have to be.
|
||
|
||
The first step is the function to create a handle.
|
||
|
||
-- Function: iconv_t iconv_open (const char *TOCODE, const char
|
||
*FROMCODE)
|
||
Preliminary: | MT-Safe locale | AS-Unsafe corrupt heap lock dlopen
|
||
| AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘iconv_open’ function has to be used before starting a
|
||
conversion. The two parameters this function takes determine the
|
||
source and destination character set for the conversion, and if the
|
||
implementation has the possibility to perform such a conversion,
|
||
the function returns a handle.
|
||
|
||
If the wanted conversion is not available, the ‘iconv_open’
|
||
function returns ‘(iconv_t) -1’. In this case the global variable
|
||
‘errno’ can have the following values:
|
||
|
||
‘EMFILE’
|
||
The process already has ‘OPEN_MAX’ file descriptors open.
|
||
‘ENFILE’
|
||
The system limit of open files is reached.
|
||
‘ENOMEM’
|
||
Not enough memory to carry out the operation.
|
||
‘EINVAL’
|
||
The conversion from FROMCODE to TOCODE is not supported.
|
||
|
||
It is not possible to use the same descriptor in different threads
|
||
to perform independent conversions. The data structures associated
|
||
with the descriptor include information about the conversion state.
|
||
This must not be messed up by using it in different conversions.
|
||
|
||
An ‘iconv’ descriptor is like a file descriptor as for every use a
|
||
new descriptor must be created. The descriptor does not stand for
|
||
all of the conversions from FROMCODE to TOCODE.
|
||
|
||
The GNU C Library implementation of ‘iconv_open’ has one
|
||
significant extension to other implementations. To ease the
|
||
extension of the set of available conversions, the implementation
|
||
allows storing the necessary files with data and code in an
|
||
arbitrary number of directories. How this extension must be
|
||
written will be explained below (*note glibc iconv
|
||
Implementation::). Here it is only important to say that all
|
||
directories mentioned in the ‘GCONV_PATH’ environment variable are
|
||
considered only if they contain a file ‘gconv-modules’. These
|
||
directories need not necessarily be created by the system
|
||
administrator. In fact, this extension is introduced to help users
|
||
writing and using their own, new conversions. Of course, this does
|
||
not work for security reasons in SUID binaries; in this case only
|
||
the system directory is considered and this normally is
|
||
‘PREFIX/lib/gconv’. The ‘GCONV_PATH’ environment variable is
|
||
examined exactly once at the first call of the ‘iconv_open’
|
||
function. Later modifications of the variable have no effect.
|
||
|
||
The ‘iconv_open’ function was introduced early in the X/Open
|
||
Portability Guide, version 2. It is supported by all commercial
|
||
Unices as it is required for the Unix branding. However, the
|
||
quality and completeness of the implementation varies widely. The
|
||
‘iconv_open’ function is declared in ‘iconv.h’.
|
||
|
||
The ‘iconv’ implementation can associate large data structures with
|
||
the handle returned by ‘iconv_open’. Therefore, it is crucial to free
|
||
all the resources once all conversions are carried out and the
|
||
conversion is not needed anymore.
|
||
|
||
-- Function: int iconv_close (iconv_t CD)
|
||
Preliminary: | MT-Safe | AS-Unsafe corrupt heap lock dlopen |
|
||
AC-Unsafe corrupt lock mem | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘iconv_close’ function frees all resources associated with the
|
||
handle CD, which must have been returned by a successful call to
|
||
the ‘iconv_open’ function.
|
||
|
||
If the function call was successful the return value is 0.
|
||
Otherwise it is -1 and ‘errno’ is set appropriately. Defined errors
|
||
are:
|
||
|
||
‘EBADF’
|
||
The conversion descriptor is invalid.
|
||
|
||
The ‘iconv_close’ function was introduced together with the rest of
|
||
the ‘iconv’ functions in XPG2 and is declared in ‘iconv.h’.
|
||
|
||
The standard defines only one actual conversion function. This has,
|
||
therefore, the most general interface: it allows conversion from one
|
||
buffer to another. Conversion from a file to a buffer, vice versa, or
|
||
even file to file can be implemented on top of it.
|
||
|
||
-- Function: size_t iconv (iconv_t CD, char **INBUF, size_t
|
||
*INBYTESLEFT, char **OUTBUF, size_t *OUTBYTESLEFT)
|
||
Preliminary: | MT-Safe race:cd | AS-Safe | AC-Unsafe corrupt |
|
||
*Note POSIX Safety Concepts::.
|
||
|
||
The ‘iconv’ function converts the text in the input buffer
|
||
according to the rules associated with the descriptor CD and stores
|
||
the result in the output buffer. It is possible to call the
|
||
function for the same text several times in a row since for
|
||
stateful character sets the necessary state information is kept in
|
||
the data structures associated with the descriptor.
|
||
|
||
The input buffer is specified by ‘*INBUF’ and it contains
|
||
‘*INBYTESLEFT’ bytes. The extra indirection is necessary for
|
||
communicating the used input back to the caller (see below). It is
|
||
important to note that the buffer pointer is of type ‘char’ and the
|
||
length is measured in bytes even if the input text is encoded in
|
||
wide characters.
|
||
|
||
The output buffer is specified in a similar way. ‘*OUTBUF’ points
|
||
to the beginning of the buffer with at least ‘*OUTBYTESLEFT’ bytes
|
||
room for the result. The buffer pointer again is of type ‘char’
|
||
and the length is measured in bytes. If OUTBUF or ‘*OUTBUF’ is a
|
||
null pointer, the conversion is performed but no output is
|
||
available.
|
||
|
||
If INBUF is a null pointer, the ‘iconv’ function performs the
|
||
necessary action to put the state of the conversion into the
|
||
initial state. This is obviously a no-op for non-stateful
|
||
encodings, but if the encoding has a state, such a function call
|
||
might put some byte sequences in the output buffer, which perform
|
||
the necessary state changes. The next call with INBUF not being a
|
||
null pointer then simply goes on from the initial state. It is
|
||
important that the programmer never makes any assumption as to
|
||
whether the conversion has to deal with states. Even if the input
|
||
and output character sets are not stateful, the implementation
|
||
might still have to keep states. This is due to the implementation
|
||
chosen for the GNU C Library as it is described below. Therefore
|
||
an ‘iconv’ call to reset the state should always be performed if
|
||
some protocol requires this for the output text.
|
||
|
||
The conversion stops for one of three reasons. The first is that
|
||
all characters from the input buffer are converted. This actually
|
||
can mean two things: either all bytes from the input buffer are
|
||
consumed or there are some bytes at the end of the buffer that
|
||
possibly can form a complete character but the input is incomplete.
|
||
The second reason for a stop is that the output buffer is full.
|
||
And the third reason is that the input contains invalid characters.
|
||
|
||
In all of these cases the buffer pointers after the last successful
|
||
conversion, for input and output buffer, are stored in INBUF and
|
||
OUTBUF, and the available room in each buffer is stored in
|
||
INBYTESLEFT and OUTBYTESLEFT.
|
||
|
||
Since the character sets selected in the ‘iconv_open’ call can be
|
||
almost arbitrary, there can be situations where the input buffer
|
||
contains valid characters, which have no identical representation
|
||
in the output character set. The behavior in this situation is
|
||
undefined. The _current_ behavior of the GNU C Library in this
|
||
situation is to return with an error immediately. This certainly
|
||
is not the most desirable solution; therefore, future versions will
|
||
provide better ones, but they are not yet finished.
|
||
|
||
If all input from the input buffer is successfully converted and
|
||
stored in the output buffer, the function returns the number of
|
||
non-reversible conversions performed. In all other cases the
|
||
return value is ‘(size_t) -1’ and ‘errno’ is set appropriately. In
|
||
such cases the value pointed to by INBYTESLEFT is nonzero.
|
||
|
||
‘EILSEQ’
|
||
The conversion stopped because of an invalid byte sequence in
|
||
the input. After the call, ‘*INBUF’ points at the first byte
|
||
of the invalid byte sequence.
|
||
|
||
‘E2BIG’
|
||
The conversion stopped because it ran out of space in the
|
||
output buffer.
|
||
|
||
‘EINVAL’
|
||
The conversion stopped because of an incomplete byte sequence
|
||
at the end of the input buffer.
|
||
|
||
‘EBADF’
|
||
The CD argument is invalid.
|
||
|
||
The ‘iconv’ function was introduced in the XPG2 standard and is
|
||
declared in the ‘iconv.h’ header.
|
||
|
||
The definition of the ‘iconv’ function is quite good overall. It
|
||
provides quite flexible functionality. The only problems lie in the
|
||
boundary cases, which are incomplete byte sequences at the end of the
|
||
input buffer and invalid input. A third problem, which is not really a
|
||
design problem, is the way conversions are selected. The standard does
|
||
not say anything about the legitimate names or a minimal set of available
|
||
conversions. We will see how this negatively impacts other
|
||
implementations, as demonstrated below.
|
||
|
||
|
||
File: libc.info, Node: iconv Examples, Next: Other iconv Implementations, Prev: Generic Conversion Interface, Up: Generic Charset Conversion
|
||
|
||
6.5.2 A complete ‘iconv’ example
|
||
--------------------------------
|
||
|
||
The example below features a solution for a common problem. Given that
|
||
one knows the internal encoding used by the system for ‘wchar_t’
|
||
strings, one often is in the position to read text from a file and store
|
||
it in wide character buffers. One can do this using ‘mbsrtowcs’, but
|
||
then we run into the problems discussed above.
|
||
|
||
int
|
||
file2wcs (int fd, const char *charset, wchar_t *outbuf, size_t avail)
|
||
{
|
||
char inbuf[BUFSIZ];
|
||
size_t insize = 0;
|
||
char *wrptr = (char *) outbuf;
|
||
int result = 0;
|
||
iconv_t cd;
|
||
|
||
cd = iconv_open ("WCHAR_T", charset);
|
||
if (cd == (iconv_t) -1)
|
||
{
|
||
/* Something went wrong. */
|
||
if (errno == EINVAL)
|
||
error (0, 0, "conversion from '%s' to wchar_t not available",
|
||
charset);
|
||
else
|
||
perror ("iconv_open");
|
||
|
||
/* Terminate the output string. */
|
||
*outbuf = L'\0';
|
||
|
||
return -1;
|
||
}
|
||
|
||
while (avail > 0)
|
||
{
|
||
size_t nread;
|
||
size_t nconv;
|
||
char *inptr = inbuf;
|
||
|
||
/* Read more input. */
|
||
nread = read (fd, inbuf + insize, sizeof (inbuf) - insize);
|
||
if (nread == 0)
|
||
{
|
||
/* When we come here the file is completely read.
|
||
This still could mean there are some unused
|
||
characters in the ‘inbuf’. Put them back. */
|
||
if (lseek (fd, -insize, SEEK_CUR) == -1)
|
||
result = -1;
|
||
|
||
/* Now write out the byte sequence to get into the
|
||
initial state if this is necessary. */
|
||
iconv (cd, NULL, NULL, &wrptr, &avail);
|
||
|
||
break;
|
||
}
|
||
insize += nread;
|
||
|
||
/* Do the conversion. */
|
||
nconv = iconv (cd, &inptr, &insize, &wrptr, &avail);
|
||
if (nconv == (size_t) -1)
|
||
{
|
||
/* Not everything went right. It might only be
|
||
an unfinished byte sequence at the end of the
|
||
buffer. Or it is a real problem. */
|
||
if (errno == EINVAL)
|
||
/* This is harmless. Simply move the unused
|
||
bytes to the beginning of the buffer so that
|
||
they can be used in the next round. */
|
||
memmove (inbuf, inptr, insize);
|
||
else
|
||
{
|
||
/* It is a real problem. Maybe we ran out of
|
||
space in the output buffer or we have invalid
|
||
input. In any case back the file pointer to
|
||
the position of the last processed byte. */
|
||
lseek (fd, -insize, SEEK_CUR);
|
||
result = -1;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
/* Terminate the output string. */
|
||
if (avail >= sizeof (wchar_t))
|
||
*((wchar_t *) wrptr) = L'\0';
|
||
|
||
if (iconv_close (cd) != 0)
|
||
perror ("iconv_close");
|
||
|
||
return (wchar_t *) wrptr - outbuf;
|
||
}
|
||
|
||
This example shows the most important aspects of using the ‘iconv’
|
||
functions. It shows how successive calls to ‘iconv’ can be used to
|
||
convert large amounts of text. The user does not have to care about
|
||
stateful encodings as the functions take care of everything.
|
||
|
||
An interesting point is the case where ‘iconv’ returns an error and
|
||
‘errno’ is set to ‘EINVAL’. This is not really an error in the
|
||
transformation. It can happen whenever the input character set contains
|
||
byte sequences of more than one byte for some character and texts are
|
||
not processed in one piece. In this case there is a chance that a
|
||
multibyte sequence is cut. The caller can then simply read the
|
||
remainder of the text and feed the offending bytes together with new
|
||
characters from the input to ‘iconv’ and continue the work. The internal
|
||
state kept in the descriptor is _not_ unspecified after such an event as
|
||
is the case with the conversion functions from the ISO C standard.
|
||
|
||
The example also shows the problem of using wide character strings
|
||
with ‘iconv’. As explained in the description of the ‘iconv’ function
|
||
above, the function always takes a pointer to a ‘char’ array and the
|
||
available space is measured in bytes. In the example, the output buffer
|
||
is a wide character buffer; therefore, we use a local variable WRPTR of
|
||
type ‘char *’, which is used in the ‘iconv’ calls.
|
||
|
||
This looks rather innocent but can lead to problems on platforms that
|
||
have tight restrictions on alignment. Therefore the caller of ‘iconv’
|
||
has to make sure that the pointers passed are suitable for access of
|
||
characters from the appropriate character set. Since, in the above
|
||
case, the input parameter to the function is a ‘wchar_t’ pointer, this
|
||
is the case (unless the user violates alignment when computing the
|
||
parameter). But in other situations, especially when writing generic
|
||
functions where one does not know what type of character set one uses
|
||
and, therefore, treats text as a sequence of bytes, it might become
|
||
tricky.
|
||
|
||
|
||
File: libc.info, Node: Other iconv Implementations, Next: glibc iconv Implementation, Prev: iconv Examples, Up: Generic Charset Conversion
|
||
|
||
6.5.3 Some Details about other ‘iconv’ Implementations
|
||
------------------------------------------------------
|
||
|
||
This is not really the place to discuss the ‘iconv’ implementation of
|
||
other systems but it is necessary to know a bit about them to write
|
||
portable programs. The above mentioned problems with the specification
|
||
of the ‘iconv’ functions can lead to portability issues.
|
||
|
||
The first thing to notice is that, due to the large number of
|
||
character sets in use, it is certainly not practical to encode the
|
||
conversions directly in the C library. Therefore, the conversion
|
||
information must come from files outside the C library. This is usually
|
||
done in one or both of the following ways:
|
||
|
||
• The C library contains a set of generic conversion functions that
|
||
can read the needed conversion tables and other information from
|
||
data files. These files get loaded when necessary.
|
||
|
||
This solution is problematic as it requires a great deal of effort
|
||
to apply to all character sets (potentially an infinite set). The
|
||
differences in the structure of the different character sets are so
|
||
large that many different variants of the table-processing
|
||
functions must be developed. In addition, the generic nature of
|
||
these functions makes them slower than specifically implemented
|
||
functions.
|
||
|
||
• The C library only contains a framework that can dynamically load
|
||
object files and execute the conversion functions contained
|
||
therein.
|
||
|
||
This solution provides much more flexibility. The C library itself
|
||
contains only very little code and therefore reduces the general
|
||
memory footprint. Also, with a documented interface between the C
|
||
library and the loadable modules it is possible for third parties
|
||
to extend the set of available conversion modules. A drawback of
|
||
this solution is that dynamic loading must be available.
|
||
|
||
Some implementations in commercial Unices implement a mixture of
|
||
these possibilities; the majority implement only the second solution.
|
||
Using loadable modules moves the code out of the library itself and
|
||
keeps the door open for extensions and improvements, but this design is
|
||
also limiting on some platforms since not many platforms support dynamic
|
||
loading in statically linked programs. On platforms without this
|
||
capability it is therefore not possible to use this interface in
|
||
statically linked programs. The GNU C Library has, on ELF platforms, no
|
||
problems with dynamic loading in these situations; therefore, this point
|
||
is moot. The danger is that one gets acquainted with this situation and
|
||
forgets about the restrictions on other systems.
|
||
|
||
A second thing to know about other ‘iconv’ implementations is that
|
||
the number of available conversions is often very limited. Some
|
||
implementations provide, in the standard release (not special
|
||
international or developer releases), at most 100 to 200 conversion
|
||
possibilities. This does not mean 200 different character sets are
|
||
supported; for example, conversions from one character set to a set of
|
||
10 others might count as 10 conversions. Together with the other
|
||
direction this makes 20 conversion possibilities used up by one
|
||
character set. One can imagine the thin coverage these platforms
|
||
provide. Some Unix vendors even provide only a handful of conversions,
|
||
which renders them useless for almost all uses.
|
||
|
||
This directly leads to a third and probably the most problematic
|
||
point. Given the way the ‘iconv’ conversion functions are implemented on all
|
||
known Unix systems, the availability of the conversion functions from
|
||
character set A to B and the conversion from B to C does _not_ imply
|
||
that the conversion from A to C is available.
|
||
|
||
This might not seem unreasonable and problematic at first, but it is
|
||
a quite big problem as one will notice shortly after hitting it. To
|
||
show the problem, assume we write a program that has to convert from A
|
||
to C. A call like
|
||
|
||
cd = iconv_open ("C", "A");
|
||
|
||
fails according to the assumption above. But what does the program do
|
||
now? The conversion is necessary; therefore, simply giving up is not an
|
||
option.
|
||
|
||
This is a nuisance. The ‘iconv’ function should take care of this.
|
||
But how should the program proceed from here on? If it tries to convert
|
||
to character set B first, the two ‘iconv_open’ calls
|
||
|
||
cd1 = iconv_open ("B", "A");
|
||
|
||
and
|
||
|
||
cd2 = iconv_open ("C", "B");
|
||
|
||
will succeed, but how to find B?
|
||
|
||
Unfortunately, the answer is: there is no general solution. On some
|
||
systems guessing might help. On those systems most character sets can
|
||
convert to and from UTF-8 encoded ISO 10646 or Unicode text. Besides
|
||
this only some very system-specific methods can help. Since the
|
||
conversion functions come from loadable modules and these modules must
|
||
be stored somewhere in the filesystem, one _could_ try to find them and
|
||
determine from the available files which conversions are available and
|
||
whether there is an indirect route from A to C.
|
||
|
||
This example shows one of the design errors of ‘iconv’ mentioned
|
||
above. It should at least be possible to determine the list of
|
||
available conversions programmatically so that if ‘iconv_open’ says there
|
||
is no such conversion, one could make sure this also is true for
|
||
indirect routes.
|
||
|
||
|
||
File: libc.info, Node: glibc iconv Implementation, Prev: Other iconv Implementations, Up: Generic Charset Conversion
|
||
|
||
6.5.4 The ‘iconv’ Implementation in the GNU C Library
|
||
-----------------------------------------------------
|
||
|
||
After reading about the problems of ‘iconv’ implementations in the last
|
||
section it is certainly good to note that the implementation in the GNU
|
||
C Library has none of the problems mentioned above. What follows is a
|
||
step-by-step analysis of the points raised above. The evaluation is
|
||
based on the current state of the development (as of January 1999). The
|
||
development of the ‘iconv’ functions is not complete, but basic
|
||
functionality has solidified.
|
||
|
||
The GNU C Library’s ‘iconv’ implementation uses shared loadable
|
||
modules to implement the conversions. A very small number of
|
||
conversions are built into the library itself but these are only rather
|
||
trivial conversions.
|
||
|
||
All the benefits of loadable modules are available in the GNU C
|
||
Library implementation. This is especially appealing since the
|
||
interface is well documented (see below), and it, therefore, is easy to
|
||
write new conversion modules. The drawback of using loadable objects is
|
||
not a problem in the GNU C Library, at least on ELF systems. Since the
|
||
library is able to load shared objects even in statically linked
|
||
binaries, static linking need not be forbidden in case one wants to use
|
||
‘iconv’.
|
||
|
||
The second mentioned problem is the number of supported conversions.
|
||
Currently, the GNU C Library supports more than 150 character sets. The
|
||
way the implementation is designed the number of supported conversions
|
||
is greater than 22350 (150 times 149). If any conversion from or to a
|
||
character set is missing, it can be added easily.
|
||
|
||
Particularly impressive as it may be, this high number is due to the
|
||
fact that the GNU C Library implementation of ‘iconv’ does not have the
|
||
third problem mentioned above (i.e., whenever there is a conversion from
|
||
a character set A to B and from B to C it is always possible to convert
|
||
from A to C directly). If the ‘iconv_open’ returns an error and sets
|
||
‘errno’ to ‘EINVAL’, there is no known way, directly or indirectly, to
|
||
perform the wanted conversion.
|
||
|
||
Triangulation is achieved by providing for each character set a
|
||
conversion from and to UCS-4 encoded ISO 10646. Using ISO 10646 as an
|
||
intermediate representation it is possible to "triangulate" (i.e.,
|
||
convert with an intermediate representation).
|
||
|
||
There is no inherent requirement to provide a conversion to ISO 10646
|
||
for a new character set, and it is also possible to provide other
|
||
conversions where neither source nor destination character set is
|
||
ISO 10646. The existing set of conversions is simply meant to cover all
|
||
conversions that might be of interest.
|
||
|
||
All currently available conversions use the triangulation method
|
||
above, making conversion run unnecessarily slow. If, for example,
|
||
somebody often needs the conversion from ISO-2022-JP to EUC-JP, a
|
||
quicker solution would involve direct conversion between the two
|
||
character sets, skipping the input to ISO 10646 first. The two
|
||
character sets of interest are much more similar to each other than to
|
||
ISO 10646.
|
||
|
||
In such a situation one easily can write a new conversion and provide
|
||
it as a better alternative. The GNU C Library ‘iconv’ implementation
|
||
would automatically use the module implementing the conversion if it is
|
||
specified to be more efficient.
|
||
|
||
6.5.4.1 Format of ‘gconv-modules’ files
|
||
.......................................
|
||
|
||
All information about the available conversions comes from a file named
|
||
‘gconv-modules’, which can be found in any of the directories along the
|
||
‘GCONV_PATH’. The ‘gconv-modules’ files are line-oriented text files,
|
||
where each of the lines has one of the following formats:
|
||
|
||
• If the first non-whitespace character is a ‘#’ the line contains
|
||
only comments and is ignored.
|
||
|
||
• Lines starting with ‘alias’ define an alias name for a character
|
||
set. Two more words are expected on the line. The first word
|
||
defines the alias name, and the second defines the original name of
|
||
the character set. The effect is that it is possible to use the
|
||
alias name in the FROMCODE or TOCODE parameters of ‘iconv_open’ and
|
||
achieve the same result as when using the real character set name.
|
||
|
||
This is quite important as a character set often has many different
|
||
names. There is normally an official name but this need not
|
||
correspond to the most popular name. Besides this, many character
|
||
sets have special names that are somehow constructed. For example,
|
||
all character sets specified by the ISO have an alias of the form
|
||
‘ISO-IR-NNN’ where NNN is the registration number. This allows
|
||
programs that know about the registration number to construct
|
||
character set names and use them in ‘iconv_open’ calls. More on
|
||
the available names and aliases follows below.
|
||
|
||
• Lines starting with ‘module’ introduce an available conversion
|
||
module. These lines must contain three or four more words.
|
||
|
||
The first word specifies the source character set, the second word
|
||
the destination character set of the conversion implemented in this
|
||
module, and the third word is the name of the loadable module. The
|
||
filename is constructed by appending the usual shared object suffix
|
||
(normally ‘.so’) and this file is then supposed to be found in the
|
||
same directory the ‘gconv-modules’ file is in. The last word on
|
||
the line, which is optional, is a numeric value representing the
|
||
cost of the conversion. If this word is missing, a cost of 1 is
|
||
assumed. The numeric value itself does not matter that much; what
|
||
counts are the relative values of the sums of costs for all
|
||
possible conversion paths. Below is a more precise description of
|
||
the use of the cost value.
|
||
|
||
Returning to the example above where one has written a module to
|
||
directly convert from ISO-2022-JP to EUC-JP and back, all that has to
|
||
be done is to put the new module, let its name be ISO2022JP-EUCJP.so, in
|
||
a directory and add a file ‘gconv-modules’ with the following content in
|
||
the same directory:
|
||
|
||
module ISO-2022-JP// EUC-JP// ISO2022JP-EUCJP 1
|
||
module EUC-JP// ISO-2022-JP// ISO2022JP-EUCJP 1
|
||
|
||
To see why this is sufficient, it is necessary to understand how the
|
||
conversion used by ‘iconv’ (and described in the descriptor) is
|
||
selected. The approach to this problem is quite simple.
|
||
|
||
At the first call of the ‘iconv_open’ function the program reads all
|
||
available ‘gconv-modules’ files and builds up two tables: one containing
|
||
all the known aliases and another that contains the information about
|
||
the conversions and which shared object implements them.
|
||
|
||
6.5.4.2 Finding the conversion path in ‘iconv’
|
||
..............................................
|
||
|
||
The set of available conversions forms a directed graph with weighted
|
||
edges. The weights on the edges are the costs specified in the
|
||
‘gconv-modules’ files. The ‘iconv_open’ function uses an algorithm
|
||
suitable for searching for the best path in such a graph and so constructs
|
||
a list of conversions that must be performed in succession to get the
|
||
transformation from the source to the destination character set.
|
||
|
||
Explaining why the above ‘gconv-modules’ file allows the ‘iconv’
|
||
implementation to resolve the specific ISO-2022-JP to EUC-JP conversion
|
||
module instead of the conversion coming with the library itself is
|
||
straightforward. Since the latter conversion takes two steps (from
|
||
ISO-2022-JP to ISO 10646 and then from ISO 10646 to EUC-JP), the cost is
|
||
1+1 = 2. The above ‘gconv-modules’ file, however, specifies that the
|
||
new conversion module can perform this conversion with only the cost of
|
||
1.
|
||
|
||
A mysterious item about the ‘gconv-modules’ file above (and also the
|
||
file coming with the GNU C Library) are the names of the character sets
|
||
specified in the ‘module’ lines. Why do almost all the names end in
|
||
‘//’? And this is not all: the names can actually be regular
|
||
expressions. At this point in time this mystery should not be revealed,
|
||
unless you have the relevant spell-casting materials: ashes from an
|
||
original DOS 6.2 boot disk burnt in effigy, a crucifix blessed by St.
|
||
Emacs, assorted herbal roots from Central America, sand from Cebu, etc.
|
||
Sorry! *The part of the implementation where this is used is not yet
|
||
finished. For now please simply follow the existing examples. It’ll
|
||
become clearer once it is. –drepper*
|
||
|
||
A last remark about the ‘gconv-modules’ is about the names not ending
|
||
with ‘//’. A character set named ‘INTERNAL’ is often mentioned. From
|
||
the discussion above and the chosen name it should have become clear
|
||
that this is the name for the representation used in the intermediate
|
||
step of the triangulation. We have said that this is UCS-4 but actually
|
||
that is not quite right. The UCS-4 specification also includes the
|
||
specification of the byte ordering used. Since a UCS-4 value consists
|
||
of four bytes, a stored value is affected by byte ordering. The
|
||
internal representation is _not_ the same as UCS-4 in case the byte
|
||
ordering of the processor (or at least the running process) is not the
|
||
same as the one required for UCS-4. This is done for performance
|
||
reasons as one does not want to perform unnecessary byte-swapping
|
||
operations if one is not interested in actually seeing the result in
|
||
UCS-4. To avoid trouble with endianness, the internal representation
|
||
consistently is named ‘INTERNAL’ even on big-endian systems where the
|
||
representations are identical.
|
||
|
||
6.5.4.3 ‘iconv’ module data structures
|
||
......................................
|
||
|
||
So far this section has described how modules are located and considered
|
||
to be used. What remains to be described is the interface of the
|
||
modules so that one can write new ones. This section describes the
|
||
interface as it is in use in January 1999. The interface will change a
|
||
bit in the future but, with luck, only in an upwardly compatible way.
|
||
|
||
The definitions necessary to write new modules are publicly available
|
||
in the non-standard header ‘gconv.h’. The following text, therefore,
|
||
describes the definitions from this header file. First, however, it is
|
||
necessary to get an overview.
|
||
|
||
From the perspective of the user of ‘iconv’ the interface is quite
|
||
simple: the ‘iconv_open’ function returns a handle that can be used in
|
||
calls to ‘iconv’, and finally the handle is freed with a call to
|
||
‘iconv_close’. The problem is that the handle has to be able to
|
||
represent the possibly long sequences of conversion steps and also the
|
||
state of each conversion since the handle is all that is passed to the
|
||
‘iconv’ function. Therefore, the data structures are really the
|
||
elements necessary to understanding the implementation.
|
||
|
||
We need two different kinds of data structures. The first describes
|
||
the conversion and the second describes the state etc. There are really
|
||
two type definitions like this in ‘gconv.h’.
|
||
|
||
-- Data type: struct __gconv_step
|
||
This data structure describes one conversion a module can perform.
|
||
For each function in a loaded module with conversion functions
|
||
there is exactly one object of this type. This object is shared by
|
||
all users of the conversion (i.e., this object does not contain any
|
||
information corresponding to an actual conversion; it only
|
||
describes the conversion itself).
|
||
|
||
‘struct __gconv_loaded_object *__shlib_handle’
|
||
‘const char *__modname’
|
||
‘int __counter’
|
||
All these elements of the structure are used internally in the
|
||
C library to coordinate loading and unloading the shared object. One
|
||
must not expect any of the other elements to be available or
|
||
initialized.
|
||
|
||
‘const char *__from_name’
|
||
‘const char *__to_name’
|
||
‘__from_name’ and ‘__to_name’ contain the names of the source
|
||
and destination character sets. They can be used to identify
|
||
the actual conversion to be carried out since one module might
|
||
implement conversions for more than one character set and/or
|
||
direction.
|
||
|
||
‘gconv_fct __fct’
|
||
‘gconv_init_fct __init_fct’
|
||
‘gconv_end_fct __end_fct’
|
||
These elements contain pointers to the functions in the
|
||
loadable module. The interface will be explained below.
|
||
|
||
‘int __min_needed_from’
|
||
‘int __max_needed_from’
|
||
‘int __min_needed_to’
|
||
‘int __max_needed_to’
|
||
These values have to be supplied in the init function of the
|
||
module. The ‘__min_needed_from’ value specifies how many
|
||
bytes a character of the source character set at least needs.
|
||
The ‘__max_needed_from’ specifies the maximum value that also
|
||
includes possible shift sequences.
|
||
|
||
The ‘__min_needed_to’ and ‘__max_needed_to’ values serve the
|
||
same purpose as ‘__min_needed_from’ and ‘__max_needed_from’
|
||
but this time for the destination character set.
|
||
|
||
It is crucial that these values be accurate since otherwise
|
||
the conversion functions will have problems or not work at
|
||
all.
|
||
|
||
‘int __stateful’
|
||
This element must also be initialized by the init function.
|
||
‘int __stateful’ is nonzero if the source character set is
|
||
stateful. Otherwise it is zero.
|
||
|
||
‘void *__data’
|
||
This element can be used freely by the conversion functions in
|
||
the module. ‘void *__data’ can be used to communicate extra
|
||
information from one call to another. ‘void *__data’ need not
|
||
be initialized if not needed at all. If the ‘void *__data’
|
||
element is assigned a pointer to dynamically allocated memory
|
||
(presumably in the init function) it has to be made sure that
|
||
the end function deallocates the memory. Otherwise the
|
||
application will leak memory.
|
||
|
||
It is important to be aware that this data structure is shared
|
||
by all users of this specific conversion and therefore
|
||
the ‘__data’ element must not contain data specific to one
|
||
specific use of the conversion function.
|
||
|
||
-- Data type: struct __gconv_step_data
|
||
This is the data structure that contains the information specific
|
||
to each use of the conversion functions.
|
||
|
||
‘char *__outbuf’
|
||
‘char *__outbufend’
|
||
These elements specify the output buffer for the conversion
|
||
step. The ‘__outbuf’ element points to the beginning of the
|
||
buffer, and ‘__outbufend’ points to the byte following the
|
||
last byte in the buffer. The conversion function must not
|
||
assume anything about the size of the buffer but it can be
|
||
safely assumed that there is room for at least one complete
|
||
character in the output buffer.
|
||
|
||
Once the conversion is finished, if the conversion is the last
|
||
step, the ‘__outbuf’ element must be modified to point after
|
||
the last byte written into the buffer to signal how much
|
||
output is available. If this conversion step is not the last
|
||
one, the element must not be modified. The ‘__outbufend’
|
||
element must not be modified.
|
||
|
||
‘int __is_last’
|
||
This element is nonzero if this conversion step is the last
|
||
one. This information is necessary for the recursion. See
|
||
the description of the conversion function internals below.
|
||
This element must never be modified.
|
||
|
||
‘int __invocation_counter’
|
||
The conversion function can use this element to see how many
|
||
calls of the conversion function already happened. Some
|
||
character sets require a certain prolog when generating
|
||
output, and by comparing this value with zero, one can find
|
||
out whether it is the first call and whether, therefore, the
|
||
prolog should be emitted. This element must never be
|
||
modified.
|
||
|
||
‘int __internal_use’
|
||
This element is another one rarely used but needed in certain
|
||
situations. It is assigned a nonzero value in case the
|
||
conversion functions are used to implement ‘mbsrtowcs’ et al.
|
||
(i.e., the function is not used directly through the ‘iconv’
|
||
interface).
|
||
|
||
This sometimes makes a difference as it is expected that the
|
||
‘iconv’ functions are used to translate entire texts while the
|
||
‘mbsrtowcs’ functions are normally used only to convert single
|
||
strings and might be used multiple times to convert entire
|
||
texts.
|
||
|
||
But in this situation we would have problems complying with
|
||
some rules of the character set specification. Some character
|
||
sets require a prolog, which must appear exactly once for an
|
||
entire text. If a number of ‘mbsrtowcs’ calls are used to
|
||
convert the text, only the first call must add the prolog.
|
||
However, because there is no communication between the
|
||
different calls of ‘mbsrtowcs’, the conversion functions have
|
||
no possibility to find this out. The situation is different
|
||
for sequences of ‘iconv’ calls since the handle allows access
|
||
to the needed information.
|
||
|
||
The ‘int __internal_use’ element is mostly used together with
|
||
‘__invocation_counter’ as follows:
|
||
|
||
if (!data->__internal_use
|
||
&& data->__invocation_counter == 0)
|
||
/* Emit prolog. */
|
||
…
|
||
|
||
This element must never be modified.
|
||
|
||
‘mbstate_t *__statep’
|
||
The ‘__statep’ element points to an object of type ‘mbstate_t’
|
||
(*note Keeping the state::). The conversion of a stateful
|
||
character set must use the object pointed to by ‘__statep’ to
|
||
store information about the conversion state. The ‘__statep’
|
||
element itself must never be modified.
|
||
|
||
‘mbstate_t __state’
|
||
This element must _never_ be used directly. It is only part
|
||
of this structure to have the needed space allocated.
|
||
|
||
6.5.4.4 ‘iconv’ module interfaces
|
||
.................................
|
||
|
||
With the knowledge about the data structures we now can describe the
|
||
conversion function itself. To understand the interface a bit of
|
||
knowledge is necessary about the functionality in the C library that
|
||
loads the objects with the conversions.
|
||
|
||
It is often the case that one conversion is used more than once
|
||
(i.e., there are several ‘iconv_open’ calls for the same set of
|
||
character sets during one program run). The ‘mbsrtowcs’ et al.
|
||
functions in the GNU C Library also use the ‘iconv’ functionality, which
|
||
increases the number of uses of the same functions even more.
|
||
|
||
Because of this multiple use of conversions, the modules do not get
|
||
loaded exclusively for one conversion. Instead a module once loaded can
|
||
be used by an arbitrary number of ‘iconv’ or ‘mbsrtowcs’ calls at the
|
||
same time. The splitting of the information between conversion-
|
||
function-specific information and conversion data makes this possible.
|
||
The last section showed the two data structures used to do this.
|
||
|
||
This is of course also reflected in the interface and semantics of
|
||
the functions that the modules must provide. There are three functions
|
||
that must have the following names:
|
||
|
||
‘gconv_init’
|
||
The ‘gconv_init’ function initializes the conversion function
|
||
specific data structure. This very same object is shared by all
|
||
conversions that use this conversion and, therefore, no state
|
||
information about the conversion itself must be stored in here. If
|
||
a module implements more than one conversion, the ‘gconv_init’
|
||
function will be called multiple times.
|
||
|
||
‘gconv_end’
|
||
The ‘gconv_end’ function is responsible for freeing all resources
|
||
allocated by the ‘gconv_init’ function. If there is nothing to do,
|
||
this function can be missing. Special care must be taken if the
|
||
module implements more than one conversion and the ‘gconv_init’
|
||
function does not allocate the same resources for all conversions.
|
||
|
||
‘gconv’
|
||
This is the actual conversion function. It is called to convert
|
||
one block of text. It gets passed the conversion step information
|
||
initialized by ‘gconv_init’ and the conversion data, specific to
|
||
this use of the conversion functions.
|
||
|
||
There are three data types defined for the three module interface
|
||
functions and these define the interface.
|
||
|
||
-- Data type: int (*__gconv_init_fct) (struct __gconv_step *)
|
||
This specifies the interface of the initialization function of the
|
||
module. It is called exactly once for each conversion the module
|
||
implements.
|
||
|
||
As explained in the description of the ‘struct __gconv_step’ data
|
||
structure above the initialization function has to initialize parts
|
||
of it.
|
||
|
||
‘__min_needed_from’
|
||
‘__max_needed_from’
|
||
‘__min_needed_to’
|
||
‘__max_needed_to’
|
||
These elements must be initialized to the exact numbers of the
|
||
minimum and maximum number of bytes used by one character in
|
||
the source and destination character sets, respectively. If
|
||
the characters all have the same size, the minimum and maximum
|
||
values are the same.
|
||
|
||
‘__stateful’
|
||
This element must be initialized to a nonzero value if the
|
||
source character set is stateful. Otherwise it must be zero.
|
||
|
||
If the initialization function needs to communicate some
|
||
information to the conversion function, this communication can
|
||
happen using the ‘__data’ element of the ‘__gconv_step’ structure.
|
||
But since this data is shared by all the conversions, it must not
|
||
be modified by the conversion function. The example below shows
|
||
how this can be used.
|
||
|
||
#define MIN_NEEDED_FROM 1
|
||
#define MAX_NEEDED_FROM 4
|
||
#define MIN_NEEDED_TO 4
|
||
#define MAX_NEEDED_TO 4
|
||
|
||
int
|
||
gconv_init (struct __gconv_step *step)
|
||
{
|
||
/* Determine which direction. */
|
||
struct iso2022jp_data *new_data;
|
||
enum direction dir = illegal_dir;
|
||
enum variant var = illegal_var;
|
||
int result;
|
||
|
||
if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0)
|
||
{
|
||
dir = from_iso2022jp;
|
||
var = iso2022jp;
|
||
}
|
||
else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0)
|
||
{
|
||
dir = to_iso2022jp;
|
||
var = iso2022jp;
|
||
}
|
||
else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0)
|
||
{
|
||
dir = from_iso2022jp;
|
||
var = iso2022jp2;
|
||
}
|
||
else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0)
|
||
{
|
||
dir = to_iso2022jp;
|
||
var = iso2022jp2;
|
||
}
|
||
|
||
result = __GCONV_NOCONV;
|
||
if (dir != illegal_dir)
|
||
{
|
||
new_data = (struct iso2022jp_data *)
|
||
malloc (sizeof (struct iso2022jp_data));
|
||
|
||
result = __GCONV_NOMEM;
|
||
if (new_data != NULL)
|
||
{
|
||
new_data->dir = dir;
|
||
new_data->var = var;
|
||
step->__data = new_data;
|
||
|
||
if (dir == from_iso2022jp)
|
||
{
|
||
step->__min_needed_from = MIN_NEEDED_FROM;
|
||
step->__max_needed_from = MAX_NEEDED_FROM;
|
||
step->__min_needed_to = MIN_NEEDED_TO;
|
||
step->__max_needed_to = MAX_NEEDED_TO;
|
||
}
|
||
else
|
||
{
|
||
step->__min_needed_from = MIN_NEEDED_TO;
|
||
step->__max_needed_from = MAX_NEEDED_TO;
|
||
step->__min_needed_to = MIN_NEEDED_FROM;
|
||
step->__max_needed_to = MAX_NEEDED_FROM + 2;
|
||
}
|
||
|
||
/* Yes, this is a stateful encoding. */
|
||
step->__stateful = 1;
|
||
|
||
result = __GCONV_OK;
|
||
}
|
||
}
|
||
|
||
return result;
|
||
}
|
||
|
||
The function first checks which conversion is wanted. The module
|
||
from which this function is taken implements four different
|
||
conversions; which one is selected can be determined by comparing
|
||
the names. The comparison should always be done without paying
|
||
attention to the case.
|
||
|
||
Next, a data structure, which contains the necessary information
|
||
about which conversion is selected, is allocated. The data
|
||
structure ‘struct iso2022jp_data’ is locally defined since, outside
|
||
the module, this data is not used at all. Please note that if all
|
||
four conversions this module supports are requested there are four
|
||
data blocks.
|
||
|
||
One interesting thing is the initialization of the ‘__min_’ and
|
||
‘__max_’ elements of the step data object. A single ISO-2022-JP
|
||
character can consist of one to four bytes. Therefore the
|
||
‘MIN_NEEDED_FROM’ and ‘MAX_NEEDED_FROM’ macros are defined this
|
||
way. The output is always the ‘INTERNAL’ character set (aka UCS-4)
|
||
and therefore each character consists of exactly four bytes. For
|
||
the conversion from ‘INTERNAL’ to ISO-2022-JP we have to take into
|
||
account that escape sequences might be necessary to switch the
|
||
character sets. Therefore the ‘__max_needed_to’ element for this
|
||
direction gets assigned ‘MAX_NEEDED_FROM + 2’. This takes into
|
||
account the two bytes needed for the escape sequences to signal the
|
||
switching. The asymmetry in the maximum values for the two
|
||
directions can be explained easily: when reading ISO-2022-JP text,
|
||
escape sequences can be handled alone (i.e., it is not necessary to
|
||
process a real character since the effect of the escape sequence
|
||
can be recorded in the state information). The situation is
|
||
different for the other direction. Since it is in general not
|
||
known which character comes next, one cannot emit escape sequences
|
||
to change the state in advance. This means the escape sequences
|
||
have to be emitted together with the next character.
|
||
Therefore one needs more room than only for the character itself.
|
||
|
||
The possible return values of the initialization function are:
|
||
|
||
‘__GCONV_OK’
|
||
The initialization succeeded.
|
||
‘__GCONV_NOCONV’
|
||
The requested conversion is not supported in the module. This
|
||
can happen if the ‘gconv-modules’ file has errors.
|
||
‘__GCONV_NOMEM’
|
||
Memory required to store additional information could not be
|
||
allocated.
|
||
|
||
The function called before the module is unloaded is significantly
|
||
easier. It often has nothing at all to do; in which case it can be left
|
||
out completely.
|
||
|
||
-- Data type: void (*__gconv_end_fct) (struct __gconv_step *)
|
||
The task of this function is to free all resources allocated in the
|
||
initialization function. Therefore only the ‘__data’ element of
|
||
the object pointed to by the argument is of interest. Continuing
|
||
the example from the initialization function, the finalization
|
||
function looks like this:
|
||
|
||
void
|
||
gconv_end (struct __gconv_step *data)
|
||
{
|
||
free (data->__data);
|
||
}
|
||
|
||
The most important function is the conversion function itself, which
|
||
can get quite complicated for complex character sets. But since this is
|
||
not of interest here, we will only describe a possible skeleton for the
|
||
conversion function.
|
||
|
||
-- Data type: int (*__gconv_fct) (struct __gconv_step *, struct
|
||
__gconv_step_data *, const char **, const char *, size_t *,
|
||
int)
|
||
The conversion function can be called for two basic reasons: to
|
||
convert text or to reset the state. From the description of the
|
||
‘iconv’ function it can be seen why the flushing mode is necessary.
|
||
What mode is selected is determined by the sixth argument, an
|
||
integer. This argument being nonzero means that flushing is
|
||
selected.
|
||
|
||
Common to both modes is where the output buffer can be found. The
|
||
information about this buffer is stored in the conversion step
|
||
data. A pointer to this information is passed as the second
|
||
argument to this function. The description of the ‘struct
|
||
__gconv_step_data’ structure has more information on the conversion
|
||
step data.
|
||
|
||
What has to be done for flushing depends on the source character
|
||
set. If the source character set is not stateful, nothing has to
|
||
be done. Otherwise the function has to emit a byte sequence to
|
||
bring the state object into the initial state. Once this all
|
||
happened the other conversion modules in the chain of conversions
|
||
have to get the same chance. Whether another step follows can be
|
||
determined from the ‘__is_last’ element of the step data structure
|
||
to which the second parameter points.
|
||
|
||
The more interesting mode is when actual text has to be converted.
|
||
The first step in this case is to convert as much text as possible
|
||
from the input buffer and store the result in the output buffer.
|
||
The start of the input buffer is determined by the third argument,
|
||
which is a pointer to a pointer variable referencing the beginning
|
||
of the buffer. The fourth argument is a pointer to the byte right
|
||
after the last byte in the buffer.
|
||
|
||
The conversion has to be performed according to the current state
|
||
if the character set is stateful. The state is stored in an object
|
||
pointed to by the ‘__statep’ element of the step data (second
|
||
argument). Once either the input buffer is empty or the output
|
||
buffer is full the conversion stops. At this point, the pointer
|
||
variable referenced by the third parameter must point to the byte
|
||
following the last processed byte (i.e., if all of the input is
|
||
consumed, this pointer and the fourth parameter have the same
|
||
value).
|
||
|
||
What now happens depends on whether this step is the last one. If
|
||
it is the last step, the only thing that has to be done is to
|
||
update the ‘__outbuf’ element of the step data structure to point
|
||
after the last written byte. This update gives the caller the
|
||
information on how much text is available in the output buffer. In
|
||
addition, the variable pointed to by the fifth parameter, which is
|
||
of type ‘size_t’, must be incremented by the number of characters
|
||
(_not bytes_) that were converted in a non-reversible way. Then,
|
||
the function can return.
|
||
|
||
In case the step is not the last one, the later conversion
|
||
functions have to get a chance to do their work. Therefore, the
|
||
appropriate conversion function has to be called. The information
|
||
about the functions is stored in the conversion data structures,
|
||
passed as the first parameter. This information and the step data
|
||
are stored in arrays, so the next element in both cases can be
|
||
found by simple pointer arithmetic:
|
||
|
||
int
|
||
gconv (struct __gconv_step *step, struct __gconv_step_data *data,
|
||
const char **inbuf, const char *inbufend, size_t *written,
|
||
int do_flush)
|
||
{
|
||
struct __gconv_step *next_step = step + 1;
|
||
struct __gconv_step_data *next_data = data + 1;
|
||
…
|
||
|
||
The ‘next_step’ pointer references the next step information and
|
||
‘next_data’ the next data record. The call of the next function
|
||
therefore will look similar to this:
|
||
|
||
next_step->__fct (next_step, next_data, &outerr, outbuf,
|
||
written, 0)
|
||
|
||
But this is not yet all. Once the function call returns the
|
||
conversion function might have some more to do. If the return
|
||
value of the function is ‘__GCONV_EMPTY_INPUT’, more room is
|
||
available in the output buffer. Unless the input buffer is empty
|
||
the conversion functions start all over again and process the rest
|
||
of the input buffer. If the return value is not
|
||
‘__GCONV_EMPTY_INPUT’, something went wrong and we have to recover
|
||
from this.
|
||
|
||
A requirement for the conversion function is that the input buffer
|
||
pointer (the third argument) always point to the last character
|
||
that was put in converted form into the output buffer. This is
|
||
trivially true after the conversion performed in the current step,
|
||
but if the conversion functions deeper downstream stop prematurely,
|
||
not all characters from the output buffer are consumed and,
|
||
therefore, the input buffer pointers must be backed off to the
|
||
right position.
|
||
|
||
Correcting the input buffers is easy to do if the input and output
|
||
character sets have a fixed width for all characters. In this
|
||
situation we can compute how many characters are left in the output
|
||
buffer and, therefore, can correct the input buffer pointer
|
||
appropriately with a similar computation. Things are getting
|
||
tricky if either character set has characters represented with
|
||
variable length byte sequences, and it gets even more complicated
|
||
if the conversion has to take care of the state. In these cases
|
||
the conversion has to be performed once again, from the known state
|
||
before the initial conversion (i.e., if necessary the state of the
|
||
conversion has to be reset and the conversion loop has to be
|
||
executed again). The difference now is that it is known how much
|
||
input must be created, and the conversion can stop before
|
||
converting the first unused character. Once this is done the input
|
||
buffer pointers must be updated again and the function can return.
|
||
|
||
One final thing should be mentioned. If it is necessary for the
|
||
conversion to know whether it is the first invocation (in case a
|
||
prolog has to be emitted), the conversion function should increment
|
||
the ‘__invocation_counter’ element of the step data structure just
|
||
before returning to the caller. See the description of the ‘struct
|
||
__gconv_step_data’ structure above for more information on how this
|
||
can be used.
|
||
|
||
The return value must be one of the following values:
|
||
|
||
‘__GCONV_EMPTY_INPUT’
|
||
All input was consumed and there is room left in the output
|
||
buffer.
|
||
‘__GCONV_FULL_OUTPUT’
|
||
No more room in the output buffer. In case this is not the
|
||
last step this value is propagated down from the call of the
|
||
next conversion function in the chain.
|
||
‘__GCONV_INCOMPLETE_INPUT’
|
||
The input buffer is not entirely empty since it contains an
|
||
incomplete character sequence.
|
||
|
||
The following example provides a framework for a conversion
|
||
function. In case a new conversion has to be written the holes in
|
||
this implementation have to be filled and that is it.
|
||
|
||
int
|
||
gconv (struct __gconv_step *step, struct __gconv_step_data *data,
|
||
const char **inbuf, const char *inbufend, size_t *written,
|
||
int do_flush)
|
||
{
|
||
struct __gconv_step *next_step = step + 1;
|
||
struct __gconv_step_data *next_data = data + 1;
|
||
gconv_fct fct = next_step->__fct;
|
||
int status;
|
||
|
||
/* If the function is called with no input this means we have
|
||
to reset to the initial state. The possibly partly
|
||
converted input is dropped. */
|
||
if (do_flush)
|
||
{
|
||
status = __GCONV_OK;
|
||
|
||
/* Possibly emit a byte sequence which puts the state object
|
||
into the initial state. */
|
||
|
||
/* Call the steps down the chain if there are any but only
|
||
if we successfully emitted the escape sequence. */
|
||
if (status == __GCONV_OK && ! data->__is_last)
|
||
status = fct (next_step, next_data, NULL, NULL,
|
||
written, 1);
|
||
}
|
||
else
|
||
{
|
||
/* We preserve the initial values of the pointer variables. */
|
||
const char *inptr = *inbuf;
|
||
char *outbuf = data->__outbuf;
|
||
char *outend = data->__outbufend;
|
||
char *outptr;
|
||
|
||
do
|
||
{
|
||
/* Remember the start value for this round. */
|
||
inptr = *inbuf;
|
||
/* The outbuf buffer is empty. */
|
||
outptr = outbuf;
|
||
|
||
/* For stateful encodings the state must be saved here. */
|
||
|
||
/* Run the conversion loop. ‘status’ is set
|
||
appropriately afterwards. */
|
||
|
||
/* If this is the last step, leave the loop. There is
|
||
nothing we can do. */
|
||
if (data->__is_last)
|
||
{
|
||
/* Store information about how many bytes are
|
||
available. */
|
||
data->__outbuf = outbuf;
|
||
|
||
/* If any non-reversible conversions were performed,
|
||
add the number to ‘*written’. */
|
||
|
||
break;
|
||
}
|
||
|
||
/* Write out all output that was produced. */
|
||
if (outbuf > outptr)
|
||
{
|
||
const char *outerr = data->__outbuf;
|
||
int result;
|
||
|
||
result = fct (next_step, next_data, &outerr,
|
||
outbuf, written, 0);
|
||
|
||
if (result != __GCONV_EMPTY_INPUT)
|
||
{
|
||
if (outerr != outbuf)
|
||
{
|
||
/* Reset the input buffer pointer. We
|
||
document here the complex case. */
|
||
size_t nstatus;
|
||
|
||
/* Reload the pointers. */
|
||
*inbuf = inptr;
|
||
outbuf = outptr;
|
||
|
||
/* Possibly reset the state. */
|
||
|
||
/* Redo the conversion, but this time
|
||
the end of the output buffer is at
|
||
‘outerr’. */
|
||
}
|
||
|
||
/* Change the status. */
|
||
status = result;
|
||
}
|
||
else
|
||
/* All the output is consumed, we can make
|
||
another run if everything was ok. */
|
||
if (status == __GCONV_FULL_OUTPUT)
|
||
status = __GCONV_OK;
|
||
}
|
||
}
|
||
while (status == __GCONV_OK);
|
||
|
||
/* We finished one use of this step. */
|
||
++data->__invocation_counter;
|
||
}
|
||
|
||
return status;
|
||
}
|
||
|
||
This information should be sufficient to write new modules. Anybody
|
||
doing so should also take a look at the available source code in the GNU
|
||
C Library sources. It contains many examples of working and optimized
|
||
modules.
|
||
|
||
|
||
File: libc.info, Node: Locales, Next: Message Translation, Prev: Character Set Handling, Up: Top
|
||
|
||
7 Locales and Internationalization
|
||
**********************************
|
||
|
||
Different countries and cultures have varying conventions for how to
|
||
communicate. These conventions range from very simple ones, such as the
|
||
format for representing dates and times, to very complex ones, such as
|
||
the language spoken.
|
||
|
||
"Internationalization" of software means programming it to be able to
|
||
adapt to the user’s favorite conventions. In ISO C,
|
||
internationalization works by means of "locales". Each locale specifies
|
||
a collection of conventions, one convention for each purpose. The user
|
||
chooses a set of conventions by specifying a locale (via environment
|
||
variables).
|
||
|
||
All programs inherit the chosen locale as part of their environment.
|
||
Provided the programs are written to obey the choice of locale, they
|
||
will follow the conventions preferred by the user.
|
||
|
||
* Menu:
|
||
|
||
* Effects of Locale:: Actions affected by the choice of
|
||
locale.
|
||
* Choosing Locale:: How the user specifies a locale.
|
||
* Locale Categories:: Different purposes for which you can
|
||
select a locale.
|
||
* Setting the Locale:: How a program specifies the locale
|
||
with library functions.
|
||
* Standard Locales:: Locale names available on all systems.
|
||
* Locale Names:: Format of system-specific locale names.
|
||
* Locale Information:: How to access the information for the locale.
|
||
* Formatting Numbers:: A dedicated function to format numbers.
|
||
* Yes-or-No Questions:: Check a Response against the locale.
|
||
|
||
|
||
File: libc.info, Node: Effects of Locale, Next: Choosing Locale, Up: Locales
|
||
|
||
7.1 What Effects a Locale Has
|
||
=============================
|
||
|
||
Each locale specifies conventions for several purposes, including the
|
||
following:
|
||
|
||
• What multibyte character sequences are valid, and how they are
|
||
interpreted (*note Character Set Handling::).
|
||
|
||
• Classification of which characters in the local character set are
|
||
considered alphabetic, and upper- and lower-case conversion
|
||
conventions (*note Character Handling::).
|
||
|
||
• The collating sequence for the local language and character set
|
||
(*note Collation Functions::).
|
||
|
||
• Formatting of numbers and currency amounts (*note General
|
||
Numeric::).
|
||
|
||
• Formatting of dates and times (*note Formatting Calendar Time::).
|
||
|
||
• What language to use for output, including error messages (*note
|
||
Message Translation::).
|
||
|
||
• What language to use for user answers to yes-or-no questions (*note
|
||
Yes-or-No Questions::).
|
||
|
||
• What language to use for more complex user input. (The C library
|
||
doesn’t yet help you implement this.)
|
||
|
||
Some aspects of adapting to the specified locale are handled
|
||
automatically by the library subroutines. For example, all your program
|
||
needs to do in order to use the collating sequence of the chosen locale
|
||
is to use ‘strcoll’ or ‘strxfrm’ to compare strings.
|
||
|
||
Other aspects of locales are beyond the comprehension of the library.
|
||
For example, the library can’t automatically translate your program’s
|
||
output messages into other languages. The only way you can support
|
||
output in the user’s favorite language is to program this more or less
|
||
by hand. The C library provides functions to handle translations for
|
||
multiple languages easily.
|
||
|
||
This chapter discusses the mechanism by which you can modify the
|
||
current locale. The effects of the current locale on specific library
|
||
functions are discussed in more detail in the descriptions of those
|
||
functions.
|
||
|
||
|
||
File: libc.info, Node: Choosing Locale, Next: Locale Categories, Prev: Effects of Locale, Up: Locales
|
||
|
||
7.2 Choosing a Locale
|
||
=====================
|
||
|
||
The simplest way for the user to choose a locale is to set the
|
||
environment variable ‘LANG’. This specifies a single locale to use for
|
||
all purposes. For example, a user could specify a hypothetical locale
|
||
named ‘espana-castellano’ to use the standard conventions of most of
|
||
Spain.
|
||
|
||
The set of locales supported depends on the operating system you are
|
||
using, and so do their names, except that the standard locale called ‘C’
|
||
or ‘POSIX’ always exists. *Note Locale Names::.
|
||
|
||
In order to force the system to always use the default locale, the
|
||
user can set the ‘LC_ALL’ environment variable to ‘C’.
|
||
|
||
A user also has the option of specifying different locales for
|
||
different purposes—in effect, choosing a mixture of multiple locales.
|
||
*Note Locale Categories::.
|
||
|
||
For example, the user might specify the locale ‘espana-castellano’
|
||
for most purposes, but specify the locale ‘usa-english’ for currency
|
||
formatting. This might make sense if the user is a Spanish-speaking
|
||
American, working in Spanish, but representing monetary amounts in US
|
||
dollars.
|
||
|
||
Note that both locales ‘espana-castellano’ and ‘usa-english’, like
|
||
all locales, would include conventions for all of the purposes to which
|
||
locales apply. However, the user can choose to use each locale for a
|
||
particular subset of those purposes.
|
||
|
||
|
||
File: libc.info, Node: Locale Categories, Next: Setting the Locale, Prev: Choosing Locale, Up: Locales
|
||
|
||
7.3 Locale Categories
|
||
=====================
|
||
|
||
The purposes that locales serve are grouped into "categories", so that a
|
||
user or a program can choose the locale for each category independently.
|
||
Here is a table of categories; each name is both an environment variable
|
||
that a user can set, and a macro name that you can use as the first
|
||
argument to ‘setlocale’.
|
||
|
||
The contents of the environment variable (or the string in the second
|
||
argument to ‘setlocale’) has to be a valid locale name. *Note Locale
|
||
Names::.
|
||
|
||
‘LC_COLLATE’
|
||
This category applies to collation of strings (functions ‘strcoll’
|
||
and ‘strxfrm’); see *note Collation Functions::.
|
||
|
||
‘LC_CTYPE’
|
||
This category applies to classification and conversion of
|
||
characters, and to multibyte and wide characters; see *note
|
||
Character Handling::, and *note Character Set Handling::.
|
||
|
||
‘LC_MONETARY’
|
||
This category applies to formatting monetary values; see *note
|
||
General Numeric::.
|
||
|
||
‘LC_NUMERIC’
|
||
This category applies to formatting numeric values that are not
|
||
monetary; see *note General Numeric::.
|
||
|
||
‘LC_TIME’
|
||
This category applies to formatting date and time values; see *note
|
||
Formatting Calendar Time::.
|
||
|
||
‘LC_MESSAGES’
|
||
This category applies to selecting the language used in the user
|
||
interface for message translation (*note The Uniforum approach::;
|
||
*note Message catalogs a la X/Open::) and contains regular
|
||
expressions for affirmative and negative responses.
|
||
|
||
‘LC_ALL’
|
||
This is not a category; it is only a macro that you can use with
|
||
‘setlocale’ to set a single locale for all purposes. Setting this
|
||
environment variable overwrites all selections by the other ‘LC_*’
|
||
variables or ‘LANG’.
|
||
|
||
‘LANG’
|
||
If this environment variable is defined, its value specifies the
|
||
locale to use for all purposes except as overridden by the
|
||
variables above.
|
||
|
||
When developing the message translation functions it was felt that
|
||
the functionality provided by the variables above is not sufficient.
|
||
For example, it should be possible to specify more than one locale name.
|
||
Take a Swedish user who speaks German better than English, and a program
|
||
whose messages are output in English by default. It should be possible
|
||
to specify that the first choice of language is Swedish, the second
|
||
German, and if this also fails to use English. This is possible with
|
||
the variable ‘LANGUAGE’. For further description of this GNU extension
|
||
see *note Using gettextized software::.
|
||
|
||
|
||
File: libc.info, Node: Setting the Locale, Next: Standard Locales, Prev: Locale Categories, Up: Locales
|
||
|
||
7.4 How Programs Set the Locale
|
||
===============================
|
||
|
||
A C program inherits its locale environment variables when it starts up.
|
||
This happens automatically. However, these variables do not
|
||
automatically control the locale used by the library functions, because ISO C
|
||
says that all programs start by default in the standard ‘C’ locale. To
|
||
use the locales specified by the environment, you must call ‘setlocale’.
|
||
Call it as follows:
|
||
|
||
setlocale (LC_ALL, "");
|
||
|
||
to select a locale based on the user choice of the appropriate
|
||
environment variables.
|
||
|
||
You can also use ‘setlocale’ to specify a particular locale, for
|
||
general use or for a specific category.
|
||
|
||
The symbols in this section are defined in the header file
|
||
‘locale.h’.
|
||
|
||
-- Function: char * setlocale (int CATEGORY, const char *LOCALE)
|
||
Preliminary: | MT-Unsafe const:locale env | AS-Unsafe init lock
|
||
heap corrupt | AC-Unsafe init corrupt lock mem fd | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
The function ‘setlocale’ sets the current locale for category
|
||
CATEGORY to LOCALE.
|
||
|
||
If CATEGORY is ‘LC_ALL’, this specifies the locale for all
|
||
purposes. The other possible values of CATEGORY specify a single
|
||
purpose (*note Locale Categories::).
|
||
|
||
You can also use this function to find out the current locale by
|
||
passing a null pointer as the LOCALE argument. In this case,
|
||
‘setlocale’ returns a string that is the name of the locale
|
||
currently selected for category CATEGORY.
|
||
|
||
The string returned by ‘setlocale’ can be overwritten by subsequent
|
||
calls, so you should make a copy of the string (*note Copying
|
||
Strings and Arrays::) if you want to save it past any further calls
|
||
to ‘setlocale’. (The standard library is guaranteed never to call
|
||
‘setlocale’ itself.)
|
||
|
||
You should not modify the string returned by ‘setlocale’. It might
|
||
be the same string that was passed as an argument in a previous
|
||
call to ‘setlocale’. One requirement is that the CATEGORY must be
|
||
the same in the call that returned the string and the call in which the
|
||
string is passed in as LOCALE parameter.
|
||
|
||
When you read the current locale for category ‘LC_ALL’, the value
|
||
encodes the entire combination of selected locales for all
|
||
categories. If you specify the same “locale name” with ‘LC_ALL’ in
|
||
a subsequent call to ‘setlocale’, it restores the same combination
|
||
of locale selections.
|
||
|
||
To be sure you can use the returned string encoding the currently
|
||
selected locale at a later time, you must make a copy of the
|
||
string. It is not guaranteed that the returned pointer remains
|
||
valid over time.
|
||
|
||
When the LOCALE argument is not a null pointer, the string returned
|
||
by ‘setlocale’ reflects the newly-modified locale.
|
||
|
||
If you specify an empty string for LOCALE, this means to read the
|
||
appropriate environment variable and use its value to select the
|
||
locale for CATEGORY.
|
||
|
||
If a nonempty string is given for LOCALE, then the locale of that
|
||
name is used if possible.
|
||
|
||
The effective locale name (either the second argument to
|
||
‘setlocale’, or if the argument is an empty string, the name
|
||
obtained from the process environment) must be a valid locale name.
|
||
*Note Locale Names::.
|
||
|
||
If you specify an invalid locale name, ‘setlocale’ returns a null
|
||
pointer and leaves the current locale unchanged.
|
||
|
||
Here is an example showing how you might use ‘setlocale’ to
|
||
temporarily switch to a new locale.
|
||
|
||
#include <stddef.h>
|
||
#include <locale.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
|
||
void
|
||
with_other_locale (char *new_locale,
|
||
void (*subroutine) (int),
|
||
int argument)
|
||
{
|
||
char *old_locale, *saved_locale;
|
||
|
||
/* Get the name of the current locale. */
|
||
old_locale = setlocale (LC_ALL, NULL);
|
||
|
||
/* Copy the name so it won’t be clobbered by ‘setlocale’. */
|
||
saved_locale = strdup (old_locale);
|
||
if (saved_locale == NULL)
|
||
fatal ("Out of memory");
|
||
|
||
/* Now change the locale and do some stuff with it. */
|
||
setlocale (LC_ALL, new_locale);
|
||
(*subroutine) (argument);
|
||
|
||
/* Restore the original locale. */
|
||
setlocale (LC_ALL, saved_locale);
|
||
free (saved_locale);
|
||
}
|
||
|
||
*Portability Note:* Some ISO C systems may define additional locale
|
||
categories, and future versions of the library will do so. For
|
||
portability, assume that any symbol beginning with ‘LC_’ might be
|
||
defined in ‘locale.h’.
|
||
|
||
|
||
File: libc.info, Node: Standard Locales, Next: Locale Names, Prev: Setting the Locale, Up: Locales
|
||
|
||
7.5 Standard Locales
|
||
====================
|
||
|
||
The only locale names you can count on finding on all operating systems
|
||
are these three standard ones:
|
||
|
||
‘"C"’
|
||
This is the standard C locale. The attributes and behavior it
|
||
provides are specified in the ISO C standard. When your program
|
||
starts up, it initially uses this locale by default.
|
||
|
||
‘"POSIX"’
|
||
This is the standard POSIX locale. Currently, it is an alias for
|
||
the standard C locale.
|
||
|
||
‘""’
|
||
The empty name says to select a locale based on environment
|
||
variables. *Note Locale Categories::.
|
||
|
||
Defining and installing named locales is normally a responsibility of
|
||
the system administrator at your site (or the person who installed the
|
||
GNU C Library). It is also possible for the user to create private
|
||
locales. All this will be discussed later when describing the tool to
|
||
do so.
|
||
|
||
If your program needs to use something other than the ‘C’ locale, it
|
||
will be more portable if you use whatever locale the user specifies with
|
||
the environment, rather than trying to specify some non-standard locale
|
||
explicitly by name. Remember, different machines might have different
|
||
sets of locales installed.
|
||
|
||
|
||
File: libc.info, Node: Locale Names, Next: Locale Information, Prev: Standard Locales, Up: Locales
|
||
|
||
7.6 Locale Names
|
||
================
|
||
|
||
The following command prints a list of locales supported by the system:
|
||
|
||
locale -a
|
||
|
||
*Portability Note:* With the notable exception of the standard locale
|
||
names ‘C’ and ‘POSIX’, locale names are system-specific.
|
||
|
||
Most locale names follow XPG syntax and consist of up to four parts:
|
||
|
||
LANGUAGE[_TERRITORY[.CODESET]][@MODIFIER]
|
||
|
||
Except for the first part, all of them are allowed to be missing. If the
|
||
fully specified locale is not found, less specific ones are looked for.
|
||
The various parts will be stripped off, in the following order:
|
||
|
||
1. codeset
|
||
2. normalized codeset
|
||
3. territory
|
||
4. modifier
|
||
|
||
For example, the locale name ‘de_AT.iso885915@euro’ denotes a
|
||
German-language locale for use in Austria, using the ISO-8859-15
|
||
(Latin-9) character set, and with the Euro as the currency symbol.
|
||
|
||
In addition to locale names which follow XPG syntax, systems may
|
||
provide aliases such as ‘german’. Both categories of names must not
|
||
contain the slash character ‘/’.
|
||
|
||
If the locale name starts with a slash ‘/’, it is treated as a path
|
||
relative to the configured locale directories; see ‘LOCPATH’ below. The
|
||
specified path must not contain a component ‘..’, or the name is
|
||
invalid, and ‘setlocale’ will fail.
|
||
|
||
*Portability Note:* POSIX suggests that if a locale name starts with
|
||
a slash ‘/’, it is resolved as an absolute path. However, the GNU C
|
||
Library treats it as a relative path under the directories listed in
|
||
‘LOCPATH’ (or the default locale directory if ‘LOCPATH’ is unset).
|
||
|
||
Locale names which are longer than an implementation-defined limit
|
||
are invalid and cause ‘setlocale’ to fail.
|
||
|
||
As a special case, locale names used with ‘LC_ALL’ can combine
|
||
several locales, reflecting different locale settings for different
|
||
categories. For example, you might want to use a U.S. locale with ISO
|
||
A4 paper format, so you set ‘LANG’ to ‘en_US.UTF-8’, and ‘LC_PAPER’ to
|
||
‘de_DE.UTF-8’. In this case, the ‘LC_ALL’-style combined locale name is
|
||
|
||
LC_CTYPE=en_US.UTF-8;LC_TIME=en_US.UTF-8;LC_PAPER=de_DE.UTF-8;…
|
||
|
||
followed by other category settings not shown here.
|
||
|
||
The path used for finding locale data can be set using the ‘LOCPATH’
|
||
environment variable. This variable lists the directories in which to
|
||
search for locale definitions, separated by a colon ‘:’.
|
||
|
||
The default path for finding locale data is system specific. A
|
||
typical value for the ‘LOCPATH’ default is:
|
||
|
||
/usr/share/locale
|
||
|
||
The value of ‘LOCPATH’ is ignored by privileged programs for security
|
||
reasons, and only the default directory is used.
|
||
|
||
|
||
File: libc.info, Node: Locale Information, Next: Formatting Numbers, Prev: Locale Names, Up: Locales
|
||
|
||
7.7 Accessing Locale Information
|
||
================================
|
||
|
||
There are several ways to access locale information. The simplest way
|
||
is to let the C library itself do the work. Several of the functions in
|
||
this library implicitly access the locale data, and use what information
|
||
is provided by the currently selected locale. This is how the locale
|
||
model is meant to work normally.
|
||
|
||
As an example take the ‘strftime’ function, which is meant to nicely
|
||
format date and time information (*note Formatting Calendar Time::).
|
||
Part of the standard information contained in the ‘LC_TIME’ category is
|
||
the names of the months. Instead of requiring the programmer to take
|
||
care of providing the translations the ‘strftime’ function does this all
|
||
by itself. ‘%A’ in the format string is replaced by the appropriate
|
||
weekday name of the locale currently selected by ‘LC_TIME’. This is an
|
||
easy example, and wherever possible functions do things automatically in
|
||
this way.
|
||
|
||
But there are quite often situations when there is simply no function
|
||
to perform the task, or it is simply not possible to do the work
|
||
automatically. For these cases it is necessary to access the
|
||
information in the locale directly. To do this the C library provides
|
||
two functions: ‘localeconv’ and ‘nl_langinfo’. The former is part of ISO C
|
||
and therefore portable, but has a brain-damaged interface. The second
|
||
is part of the Unix interface and is portable in as far as the system
|
||
follows the Unix standards.
|
||
|
||
* Menu:
|
||
|
||
* The Lame Way to Locale Data:: ISO C’s ‘localeconv’.
|
||
* The Elegant and Fast Way:: X/Open’s ‘nl_langinfo’.
|
||
|
||
|
||
File: libc.info, Node: The Lame Way to Locale Data, Next: The Elegant and Fast Way, Up: Locale Information
|
||
|
||
7.7.1 ‘localeconv’: It is portable but …
|
||
----------------------------------------
|
||
|
||
Together with the ‘setlocale’ function the ISO C people invented the
|
||
‘localeconv’ function. It is a masterpiece of poor design. It is
|
||
expensive to use, not extendable, and not generally usable as it
|
||
provides access to only ‘LC_MONETARY’ and ‘LC_NUMERIC’ related
|
||
information. Nevertheless, if it is applicable to a given situation it
|
||
should be used since it is very portable. The function ‘strfmon’
|
||
formats monetary amounts according to the selected locale using this
|
||
information.
|
||
|
||
-- Function: struct lconv * localeconv (void)
|
||
Preliminary: | MT-Unsafe race:localeconv locale | AS-Unsafe |
|
||
AC-Safe | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘localeconv’ function returns a pointer to a structure whose
|
||
components contain information about how numeric and monetary
|
||
values should be formatted in the current locale.
|
||
|
||
You should not modify the structure or its contents. The structure
|
||
might be overwritten by subsequent calls to ‘localeconv’, or by
|
||
calls to ‘setlocale’, but no other function in the library
|
||
overwrites this value.
|
||
|
||
-- Data Type: struct lconv
|
||
‘localeconv’’s return value is of this data type. Its elements are
|
||
described in the following subsections.
|
||
|
||
If a member of the structure ‘struct lconv’ has type ‘char’, and the
|
||
value is ‘CHAR_MAX’, it means that the current locale has no value for
|
||
that parameter.
|
||
|
||
* Menu:
|
||
|
||
* General Numeric:: Parameters for formatting numbers and
|
||
currency amounts.
|
||
* Currency Symbol:: How to print the symbol that identifies an
|
||
amount of money (e.g. ‘$’).
|
||
* Sign of Money Amount:: How to print the (positive or negative) sign
|
||
for a monetary amount, if one exists.
|
||
|
||
|
||
File: libc.info, Node: General Numeric, Next: Currency Symbol, Up: The Lame Way to Locale Data
|
||
|
||
7.7.1.1 Generic Numeric Formatting Parameters
|
||
.............................................
|
||
|
||
These are the standard members of ‘struct lconv’; there may be others.
|
||
|
||
‘char *decimal_point’
|
||
‘char *mon_decimal_point’
|
||
These are the decimal-point separators used in formatting
|
||
non-monetary and monetary quantities, respectively. In the ‘C’
|
||
locale, the value of ‘decimal_point’ is ‘"."’, and the value of
|
||
‘mon_decimal_point’ is ‘""’.
|
||
|
||
‘char *thousands_sep’
|
||
‘char *mon_thousands_sep’
|
||
These are the separators used to delimit groups of digits to the
|
||
left of the decimal point in formatting non-monetary and monetary
|
||
quantities, respectively. In the ‘C’ locale, both members have a
|
||
value of ‘""’ (the empty string).
|
||
|
||
‘char *grouping’
|
||
‘char *mon_grouping’
|
||
These are strings that specify how to group the digits to the left
|
||
of the decimal point. ‘grouping’ applies to non-monetary
|
||
quantities and ‘mon_grouping’ applies to monetary quantities. Use
|
||
either ‘thousands_sep’ or ‘mon_thousands_sep’ to separate the digit
|
||
groups.
|
||
|
||
Each member of these strings is to be interpreted as an integer
|
||
value of type ‘char’. Successive numbers (from left to right) give
|
||
the sizes of successive groups (from right to left, starting at the
|
||
decimal point.) The last member is either ‘0’, in which case the
|
||
previous member is used over and over again for all the remaining
|
||
groups, or ‘CHAR_MAX’, in which case there is no more grouping—or,
|
||
put another way, any remaining digits form one large group without
|
||
separators.
|
||
|
||
For example, if ‘grouping’ is ‘"\04\03\02"’, the correct grouping
|
||
for the number ‘123456787654321’ is ‘12’, ‘34’, ‘56’, ‘78’, ‘765’,
|
||
‘4321’. This uses a group of 4 digits at the end, preceded by a
|
||
group of 3 digits, preceded by groups of 2 digits (as many as
|
||
needed). With a separator of ‘,’, the number would be printed as
|
||
‘12,34,56,78,765,4321’.
|
||
|
||
A value of ‘"\03"’ indicates repeated groups of three digits, as
|
||
normally used in the U.S.
|
||
|
||
In the standard ‘C’ locale, both ‘grouping’ and ‘mon_grouping’ have
|
||
a value of ‘""’. This value specifies no grouping at all.
|
||
|
||
‘char int_frac_digits’
|
||
‘char frac_digits’
|
||
These are small integers indicating how many fractional digits (to
|
||
the right of the decimal point) should be displayed in a monetary
|
||
value in international and local formats, respectively. (Most
|
||
often, both members have the same value.)
|
||
|
||
In the standard ‘C’ locale, both of these members have the value
|
||
‘CHAR_MAX’, meaning “unspecified”. The ISO standard doesn’t say
|
||
what to do when you find this value; we recommend printing no
|
||
fractional digits. (This locale also specifies the empty string
|
||
for ‘mon_decimal_point’, so printing any fractional digits would be
|
||
confusing!)
|
||
|
||
|
||
File: libc.info, Node: Currency Symbol, Next: Sign of Money Amount, Prev: General Numeric, Up: The Lame Way to Locale Data
|
||
|
||
7.7.1.2 Printing the Currency Symbol
|
||
....................................
|
||
|
||
These members of the ‘struct lconv’ structure specify how to print the
|
||
symbol to identify a monetary value—the international analog of ‘$’ for
|
||
US dollars.
|
||
|
||
Each country has two standard currency symbols. The "local currency
|
||
symbol" is used commonly within the country, while the "international
|
||
currency symbol" is used internationally to refer to that country’s
|
||
currency when it is necessary to indicate the country unambiguously.
|
||
|
||
For example, many countries use the dollar as their monetary unit,
|
||
and when dealing with international currencies it’s important to specify
|
||
that one is dealing with (say) Canadian dollars instead of U.S. dollars
|
||
or Australian dollars. But when the context is known to be Canada,
|
||
there is no need to make this explicit—dollar amounts are implicitly
|
||
assumed to be in Canadian dollars.
|
||
|
||
‘char *currency_symbol’
|
||
The local currency symbol for the selected locale.
|
||
|
||
In the standard ‘C’ locale, this member has a value of ‘""’ (the
|
||
empty string), meaning “unspecified”. The ISO standard doesn’t say
|
||
what to do when you find this value; we recommend you simply print
|
||
the empty string as you would print any other string pointed to by
|
||
this variable.
|
||
|
||
‘char *int_curr_symbol’
|
||
The international currency symbol for the selected locale.
|
||
|
||
The value of ‘int_curr_symbol’ should normally consist of a
|
||
three-letter abbreviation determined by the international standard
|
||
‘ISO 4217 Codes for the Representation of Currency and Funds’,
|
||
followed by a one-character separator (often a space).
|
||
|
||
In the standard ‘C’ locale, this member has a value of ‘""’ (the
|
||
empty string), meaning “unspecified”. We recommend you simply
|
||
print the empty string as you would print any other string pointed
|
||
to by this variable.
|
||
|
||
‘char p_cs_precedes’
|
||
‘char n_cs_precedes’
|
||
‘char int_p_cs_precedes’
|
||
‘char int_n_cs_precedes’
|
||
These members are ‘1’ if the ‘currency_symbol’ or ‘int_curr_symbol’
|
||
strings should precede the value of a monetary amount, or ‘0’ if
|
||
the strings should follow the value. The ‘p_cs_precedes’ and
|
||
‘int_p_cs_precedes’ members apply to positive amounts (or zero),
|
||
and the ‘n_cs_precedes’ and ‘int_n_cs_precedes’ members apply to
|
||
negative amounts.
|
||
|
||
In the standard ‘C’ locale, all of these members have a value of
|
||
‘CHAR_MAX’, meaning “unspecified”. The ISO standard doesn’t say
|
||
what to do when you find this value. We recommend printing the
|
||
currency symbol before the amount, which is right for most
|
||
countries. In other words, treat all nonzero values alike in these
|
||
members.
|
||
|
||
The members with the ‘int_’ prefix apply to the ‘int_curr_symbol’
|
||
while the other two apply to ‘currency_symbol’.
|
||
|
||
‘char p_sep_by_space’
|
||
‘char n_sep_by_space’
|
||
‘char int_p_sep_by_space’
|
||
‘char int_n_sep_by_space’
|
||
These members are ‘1’ if a space should appear between the
|
||
‘currency_symbol’ or ‘int_curr_symbol’ strings and the amount, or
|
||
‘0’ if no space should appear. The ‘p_sep_by_space’ and
|
||
‘int_p_sep_by_space’ members apply to positive amounts (or zero),
|
||
and the ‘n_sep_by_space’ and ‘int_n_sep_by_space’ members apply to
|
||
negative amounts.
|
||
|
||
In the standard ‘C’ locale, all of these members have a value of
|
||
‘CHAR_MAX’, meaning “unspecified”. The ISO standard doesn’t say
|
||
what you should do when you find this value; we suggest you treat
|
||
it as 1 (print a space). In other words, treat all nonzero values
|
||
alike in these members.
|
||
|
||
The members with the ‘int_’ prefix apply to the ‘int_curr_symbol’
|
||
while the other two apply to ‘currency_symbol’. There is one
|
||
specialty with the ‘int_curr_symbol’, though. Since all legal
|
||
values contain a space at the end of the string, one either prints this
|
||
space (if the currency symbol must appear in front and must be
|
||
separated) or one has to avoid printing this character at all
|
||
(especially when at the end of the string).
|
||
|
||
|
||
File: libc.info, Node: Sign of Money Amount, Prev: Currency Symbol, Up: The Lame Way to Locale Data
|
||
|
||
7.7.1.3 Printing the Sign of a Monetary Amount
|
||
..............................................
|
||
|
||
These members of the ‘struct lconv’ structure specify how to print the
|
||
sign (if any) of a monetary value.
|
||
|
||
‘char *positive_sign’
|
||
‘char *negative_sign’
|
||
These are strings used to indicate positive (or zero) and negative
|
||
monetary quantities, respectively.
|
||
|
||
In the standard ‘C’ locale, both of these members have a value of
|
||
‘""’ (the empty string), meaning “unspecified”.
|
||
|
||
The ISO standard doesn’t say what to do when you find this value;
|
||
we recommend printing ‘positive_sign’ as you find it, even if it is
|
||
empty. For a negative value, print ‘negative_sign’ as you find it
|
||
unless both it and ‘positive_sign’ are empty, in which case print
|
||
‘-’ instead. (Failing to indicate the sign at all seems rather
|
||
unreasonable.)
|
||
|
||
‘char p_sign_posn’
|
||
‘char n_sign_posn’
|
||
‘char int_p_sign_posn’
|
||
‘char int_n_sign_posn’
|
||
These members are small integers that indicate how to position the
|
||
sign for nonnegative and negative monetary quantities,
|
||
respectively. (The string used by the sign is what was specified
|
||
with ‘positive_sign’ or ‘negative_sign’.) The possible values are
|
||
as follows:
|
||
|
||
‘0’
|
||
The currency symbol and quantity should be surrounded by
|
||
parentheses.
|
||
|
||
‘1’
|
||
Print the sign string before the quantity and currency symbol.
|
||
|
||
‘2’
|
||
Print the sign string after the quantity and currency symbol.
|
||
|
||
‘3’
|
||
Print the sign string right before the currency symbol.
|
||
|
||
‘4’
|
||
Print the sign string right after the currency symbol.
|
||
|
||
‘CHAR_MAX’
|
||
“Unspecified”. All of these members have this value in the standard
|
||
‘C’ locale.
|
||
|
||
The ISO standard doesn’t say what you should do when the value is
|
||
‘CHAR_MAX’. We recommend you print the sign after the currency
|
||
symbol.
|
||
|
||
The members with the ‘int_’ prefix apply to the ‘int_curr_symbol’
|
||
while the other two apply to ‘currency_symbol’.
|
||
|
||
|
||
File: libc.info, Node: The Elegant and Fast Way, Prev: The Lame Way to Locale Data, Up: Locale Information
|
||
|
||
7.7.2 Pinpoint Access to Locale Data
|
||
------------------------------------
|
||
|
||
When writing the X/Open Portability Guide the authors realized that the
|
||
‘localeconv’ function is not enough to provide reasonable access to
|
||
locale information. The information which was meant to be available in
|
||
the locale (as later specified in the POSIX.1 standard) requires more
|
||
ways to access it. Therefore the ‘nl_langinfo’ function was introduced.
|
||
|
||
-- Function: char * nl_langinfo (nl_item ITEM)
|
||
Preliminary: | MT-Safe locale | AS-Safe | AC-Safe | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
The ‘nl_langinfo’ function can be used to access individual
|
||
elements of the locale categories. Unlike the ‘localeconv’
|
||
function, which returns all the information, ‘nl_langinfo’ lets the
|
||
caller select what information it requires. This is very fast and
|
||
it is not a problem to call this function multiple times.
|
||
|
||
A second advantage is that in addition to the numeric and monetary
|
||
formatting information, information from the ‘LC_TIME’ and
|
||
‘LC_MESSAGES’ categories is available.
|
||
|
||
The type ‘nl_item’ is defined in ‘nl_types.h’. The argument ITEM
|
||
is a numeric value defined in the header ‘langinfo.h’. The X/Open
|
||
standard defines the following values:
|
||
|
||
‘CODESET’
|
||
‘nl_langinfo’ returns a string with the name of the coded
|
||
character set used in the selected locale.
|
||
|
||
‘ABDAY_1’
|
||
‘ABDAY_2’
|
||
‘ABDAY_3’
|
||
‘ABDAY_4’
|
||
‘ABDAY_5’
|
||
‘ABDAY_6’
|
||
‘ABDAY_7’
|
||
‘nl_langinfo’ returns the abbreviated weekday name. ‘ABDAY_1’
|
||
corresponds to Sunday.
|
||
‘DAY_1’
|
||
‘DAY_2’
|
||
‘DAY_3’
|
||
‘DAY_4’
|
||
‘DAY_5’
|
||
‘DAY_6’
|
||
‘DAY_7’
|
||
Similar to ‘ABDAY_1’ etc., but here the return value is the
|
||
unabbreviated weekday name.
|
||
‘ABMON_1’
|
||
‘ABMON_2’
|
||
‘ABMON_3’
|
||
‘ABMON_4’
|
||
‘ABMON_5’
|
||
‘ABMON_6’
|
||
‘ABMON_7’
|
||
‘ABMON_8’
|
||
‘ABMON_9’
|
||
‘ABMON_10’
|
||
‘ABMON_11’
|
||
‘ABMON_12’
|
||
The return value is the abbreviated name of the month. ‘ABMON_1’
|
||
corresponds to January.
|
||
‘MON_1’
|
||
‘MON_2’
|
||
‘MON_3’
|
||
‘MON_4’
|
||
‘MON_5’
|
||
‘MON_6’
|
||
‘MON_7’
|
||
‘MON_8’
|
||
‘MON_9’
|
||
‘MON_10’
|
||
‘MON_11’
|
||
‘MON_12’
|
||
Similar to ‘ABMON_1’ etc., but here the month names are not
|
||
abbreviated. Here the first value ‘MON_1’ also corresponds to
|
||
January.
|
||
‘AM_STR’
|
||
‘PM_STR’
|
||
The return values are strings which can be used in the
|
||
representation of time as an hour from 1 to 12 plus an am/pm
|
||
specifier.
|
||
|
||
Note that in locales which do not use this time representation
|
||
these strings might be empty, in which case the am/pm format
|
||
cannot be used at all.
|
||
‘D_T_FMT’
|
||
The return value can be used as a format string for ‘strftime’
|
||
to represent time and date in a locale-specific way.
|
||
‘D_FMT’
|
||
The return value can be used as a format string for ‘strftime’
|
||
to represent a date in a locale-specific way.
|
||
‘T_FMT’
|
||
The return value can be used as a format string for ‘strftime’
|
||
to represent time in a locale-specific way.
|
||
‘T_FMT_AMPM’
|
||
The return value can be used as a format string for ‘strftime’
|
||
to represent time in the am/pm format.
|
||
|
||
Note that if the am/pm format does not make any sense for the
|
||
selected locale, the return value might be the same as the one
|
||
for ‘T_FMT’.
|
||
‘ERA’
|
||
The return value represents the era used in the current
|
||
locale.
|
||
|
||
Most locales do not define this value. An example of a locale
|
||
which does define this value is the Japanese one. In Japan,
|
||
the traditional representation of dates includes the name of
|
||
the era corresponding to the then-emperor’s reign.
|
||
|
||
Normally it should not be necessary to use this value
|
||
directly. Specifying the ‘E’ modifier in their format strings
|
||
causes the ‘strftime’ functions to use this information. The
|
||
format of the returned string is not specified, and therefore
|
||
you should not assume knowledge of it on different systems.
|
||
‘ERA_YEAR’
|
||
The return value gives the year in the relevant era of the
|
||
locale. As for ‘ERA’ it should not be necessary to use this
|
||
value directly.
|
||
‘ERA_D_T_FMT’
|
||
This return value can be used as a format string for
|
||
‘strftime’ to represent dates and times in a locale-specific
|
||
era-based way.
|
||
‘ERA_D_FMT’
|
||
This return value can be used as a format string for
|
||
‘strftime’ to represent a date in a locale-specific era-based
|
||
way.
|
||
‘ERA_T_FMT’
|
||
This return value can be used as a format string for
|
||
‘strftime’ to represent time in a locale-specific era-based
|
||
way.
|
||
‘ALT_DIGITS’
|
||
The return value is a representation of up to 100 values used
|
||
to represent the values 0 to 99. As for ‘ERA’ this value is
|
||
not intended to be used directly, but instead indirectly
|
||
through the ‘strftime’ function. When the modifier ‘O’ is
|
||
used in a format which would otherwise use numerals to
|
||
represent hours, minutes, seconds, weekdays, months, or weeks,
|
||
the appropriate value for the locale is used instead.
|
||
‘INT_CURR_SYMBOL’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘int_curr_symbol’ element of the ‘struct lconv’.
|
||
‘CURRENCY_SYMBOL’
|
||
‘CRNCYSTR’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘currency_symbol’ element of the ‘struct lconv’.
|
||
|
||
‘CRNCYSTR’ is a deprecated alias still required by Unix98.
|
||
‘MON_DECIMAL_POINT’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘mon_decimal_point’ element of the ‘struct lconv’.
|
||
‘MON_THOUSANDS_SEP’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘mon_thousands_sep’ element of the ‘struct lconv’.
|
||
‘MON_GROUPING’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘mon_grouping’ element of the ‘struct lconv’.
|
||
‘POSITIVE_SIGN’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘positive_sign’ element of the ‘struct lconv’.
|
||
‘NEGATIVE_SIGN’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘negative_sign’ element of the ‘struct lconv’.
|
||
‘INT_FRAC_DIGITS’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘int_frac_digits’ element of the ‘struct lconv’.
|
||
‘FRAC_DIGITS’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘frac_digits’ element of the ‘struct lconv’.
|
||
‘P_CS_PRECEDES’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘p_cs_precedes’ element of the ‘struct lconv’.
|
||
‘P_SEP_BY_SPACE’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘p_sep_by_space’ element of the ‘struct lconv’.
|
||
‘N_CS_PRECEDES’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘n_cs_precedes’ element of the ‘struct lconv’.
|
||
‘N_SEP_BY_SPACE’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘n_sep_by_space’ element of the ‘struct lconv’.
|
||
‘P_SIGN_POSN’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘p_sign_posn’ element of the ‘struct lconv’.
|
||
‘N_SIGN_POSN’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘n_sign_posn’ element of the ‘struct lconv’.
|
||
|
||
‘INT_P_CS_PRECEDES’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘int_p_cs_precedes’ element of the ‘struct lconv’.
|
||
‘INT_P_SEP_BY_SPACE’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘int_p_sep_by_space’ element of the ‘struct lconv’.
|
||
‘INT_N_CS_PRECEDES’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘int_n_cs_precedes’ element of the ‘struct lconv’.
|
||
‘INT_N_SEP_BY_SPACE’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘int_n_sep_by_space’ element of the ‘struct lconv’.
|
||
‘INT_P_SIGN_POSN’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘int_p_sign_posn’ element of the ‘struct lconv’.
|
||
‘INT_N_SIGN_POSN’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘int_n_sign_posn’ element of the ‘struct lconv’.
|
||
|
||
‘DECIMAL_POINT’
|
||
‘RADIXCHAR’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘decimal_point’ element of the ‘struct lconv’.
|
||
|
||
The name ‘RADIXCHAR’ is a deprecated alias still used in
|
||
Unix98.
|
||
‘THOUSANDS_SEP’
|
||
‘THOUSEP’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘thousands_sep’ element of the ‘struct lconv’.
|
||
|
||
The name ‘THOUSEP’ is a deprecated alias still used in Unix98.
|
||
‘GROUPING’
|
||
The same as the value returned by ‘localeconv’ in the
|
||
‘grouping’ element of the ‘struct lconv’.
|
||
‘YESEXPR’
|
||
The return value is a regular expression which can be used
|
||
with the ‘regex’ function to recognize a positive response to
|
||
a yes/no question. The GNU C Library provides the ‘rpmatch’
|
||
function for easier handling in applications.
|
||
‘NOEXPR’
|
||
The return value is a regular expression which can be used
|
||
with the ‘regex’ function to recognize a negative response to
|
||
a yes/no question.
|
||
‘YESSTR’
|
||
The return value is a locale-specific translation of the
|
||
positive response to a yes/no question.
|
||
|
||
Using this value is deprecated since it is a very special case
|
||
of message translation, and is better handled by the message
|
||
translation functions (*note Message Translation::).
|
||
|
||
The use of this symbol is deprecated. Instead message
|
||
translation should be used.
|
||
‘NOSTR’
|
||
The return value is a locale-specific translation of the
|
||
negative response to a yes/no question. What is said for
|
||
‘YESSTR’ is also true here.
|
||
|
||
The use of this symbol is deprecated. Instead message
|
||
translation should be used.
|
||
|
||
The file ‘langinfo.h’ defines a lot more symbols but none of them
|
||
is official. Using them is not portable, and the format of the
|
||
return values might change. Therefore we recommend you not use
|
||
them.
|
||
|
||
Note that the return value for any valid argument can be used
|
||
in all situations (with the possible exception of the am/pm time
|
||
formatting codes). If the user has not selected any locale for the
|
||
appropriate category, ‘nl_langinfo’ returns the information from
|
||
the ‘"C"’ locale. It is therefore possible to use this function as
|
||
shown in the example below.
|
||
|
||
If the argument ITEM is not valid, a pointer to an empty string is
|
||
returned.
|
||
|
||
An example of ‘nl_langinfo’ usage is a function which has to print a
|
||
given date and time in a locale-specific way. At first one might think
|
||
that, since ‘strftime’ internally uses the locale information, writing
|
||
something like the following is enough:
|
||
|
||
size_t
|
||
i18n_time_n_data (char *s, size_t len, const struct tm *tp)
|
||
{
|
||
return strftime (s, len, "%X %D", tp);
|
||
}
|
||
|
||
The format contains no weekday or month names and therefore is
|
||
internationally usable. Wrong! The output produced is something like
|
||
‘"hh:mm:ss MM/DD/YY"’. This format is only recognizable in the USA.
|
||
Other countries use different formats. Therefore the function should be
|
||
rewritten like this:
|
||
|
||
size_t
|
||
i18n_time_n_data (char *s, size_t len, const struct tm *tp)
|
||
{
|
||
return strftime (s, len, nl_langinfo (D_T_FMT), tp);
|
||
}
|
||
|
||
Now it uses the date and time format of the locale selected when the
|
||
program runs. If the user selects the locale correctly there should
|
||
never be a misunderstanding over the time and date format.
|
||
|
||
|
||
File: libc.info, Node: Formatting Numbers, Next: Yes-or-No Questions, Prev: Locale Information, Up: Locales
|
||
|
||
7.8 A dedicated function to format numbers
|
||
==========================================
|
||
|
||
We have seen that the structure returned by ‘localeconv’ as well as the
|
||
values given to ‘nl_langinfo’ allow you to retrieve the various pieces
|
||
of locale-specific information to format numbers and monetary amounts.
|
||
We have also seen that the underlying rules are quite complex.
|
||
|
||
Therefore the X/Open standards introduce a function which uses such
|
||
locale information, making it easier for the user to format numbers
|
||
according to these rules.
|
||
|
||
-- Function: ssize_t strfmon (char *S, size_t MAXSIZE, const char
|
||
*FORMAT, …)
|
||
Preliminary: | MT-Safe locale | AS-Unsafe heap | AC-Unsafe mem |
|
||
*Note POSIX Safety Concepts::.
|
||
|
||
The ‘strfmon’ function is similar to the ‘strftime’ function in
|
||
that it takes a buffer, its size, a format string, and values to
|
||
write into the buffer as text in a form specified by the format
|
||
string. Like ‘strftime’, the function also returns the number of
|
||
bytes written into the buffer.
|
||
|
||
There are two differences: ‘strfmon’ can take more than one
|
||
argument, and, of course, the format specification is different.
|
||
Like ‘strftime’, the format string consists of normal text, which
|
||
is output as is, and format specifiers, which are indicated by a
|
||
‘%’. Immediately after the ‘%’, you can optionally specify various
|
||
flags and formatting information before the main formatting
|
||
character, in a similar way to ‘printf’:
|
||
|
||
• Immediately following the ‘%’ there can be one or more of the
|
||
following flags:
|
||
‘=F’
|
||
The single byte character F is used for this field as the
|
||
numeric fill character. By default this character is a
|
||
space character. Filling with this character is only
|
||
performed if a left precision is specified. It is not
|
||
just to fill to the given field width.
|
||
‘^’
|
||
The number is printed without grouping the digits
|
||
according to the rules of the current locale. By default
|
||
grouping is enabled.
|
||
‘+’, ‘(’
|
||
At most one of these flags can be used. They select
|
||
which format to represent the sign of a currency amount.
|
||
By default, and if ‘+’ is given, the locale equivalent of
|
||
+/- is used. If ‘(’ is given, negative amounts are
|
||
enclosed in parentheses. The exact format is determined
|
||
by the values of the ‘LC_MONETARY’ category of the locale
|
||
selected at program runtime.
|
||
‘!’
|
||
The output will not contain the currency symbol.
|
||
‘-’
|
||
The output will be formatted left-justified instead of
|
||
right-justified if it does not fill the entire field
|
||
width.
|
||
|
||
The next part of a specification is an optional field width. If no
|
||
width is specified 0 is taken. During output, the function first
|
||
determines how much space is required. If it requires at least as
|
||
many characters as given by the field width, it is output using as
|
||
much space as necessary. Otherwise, it is extended to use the full
|
||
width by filling with the space character. The presence or absence
|
||
of the ‘-’ flag determines the side at which such padding occurs.
|
||
If present, the spaces are added at the right making the output
|
||
left-justified, and vice versa.
|
||
|
||
So far the format looks familiar, being similar to the ‘printf’ and
|
||
‘strftime’ formats. However, the next two optional fields
|
||
introduce something new. The first one is a ‘#’ character followed
|
||
by a decimal digit string. The value of the digit string specifies
|
||
the number of _digit_ positions to the left of the decimal point
|
||
(or equivalent). This does _not_ include the grouping character
|
||
when the ‘^’ flag is not given. If the space needed to print the
|
||
number does not fill the whole width, the field is padded at the
|
||
left side with the fill character, which can be selected using the
|
||
‘=’ flag and by default is a space. For example, if the field
|
||
width is selected as 6, the number is 123, and the fill character is
|
||
‘*’, the result will be ‘***123’.
|
||
|
||
The second optional field starts with a ‘.’ (period) and consists
|
||
of another decimal digit string. Its value describes the number of
|
||
characters printed after the decimal point. The default is
|
||
selected from the current locale (‘frac_digits’, ‘int_frac_digits’,
|
||
see *note General Numeric::). If the exact representation needs
|
||
more digits than given by the field width, the displayed value is
|
||
rounded. If the number of fractional digits is selected to be
|
||
zero, no decimal point is printed.
|
||
|
||
As a GNU extension, the ‘strfmon’ implementation in the GNU C
|
||
Library allows an optional ‘L’ next as a format modifier. If this
|
||
modifier is given, the argument is expected to be a ‘long double’
|
||
instead of a ‘double’ value.
|
||
|
||
Finally, the last component is a format specifier. There are three
|
||
specifiers defined:
|
||
|
||
‘i’
|
||
Use the locale’s rules for formatting an international
|
||
currency value.
|
||
‘n’
|
||
Use the locale’s rules for formatting a national currency
|
||
value.
|
||
‘%’
|
||
Place a ‘%’ in the output. There must be no flag, width
|
||
specifier or modifier given, only ‘%%’ is allowed.
|
||
|
||
As for ‘printf’, the function reads the format string from left to
|
||
right and uses the values passed to the function following the
|
||
format string. The values are expected to be either of type
|
||
‘double’ or ‘long double’, depending on the presence of the
|
||
modifier ‘L’. The result is stored in the buffer pointed to by S.
|
||
At most MAXSIZE characters are stored.
|
||
|
||
The return value of the function is the number of characters stored
|
||
in S, not including the terminating null byte. If the number of
|
||
characters stored would exceed MAXSIZE, the function returns -1 and
|
||
the content of the buffer S is unspecified. In this case ‘errno’
|
||
is set to ‘E2BIG’.
|
||
|
||
A few examples should make clear how the function works. It is
|
||
assumed that all the following pieces of code are executed in a program
|
||
which uses the USA locale (‘en_US’). The simplest form of the format is
|
||
this:
|
||
|
||
strfmon (buf, 100, "@%n@%n@%n@", 123.45, -567.89, 12345.678);
|
||
|
||
The output produced is
|
||
"@$123.45@-$567.89@$12,345.68@"
|
||
|
||
We can notice several things here. First, the widths of the output
|
||
numbers are different. We have not specified a width in the format
|
||
string, and so this is no wonder. Second, the third number is printed
|
||
using thousands separators. The thousands separator for the ‘en_US’
|
||
locale is a comma. The number is also rounded. .678 is rounded to .68
|
||
since the format does not specify a precision and the default value in
|
||
the locale is 2. Finally, note that the national currency symbol is
|
||
printed since ‘%n’ was used, not ‘%i’. The next example shows how we can
|
||
align the output.
|
||
|
||
strfmon (buf, 100, "@%=*11n@%=*11n@%=*11n@", 123.45, -567.89, 12345.678);
|
||
|
||
The output this time is:
|
||
|
||
"@ $123.45@ -$567.89@ $12,345.68@"
|
||
|
||
Two things stand out. Firstly, all fields have the same width
|
||
(eleven characters) since this is the width given in the format and
|
||
since no number required more characters to be printed. The second
|
||
important point is that the fill character is not used. This is correct
|
||
since the white space was not used to achieve a precision given by a ‘#’
|
||
modifier, but instead to fill to the given width. The difference
|
||
becomes obvious if we now add a width specification.
|
||
|
||
strfmon (buf, 100, "@%=*11#5n@%=*11#5n@%=*11#5n@",
|
||
123.45, -567.89, 12345.678);
|
||
|
||
The output is
|
||
|
||
"@ $***123.45@-$***567.89@ $12,345.68@"
|
||
|
||
Here we can see that all the currency symbols are now aligned, and
|
||
that the space between the currency sign and the number is filled with
|
||
the selected fill character. Note that although the width is selected
|
||
to be 5 and 123.45 has three digits left of the decimal point, the space
|
||
is filled with three asterisks. This is correct since, as explained
|
||
above, the width does not include the positions used to store thousands
|
||
separators. One last example should explain the remaining
|
||
functionality.
|
||
|
||
strfmon (buf, 100, "@%=0(16#5.3i@%=0(16#5.3i@%=0(16#5.3i@",
|
||
123.45, -567.89, 12345.678);
|
||
|
||
This rather complex format string produces the following output:
|
||
|
||
"@ USD 000123.450 @(USD 000567.890)@ USD 12,345.678 @"
|
||
|
||
The most noticeable change is the alternative way of representing
|
||
negative numbers. In financial circles this is often done using
|
||
parentheses, and this is what the ‘(’ flag selected. The fill character
|
||
is now ‘0’. Note that this ‘0’ character is not regarded as a numeric
|
||
zero, and therefore the first and second numbers are not printed using a
|
||
thousands separator. Since we used the format specifier ‘i’ instead of
|
||
‘n’, the international form of the currency symbol is used. This is a
|
||
four letter string, in this case ‘"USD "’. The last point is that since
|
||
the precision right of the decimal point is selected to be three, the
|
||
first and second numbers are printed with an extra zero at the end and
|
||
the third number is printed without rounding.
|
||
|
||
|
||
File: libc.info, Node: Yes-or-No Questions, Prev: Formatting Numbers, Up: Locales
|
||
|
||
7.9 Yes-or-No Questions
|
||
=======================
|
||
|
||
Some non-GUI programs ask a yes-or-no question. If the messages
|
||
(especially the questions) are translated into foreign languages, be
|
||
sure that you localize the answers too. It would be a very bad habit to
|
||
ask a question in one language and request the answer in another, often
|
||
English.
|
||
|
||
The GNU C Library contains ‘rpmatch’ to give applications easy access
|
||
to the corresponding locale definitions.
|
||
|
||
-- Function: int rpmatch (const char *RESPONSE)
|
||
Preliminary: | MT-Safe locale | AS-Unsafe corrupt heap lock dlopen
|
||
| AC-Unsafe corrupt lock mem fd | *Note POSIX Safety Concepts::.
|
||
|
||
The function ‘rpmatch’ checks whether the string in RESPONSE is a
|
||
correct yes-or-no answer and, if so, which one. The check
|
||
uses the ‘YESEXPR’ and ‘NOEXPR’ data in the ‘LC_MESSAGES’ category
|
||
of the currently selected locale. The return value is as follows:
|
||
|
||
‘1’
|
||
The user entered an affirmative answer.
|
||
|
||
‘0’
|
||
The user entered a negative answer.
|
||
|
||
‘-1’
|
||
The answer matched neither the ‘YESEXPR’ nor the ‘NOEXPR’
|
||
regular expression.
|
||
|
||
This function is not standardized, but besides the GNU C Library it
|
||
is also available at least in the IBM AIX library.
|
||
|
||
This function would normally be used like this:
|
||
|
||
…
|
||
/* Use a safe default. */
|
||
_Bool doit = false;
|
||
|
||
fputs (gettext ("Do you really want to do this? "), stdout);
|
||
fflush (stdout);
|
||
/* Prepare the ‘getline’ call. */
|
||
line = NULL;
|
||
len = 0;
|
||
while (getline (&line, &len, stdin) >= 0)
|
||
{
|
||
/* Check the response. */
|
||
int res = rpmatch (line);
|
||
if (res >= 0)
|
||
{
|
||
/* We got a definitive answer. */
|
||
if (res > 0)
|
||
doit = true;
|
||
break;
|
||
}
|
||
}
|
||
/* Free what ‘getline’ allocated. */
|
||
free (line);
|
||
|
||
Note that the loop continues until a read error is detected or until
|
||
a definitive (positive or negative) answer is read.
|
||
|
||
|
||
File: libc.info, Node: Message Translation, Next: Searching and Sorting, Prev: Locales, Up: Top
|
||
|
||
8 Message Translation
|
||
*********************
|
||
|
||
The program’s interface with the user should be designed to ease the
|
||
user’s task. One way to ease the user’s task is to use messages in
|
||
whatever language the user prefers.
|
||
|
||
Printing messages in different languages can be implemented in
|
||
different ways. One could add all the different languages in the source
|
||
code and choose among the variants every time a message has to be
|
||
printed. This is certainly not a good solution since extending the set
|
||
of languages is cumbersome (the code must be changed) and the code
|
||
itself can become really big with dozens of message sets.
|
||
|
||
A better solution is to keep the message sets for each language in
|
||
separate files which are loaded at runtime depending on the language
|
||
selection of the user.
|
||
|
||
The GNU C Library provides two different sets of functions to support
|
||
message translation. The problem is that neither of the interfaces is
|
||
officially defined by the POSIX standard. The ‘catgets’ family of
|
||
functions is defined in the X/Open standard but this is derived from
|
||
industry decisions and therefore not necessarily based on reasonable
|
||
decisions.
|
||
|
||
As mentioned above the message catalog handling provides easy
|
||
extendibility by using external data files which contain the message
|
||
translations. I.e., these files contain for each of the messages used
|
||
in the program a translation for the appropriate language. So the tasks
|
||
of the message handling functions are
|
||
|
||
• locate the external data file with the appropriate translations
|
||
• load the data and make it possible to address the messages
|
||
• map a given key to the translated message
|
||
|
||
The two approaches mainly differ in the implementation of this last
|
||
step. Decisions made in the last step influence the rest of the design.
|
||
|
||
* Menu:
|
||
|
||
* Message catalogs a la X/Open:: The ‘catgets’ family of functions.
|
||
* The Uniforum approach:: The ‘gettext’ family of functions.
|
||
|
||
|
||
File: libc.info, Node: Message catalogs a la X/Open, Next: The Uniforum approach, Up: Message Translation
|
||
|
||
8.1 X/Open Message Catalog Handling
|
||
===================================
|
||
|
||
The ‘catgets’ functions are based on the simple scheme:
|
||
|
||
Associate every message to translate in the source code with a
|
||
unique identifier. To retrieve a message from a catalog file
|
||
solely the identifier is used.
|
||
|
||
This means for the author of the program that s/he will have to make
|
||
sure the meaning of the identifier in the program code and in the
|
||
message catalogs are always the same.
|
||
|
||
Before a message can be translated the catalog file must be located.
|
||
The user of the program must be able to guide the responsible function
|
||
to find whatever catalog the user wants. This is separated from what
|
||
the programmer had in mind.
|
||
|
||
All the types, constants and functions for the ‘catgets’ functions
|
||
are defined/declared in the ‘nl_types.h’ header file.
|
||
|
||
* Menu:
|
||
|
||
* The catgets Functions:: The ‘catgets’ function family.
|
||
* The message catalog files:: Format of the message catalog files.
|
||
* The gencat program:: How to generate message catalogs files which
|
||
can be used by the functions.
|
||
* Common Usage:: How to use the ‘catgets’ interface.
|
||
|
||
|
||
File: libc.info, Node: The catgets Functions, Next: The message catalog files, Up: Message catalogs a la X/Open
|
||
|
||
8.1.1 The ‘catgets’ function family
|
||
-----------------------------------
|
||
|
||
-- Function: nl_catd catopen (const char *CAT_NAME, int FLAG)
|
||
Preliminary: | MT-Safe env | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
The ‘catopen’ function tries to locate the message data file named
|
||
CAT_NAME and loads it when found. The return value is of an opaque
|
||
type and can be used in calls to the other functions to refer to
|
||
this loaded catalog.
|
||
|
||
The return value is ‘(nl_catd) -1’ in case the function failed and
|
||
no catalog was loaded. The global variable ERRNO contains a code
|
||
for the error causing the failure. But even if the function call
|
||
succeeded this does not mean that all messages can be translated.
|
||
|
||
Locating the catalog file must happen in a way which lets the user
|
||
of the program influence the decision. It is up to the user to
|
||
decide about the language to use and sometimes it is useful to use
|
||
alternate catalog files. All this can be specified by the user by
|
||
setting some environment variables.
|
||
|
||
The first problem is to find out where all the message catalogs are
|
||
stored. Every program could have its own place to keep all the
|
||
different files but usually the catalog files are grouped by
|
||
languages and the catalogs for all programs are kept in the same
|
||
place.
|
||
|
||
To tell the ‘catopen’ function where the catalog for the program
|
||
can be found the user can set the environment variable ‘NLSPATH’ to
|
||
a value which describes her/his choice. Since this value must be
|
||
usable for different languages and locales it cannot be a simple
|
||
string. Instead it is a format string (similar to ‘printf’’s). An
|
||
example is
|
||
|
||
/usr/share/locale/%L/%N:/usr/share/locale/%L/LC_MESSAGES/%N
|
||
|
||
First one can see that more than one directory can be specified
|
||
(with the usual syntax of separating them by colons). The next
|
||
things to observe are the format string, ‘%L’ and ‘%N’ in this
|
||
case. The ‘catopen’ function knows about several of them and the
|
||
replacement for all of them is of course different.
|
||
|
||
‘%N’
|
||
This format element is substituted with the name of the
|
||
catalog file. This is the value of the CAT_NAME argument
|
||
given to ‘catopen’.
|
||
|
||
‘%L’
|
||
This format element is substituted with the name of the
|
||
currently selected locale for translating messages. How this
|
||
is determined is explained below.
|
||
|
||
‘%l’
|
||
(This is the lowercase ell.) This format element is
|
||
substituted with the language element of the locale name. The
|
||
string describing the selected locale is expected to have the
|
||
form ‘LANG[_TERR[.CODESET]]’ and this format uses the first
|
||
part LANG.
|
||
|
||
‘%t’
|
||
This format element is substituted by the territory part TERR
|
||
of the name of the currently selected locale. See the
|
||
explanation of the format above.
|
||
|
||
‘%c’
|
||
This format element is substituted by the codeset part CODESET
|
||
of the name of the currently selected locale. See the
|
||
explanation of the format above.
|
||
|
||
‘%%’
|
||
Since ‘%’ is used as a meta character there must be a way to
|
||
express the ‘%’ character in the result itself. Using ‘%%’
|
||
does this just like it works for ‘printf’.
|
||
|
||
Using ‘NLSPATH’ allows arbitrary directories to be searched for
|
||
message catalogs while still allowing different languages to be
|
||
used. If the ‘NLSPATH’ environment variable is not set, the
|
||
default value is
|
||
|
||
PREFIX/share/locale/%L/%N:PREFIX/share/locale/%L/LC_MESSAGES/%N
|
||
|
||
where PREFIX is given to ‘configure’ while installing the GNU C
|
||
Library (this value is in many cases ‘/usr’ or the empty string).
|
||
|
||
The remaining problem is to decide which must be used. The value
|
||
decides about the substitution of the format elements mentioned
|
||
above. First of all the user can specify a path in the message
|
||
catalog name (i.e., the name contains a slash character). In this
|
||
situation the ‘NLSPATH’ environment variable is not used. The
|
||
catalog must exist as specified in the program, perhaps relative to
|
||
the current working directory. This situation is not desirable and
|
||
catalog names should never be written this way. Besides this, this
|
||
behavior is not portable to all other platforms providing the
|
||
‘catgets’ interface.
|
||
|
||
Otherwise the values of environment variables from the standard
|
||
environment are examined (*note Standard Environment::). Which
|
||
variables are examined is decided by the FLAG parameter of
|
||
‘catopen’. If the value is ‘NL_CAT_LOCALE’ (which is defined in
|
||
‘nl_types.h’) then the ‘catopen’ function uses the name of the
|
||
locale currently selected for the ‘LC_MESSAGES’ category.
|
||
|
||
If FLAG is zero the ‘LANG’ environment variable is examined. This
|
||
is a left-over from the early days where the concept of the locales
|
||
had not even reached the level of POSIX locales.
|
||
|
||
The environment variable and the locale name should have a value of
|
||
the form ‘LANG[_TERR[.CODESET]]’ as explained above. If no
|
||
environment variable is set the ‘"C"’ locale is used which prevents
|
||
any translation.
|
||
|
||
The return value of the function is in any case a valid string.
|
||
Either it is a translation from a message catalog or it is the same
|
||
as the STRING parameter. So a piece of code to decide whether a
|
||
translation actually happened must look like this:
|
||
|
||
{
|
||
char *trans = catgets (desc, set, msg, input_string);
|
||
if (trans == input_string)
|
||
{
|
||
/* Something went wrong. */
|
||
}
|
||
}
|
||
|
||
When an error occurred the global variable ERRNO is set to
|
||
|
||
EBADF
|
||
The catalog does not exist.
|
||
ENOMSG
|
||
The set/message tuple does not name an existing element in the
|
||
message catalog.
|
||
|
||
While it sometimes can be useful to test for errors programs
|
||
normally will avoid any test. If the translation is not available
|
||
it is no big problem if the original, untranslated message is
|
||
printed. Either the user understands this as well or s/he will
|
||
look for the reason why the messages are not translated.
|
||
|
||
Please note that the currently selected locale does not depend on a
|
||
call to the ‘setlocale’ function. It is not necessary that the locale
|
||
data files for this locale exist and calling ‘setlocale’ succeeds. The
|
||
‘catopen’ function directly reads the values of the environment
|
||
variables.
|
||
|
||
-- Function: char * catgets (nl_catd CATALOG_DESC, int SET, int
|
||
MESSAGE, const char *STRING)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The function ‘catgets’ has to be used to access the message catalog
|
||
previously opened using the ‘catopen’ function. The CATALOG_DESC
|
||
parameter must be a value previously returned by ‘catopen’.
|
||
|
||
The next two parameters, SET and MESSAGE, reflect the internal
|
||
organization of the message catalog files. This will be explained
|
||
in detail below. For now it is interesting to know that a catalog
|
||
can consist of several sets and the messages in each set are
|
||
individually numbered using numbers. Neither the set number nor
|
||
the message number must be consecutive. They can be arbitrarily
|
||
chosen. But each message (unless equal to another one) must have
|
||
its own unique pair of set and message number.
|
||
|
||
Since it is not guaranteed that the message catalog for the
|
||
language selected by the user exists the last parameter STRING
|
||
helps to handle this case gracefully. If no matching string can be
|
||
found STRING is returned. This means for the programmer that
|
||
|
||
• the STRING parameters should contain reasonable text (this
|
||
also helps to understand the program since otherwise there
|
||
would be no hint on the string which is expected to be
|
||
returned).
|
||
• all STRING arguments should be written in the same language.
|
||
|
||
It is somewhat uncomfortable to write a program using the ‘catgets’
|
||
functions if no supporting functionality is available. Since each
|
||
set/message number tuple must be unique the programmer must keep lists
|
||
of the messages at the same time the code is written. And the work
|
||
between several people working on the same project must be coordinated.
|
||
We will see how these problems can be relaxed a bit (*note Common
|
||
Usage::).
|
||
|
||
-- Function: int catclose (nl_catd CATALOG_DESC)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe corrupt mem |
|
||
*Note POSIX Safety Concepts::.
|
||
|
||
The ‘catclose’ function can be used to free the resources
|
||
associated with a message catalog which previously was opened by a
|
||
call to ‘catopen’. If the resources can be successfully freed the
|
||
function returns ‘0’. Otherwise it returns ‘−1’ and the global
|
||
variable ERRNO is set. Errors can occur if the catalog descriptor
|
||
CATALOG_DESC is not valid in which case ERRNO is set to ‘EBADF’.
|
||
|
||
|
||
File: libc.info, Node: The message catalog files, Next: The gencat program, Prev: The catgets Functions, Up: Message catalogs a la X/Open
|
||
|
||
8.1.2 Format of the message catalog files
|
||
-----------------------------------------
|
||
|
||
The only reasonable way to translate all the messages of a function and
|
||
store the result in a message catalog file which can be read by the
|
||
‘catopen’ function is to write all the message text to the translator
|
||
and let her/him translate them all. I.e., we must have a file with
|
||
entries which associate the set/message tuple with a specific
|
||
translation. This file format is specified in the X/Open standard and
|
||
is as follows:
|
||
|
||
• Lines containing only whitespace characters or empty lines are
|
||
ignored.
|
||
|
||
• Lines which contain as the first non-whitespace character a ‘$’
|
||
followed by a whitespace character are comments and are also
|
||
ignored.
|
||
|
||
• If a line contains as the first non-whitespace characters the
|
||
sequence ‘$set’ followed by a whitespace character an additional
|
||
argument is required to follow. This argument can either be:
|
||
|
||
− a number. In this case the value of this number determines
|
||
the set to which the following messages are added.
|
||
|
||
− an identifier consisting of alphanumeric characters plus the
|
||
underscore character. In this case the set gets automatically
|
||
a number assigned. This value is one added to the largest set
|
||
number which so far appeared.
|
||
|
||
How to use the symbolic names is explained in section *note
|
||
Common Usage::.
|
||
|
||
It is an error if a symbol name appears more than once. All
|
||
following messages are placed in a set with this number.
|
||
|
||
• If a line contains as the first non-whitespace characters the
|
||
sequence ‘$delset’ followed by a whitespace character an additional
|
||
argument is required to follow. This argument can either be:
|
||
|
||
− a number. In this case the value of this number determines
|
||
the set which will be deleted.
|
||
|
||
− an identifier consisting of alphanumeric characters plus the
|
||
underscore character. This symbolic identifier must match a
|
||
name for a set which previously was defined. It is an error
|
||
if the name is unknown.
|
||
|
||
In both cases all messages in the specified set will be removed.
|
||
They will not appear in the output. But if this set is later again
|
||
selected with a ‘$set’ command, messages could be added and
|
||
these messages will appear in the output.
|
||
|
||
• If a line contains after leading whitespaces the sequence ‘$quote’,
|
||
the quoting character used for this input file is changed to the
|
||
first non-whitespace character following the ‘$quote’. If no
|
||
non-whitespace character is present before the line ends quoting is
|
||
disabled.
|
||
|
||
By default no quoting character is used. In this mode strings are
|
||
terminated with the first unescaped line break. If there is a
|
||
‘$quote’ sequence present newline need not be escaped. Instead a
|
||
string is terminated with the first unescaped appearance of the
|
||
quote character.
|
||
|
||
A common usage of this feature would be to set the quote character
|
||
to ‘"’. Then any appearance of the ‘"’ in the strings must be
|
||
escaped using the backslash (i.e., ‘\"’ must be written).
|
||
|
||
• Any other line must start with a number or an alphanumeric
|
||
identifier (with the underscore character included). The following
|
||
characters (starting after the first whitespace character) will
|
||
form the string which gets associated with the currently selected
|
||
set and the message number represented by the number and identifier
|
||
respectively.
|
||
|
||
If the start of the line is a number the message number is obvious.
|
||
It is an error if the same message number already appeared for this
|
||
set.
|
||
|
||
If the leading token was an identifier the message number gets
|
||
automatically assigned. The value is the current maximum message
|
||
number for this set plus one. It is an error if the identifier was
|
||
already used for a message in this set. It is OK to reuse the
|
||
identifier for a message in another set. How to use the
|
||
symbolic identifiers will be explained below (*note Common
|
||
Usage::). There is one limitation with the identifier: it must not
|
||
be ‘Set’. The reason will be explained below.
|
||
|
||
The text of the messages can contain escape characters. The usual
|
||
bunch of characters known from the ISO C language are recognized
|
||
(‘\n’, ‘\t’, ‘\v’, ‘\b’, ‘\r’, ‘\f’, ‘\\’, and ‘\NNN’, where NNN is
|
||
the octal coding of a character code).
|
||
|
||
*Important:* The handling of identifiers instead of numbers for the
|
||
set and messages is a GNU extension. Systems strictly following the
|
||
X/Open specification do not have this feature. An example for a message
|
||
catalog file is this:
|
||
|
||
$ This is a leading comment.
|
||
$quote "
|
||
|
||
$set SetOne
|
||
1 Message with ID 1.
|
||
two " Message with ID \"two\", which gets the value 2 assigned"
|
||
|
||
$set SetTwo
|
||
$ Since the last set got the number 1 assigned this set has number 2.
|
||
4000 "The numbers can be arbitrary, they need not start at one."
|
||
|
||
This small example shows various aspects:
|
||
• Lines 1 and 9 are comments since they start with ‘$’ followed by a
|
||
whitespace.
|
||
• The quoting character is set to ‘"’. Otherwise the quotes in the
|
||
message definition would have to be left out and in this case the
|
||
message with the identifier ‘two’ would lose its leading
|
||
whitespace.
|
||
• Mixing numbered messages with messages having symbolic names is no
|
||
problem and the numbering happens automatically.
|
||
|
||
While this file format is pretty easy it is not the best possible for
|
||
use in a running program. The ‘catopen’ function would have to parse
|
||
the file and handle syntactic errors gracefully. This is not so easy
|
||
and the whole process is pretty slow. Therefore the ‘catgets’ functions
|
||
expect the data in another more compact and ready-to-use file format.
|
||
There is a special program ‘gencat’ which is explained in detail in the
|
||
next section.
|
||
|
||
Files in this other format are not human readable. To be easy to use
|
||
by programs it is a binary file. But the format is byte order
|
||
independent so translation files can be shared by systems of arbitrary
|
||
architecture (as long as they use the GNU C Library).
|
||
|
||
Details about the binary file format are not important to know since
|
||
these files are always created by the ‘gencat’ program. The sources of
|
||
the GNU C Library also provide the sources for the ‘gencat’ program and
|
||
so the interested reader can look through these source files to learn
|
||
about the file format.
|
||
|
||
|
||
File: libc.info, Node: The gencat program, Next: Common Usage, Prev: The message catalog files, Up: Message catalogs a la X/Open
|
||
|
||
8.1.3 Generate Message Catalogs files
|
||
-------------------------------------
|
||
|
||
The ‘gencat’ program is specified in the X/Open standard and the GNU
|
||
implementation follows this specification and so processes all correctly
|
||
formed input files. Additionally some extensions are implemented which
|
||
help to work in a more reasonable way with the ‘catgets’ functions.
|
||
|
||
The ‘gencat’ program can be invoked in two ways:
|
||
|
||
`gencat [OPTION]… [OUTPUT-FILE [INPUT-FILE]…]`
|
||
|
||
This is the interface defined in the X/Open standard. If no
|
||
INPUT-FILE parameter is given input will be read from standard input.
|
||
Multiple input files will be read as if they are concatenated. If
|
||
OUTPUT-FILE is also missing, the output will be written to standard
|
||
output. To provide the interface one is used to from other programs a
|
||
second interface is provided.
|
||
|
||
`gencat [OPTION]… -o OUTPUT-FILE [INPUT-FILE]…`
|
||
|
||
The option ‘-o’ is used to specify the output file and all file
|
||
arguments are used as input files.
|
||
|
||
Beside this one can use ‘-’ or ‘/dev/stdin’ for INPUT-FILE to denote
|
||
the standard input. Correspondingly one can use ‘-’ and ‘/dev/stdout’ for
|
||
OUTPUT-FILE to denote standard output. Using ‘-’ as a file name is
|
||
allowed in X/Open while using the device names is a GNU extension.
|
||
|
||
The ‘gencat’ program works by concatenating all input files and then
|
||
*merging* the resulting collection of message sets with a possibly
|
||
existing output file. This is done by removing all messages with
|
||
set/message number tuples matching any of the generated messages from
|
||
the output file and then adding all the new messages. To regenerate a
|
||
catalog file while ignoring the old contents therefore requires to
|
||
remove the output file if it exists. If the output is written to
|
||
standard output no merging takes place.
|
||
|
||
The following table shows the options understood by the ‘gencat’
|
||
program. The X/Open standard does not specify any option for the
|
||
program so all of these are GNU extensions.
|
||
|
||
‘-V’
|
||
‘--version’
|
||
Print the version information and exit.
|
||
‘-h’
|
||
‘--help’
|
||
Print a usage message listing all available options, then exit
|
||
successfully.
|
||
‘--new’
|
||
Never merge the new messages from the input files with the old
|
||
content of the output file. The old content of the output file is
|
||
discarded.
|
||
‘-H’
|
||
‘--header=name’
|
||
This option is used to emit the symbolic names given to sets and
|
||
messages in the input files for use in the program. Details about
|
||
how to use this are given in the next section. The NAME parameter
|
||
to this option specifies the name of the output file. It will
|
||
contain a number of C preprocessor ‘#define’s to associate a name
|
||
with a number.
|
||
|
||
Please note that the generated file only contains the symbols from
|
||
the input files. If the output is merged with the previous content
|
||
of the output file the possibly existing symbols from the file(s)
|
||
which generated the old output files are not in the generated
|
||
header file.
|
||
|
||
|
||
File: libc.info, Node: Common Usage, Prev: The gencat program, Up: Message catalogs a la X/Open
|
||
|
||
8.1.4 How to use the ‘catgets’ interface
|
||
----------------------------------------
|
||
|
||
The ‘catgets’ functions can be used in two different ways. By following
|
||
slavishly the X/Open specs and not relying on the extension and by using
|
||
the GNU extensions. We will take a look at the former method first to
|
||
understand the benefits of extensions.
|
||
|
||
8.1.4.1 Not using symbolic names
|
||
................................
|
||
|
||
Since the X/Open format of the message catalog files does not allow
|
||
symbol names we have to work with numbers all the time. When we start
|
||
writing a program we have to replace all appearances of translatable
|
||
strings with something like
|
||
|
||
catgets (catdesc, set, msg, "string")
|
||
|
||
CATDESC is retrieved from a call to ‘catopen’ which is normally done
|
||
once at the program start. The ‘"string"’ is the string we want to
|
||
translate. The problems start with the set and message numbers.
|
||
|
||
In a bigger program several programmers usually work at the same time
|
||
on the program and so coordinating the number allocation is crucial.
|
||
Though no two different strings must be indexed by the same tuple of
|
||
numbers it is highly desirable to reuse the numbers for equal strings
|
||
with equal translations (please note that there might be strings which
|
||
are equal in one language but have different translations due to
|
||
different contexts).
|
||
|
||
The allocation process can be relaxed a bit by different set numbers
|
||
for different parts of the program. So the number of developers who
|
||
have to coordinate the allocation can be reduced. But still lists must
|
||
be kept to track the allocation and errors can easily happen. These
|
||
errors cannot be discovered by the compiler or the ‘catgets’ functions.
|
||
Only the user of the program might see wrong messages printed. In the
|
||
worst cases the messages are so irritating that they cannot be
|
||
recognized as wrong. Think about the translations for ‘"true"’ and
|
||
‘"false"’ being exchanged. This could result in a disaster.
|
||
|
||
8.1.4.2 Using symbolic names
|
||
............................
|
||
|
||
The problems mentioned in the last section derive from the fact that:
|
||
|
||
1. the numbers are allocated once and due to the possibly frequent use
|
||
of them it is difficult to change a number later.
|
||
2. the numbers do not allow to guess anything about the string and
|
||
therefore collisions can easily happen.
|
||
|
||
By constantly using symbolic names and by providing a method which
|
||
maps the string content to a symbolic name (however this will happen)
|
||
one can prevent both problems above. The cost of this is that the
|
||
programmer has to write a complete message catalog file while s/he is
|
||
writing the program itself.
|
||
|
||
This is necessary since the symbolic names must be mapped to numbers
|
||
before the program sources can be compiled. In the last section it was
|
||
described how to generate a header containing the mapping of the names.
|
||
E.g., for the example message file given in the last section we could
|
||
call the ‘gencat’ program as follows (assume ‘ex.msg’ contains the
|
||
sources).
|
||
|
||
gencat -H ex.h -o ex.cat ex.msg
|
||
|
||
This generates a header file with the following content:
|
||
|
||
#define SetTwoSet 0x2 /* ex.msg:8 */
|
||
|
||
#define SetOneSet 0x1 /* ex.msg:4 */
|
||
#define SetOnetwo 0x2 /* ex.msg:6 */
|
||
|
||
As can be seen the various symbols given in the source file are
|
||
mangled to generate unique identifiers and these identifiers get numbers
|
||
assigned. Reading the source file and knowing about the rules will
|
||
allow to predict the content of the header file (it is deterministic)
|
||
but this is not necessary. The ‘gencat’ program can take care of
|
||
everything. All the programmer has to do is to put the generated header
|
||
file in the dependency list of the source files of her/his project and
|
||
to add a rule to regenerate the header if any of the input files
|
||
change.
|
||
|
||
One word about the symbol mangling. Every symbol consists of two
|
||
parts: the name of the message set plus the name of the message or the
|
||
special string ‘Set’. So ‘SetOnetwo’ means this macro can be used to
|
||
access the translation with identifier ‘two’ in the message set
|
||
‘SetOne’.
|
||
|
||
The other names denote the names of the message sets. The special
|
||
string ‘Set’ is used in the place of the message identifier.
|
||
|
||
If in the code the second string of the set ‘SetOne’ is used the C
|
||
code should look like this:
|
||
|
||
catgets (catdesc, SetOneSet, SetOnetwo,
|
||
" Message with ID \"two\", which gets the value 2 assigned")
|
||
|
||
Writing the function this way will allow to change the message number
|
||
and even the set number without requiring any change in the C source
|
||
code. (The text of the string is normally not the same; this is only
|
||
for this example.)
|
||
|
||
8.1.4.3 How does this allow to develop
|
||
......................................
|
||
|
||
To illustrate the usual way to work with the symbolic version numbers
|
||
here is a little example. Assume we want to write the very complex and
|
||
famous greeting program. We start by writing the code as usual:
|
||
|
||
#include <stdio.h>
|
||
int
|
||
main (void)
|
||
{
|
||
printf ("Hello, world!\n");
|
||
return 0;
|
||
}
|
||
|
||
Now we want to internationalize the message and therefore replace the
|
||
message with whatever the user wants.
|
||
|
||
#include <nl_types.h>
|
||
#include <stdio.h>
|
||
#include "msgnrs.h"
|
||
int
|
||
main (void)
|
||
{
|
||
nl_catd catdesc = catopen ("hello.cat", NL_CAT_LOCALE);
|
||
printf (catgets (catdesc, SetMainSet, SetMainHello,
|
||
"Hello, world!\n"));
|
||
catclose (catdesc);
|
||
return 0;
|
||
}
|
||
|
||
We see how the catalog object is opened and the returned descriptor
|
||
used in the other function calls. It is not really necessary to check
|
||
for failure of any of the functions since even in these situations the
|
||
functions will behave reasonably. They simply will return a
|
||
translation.
|
||
|
||
What remains unspecified here are the constants ‘SetMainSet’ and
|
||
‘SetMainHello’. These are the symbolic names describing the message.
|
||
To get the actual definitions which match the information in the catalog
|
||
file we have to create the message catalog source file and process it
|
||
using the ‘gencat’ program.
|
||
|
||
$ Messages for the famous greeting program.
|
||
$quote "
|
||
|
||
$set SetMain
|
||
Hello "Hallo, Welt!\n"
|
||
|
||
Now we can start building the program (assume the message catalog
|
||
source file is named ‘hello.msg’ and the program source file ‘hello.c’):
|
||
|
||
% gencat -H msgnrs.h -o hello.cat hello.msg
|
||
% cat msgnrs.h
|
||
#define SetMainSet 0x1 /* hello.msg:4 */
|
||
#define SetMainHello 0x1 /* hello.msg:5 */
|
||
% gcc -o hello hello.c -I.
|
||
% cp hello.cat /usr/share/locale/de/LC_MESSAGES
|
||
% echo $LC_ALL
|
||
de
|
||
% ./hello
|
||
Hallo, Welt!
|
||
%
|
||
|
||
The call of the ‘gencat’ program creates the missing header file
|
||
‘msgnrs.h’ as well as the message catalog binary. The former is used in
|
||
the compilation of ‘hello.c’ while the latter is placed in a directory in
|
||
which the ‘catopen’ function will try to locate it. Please check the
|
||
‘LC_ALL’ environment variable and the default path for ‘catopen’
|
||
presented in the description above.
|
||
|
||
|
||
File: libc.info, Node: The Uniforum approach, Prev: Message catalogs a la X/Open, Up: Message Translation
|
||
|
||
8.2 The Uniforum approach to Message Translation
|
||
================================================
|
||
|
||
Sun Microsystems tried to standardize a different approach to message
|
||
translation in the Uniforum group. There never was a real standard
|
||
defined but still the interface was used in Sun’s operating systems.
|
||
Since this approach fits better in the development process of free
|
||
software it is also used throughout the GNU project and the GNU
|
||
‘gettext’ package provides support for this outside the GNU C Library.
|
||
|
||
The code of the ‘libintl’ from GNU ‘gettext’ is the same as the code
|
||
in the GNU C Library. So the documentation in the GNU ‘gettext’ manual
|
||
is also valid for the functionality here. The following text will
|
||
describe the library functions in detail. But the numerous helper
|
||
programs are not described in this manual. Instead people should read
|
||
the GNU ‘gettext’ manual (*note GNU gettext utilities: (gettext)Top.).
|
||
We will only give a short overview.
|
||
|
||
Though the ‘catgets’ functions are available by default on more
|
||
systems the ‘gettext’ interface is at least as portable as the former.
|
||
The GNU ‘gettext’ package can be used wherever the functions are not
|
||
available.
|
||
|
||
* Menu:
|
||
|
||
* Message catalogs with gettext:: The ‘gettext’ family of functions.
|
||
* Helper programs for gettext:: Programs to handle message catalogs
|
||
for ‘gettext’.
|
||
|
||
|
||
File: libc.info, Node: Message catalogs with gettext, Next: Helper programs for gettext, Up: The Uniforum approach
|
||
|
||
8.2.1 The ‘gettext’ family of functions
|
||
---------------------------------------
|
||
|
||
The paradigm underlying the ‘gettext’ approach to message translation
|
||
is different from that of the ‘catgets’ functions but the basic functionality
|
||
is equivalent. There are functions of the following categories:
|
||
|
||
* Menu:
|
||
|
||
* Translation with gettext:: What has to be done to translate a message.
|
||
* Locating gettext catalog:: How to determine which catalog to be used.
|
||
* Advanced gettext functions:: Additional functions for more complicated
|
||
situations.
|
||
* Charset conversion in gettext:: How to specify the output character set
|
||
‘gettext’ uses.
|
||
* GUI program problems:: How to use ‘gettext’ in GUI programs.
|
||
* Using gettextized software:: The possibilities of the user to influence
|
||
the way ‘gettext’ works.
|
||
|
||
|
||
File: libc.info, Node: Translation with gettext, Next: Locating gettext catalog, Up: Message catalogs with gettext
|
||
|
||
8.2.1.1 What has to be done to translate a message?
|
||
...................................................
|
||
|
||
The ‘gettext’ functions have a very simple interface. The most basic
|
||
function just takes the string which shall be translated as the argument
|
||
and it returns the translation. This is fundamentally different from
|
||
the ‘catgets’ approach where an extra key is necessary and the original
|
||
string is only used for the error case.
|
||
|
||
If the string which has to be translated is the only argument this of
|
||
course means the string itself is the key. I.e., the translation will
|
||
be selected based on the original string. The message catalogs must
|
||
therefore contain the original strings plus one translation for any such
|
||
string. The task of the ‘gettext’ function is to compare the
|
||
argument string with the available strings in the catalog and return the
|
||
appropriate translation. Of course this process is optimized so that
|
||
this process is not more expensive than an access using an atomic key
|
||
like in ‘catgets’.
|
||
|
||
The ‘gettext’ approach has some advantages but also some
|
||
disadvantages. Please see the GNU ‘gettext’ manual for a detailed
|
||
discussion of the pros and cons.
|
||
|
||
All the definitions and declarations for ‘gettext’ can be found in
|
||
the ‘libintl.h’ header file. On systems where these functions are not
|
||
part of the C library they can be found in a separate library named
|
||
‘libintl.a’ (or accordingly different for shared libraries).
|
||
|
||
-- Function: char * gettext (const char *MSGID)
|
||
Preliminary: | MT-Safe env | AS-Unsafe corrupt heap lock dlopen |
|
||
AC-Unsafe corrupt lock fd mem | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘gettext’ function searches the currently selected message
|
||
catalogs for a string which is equal to MSGID. If there is such a
|
||
string available it is returned. Otherwise the argument string
|
||
MSGID is returned.
|
||
|
||
Please note that although the return value is ‘char *’ the returned
|
||
string must not be changed. This broken type results from the
|
||
history of the function and does not reflect the way the function
|
||
should be used.
|
||
|
||
Please note that above we wrote “message catalogs” (plural). This
|
||
is a specialty of the GNU implementation of these functions and we
|
||
will say more about this when we talk about the ways message
|
||
catalogs are selected (*note Locating gettext catalog::).
|
||
|
||
The ‘gettext’ function does not modify the value of the global
|
||
ERRNO variable. This is necessary to make it possible to write
|
||
something like
|
||
|
||
printf (gettext ("Operation failed: %m\n"));
|
||
|
||
Here the ERRNO value is used in the ‘printf’ function while
|
||
processing the ‘%m’ format element and if the ‘gettext’ function
|
||
would change this value (it is called before ‘printf’ is called) we
|
||
would get a wrong message.
|
||
|
||
So there is no easy way to detect a missing message catalog beside
|
||
comparing the argument string with the result. But it is normally
|
||
the task of the user to react to missing catalogs. The program
|
||
cannot guess when a message catalog is really necessary since a
|
||
user who speaks the language the program was developed in does not
|
||
need any translation.
|
||
|
||
The remaining two functions to access the message catalog add some
|
||
functionality to select a message catalog which is not the default one.
|
||
This is important if parts of the program are developed independently.
|
||
Every part can have its own message catalog and all of them can be used
|
||
at the same time. The C library itself is an example: internally it
|
||
uses the ‘gettext’ functions but since it must not depend on a currently
|
||
selected default message catalog it must specify all ambiguous
|
||
information.
|
||
|
||
-- Function: char * dgettext (const char *DOMAINNAME, const char
|
||
*MSGID)
|
||
Preliminary: | MT-Safe env | AS-Unsafe corrupt heap lock dlopen |
|
||
AC-Unsafe corrupt lock fd mem | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘dgettext’ function acts just like the ‘gettext’ function. It
|
||
only takes an additional first argument DOMAINNAME which guides the
|
||
selection of the message catalogs which are searched for the
|
||
translation. If the DOMAINNAME parameter is the null pointer the
|
||
‘dgettext’ function is exactly equivalent to ‘gettext’ since the
|
||
default value for the domain name is used.
|
||
|
||
As for ‘gettext’ the return value type is ‘char *’ which is an
|
||
anachronism. The returned string must never be modified.
|
||
|
||
-- Function: char * dcgettext (const char *DOMAINNAME, const char
|
||
*MSGID, int CATEGORY)
|
||
Preliminary: | MT-Safe env | AS-Unsafe corrupt heap lock dlopen |
|
||
AC-Unsafe corrupt lock fd mem | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘dcgettext’ adds another argument to those which ‘dgettext’
|
||
takes. This argument CATEGORY specifies the last piece of
|
||
information needed to locate the message catalog. I.e., the
|
||
domain name and the locale category exactly specify which message
|
||
catalog has to be used (relative to a given directory, see below).
|
||
|
||
The ‘dgettext’ function can be expressed in terms of ‘dcgettext’ by
|
||
using
|
||
|
||
dcgettext (domain, string, LC_MESSAGES)
|
||
|
||
instead of
|
||
|
||
dgettext (domain, string)
|
||
|
||
This also shows which values are expected for the third parameter.
|
||
One has to use the available selectors for the categories available
|
||
in ‘locale.h’. Normally the available values are ‘LC_CTYPE’,
|
||
‘LC_COLLATE’, ‘LC_MESSAGES’, ‘LC_MONETARY’, ‘LC_NUMERIC’, and
|
||
‘LC_TIME’. Please note that ‘LC_ALL’ must not be used and even
|
||
though the names might suggest this, there is no relation to the
|
||
environment variables of these names.
|
||
|
||
The ‘dcgettext’ function is only implemented for compatibility with
|
||
other systems which have ‘gettext’ functions. There is not really
|
||
any situation where it is necessary (or useful) to use a different
|
||
value than ‘LC_MESSAGES’ for the CATEGORY parameter. We are
|
||
dealing with messages here and any other choice can only be
|
||
irritating.
|
||
|
||
As for ‘gettext’ the return value type is ‘char *’ which is an
|
||
anachronism. The returned string must never be modified.
|
||
|
||
When using the three functions above in a program it is a frequent
|
||
case that the MSGID argument is a constant string. So it is worthwhile to
|
||
optimize this case. Thinking briefly about this one will realize that
|
||
as long as no new message catalog is loaded the translation of a message
|
||
will not change. This optimization is actually implemented by the
|
||
‘gettext’, ‘dgettext’ and ‘dcgettext’ functions.
|
||
|
||
|
||
File: libc.info, Node: Locating gettext catalog, Next: Advanced gettext functions, Prev: Translation with gettext, Up: Message catalogs with gettext
|
||
|
||
8.2.1.2 How to determine which catalog to be used
|
||
.................................................
|
||
|
||
The functions to retrieve the translations for a given message have a
|
||
remarkably simple interface. But to provide the user of the program
|
||
still the opportunity to select exactly the translation s/he wants and
|
||
also to provide the programmer the possibility to influence the way to
|
||
perform the search for catalog files there is a quite complicated
|
||
underlying mechanism which controls all this. The code is complicated
|
||
but the use is easy.
|
||
|
||
Basically we have two different tasks to perform which can also be
|
||
performed by the ‘catgets’ functions:
|
||
|
||
1. Locate the set of message catalogs. There are a number of files
|
||
for different languages and which all belong to the package.
|
||
Usually they are all stored in the filesystem below a certain
|
||
directory.
|
||
|
||
There can be arbitrarily many packages installed and they can follow
|
||
different guidelines for the placement of their files.
|
||
|
||
2. Relative to the location specified by the package the actual
|
||
translation files must be searched, based on the wishes of the
|
||
user. I.e., for each language the user selects the program should
|
||
be able to locate the appropriate file.
|
||
|
||
This is the functionality required by the specifications for
|
||
‘gettext’ and this is also what the ‘catgets’ functions are able to do.
|
||
But there are some problems unresolved:
|
||
|
||
• The language to be used can be specified in several different ways.
|
||
There is no generally accepted standard for this and the user
|
||
always expects the program to understand what s/he means. E.g., to
|
||
select the German translation one could write ‘de’, ‘german’, or
|
||
‘deutsch’ and the program should always react the same.
|
||
|
||
• Sometimes the specification of the user is too detailed. If s/he,
|
||
e.g., specifies ‘de_DE.ISO-8859-1’ which means German, spoken in
|
||
Germany, coded using the ISO 8859-1 character set there is the
|
||
possibility that a message catalog matching this exactly is not
|
||
available. But there could be a catalog matching ‘de’ and if the
|
||
character set used on the machine is always ISO 8859-1 there is no
|
||
reason why this latter message catalog should not be used. (We call
|
||
this "message inheritance".)
|
||
|
||
• If a catalog for a wanted language is not available it is not
|
||
always the second best choice to fall back on the language of the
|
||
developer and simply not translate any message. Instead a user
|
||
might be better able to read the messages in another language and
|
||
so the user of the program should be able to define a precedence
|
||
order of languages.
|
||
|
||
We can divide the configuration actions in two parts: the one is
|
||
performed by the programmer, the other by the user. We will start with
|
||
the functions the programmer can use since the user configuration will
|
||
be based on this.
|
||
|
||
As the functions described in the last sections already mention
|
||
separate sets of messages can be selected by a "domain name". This is a
|
||
simple string which should be unique for each program part which uses a
|
||
separate domain. It is possible to use in one program arbitrarily many
|
||
domains at the same time. E.g., the GNU C Library itself uses a domain
|
||
named ‘libc’ while the program using the C Library could use a domain
|
||
named ‘foo’. The important point is that at any time exactly one domain
|
||
is active. This is controlled with the following function.
|
||
|
||
-- Function: char * textdomain (const char *DOMAINNAME)
|
||
Preliminary: | MT-Safe | AS-Unsafe lock heap | AC-Unsafe lock mem |
|
||
*Note POSIX Safety Concepts::.
|
||
|
||
The ‘textdomain’ function sets the default domain, which is used in
|
||
all future ‘gettext’ calls, to DOMAINNAME. Please note that
|
||
‘dgettext’ and ‘dcgettext’ calls are not influenced if the
|
||
DOMAINNAME parameter of these functions is not the null pointer.
|
||
|
||
Before the first call to ‘textdomain’ the default domain is
|
||
‘messages’. This is the name specified in the specification of the
|
||
‘gettext’ API. This name is as good as any other name. No program
|
||
should ever really use a domain with this name since this can only
|
||
lead to problems.
|
||
|
||
The function returns the value which is from now on taken as the
|
||
default domain. If the system ran out of memory the returned
|
||
value is ‘NULL’ and the global variable ERRNO is set to ‘ENOMEM’.
|
||
Despite the return value type being ‘char *’ the return string must
|
||
not be changed. It is allocated internally by the ‘textdomain’
|
||
function.
|
||
|
||
If the DOMAINNAME parameter is the null pointer no new default
|
||
domain is set. Instead the currently selected default domain is
|
||
returned.
|
||
|
||
If the DOMAINNAME parameter is the empty string the default domain
|
||
is reset to its initial value, the domain with the name ‘messages’.
|
||
This possibility is questionable to use since the domain ‘messages’
|
||
really never should be used.
|
||
|
||
-- Function: char * bindtextdomain (const char *DOMAINNAME, const char
|
||
*DIRNAME)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
The ‘bindtextdomain’ function can be used to specify the directory
|
||
which contains the message catalogs for domain DOMAINNAME for the
|
||
different languages. To be correct, this is the directory where
|
||
the hierarchy of directories is expected. Details are explained
|
||
below.
|
||
|
||
For the programmer it is important to note that the translations
|
||
which come with the program have to be placed in a directory hierarchy
|
||
starting at, say, ‘/foo/bar’. Then the program should make a
|
||
‘bindtextdomain’ call to bind the domain for the current program to
|
||
this directory. So it is made sure the catalogs are found. A
|
||
correctly running program does not depend on the user setting an
|
||
environment variable.
|
||
|
||
The ‘bindtextdomain’ function can be used several times and if the
|
||
DOMAINNAME argument is different the previously bound domains will
|
||
not be overwritten.
|
||
|
||
If the program which wishes to use ‘bindtextdomain’ at some point of
|
||
time uses the ‘chdir’ function to change the current working
|
||
directory it is important that the DIRNAME string is an
|
||
absolute pathname. Otherwise the addressed directory might vary
|
||
with the time.
|
||
|
||
If the DIRNAME parameter is the null pointer ‘bindtextdomain’
|
||
returns the currently selected directory for the domain with the
|
||
name DOMAINNAME.
|
||
|
||
The ‘bindtextdomain’ function returns a pointer to a string
|
||
containing the name of the selected directory. The string is
|
||
allocated internally in the function and must not be changed by the
|
||
user. If the system ran out of memory during the execution of
|
||
‘bindtextdomain’ the return value is ‘NULL’ and the global variable
|
||
ERRNO is set accordingly.
|
||
|
||
|
||
File: libc.info, Node: Advanced gettext functions, Next: Charset conversion in gettext, Prev: Locating gettext catalog, Up: Message catalogs with gettext
|
||
|
||
8.2.1.3 Additional functions for more complicated situations
|
||
............................................................
|
||
|
||
The functions of the ‘gettext’ family described so far (and all the
|
||
‘catgets’ functions as well) have one problem in the real world which
|
||
has been neglected completely in all existing approaches. What is
|
||
meant here is the handling of plural forms.
|
||
|
||
Looking through Unix source code before the time anybody thought
|
||
about internationalization (and, sadly, even afterwards) one can often
|
||
find code similar to the following:
|
||
|
||
printf ("%d file%s deleted", n, n == 1 ? "" : "s");
|
||
|
||
After the first complaints from people internationalizing the code
|
||
people either completely avoided formulations like this or used strings
|
||
like ‘"file(s)"’. Both look unnatural and should be avoided. First
|
||
tries to solve the problem correctly looked like this:
|
||
|
||
if (n == 1)
|
||
printf ("%d file deleted", n);
|
||
else
|
||
printf ("%d files deleted", n);
|
||
|
||
But this does not solve the problem. It helps languages where the
|
||
plural form of a noun is not simply constructed by adding an ‘s’ but
|
||
that is all. Once again people fell into the trap of believing the
|
||
rules their language is using are universal. But the handling of plural
|
||
forms differs widely between the language families. There are two
|
||
things which can differ between languages (and even inside language families):
|
||
|
||
• The way plural forms are built differs. This is a problem
|
||
with languages which have many irregularities. German, for
|
||
instance, is a drastic case. Though English and German are part of
|
||
the same language family (Germanic), the almost regular forming of
|
||
plural noun forms (appending an ‘s’) is hardly found in German.
|
||
|
||
• The number of plural forms differs. This is somewhat surprising for
|
||
those who only have experiences with Romanic and Germanic languages
|
||
since here the number is the same (there are two).
|
||
|
||
But other language families have only one form or many forms. More
|
||
information on this in an extra section.
|
||
|
||
The consequence of this is that application writers should not try to
|
||
solve the problem in their code. This would be localization since it is
|
||
only usable for certain, hardcoded language environments. Instead the
|
||
extended ‘gettext’ interface should be used.
|
||
|
||
These extra functions take, instead of the one key string, two
|
||
strings and a numerical argument. The idea behind this is that using
|
||
the numerical argument and the first string as a key, the implementation
|
||
can select using rules specified by the translator the right plural
|
||
form. The two string arguments then will be used to provide a return
|
||
value in case no message catalog is found (similar to the normal
|
||
‘gettext’ behavior). In this case the rules for Germanic languages are
|
||
used and it is assumed that the first string argument is the singular
|
||
form, the second the plural form.
|
||
|
||
This has the consequence that programs without language catalogs can
|
||
display the correct strings only if the program itself is written using
|
||
a Germanic language. This is a limitation but since the GNU C Library
|
||
(as well as the GNU ‘gettext’ package) are written as part of the GNU
|
||
package and the coding standards for the GNU project require programs
|
||
to be written in English, this solution nevertheless fulfills its
|
||
purpose.
|
||
|
||
-- Function: char * ngettext (const char *MSGID1, const char *MSGID2,
|
||
unsigned long int N)
|
||
Preliminary: | MT-Safe env | AS-Unsafe corrupt heap lock dlopen |
|
||
AC-Unsafe corrupt lock fd mem | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘ngettext’ function is similar to the ‘gettext’ function as it
|
||
finds the message catalogs in the same way. But it takes two extra
|
||
arguments. The MSGID1 parameter must contain the singular form of
|
||
the string to be converted. It is also used as the key for the
|
||
search in the catalog. The MSGID2 parameter is the plural form.
|
||
The parameter N is used to determine the plural form. If no
|
||
message catalog is found MSGID1 is returned if ‘n == 1’, otherwise
|
||
‘msgid2’.
|
||
|
||
An example of the use of this function is:
|
||
|
||
printf (ngettext ("%d file removed", "%d files removed", n), n);
|
||
|
||
Please note that the numeric value N has to be passed to the
|
||
‘printf’ function as well. It is not sufficient to pass it only to
|
||
‘ngettext’.
|
||
|
||
-- Function: char * dngettext (const char *DOMAIN, const char *MSGID1,
|
||
const char *MSGID2, unsigned long int N)
|
||
Preliminary: | MT-Safe env | AS-Unsafe corrupt heap lock dlopen |
|
||
AC-Unsafe corrupt lock fd mem | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘dngettext’ is similar to the ‘dgettext’ function in the way
|
||
the message catalog is selected. The difference is that it takes
|
||
two extra parameters to provide the correct plural form. These two
|
||
parameters are handled in the same way ‘ngettext’ handles them.
|
||
|
||
-- Function: char * dcngettext (const char *DOMAIN, const char *MSGID1,
|
||
const char *MSGID2, unsigned long int N, int CATEGORY)
|
||
Preliminary: | MT-Safe env | AS-Unsafe corrupt heap lock dlopen |
|
||
AC-Unsafe corrupt lock fd mem | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘dcngettext’ is similar to the ‘dcgettext’ function in the way
|
||
the message catalog is selected. The difference is that it takes
|
||
two extra parameters to provide the correct plural form. These two
|
||
parameters are handled in the same way ‘ngettext’ handles them.
|
||
|
||
The problem of plural forms
|
||
...........................
|
||
|
||
A description of the problem can be found at the beginning of the last
|
||
section. Now there is the question how to solve it. Without the input
|
||
of linguists (which was not available) it was not possible to determine
|
||
whether there are only a few different forms in which plural forms are
|
||
formed or whether the number can increase with every new supported
|
||
language.
|
||
|
||
Therefore the solution implemented is to allow the translator to
|
||
specify the rules of how to select the plural form. Since the formula
|
||
varies with every language this is the only viable solution except for
|
||
hardcoding the information in the code (which still would require the
|
||
possibility of extensions to not prevent the use of new languages). The
|
||
details are explained in the GNU ‘gettext’ manual. Here only a bit of
|
||
information is provided.
|
||
|
||
The information about the plural form selection has to be stored in
|
||
the header entry (the one with the empty ‘msgid’ string). It looks
|
||
like this:
|
||
|
||
Plural-Forms: nplurals=2; plural=n == 1 ? 0 : 1;
|
||
|
||
The ‘nplurals’ value must be a decimal number which specifies how
|
||
many different plural forms exist for this language. The string
|
||
following ‘plural’ is an expression which is using the C language
|
||
syntax. Exceptions are that no negative numbers are allowed, numbers
|
||
must be decimal, and the only variable allowed is ‘n’. This expression
|
||
will be evaluated whenever one of the functions ‘ngettext’, ‘dngettext’,
|
||
or ‘dcngettext’ is called. The numeric value passed to these functions
|
||
is then substituted for all uses of the variable ‘n’ in the expression.
|
||
The resulting value then must be greater than or equal to zero and smaller
|
||
than the value given as the value of ‘nplurals’.
|
||
|
||
The following rules are known at this point. The languages with their families
|
||
are listed. But this does not necessarily mean the information can be
|
||
generalized for the whole family (as can be easily seen in the table
|
||
below).(1)
|
||
|
||
Only one form:
|
||
Some languages only require one single form. There is no
|
||
distinction between the singular and plural form. An appropriate
|
||
header entry would look like this:
|
||
|
||
Plural-Forms: nplurals=1; plural=0;
|
||
|
||
Languages with this property include:
|
||
|
||
Finno-Ugric family
|
||
Hungarian
|
||
Asian family
|
||
Japanese, Korean
|
||
Turkic/Altaic family
|
||
Turkish
|
||
|
||
Two forms, singular used for one only
|
||
This is the form used in most existing programs since it is what
|
||
English is using. A header entry would look like this:
|
||
|
||
Plural-Forms: nplurals=2; plural=n != 1;
|
||
|
||
(Note: this uses the feature of C expressions that boolean
|
||
expressions have the value zero or one.)
|
||
|
||
Languages with this property include:
|
||
|
||
Germanic family
|
||
Danish, Dutch, English, German, Norwegian, Swedish
|
||
Finno-Ugric family
|
||
Estonian, Finnish
|
||
Latin/Greek family
|
||
Greek
|
||
Semitic family
|
||
Hebrew
|
||
Romance family
|
||
Italian, Portuguese, Spanish
|
||
Artificial
|
||
Esperanto
|
||
|
||
Two forms, singular used for zero and one
|
||
Exceptional case in the language family. The header entry would
|
||
be:
|
||
|
||
Plural-Forms: nplurals=2; plural=n>1;
|
||
|
||
Languages with this property include:
|
||
|
||
Romance family
|
||
French, Brazilian Portuguese
|
||
|
||
Three forms, special case for zero
|
||
The header entry would be:
|
||
|
||
Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2;
|
||
|
||
Languages with this property include:
|
||
|
||
Baltic family
|
||
Latvian
|
||
|
||
Three forms, special cases for one and two
|
||
The header entry would be:
|
||
|
||
Plural-Forms: nplurals=3; plural=n==1 ? 0 : n==2 ? 1 : 2;
|
||
|
||
Languages with this property include:
|
||
|
||
Celtic
|
||
Gaeilge (Irish)
|
||
|
||
Three forms, special case for numbers ending in 1[2-9]
|
||
The header entry would look like this:
|
||
|
||
Plural-Forms: nplurals=3; \
|
||
plural=n%10==1 && n%100!=11 ? 0 : \
|
||
n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2;
|
||
|
||
Languages with this property include:
|
||
|
||
Baltic family
|
||
Lithuanian
|
||
|
||
Three forms, special cases for numbers ending in 1 and 2, 3, 4, except those ending in 1[1-4]
|
||
The header entry would look like this:
|
||
|
||
Plural-Forms: nplurals=3; \
|
||
plural=n%100/10==1 ? 2 : n%10==1 ? 0 : (n+9)%10>3 ? 2 : 1;
|
||
|
||
Languages with this property include:
|
||
|
||
Slavic family
|
||
Croatian, Czech, Russian, Ukrainian
|
||
|
||
Three forms, special cases for 1 and 2, 3, 4
|
||
The header entry would look like this:
|
||
|
||
Plural-Forms: nplurals=3; \
|
||
plural=(n==1) ? 1 : (n>=2 && n<=4) ? 2 : 0;
|
||
|
||
Languages with this property include:
|
||
|
||
Slavic family
|
||
Slovak
|
||
|
||
Three forms, special case for one and some numbers ending in 2, 3, or 4
|
||
The header entry would look like this:
|
||
|
||
Plural-Forms: nplurals=3; \
|
||
plural=n==1 ? 0 : \
|
||
n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2;
|
||
|
||
Languages with this property include:
|
||
|
||
Slavic family
|
||
Polish
|
||
|
||
Four forms, special case for one and all numbers ending in 02, 03, or 04
|
||
The header entry would look like this:
|
||
|
||
Plural-Forms: nplurals=4; \
|
||
plural=n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3;
|
||
|
||
Languages with this property include:
|
||
|
||
Slavic family
|
||
Slovenian
|
||
|
||
---------- Footnotes ----------
|
||
|
||
(1) Additions are welcome. Send appropriate information to
|
||
<bug-glibc-manual@gnu.org>.
|
||
|
||
|
||
File: libc.info, Node: Charset conversion in gettext, Next: GUI program problems, Prev: Advanced gettext functions, Up: Message catalogs with gettext
|
||
|
||
8.2.1.4 How to specify the output character set ‘gettext’ uses
|
||
..............................................................
|
||
|
||
‘gettext’ not only looks up a translation in a message catalog. It also
|
||
converts the translation on the fly to the desired output character set.
|
||
This is useful if the user is working in a different character set than
|
||
the translator who created the message catalog, because it avoids
|
||
distributing variants of message catalogs which differ only in the
|
||
character set.
|
||
|
||
The output character set is, by default, the value of ‘nl_langinfo
|
||
(CODESET)’, which depends on the ‘LC_CTYPE’ part of the current locale.
|
||
But programs which store strings in a locale independent way (e.g.
|
||
UTF-8) can request that ‘gettext’ and related functions return the
|
||
translations in that encoding, by use of the ‘bind_textdomain_codeset’
|
||
function.
|
||
|
||
Note that the MSGID argument to ‘gettext’ is not subject to character
|
||
set conversion. Also, when ‘gettext’ does not find a translation for
|
||
MSGID, it returns MSGID unchanged – independently of the current output
|
||
character set. It is therefore recommended that all MSGIDs be US-ASCII
|
||
strings.
|
||
|
||
-- Function: char * bind_textdomain_codeset (const char *DOMAINNAME,
|
||
const char *CODESET)
|
||
Preliminary: | MT-Safe | AS-Unsafe heap | AC-Unsafe mem | *Note
|
||
POSIX Safety Concepts::.
|
||
|
||
The ‘bind_textdomain_codeset’ function can be used to specify the
|
||
output character set for message catalogs for domain DOMAINNAME.
|
||
The CODESET argument must be a valid codeset name which can be used
|
||
for the ‘iconv_open’ function, or a null pointer.
|
||
|
||
If the CODESET parameter is the null pointer,
|
||
‘bind_textdomain_codeset’ returns the currently selected codeset
|
||
for the domain with the name DOMAINNAME. It returns ‘NULL’ if no
|
||
codeset has yet been selected.
|
||
|
||
The ‘bind_textdomain_codeset’ function can be used several times.
|
||
If used multiple times with the same DOMAINNAME argument, the later
|
||
call overrides the settings made by the earlier one.
|
||
|
||
The ‘bind_textdomain_codeset’ function returns a pointer to a
|
||
string containing the name of the selected codeset. The string is
|
||
allocated internally in the function and must not be changed by the
|
||
user. If the system ran out of memory during the execution of
|
||
‘bind_textdomain_codeset’, the return value is ‘NULL’ and the
|
||
global variable ERRNO is set accordingly.
|
||
|
||
|
||
File: libc.info, Node: GUI program problems, Next: Using gettextized software, Prev: Charset conversion in gettext, Up: Message catalogs with gettext
|
||
|
||
8.2.1.5 How to use ‘gettext’ in GUI programs
|
||
............................................
|
||
|
||
One place where the ‘gettext’ functions, if used normally, have big
|
||
problems is within programs with graphical user interfaces (GUIs). The
|
||
problem is that many of the strings which have to be translated are very
|
||
short. They have to appear in pull-down menus which restricts the
|
||
length. But strings which are not containing entire sentences or at
|
||
least large fragments of a sentence may appear in more than one
|
||
situation in the program but might have different translations. This is
|
||
especially true for the one-word strings which are frequently used in
|
||
GUI programs.
|
||
|
||
As a consequence many people say that the ‘gettext’ approach is wrong
|
||
and instead ‘catgets’ should be used which indeed does not have this
|
||
problem. But there is a very simple and powerful method to handle these
|
||
kinds of problems with the ‘gettext’ functions.
|
||
|
||
As an example consider the following fictional situation. A GUI program
|
||
has a menu bar with the following entries:
|
||
|
||
+------------+------------+--------------------------------------+
|
||
| File | Printer | |
|
||
+------------+------------+--------------------------------------+
|
||
| Open | | Select |
|
||
| New | | Open |
|
||
+----------+ | Connect |
|
||
+----------+
|
||
|
||
To have the strings ‘File’, ‘Printer’, ‘Open’, ‘New’, ‘Select’, and
|
||
‘Connect’ translated there has to be at some point in the code a call to
|
||
a function of the ‘gettext’ family. But in two places the string passed
|
||
into the function would be ‘Open’. The translations might not be the
|
||
same and therefore we are in the dilemma described above.
|
||
|
||
One solution to this problem is to artificially enlengthen the
|
||
strings to make them unambiguous. But what would the program do if no
|
||
translation is available? The enlengthened string is not what should be
|
||
printed. So we should use a slightly modified version of the
|
||
functions.
|
||
|
||
To enlengthen the strings a uniform method should be used. E.g., in
|
||
the example above the strings could be chosen as
|
||
|
||
Menu|File
|
||
Menu|Printer
|
||
Menu|File|Open
|
||
Menu|File|New
|
||
Menu|Printer|Select
|
||
Menu|Printer|Open
|
||
Menu|Printer|Connect
|
||
|
||
Now all the strings are different and if now instead of ‘gettext’ the
|
||
following little wrapper function is used, everything works just fine:
|
||
|
||
char *
|
||
sgettext (const char *msgid)
|
||
{
|
||
char *msgval = gettext (msgid);
|
||
if (msgval == msgid)
|
||
msgval = strrchr (msgid, '|') + 1;
|
||
return msgval;
|
||
}
|
||
|
||
What this little function does is to recognize the case when no
|
||
translation is available. This can be done very efficiently by a
|
||
pointer comparison since the return value is the input value. If there
|
||
is no translation we know that the input string is in the format we used
|
||
for the Menu entries and therefore contains a ‘|’ character. We simply
|
||
search for the last occurrence of this character and return a pointer to
|
||
the character following it. That’s it!
|
||
|
||
If one now consistently uses the enlengthened string form and
|
||
replaces the ‘gettext’ calls with calls to ‘sgettext’ (this is normally
|
||
limited to very few places in the GUI implementation) then it is
|
||
possible to produce a program which can be internationalized.
|
||
|
||
With advanced compilers (such as GNU C) one can write the ‘sgettext’
|
||
function as an inline function or as a macro like this:
|
||
|
||
#define sgettext(msgid) \
|
||
({ const char *__msgid = (msgid); \
|
||
char *__msgval = gettext (__msgid); \
|
||
if (__msgval == __msgid) \
|
||
__msgval = strrchr (__msgid, '|') + 1; \
|
||
__msgval; })
|
||
|
||
The other ‘gettext’ functions (‘dgettext’, ‘dcgettext’ and the
|
||
‘ngettext’ equivalents) can and should have corresponding functions as
|
||
well which look almost identical, except for the parameters and the call
|
||
to the underlying function.
|
||
|
||
Now there is of course the question why such functions do not exist
|
||
in the GNU C Library? There are two parts of the answer to this
|
||
question.
|
||
|
||
• They are easy to write and therefore can be provided by the project
|
||
they are used in. This is not an answer by itself and must be seen
|
||
together with the second part which is:
|
||
|
||
• There is no way the C library can contain a version which can work
|
||
everywhere. The problem is the selection of the character to
|
||
separate the prefix from the actual string in the enlengthened
|
||
string. The examples above used ‘|’ which is a quite good choice
|
||
because it resembles a notation frequently used in this context and
|
||
it also is a character not often used in message strings.
|
||
|
||
But what if the character is used in message strings? Or if the
|
||
chosen character is not available in the character set on the
|
||
machine one compiles (e.g., ‘|’ is not required to exist for ISO C;
|
||
this is why the ‘iso646.h’ file exists in ISO C programming
|
||
environments).
|
||
|
||
There is only one more comment left to make. The wrapper function
|
||
above requires that the translation strings are not enlengthened
|
||
themselves. This is only logical. There is no need to disambiguate the
|
||
strings (since they are never used as keys for a search) and one also
|
||
saves quite some memory and disk space by doing this.
|
||
|
||
|
||
File: libc.info, Node: Using gettextized software, Prev: GUI program problems, Up: Message catalogs with gettext
|
||
|
||
8.2.1.6 User influence on ‘gettext’
|
||
...................................
|
||
|
||
The last sections described what the programmer can do to
|
||
internationalize the messages of the program. But it is finally up to
|
||
the user to select the message s/he wants to see. S/He must understand
|
||
them.
|
||
|
||
The POSIX locale model uses the environment variables ‘LC_COLLATE’,
|
||
‘LC_CTYPE’, ‘LC_MESSAGES’, ‘LC_MONETARY’, ‘LC_NUMERIC’, and ‘LC_TIME’ to
|
||
select the locale which is to be used. This way the user can influence
|
||
lots of functions. As we mentioned above the ‘gettext’ functions also
|
||
take advantage of this.
|
||
|
||
To understand how this happens it is necessary to take a look at the
|
||
various components of the filename which gets computed to locate a
|
||
message catalog. It is composed as follows:
|
||
|
||
DIR_NAME/LOCALE/LC_CATEGORY/DOMAIN_NAME.mo
|
||
|
||
The default value for DIR_NAME is system specific. It is computed
|
||
from the value given as the prefix while configuring the C library.
|
||
This value normally is ‘/usr’ or ‘/’. For the former the complete
|
||
DIR_NAME is:
|
||
|
||
/usr/share/locale
|
||
|
||
We can use ‘/usr/share’ since the ‘.mo’ files containing the message
|
||
catalogs are system independent, so all systems can use the same files.
|
||
If the program executed the ‘bindtextdomain’ function for the message
|
||
domain that is currently handled, the ‘dir_name’ component is exactly
|
||
the value which was given to the function as the second parameter.
|
||
I.e., ‘bindtextdomain’ allows overwriting the only system dependent and
|
||
fixed value to make it possible to address files anywhere in the
|
||
filesystem.
|
||
|
||
The CATEGORY is the name of the locale category which was selected in
|
||
the program code. For ‘gettext’ and ‘dgettext’ this is always
|
||
‘LC_MESSAGES’, for ‘dcgettext’ this is selected by the value of the
|
||
third parameter. As said above it should be avoided to ever use a
|
||
category other than ‘LC_MESSAGES’.
|
||
|
||
The LOCALE component is computed based on the category used. Just
|
||
like for the ‘setlocale’ function, here the user selection comes into
|
||
play. Some environment variables are examined in a fixed order and the
|
||
first environment variable set determines the return value of the lookup
|
||
process. In detail, for the category ‘LC_xxx’ the following variables
|
||
in this order are examined:
|
||
|
||
‘LANGUAGE’
|
||
‘LC_ALL’
|
||
‘LC_xxx’
|
||
‘LANG’
|
||
|
||
This looks very familiar. With the exception of the ‘LANGUAGE’
|
||
environment variable this is exactly the lookup order the ‘setlocale’
|
||
function uses. But why introduce the ‘LANGUAGE’ variable?
|
||
|
||
The reason is that the syntax of the values these variables can have
|
||
is different to what is expected by the ‘setlocale’ function. If we
|
||
would set ‘LC_ALL’ to a value following the extended syntax that would
|
||
mean the ‘setlocale’ function will never be able to use the value of
|
||
this variable as well. An additional variable removes this problem plus
|
||
we can select the language independently of the locale setting which
|
||
sometimes is useful.
|
||
|
||
While for the ‘LC_xxx’ variables the value should consist of exactly
|
||
one specification of a locale the ‘LANGUAGE’ variable’s value can
|
||
consist of a colon separated list of locale names. The attentive reader
|
||
will realize that this is the way we manage to implement one of our
|
||
additional demands above: we want to be able to specify an ordered list
|
||
of languages.
|
||
|
||
Back to the constructed filename we have only one component missing.
|
||
The DOMAIN_NAME part is the name which was either registered using the
|
||
‘textdomain’ function or which was given to ‘dgettext’ or ‘dcgettext’ as
|
||
the first parameter. Now it becomes obvious that a good choice for the
|
||
domain name in the program code is a string which is closely related to
|
||
the program/package name. E.g., for the GNU C Library the domain name
|
||
is ‘libc’.
|
||
|
||
A small piece of example code should show how the programmer is supposed
|
||
to work:
|
||
|
||
{
|
||
setlocale (LC_ALL, "");
|
||
textdomain ("test-package");
|
||
bindtextdomain ("test-package", "/usr/local/share/locale");
|
||
puts (gettext ("Hello, world!"));
|
||
}
|
||
|
||
At the program start the default domain is ‘messages’, and the
|
||
default locale is "C". The ‘setlocale’ call sets the locale according to
|
||
the user’s environment variables; remember that correct functioning of
|
||
‘gettext’ relies on the correct setting of the ‘LC_MESSAGES’ locale (for
|
||
looking up the message catalog) and of the ‘LC_CTYPE’ locale (for the
|
||
character set conversion). The ‘textdomain’ call changes the default
|
||
domain to ‘test-package’. The ‘bindtextdomain’ call specifies that the
|
||
message catalogs for the domain ‘test-package’ can be found below the
|
||
directory ‘/usr/local/share/locale’.
|
||
|
||
If now the user sets in her/his environment the variable ‘LANGUAGE’ to
|
||
‘de’ the ‘gettext’ function will try to use the translations from the
|
||
file
|
||
|
||
/usr/local/share/locale/de/LC_MESSAGES/test-package.mo
|
||
|
||
From the above descriptions it should be clear which component of
|
||
this filename is determined by which source.
|
||
|
||
In the above example we assumed that the ‘LANGUAGE’ environment
|
||
variable is set to ‘de’. This might be an appropriate selection but what
|
||
happens if the user wants to use ‘LC_ALL’ because of the wider usability
|
||
and here the required value is ‘de_DE.ISO-8859-1’? We already mentioned
|
||
above that a situation like this is not infrequent. E.g., a person
|
||
might prefer reading a dialect and if this is not available fall back on
|
||
the standard language.
|
||
|
||
The ‘gettext’ functions know about situations like this and can
|
||
handle them gracefully. The functions recognize the format of the value
|
||
of the environment variable. It can split the value into different pieces
|
||
and by leaving out one or the other part it can construct new
|
||
values. This happens of course in a predictable way. To understand
|
||
this one must know the format of the environment variable value. There
|
||
is one more or less standardized form, originally from the X/Open
|
||
specification:
|
||
|
||
‘language[_territory[.codeset]][@modifier]’
|
||
|
||
Less specific locale names will be stripped off in the order of the
|
||
following list:
|
||
|
||
1. ‘codeset’
|
||
2. ‘normalized codeset’
|
||
3. ‘territory’
|
||
4. ‘modifier’
|
||
|
||
The ‘language’ field will never be dropped for obvious reasons.
|
||
|
||
The only new thing is the ‘normalized codeset’ entry. This is
|
||
another goodie which is introduced to help reduce the chaos which
|
||
derives from the inability of the people to standardize the names of
|
||
character sets. Instead of ISO-8859-1 one can often see 8859-1, 88591,
|
||
iso8859-1, or iso_8859-1. The ‘normalized codeset’ value is generated
|
||
from the user-provided character set name by applying the following
|
||
rules:
|
||
|
||
1. Remove all characters besides numbers and letters.
|
||
2. Fold letters to lowercase.
|
||
3. If the name only contains digits prepend the string ‘"iso"’.
|
||
|
||
So all of the above names will be normalized to ‘iso88591’. This allows
|
||
the program user to choose the locale name much more freely.
|
||
|
||
Even this extended functionality still does not help to solve the
|
||
problem that completely different names can be used to denote the same
|
||
locale (e.g., ‘de’ and ‘german’). To be of help in this situation the
|
||
locale implementation and also the ‘gettext’ functions know about
|
||
aliases.
|
||
|
||
The file ‘/usr/share/locale/locale.alias’ (replace ‘/usr’ with
|
||
whatever prefix you used for configuring the C library) contains a
|
||
mapping of alternative names to more regular names. The system manager
|
||
is free to add new entries to fill her/his own needs. The selected
|
||
locale from the environment is compared with the entries in the first
|
||
column of this file ignoring the case. If they match the value of the
|
||
second column is used instead for the further handling.
|
||
|
||
In the description of the format of the environment variables we
|
||
already mentioned the character set as a factor in the selection of the
|
||
message catalog. In fact, only catalogs which contain text written
|
||
using the character set of the system/program can be used (directly;
|
||
there will come a solution for this some day). This means for the user
|
||
that s/he will always have to take care of this. If in the collection
|
||
of the message catalogs there are files for the same language but coded
|
||
using different character sets the user has to be careful.
|
||
|
||
|
||
File: libc.info, Node: Helper programs for gettext, Prev: Message catalogs with gettext, Up: The Uniforum approach
|
||
|
||
8.2.2 Programs to handle message catalogs for ‘gettext’
|
||
-------------------------------------------------------
|
||
|
||
The GNU C Library does not contain the source code for the programs to
|
||
handle message catalogs for the ‘gettext’ functions. As part of the GNU
|
||
project the GNU gettext package contains everything the developer needs.
|
||
The functionality provided by the tools in this package by far exceeds
|
||
the abilities of the ‘gencat’ program described above for the ‘catgets’
|
||
functions.
|
||
|
||
There is a program ‘msgfmt’ which is the equivalent program to the
|
||
‘gencat’ program. It generates from the human-readable and -editable
|
||
form of the message catalog a binary file which can be used by the
|
||
‘gettext’ functions. But there are several more programs available.
|
||
|
||
The ‘xgettext’ program can be used to automatically extract the
|
||
translatable messages from a source file. I.e., the programmer need not
|
||
take care of the translations and the list of messages which have to be
|
||
translated. S/He will simply wrap the translatable string in calls to
|
||
‘gettext’ et al. and the rest will be done by ‘xgettext’. This program
|
||
has a lot of options which help to customize the output or help to
|
||
understand the input better.
|
||
|
||
Other programs help to manage the development cycle when new messages
|
||
appear in the source files or when a new translation of the messages
|
||
appears. Here it should only be noted that using all the tools in GNU
|
||
gettext it is possible to _completely_ automate the handling of message
|
||
catalogs. Besides marking the translatable strings in the source code
|
||
and generating the translations the developers do not have anything to
|
||
do themselves.
|
||
|
||
|
||
File: libc.info, Node: Searching and Sorting, Next: Pattern Matching, Prev: Message Translation, Up: Top
|
||
|
||
9 Searching and Sorting
|
||
***********************
|
||
|
||
This chapter describes functions for searching and sorting arrays of
|
||
arbitrary objects. You pass the appropriate comparison function to be
|
||
applied as an argument, along with the size of the objects in the array
|
||
and the total number of elements.
|
||
|
||
* Menu:
|
||
|
||
* Comparison Functions:: Defining how to compare two objects.
|
||
Since the sort and search facilities
|
||
are general, you have to specify the
|
||
ordering.
|
||
* Array Search Function:: The ‘bsearch’ function.
|
||
* Array Sort Function:: The ‘qsort’ function.
|
||
* Search/Sort Example:: An example program.
|
||
* Hash Search Function:: The ‘hsearch’ function.
|
||
* Tree Search Function:: The ‘tsearch’ function.
|
||
|
||
|
||
File: libc.info, Node: Comparison Functions, Next: Array Search Function, Up: Searching and Sorting
|
||
|
||
9.1 Defining the Comparison Function
|
||
====================================
|
||
|
||
In order to use the sorted array library functions, you have to describe
|
||
how to compare the elements of the array.
|
||
|
||
To do this, you supply a comparison function to compare two elements
|
||
of the array. The library will call this function, passing as arguments
|
||
pointers to two array elements to be compared. Your comparison function
|
||
should return a value the way ‘strcmp’ (*note String/Array Comparison::)
|
||
does: negative if the first argument is “less” than the second, zero if
|
||
they are “equal”, and positive if the first argument is “greater”.
|
||
|
||
Here is an example of a comparison function which works with an array
|
||
of numbers of type ‘double’:
|
||
|
||
int
|
||
compare_doubles (const void *a, const void *b)
|
||
{
|
||
const double *da = (const double *) a;
|
||
const double *db = (const double *) b;
|
||
|
||
return (*da > *db) - (*da < *db);
|
||
}
|
||
|
||
The header file ‘stdlib.h’ defines a name for the data type of
|
||
comparison functions. This type is a GNU extension.
|
||
|
||
int comparison_fn_t (const void *, const void *);
|
||
|
||
|
||
File: libc.info, Node: Array Search Function, Next: Array Sort Function, Prev: Comparison Functions, Up: Searching and Sorting
|
||
|
||
9.2 Array Search Function
|
||
=========================
|
||
|
||
Generally searching for a specific element in an array means that
|
||
potentially all elements must be checked. The GNU C Library contains
|
||
functions to perform linear search. The prototypes for the following
|
||
two functions can be found in ‘search.h’.
|
||
|
||
-- Function: void * lfind (const void *KEY, const void *BASE, size_t
|
||
*NMEMB, size_t SIZE, comparison_fn_t COMPAR)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The ‘lfind’ function searches in the array with ‘*NMEMB’ elements
|
||
of SIZE bytes pointed to by BASE for an element which matches the
|
||
one pointed to by KEY. The function pointed to by COMPAR is used
|
||
to decide whether two elements match.
|
||
|
||
The return value is a pointer to the matching element in the array
|
||
starting at BASE if it is found. If no matching element is
|
||
available ‘NULL’ is returned.
|
||
|
||
The mean runtime of this function is ‘*NMEMB’/2. This function
|
||
should only be used if elements often get added to or deleted from
|
||
the array in which case it might not be useful to sort the array
|
||
before searching.
|
||
|
||
-- Function: void * lsearch (const void *KEY, void *BASE, size_t
|
||
*NMEMB, size_t SIZE, comparison_fn_t COMPAR)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The ‘lsearch’ function is similar to the ‘lfind’ function. It
|
||
searches the given array for an element and returns it if found.
|
||
The difference is that if no matching element is found the
|
||
‘lsearch’ function adds the object pointed to by KEY (with a size
|
||
of SIZE bytes) at the end of the array and it increments the value
|
||
of ‘*NMEMB’ to reflect this addition.
|
||
|
||
This means for the caller that if it is not sure that the array
|
||
contains the element one is searching for the memory allocated for
|
||
the array starting at BASE must have room for at least SIZE more
|
||
bytes. If one is sure the element is in the array it is better to
|
||
use ‘lfind’, so having more room in the array is always necessary
|
||
when calling ‘lsearch’.
|
||
|
||
To search a sorted array for an element matching the key, use the
|
||
‘bsearch’ function. The prototype for this function is in the header
|
||
file ‘stdlib.h’.
|
||
|
||
-- Function: void * bsearch (const void *KEY, const void *ARRAY, size_t
|
||
COUNT, size_t SIZE, comparison_fn_t COMPARE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Safe | *Note POSIX Safety
|
||
Concepts::.
|
||
|
||
The ‘bsearch’ function searches the sorted array ARRAY for an
|
||
object that is equivalent to KEY. The array contains COUNT
|
||
elements, each of which is of size SIZE bytes.
|
||
|
||
The COMPARE function is used to perform the comparison. This
|
||
function is called with two pointer arguments and should return an
|
||
integer less than, equal to, or greater than zero corresponding to
|
||
whether its first argument is considered less than, equal to, or
|
||
greater than its second argument. The elements of the ARRAY must
|
||
already be sorted in ascending order according to this comparison
|
||
function.
|
||
|
||
The return value is a pointer to the matching array element, or a
|
||
null pointer if no match is found. If the array contains more than
|
||
one element that matches, the one that is returned is unspecified.
|
||
|
||
This function derives its name from the fact that it is implemented
|
||
using the binary search algorithm.
|
||
|
||
|
||
File: libc.info, Node: Array Sort Function, Next: Search/Sort Example, Prev: Array Search Function, Up: Searching and Sorting
|
||
|
||
9.3 Array Sort Function
|
||
=======================
|
||
|
||
To sort an array using an arbitrary comparison function, use the ‘qsort’
|
||
function. The prototype for this function is in ‘stdlib.h’.
|
||
|
||
-- Function: void qsort (void *ARRAY, size_t COUNT, size_t SIZE,
|
||
comparison_fn_t COMPARE)
|
||
Preliminary: | MT-Safe | AS-Safe | AC-Unsafe corrupt | *Note POSIX
|
||
Safety Concepts::.
|
||
|
||
The ‘qsort’ function sorts the array ARRAY. The array contains
|
||
COUNT elements, each of which is of size SIZE.
|
||
|
||
The COMPARE function is used to perform the comparison on the array
|
||
elements. This function is called with two pointer arguments and
|
||
should return an integer less than, equal to, or greater than zero
|
||
corresponding to whether its first argument is considered less
|
||
than, equal to, or greater than its second argument.
|
||
|
||
*Warning:* If two objects compare as equal, their order after
|
||
sorting is unpredictable. That is to say, the sorting is not
|
||
stable. This can make a difference when the comparison considers
|
||
only part of the elements. Two elements with the same sort key may
|
||
differ in other respects.
|
||
|
||
Although the object addresses passed to the comparison function lie
|
||
within the array, they need not correspond with the original
|
||
locations of those objects because the sorting algorithm may swap
|
||
around objects in the array before making some comparisons. The
|
||
only way to perform a stable sort with ‘qsort’ is to first augment
|
||
the objects with a monotonic counter of some kind.
|
||
|
||
Here is a simple example of sorting an array of doubles in
|
||
numerical order, using the comparison function defined above (*note
|
||
Comparison Functions::):
|
||
|
||
{
|
||
double *array;
|
||
int size;
|
||
…
|
||
qsort (array, size, sizeof (double), compare_doubles);
|
||
}
|
||
|
||
The ‘qsort’ function derives its name from the fact that it was
|
||
originally implemented using the “quick sort” algorithm.
|
||
|
||
The implementation of ‘qsort’ in this library might not be an
|
||
in-place sort and might thereby use an extra amount of memory to
|
||
store the array.
|
||
|
||
|
||
File: libc.info, Node: Search/Sort Example, Next: Hash Search Function, Prev: Array Sort Function, Up: Searching and Sorting
|
||
|
||
9.4 Searching and Sorting Example
|
||
=================================
|
||
|
||
Here is an example showing the use of ‘qsort’ and ‘bsearch’ with an
|
||
array of structures. The objects in the array are sorted by comparing
|
||
their ‘name’ fields with the ‘strcmp’ function. Then, we can look up
|
||
individual objects based on their names.
|
||
|
||
|
||
#include <stdlib.h>
|
||
#include <stdio.h>
|
||
#include <string.h>
|
||
|
||
/* Define an array of critters to sort. */
|
||
|
||
struct critter
|
||
{
|
||
const char *name;
|
||
const char *species;
|
||
};
|
||
|
||
struct critter muppets[] =
|
||
{
|
||
{"Kermit", "frog"},
|
||
{"Piggy", "pig"},
|
||
{"Gonzo", "whatever"},
|
||
{"Fozzie", "bear"},
|
||
{"Sam", "eagle"},
|
||
{"Robin", "frog"},
|
||
{"Animal", "animal"},
|
||
{"Camilla", "chicken"},
|
||
{"Sweetums", "monster"},
|
||
{"Dr. Strangepork", "pig"},
|
||
{"Link Hogthrob", "pig"},
|
||
{"Zoot", "human"},
|
||
{"Dr. Bunsen Honeydew", "human"},
|
||
{"Beaker", "human"},
|
||
{"Swedish Chef", "human"}
|
||
};
|
||
|
||
int count = sizeof (muppets) / sizeof (struct critter);
|
||
|
||
|
||
|
||
/* This is the comparison function used for sorting and searching. */
|
||
|
||
int
|
||
critter_cmp (const void *v1, const void *v2)
|
||
{
|
||
const struct critter *c1 = v1;
|
||
const struct critter *c2 = v2;
|
||
|
||
return strcmp (c1->name, c2->name);
|
||
}
|
||
|
||
|
||
/* Print information about a critter. */
|
||
|
||
void
|
||
print_critter (const struct critter *c)
|
||
{
|
||
printf ("%s, the %s\n", c->name, c->species);
|
||
}
|
||
|
||
|
||
/* Do the lookup into the sorted array. */
|
||
|
||
void
|
||
find_critter (const char *name)
|
||
{
|
||
struct critter target, *result;
|
||
target.name = name;
|
||
result = bsearch (&target, muppets, count, sizeof (struct critter),
|
||
critter_cmp);
|
||
if (result)
|
||
print_critter (result);
|
||
else
|
||
printf ("Couldn't find %s.\n", name);
|
||
}
|
||
|
||
/* Main program. */
|
||
|
||
int
|
||
main (void)
|
||
{
|
||
int i;
|
||
|
||
for (i = 0; i < count; i++)
|
||
print_critter (&muppets[i]);
|
||
printf ("\n");
|
||
|
||
qsort (muppets, count, sizeof (struct critter), critter_cmp);
|
||
|
||
for (i = 0; i < count; i++)
|
||
print_critter (&muppets[i]);
|
||
printf ("\n");
|
||
|
||
find_critter ("Kermit");
|
||
find_critter ("Gonzo");
|
||
find_critter ("Janice");
|
||
|
||
return 0;
|
||
}
|
||
|
||
The output from this program looks like:
|
||
|
||
Kermit, the frog
|
||
Piggy, the pig
|
||
Gonzo, the whatever
|
||
Fozzie, the bear
|
||
Sam, the eagle
|
||
Robin, the frog
|
||
Animal, the animal
|
||
Camilla, the chicken
|
||
Sweetums, the monster
|
||
Dr. Strangepork, the pig
|
||
Link Hogthrob, the pig
|
||
Zoot, the human
|
||
Dr. Bunsen Honeydew, the human
|
||
Beaker, the human
|
||
Swedish Chef, the human
|
||
|
||
Animal, the animal
|
||
Beaker, the human
|
||
Camilla, the chicken
|
||
Dr. Bunsen Honeydew, the human
|
||
Dr. Strangepork, the pig
|
||
Fozzie, the bear
|
||
Gonzo, the whatever
|
||
Kermit, the frog
|
||
Link Hogthrob, the pig
|
||
Piggy, the pig
|
||
Robin, the frog
|
||
Sam, the eagle
|
||
Swedish Chef, the human
|
||
Sweetums, the monster
|
||
Zoot, the human
|
||
|
||
Kermit, the frog
|
||
Gonzo, the whatever
|
||
Couldn't find Janice.
|
||
|
||
|
||
File: libc.info, Node: Hash Search Function, Next: Tree Search Function, Prev: Search/Sort Example, Up: Searching and Sorting
|
||
|
||
9.5 The ‘hsearch’ function.
|
||
===========================
|
||
|
||
The functions mentioned so far in this chapter are for searching in a
|
||
sorted or unsorted array. There are other methods to organize
|
||
information which later should be searched. The costs of insert, delete
|
||
and search differ. One possible implementation is using hashing tables.
|
||
The following functions are declared in the header file ‘search.h’.
|
||
|
||
-- Function: int hcreate (size_t NEL)
|
||
Preliminary: | MT-Unsafe race:hsearch | AS-Unsafe heap | AC-Unsafe
|
||
corrupt mem | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘hcreate’ function creates a hashing table which can contain at
|
||
least NEL elements. There is no possibility to grow this table so
|
||
it is necessary to choose the value for NEL wisely. The method
|
||
used to implement this function might make it necessary to make the
|
||
number of elements in the hashing table larger than the expected
|
||
maximal number of elements. Hashing tables usually work
|
||
inefficiently if they are filled 80% or more. The constant access
|
||
time guaranteed by hashing can only be achieved if few collisions
|
||
exist. See Knuth’s “The Art of Computer Programming, Volume 3:
|
||
Sorting and Searching” for more information.
|
||
|
||
The weakest aspect of this function is that there can be at most
|
||
one hashing table used through the whole program. The table is
|
||
allocated in local memory out of control of the programmer. As an
|
||
extension the GNU C Library provides an additional set of functions
|
||
with a reentrant interface which provide a similar interface but
|
||
which allow keeping arbitrarily many hashing tables.
|
||
|
||
It is possible to use more than one hashing table in the program
|
||
run if the former table is first destroyed by a call to ‘hdestroy’.
|
||
|
||
The function returns a non-zero value if successful. If it returns
|
||
zero something went wrong. This could either mean there is already
|
||
a hashing table in use or the program runs out of memory.
|
||
|
||
-- Function: void hdestroy (void)
|
||
Preliminary: | MT-Unsafe race:hsearch | AS-Unsafe heap | AC-Unsafe
|
||
corrupt mem | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘hdestroy’ function can be used to free all the resources
|
||
allocated in a previous call of ‘hcreate’. After a call to this
|
||
function it is again possible to call ‘hcreate’ and allocate a new
|
||
table with possibly different size.
|
||
|
||
It is important to remember that the elements contained in the
|
||
hashing table at the time ‘hdestroy’ is called are _not_ freed by
|
||
this function. It is the responsibility of the program code to
|
||
free those strings (if necessary at all). Freeing all the element
|
||
memory is not possible without extra, separately kept information
|
||
since there is no function to iterate through all available
|
||
elements in the hashing table. If it is really necessary to free a
|
||
table and all elements the programmer has to keep a list of all
|
||
table elements and before calling ‘hdestroy’ s/he has to free all
|
||
elements’ data using this list. This is a very unpleasant
|
||
mechanism and it also shows that this kind of hashing tables is
|
||
mainly meant for tables which are created once and used until the
|
||
end of the program run.
|
||
|
||
Entries of the hashing table and keys for the search are defined
|
||
using this type:
|
||
|
||
-- Data type: struct ENTRY
|
||
Both elements of this structure are pointers to zero-terminated
|
||
strings. This is a limiting restriction of the functionality of
|
||
the ‘hsearch’ functions. They can only be used for data sets which
|
||
use the NUL character always and solely to terminate the records.
|
||
It is not possible to handle general binary data.
|
||
|
||
‘char *key’
|
||
Pointer to a zero-terminated string of characters describing
|
||
the key for the search or the element in the hashing table.
|
||
‘char *data’
|
||
Pointer to a zero-terminated string of characters describing
|
||
the data. If the functions will be called only for searching
|
||
an existing entry this element might stay undefined since it
|
||
is not used.
|
||
|
||
-- Function: ENTRY * hsearch (ENTRY ITEM, ACTION ACTION)
|
||
Preliminary: | MT-Unsafe race:hsearch | AS-Unsafe | AC-Unsafe
|
||
corrupt/action==ENTER | *Note POSIX Safety Concepts::.
|
||
|
||
To search in a hashing table created using ‘hcreate’ the ‘hsearch’
|
||
function must be used. This function can perform simple search for
|
||
an element (if ACTION has the value ‘FIND’) or it can alternatively
|
||
insert the key element into the hashing table. Entries are never
|
||
replaced.
|
||
|
||
The key is denoted by a pointer to an object of type ‘ENTRY’. For
|
||
locating the corresponding position in the hashing table only the
|
||
‘key’ element of the structure is used.
|
||
|
||
If an entry with matching key is found the ACTION parameter is
|
||
irrelevant. The found entry is returned. If no matching entry is
|
||
found and the ACTION parameter has the value ‘FIND’ the function
|
||
returns a ‘NULL’ pointer. If no entry is found and the ACTION
|
||
parameter has the value ‘ENTER’ a new entry is added to the hashing
|
||
table which is initialized with the parameter ITEM. A pointer to
|
||
the newly added entry is returned.
|
||
|
||
As mentioned before the hashing table used by the functions described
|
||
so far is global and there can be at any time at most one hashing table
|
||
in the program. A solution is to use the following functions which are
|
||
a GNU extension. All have in common that they operate on a hashing
|
||
table which is described by the content of an object of the type ‘struct
|
||
hsearch_data’. This type should be treated as opaque, none of its
|
||
members should be changed directly.
|
||
|
||
-- Function: int hcreate_r (size_t NEL, struct hsearch_data *HTAB)
|
||
Preliminary: | MT-Safe race:htab | AS-Unsafe heap | AC-Unsafe
|
||
corrupt mem | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘hcreate_r’ function initializes the object pointed to by HTAB
|
||
to contain a hashing table with at least NEL elements. So this
|
||
function is equivalent to the ‘hcreate’ function except that the
|
||
initialized data structure is controlled by the user.
|
||
|
||
This allows having more than one hashing table at one time. The
|
||
memory necessary for the ‘struct hsearch_data’ object can be
|
||
allocated dynamically. It must be initialized with zero before
|
||
calling this function.
|
||
|
||
The return value is non-zero if the operation was successful. If
|
||
the return value is zero, something went wrong, which probably
|
||
means the program ran out of memory.
|
||
|
||
-- Function: void hdestroy_r (struct hsearch_data *HTAB)
|
||
Preliminary: | MT-Safe race:htab | AS-Unsafe heap | AC-Unsafe
|
||
corrupt mem | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘hdestroy_r’ function frees all resources allocated by the
|
||
‘hcreate_r’ function for this very same object HTAB. As for
|
||
‘hdestroy’ it is the program’s responsibility to free the strings
|
||
for the elements of the table.
|
||
|
||
-- Function: int hsearch_r (ENTRY ITEM, ACTION ACTION, ENTRY **RETVAL,
|
||
struct hsearch_data *HTAB)
|
||
Preliminary: | MT-Safe race:htab | AS-Safe | AC-Unsafe
|
||
corrupt/action==ENTER | *Note POSIX Safety Concepts::.
|
||
|
||
The ‘hsearch_r’ function is equivalent to ‘hsearch’. The meaning
|
||
of the first two arguments is identical. But instead of operating
|
||
on a single global hashing table the function works on the table
|
||
described by the object pointed to by HTAB (which is initialized by
|
||
a call to ‘hcreate_r’).
|
||
|
||
Another difference to ‘hsearch’ is that the pointer to the found
|
||
entry in the table is not the return value of the function. It is
|
||
returned by storing it in a pointer variable pointed to by the
|
||
RETVAL parameter. The return value of the function is an integer
|
||
value indicating success if it is non-zero and failure if it is
|
||
zero. In the latter case the global variable ERRNO signals the
|
||
reason for the failure.
|
||
|
||
‘ENOMEM’
|
||
The table is filled and ‘hsearch_r’ was called with a so far
|
||
unknown key and ACTION set to ‘ENTER’.
|
||
‘ESRCH’
|
||
The ACTION parameter is ‘FIND’ and no corresponding element is
|
||
found in the table.
|
||
|