From 8c091938dbee25441ff72f2e14f2e453147e34a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafael=20Laboissi=C3=A8re?= Date: Fri, 30 Jul 2021 05:50:27 -0300 Subject: Import octave-nan_3.6.0.orig.tar.gz [dgit import orig octave-nan_3.6.0.orig.tar.gz] --- COPYING | 674 +++++ DESCRIPTION | 12 + INDEX | 20 + Makefile | 243 ++ NEWS | 242 ++ doc/INSTALL | 55 + doc/README.TXT | 299 ++ inst/betacdf.m | 99 + inst/betainv.m | 145 + inst/betapdf.m | 136 + inst/bland_altman.m | 121 + inst/cat2bin.m | 90 + inst/cdfplot.m | 57 + inst/center.m | 63 + inst/classify.m | 77 + inst/coefficient_of_variation.m | 44 + inst/cor.m | 101 + inst/corrcoef.m | 392 +++ inst/corrplot.m | 58 + inst/cov.m | 95 + inst/covm.m | 254 ++ inst/cumsumskipnan.m | 49 + inst/decovm.m | 77 + inst/detrend.m | 152 + inst/ecdf.m | 82 + inst/fishers_exact_test.m | 84 + inst/flag_accuracy_level.m | 76 + inst/flag_implicit_significance.m | 67 + inst/flag_implicit_skip_nan.m | 65 + inst/flag_nans_occured.m | 41 + inst/fss.m | 144 + inst/geomean.m | 58 + inst/gini.m | 35 + inst/gscatter.m | 97 + inst/harmmean.m | 59 + inst/hist2res.m | 147 + inst/histo.m | 76 + inst/histo2.m | 105 + inst/histo3.m | 158 + inst/histo4.m | 104 + inst/iqr.m | 53 + inst/kappa.m | 202 ++ inst/knnsearch.m | 166 + inst/kolmogorov_smirnov.m | 110 + inst/kstest2.m | 69 + inst/kurtosis.m | 66 + inst/lmom.m | 89 + inst/load_cifar10.m | 62 + inst/load_cifar100.m | 56 + inst/load_fisheriris.m | 74 + inst/load_mnist.m | 80 + inst/mad.m | 64 + inst/mahal.m | 54 + inst/mean.m | 131 + inst/meandev.m | 62 + inst/meansq.m | 53 + inst/medAbsDev.m | 44 + inst/median.m | 94 + inst/moment.m | 106 + inst/nanconv.m | 59 + inst/nanfft.m | 58 + inst/nanfilter.m | 62 + inst/nanfilter1uc.m | 54 + inst/naninsttest.m | 186 ++ inst/nanmean.m | 41 + inst/nanstd.m | 71 + inst/nansum.m | 43 + inst/nantest.m | 308 ++ inst/normcdf.m | 60 + inst/norminv.m | 60 + inst/normpdf.m | 54 + inst/partcorrcoef.m | 166 + inst/percentile.m | 47 + inst/prctile.m | 50 + inst/quantile.m 
| 152 + inst/range.m | 65 + inst/rankcorr.m | 45 + inst/ranks.m | 199 ++ inst/rms.m | 58 + inst/roc.m | 357 +++ inst/row_col_deletion.m | 113 + inst/sem.m | 61 + inst/signrank.m | 154 + inst/skewness.m | 68 + inst/spearman.m | 45 + inst/statistic.m | 173 ++ inst/std.m | 124 + inst/sumskipnan.m | 195 ++ inst/sumsq.m | 50 + inst/tcdf.m | 65 + inst/test_sc.m | 298 ++ inst/tiedrank.m | 54 + inst/tinv.m | 54 + inst/tpdf.m | 49 + inst/train_lda_sparse.m | 145 + inst/train_sc.m | 965 ++++++ inst/trimean.m | 82 + inst/trimmean.m | 73 + inst/ttest.m | 161 + inst/ttest2.m | 128 + inst/var.m | 102 + inst/xcovf.m | 117 + inst/xptopen.m | 31 + inst/xval.m | 209 ++ inst/zScoreMedian.m | 46 + inst/zscore.m | 79 + src/Makefile.in | 250 ++ src/config.h.in | 209 ++ src/configure | 6017 +++++++++++++++++++++++++++++++++++++ src/configure.ac | 59 + src/covm_mex.cpp | 843 ++++++ src/histo_mex.cpp | 435 +++ src/kth_element.cpp | 212 ++ src/linear.cpp | 3128 +++++++++++++++++++ src/linear.h | 121 + src/linear_model_matlab.c | 212 ++ src/linear_model_matlab.h | 50 + src/make.m | 76 + src/mexTF.c | 285 ++ src/predict.c | 376 +++ src/str2array.cpp | 338 +++ src/sumskipnan_mex.cpp | 1026 +++++++ src/svm.cpp | 3219 ++++++++++++++++++++ src/svm.h | 141 + src/svm_model_matlab.c | 411 +++ src/svm_model_matlab.h | 52 + src/svmpredict_mex.cpp | 408 +++ src/svmtrain_mex.cpp | 533 ++++ src/train.c | 528 ++++ src/tron.cpp | 321 ++ src/tron.h | 74 + src/xptopen.cpp | 1155 +++++++ test/test_classify.m | 48 + test/test_fss.m | 77 + test/test_mex_accuracy.m | 95 + test/test_perf_skipnan.m | 56 + test/test_str2array.csv | 9 + test/test_str2array.m | 8 + test/test_train_sc.m | 95 + test/test_xptopen.m | 75 + test/test_xval.m | 61 + 141 files changed, 33427 insertions(+) create mode 100644 COPYING create mode 100644 DESCRIPTION create mode 100644 INDEX create mode 100644 Makefile create mode 100644 NEWS create mode 100644 doc/INSTALL create mode 100644 doc/README.TXT create mode 100644 inst/betacdf.m create 
mode 100644 inst/betainv.m create mode 100644 inst/betapdf.m create mode 100644 inst/bland_altman.m create mode 100644 inst/cat2bin.m create mode 100644 inst/cdfplot.m create mode 100644 inst/center.m create mode 100644 inst/classify.m create mode 100644 inst/coefficient_of_variation.m create mode 100644 inst/cor.m create mode 100644 inst/corrcoef.m create mode 100644 inst/corrplot.m create mode 100644 inst/cov.m create mode 100644 inst/covm.m create mode 100644 inst/cumsumskipnan.m create mode 100644 inst/decovm.m create mode 100644 inst/detrend.m create mode 100644 inst/ecdf.m create mode 100644 inst/fishers_exact_test.m create mode 100644 inst/flag_accuracy_level.m create mode 100644 inst/flag_implicit_significance.m create mode 100644 inst/flag_implicit_skip_nan.m create mode 100644 inst/flag_nans_occured.m create mode 100644 inst/fss.m create mode 100644 inst/geomean.m create mode 100644 inst/gini.m create mode 100644 inst/gscatter.m create mode 100644 inst/harmmean.m create mode 100644 inst/hist2res.m create mode 100644 inst/histo.m create mode 100644 inst/histo2.m create mode 100644 inst/histo3.m create mode 100644 inst/histo4.m create mode 100644 inst/iqr.m create mode 100644 inst/kappa.m create mode 100644 inst/knnsearch.m create mode 100644 inst/kolmogorov_smirnov.m create mode 100644 inst/kstest2.m create mode 100644 inst/kurtosis.m create mode 100644 inst/lmom.m create mode 100644 inst/load_cifar10.m create mode 100644 inst/load_cifar100.m create mode 100644 inst/load_fisheriris.m create mode 100644 inst/load_mnist.m create mode 100644 inst/mad.m create mode 100644 inst/mahal.m create mode 100644 inst/mean.m create mode 100644 inst/meandev.m create mode 100644 inst/meansq.m create mode 100644 inst/medAbsDev.m create mode 100644 inst/median.m create mode 100644 inst/moment.m create mode 100644 inst/nanconv.m create mode 100644 inst/nanfft.m create mode 100644 inst/nanfilter.m create mode 100644 inst/nanfilter1uc.m create mode 100644 inst/naninsttest.m 
create mode 100644 inst/nanmean.m create mode 100644 inst/nanstd.m create mode 100644 inst/nansum.m create mode 100644 inst/nantest.m create mode 100644 inst/normcdf.m create mode 100644 inst/norminv.m create mode 100644 inst/normpdf.m create mode 100644 inst/partcorrcoef.m create mode 100644 inst/percentile.m create mode 100644 inst/prctile.m create mode 100644 inst/quantile.m create mode 100644 inst/range.m create mode 100644 inst/rankcorr.m create mode 100644 inst/ranks.m create mode 100644 inst/rms.m create mode 100644 inst/roc.m create mode 100644 inst/row_col_deletion.m create mode 100644 inst/sem.m create mode 100644 inst/signrank.m create mode 100644 inst/skewness.m create mode 100644 inst/spearman.m create mode 100644 inst/statistic.m create mode 100644 inst/std.m create mode 100644 inst/sumskipnan.m create mode 100644 inst/sumsq.m create mode 100644 inst/tcdf.m create mode 100644 inst/test_sc.m create mode 100644 inst/tiedrank.m create mode 100644 inst/tinv.m create mode 100644 inst/tpdf.m create mode 100644 inst/train_lda_sparse.m create mode 100644 inst/train_sc.m create mode 100644 inst/trimean.m create mode 100644 inst/trimmean.m create mode 100644 inst/ttest.m create mode 100644 inst/ttest2.m create mode 100644 inst/var.m create mode 100644 inst/xcovf.m create mode 100644 inst/xptopen.m create mode 100644 inst/xval.m create mode 100644 inst/zScoreMedian.m create mode 100644 inst/zscore.m create mode 100644 src/Makefile.in create mode 100644 src/config.h.in create mode 100755 src/configure create mode 100644 src/configure.ac create mode 100644 src/covm_mex.cpp create mode 100644 src/histo_mex.cpp create mode 100644 src/kth_element.cpp create mode 100644 src/linear.cpp create mode 100644 src/linear.h create mode 100644 src/linear_model_matlab.c create mode 100644 src/linear_model_matlab.h create mode 100644 src/make.m create mode 100644 src/mexTF.c create mode 100644 src/predict.c create mode 100644 src/str2array.cpp create mode 100644 
src/sumskipnan_mex.cpp create mode 100644 src/svm.cpp create mode 100644 src/svm.h create mode 100644 src/svm_model_matlab.c create mode 100644 src/svm_model_matlab.h create mode 100644 src/svmpredict_mex.cpp create mode 100644 src/svmtrain_mex.cpp create mode 100644 src/train.c create mode 100644 src/tron.cpp create mode 100644 src/tron.h create mode 100644 src/xptopen.cpp create mode 100644 test/test_classify.m create mode 100644 test/test_fss.m create mode 100644 test/test_mex_accuracy.m create mode 100644 test/test_perf_skipnan.m create mode 100644 test/test_str2array.csv create mode 100644 test/test_str2array.m create mode 100644 test/test_train_sc.m create mode 100644 test/test_xptopen.m create mode 100644 test/test_xval.m diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..94a9ed0 --- /dev/null +++ b/COPYING @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. 
If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. 
Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. 
Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. 
If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. 
+ + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the 
material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. 
If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. 
+ + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..a356c66 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,12 @@ +Name: NaN +Version: 3.6.0 +Date: 2021-07-26 +Author: Alois Schlögl +Maintainer: Alois Schlögl +Title: The NaN-toolbox +Description: A statistics and machine learning toolbox for data with and w/o missing values +Depends: octave (>= 4.4.1) +License: GPLv3+ +Url: http://pub.ist.ac.at/~schloegl/matlab/NaN +Autoload: no + diff --git a/INDEX b/INDEX new file mode 100644 index 0000000..5339e1f --- /dev/null +++ b/INDEX @@ -0,0 +1,20 @@ +nan >> A statistics and machine learning toolbox +A statistics and machine learning toolbox for data with and w/o missing values + coefficient_of_variation geomean meansq skewness + covm cor cov corrcoef harmmean median statistic + detrend kurtosis moment std mad naninsttest nantest + nansum nanstd nanconv nanfft nanfilter + nanfilter1uc normpdf normcdf norminv meandev + percentile quantile rankcorr ranks rms sumskipnan + var mean sem spearman trimean tpdf tcdf tinv zscore + flag_implicit_significance xcovf train_sc test_sc + xval classify train_lda_sparse decovm gscatter mahal + cdfplot hist2res fss cat2bin ttest ttest2 xptopen + bland_altman cumsumskipnan range signrank histo + histo2 histo3 histo4 kolmogorov_smirnov kstest2 roc + kappa load_cifar100 load_cifar10 load_mnist + fishers_exact_test betapdf betacdf betainv + gini lmom corrplot knnsearch + + + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f6277a1 --- /dev/null +++ b/Makefile @@ -0,0 +1,243 @@ +## Copyright 2015-2016 Carnë Draug +## Copyright 2015-2016 Oliver Heimlich +## Copyright 2017 Julien Bect +## Copyright 2017 Olaf Till +## +## Copying and distribution of this file, with or without modification, +## are permitted in any medium without royalty provided the copyright +## notice and this notice are preserved. This file is offered as-is, +## without any warranty. 
+ +## Some basic tools (can be overridden using environment variables) +SED ?= sed +TAR ?= tar +GREP ?= grep +CUT ?= cut +TR ?= tr + +## Note the use of ':=' (immediate set) and not just '=' (lazy set). +## http://stackoverflow.com/a/448939/1609556 +package := $(shell $(GREP) "^Name: " DESCRIPTION | $(CUT) -f2 -d" " | \ +$(TR) '[:upper:]' '[:lower:]') +version := $(shell $(GREP) "^Version: " DESCRIPTION | $(CUT) -f2 -d" ") + +## These are the paths that will be created for the releases. +target_dir := target +release_dir := $(target_dir)/$(package)-$(version) +release_tarball := $(target_dir)/$(package)-$(version).tar.gz +html_dir := $(target_dir)/$(package)-html +html_tarball := $(target_dir)/$(package)-html.tar.gz +## Using $(realpath ...) avoids problems with symlinks due to bug +## #50994 in Octave's scripts/pkg/private/install.m. But at least the +## release directory above is needed in the relative form, for 'git +## archive --format=tar --prefix=$(release_dir)'. +real_target_dir := $(realpath .)/$(target_dir) +installation_dir := $(real_target_dir)/.installation +package_list := $(installation_dir)/.octave_packages +install_stamp := $(installation_dir)/.install_stamp + +## These can be set by environment variables which allow to easily +## test with different Octave versions. +ifndef OCTAVE +OCTAVE := octave +endif +OCTAVE := $(OCTAVE) --no-gui --silent --norc +MKOCTFILE ?= mkoctfile + +## Command used to set permissions before creating tarballs +FIX_PERMISSIONS ?= chmod -R a+rX,u+w,go-w,ug-s + +## Detect which VCS is used +vcs := $(if $(wildcard .hg),hg,$(if $(wildcard .git),git,unknown)) +ifeq ($(vcs),hg) +release_dir_dep := .hg/dirstate +endif +ifeq ($(vcs),git) +release_dir_dep := .git/index +endif + + +## .PHONY indicates targets that are not filenames +## (https://www.gnu.org/software/make/manual/html_node/Phony-Targets.html) +.PHONY: help + +## make will display the commands before running them.
Use @command +## to not display it (makes sense especially for echo). +help: + @echo "Targets:" + @echo " dist - Create $(release_tarball) for release." + @echo " html - Create $(html_tarball) for release." + @echo " release - Create both of the above and show md5sums." + @echo " install - Install the package in $(installation_dir), where it is not visible in a normal Octave session." + @echo " check - Execute package tests." + @echo " doctest - Test the help texts with the doctest package." + @echo " run - Run Octave with the package installed in $(installation_dir) in the path." + @echo " clean - Remove everything made with this Makefile." + + +## +## Recipes for release tarballs (package + html) +## + +.PHONY: release dist html clean-tarballs clean-unpacked-release + +## To make a release, build the distribution and html tarballs. +release: dist html + md5sum $(release_tarball) $(html_tarball) + @echo "Upload @ https://sourceforge.net/p/octave/package-releases/new/" + @echo " and note the changeset the release corresponds to" + +## dist and html targets are only PHONY/alias targets to the release +## and html tarballs. +dist: $(release_tarball) +html: $(html_tarball) + +## An implicit rule with a recipe to build the tarballs correctly. +%.tar.gz: % + $(TAR) -c -f - --posix -C "$(target_dir)/" "$(notdir $<)" | gzip -9n > "$@" + +clean-tarballs: + @echo "## Cleaning release tarballs (package + html)..." + -$(RM) $(release_tarball) $(html_tarball) + @echo + +## Create the unpacked package. +## +## Notes: +## * having ".hg/dirstate" (or ".git/index") as a prerequisite means it is +## only rebuilt if we are at a different commit. +## * the variable RM usually defaults to "rm -f" +## * having this recipe separate from the one that makes the tarball +## makes it easy to have packages in alternative formats (such as zip) +## * note that if a command needs to be run in a specific directory, +## the command to "cd" needs to be on the same line.
Each line restores +## the original working directory. +$(release_dir): $(release_dir_dep) + -$(RM) -r "$@" +ifeq (${vcs},hg) + hg archive --exclude ".hg*" --type files "$@" +endif +ifeq (${vcs},git) + git archive --format=tar --prefix="$@/" HEAD | $(TAR) -x + $(RM) "$@/.gitignore" +endif +## Don't fall back to run the supposed necessary contents of +## 'bootstrap' here. Users are better off if they provide +## 'bootstrap'. Administrators, checking build reproducibility, can +## put in the missing 'bootstrap' file if they feel they know its +## necessary contents. +ifneq (,$(wildcard src/bootstrap)) + cd "$@/src" && ./bootstrap && $(RM) -r "autom4te.cache" +endif +## Uncomment this if your src/Makefile.in has these targets for +## pre-building something for the release (e.g. documentation). +# cd "$@/src" && ./configure && $(MAKE) prebuild && \ +# $(MAKE) distclean && $(RM) Makefile +## + ${FIX_PERMISSIONS} "$@" + +run_in_place = $(OCTAVE) --eval ' pkg ("local_list", "$(package_list)"); ' \ + --eval ' pkg ("load", "$(package)"); ' + +html_options = --eval 'options = get_html_options ("octave-forge");' +## Uncomment this for package documentation. +# html_options = --eval 'options = get_html_options ("octave-forge");' \ +# --eval 'options.package_doc = "$(package).texi";' +$(html_dir): $(install_stamp) + $(RM) -r "$@"; + $(run_in_place) \ + --eval ' pkg load generate_html; ' \ + $(html_options) \ + --eval ' generate_package_html ("$(package)", "$@", options); '; + $(FIX_PERMISSIONS) "$@"; + +clean-unpacked-release: + @echo "## Cleaning unpacked release tarballs (package + html)..." + -$(RM) -r $(release_dir) $(html_dir) + @echo + +## +## Recipes for installing the package. 
+## + +.PHONY: install clean-install + +octave_install_commands = \ +' llist_path = pkg ("local_list"); \ + mkdir ("$(installation_dir)"); \ + load (llist_path); \ + local_packages(cellfun (@ (x) strcmp ("$(package)", x.name), local_packages)) = []; \ + save ("$(package_list)", "local_packages"); \ + pkg ("local_list", "$(package_list)"); \ + pkg ("prefix", "$(installation_dir)", "$(installation_dir)"); \ + pkg ("install", "-local", "-verbose", "$(release_tarball)"); ' + +## Install unconditionally. Maybe useful for testing installation with +## different versions of Octave. +install: $(release_tarball) + @echo "Installing package under $(installation_dir) ..." + $(OCTAVE) --eval $(octave_install_commands) + touch $(install_stamp) + +## Install only if installation (under target/...) is not current. +$(install_stamp): $(release_tarball) + @echo "Installing package under $(installation_dir) ..." + $(OCTAVE) --eval $(octave_install_commands) + touch $(install_stamp) + +clean-install: + @echo "## Cleaning installation under $(installation_dir) ..." + -$(RM) -r $(installation_dir) + @echo + + +## +## Recipes for testing purposes +## + +.PHONY: run doctest check + +## Start an Octave session with the package directories on the path for +## interactice test of development sources. +run: $(install_stamp) + $(run_in_place) --persist + +## Test example blocks in the documentation. Needs doctest package +## https://octave.sourceforge.io/doctest/index.html +doctest: $(install_stamp) + $(run_in_place) --eval 'pkg load doctest;' \ + --eval "targets = '$(shell (ls inst; ls src | $(GREP) .oct) | $(CUT) -f2 -d@ | $(CUT) -f1 -d.)';" \ + --eval "targets = strsplit (targets, ' '); doctest (targets);" + + +## Test package. +octave_test_commands = \ +' dirs = {"inst", "src"}; \ + dirs(cellfun (@ (x) isempty (a = stat (x)) || ! 
S_ISDIR (a.mode), dirs)) = []; \ + if (isempty (dirs)) error ("no \"inst\" or \"src\" directory"); exit (1); \ + else \ + dirs = \ + cellfun (@ (x) canonicalize_file_name (x), dirs, "UniformOutput", false); \ + __run_test_suite__ (dirs, {}); \ + endif ' +## the following works, too, but provides no overall summary output as +## __run_test_suite__ does: +## +## else cellfun (@runtests, horzcat (cellfun (@ (dir) ostrsplit (([~, dirs] = system (sprintf ("find %s -type d", dir))), "\n\r", true), dirs, "UniformOutput", false){:})); endif ' +check: $(install_stamp) + $(run_in_place) --eval $(octave_test_commands) + + +## +## CLEAN +## + +.PHONY: clean + +clean: clean-tarballs clean-unpacked-release clean-install + @echo "## Removing target directory (if empty)..." + -rmdir $(target_dir) + @echo + @echo "## Cleaning done" + @echo + diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..29faaf0 --- /dev/null +++ b/NEWS @@ -0,0 +1,242 @@ +2021-07-26: Release of NaN-toolbox 3.6.0 + +* corrplot, knnsearch + + +2021-02-15: Release of NaN-toolbox 3.5.3 + +* kappa: Youden index (YI), and SSE as return value +* roc: add maxYI as output value +* naninsttest: check ttest for NaN handling + + +2020-10-31: Release of NaN-toolbox 3.5.2 + +* new functions added + gini: Gini index + lmom: L-moments + betapdf, betacdf, betainv: + beta distribution functions + +* bug fixes: + bland-altman: fix ylabel + fix debian bug #964210: test tries to download data + fix #59335: "'load_fisheriris' fails on Windows" + fix #59336: "(nan) BIST for `ttest` fails" + this failed because betainv(...) is not available + the suggestion to add a dependency on statistics package + does not work, because of a conflict with statistics + (it would not handle NaNs in the intended way). + Therefore, functions for beta* distributions are included.
+ + +2020-07-12: Release of NaN-toolbox 3.5.0 + +* ttest: + - support output arguments CI and STATS + +* fishers_exact_test: + - use VPA from symbolic pkg if available; + - add unit tests; + - improve help + + +2019-11-16: Release of NaN-Tb v3.4.5 + +- fishers_exact_test: added + +- bug fixes for: + #57263: [octave forge] (nan) fails to build on a system without libblas (Octave built with OpenBLAS) + #57232: [octave forge] (nan) Spelling error in message of function str2array + #50248: [octave forge] (nan) Spelling error in the help string of the train function + #57228 "[octave forge] (nan) fails to build with undefined references in Octave 5 when LDFLAGS is set" +- gscatter: fix use of argument "doleg" +- address "warning: no return statement in function returning non-void" + +2019-10-27: Release of NaN-Tb v3.4.3 + +- Makefile.* + Add missing copyright/licenses + +- liblinear, libsvm: + add license from original author + +- roc.m: number of data points limited to about 5000 + add final data point at (1,1). + + +2019-09-23: Release of NaN-Tb v3.4 + +- quadratic classifiers: + check for positive-definiteness of covariance matrix + and return no classification (encoded as NaN), if not. + +- mad, meandev: + make sure source is ascii/utf-8 compliant + + +2019-06-08: Release of NaN-Tb v3.3 + +- detrend: the 2nd output (trend) can be interpolated and should + not contain NaNs. + +- train_sc: some supported classifiers should not be used + with the short-cut for two-class problems. + +- xval: report also results w/o cross-validation + +- ecdf: fix ylim in case data contains missing values. + + +2019-04-15: Release of NaN-Tb v3.2.3 + +- ROC is included.
ROC is derived + from the implementation in "biosig for octave and matlab" + biosig-code/biosig4matlab/t490_EvaluationCriteria/ + https://sourceforge.net/p/biosig/code/ci/master/tree/biosig4matlab/t490_EvaluationCriteria/ + +- load_cifar100 load_cifar10 load_mnist: + loading of various machine-learning databases + +- kolmogorov_smirnov: + Multiple two-sample Kolmogorov-Smirnov test + +- kstest2: + wrapper for kolmogorov_smirnov + +- requires Octave 3.8 or later + +- histo2, histo3: + bug fix for case when whole column contains NaNs + +- inst/signrank.m: add Wilcoxon signed-rank test + Unlike Octave's wilcoxon_test, this works also for + sample sizes smaller than N=25 + +- corrcoef: improve documentation on one- + and two-pass algorithms + +- histogram functions (histo.m) included + +- upgrade to liblinear 2.2.1 + weighting of samples not supported anymore + +- upgrade to libsvm 3.2.3 + weighting of samples not supported anymore + +- minor changes + + +2017-08-23: Release of NaN-Tb v3.1.4 + +- minor fixes: + license text, file permissions, + remove dependency on -lcholmod + + +2017-08-14: Release of NaN-Tb v3.1.3 + +- fix bug #43931: bug in help of corrcoef.m + reported by: locker@vp...
+- fix bug #47673: build failure for ppc64 architecture + reported by: Michel Normand +- bug #48771 "Link against the shared library libsvm" + had been fixed previously in Nov 2017 + +- tiedrank: increase matlab compatibility by allowing + third input argument + + +2017-01-10: Release of NaN-tb v3.1.2 + +- fix build when Octave does not support openmp + +2017-01-03: Release of NaN-tb v3.1.1 + +- add Makefile to build release + +2016-11-30: Release of NaN-tb v3.1.0 + +- fix bug 48426: build for Octave 4.2.0 +- prepare for using external libsvm and liblinear +- fix typos, address compiler warnings and improve documentation + +2016-08-10: Release of NaN-tb v3.0.3 +- tcdf: fix bug #48731 (thanks to Nir Krakauer) +- corrcoef: add comment on Matlab compatibility in help +- test_fss, load_fisheriris: + fix support on mingw platform (Windows) + +2016-07-30: Release of NaN-tb v3.0.2 +- Octave/Windows: setenv CC=gcc (fixes bug 47559) +- no OpenMP for Octave/Windows and MacOSX + + +2015-02-26: Release of NaN-tb v3.0.1 + +- fixes version number + two digit version number like 3.0 cannot be used in Octave-forge + +2015-09-12: Release of NaN-tb v2.8.3 + +- ready for gcc/g++ v5 +- honor environment variables CC and CXX +- fix installer issue on OpenSuse +- support debian hardening flags +- internal improvements (in type and macro usage) +- address a number of compiler warnings +- bug fixes + +2015-07-06: Release of NaN-tb v2.8.1 + +- fix nantest to avoid crashing octave 4.0.0 on windows + +2015-06-24: Release of NaN-tb v2.8.0 + +- fix Makefile for Octave4 on windows + (fixes #45363 and #44859) + +- check for sparse input matrices and + convert to full when needed + +2015-06-01: Release of NaN v.2.7.6 + +- improve accuracy of normcdf (bug #38170) + +2015-04-23: Release of NaN v2.7.5 + +- fix compiler issue with __isnan +- do not display diagnostic messages + +2015-04-12 + +- upgrade to libsvm-3.12 +- fix multi-threaded build (make -j) +- improve some tests + + +2015-03-31: Release of NaN
2.7.2 + +- bug fix in nanstd.m +- cross-compiler issues in histo_mex +- fix Makefile for use with Matlab +- address compatibility issue on debian/jessie +- minor issues (some compiler warnings are addressed) + + +2015-01-24: Release of NaN 2.7.1 + +- support for MacOSX added + (tested on Homebrew Octave) + + +2015-01-17: Release of NaN 2.7.0 + +Fix compatibility issues with core functions in Matlab and Octave +- zscore: order of input output argument +- mahal now gives the same result as Matlab +- sumskipnan allows DIM argument larger than ndims +- a few minor issues + +For changes of previous releases see: + http://pub.ist.ac.at/~schloegl/matlab/NaN/CHANGELOG diff --git a/doc/INSTALL b/doc/INSTALL new file mode 100644 index 0000000..b423054 --- /dev/null +++ b/doc/INSTALL @@ -0,0 +1,55 @@ + +Installing the NaN-tb for Octave and Matlab: +-------------------------------------------- +a) Extract files and save them in /your/directory/structure/to/NaN/ + +b) Include the path with one of the following commands: + + HOME = getenv('HOME'); % if needed, change path to /your/directory/structure/to/ + + addpath([HOME,'/NaN/']) + addpath([HOME,'/NaN/inst']) + addpath([HOME,'/NaN/src']) + + Make sure the functions in the NaN-toolbox are found before the default functions. + The NaN-toolbox contains some functions like mean, var, std, cor, cov, and corrcoef + which work equivalently to the original functions - the only exception is that + missing values (NaN) are skipped. [You can turn off that behavior by + setting flag_implicit_skip_nan(0) ]. + Alternatively, you can avoid this by including the directories at the end of the path + + addpath([HOME,'/NaN/'],'-end') + addpath([HOME,'/NaN/inst'],'-end') + addpath([HOME,'/NaN/src'],'-end') + +c) The use of mex-files is recommended for using the full capabilities of the NaN-toolbox. + Without the mex-files, SVM- and Liblinear classifiers are not available, and in some cases + slower m-functions are used.
+ + Setup your mex compiler using GCC, the GNU Compiler Collection (or some derivative like MinGW) + >> mex -setup + Run MAKE from the directory .../NaN/src/ + + Compiling the mex-files is well tested on Linux, but might not work completely on other platforms. + Specifically, on Windows you need to setup gnumex and gcc (typically mingw or cygwin). + (covm_mex and sumskipnan_mex are strongly recommended for performance reasons, + train.mex and svmtrain_mex are needed when using support vector machines) + Precompiled binaries are provided for 32bit Windows with Matlab 7.x (tested with 7.1 and 7.6). + Please note, for Matlab 7.2 or earlier, the pre-compiled mex-files need to be renamed to *.dll; + Maybe a *.lnk file with the extension *.dll will also do. + +d) run "naninsttest" from the Octave/Matlab command line prompt + >> naninsttest + In case of success, You should see the following message: + >> naninsttest + NANINSTTEST successful - your NaN-tools are correctly installed + + This checks whether the installation was successful. 
+ +----------------------- + + $Id$ + Copyright (c) 2000-2003,2005,2006,2009,2010,2011,2014 by Alois Schloegl + This is part of the NaN-toolbox + http://pub.ist.ac.at/~schloegl/matlab/NaN/ + diff --git a/doc/README.TXT b/doc/README.TXT new file mode 100644 index 0000000..bcd30bb --- /dev/null +++ b/doc/README.TXT @@ -0,0 +1,299 @@ +NaN-Tb: A statistics toolbox +------------------------------------------------------------ +Copyright (C) 2000-2021 Alois Schlögl + + +FEATURES of the NaN-tb: +----------------------- + - statistical toolbox + - machine learning and classification toolbox + - NaN's are treated as missing values + - supports weighting of data + - usage of multiple CPU cores + + - supports DIM argument + - fewer round-off errors using extended double + - fewer but more powerful functions (no nan-FUN needed) + - supports unbiased estimation + - fixes known bugs + - compatible with Matlab and Octave + - easy to use + - The toolbox is tested with Octave 4.4+ and Matlab 7.x + + +Currently are implemented: +-------------------------- +level 1: basic functions (not derived) + SUMSKIPNAN SUM is a built-in function and cannot be replaced. + For this reason, a different name (than SUM) had to be chosen. + SUMSKIPNAN is central, it implements skipping NaN's, the + DIM-argument and returns the number of valid elements, too.
+ COVM covariance estimation (several modes) + Round-off errors avoided by using internally extended accuracy + CUMSUMSKIPNAN Cumulative sum, skipping NaN's + DECOVM decomposes the extended covariance matrix into mean and cov + XCOVF cross-correlation function + FLAG_NANS_OCCURED returns 0 if no NaN's appeared in the input data + of the last call to one of the following functions, and 1 otherwise: + sumskipnan, covm, center, cor, coefficient_of_variation, corrcoef, geomean, harmmean, + kurtosis, mad, mean, meandev, meansq, moment, nanmean, nanstd, nansum, + rms, sem, skewness, statistic, std, var + FLAG_IMPLICIT_SKIP_NAN can be used to turn off and on the NaN-skipping behaviour. This can + be useful for debugging or for compatibility reasons. + FLAG_ACCURACY_LEVEL can be used to increase the accuracy of summations (sumskipnan and covm) + at the cost of speed. + LOAD_FISHERIRIS loads the famous Fisher iris data set + STR2ARRAY convert string to array - useful to extract numeric data from + delimited files + XPTOPEN read and write SAS Transport Format (XPT); reads ARFF and STATA files + + The following functions are experimental, not all effects of missing values are fully understood. + E.g. missing values can cause aliasing; also, effects on bandpass and highpass filters need to be investigated.
+ NANCONV convolution + NANCONV2 2-dimensional convolution + NANFILTER filter function + NANFFT Fourier transform + +level 2a: derived functions + MEAN mean (options: arithmetic, geometric, harmonic) + VAR variance + STD standard deviation + MEDIAN median (currently only for 2-dim matrices) + SEM standard error of the mean (does not depend on distribution) + TRIMMEAN trimmed mean + medAbsDev median absolute deviation + + MEANSQ mean square + RMS root mean square + + STATISTIC estimates various statistics at once + MOMENT moment + SKEWNESS skewness + KURTOSIS excess + +* IQR interquartile range + MAD mean absolute deviation +* RANGE range (max-min) + + CENTER removes mean + ZSCORE normalizes x to zero mean and variance 1 (z = (x-mean)/std) + zScoreMedian non-parametric z-score, normalizes x to zero median and 1/(1.483*median absolute deviation) + + HARMMEAN harmonic mean + GEOMEAN geometric mean + + NANTEST checks whether all functions have been replaced + DETREND detrending of data with missing values and non-equidistant sampled data + + COR correlation matrix + COV covariance matrix + CORRCOEF correlation coefficient, including rank correlation, + significance test and confidence intervals + SPEARMAN, RANKCORR Spearman's rank correlation coefficient. They might be replaced by CORRCOEF. 
+ PARTCORRCOEF partial correlation coefficient + RANKS calculates ranks for non-parametric statistics + TIEDRANK similar to RANKS, used for compatibility reasons + + QUANTILE q-th quantile + PRCTILE,PERCENTILE p-th percentile + TRIMEAN trimean + + BLAND_ALTMAN Bland-Altman plot + ECDF empirical cumulative distribution function + CDFPLOT plot empirical cumulative distribution function + GSCATTER scatter plot of grouped data + NORMPDF normal probability distribution + NORMCDF normal cumulative distribution + NORMINV inverse of the normal cumulative distribution + TPDF student probability distribution + TCDF student cumulative distribution + TINV inverse of the student cumulative distribution + NANSUM, NANSTD fixes for buggy versions included + TTEST paired t-test + TTEST2 (unpaired) t-test + SIGNRANK Wilcoxon's signed-rank test + +level 2b: classification, cross-validation + TRAIN_SC train classifier + TEST_SC test classifier + CLASSIFY classify data (no cross validation) + XVAL classify data with cross validation + KAPPA performance evaluation + TRAIN_LDA_SPARSE utility function + FSS feature subset selection and feature ranking + CAT2BIN converts categorical to binary data + SVMTRAIN_MEX libSVM-training algorithm + ROW_COL_DELETION heuristic to select rows and columns to remove missing values + + +REFERENCE(S): +---------------------------------- +[1] http://www.itl.nist.gov/ +[2] http://mathworld.wolfram.com/ + + +What is the difference to previous implementations? +=================================================== +1) The default behavior of previous implementations is that NaNs in the input +data result in NaNs in the output data. In many applications this behavior +is not what you want. In this implementation, NaNs are handled as missing values and +are skipped. + +2) In previous implementations the workaround was using different functions +like NANSUM, NANMEAN etc. In this toolbox, the same routines can be applied to +data with and without NaNs. 
This enables more natural (better read- and +understandable) applications. + +3) SUMSKIPNAN is central to the other functions. +It implements +- the DIMENSION-argument, +- handles NaNs as missing values or as exception signal (depending on a + hidden FLAG), +- and returns the number of valid elements (which are not NaNs) in the + second output argument. +(Note, NANSUM from Matlab does not support the DIM-argument, and NANSUM(NaN) +gives NaN instead of 0); + +4) [obsolete] + +5) The DIMENSION argument is implemented in most routines. +These should work in all Matlab and Octave versions. A workaround for a bug in +Octave versions <=2.1.35 is implemented. Also several functions from Matlab +have no support for the DIM argument (e.g. SKEWNESS, KURTOSIS, VAR) + +6) Compatible to previous Octave implementation +MEAN implements also the GEOMETRIC and HARMONIC mean. Handling of some special +cases has been removed because it is not necessary anymore. +MOMENT implements Mode 'ac' (absolute and/or central) moment as implemented +in Octave. + +7) Performance increase +In most numerical applications, NaN's should be simply skipped. Therefore, +it is efficient to skip NaN's in the default case. +In case an explicit check for NaN's is necessary, implicit exception +handling could be avoided. Eventually the overall performance could increase. + +8) More readable code +An explicit check for NaN's displays the importance of this special case. +Therefore, the application program might be more readable. + +9) ZSCORE, MAD, HARMMEAN and GEOMEAN +DIM-argument and skipping of NaN's implemented. None of these features is +implemented in the Matlab versions. + +10a) NANMEAN, NANVAR, NANMEDIAN +These are not necessary anymore. They are implemented in SUMSKIPNAN, MEAN, +VAR, STD and MEDIAN, respectively. + +10b) NANSUM, NANSTD +These functions are obsolete, too. However, previous implementations +do not always provide the expected result. 
Therefore, a correct +version is included for backward compatibility. + +11) GPL license +Permits to implement useful modifications. + +12) NORMPDF, NORMCDF, NORMINV +In the Matlab statistics toolbox V 3.0, NORMPDF, NORMCDF and NORMINV gave +incorrect results for SIGMA=0; A similar problem was observed in Octave +with NORMAL_INV, NORMAL_PDF, and NORMALCDF. + +The problem is fixed with this version. Furthermore, the check of the input +arguments is implemented simpler and easier in this version. + +13) TPDF, TCDF, TINV +In the Matlab statistics toolbox V3.0(12.1) and V4.0(13), TCDF and TINV do not handle NaNs +correctly. TINV returns 0 instead of NaN, TCDF stops with an error message. +In Stats-tb V2.2(R11) TINV has also the same problem. + +For these reasons, the NaN-tb is a bug fix. Furthermore, the check of the input +arguments is implemented simpler. Overall, the code becomes cleaner and leaner. + +14) NANCONV, NANCONV2, NANFFT, NANFILTER, NANFILTER1UC +are signal processing functions for graceful handling of data with +missing values. These functions are very experimental, because the behavior in +case of data with missing values is not fully investigated. +E.g. missing values can cause aliasing, and also the behavior of bandpass and highpass +filters is not sufficiently investigated. Therefore, these functions should be +used with care. + + +Q: WHY SKIPPING NaN's?: +------------------------ +A: Usually, NaN means that the value is not available. This meaning is most +common, even though many different reasons might cause NaN's. In statistics, NaN's +represent missing values, in biosignal processing such missing values might +have been caused by some recording error. Other reasons for NaN's are, +undetermined expressions like e.g. 0/0, inf-inf, data not available, unknown value, +not a numeric value, etc. + +If NaN has the meaning of a missing value, it is only consequent to say, the +sum of NaN's should be zero. Similar arguments hold for the other functions. 
+The mean of X is undefined if and only if X contains no numbers. The +implementation sum(X)/sum(~isnan(X)) gives 0/0=NaN, which is the desired +result. The variance of X is undefined if and only if X contains fewer than +2 numbers. + +In most numerical applications, NaN's should be simply skipped. Therefore, +it is efficient to skip NaN's in the default case. In the other cases, the +NaN's can still be checked explicitly. This could eventually result in a +more readable code and in improved performance, too. + + +Q: What if I need to check for NaN's: +------------------------------------- +A: You can always check whether there were some skipped NaN's in your +data with the command FLAG_NANS_OCCURED(). + +m = mean(x); +if flag_nans_occured() + % do your error handling, e.g. + error('there were NaN''s in x, ignore m'); +end; + +It is also easy to control the granularity of the checks + +flag_nans_occured(); % reset flag + % do any statistical analysis you want +if flag_nans_occured() + % check, whether some NaN's occurred. +end; + + +Installing the NaN-tb for Octave and Matlab: +-------------------------------------------- +a) Extract files and save them in /your/directory/structure/to/NaN/ + +b) Include the path with one of the following commands: + addpath('/your/directory/structure/to/NaN/') + path('/your/directory/structure/to/NaN/',path) + Make sure the functions in the NaN-toolbox are found before the default functions. + +c) run NANINSTTEST +This checks whether the installation was successful. + +d) Compile mex files: + This is useful to improve speed, and is required if you use weighted samples. + Check if precompiled binaries are provided. If your platform is not supported, + compile the C-Mex-functions using "make". + + Run NANINSTTEST again to check the stability of the compiled SUMSKIPNAN. 
+ + Copyright (C) 2000-2021 by Alois Schlögl + http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +LICENSE: + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see http://www.gnu.org/licenses/. + diff --git a/inst/betacdf.m b/inst/betacdf.m new file mode 100644 index 0000000..32f1dcc --- /dev/null +++ b/inst/betacdf.m @@ -0,0 +1,99 @@ +%% Copyright (C) 2012 Rik Wehbring +%% Copyright (C) 1995-2016 Kurt Hornik +%% +%% This program is free software: you can redistribute it and/or +%% modify it under the terms of the GNU General Public License as +%% published by the Free Software Foundation, either version 3 of the +%% License, or (at your option) any later version. +%% +%% This program is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% General Public License for more details. +%% +%% You should have received a copy of the GNU General Public License +%% along with this program; see the file COPYING. If not, see +%% http://www.gnu.org/licenses/. + +%% cdf = betacdf (x, a, b) +%% For each element of x, compute the cumulative distribution function +%% (CDF) at x of the Beta distribution with parameters a and b. + +%% Author: KH +%% Description: CDF of the Beta distribution + +%% Adapted for the use with Matlab and the NaN-toolbox. 
+ +function cdf = betacdf (x, a, b) + % Returns the Beta CDF with parameters a and b at each element of x; a and b are scalars or have the size of x. + if (nargin ~= 3) + print_usage (); + end + % a non-scalar a or b must have the same size as x; scalars are expanded to the size of x below + if (~ isscalar (a) || ~ isscalar (b)) + % [retval, x, a, b] = common_size (x, a, b); + retval = ~isscalar(a) && any(size(x)~=size(a)); + retval = retval || (~isscalar(b) && any(size(x)~=size(b))); + + if (retval > 0) + error ('betacdf: X, A, and B must be of common size or scalars'); + end + end + if isscalar(a) + a = repmat(a,size(x)); + end + if isscalar(b) + b = repmat(b,size(x)); + end + + % complex input is rejected + if (~isreal (x) || ~isreal (a) || ~isreal (b)) + error ('betacdf: X, A, and B must not be complex'); + end + % the result is single as soon as one input is single; it starts as all zeros + if (isa (x, 'single') || isa (a, 'single') || isa (b, 'single')) + cdf = zeros (size (x), 'single'); + else + cdf = zeros (size (x)); + end + % NaN input or parameters that are not > 0 (including NaN) give NaN + k = isnan (x) | ~(a > 0) | ~(b > 0); + cdf(k) = NaN; + % at and above x = 1 the CDF is 1; entries with x <= 0 keep the initial 0 + k = (x >= 1) & (a > 0) & (b > 0); + cdf(k) = 1; + % inside (0,1) the value is taken from betainc + k = (x > 0) & (x < 1) & (a > 0) & (b > 0); + if (isscalar (a) && isscalar (b)) + cdf(k) = betainc (x(k), a, b); + else + cdf(k) = betainc (x(k), a(k), b(k)); + end + +end + + +%!shared x,y +%! x = [-1 0 0.5 1 2]; +%! 
y = [0 0 0.75 1 1]; +%!assert (betacdf (x, ones (1,5), 2*ones (1,5)), y) +%!assert (betacdf (x, 1, 2*ones (1,5)), y) +%!assert (betacdf (x, ones (1,5), 2), y) +%!assert (betacdf (x, [0 1 NaN 1 1], 2), [NaN 0 NaN 1 1]) +%!assert (betacdf (x, 1, 2*[0 1 NaN 1 1]), [NaN 0 NaN 1 1]) +%!assert (betacdf ([x(1:2) NaN x(4:5)], 1, 2), [y(1:2) NaN y(4:5)]) + +%% Test class of input preserved +%!assert (betacdf ([x, NaN], 1, 2), [y, NaN]) +%!assert (betacdf (single ([x, NaN]), 1, 2), single ([y, NaN])) +%!assert (betacdf ([x, NaN], single (1), 2), single ([y, NaN])) +%!assert (betacdf ([x, NaN], 1, single (2)), single ([y, NaN])) + +%% Test input validation +%!error betacdf () +%!error betacdf (1) +%!error betacdf (1,2) +%!error betacdf (1,2,3,4) +%!error betacdf (ones (3), ones (2), ones (2)) +%!error betacdf (ones (2), ones (3), ones (2)) +%!error betacdf (ones (2), ones (2), ones (3)) diff --git a/inst/betainv.m b/inst/betainv.m new file mode 100644 index 0000000..7f2d54a --- /dev/null +++ b/inst/betainv.m @@ -0,0 +1,145 @@ +%% Copyright (C) 2012 Rik Wehbring +%% Copyright (C) 1995-2016 Kurt Hornik +%% Copyright (C) 2020 Alois Schlögl +%% +%% This program is free software: you can redistribute it and/or +%% modify it under the terms of the GNU General Public License as +%% published by the Free Software Foundation, either version 3 of the +%% License, or (at your option) any later version. +%% +%% This program is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% General Public License for more details. +%% +%% You should have received a copy of the GNU General Public License +%% along with this program; see the file COPYING. If not, see +%% . + +%% inv = betainv (x, a, b) +%% For each element of x, compute the quantile (the inverse of the CDF) +%% at x of the Beta distribution with parameters a and b. 
+ +%% Author: KH +%% Description: Quantile function of the Beta distribution + +%% Adapted for the use with Matlab and the NaN-toolbox. + + +function inv = betainv (x, a, b) + % Returns, for each probability in x, the quantile of the Beta distribution with parameters a and b, found iteratively from betacdf and betapdf. + if (nargin ~= 3) + print_usage (); + end + % a non-scalar a or b must have the same size as x; scalars are expanded to the size of x below + if (~isscalar (a) || ~isscalar (b)) + retval = ~isscalar(a) && any(size(x)~=size(a)); + retval = retval || (~isscalar(b) && any(size(x)~=size(b))); + + if (retval > 0) + error ('betainv: X, A, and B must be of common size or scalars'); + end + end + if isscalar(a) + a = repmat(a,size(x)); + end + if isscalar(b) + b = repmat(b,size(x)); + end + + if (~isreal (x) || ~isreal (a) || ~isreal (b)) + error ('betainv: X, A, and B must not be complex'); + end + % the result is single as soon as one input is single; it starts as all zeros + if (isa (x, 'single') || isa (a, 'single') || isa (b, 'single')) + inv = zeros (size (x), 'single'); + else + inv = zeros (size (x)); + end + % probabilities outside [0,1], NaN input and parameters that are not > 0 give NaN + k = (x < 0) | (x > 1) | ~(a > 0) | ~(b > 0) | isnan (x); + inv(k) = NaN; + % x == 1 maps to 1; x == 0 keeps the initial 0 + k = (x == 1) & (a > 0) & (b > 0); + inv(k) = 1; + % the remaining probabilities in (0,1) are solved iteratively + k = find ((x > 0) & (x < 1) & (a > 0) & (b > 0)); + if (~ isempty (k)) + if (~isscalar (a) || ~isscalar (b)) + a = a(k); + b = b(k); + y = a ./ (a + b); + else + y = a / (a + b) * ones (size (k)); + end + x = x(k); + % the starting value y is a/(a+b); myeps is the eps of its class + if (isa (y, 'single')) + myeps = eps ('single'); + else + myeps = eps; + end + % starting values too close to 0 or 1 are moved to sqrt(myeps) resp. 1-sqrt(myeps) + l = find (y < myeps); + if (any (l)) + y(l) = sqrt (myeps) * ones (length (l), 1); + end + l = find (y > 1 - myeps); + if (any (l)) + y(l) = 1 - sqrt (myeps) * ones (length (l), 1); + end + % Newton-type update y - (betacdf(y)-x)/betapdf(y); an update leaving (myeps, 1-myeps) is replaced by a step that shrinks the distance to the boundary by a factor of 10; stops when the largest step is below sqrt(myeps) or after 40 iterations + y_new = y; + loopcnt = 0; + while (1), + y_old = y_new; + h = (betacdf (y_old, a, b) - x) ./ betapdf (y_old, a, b); + y_new = y_old - h; + ind = find (y_new <= myeps); + if (any (ind)) + y_new(ind) = y_old(ind) / 10; + end + ind = find (y_new >= 1 - myeps); + if (any (ind)) + y_new(ind) = 1 - (1 - y_old(ind)) / 10; + end + h = y_old - y_new; + loopcnt = loopcnt+1; + if ( (max(abs(h)) < sqrt(myeps)) || (loopcnt >= 40)) break; end + end + + if (loopcnt == 40) + warning ('betainv: calculation failed to converge for some values'); + end + inv(k) = 
y_new; + end + +end + + +%!shared x +%! x = [-1 0 0.75 1 2]; +%!assert (betainv (x, ones (1,5), 2*ones (1,5)), [NaN 0 0.5 1 NaN], eps) +%!assert (betainv (x, 1, 2*ones (1,5)), [NaN 0 0.5 1 NaN], eps) +%!assert (betainv (x, ones (1,5), 2), [NaN 0 0.5 1 NaN], eps) +%!assert (betainv (x, [1 0 NaN 1 1], 2), [NaN NaN NaN 1 NaN]) +%!assert (betainv (x, 1, 2*[1 0 NaN 1 1]), [NaN NaN NaN 1 NaN]) +%!assert (betainv ([x(1:2) NaN x(4:5)], 1, 2), [NaN 0 NaN 1 NaN]) + +%% Test class of input preserved +%!assert (betainv ([x, NaN], 1, 2), [NaN 0 0.5 1 NaN NaN], eps) +%!assert (betainv (single ([x, NaN]), 1, 2), single ([NaN 0 0.5 1 NaN NaN])) +%!assert (betainv ([x, NaN], single (1), 2), single ([NaN 0 0.5 1 NaN NaN]), eps('single')) +%!assert (betainv ([x, NaN], 1, single (2)), single ([NaN 0 0.5 1 NaN NaN]), eps('single')) + +%% Test input validation +%!error betainv () +%!error betainv (1) +%!error betainv (1,2) +%!error betainv (1,2,3,4) +%!error betainv (ones (3), ones (2), ones (2)) +%!error betainv (ones (2), ones (3), ones (2)) +%!error betainv (ones (2), ones (2), ones (3)) +%!error betainv (i, 2, 2) +%!error betainv (2, i, 2) +%!error betainv (2, 2, i) diff --git a/inst/betapdf.m b/inst/betapdf.m new file mode 100644 index 0000000..386ca26 --- /dev/null +++ b/inst/betapdf.m @@ -0,0 +1,136 @@ +%% Copyright (C) 2012 Rik Wehbring +%% Copyright (C) 1995-2016 Kurt Hornik +%% Copyright (C) 2010 Christos Dimitrakakis +%% Copyright (C) 2020 Alois Schlögl +%% +%% This program is free software: you can redistribute it and/or +%% modify it under the terms of the GNU General Public License as +%% published by the Free Software Foundation, either version 3 of the +%% License, or (at your option) any later version. +%% +%% This program is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% General Public License for more details. 
+%% +%% You should have received a copy of the GNU General Public License +%% along with this program; see the file COPYING. If not, see +%% http://www.gnu.org/licenses/. + +%% pdf = betapdf (x, a, b) +%% For each element of x, compute the probability density function (PDF) +%% at x of the Beta distribution with parameters a and b. + +%% Author: KH , CD +%% Description: PDF of the Beta distribution + +%% Adapted for the use with Matlab and the NaN-toolbox. + +function pdf = betapdf (x, a, b) + % Returns the Beta PDF with parameters a and b at each element of x; a and b are scalars or have the size of x. NaN input or parameters that are not > 0 always give NaN. + if (nargin ~= 3) + print_usage (); + end + % a non-scalar a or b must have the same size as x; scalars are expanded to the size of x below + if (~ isscalar (a) || ~ isscalar (b)) + % [retval, x, a, b] = common_size (x, a, b); + retval = ~isscalar(a) && any(size(x)~=size(a)); + retval = retval || (~isscalar(b) && any(size(x)~=size(b))); + + if (retval > 0) + error ('betapdf: X, A, and B must be of common size or scalars'); + end + end + if isscalar(a) + a = repmat(a,size(x)); + end + if isscalar(b) + b = repmat(b,size(x)); + end + + % complex input is rejected + if (~isreal (x) || ~isreal (a) || ~isreal (b)) + error ('betapdf: X, A, and B must not be complex'); + end + % the result is single as soon as one input is single; it starts as all zeros, so x outside [0,1] yields 0 + if (isa (x, 'single') || isa (a, 'single') || isa (b, 'single')); + pdf = zeros (size (x), 'single'); + else + pdf = zeros (size (x)); + end + % invalid parameters and NaN input; none of the masks below may overwrite these entries + k = ~(a > 0) | ~(b > 0) | isnan (x); + pdf(k) = NaN; + % interior points: density evaluated in log space via gammaln + k = (x > 0) & (x < 1) & (a > 0) & (b > 0) & ((a ~= 1) | (b ~= 1)); + if (isscalar (a) && isscalar (b)), + pdf(k) = exp ((a - 1) * log (x(k)) + (b - 1) * log (1 - x(k)) + gammaln (a + b) - gammaln (a) - gammaln (b)); + else + pdf(k) = exp ((a(k) - 1) .* log (x(k)) + (b(k) - 1) .* log (1 - x(k)) + gammaln (a(k) + b(k)) - gammaln (a(k)) - gammaln (b(k))); + end + + %% Most important special cases when the density is finite. 
+ k = (x == 0) & (a == 1) & (b > 0) & (b ~= 1); + if (isscalar (a) && isscalar (b)) + pdf(k) = exp (gammaln (a + b) - gammaln (a) - gammaln (b)); + else + pdf(k) = exp (gammaln (a(k) + b(k)) - gammaln (a(k)) - gammaln (b(k))); + end + + k = (x == 1) & (b == 1) & (a > 0) & (a ~= 1); + if (isscalar (a) && isscalar (b)) + pdf(k) = exp (gammaln (a + b) - gammaln (a) - gammaln (b)); + else + pdf(k) = exp (gammaln (a(k) + b(k)) - gammaln (a(k)) - gammaln (b(k))); + end + + k = (x >= 0) & (x <= 1) & (a == 1) & (b == 1); + pdf(k) = 1; + + %% Other special case when the density at the boundary is infinite (valid parameters only, so the NaN set above is kept). + k = (x == 0) & (a > 0) & (a < 1) & (b > 0); + pdf(k) = Inf; + + k = (x == 1) & (b > 0) & (b < 1) & (a > 0); + pdf(k) = Inf; + +end + + +%!shared x,y +%! x = [-1 0 0.5 1 2]; +%! y = [0 2 1 0 0]; +%!assert (betapdf (x, ones (1,5), 2*ones (1,5)), y) +%!assert (betapdf (x, 1, 2*ones (1,5)), y) +%!assert (betapdf (x, ones (1,5), 2), y) +%!assert (betapdf (x, [0 NaN 1 1 1], 2), [NaN NaN y(3:5)]) +%!assert (betapdf (x, 1, 2*[0 NaN 1 1 1]), [NaN NaN y(3:5)]) +%!assert (betapdf ([x, NaN], 1, 2), [y, NaN]) +%!assert (betapdf ([0 1 0], [-1 2 0.5], [2 -1 -1]), [NaN NaN NaN]) +%% Test class of input preserved +%!assert (betapdf (single ([x, NaN]), 1, 2), single ([y, NaN])) +%!assert (betapdf ([x, NaN], single (1), 2), single ([y, NaN])) +%!assert (betapdf ([x, NaN], 1, single (2)), single ([y, NaN])) + +%% Beta (1/2,1/2) == arcsine distribution +%!test +%! x = rand (10,1); +%! y = 1./(pi * sqrt (x.*(1-x))); +%! 
assert (betapdf (x, 1/2, 1/2), y, 50*eps); + +%% Test large input values to betapdf +%!assert (betapdf (0.5, 1000, 1000), 35.678, 1e-3) + +%% Test input validation +%!error betapdf () +%!error betapdf (1) +%!error betapdf (1,2) +%!error betapdf (1,2,3,4) +%!error betapdf (ones (3), ones (2), ones (2)) +%!error betapdf (ones (2), ones (3), ones (2)) +%!error betapdf (ones (2), ones (2), ones (3)) +%!error betapdf (i, 2, 2) +%!error betapdf (2, i, 2) +%!error betapdf (2, 2, i) + + diff --git a/inst/bland_altman.m b/inst/bland_altman.m new file mode 100644 index 0000000..eaa0316 --- /dev/null +++ b/inst/bland_altman.m @@ -0,0 +1,121 @@ +function RES = bland_altman(data,group,arg3) +% BLAND_ALTMAN shows the Bland-Altman plot of two columns of measurements +% and computes several summary results. +% +% bland_altman(m1, m2 [,group]) +% bland_altman(data [, group]) +% R = bland_altman(...) +% +% m1,m2 are two columns with the same number of elements +% containing the measurements. m1,m2 can be also combined +% in a single two column data matrix. +% group [optional] indicates which measurements belong to the same group +% This is useful to account for repeated measurements. +% +% R struct with the summary results (correlation, bias, variance, ...) +% References: +% [1] JM Bland and DG Altman, Measuring agreement in method comparison studies. +% Statistical Methods in Medical Research, 1999; 8; 135. +% doi:10.1177/09622802990080204 +% [2] P.S. Myles, Using the Bland– Altman method to measure agreement with repeated measures +% British Journal of Anaesthesia 99(3):309–11 (2007) +% doi:10.1093/bja/aem214 + +% $Id$ +% Copyright (C) 2010,2011 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. 
+% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + +if nargin<2, group = []; end; +if nargin<3, arg3 = []; end; +% calling form bland_altman(m1,m2[,group]): merge the two columns and shift the arguments +if (size(data,2)==1) + data = [data, group]; + group = arg3; +end; + +% D: difference m1-m2, M: mean of both measurements +D = data * [1;-1]; +M = data * [1;1]/2; + +RES.corrcoef = corrcoef(data(:,1),data(:,2),'spearman'); +[RES.cc,RES.p] = corrcoef(M,D,'spearman'); +if (RES.p<0.05) + warning('A regression model according to section 3.2 [1] should be used'); + %% TODO: implement support for this type of data. + RES.a = [ones(size(data,1),1),D]\M; + RES.b = [ones(size(data,1),1),M]\D; +end; + +if isempty(group) + G = [1:size(data,1)]'; + m = ones(size(data,1),1); + d = D; + RES.Bias = mean(d,1); + RES.Var = var(d); + +elseif ~isempty(group) + %% TODO: this is not finished + warning('analysis of data with repetitions is experimental - it might yield incorrect results - you are warned.!') + [G,I,J] = unique (group); + R = zeros(size(data)); + m = repmat(NaN,length(G),1); + n = repmat(NaN,length(G),1); + d = repmat(NaN,length(G),1); + d2 = repmat(NaN,length(G),1); + data2 = repmat(NaN,length(G),size(data,2)); + SW2 = repmat(NaN,length(G),size(data,2)); + for i = 1:length(G), + ix = find(group==G(i)); + n(i) = length(ix); +% IX((i-1)*N+1:i*N) = ix(ceil(rand(N,1)*n(i))); + + [R(ix,:), data2(i,:)] = center(data(ix,:),1); + d(i) = mean(D(ix,:),1); + m(i) = mean(M(ix,:),1); + d2(i) = mean(D(ix,:).^2,1); + RES.SW2(i,:) = var(data(ix,:),[],1); + RES.avg(i,:) = mean(data(ix,:),1); + end; + + W = 1./n(J); + RES.SSW = sumskipnan(R.^2,1,W); + RES.SSB = var(data,[],1,W)*sum(W)*(sum(W)-1); + RES.sigma2_w = 
RES.SSW/(sum(W)*(length(G)-1)); + RES.sigma2_u = RES.SSB/(sum(W)*(length(G)-1)) - RES.sigma2_w/(length(G)); + RES.group = bland_altman(data2); % FIXME: this plot shows incorrect interval, it does not account for the group/repeated samples. + RES.repeatability_coefficient1 = 2.77*sqrt(var(R,1,1)); % variance with factor group removed + RES.repeatability_coefficient = 2.77*sqrt(mean(RES.SW2,1)); % mean of the per-group variances stored in RES.SW2 (the local SW2 is never filled) + RES.std_d_ = std(d); + RES.std_D_ = std(D); + RES.std_m_ = std(m); + + RES.n = n; + return; + % NOTE: the statements up to the end of this branch are unreachable (unfinished code after the return above) + D = d; + M = m; +% RES.sigma2_dw = + + RES.Bias = mean(d,1,[],n); +end; + +% plot difference against mean with the zero line, the bias and bias +/- 1.96*std(D) +plot(M,D,'o', [min(M),max(M)]', [0,0]','k--', [min(M),max(M)]', [1,1,1; 0,1.96,-1.96]'*[RES.Bias;std(D)]*[1,1], 'k-'); +xlabel('mean'); +ylabel('difference (m1-m2)'); + diff --git a/inst/cat2bin.m b/inst/cat2bin.m new file mode 100644 index 0000000..e889ceb --- /dev/null +++ b/inst/cat2bin.m @@ -0,0 +1,90 @@ +function [B,BLab]=cat2bin(D, Label, MODE) +% CAT2BIN converts categorical into binary data +% each category of each column in D is converted into a logical column +% +% B = cat2bin(C); +% [B,BinLabel] = cat2bin(C,Label); +% [B,BinLabel] = cat2bin(C,Label,MODE) +% +% C categorical data +% B binary data +% Label description of each column in C +% BinLabel description of each column in B +% MODE default [], ignores NaN +% 'notIgnoreNAN' includes binary column for NaN +% 'IgnoreZeros' zeros do not get a separate category +% 'IgnoreZeros+NaN' zeros and NaN are ignored +% +% example: +% cat2bin([1;2;5;1;5]) results in +% 1 0 0 +% 0 1 0 +% 0 0 1 +% 1 0 0 +% 0 0 1 + +% $Id$ +% Copyright (C) 2009 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. 
+% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + +if nargin<3, + MODE = []; +end; + +% convert data +B = []; +% c: number of binary columns already used by previous input columns, k1: running label index +c = 0; +k1 = 0; +BLab = []; +for m = 1:size(D,2) + h = histo_mex(D(:,m)); + x = h.X(h.H>0); + if strcmpi(MODE,'notIgnoreNaN') + ; + elseif strcmpi(MODE,'IgnoreZeros') + x = x(x~=0); + elseif strcmpi(MODE,'IgnoreZeros+NaN') + x = x((x~=0) & (x==x)); + else + x = x(x==x); + end; + for k = 1:size(D,1), + if ~isnan(D(k,m)) + B(k, c + find(D(k,m)==x)) = 1; + elseif ~isempty(x) && isnan(x(end)), % x is empty when every category of this column was dropped + B(k, c + length(x)) = 1; + end; + end; + + c = c + length(x); + if nargout>1, + for k = 1:length(x), + k1 = k1+1; + if (nargin<2) || isempty(Label) % Label is optional + BLab{k1} = ['#',int2str(m),':',int2str(x(k))]; + else + BLab{k1} = [Label{m},':',int2str(x(k))]; + end; + end; + end; +end; + + +%!assert(cat2bin([1;2;5;1;5]),[1,0,0;0,1,0;0,0,1;1,0,0;0,0,1]) + diff --git a/inst/cdfplot.m b/inst/cdfplot.m new file mode 100644 index 0000000..48815a5 --- /dev/null +++ b/inst/cdfplot.m @@ -0,0 +1,57 @@ +function [h,stats] = cdfplot(X, varargin) +% CDFPLOT plots empirical cumulative distribution function +% +% cdfplot(X) +% cdfplot(X, FMT) +% cdfplot(X, PROPERTY, VALUE,...) +% h = cdfplot(...) +% [h,stats] = cdfplot(X) +% +% X contains the data vector +% (matrix data is currently changed to a vector, this might change in future) +% FMT,PROPERTY,VALUE +% are used for formatting; see HELP PLOT for more details +% h graphics handle to the cdf curve +% stats +% a struct containing various summary statistics including +% mean, std, median, min, max. 
+% +% see also: ecdf, median, statistics, hist2res, plot +% +% References: + +% $Id$ +% Copyright (C) 2009,2010 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + +% the cdf is the normalized cumulative sum of the histogram counts; ix1/ix2 repeat every index so that the curve is drawn as a staircase starting at 0 +his = histo_mex(X(:)); +cdf = cumsum(his.H,1) ./ sum(his.H,1); +ix1 = ceil ([1:2*size(his.X,1)]'/2); +ix2 = floor([2:2*size(his.X,1)]'/2); +hh = plot (his.X(ix1), [0; cdf(ix2)], varargin{:}); +% outputs are only assigned when requested +if nargout>0, + h = hh; +end; +if nargout>1, + stats = hist2res(his); + stats.median = quantile(his,.5); +end; + + diff --git a/inst/center.m b/inst/center.m new file mode 100644 index 0000000..9dfb68b --- /dev/null +++ b/inst/center.m @@ -0,0 +1,63 @@ +function [i,S] = center(i,DIM,W) +% CENTER removes the mean +% +% [z,mu] = center(x,DIM,W) +% removes mean x along dimension DIM +% +% x input data +% DIM dimension +% 1: column +% 2: row +% default or []: first DIMENSION, with more than 1 element +% W weights to compute weighted mean (default: [], all weights = 1) +% numel(W) must be equal to size(x,DIM) +% +% features: +% - can deal with NaN's (missing values) +% - weighting of data +% - dimension argument +% - compatible to Matlab and Octave +% +% see also: SUMSKIPNAN, MEAN, STD, DETREND, ZSCORE +% +% REFERENCE(S): + +% This program is free 
software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + + +% Copyright (C) 2000-2019 by Alois Schloegl +% This is part of the NaN-toolbox. For more details see +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +if any(size(i)==0); return; end; + +if nargin<3, + W = []; +end; +if nargin>1, + [S,N] = sumskipnan(i,DIM,W); +else + [S,N] = sumskipnan(i,[],W); +end; + +S = S./N; +szi = size(i); +szs = size(S); +if length(szs) +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ +if nargin<4 + TYPE = 'linear'; +end; + +if strcmp(TYPE,'linear') + TYPE = 'LDA'; +elseif strcmp(TYPE,'quadratic') + TYPE = 'QDA2'; % result is closer to Matlab +elseif strcmp(TYPE,'diagLinear') + TYPE = 'NBC'; +elseif strcmp(TYPE,'diagQuadratic') + TYPE = 'NBC'; +elseif strcmp(TYPE,'mahalanobis') + TYPE = 'MDA'; +end; + +[group,I,classlabel] = unique(classlabel); + +CC = train_sc(training,classlabel,TYPE); +R = test_sc(CC,sample); +CLASS = group(R.classlabel); + +if nargout>1, + R = test_sc(CC,training,[],classlabel); + ERR = 1-R.ACC; +end; + +if nargout>2, + warning('output arguments POSTERIOR,LOGP and COEF not supported') + POSTERIOR = []; + LOGP = []; + COEF = []; +end; + diff --git a/inst/coefficient_of_variation.m b/inst/coefficient_of_variation.m new file mode 100644 index 0000000..f0d7f0a --- /dev/null +++ b/inst/coefficient_of_variation.m @@ -0,0 +1,44 @@ +function cv=coefficient_of_variation(i,DIM) +% COEFFICIENT_OF_VARIATION returns STD(X)/MEAN(X) +% +% cv=coefficient_of_variation(x [,DIM]) +% cv=std(x)/mean(x) +% +% see also: SUMSKIPNAN, MEAN, STD +% +% REFERENCE(S): +% http://mathworld.wolfram.com/VariationCoefficient.html + +% $Id$ +% Copyright (C) 1997-2003 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . 
+ + +if nargin<2, + DIM = find(size(i)>1,1); + if isempty(DIM), DIM=1; end; +end; + +[S,N,SSQ] = sumskipnan(i,DIM); + +% sqrt((SSQ-S.*S./N)./max(N-1,0))/(S./N); % = std(i)/mean(i) + +cv = sqrt(SSQ.*N./(S.*S)-1); + +%if flag_implicit_unbiased_estim, + cv = cv.*sqrt(N./max(N-1,0)); +%end; diff --git a/inst/cor.m b/inst/cor.m new file mode 100644 index 0000000..7c9d9d1 --- /dev/null +++ b/inst/cor.m @@ -0,0 +1,101 @@ +function [r2] = cor(X,Y); +% COR calculates the correlation matrix +% X and Y can contain missing values encoded with NaN. +% NaN's are skipped, NaN do not result in a NaN output. +% (Its assumed that the occurence of NaN's is uncorrelated) +% The output gives NaN only if there are insufficient input data +% +% COR(X); +% calculates the (auto-)correlation matrix of X +% COR(X,Y); +% calculates the crosscorrelation between X and Y +% +% c = COR(...); +% c is the correlation matrix +% +% W weights to compute weighted mean (default: []) +% if W=[], all weights are 1. +% number of elements in W must match size(x,DIM) + +% NOTE: Under certain circumstances (Missing values and small number of samples) +% abs(COR) can be larger than 1. +% If you need abs(COR)<=1, use CORRCOEF. CORRCOEF garantees abs(COR)<=1. +% +% see also: SUMSKIPNAN, COVM, COV, CORRCOEF +% +% REFERENCES: +% http://mathworld.wolfram.com/CorrelationCoefficient.html + + +% $Id$ +% Copyright (C) 2000-2004,2010 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + + +if nargin==1 + Y = []; +elseif nargin==0 + fprintf(2,'Error COR: Missing argument(s)\n'); +end; + +[r1,c1]=size(X); +if (c1>r1), + fprintf(2,'Warning COR: Covariance is ill-defined, because of too less observations (rows).\n'); +end; + +[r1,c1]=size(X); +if ~isempty(Y) + [r2,c2]=size(Y); + if r1~=r2, + fprintf(2,'Error COR: X and Y must have the same number of observations (rows).\n'); + return; + end; +else + [r2,c2]=size(X); +end; + +if (c1>r1) || (c2>r2), + fprintf(2,'Warning COR: Covariance is ill-defined, because of too less observations (rows).\n'); +end; + +if ~isempty(Y), + [S1,N1,SSQ1] = sumskipnan(X,1); + [S2,N2,SSQ2] = sumskipnan(Y,1); + + NN = double(~isnan(X)')*double(~isnan(Y)); + X(isnan(X)) = 0; % skip NaN's + Y(isnan(Y)) = 0; % skip NaN's + CC = X'*Y; + + M1 = S1./N1; + M2 = S2./N2; + cc = CC./NN - M1'*M2; + r2 = cc./sqrt((SSQ1./N1-M1.*M1)'*(SSQ2./N2-M2.*M2)); + +else + [S,N,SSQ] = sumskipnan(X,1); + + NN = double(~isnan(X)')*double(~isnan(X)); + X(isnan(X)) = 0; % skip NaN's + CC = X'*X; + + M = S./N; + cc = CC./NN - M'*M; + v = (SSQ./N- M.*M); %max(N-1,0); + r2 = cc./sqrt(v'*v); +end; diff --git a/inst/corrcoef.m b/inst/corrcoef.m new file mode 100644 index 0000000..f0a0621 --- /dev/null +++ b/inst/corrcoef.m @@ -0,0 +1,392 @@ +function [R,sig,ci1,ci2,nan_sig] = corrcoef(X,Y,varargin) +% CORRCOEF calculates the correlation matrix from pairwise correlations. +% The input data can contain missing values encoded with NaN. +% Missing data (NaN's) are handled by pairwise deletion [15]. +% In order to avoid possible pitfalls, use case-wise deletion or +% or check the correlation of NaN's with your data (see below). +% A significance test for testing the Hypothesis +% 'correlation coefficient R is significantly different to zero' +% is included. +% +% [...] 
= CORRCOEF(X); +% calculates the (auto-)correlation matrix of X +% [...] = CORRCOEF(X,Y); +% calculates the crosscorrelation between X and Y +% NOTE: matlab's CORRCOEF(X,Y) returns the result of CORRCOEF([X,Y]) +% use CORRCOEF([X,Y]) if your software should be compatible with both. +% +% [...] = CORRCOEF(..., Mode); +% Mode='Pearson' or 'parametric' [default] +% gives the correlation coefficient +% also known as the 'product-moment coefficient of correlation' +% or 'Pearson''s correlation' [1]. Currently, the unstable one-pass or +% single pass method [7,8] is implemented. If this is a problem, use +% instead the two-pass method by doing +% corrcoef(center(X),center(Y)) +% Mode='Spearman' gives 'Spearman''s Rank Correlation Coefficient' +% This replaces SPEARMAN.M +% Mode='Rank' gives a nonparametric Rank Correlation Coefficient +% This is the "Spearman rank correlation with proper handling of ties" +% This replaces RANKCORR.M +% +% [...] = CORRCOEF(..., param1, value1, param2, value2, ... ); +% param value +% 'Mode' type of correlation +% 'Pearson','parametric' +% 'Spearman' +% 'rank' +% 'rows' how do deal with missing values encoded as NaN's. +% 'complete': remove all rows with at least one NaN +% 'pairwise': [default] +% 'alpha' 0.01 : significance level to compute confidence interval +% +% [R,p,ci1,ci2,nan_sig] = CORRCOEF(...); +% R is the correlation matrix +% R(i,j) is the correlation coefficient r between X(:,i) and Y(:,j) +% p gives the significance of R +% It tests the null hypothesis that the product moment correlation coefficient is zero +% using Student's t-test on the statistic t = r*sqrt(N-2)/sqrt(1-r^2) +% where N is the number of samples (Statistics, M. Spiegel, Schaum series). +% p > alpha: do not reject the Null hypothesis: 'R is zero'. +% p < alpha: The alternative hypothesis 'R is larger than zero' is true with probability (1-alpha). 
+% ci1 lower (1-alpha) confidence interval +% ci2 upper (1-alpha) confidence interval +% If no alpha is provided, the default alpha is 0.01. This can be changed with function flag_implicit_significance. +% nan_sig p-value whether H0: 'NaN''s are not correlated' could be correct +% if nan_sig < alpha, H1 ('NaNs are correlated') is very likely. +% +% The result is only valid if the occurence of NaN's is uncorrelated. In +% order to avoid this pitfall, the correlation of NaN's should be checked +% or case-wise deletion should be applied. +% Case-Wise deletion can be implemented +% ix = ~any(isnan([X,Y]),2); +% [...] = CORRCOEF(X(ix,:),Y(ix,:),...); +% +% Correlation (non-random distribution) of NaN's can be checked with +% [nan_R,nan_sig]=corrcoef(X,isnan(X)) +% or [nan_R,nan_sig]=corrcoef([X,Y],isnan([X,Y])) +% or [R,p,ci1,ci2] = CORRCOEF(...); +% +% Further recommandation related to the correlation coefficient: +% + LOOK AT THE SCATTERPLOTS to make sure that the relationship is linear +% + Correlation is not causation because +% it is not clear which parameter is 'cause' and which is 'effect' and +% the observed correlation between two variables might be due to the action of other, unobserved variables. +% +% see also: SUMSKIPNAN, COVM, COV, COR, SPEARMAN, RANKCORR, RANKS, +% PARTCORRCOEF, flag_implicit_significance +% +% REFERENCES: +% on the correlation coefficient +% [ 1] http://mathworld.wolfram.com/CorrelationCoefficient.html +% [ 2] http://www.geography.btinternet.co.uk/spearman.htm +% [ 3] Hogg, R. V. and Craig, A. T. Introduction to Mathematical Statistics, 5th ed. New York: Macmillan, pp. 338 and 400, 1995. +% [ 4] Lehmann, E. L. and D'Abrera, H. J. M. Nonparametrics: Statistical Methods Based on Ranks, rev. ed. Englewood Cliffs, NJ: Prentice-Hall, pp. 292, 300, and 323, 1998. +% [ 5] Press, W. H.; Flannery, B. P.; Teukolsky, S. A.; and Vetterling, W. T. Numerical Recipes in FORTRAN: The Art of Scientific Computing, 2nd ed. 
Cambridge, England: Cambridge University Press, pp. 634-637, 1992 +% [ 6] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html +% [ 7] https://stats.stackexchange.com/questions/94056/instability-of-one-pass-algorithm-for-correlation-coefficient +% [ 8] https://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient#For_a_sample +% on the significance test of the correlation coefficient +% [11] http://www.met.rdg.ac.uk/cag/STATS/corr.html +% [12] http://www.janda.org/c10/Lectures/topic06/L24-significanceR.htm +% [13] http://faculty.vassar.edu/lowry/ch4apx.html +% [14] http://davidmlane.com/hyperstat/B134689.html +% [15] http://www.statsoft.com/textbook/stbasic.html%Correlations +% others +% [20] http://www.tufts.edu/~gdallal/corr.htm +% [21] Fisher transformation http://en.wikipedia.org/wiki/Fisher_transformation + +% Copyright (C) 2000-2004,2008,2009,2011,2016,2018 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see . 
+
+% Features:
+% + handles missing values (encoded as NaN's)
+% + pairwise deletion of missing data
+% + checks independence of missing values (NaNs)
+% + parametric and non-parametric (rank) correlation
+% + Pearson's correlation
+% + Spearman's rank correlation
+% + Rank correlation (non-parametric, Spearman rank correlation with proper handling of ties)
+% + is fast, using an efficient algorithm O(n.log(n)) for calculating the ranks
+% + significance test for null-hypthesis: r=0
+% + confidence interval included
+% - rank correlation works for cell arrays, too (no check for missing values).
+% + compatible with Octave and Matlab
+
+global FLAG_NANS_OCCURED;
+
+NARG = nargout; % needed because nargout is not reentrant in Octave, and corrcoef is recursive
+mode = []; Mode = []; % Mode must exist even when only name/value pairs without 'Mode' are passed
+
+if nargin==1
+    Y = [];
+    Mode='Pearson';
+elseif nargin==0
+    fprintf(2,'Error CORRCOEF: Missing argument(s)\n');
+elseif nargin>1
+    if ischar(Y)
+        varg = [Y,varargin];
+        Y=[];
+    else
+        varg = varargin;
+    end;
+
+    if length(varg)<1,
+        Mode = 'Pearson';
+    elseif length(varg)==1,
+        Mode = varg{1};
+    else
+        for k = 2:2:length(varg),
+            mode = setfield(mode,lower(varg{k-1}),varg{k});
+        end;
+        if isfield(mode,'mode')
+            Mode = mode.mode;
+        end;
+    end;
+end;
+if isempty(Mode) Mode='pearson'; end;
+Mode=[Mode,blanks(8)]; % pad so that the Mode(1:4), Mode(1:7) and Mode(1:8) tests never index out of range
+
+
+
+FLAG_WARNING = warning; % save warning status
+warning('off');
+
+[r1,c1]=size(X);
+if ~isempty(Y)
+    [r2,c2]=size(Y);
+    if r1~=r2,
+        fprintf(2,'Error CORRCOEF: X and Y must have the same number of observations (rows).\n');
+        warning(FLAG_WARNING); return; % restore warning status before leaving early
+    end;
+    NN = real(~isnan(X)')*real(~isnan(Y));
+else
+    [r2,c2]=size(X);
+    NN = real(~isnan(X)')*real(~isnan(X));
+end;
+
+%%%%% generate combinations using indices for pairwise calculation of the correlation
+YESNAN = any(isnan(X(:))) | any(isnan(Y(:)));
+if YESNAN,
+    FLAG_NANS_OCCURED=(1==1);
+    if isfield(mode,'rows')
+        if strcmp(mode.rows,'complete')
+            ix = ~any(isnan([X,Y]),2); % case-wise deletion: keep only rows without any NaN
+            X = X(ix,:);
+            if ~isempty(Y)
+                Y = Y(ix,:);
+            end;
+            YESNAN = 0;
+            NN = size(X,1);
+        
elseif strcmp(mode.rows,'all') + fprintf(1,'Warning: data contains NaNs, rows=pairwise is used.'); + %%NN(NN < size(X,1)) = NaN; + elseif strcmp(mode.rows,'pairwise') + %%% default + end; + end; +end; +if isempty(Y), + IX = ones(c1)-diag(ones(c1,1)); + [jx, jy ] = find(IX); + [jxo,jyo] = find(IX); + R = eye(c1); +else + IX = sparse([],[],[],c1+c2,c1+c2,c1*c2); + IX(1:c1,c1+(1:c2)) = 1; + [jx,jy] = find(IX); + + IX = ones(c1,c2); + [jxo,jyo] = find(IX); + R = zeros(c1,c2); +end; + +if strcmp(lower(Mode(1:7)),'pearson'); + % see http://mathworld.wolfram.com/CorrelationCoefficient.html + if ~YESNAN, + [S,N,SSQ] = sumskipnan(X,1); + if ~isempty(Y), + [S2,N2,SSQ2] = sumskipnan(Y,1); + CC = X'*Y; + M1 = S./N; + M2 = S2./N2; + cc = CC./NN - M1'*M2; + R = cc./sqrt((SSQ./N-M1.*M1)'*(SSQ2./N2-M2.*M2)); + else + CC = X'*X; + M = S./N; + cc = CC./NN - M'*M; + v = SSQ./N - M.*M; %max(N-1,0); + R = cc./sqrt(v'*v); + end; + else + if ~isempty(Y), + X = [X,Y]; + end; + for k = 1:length(jx), + %ik = ~any(isnan(X(:,[jx(k),jy(k)])),2); + ik = ~isnan(X(:,jx(k))) & ~isnan(X(:,jy(k))); + [s,n,s2] = sumskipnan(X(ik,[jx(k),jy(k)]),1); + v = (s2-s.*s./n)./n; + cc = X(ik,jx(k))'*X(ik,jy(k)); + cc = cc/n(1) - prod(s./n); + %r(k) = cc./sqrt(prod(v)); + R(jxo(k),jyo(k)) = cc./sqrt(prod(v)); + end; + end + +elseif strcmp(lower(Mode(1:4)),'rank'); + % see [ 6] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html + if ~YESNAN, + if isempty(Y) + R = corrcoef(ranks(X)); + else + R = corrcoef(ranks(X),ranks(Y)); + end; + else + if ~isempty(Y), + X = [X,Y]; + end; + for k = 1:length(jx), + %ik = ~any(isnan(X(:,[jx(k),jy(k)])),2); + ik = ~isnan(X(:,jx(k))) & ~isnan(X(:,jy(k))); + il = ranks(X(ik,[jx(k),jy(k)])); + R(jxo(k),jyo(k)) = corrcoef(il(:,1),il(:,2)); + end; + X = ranks(X); + end; + +elseif strcmp(lower(Mode(1:8)),'spearman'); + % see [ 6] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html + if ~isempty(Y), + X = [X,Y]; + end; + + n = repmat(nan,c1,c2); + + if 
~YESNAN, + iy = ranks(X); % calculates ranks; + + for k = 1:length(jx), + [R(jxo(k),jyo(k)),n(jxo(k),jyo(k))] = sumskipnan((iy(:,jx(k)) - iy(:,jy(k))).^2); % NN is the number of non-missing values + end; + else + for k = 1:length(jx), + %ik = ~any(isnan(X(:,[jx(k),jy(k)])),2); + ik = ~isnan(X(:,jx(k))) & ~isnan(X(:,jy(k))); + il = ranks(X(ik,[jx(k),jy(k)])); + % NN is the number of non-missing values + [R(jxo(k),jyo(k)),n(jxo(k),jyo(k))] = sumskipnan((il(:,1) - il(:,2)).^2); + end; + X = ranks(X); + end; + R = 1 - 6 * R ./ (n.*(n.*n-1)); + +elseif strcmp(lower(Mode(1:7)),'partial'); + fprintf(2,'Error CORRCOEF: use PARTCORRCOEF \n',Mode); + + return; + +elseif strcmp(lower(Mode(1:7)),'kendall'); + fprintf(2,'Error CORRCOEF: mode ''%s'' not implemented yet.\n',Mode); + + return; +else + fprintf(2,'Error CORRCOEF: unknown mode ''%s''\n',Mode); +end; + +if (NARG<2), + warning(FLAG_WARNING); % restore warning status + return; +end; + + +% CONFIDENCE INTERVAL +if isfield(mode,'alpha') + alpha = mode.alpha; +elseif exist('flag_implicit_significance','file'), + alpha = flag_implicit_significance; +else + alpha = 0.01; +end; +% fprintf(1,'CORRCOEF: confidence interval is based on alpha=%f\n',alpha); + + +% SIGNIFICANCE TEST +R(isnan(R))=0; +tmp = 1 - R.*R; +tmp(tmp<0) = 0; % prevent tmp<0 i.e. 
imag(t)~=0 +t = R.*sqrt(max(NN-2,0)./tmp); + +if exist('t_cdf','file'); + sig = t_cdf(t,NN-2); +elseif exist('tcdf','file')>1; + sig = tcdf(t,NN-2); +else + fprintf('CORRCOEF: significance test not completed because of missing TCDF-function\n') + sig = repmat(nan,size(R)); +end; +sig = 2 * min(sig,1 - sig); + + +if NARG<3, + warning(FLAG_WARNING); % restore warning status + return; +end; + + +tmp = R; +%tmp(ix1 | ix2) = nan; % avoid division-by-zero warning +z = log((1+tmp)./(1-tmp))/2; % Fisher transformation [21] +%sz = 1./sqrt(NN-3); % standard error of z +sz = sqrt(2)*erfinv(1-alpha)./sqrt(NN-3); % confidence interval for alpha of z + +ci1 = tanh(z-sz); +ci2 = tanh(z+sz); + +%ci1(isnan(ci1))=R(isnan(ci1)); % in case of isnan(ci), the interval limits are exactly the R value +%ci2(isnan(ci2))=R(isnan(ci2)); + +if (NARG<5) || ~YESNAN, + nan_sig = repmat(NaN,size(R)); + warning(FLAG_WARNING); % restore warning status + return; +end; + +%%%%% ----- check independence of NaNs (missing values) ----- +[nan_R, nan_sig] = corrcoef(X,double(isnan(X))); + +% remove diagonal elements, because these have not any meaning % +nan_sig(isnan(nan_R)) = nan; +% remove diagonal elements, because these have not any meaning % +nan_R(isnan(nan_R)) = 0; + +if 0, any(nan_sig(:) < alpha), + tmp = nan_sig(:); % Hack to skip NaN's in MIN(X) + min_sig = min(tmp(~isnan(tmp))); % Necessary, because Octave returns NaN rather than min(X) for min(NaN,X) + fprintf(1,'CORRCOFF Warning: Missing Values (i.e. NaNs) are not independent of data (p-value=%f)\n', min_sig); + fprintf(1,' Its recommended to remove all samples (i.e. 
rows) with any missing value (NaN).\n'); + fprintf(1,' The null-hypotheses (NaNs are uncorrelated) is rejected for the following parameter pair(s).\n'); + [ix,iy] = find(nan_sig < alpha); + disp([ix,iy]) +end; + +%%%%% ----- end of independence check ------ + +warning(FLAG_WARNING); % restore warning status +return; + diff --git a/inst/corrplot.m b/inst/corrplot.m new file mode 100644 index 0000000..70ae079 --- /dev/null +++ b/inst/corrplot.m @@ -0,0 +1,58 @@ +function RES = corrplot(data, varargin) +% CORRPLOT displays the correlation plot +% +% corrplot(data) +% corrplot(data,'type',TYPE) +% [R,PValue,H] = corrplot(data,Name,Value) +% +% Input: +% data +% TYPE: 'Pearson' (default), 'Kendall', 'Spearman' +% +% +% Copyright (C) 2021 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+
+Mode=[]; % requested correlation type ('type' option); parsed but not used further in this function
+alpha=0.05; % significance level ('alpha' option); parsed but not used further in this function
+
+k=1;
+while k<numel(varargin), % options live in varargin; nargin also counts DATA and would index past the end
+    if strcmpi(varargin{k},'type')
+        Mode=varargin{k+1};
+        k=k+1;
+    elseif strcmpi(varargin{k},'alpha')
+        alpha=varargin{k+1};
+        k=k+1; % skip the value, as in the 'type' branch
+    end
+
+    k=k+1;
+end
+
+[nr,nc]=size(data);
+
+for k1=1:nc
+for k2=1:nc
+    subplot(nc,nc,k1*nc+k2-nc)
+    plot(data(:,k1),data(:,k2),'d')
+end
+end
+
+R = corrcoef(data); if nargout>0, RES = R; end; % hand the result to the declared output RES when requested
+
+
diff --git a/inst/cov.m b/inst/cov.m
new file mode 100644
index 0000000..843873a
--- /dev/null
+++ b/inst/cov.m
@@ -0,0 +1,95 @@
+function CC = cov(X,Y,Mode)
+% COV covariance matrix
+% X and Y can contain missing values encoded with NaN.
+% NaN's are skipped, NaN do not result in a NaN output.
+% The output gives NaN only if there are insufficient input data
+% The mean is removed from the data.
+%
+% Remark: for data contains missing values, the resulting
+% matrix might not be positiv definite, and its elements have magnitudes
+% larger than one. This ill-behavior is more likely for small sample
+% sizes, but there is no garantee that the result "behaves well" for larger
+% sample sizes. If you want the a "well behaved" result (i.e. positive
+% definiteness and magnitude of elements not larger than 1), use CORRCOEF.
+% However, COV is faster than CORRCOEF and might be good enough in some cases.
+%
+% C = COV(X [,Mode]);
+% calculates the (auto-)correlation matrix of X
+% C = COV(X,Y [,Mode]);
+% calculates the crosscorrelation between X and Y.
+% C(i,j) is the correlation between the i-th and jth
+% column of X and Y, respectively.
+% NOTE: Octave and Matlab have (in some special cases) incompatible implemenations.
+% This implementation follows Octave. If the result could be ambigous or
+% incompatible, a warning will be presented in Matlab. To avoid this warning use:
+% a) use COV([X(:),Y(:)]) if you want the traditional Matlab result.
+% b) use C = COV([X,Y]), C = C(1:size(X,2),size(X,2)+1:size(C,2)); if you want to be compatible with this software.
+% +% Mode = 0 [default] scales C by (N-1) +% Mode = 1 scales C by N. +% +% see also: COVM, COR, CORRCOEF, SUMSKIPNAN +% +% REFERENCES: +% http://mathworld.wolfram.com/Covariance.html + +% $Id$ +% Copyright (C) 2000-2003,2005,2009,2011,2012 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + + +if nargin==1 + Mode = 0; + Y = []; +elseif nargin==2, + % if all(size(Y)==1) & any(Y==[0,1]); % This is not compatible with octave + % short-circuit evaluation is required + % but for compatibility to matlab, && is avoided + SW = all(size(Y)==1); + if SW, SW = any(Y==[0,1]); end; + if SW, + Mode = Y; + Y = []; + else + Mode = 0; + end; +elseif nargin==3, + +else + fprintf(2,'Error COV: invalid number of arguments\n'); +end; + +if ~exist('OCTAVE_VERSION','builtin') && ~isempty(Y) && (size(X,2)+size(Y,2)~=2), + % COV in Matlab is differently defined than COV in Octave. + % For compatibility reasons, this branch reflects the difference. + fprintf(2,'Warning NaN/COV: This kind of use of COV is discouraged because it produces different results for Matlab and Octave. 
\n'); + fprintf(2,' (a) the traditional Matlab result can be obtained with: C = COV([X(:),Y(:)]).\n'); + fprintf(2,' (b) the traditional Octave result can be obtained with: C = COV([X,Y]); C = C(1:size(X,2),size(X,2)+1:size(C,2)).\n'); + + if numel(Y)~=numel(X), + error('The lengths of X and Y must match.'); + end; + X = [X(:),Y(:)]; + Y = []; +end; + +if isempty(Y) + CC = covm(X,['D',int2str(Mode>0)]); +else + CC = covm(X,Y,['D',int2str(Mode>0)]); +end; + diff --git a/inst/covm.m b/inst/covm.m new file mode 100644 index 0000000..a1214dd --- /dev/null +++ b/inst/covm.m @@ -0,0 +1,254 @@ +function [CC,NN] = covm(X,Y,Mode,W) +% COVM generates covariance matrix +% X and Y can contain missing values encoded with NaN. +% NaN's are skipped, NaN do not result in a NaN output. +% The output gives NaN only if there are insufficient input data +% +% COVM(X,Mode); +% calculates the (auto-)correlation matrix of X +% COVM(X,Y,Mode); +% calculates the crosscorrelation between X and Y +% COVM(...,W); +% weighted crosscorrelation +% +% Mode = 'M' minimum or standard mode [default] +% C = X'*X; or X'*Y correlation matrix +% +% Mode = 'E' extended mode +% C = [1 X]'*[1 X]; % l is a matching column of 1's +% C is additive, i.e. it can be applied to subsequent blocks and summed up afterwards +% the mean (or sum) is stored on the 1st row and column of C +% +% Mode = 'D' or 'D0' detrended mode +% the mean of X (and Y) is removed. If combined with extended mode (Mode='DE'), +% the mean (or sum) is stored in the 1st row and column of C. +% The default scaling is factor (N-1). +% Mode = 'D1' is the same as 'D' but uses N for scaling. +% +% C = covm(...); +% C is the scaled by N in Mode M and by (N-1) in mode D. +% [C,N] = covm(...); +% C is not scaled, provides the scaling factor N +% C./N gives the scaled version. 
+% +% see also: DECOVM, XCOVF + +% $Id$ +% Copyright (C) 2000-2005,2009 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + + +global FLAG_NANS_OCCURED; + +if nargin<3, + W = []; + if nargin==2, + if isnumeric(Y), + Mode='M'; + else + Mode=Y; + Y=[]; + end; + elseif nargin==1, + Mode = 'M'; + Y = []; + elseif nargin==0, + error('Missing argument(s)'); + end; + +elseif (nargin==3) && isnumeric(Y) && ~isnumeric(Mode); + W = []; + +elseif (nargin==3) && ~isnumeric(Y) && isnumeric(Mode); + W = Mode; + Mode = Y; + Y = []; + +elseif (nargin==4) && ~isnumeric(Mode) && isnumeric(Y); + ; %% ok +else + error('invalid input arguments'); +end; + +Mode = upper(Mode); + +[r1,c1]=size(X); +if ~isempty(Y) + [r2,c2]=size(Y); + if r1~=r2, + error('X and Y must have the same number of observations (rows).'); + end; +else + [r2,c2]=size(X); +end; + +persistent mexFLAG2; +persistent mexFLAG; +if isempty(mexFLAG2) + mexFLAG2 = exist('covm_mex','file'); +end; +if isempty(mexFLAG) + mexFLAG = exist('sumskipnan_mex','file'); +end; + + +if ~isempty(W) + W = W(:); + if (r1~=numel(W)) + error('Error COVM: size of weight vector does not fit number of rows'); + end; + %w = spdiags(W(:),0,numel(W),numel(W)); + %nn = sum(W(:)); + nn = sum(W); +else + nn = r1; +end; + + +if mexFLAG2 && mexFLAG && ~isempty(W), + %% the mex-functions here 
are much slower than the m-scripts below + %% however, the mex-functions support weighting of samples. + if isempty(FLAG_NANS_OCCURED), + %% mex-files require that FLAG_NANS_OCCURED is not empty, + %% otherwise, the status of NAN occurence can not be returned. + FLAG_NANS_OCCURED = logical(0); % default value + end; + + if any(Mode=='D') || any(Mode=='E'), + [S1,N1] = sumskipnan(X,1,W); + if ~isempty(Y) + [S2,N2] = sumskipnan(Y,1,W); + else + S2 = S1; N2 = N1; + end; + if any(Mode=='D'), % detrending mode + X = X - ones(r1,1)*(S1./N1); + if ~isempty(Y) + Y = Y - ones(r1,1)*(S2./N2); + end; + end; + end; + + if issparse(X) || issparse(Y), + fprintf(2,'sumskipnan: sparse matrix converted to full matrix\n'); + X=full(X); + Y=full(Y); + end; + + [CC,NN] = covm_mex(real(X), real(Y), FLAG_NANS_OCCURED, W); + %% complex matrices + if ~isreal(X) && ~isreal(Y) + [iCC,inn] = covm_mex(imag(X), imag(Y), FLAG_NANS_OCCURED, W); + CC = CC + iCC; + end; + if isempty(Y) Y = X; end; + if ~isreal(X) + [iCC,inn] = covm_mex(imag(X), real(Y), FLAG_NANS_OCCURED, W); + CC = CC - i*iCC; + end; + if ~isreal(Y) + [iCC,inn] = covm_mex(real(X), imag(Y), FLAG_NANS_OCCURED, W); + CC = CC + i*iCC; + end; + + if any(Mode=='D') && ~any(Mode=='1'), % 'D1' + NN = max(NN-1,0); + end; + if any(Mode=='E'), % extended mode + NN = [nn, N2; N1', NN]; + CC = [nn, S2; S1', CC]; + end; + + +elseif ~isempty(W), + + error('Error COVM: weighted COVM requires sumskipnan_mex and covm_mex but it is not available'); + + %% weighted covm without mex-file support + %% this part is not working. + +elseif ~isempty(Y), + if (~any(Mode=='D') && ~any(Mode=='E')), % if Mode == M + NN = real(X==X)'*real(Y==Y); + FLAG_NANS_OCCURED = any(NN(:). 
+ +% $Id$ +% Copyright (C) 2011 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +%%% TODO: implement as mex-function + +i = isnan(x); +x(i) = 0; + +if nargin==2, + x = cumsum(x,DIM); + x(i) = NaN; +elseif nargin==1, + x = cumsum(x); + x(i) = NaN; +else + help cumsumskipnan +end; + + + diff --git a/inst/decovm.m b/inst/decovm.m new file mode 100644 index 0000000..c504c0a --- /dev/null +++ b/inst/decovm.m @@ -0,0 +1,77 @@ +function [mu,sd,COV,xc,M,R2]=decovm(XCN,NN) +% decompose extended covariance matrix into mean (mu), +% standard deviation, the (pure) Covariance (COV), +% correlation (xc) matrix and the correlation coefficients R2. +% NaN's are condsidered as missing values. +% [mu,sd,COV,xc,N,R2]=decovm(ECM[,NN]) +% +% ECM is the extended covariance matrix +% NN is the number of elements, each estimate (in ECM) is based on +% +% see also: MDBC, COVM, R2 + +% Copyright (c) 1999-2002,2009,2019 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ +[r,c]=size(XCN); +if r~=c, + fprintf(2,'Warning DECOVM: input argument is not a square matrix\n'); + XCN = covm(XCN,'E'); + c = c + 1; +else + M = XCN(1,1); + if nargin<2, + XCN = XCN/(XCN(1,1)); + else %if nargin==2 + XCN = XCN./(NN); + end; + + if any(isnan(XCN(:))), + warning('DECOVM: Extended Covariance Matrix should not contain NaN''s'); + end; + if 0, %det(XCN)<0; % check removed for performance reasons + warning('DECOVM: Extended Covariance Matrix must be non-negative definite'); + end; +end; + +mu = XCN(1,2:c); +COV = XCN(2:c,2:c) - mu'*mu; +sd = sqrt(diag(COV))'; +if nargout<4, return; end; +xc = COV./(sd'*sd); +M = XCN(1,1); +if nargout<6, return; end; +R2 = xc.*xc; + +return; + +mu=XCN(2:N,1)/XCN(1,1); +COV=(XCN(2:N,2:N)/XCN(1,1)-XCN(2:N,1)*XCN(1,2:N)/XCN(1,1)^2); +sd=sqrt(diag(COV)); +xc=COV./(sd*sd'); + +% function [ECM] = ecovm(signal); +% Generates extended Covariance matrix, +% ECM= [l signal]'*[l signal]; % l is a matching column of 1's +% ECM is additive, i.e. it can be applied to subsequent blocks and summed up afterwards +% [ECM1] = ecovm(s1); +% [ECM2] = ecovm(s1); +% [ECM] = ecovm([s1;s2]); +% ECM1+ECM2==ECM; +% +% SS=sum(signal); ECM=[[size(signal,1),SS];[SS',signal'*signal]]; diff --git a/inst/detrend.m b/inst/detrend.m new file mode 100644 index 0000000..5be0e29 --- /dev/null +++ b/inst/detrend.m @@ -0,0 +1,152 @@ +function [X,T]=detrend(t,X,p) +% DETREND removes the trend from data, NaN's are considered as missing values +% +% DETREND is fully compatible to previous Matlab and Octave DETREND with the following features added: +% - handles NaN's by assuming that these are missing values +% - handles unequally spaced data +% - second output parameter gives the trend of the data +% - compatible to Matlab and Octave +% +% [...]=detrend([t,] X [,p]) +% removes trend for unequally spaced data +% t represents the time points +% X(i) is the value at time t(i) +% p must be a scalar +% +% [...]=detrend(X,0) +% [...]=detrend(X,'constant') +% 
removes the mean +% +% [...]=detrend(X,p) +% removes polynomial of order p (default p=1) +% +% [...]=detrend(X,1) - default +% [...]=detrend(X,'linear') +% removes linear trend +% +% [X,T]=detrend(...) +% +% X is the detrended data +% T is the removed trend +% +% see also: SUMSKIPNAN, ZSCORE + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see <http://www.gnu.org/licenses/>. + + +% Copyright (C) 1995, 1996 Kurt Hornik +% Copyright (C) 2001,2007,2008,2019 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +if (nargin == 1) + p = 1; + X = t; + t = []; +elseif (nargin == 2) + if strcmpi(X,'constant'), + p = 0; + X = t; + t = []; + elseif strcmpi(X,'linear'), + p = 1; + X = t; + t = []; + elseif ischar(X) + error('unknown 2nd input argument'); + elseif all(size(X)==1), + p = X; + X = t; + t = []; + else + p = 1; + end; +elseif (nargin == 3) + if ischar(X), + warning('input arguments are not supported'); + end; + +elseif (nargin > 3) + fprintf (1,'usage: detrend (x [, p])\n'); +end; + +% check data, must be in column order (a row vector is transposed here and transposed back at the end) +[m, n] = size (X); +if (m == 1) + X = X'; + r=n; +else + r=m; +end +% check time scale +if isempty(t), + t = (1:r).'; % make time scale +elseif ~all(size(t)==size(X)) + t = t(:); +end; +% check dimension of t and X (a mismatch is only reported on stderr, execution continues) +if ~all(size(X,1)==size(t,1)) + fprintf (2,'detrend: size(t,1) must same as size(x,1) \n'); +end; +% check the order of the polynomial (an invalid p is only reported on stderr, execution continues) +if ( any(size(p)>1) 
|| any(p - round (p)) || ~(p >= 0) ) + fprintf (2,'detrend: p must be a nonnegative integer\n'); +end + +if (nargout>1) , % needs more memory + T = zeros(size(X))+nan; + + if (size(t,2)>1), % for multiple time scales + for k=1:size(X,2), + idx=find(~isnan(X(:,k))); + b = (t(:,k) * ones (1, p + 1)) .^ (ones (size(t,1),1) * (0 : p)); + T(:,k) = b * (b(idx,:) \ X(idx,k)); + end; + + else % if only one time scale is used + b = (t * ones (1, p + 1)) .^ (ones (length(t),1) * (0 : p)); + for k=1:size(X,2), + idx=find(~isnan(X(:,k))); + T(:,k) = b * (b(idx,:) \ X(idx,k)); + end; + end; + X = X-T; + + if (m == 1) + X = X'; + T = T'; + end +else % needs less memory + if (size(t,2)>1), % for multiple time scales + for k = 1:size(X,2), + idx = find(~isnan(X(:,k))); + b = (t(idx,k) * ones (1, p + 1)) .^ (ones (length(idx),1) * (0 : p)); + X(idx,k) = X(idx,k) - b * (b \ X(idx,k)); + end; + else % if only one time scale is used + b = (t * ones (1, p + 1)) .^ (ones (length(t),1) * (0 : p)); + for k = 1:size(X,2), + idx = find(~isnan(X(:,k))); + X(idx,k) = X(idx,k) - b(idx,:) * (b(idx,:) \ X(idx,k)); + end; + end; + + if (m == 1) + X = X'; + end +end; + + + diff --git a/inst/ecdf.m b/inst/ecdf.m new file mode 100644 index 0000000..6ad655e --- /dev/null +++ b/inst/ecdf.m @@ -0,0 +1,82 @@ +function [F,X]=ecdf(h,Y) +% ECDF empirical cumulative function +% NaN's are considered Missing values and are ignored. +% +% [F,X] = ecdf(Y) +% calculates empirical cumulative distribution functions (i.e Kaplan-Meier estimate) +% ecdf(Y) +% ecdf(gca,Y) +% without output arguments plots the empirical cdf, in axis gca. +% +% Y input data +% must be a vector or matrix, in case Y is a matrix, the ecdf for every column is computed. 
+% +% see also: HISTO2, HISTO3, PERCENTILE, QUANTILE + +% Copyright (C) 2009,2010,2019 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + +if ~isscalar(h) || ~ishandle(h) || isstruct(h), + Y = h; + h = []; +end; + +DIM = []; + + SW = isstruct(Y); + if SW, SW = isfield(Y,'datatype'); end; + if SW, SW = strcmp(Y.datatype,'HISTOGRAM'); end; + if SW, + [yr,yc]=size(Y.H); + if ~isfield(Y,'N'); + Y.N = sum(Y.H,1); + end; + f = [zeros(1,yc);cumsum(Y.H,1)]; + for k=1:yc, + f(:,k)=f(:,k)/Y.N(k); + end; + t = [Y.X(1,:);Y.X]; + + elseif isnumeric(Y), + sz = size(Y); + if isempty(DIM), + DIM = min(find(sz>1)); + if isempty(DIM), DIM = 1; end; + end; + if DIM==2, Y=Y.'; DIM = 1; end; + + t = sort(Y,1); + t = [t(1,:);t]; + N = sum(~isnan(Y),1); + f = zeros(size(Y,1)+1,size(Y,2)); + for k=1:size(Y,2), + f(:,k)=[0:size(Y,1)]'/N(k); + end; + end; + + if nargout<1, + if ~isempty(h), axes(h); end; + stairs(t,f); + set(gca,'ylim',[0,1]); + else + F = f; + X = t; + end; + +%!assert(ecdf(1:2)*2==[0:2]') +%!assert(ecdf([1:2,NaN])*2==[0:3]') + diff --git a/inst/fishers_exact_test.m b/inst/fishers_exact_test.m new file mode 100644 index 0000000..8483f5a --- /dev/null +++ b/inst/fishers_exact_test.m @@ -0,0 +1,84 @@ +function p = fishers_exact_test(a,b,c,d) +% FISHERS_EXACT_TEST implements Fisher's exact test for the analysis of 
+% contingency tables e.g. "Lady tasting tea" experiment [1-6]. +% +% Usage: +% p = fishers_exact_test(H) +% p = fishers_exact_test(a,b,c,d) +% +% with H being a 2x2 matrix representing a contingency table H = [[a,b];[c,d]] +% and p is the resulting p-value. The implementation provides exact results +% when (1) the symbolic toolbox (with vpa) is loaded, or (2) the sample sizes +% are small. Without the symbolic toolbox, the result might be subject to the limited accuracy of +% floating point numbers for large sample sizes (a warning might be shown); +% in that case, the symbolic toolbox should be loaded. +% +% References: +% [1] https://en.wikipedia.org/wiki/Fisher%27s_exact_test +% [2] https://en.wikipedia.org/wiki/Lady_tasting_tea +% [3] Fisher, R. A. (1922). "On the interpretation of χ2 from contingency +% tables, and the calculation of P". +% Journal of the Royal Statistical Society. 85 (1): 87–94. +% doi:10.2307/2340521. JSTOR 2340521. +% [4] Fisher, R.A. (1954). Statistical Methods for Research Workers. +% Oliver and Boyd. ISBN 0-05-002170-2. +% [5] Agresti, Alan (1992). "A Survey of Exact Inference for Contingency Tables". +% Statistical Science. 7 (1): 131–153. +% CiteSeerX 10.1.1.296.874. doi:10.1214/ss/1177011454. JSTOR 2246001. +% [6] Fisher, Sir Ronald A. (1956) [The Design of Experiments (1935)]. +% "Mathematics of a Lady Tasting Tea". In James Roy Newman (ed.). +% The World of Mathematics, volume 3. Courier Dover Publications. +% ISBN 978-0-486-41151-4. + +% Copyright (C) 2019 Alois Schloegl +% This is part of the BIOSIG-toolbox http://biosig.sf.net/ +% +% This library is free software; you can redistribute it and/or +% modify it under the terms of the GNU Library General Public +% License as published by the Free Software Foundation; either +% version 3 of the License, or (at your option) any later version. 
+% +% This library is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +% Library General Public License for more details. +% +% You should have received a copy of the GNU Library General Public +% License along with this library; if not, write to the +% Free Software Foundation, Inc., 59 Temple Place - Suite 330, +% Boston, MA 02111-1307, USA. + +if (nargin==1) && isequal(size(a),[2,2]), + H = a; + a=H(1,1); + b=H(1,2); + c=H(2,1); + d=H(2,2); +elseif (nargin==4) && isscalar(a) && isscalar(b) && isscalar(c) && isscalar(d) + H=[[a,b];[c,d]]; +else + error('invalid input argument') +end + +try + % use symbolic package if available + a = vpa(a); + b = vpa(b); + c = vpa(c); + d = vpa(d); +end + +u = nchoosek(a+b,a); +v = nchoosek(c+d,c); +w = nchoosek(a+b+c+d,a+c); + +if strcmp(lastwarn(),'nchoosek: possible loss of precision') + printf('It is recommended to load the symbolic package, and re-run fishers_exact_test.\n') +end + +p = u * v / w; + +%!assert((double(fishers_exact_test(1,1,1,1))-2/3) +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +persistent FLAG_ACCURACY_LEVEL; + +%% if strcmp(version,'3.6'), FLAG_ACCURACY_LEVEL=1; end; %% hack for the use with Freemat3.6 + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% set the default accuracy level for your platform, ACCTEST might help to determine the optimum for your platform. +%% If you use Matlab, use level 0 or 2; 1 and 3 are much slower but do not show a better accuracy +%% Octave seems to be able to use all 4 levels, were the differences of accuracy between succeeding levels become smaller +DEFAULT_ACCURACY_LEVEL = 0; %% maximum speed, accuracy sufficient for most needs. 
+%% DEFAULT_ACCURACY_LEVEL = 2; %% slower, but better accuracy for: AMDx64 Opteron, Phenom, Intel Pentium +%% DEFAULT_ACCURACY_LEVEL = 1; %% slower, but better accuracy for: Octave on Intel Atom (no improvement with Matlab, just slower) +%% DEFAULT_ACCURACY_LEVEL = 3; %% similar accuracy to 1 or 2 (depending on platform) but even slower. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%% set DEFAULT value of FLAG +if isempty(FLAG_ACCURACY_LEVEL), + FLAG_ACCURACY_LEVEL = DEFAULT_ACCURACY_LEVEL; +end; + +if nargin>0, + if (i>3), i=3; end; % clamp the requested level to the range 0..3 + if (i<0), i=0; end; + FLAG_ACCURACY_LEVEL = double(i); +end; +FLAG = FLAG_ACCURACY_LEVEL; + diff --git a/inst/flag_implicit_significance.m b/inst/flag_implicit_significance.m new file mode 100644 index 0000000..18cab45 --- /dev/null +++ b/inst/flag_implicit_significance.m @@ -0,0 +1,67 @@ +function alpha=flag_implicit_significance(i) +% The use of FLAG_IMPLICIT_SIGNIFICANCE is in experimental state. +% flag_implicit_significance might even become obsolete. +% +% FLAG_IMPLICIT_SIGNIFICANCE sets and gets default alpha (level) of any significance test +% The default alpha-level is stored in the persistent variable FLAG_implicit_significance +% The idea is that the significance need not be assigned explicitly. +% This might yield more readable code. +% +% Choose alpha low enough, because in alpha*100% of the cases, you will +% reject the Null hypothesis just by chance. For this reason, the default +% alpha is 0.01. 
+% +% flag_implicit_significance(0.01) +% sets the alpha-level for the significance test +% +% alpha = flag_implicit_significance() +% gets default alpha +% +% flag_implicit_significance(alpha) +% sets default alpha-level +% +% alpha = flag_implicit_significance(alpha) +% gets and sets alpha +% +% features: +% - compatible to Matlab and Octave +% +% see also: CORRCOEF, PARTCORRCOEF + +% $Id$ +% Copyright (C) 2000-2002,2009,2010 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ +% +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . 
+ + +persistent FLAG_implicit_significance; +DEFAULT_ALPHA = 0.01; + +%%% initialize the flag on first use (the exist() test is redundant after the persistent declaration; the isempty() test below does the work) +if ~exist('FLAG_implicit_significance','var'), + FLAG_implicit_significance = DEFAULT_ALPHA; % default value +end; +if isempty(FLAG_implicit_significance), + FLAG_implicit_significance = DEFAULT_ALPHA; % default value +end; + +if nargin>0, + fprintf(2,'Warning: flag_implicit_significance is in an experimental state\n'); + fprintf(2,'It might become obsolete.\n'); + FLAG_implicit_significance = i; +end; + +alpha = FLAG_implicit_significance; diff --git a/inst/flag_implicit_skip_nan.m b/inst/flag_implicit_skip_nan.m new file mode 100644 index 0000000..ab1e80b --- /dev/null +++ b/inst/flag_implicit_skip_nan.m @@ -0,0 +1,65 @@ +function FLAG = flag_implicit_skip_nan(i) +% FLAG_IMPLICIT_SKIP_NAN sets and gets default mode for handling NaNs +% 1 skips NaN's (the default mode if no mode is set) +% 0 NaNs are propagated; input NaN's give NaN's at the output +% +% FLAG = flag_implicit_skip_nan() +% gets current mode +% +% flag_implicit_skip_nan(FLAG) % sets mode +% +% prevFLAG = flag_implicit_skip_nan(nextFLAG) +% gets previously set FLAG and sets FLAG for the future +% flag_implicit_skip_nan(prevFLAG) +% resets FLAG to previous mode +% +% It is used in: +% SUMSKIPNAN, MEDIAN, QUANTILES, TRIMEAN +% and affects many other functions like: +% CENTER, KURTOSIS, MAD, MEAN, MOMENT, RMS, SEM, SKEWNESS, +% STATISTIC, STD, VAR, ZSCORE etc. +% +% The mode is stored in the persistent variable FLAG_implicit_skip_nan +% It is recommended to use flag_implicit_skip_nan(1) as default and +% flag_implicit_skip_nan(0) should be used for exceptional cases only. +% This feature might disappear without further notice, so you should really not +% rely on it. 
+ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +% $Id$ +% Copyright (C) 2001-2003,2009 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +persistent FLAG_implicit_skip_nan; + +%% if strcmp(version,'3.6'), FLAG_implicit_skip_nan=(1==1); end; %% hack for the use with Freemat3.6 + +%%% set DEFAULT value of FLAG +if isempty(FLAG_implicit_skip_nan), + FLAG_implicit_skip_nan = (1==1); %logical(1); % logical.m not available on 2.0.16 +end; + +FLAG = FLAG_implicit_skip_nan; +if nargin>0, + FLAG_implicit_skip_nan = (i~=0); %logical(i); %logical.m not available in 2.0.16 + if (~i) + warning('flag_implicit_skipnan(0): You are warned!!! You have turned off skipping NaN in sumskipnan. This is not recommended. Make sure you really know what you do.') + end; +end; + diff --git a/inst/flag_nans_occured.m b/inst/flag_nans_occured.m new file mode 100644 index 0000000..f3027c3 --- /dev/null +++ b/inst/flag_nans_occured.m @@ -0,0 +1,41 @@ +function [flag]=flag_nans_occured() +% FLAG_NANS_OCCURED checks whether the last call(s) to sumskipnan or covm +% contained any not-a-numbers in the input argument. Because many other +% functions like mean, std, etc. are also using sumskipnan, +% also these functions can be checked for NaN's in the input data. 
+% +% A call to FLAG_NANS_OCCURED() resets also the flag whether NaN's occured. +% Only sumskipnan or covm can set the flag again. +% +% see also: SUMSKIPNAN, COVM + +% $Id$ +% Copyright (C) 2009 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see . + +global FLAG_NANS_OCCURED; + +%%% check whether FLAG was already defined +if isempty(FLAG_NANS_OCCURED), + FLAG_NANS_OCCURED = logical(0); % default value +end; + +flag = FLAG_NANS_OCCURED; % return value + +FLAG_NANS_OCCURED = logical(0); % reset flag + +return; diff --git a/inst/fss.m b/inst/fss.m new file mode 100644 index 0000000..e0780e9 --- /dev/null +++ b/inst/fss.m @@ -0,0 +1,144 @@ +function [idx,score] = fss(D,cl,N,MODE) +% FSS - feature subset selection and feature ranking +% the method is motivated by the max-relevance-min-redundancy (mRMR) +% approach [1]. However, the default method uses partial correlation, +% which has been developed from scratch. PCCM [3] describes +% a similar idea, but is more complicated. +% An alternative method based on FSDD is implemented, too. 
+% +% [idx,score] = fss(D,cl) +% [idx,score] = fss(D,cl,MODE) +% [idx,score] = fss(D,cl,N,MODE) +% D data - each column represents a feature +% cl classlabel +% N number of features to select in the correlation-based modes (default: all columns of D) +% MODE 'Pearson' [default] correlation +% 'rank' correlation +% 'FSDD' feature selection algorithm based on a distance discriminant [2] +% %%% 'MRMR','MID','MIQ' max-relevance, min redundancy [1] - not supported yet. +% +% score score of the feature +% idx ranking of the feature +% [tmp,idx]=sort(-score) +% +% see also: TRAIN_SC, XVAL, ROW_COL_DELETION +% +% REFERENCES: +% [1] Peng, H.C., Long, F., and Ding, C., +% Feature selection based on mutual information: criteria of max-dependency, max-relevance, and min-redundancy, +% IEEE Transactions on Pattern Analysis and Machine Intelligence, +% Vol. 27, No. 8, pp.1226-1238, 2005. +% [2] Jianning Liang, Su Yang, Adam Winstanley, +% Invariant optimal feature selection: A distance discriminant and feature ranking based solution, +% Pattern Recognition, Volume 41, Issue 5, May 2008, Pages 1429-1439. +% ISSN 0031-3203, DOI: 10.1016/j.patcog.2007.10.018. +% [3] K. Raghuraj Rao and S. Lakshminarayanan +% Partial correlation based variable selection approach for multivariate data classification methods +% Chemometrics and Intelligent Laboratory Systems +% Volume 86, Issue 1, 15 March 2007, Pages 68-81 +% http://dx.doi.org/10.1016/j.chemolab.2006.08.007 + +% $Id$ +% Copyright (C) 2009,2010 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + + if nargin<3 + MODE = []; + N = []; + elseif ischar(N) + MODE = N; + N = []; + elseif nargin<4, + MODE = []; + end + + if isempty(N), N = size(D,2); end + score = repmat(NaN,1,size(D,2)); + + if 0, %strcmpi(MODE,'MRMR') || strcmpi(MODE,'MID') || strcmpi(MODE,'MIQ'); + %% RMRM/MID/MIQ is not supported + %% TODO: FIXME + + [tmp,t] = sort([cl,D]); + cl = t(:,1:size(cl,2)); + D = t(:,1:size(D,2)); + for k = 1:N, + V(k) = mi(cl, D(:,k)); + + for m = 1:N, + W(k,m) = mi(D(:,m), D(:,k)); + end + MID(k) = V(k) - mean(W(k,:)); + MIQ(k) = V(k) / mean(W(k,:)); + end + + if strcmpi(MODE,'MIQ') + [score,idx] = sort(MIQ,[],'descend'); + else + [score,idx] = sort(MID,[],'descend'); + end + + elseif strcmpi(MODE,'FSDD'); + [b,i,j]=unique(cl); + for k=1:length(b) + n(k,1) = sum(j==k); + m(k,:) = mean(D(j==k,:),1); + v(k,:) = var(D(j==k,:),1); + end + m0 = mean(m,1,n); + v0 = var(D,[],1); + s2 = mean(m.^2,1,n) - m0.^2; + score = (s2 - 2*mean(v,1,n)) ./ v0; + [t,idx] = sort(-score); + + elseif isempty(MODE) || strcmpi(MODE,'rank') || strcmpi(MODE,'Pearson') + cl = cat2bin(cl); + if strcmpi(MODE,'rank'), + [tmp,D] = sort(D,1); + end + idx = repmat(NaN,1,N); + for k = 1:N, + f = isnan(score); + + %%%%% compute partial correlation (X,Y|Z) + % r = partcorrcoef(cl, D(:,f), D(:,~f)); % obsolete, not very robust + + %% this is a more robust version + X = cl; Y = D(:,f); Z = D(:,~f); + if (k>1) + X = X-Z*(Z\X); + Y = Y-Z*(Z\Y); + end + r = corrcoef(X,Y); + + [s,ix] = max(sumsq(r,1)); + f = find(f); + idx(k) = f(ix); + score(idx(k)) = s; + end + + end +end + +function I = mi(x,y) + ix = ~any(isnan([x,y]),2); + H = sparse(x(ix),y(ix)); + pij = H./sum(ix); + Iij = pij.*log2(pij./(sum(pij,2)*sum(pij,1))); + Iij(isnan(Iij)) = 0; + I = sum(Iij(:)); +end diff --git a/inst/geomean.m b/inst/geomean.m new file mode 100644 index 
0000000..69a8040 --- /dev/null +++ b/inst/geomean.m @@ -0,0 +1,58 @@ +function [y] = geomean(x,DIM,W) +% GEOMEAN calculates the geometric mean of data elements. +% +% y = geomean(x [,DIM [,W]]) is the same as +% y = mean(x,'G' [,DIM]) +% +% DIM dimension +% 1 geometric mean of columns +% 2 geometric mean of rows +% default or []: first DIMENSION, with more than 1 element +% W weights to compute weighted mean (default: []) +% if W=[], all weights are 1. +% number of elements in W must match size(x,DIM) +% +% features: +% - can deal with NaN's (missing values) +% - weighting of data +% - dimension argument also in Octave +% - compatible to Matlab and Octave +% +% see also: SUMSKIPNAN, MEAN, HARMMEAN +% +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see <http://www.gnu.org/licenses/>. + + +% $Id$ +% Copyright (C) 2000-2002,2009 by Alois Schloegl +% This is part of the NaN-toolbox. 
For more details see +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +if nargin<2 + DIM=min(find(size(x)>1)); + if isempty(DIM), DIM=1; end; +end +if nargin<3 + W = []; +end; + +[y, n] = sumskipnan(log(x),DIM,W); % presumably: sum of log(x) along DIM ignoring NaNs, and the number of non-NaN terms (see SUMSKIPNAN) +y = exp (y./n); % geometric mean = exp(mean(log(x))) + +%!assert(geomean([1,2,4,NaN]) == 2) +%!assert(geomean([1,2,1/2,NaN]) == 1) +%!assert(geomean([1,2,0,NaN]) == 0) +%!assert(geomean([1,2,Inf,NaN]) == Inf) + diff --git a/inst/gini.m b/inst/gini.m new file mode 100644 index 0000000..bd3d905 --- /dev/null +++ b/inst/gini.m @@ -0,0 +1,35 @@ +function G = gini(data) +% GINI computes the gini-coefficient [1] by +% computing the L-moments [2]. +% +% USAGE: +% G = gini(data) +% +% +% +% References: +% [1] https://en.wikipedia.org/wiki/Gini_coefficient +% [2] https://en.wikipedia.org/wiki/L-moment + +% Copyright (C) 2019,2020 by Alois Schlögl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +%% This program is free software: you can redistribute it and/or +%% modify it under the terms of the GNU General Public License as +%% published by the Free Software Foundation, either version 3 of the +%% License, or (at your option) any later version. +%% +%% This program is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% General Public License for more details. +%% +%% You should have received a copy of the GNU General Public License +%% along with this program; see the file COPYING. If not, see +%% <http://www.gnu.org/licenses/>. 
+ + +L = lmom(data,2); % presumably the first two L-moments of data (see LMOM) +G = L(2)/L(1); % ratio of the second to the first element of L + diff --git a/inst/gscatter.m b/inst/gscatter.m new file mode 100644 index 0000000..4ec2477 --- /dev/null +++ b/inst/gscatter.m @@ -0,0 +1,97 @@ +function [h] = gscatter(x,y,group,clr,sym,siz,doleg,xname,yname) +% GSCATTER scatter plot of groups +% +% gscatter(x,y,group) +% gscatter(x,y,group,clr,sym,siz) +% gscatter(x,y,group,clr,sym,siz,doleg) +% gscatter(x,y,group,clr,sym,siz,doleg,xname,yname) +% h = gscatter(...) +% +% x,y, group: vectors with equal length +% clr: color vector, default 'bgrcmyk' (cycled over the groups) +% sym: symbol, default '.' (cycled over the groups) +% siz: size of Marker +% doleg: 'on' (default) shows legend, 'off' turns off legend +% xname, yname: name of axis +% +% +% see also: ecdf, cdfplot +% +% References: + +% Copyright (C) 2009,2019 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ +if nargin<3 + help gscatter; + error('invalid number of arguments;') +end; + +[b,i,j] = unique(group); % b: distinct group labels, j: group index of each sample (done after the argument check so that a missing group yields the usage error) +if nargin<4 + clr = []; +end +if nargin<5 + sym = []; +end +if nargin<6 + siz = []; +end +if nargin<7 + doleg = []; +end +if nargin<8 + xname = []; +end +if nargin<9 + yname = []; +end; +if isempty(clr), clr='bgrcmyk'; end; +if isempty(sym), sym='.'; end; +if isempty(doleg), doleg='on'; end; + +for k=1:length(b); + % colors, symbols and sizes are cycled when there are more groups than entries + c = clr(mod(k-1,length(clr))+1); + s = sym(mod(k-1,length(sym))+1); + hh(k) = plot(x(k==j),y(k==j),[c,s]); + if ~isempty(siz) + z = siz(mod(k-1,length(siz))+1); + set(hh(k),'MarkerSize',z); + end + hold on; +end; +hold off; + +if ~strcmpi(doleg,'off') % the legend is shown unless it is explicitly turned off + if isnumeric(b) + b=cellstr(num2str(b(:))); % one legend label per group + end; + legend(b,'box','off'); +end; +if ~isempty(xname) + xlabel(xname); +end; +if ~isempty(yname) + ylabel(yname); +end; + +if nargout>0, + h = hh; +end; + diff --git a/inst/harmmean.m b/inst/harmmean.m new file mode 100644 index 0000000..b9408b4 --- /dev/null +++ b/inst/harmmean.m @@ -0,0 +1,59 @@ +function [y] = harmmean(x,DIM,W) +% HARMMEAN calculates the harmonic mean of data elements. +% The harmonic mean is the inverse of the mean of the inverse elements. +% +% y = harmmean(x [,DIM [,W]]) is the same as +% y = mean(x,'H' [,DIM [,W]]) +% +% DIM dimension +% 1 harmonic mean of columns +% 2 harmonic mean of rows +% default or []: first DIMENSION, with more than 1 element +% W weights to compute weighted mean (default: []) +% if W=[], all weights are 1. +% number of elements in W must match size(x,DIM) +% +% features: +% - can deal with NaN's (missing values) +% - weighting of data +% - dimension argument also in Octave +% - compatible to Matlab and Octave +% +% see also: SUMSKIPNAN, MEAN, GEOMEAN +% + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. 
+% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see <http://www.gnu.org/licenses/>. + + +% Copyright (C) 2000-2019 by Alois Schloegl +% This is part of the NaN-toolbox. For more details see +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +if nargin<2 + DIM=min(find(size(x)>1)); + if isempty(DIM), DIM=1; end; +end; +if nargin<3 + W = []; +end; + +[y, n] = sumskipnan(1./x,DIM,W); % presumably: sum of the reciprocals along DIM ignoring NaNs, and the number of non-NaN terms (see SUMSKIPNAN) +y = n./y; % harmonic mean = n / sum(1./x) + +%!assert(harmmean([1,1/4,1,NaN]) == 0.5) +%!assert(harmmean([1,2,0,NaN]) == 0) +%!assert(harmmean([1,2,Inf,NaN]) == 2) +%!assert(harmmean([1,2,-Inf,NaN]) == 2) + diff --git a/inst/hist2res.m b/inst/hist2res.m new file mode 100644 index 0000000..73dd439 --- /dev/null +++ b/inst/hist2res.m @@ -0,0 +1,147 @@ +function [R]=hist2res(H,fun) +% Evaluates Histogram data +% [R]=hist2res(H) +% +% [y]=hist2res(H,fun) +% estimates fun-statistic +% +% fun 'mean' mean +% 'std' standard deviation +% 'var' variance +% 'sem' standard error of the mean +% 'rms' root mean square +% 'msq' mean of squares +% 'sum' sum +% 'ssq' sum of squares +% 'CM#' central moment of order # +% 'skewness' skewness +% 'kurtosis' excess coefficient (Fisher kurtosis) +% +% see also: NaN/statistic +% +% REFERENCES: +% [1] C.L. Nikias and A.P. Petropulu "Higher-Order Spectra Analysis" Prentice Hall, 1993. +% [2] C.E. Shannon and W. Weaver "The mathematical theory of communication" University of Illinois Press, Urbana 1949 (reprint 1963). 
+% [3] http://www.itl.nist.gov/ +% [4] http://mathworld.wolfram.com/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 2 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + +% $Id$ +% Copyright (c) 1996-2002,2006 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +if strcmp(H.datatype,'HISTOGRAM'), + +elseif strcmp(H.datatype,'qc:histo') + HDR = H; + if isfield(H,'THRESHOLD'), + TH = H.THRESHOLD; + else + TH = repmat([-inf,inf],HDR.NS,1); + end; + HIS = H.HIS; + + % remove overflowing samples + HIS.N = sumskipnan(HIS.H); + for k = 1:size(HIS.H,2); + t = HIS.X(:,min(k,size(HIS.X,2))); + HIS.H(xor(t<=min(TH(k,:)), t>=max(TH(k,:))),k) = 0; + end; + Nnew = sumskipnan(HIS.H); + R.ratio_lost = 1-Nnew./HIS.N; + HIS.N = Nnew; + + % scale into physical values + if H.FLAG.UCAL, + %t = HIS.X; + %for k=1:length(HDR.InChanSelect), + % HIS.X(:,k) = t(:,min(size(t,2),k))*HDR.Calib(k+1,k)+HDR.Calib(1,k); + %end; + HIS.X = [ones(size(HIS.X,1),1),repmat(HIS.X,1,size(HIS.H,2)./size(HIS.X,2))]*H.Calib; + end; + H = HIS; +else + fprintf(2,'ERROR: arg1 is not a histogram\n'); + return; +end; +if nargin<2, fun=[]; end; + +global FLAG_implicit_unbiased_estimation; +%%% check whether FLAG was already defined +if ~exist('FLAG_implicit_unbiased_estimation','var'), + FLAG_implicit_unbiased_estimation=[]; +end; +%%% set DEFAULT 
value of FLAG +if isempty(FLAG_implicit_unbiased_estimation), + FLAG_implicit_unbiased_estimation=logical(1); +end; + +sz = size(H.H)./size(H.X); +R.N = sumskipnan(H.H,1); +R.SUM = sumskipnan(H.H.*repmat(H.X,sz),1); +R.SSQ = sumskipnan(H.H.*repmat(H.X.*H.X,sz),1); +%R.S3P = sumskipnan(H.H.*repmat(H.X.^3,sz),1); % sum of 3rd power +R.S4P = sumskipnan(H.H.*repmat(H.X.^4,sz),1); % sum of 4th power +%R.S5P = sumskipnan(H.H.*repmat(H.X.^5,sz),1); % sum of 5th power + +R.MEAN = R.SUM./R.N; +R.MSQ = R.SSQ./R.N; +R.RMS = sqrt(R.MSQ); +R.SSQ0 = R.SSQ-R.SUM.*R.MEAN; % sum square of mean removed + +if FLAG_implicit_unbiased_estimation, + n1 = max(R.N-1,0); % in case of n=0 and n=1, the (biased) variance, STD and STE are INF +else + n1 = R.N; +end; + +R.VAR = R.SSQ0./n1; % variance (unbiased) +R.STD = sqrt(R.VAR); % standard deviation +R.SEM = sqrt(R.SSQ0./(R.N.*n1)); % standard error of the mean +R.SEV = sqrt(n1.*(n1.*R.S4P./R.N+(R.N.^2-2*R.N+3).*(R.SSQ./R.N).^2)./(R.N.^3)); % standard error of the variance +R.Coefficient_of_variation = R.STD./R.MEAN; + +R.CM2 = R.SSQ0./n1; +x = repmat(H.X,sz) - repmat(R.MEAN,size(H.X,1),1); +R.CM3 = sumskipnan(H.H.*(x.^3),1)./n1; +R.CM4 = sumskipnan(H.H.*(x.^4),1)./n1; +%R.CM5 = sumskipnan(H.H.*(x.^5),1)./n1; + +R.SKEWNESS = R.CM3./(R.STD.^3); +R.KURTOSIS = R.CM4./(R.VAR.^2)-3; +R.MAD = sumskipnan(H.H.*abs(x),1)./R.N; % mean absolute deviation + +H.PDF = H.H./H.N(ones(size(H.H,1),1),:); +status=warning('off'); +R.ENTROPY = -sumskipnan(H.PDF.*log2(H.PDF),1); +warning(status); +R.QUANT = repmat(min(diff(H.X,[],1)),1,size(H.H,2)/size(H.X,2)); +R.MAX = max(H.X); +R.MIN = min(H.X); +R.RANGE = R.MAX-R.MIN; + +if ~isempty(fun), + fun=upper(fun); + if strncmp(fun,'CM',2) + oo = str2double(fun(3:length(fun))); + R = sumskipnan(H.PDF.*(x.^oo),1); + else + R = getfield(R,fun); + end; +end; + diff --git a/inst/histo.m b/inst/histo.m new file mode 100644 index 0000000..af09c4a --- /dev/null +++ b/inst/histo.m @@ -0,0 +1,76 @@ +function 
[H,X]=histo(Y,Mode) +% HISTO calculates histogram for each column +% [H,X] = HISTO(Y,Mode) +% +% Mode (default: '1x') +% 'rows' : frequency of each row ('column': frequency of each column) +% '1x' : single bin-values +% 'nx' : separate bin-values for each column +% X are the bin-values +% H is the frequency of occurrence of value X +% +% HISTO(Y) with no output arguments: +% plots the histogram bar(X,H) +% +% more histogram-based results can be obtained by HIST2RES +% +% see also: HISTO, HISTO2, HISTO3, HISTO4 +% + +% Copyright (C) 1996-2019 by Alois Schloegl +% This is part of the NaN-toolbox +% https://octave.sourceforge.io/nan/index.html +% https://pub.ist.ac.at/~schloegl/matlab/NaN/ +% +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see . 
+ +if nargin<2, + Mode='1x'; +end; +Mode=lower(Mode); + +if strcmp(Mode,'rows') + R = histo4(Y); + +elseif strcmp(Mode,'column') + R = histo4(Y'); + R.X = R.X'; + +elseif strcmp(Mode,'1x') + R = histo3(Y); + +elseif strcmp(Mode,'nx') + R = histo2(Y); +else, error('HISTO: Mode ''%s'' is not supported', Mode); % fail here rather than on the undefined R below +end; + +H = R.H; +X = R.X; +if nargout == 0, + if any(size(X)==1), + if exist('OCTAVE_VERSION') < 5, + bar(R.X,R.H,'stacked'); + else + bar(R.X,R.H); + end + else + warning('2-dim X-values not supported\n') + %bar3(R.X,R.H); + end; +end; + + +%!assert(issorted(getfield(histo_mex([5;NaN;3;NaN;-1;inf;-inf;4]),'X'))) +%!assert(sum(getfield(histo_mex([5;NaN;3;NaN;NaN;-1;inf;-inf;4]),'H'))==9) + diff --git a/inst/histo2.m b/inst/histo2.m new file mode 100644 index 0000000..5503d9d --- /dev/null +++ b/inst/histo2.m @@ -0,0 +1,105 @@ +function R = histo2(Y, W) +% HISTO2 calculates histogram for multiple columns with separate bin values +% for each data column. +% +% R = HISTO2(Y) +% R = HISTO2(Y, W) +% Y data +% W weight vector containing weights of each sample, +% number of rows of Y and W must match. +% default W=[] indicates that each sample is weighted with 1. +% +% R = HISTO2(...) +% R is a struct with these fields +% R.X the bin-values, bin-values are computed separately for each +% data column, thus R.X is a matrix, each column contains the +% bin values for each data column, unused elements are indicated with NaN. +% In order to have common bin values, use HISTO3. +% R.H is the frequency of occurrence of value X +% R.N are the number of valid (not NaN) samples (i.e. 
sum of weights) +% +% more histogram-based results can be obtained by HIST2RES +% +% see also: HISTO, HISTO2, HISTO3, HISTO4 +% + +% Copyright (C) 1996-2019 by Alois Schloegl +% This is part of the NaN-toolbox +% https://octave.sourceforge.io/nan/index.html +% https://pub.ist.ac.at/~schloegl/matlab/NaN/ +% +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see . + + +%%%%% check input arguments %%%%% +[yr,yc] = size(Y); +if nargin < 2, + W = []; +end; +if ~isempty(W) && (yr ~= numel(W)), + error('number of rows of Y does not match number of elements in W'); +end; + +%%%%% identify all possible X's and generate overall Histogram %%%%% +N = sum(~isnan(Y), 1); +if all(N==0) + H=[]; + X=[]; +end; + +NN = N; +if isempty(W) + sY = sort(Y,1); +else + [sY, idx] = sort(Y,1); + W = cumsum(W(idx)); %% W becomes cumulative sum +end; +[ix,iy] = find( diff(sY, [], 1) > 0); +nn0 = 0; + +for k = 1:yc, + tmp = [ix(iy==k); N(k)]; + nn1 = sum(tmp>0); + + if isempty(W) + H(1:nn1,k) = [tmp(1); diff(tmp)]; + else + %%% Note that W is the cumulative sum + H(1:nn1,k) = [W(tmp(1),k); diff(W(tmp,k))]; + NN(k) = W(N(k), k); + end; + X(1:nn1, k) = sY(tmp(tmp>0), k); + + if k==1; + nn0 = nn1; + elseif nn1 < nn0, + H (1+nn1:nn0, k) = NaN; + X (1+nn1:nn0, k) = NaN; + elseif nn1 > nn0, + H (1+nn0:nn1, 1:k-1) = NaN; + X (1+nn0:nn1, 1:k-1) = NaN; + nn0 = nn1; + end; +end; + +R.datatype = 'HISTOGRAM'; +R.H = H; +R.X = X; +R.N = NN; + 
+%!assert(getfield(histo2([]),'N'), 0) +%!assert(getfield(histo2(1),'N'), 1) +%!assert(getfield(histo2([1;1]),'H'), 2) +%!assert(getfield(histo2([1;1]),'H'), 2) +%!assert(getfield(histo2([repmat(NaN,4,2),[1;1;1;3]]),'N')==[0,0,4]) diff --git a/inst/histo3.m b/inst/histo3.m new file mode 100644 index 0000000..ee99ad7 --- /dev/null +++ b/inst/histo3.m @@ -0,0 +1,158 @@ +function [R, tix] = histo3(Y, W) +% HISTO3 calculates histogram for multiple columns with common bin values +% among all data columns, and can be useful for data compression. +% +% R = HISTO3(Y) +% R = HISTO3(Y, W) +% Y data +% W weight vector containing weights of each sample, +% number of rows of Y and W must match. +% default W=[] indicates that each sample is weighted with 1. +% R struct with these fields +% R.X the bin-values, bin-values are equal for each channel +% thus R.X is a column vector. If bin values should +% be computed separately for each data column, use HISTO2 +% R.H is the frequency of occurence of value X +% R.N are the number of valid (not NaN) samples +% +% Data compression can be performed in this way +% [R,tix] = histo3(Y) +% is the compression step +% +% R.tix provides a compressed data representation. +% R.compressionratio estimates the compression ratio +% +% R.X(tix) and R.X(R.tix) +% reconstruct the orginal signal (decompression) +% +% The effort (in memory and speed) for compression is O(n*log(n)). +% The effort (in memory and speed) for decompression is O(n) only. +% +% see also: HISTO, HISTO2, HISTO3, HISTO4 +% + + +% Copyright (C) 1996-2019 by Alois Schloegl +% This is part of the NaN-toolbox +% https://octave.sourceforge.io/nan/index.html +% https://pub.ist.ac.at/~schloegl/matlab/NaN/ +% +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. 
+% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see . + + +%%%%% check input arguments %%%%% +[yr,yc] = size(Y); +if nargin < 2, + W = []; +end; +if ~isempty(W) && (yr ~= numel(W)), + error('number of rows of Y does not match number of elements in W'); +end; + +%%%%% identify all possible X's and generate overall Histogram %%%%% +[sY, idx] = sort(Y(:),1); + +ix = diff(sY, [], 1) > 0; +tmp = [find(ix); sum(~isnan(sY))]; + +R.datatype = 'HISTOGRAM'; +R.N = sum(~isnan(Y), 1); +if all(R.N==0) + R.X=[]; + R.H=[]; + return; +end; +R.X = sY(tmp); + +% generate inverse index +if nargout>1, + tix = cumsum([1; ix]); % rank + [dummy,idx1] = sort(idx); % generate inverse index (tmp must stay intact, it is needed for R.H below) + tix = reshape(tix(idx1), yr, yc); % inverse sort rank + cc = 1; + nbins = sum(ix) + 1; % largest rank stored in tix + if exist('OCTAVE_VERSION') >= 5, + ; % NOP; no support for integer datatyp + elseif nbins < 2^8; % strict: intmax('uint8') is 2^8-1 + tix = uint8(tix); + cc = 8/1; + elseif nbins < 2^16; + tix = uint16(tix); + cc = 8/2; + elseif nbins < 2^32; + tix = uint32(tix); + cc = 8/4; + end; + R.compressionratio = (prod(size(R.X)) + (yr*yc)/cc) / (yr*yc); + R.tix = tix; +end; + + +if yc==1, + if isempty(W) + R.H = [tmp(1); diff(tmp)]; + else + C = cumsum(W(idx)); % cumulative weights + R.H = [C(tmp(1)); diff(C(tmp))]; + end; + return; + +elseif yc>1, + % allocate memory + H = zeros(size(R.X,1),yc); + + % scan each channel + for k = 1:yc, + if isempty(W) + sY = sort(Y(:,k)); + else + [sY,ix] = sort(Y(:,k)); + C = cumsum(W(ix)); + end + ix = find(diff(sY,[],1) > 0); + if size(ix,1) > 0, + tmp = [ix; R.N(k)]; + else + tmp = R.N(k); + end; + + t = 0; + j = 1; + if isempty(W) + for x = tmp(tmp>0)', + acc = sY(x); + while R.X(j)~=acc, j=j+1; end; + %j = 
find(sY(x)==R.X); % identify position on X + H(j,k) = H(j,k) + (x-t); % add diff(tmp) + t = x; + end; + else + for x = tmp(tmp>0)', + acc = sY(x); + while R.X(j)~=acc, j=j+1; end; + %j = find(sY(x)==R.X); % identify position on X + H(j,k) = H(j,k) + C(x)-t; % add diff(tmp) + t = C(x); + end; + end; + end; + + R.H = H; +end; + +%!assert(getfield(histo3([]),'N'), 0) +%!assert(getfield(histo3(1),'N'), 1) +%!assert(getfield(histo3([1;1]),'H'), 2) +%!assert(getfield(histo3([repmat(NaN,4,2),[1;1;1;3]]),'N')==[0,0,4]) + diff --git a/inst/histo4.m b/inst/histo4.m new file mode 100644 index 0000000..9dadd8e --- /dev/null +++ b/inst/histo4.m @@ -0,0 +1,104 @@ +function [R, tix] = histo4(Y, W) +% HISTO4 calculates histogram of multidimensional data samples +% and supports data compression +% +% R = HISTO4(Y) +% R = HISTO4(Y, W) +% Y data: on sample per row, each sample has with size(Y,2) elements +% W weights of each sample (default: []) +% W = [] indicates that each sample has equal weight +% R is a struct with these fields: +% R.X are the bin-values +% R.H is the frequency of occurence of value X (weighted with W) +% R.N are the total number of samples (or sum of W) +% +% HISTO4 might be useful for data compression, because +% [R,tix] = histo4(Y) +% is the compression step +% R.X(tix,:) +% is the decompression step +% +% The effort (in memory and speed) for compression is O(n*log(n)) +% The effort (in memory and speed) for decompression is only O(n) +% +% see also: HISTO, HISTO2, HISTO3, HISTO4 +% + +% Copyright (C) 1996-2019 by Alois Schloegl +% This is part of the NaN-toolbox +% https://octave.sourceforge.io/nan/index.html +% https://pub.ist.ac.at/~schloegl/matlab/NaN/ +% +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. 
+% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see . + + +%%%%% check input arguments %%%%% +[yr, yc] = size(Y); +if nargin<2, + W = []; +end; +if ~isempty(W) && (yr ~= numel(W)), + error('number of rows of Y does not match number of elements in W'); +end; +R.datatype = 'HISTOGRAM'; +if isempty(Y) + R.N = 0; + R.X = zeros(size(Y)); + R.H = []; + return +end + +%%%%% identify all possible X's and generate overall Histogram %%%%% +[Y, idx] = sortrows(Y); + +d = diff(Y,[],1); +ix = any( (~isnan(d) & (d~=0) ) | diff(isnan(Y),[],1), 2); + +tmp = [find(ix); yr]; +R.X = Y(tmp,:); +if isempty(W) + R.H = [tmp(1); diff(tmp)]; + R.N = yr; +else + W = cumsum(W(idx)); + R.H = [W(tmp(1)); diff(W(tmp))]; + R.N = W(end); +end; + +%%%%% generate inverse index %%%%% +if nargout>1, + tix = cumsum([1;ix]); % rank + cc = 1; + tmp = sum(ix) + 1; % largest rank stored in tix; it must fit the integer type without saturating + if tmp < 2^8; + tix = uint8(tix); + cc = 8/1; + elseif tmp < 2^16; + tix = uint16(tix); + cc = 8/2; + elseif tmp < 2^32; + tix = uint32(tix); + cc = 8/4; + end; + [tmp, idx] = sort(idx); % inverse index + tix = tix(idx); % inverse sort rank + + R.compressionratio = (prod(size(R.X)) + yr/cc) / (yr*yc); + R.tix = tix; +end; + +%!assert(getfield(histo4([]),'N'), 0) +%!assert(getfield(histo4(1),'N'), 1) +%!assert(getfield(histo4([1;1]),'H'), 2) +%!assert(getfield(histo4([repmat(NaN,4,2),[1;1;1;3]]),'N')==4) diff --git a/inst/iqr.m b/inst/iqr.m new file mode 100644 index 0000000..c6411f9 --- /dev/null +++ b/inst/iqr.m @@ -0,0 +1,53 @@ +function Q=iqr(Y,DIM) +% IQR calculates the interquartile range +% Missing values (encoded as NaN) are ignored. +% +% Q = iqr(Y) +% Q = iqr(Y,DIM) +% returns the IQR along dimension DIM of sample array Y. 
+% +% Q = iqr(HIS) +% returns the IQR from the histogram HIS. +% HIS must be a HISTOGRAM struct as defined in HISTO2 or HISTO3. +% +% see also: MAD, RANGE, HISTO2, HISTO3, PERCENTILE, QUANTILE + + +% Copyright (C) 2009-2019 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + +if nargin<2, + DIM = []; +end; +if isempty(DIM), + DIM = min(find(size(Y)>1)); + if isempty(DIM), DIM = 1; end; +end; + + +if nargin<1, + help iqr + +else + Q = quantile(Y,[1,3]/4,DIM); + Q = diff(Q,[],DIM); +end; + +%!assert(iqr([1:5,NaN]) == 2) +%!assert(iqr([1:5,NaN]',1) == 2) +%!assert(iqr([1:5,NaN],2) == 2) + diff --git a/inst/kappa.m b/inst/kappa.m new file mode 100644 index 0000000..b70bcea --- /dev/null +++ b/inst/kappa.m @@ -0,0 +1,202 @@ +function [kap,se,H,z,p0,SA,R]=kappa(d,c,arg3,w) +% KAPPA estimates Cohen's kappa coefficient +% and related statistics +% +% [...] = kappa(d1,d2); +% NaN's are handled as missing values and are ignored +% [...] = kappa(d1,d2,'notIgnoreNAN'); +% NaN's are handled as just another Label. +% [kap,sd,H,z,ACC,sACC,MI] = kappa(...); +% X = kappa(...); +% +% d1 data of scorer 1 +% d2 data of scorer 2 +% +% kap Cohen's kappa coefficient point +% se standard error of the kappa estimate +% H Concordance matrix, i.e. 
confusion matrix +% z z-score +% ACC overall agreement (accuracy) +% sACC specific accuracy +% MI Mutual information or transfer information (in [bits]) +% X is a struct containing all the fields above +% For two classes, a number of additional summary statistics including +% TPR, FPR, FDR, PPV, NPV, F1, dprime, Matthews Correlation coefficient (MCC) or +% Phi coefficient (PHI=MCC), Specificity and Sensitivity, Youden index (YI) +% are provided. Note, the positive category must be the larger label (in d and c), otherwise +% the confusion matrix becomes transposed and the summary statistics are messed up. +% +% +% Reference(s): +% [1] Cohen, J. (1960). A coefficient of agreement for nominal scales. Educational and Psychological Measurement, 20, 37-46. +% [2] J Bortz, GA Lienert (1998) Kurzgefasste Statistik fuer die klassische Forschung, Springer Berlin - Heidelberg. +% Kapitel 6: Uebereinstimmungsmasze fuer subjektive Merkmalsurteile. p. 265-270. +% [3] http://www.cmis.csiro.au/Fiona.Evans/personal/msc/html/chapter3.html +% [4] Kraemer, H. C. (1982). Kappa coefficient. In S. Kotz and N. L. Johnson (Eds.), +% Encyclopedia of Statistical Sciences. New York: John Wiley & Sons. +% [5] http://ourworld.compuserve.com/homepages/jsuebersax/kappa.htm +% [6] http://en.wikipedia.org/wiki/Receiver_operating_characteristic + +% Copyright (c) 1997-2021 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ +% +% BioSig is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% BioSig is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. 
+% +% You should have received a copy of the GNU General Public License +% along with BioSig. If not, see . + + +mode.ignoreNAN = 1; +kk = []; +if nargin>2 + if ischar(arg3) + if strcmpi(arg3,'notIgnoreNAN') + mode.ignoreNAN = 0; + end + else + kk = arg3; + end +end; +if nargin<4 + w = []; +end; + +if nargin>1, + d = d(:); + c = c(:); + + tmp = [d;c]; + maxtmp = max(tmp); + tmp(isnan(tmp)) = maxtmp+1; + [X.Label,i,j] = unique(tmp); + c = j(1+numel(d):end); + d = j(1:numel(d)); + kk = max(j); + maxCLASS = kk - any(tmp>maxtmp); + + if mode.ignoreNAN, + if any(j > maxCLASS) +% fprintf(2,'Warning KAPPA: some elements are NaN. These are handled as missing values and are ignored.\n'); +% fprintf(2,'If NaN should be handled as just another label, use kappa(..,''notIgnoreNaN'').\n'); + ix = find((c<=maxCLASS) & (d<=maxCLASS)); + d = d(ix); c=c(ix); + if ~isempty(w), w = w(ix); end; + kk = kk - 1; + end; + X.Label(X.Label>maxtmp) = []; + else + X.Label(X.Label>maxtmp) = NaN; + end; + + if isempty(w) + H = full( sparse (d, c, 1, kk, kk) ); + elseif ~isempty(w), + H = full( sparse (d, c, w, kk, kk) ); + end; + +else + X.Label = 1:min(size(d)); + H = d(X.Label,X.Label); + +end; + +s = warning; +warning('off'); + +N = sum(H(:)); +p0 = sum(diag(H))/N; %accuracy of observed agreement, overall agreement +%OA = sum(diag(H))/N); + +p_i = sum(H,1); +pi_ = sum(H,2)'; + +SA = 2*diag(H)'./(p_i+pi_); % specific agreement + +pe = (p_i*pi_')/(N*N); % estimate of change agreement + +px = sum(p_i.*pi_.*(p_i+pi_))/(N*N*N); + +%standard error +kap = (p0-pe)/(1-pe); +sd = sqrt((pe+pe*pe-px)/(N*(1-pe*pe))); + +%standard error +se = sqrt((p0+pe*pe-px)/N)/(1-pe); +if ~isreal(se), + z = NaN; +else + z = kap/se; +end + +if ((1 < nargout) && (nargout<7)) + warning(s); + return; +end; + +% Nykopp's entropy +pwi = sum(H,2)/N; % p(x_i) +pwj = sum(H,1)/N; % p(y_j) +pji = H./repmat(sum(H,2),1,size(H,2)); % p(y_j | x_i) +R = - sumskipnan(pwj.*log2(pwj)) + sumskipnan(pwi'*(pji.*log2(pji))); + +if 
(nargout>1), warning(s); return; end; + +X.kappa = kap; +X.kappa_se = se; +X.data = H; +X.H = X.data; +X.z = z; +X.ACC = p0; +X.sACC = SA; +X.MI = R; +X.SSE = sum(X.data(:))-trace(X.data); +X.datatype = 'confusion'; + +if length(H)==2, + % see http://en.wikipedia.org/wiki/Receiver_operating_characteristic + % Note that the confusion matrix used here uses more positive values in + % the 2nd row and column, moreover the true values are indicated by + % rows (transposed). Thus, in summary H(1,1) and H(2,2) are exchanged + % as compared to the wikipedia article. + X.TP = H(2,2); + X.TN = H(1,1); + X.FP = H(1,2); + X.FN = H(2,1); + X.FNR = H(2,1) / sum(H(2,:)); + X.FPR = H(1,2) / sum(H(1,:)); + X.TPR = H(2,2) / sum(H(2,:)); + X.PPV = H(2,2) / sum(H(:,2)); + X.NPV = H(1,1) / sum(H(:,1)); + X.FDR = H(1,2) / sum(H(:,2)); + X.MCC = det(H) / sqrt(prod([sum(H), sum(H')])); + X.PHI = X.MCC; + X.F1 = 2 * X.TP / (sum(H(2,:)) + sum(H(:,2))); + X.Sensitivity = X.TPR; %% hit rate, recall + X.Specificity = 1 - X.FPR; + X.Precision = X.PPV; + X.dprime = norminv(X.TPR) - norminv(X.FPR); % d' = z(hit rate) - z(false alarm rate) + X.YI = X.Sensitivity + X.Specificity - 1; % Youden Index + + % statistical significance test of Matthews' correlation coefficient + NN = sum(H(:)); + R = X.MCC; + R(isnan(R)) = 0; + tmp = 1 - R.*R; + tmp(tmp<0) = 0; % prevent tmp<0 i.e. imag(t)~=0 + t = R.*sqrt(max(NN-2,0)./tmp); % test statistic, evaluated by tcdf with NN-2 degrees of freedom + sig = tcdf(t,NN-2); + sig = 2 * min(sig,1 - sig); + X.MCC_p_value = sig; +end; + +kap = X; +warning(s); diff --git a/inst/knnsearch.m b/inst/knnsearch.m new file mode 100644 index 0000000..64b53c8 --- /dev/null +++ b/inst/knnsearch.m @@ -0,0 +1,166 @@ +function [idx, dist]=knnsearch(X,Y,varargin) +% KNNSEARCH search for K nearest neighbors +% and related statistics +% +% Usage: +% IDX = knnsearch(X,Y); +% finds for each element (row) in Y, the nearest +% of all elements in X, such that +% IDX(k) points to X(IDX(k),:) that is nearest to Y(k,:) +% IDX has as many elements as Y has rows +% [IDX,DIST] = knnsearch(X,Y); +% ... 
= knnsearch(...,'k',k); +% search for k nearest neighbors (default: k=1) +% ... = knnsearch(...,'Scale',Scale); +% Scaling vector of 'seuclidean' metric +% default value is std(X) +% ... = knnsearch(...,'Cov',Cov); +% Cov is the covariance matrix used for Mahalanobis distance +% default value is cov(X) +% ... = knnsearch(...,'Distance',Distance); +% the following distance metrics are currently supported: +% 'euclidean' [2], +% 'seuclidean', (scaled euclidean) +% 'minkowski' [3], +% 'cityblock' or 'manhattan' [4], +% 'hamming' [5], +% 'mahalanobis' [6], +% 'cosine' [7] +% (one minus the cosine of the angle between the two samples), +% 'correlation' +% (one minus the linear correlation between each pair of data vectors), +% 'spearman' +% (one minus the rank correlation between each pair of data vectors), +% +% SEE ALSO: corrcoef, spearman, rankcorr, cov, std +% +% Reference(s): +% [1] https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm +% [2] https://en.wikipedia.org/wiki/Euclidean_distance +% [3] https://en.wikipedia.org/wiki/Minkowski_distance +% [4] https://en.wikipedia.org/wiki/Taxicab_geometry +% [5] https://en.wikipedia.org/wiki/Hamming_distance +% [6] https://en.wikipedia.org/wiki/Mahalanobis_distance +% [7] https://en.wikipedia.org/wiki/Cosine_similarity + +% Copyright (C) 2021 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ +% +% BioSig is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% BioSig is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. 
+% +% You should have received a copy of the GNU General Public License +% along with BioSig. If not, see . + + +if nargin<2 + error('missing input arguments') +end + +if size(X,2)~=size(Y,2) + error('number of rows in X and Y must match') +end + +% default values +K=1; # number of NN +P=2; # exponent for minkowski distance +Distance='euclidean'; +NSMethod='exhaustive'; +Scale = []; +Cov = []; + +k=1; +while (k +% This is part of the NaN-toolbox +% https://octave.sourceforge.io/nan/index.html +% https://pub.ist.ac.at/~schloegl/matlab/NaN/ +% +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see . + +% default +tail='unequal'; + +if nargin<2 + X=x; + IX=1:size(X,2); + IY=IX; +else + k=1; + if isnumeric(varargin{k}) + y = varargin{k}; + n1=size(x,1); + n2=size(y,1); + if n1==n2; + X=[x,y]; + elseif n1'] ); +% +% Input: +% x, y input vectors for comparison +% X matrix whos colums are pairwise compared, such +% +% Output: +% H 1: statistical significance (p < alpha) +% D maximum absolute difference between sample data +% D(k,l) is the m.a.d. from X(:,k) and X(:,l) +% df is the degree-of freedom +% df(k,l) = n(k)*n(l)/(n(k)+n(l)) with n samples of corresponding +% column X. 
+% p p-value, it's also a matrix where +% pval(k,l) is the p-value from column k and l +% +% see also: +% kolmogorov_smirnov + +% Copyright (C) 2019,2020 by Alois Schloegl +% This is part of the NaN-toolbox +% https://octave.sourceforge.io/nan/index.html +% https://pub.ist.ac.at/~schloegl/matlab/NaN/ +% +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see . + + +% default +alpha=0.05; +tail='unequal'; + +k=2; +while k < length(varargin) + if strcmpi(varargin{k}, 'tail') + tail = varargin{k+1}; + k = k+1; + elseif strcmpi(varargin{k}, 'alpha') + alpha = varargin{k+1}; + k = k+1; + else + error(sprintf('argument %d not supported - ignored', k)) + end + k=k+1; +end + +[D, ks2stat, p, df] = kolmogorov_smirnov(x(:),y(:), 'tail', tail); +H = p < alpha; + +%! 
assert(kstest2([1:5]',[1:5]'+5)) + + + + diff --git a/inst/kurtosis.m b/inst/kurtosis.m new file mode 100644 index 0000000..29bd773 --- /dev/null +++ b/inst/kurtosis.m @@ -0,0 +1,66 @@ +function R=kurtosis(i,DIM) +% KURTOSIS estimates the kurtosis +% +% y = kurtosis(x,DIM) +% calculates kurtosis of x in dimension DIM +% +% DIM dimension +% 1: STATS of columns +% 2: STATS of rows +% default or []: first DIMENSION, with more than 1 element +% +% features: +% - can deal with NaN's (missing values) +% - dimension argument +% - compatible to Matlab and Octave +% +% see also: SUMSKIPNAN, VAR, STD, VAR, SKEWNESS, MOMENT, STATISTIC, +% IMPLICIT_SKIP_NAN +% +% REFERENCE(S): +% http://mathworld.wolfram.com/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . 
+ +% $Id$ +% Copyright (C) 2000-2003 by Alois Schloegl +% This function is part of the NaN-toolbox for Octave and Matlab +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +if nargin==1, + DIM=min(find(size(i)>1)); + if isempty(DIM), DIM=1; end; +end; + +[R.SUM,R.N,R.SSQ] = sumskipnan(i,DIM); % sum + +R.MEAN = R.SUM./R.N; % mean +R.SSQ0 = R.SSQ - real(R.SUM).*real(R.MEAN) - imag(R.SUM).*imag(R.MEAN); % sum square with mean removed + +%if flag_implicit_unbiased_estim; %% ------- unbiased estimates ----------- + n1 = max(R.N-1,0); % in case of n=0 and n=1, the (biased) variance, STD and SEM are INF +%else +% n1 = R.N; +%end; + +R.VAR = R.SSQ0./n1; % variance (unbiased) +%R.STD = sqrt(R.VAR); % standard deviation + +i = i - repmat(R.MEAN,size(i)./size(R.MEAN)); +%R.CM3 = sumskipnan(i.^3,DIM)./n1; +R.CM4 = sumskipnan(i.^4,DIM)./n1; + +%R.SKEWNESS = R.CM3./(R.STD.^3); +R = R.CM4./(R.VAR.^2)-3; diff --git a/inst/lmom.m b/inst/lmom.m new file mode 100644 index 0000000..3aa9030 --- /dev/null +++ b/inst/lmom.m @@ -0,0 +1,89 @@ +function LMOM = lmom(data,P,opt) +% LMOM estimates the L-Moments [1,2] from a sample distribution +% and might be a useful density estimation [1,3]. +% LMOM is equivalent to samlmo.F from dataplot [4]. +% +% Usage: +% XMOM = lmom(X,P) +% XMOM = lmom(X,P,'ratios') +% +% X input data, NaN's are ignored +% P maximum order, L moments 1:P are estimated +% option: default 'false', +% 'ratios': compute L-moment ratios +% XMOM vector of L-Moments from 1:P +% in case option='ratios', XMOM(3:P) will +% return the L-moment rations (i.e. scaled L-moments). +% +% The current implementation is tested only on data sets up to 1000 samples +% and P=10. The algorithm has not been analyzed with respect to accuracy and +% computational efficiency. Eventually, this implementation should be +% compared also to samlmu.F from dataplot [4], which is also used in [5]. +% +% References: +% [1] Hosking (1990), L-MOMENTS: ANALYSIS AND ESTIMATION OF DISTRIBUTIONS, +% J. R. 
Statist. Soc. B (1990), 52,No. 1,pp. 105-124 +% [2] https://en.wikipedia.org/wiki/L-moment +% [3] https://en.wikipedia.org/wiki/Density_estimation +% [4] Hosking, function samlmo.F from https://github.com/usnistgov/dataplot +% [5] 'lmom'-package for R, available from +% https://www.rdocumentation.org/packages/lmom/versions/2.8/topics/lmom-package + +% Copyright (C) 2019,2020 by Alois Schlögl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +%% This program is free software: you can redistribute it and/or +%% modify it under the terms of the GNU General Public License as +%% published by the Free Software Foundation, either version 3 of the +%% License, or (at your option) any later version. +%% +%% This program is distributed in the hope that it will be useful, but +%% WITHOUT ANY WARRANTY; without even the implied warranty of +%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%% General Public License for more details. +%% +%% You should have received a copy of the GNU General Public License +%% along with this program; see the file COPYING. If not, see +%% . 
+ + +if nargin<3 + opt=0; +end +opt = strcmp(opt,'ratios'); + +data(isnan(data))=[]; +F = [0:length(data)]'/length(data); +u = sort(data); +u = u([1,1:end]); + +% TODO: one might do this more efficiently +p=repmat(NaN,P,P); +for r = 1:P +for k = 1:r + p(r,k)=bincoeff(r,k)*bincoeff(r+k,k)*(-1)^(r-k); +end +end + +for k = 1:P, + xi(k) = trapz(F, u .* F.^(k-1)); + if k==1, + LMOM(k) = xi(k); + else + LMOM(k) = xi(1:k) * [(-1).^(k-1); p(k-1, 1:k-1)']; + end +end +if (opt && (P>2)) + LMOM(3:P) = LMOM(3:P)/LMOM(2); +end + +return + +% lambda(1) = trapz(F, [u] ); +% lambda(2) = trapz(F, [u] .* (2*F-1)) ; +% lambda(3) = trapz(F, [u] .* (6*F.^2 - 6*F + 1)) ; +% lambda(4) = trapz(F, [u] .* (20*F.^3 - 30*F.^2 + 12*F - 1)) ; +% + + diff --git a/inst/load_cifar10.m b/inst/load_cifar10.m new file mode 100644 index 0000000..8e487cf --- /dev/null +++ b/inst/load_cifar10.m @@ -0,0 +1,62 @@ +function [data,labels]=load_cifar10() +% LOAD_CIFAR10 loads cifar10 data [1,2]. +% the data files will be downloaded and uncompressed into +% directory ~/.cache/ +% +% Usage: +% [data,labels]=load_cifar10(); +% +% References: +% [1] Alex Krizhevsky, CIFAR-10 dataset +% https://www.cs.toronto.edu/~kriz/cifar.html +% [2] https://www.cs.toronto.edu/~kriz/cifar-10-matlab.tar.gz + + +% Copyright (C) 2019 Alois Schlögl +% This is part of the NaN-toolbox +% https://octave.sourceforge.io/nan/index.html +% https://pub.ist.ac.at/~schloegl/matlab/NaN/ +% +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. 
+% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see . + + + +URL="https://www.cs.toronto.edu/~kriz/cifar-10-matlab.tar.gz"; +DOWNLOAD_DIRECTORY = fullfile(getenv('HOME'),'.cache/'); +if ~exist(DOWNLOAD_DIRECTORY,'dir'), + mkdir(DOWNLOAD_DIRECTORY); +end; +DOWNLOAD = fullfile(DOWNLOAD_DIRECTORY,'cifar-10-matlab.tar.gz'); +DATAFILE = fullfile(DOWNLOAD_DIRECTORY,'cifar-10-batches-mat','data_batch_%d.mat'); + +if ~exist(sprintf(DATAFILE, 1)) + if ~exist(DOWNLOAD,'file') + fprintf(1,'Downloading cifar-10 database (~170 MB) to %s/\n', DOWNLOAD_DIRECTORY); + system(sprintf('wget %s -O %s',URL, DOWNLOAD)); + end + unpack(DOWNLOAD, DOWNLOAD_DIRECTORY) +end; + +data = []; +labels = []; +for k = 1:5, + x = load(sprintf(DATAFILE,k)); + data = [data; x.data]; + labels = [labels; x.labels]; +end +x = load(fullfile(fileparts(DATAFILE),'test_batch.mat')); +data = [data; x.data]; +labels = [labels; x.labels]; + + diff --git a/inst/load_cifar100.m b/inst/load_cifar100.m new file mode 100644 index 0000000..fa2b733 --- /dev/null +++ b/inst/load_cifar100.m @@ -0,0 +1,56 @@ +function [traindata,testdata,metadata]=load_cifar100() +% LOAD_CIFAR100 loads cifar100 data [1,2]. +% the data files will be downloaded and uncompressed into +% directory $HOME/.cache/ +% +% Usage: +% [traindata, testdata, meta]=load_cifar100(); +% +% References: +% [1] Alex Krizhevsky, CIFAR-100 dataset +% https://www.cs.toronto.edu/~kriz/cifar.html +% [2] https://www.cs.toronto.edu/~kriz/cifar-100-matlab.tar.gz + + +% Copyright (C) 2019 Alois Schlögl +% This is part of the NaN-toolbox +% https://octave.sourceforge.io/nan/index.html +% https://pub.ist.ac.at/~schloegl/matlab/NaN/ +% +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. 
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program. If not, see .
+
+
+
+% Remote archive and local cache directory ($HOME/.cache).
+URL="https://www.cs.toronto.edu/~kriz/cifar-100-matlab.tar.gz";
+DOWNLOAD_DIRECTORY = fullfile(getenv('HOME'),'.cache');
+if ~exist(DOWNLOAD_DIRECTORY,'dir'),
+    mkdir(DOWNLOAD_DIRECTORY);
+end;
+DOWNLOAD = fullfile(DOWNLOAD_DIRECTORY,'cifar-100-matlab.tar.gz');
+DATAFILE = fullfile(DOWNLOAD_DIRECTORY,'cifar-100-matlab','train.mat');
+
+% Download and unpack only when train.mat is not yet in the cache.
+% DATAFILE is a plain path (no format specifier), so it is tested directly;
+% passing it through sprintf would mangle paths containing '\' or '%'.
+if ~exist(DATAFILE,'file')
+    if ~exist(DOWNLOAD,'file')
+        fprintf(1,'Downloading cifar-100 database (~170 MB) to %s/\n', DOWNLOAD_DIRECTORY);
+        status = system(sprintf('wget %s -O %s',URL, DOWNLOAD));
+        if status ~= 0
+            % "wget -O" creates the output file even when the transfer
+            % fails; remove it, otherwise every later call would skip the
+            % download and try to unpack a broken archive.
+            if exist(DOWNLOAD,'file'), delete(DOWNLOAD); end
+            error('load_cifar100: download of %s failed (wget exit status %d)', URL, status);
+        end
+    end
+    untar(DOWNLOAD, DOWNLOAD_DIRECTORY);
+end;
+
+% Each output is the struct returned by load() for the corresponding file.
+traindata = load(fullfile(fileparts(DATAFILE),'train.mat'));
+testdata = load(fullfile(fileparts(DATAFILE),'test.mat'));
+metadata = load(fullfile(fileparts(DATAFILE),'meta.mat'));
+
+
+
diff --git a/inst/load_fisheriris.m b/inst/load_fisheriris.m
new file mode 100644
index 0000000..92f400e
--- /dev/null
+++ b/inst/load_fisheriris.m
@@ -0,0 +1,74 @@
+% LOAD_FISHERIRIS
+% loads famous iris data set from Fisher, 1936 [1].
+%
+% References:
+% [1] Fisher,R.A. "The use of multiple measurements in taxonomic problems"
+% Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to Mathematical Statistics" (John Wiley, NY, 1950).
+% [2] Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
+% (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218.
+
+% Copyright (C) 2009,2010,2016,2019,2020 by Alois Schloegl
+% This function is part of the NaN-toolbox
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+% This program is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public License
+% as published by the Free Software Foundation; either version 3
+% of the License, or (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; if not, write to the Free Software
+% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+
+
+% This is a script: it defines the variables MEAS and SPECIES in the
+% caller's workspace (the self-test at the end expects 150x4 and 150x1).
+% Under Octave the raw data file is cached in tempdir() and parsed here;
+% otherwise the data set is loaded with "load fisheriris".
+if exist('OCTAVE_VERSION','builtin')
+    IRIS=fullfile(tempdir(),'iris.dat');
+
+    % Fetch the data file once; later runs reuse the cached copy.
+    if ~exist(IRIS,'file')
+        if exist('webread','file') % available in Octave 6 or 7
+            % keep the downloaded text in SPECIES; it is reused by the str2array branch below
+            species = webread('http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data');
+            fid=fopen(IRIS,'w'); fwrite(fid,species,'char'); fclose(fid);
+        elseif strncmp(computer,'PCWIN',5) % on MXE-OCTAVE for Windows, wget is available
+            % NOTE(review): this branch only prints instructions; nothing waits
+            % for a key press and nothing is downloaded, so the parsing below
+            % runs on a file that may not exist yet - confirm whether a
+            % "pause" was intended here.
+            fprintf(1,'Download http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data and save in under %s\nPress any key to continue ...\n',IRIS);
+        else
+            system(['wget http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data -O ',IRIS]);
+        end;
+    end;
+
+    % Parse the comma-separated file with the first parser that is available.
+    if exist('str2array','file')==3,
+        % exist(...)==3: str2array is available as a compiled MEX file
+        if ~exist('species','var')
+            tmp = fopen(IRIS); species=fread(tmp,[1,inf],'uint8=>char'); fclose(tmp);
+        end
+        [meas, tmp, species] = str2array(species,',');
+        % keep the 150 records: columns 1-4 are measurements, column 5 the species
+        meas = meas(1:150, 1:4);
+        species = species(1:150, 5);
+
+    elseif exist('textread', 'file'),
+        [a,b,c,d,species] = textread (IRIS, '%f,%f,%f,%f,%s\n');
+        meas = [a,b,c,d];
+        clear a b c d;
+        if (size(meas,1)==151)
+            % remove empty line at the end
+            meas(151,:)=[];
+            species(151)=[];
+        end
+    else
+        % fallback: read the whole file as text and split it with str2double
+        tmp = fopen(IRIS); species=fread(tmp,[1,inf],'uint8=>char'); fclose(tmp);
+        [meas,tmp,species]=str2double(species,',');
+        meas = meas(:,1:4);
+        species = species(:,5);
+    end
+else
+    load fisheriris;
+end;
+
+%!xtest
+%! load_fisheriris
+%! assert(all(size(meas)==[150,4]))
+%! assert(all(size(species)==[150,1]))
+
diff --git a/inst/load_mnist.m b/inst/load_mnist.m
new file mode 100644
index 0000000..ac3d6c5
--- /dev/null
+++ b/inst/load_mnist.m
@@ -0,0 +1,80 @@
+function [train_data, train_labels, test_data, test_labels] = load_mnist(f)
+% LOAD_MNIST load MNIST database [1]
+%
+% Usage:
+% [train_data, train_labels, test_data, test_labels] = load_mnist();
+%
+%
+% References:
+% [1] Yann LeCun, Corinna Cortes, Christopher J.C. Burges,
+% THE MNIST DATABASE of handwritten digits
+% http://yann.lecun.com/exdb/mnist/
+
+
+% Copyright (C) 2019 Alois Schlögl
+% This is part of the NaN-toolbox
+% https://octave.sourceforge.io/nan/index.html
+% https://pub.ist.ac.at/~schloegl/matlab/NaN/
+%
+% This program is free software: you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation, either version 3 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program. If not, see .
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Download MNIST data
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+URL = 'http://yann.lecun.com/exdb/mnist/';
+% order matters below: {1} training images, {2} test images,
+%                      {3} training labels, {4} test labels
+files = {'train-images-idx3-ubyte.gz', 't10k-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz', 't10k-labels-idx1-ubyte.gz'};
+DOWNLOAD_DIRECTORY = fullfile(getenv('HOME'),'.cache/mnist');
+if ~exist(DOWNLOAD_DIRECTORY,'dir'),
+    mkdir(DOWNLOAD_DIRECTORY);
+end;
+for k = 1:length(files)
+    DOWNLOAD = fullfile(DOWNLOAD_DIRECTORY,files{k});
+    if ~exist(DOWNLOAD,'file')
+        status = system(sprintf('wget %s/%s -O %s',URL, files{k},DOWNLOAD));
+        if status ~= 0
+            % "wget -O" creates the output file even when the transfer
+            % fails; remove it so that the next call retries the download.
+            % Only warn: reading an explicitly given file F must still work.
+            if exist(DOWNLOAD,'file'), delete(DOWNLOAD); end
+            warning('load_mnist: download of %s failed (wget exit status %d)', files{k}, status);
+        end
+    end
+end
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% load all files
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Without a file argument (or with several outputs) read all four files by
+% calling this function recursively with one file name each.
+% The indices follow the order of FILES: images are entries 1 and 2,
+% labels are entries 3 and 4.
+if ((nargin < 1) || (nargout>1)),
+    train_data = load_mnist(fullfile(DOWNLOAD_DIRECTORY,files{1}));
+    train_labels = load_mnist(fullfile(DOWNLOAD_DIRECTORY,files{3}));
+    test_data = load_mnist(fullfile(DOWNLOAD_DIRECTORY,files{2}));
+    test_labels = load_mnist(fullfile(DOWNLOAD_DIRECTORY,files{4}));
+    return;
+end
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+% Open and read content of file(s)
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+% 'rz' opens the gzip-compressed file for reading; header fields are big-endian
+fid = fopen(f, 'rz', 'ieee-be');
+if fid<0, error('can not open file'); end
+
+magic = fread(fid, 1, 'int32');
+N = fread(fid, 1, 'int32');    % number of items in the file
+if magic==2051,
+    % image file: two more header fields give the image size
+    sz = fread(fid, [1,2], 'int32');
+    pixel = reshape(fread(fid, [prod(sz),N], 'uint8=>uint8')',[N,sz]);
+elseif magic==2049,
+    % label file: one byte per item
+    pixel = fread(fid, [N,1], 'uint8=>uint8');
+else
+    % close the file before raising the error, otherwise the handle leaks
+    fclose(fid);
+    error('unknown file type');
+end
+fclose(fid);
+% in single-file mode the content is returned through the first output
+train_data = pixel;
+return
+
diff --git a/inst/mad.m b/inst/mad.m
new file mode 100644
index 0000000..4d38f5b
--- /dev/null
+++ b/inst/mad.m
@@ -0,0 +1,64 @@
+function R = mad(i,DIM)
+% MAD estimates the Mean Absolute deviation
+% (note that according to [1,2] this is the mean deviation;
+% not the mean absolute deviation)
+%
+% y = mad(x,DIM)
+% calculates the mean
deviation of x in dimension DIM +% +% DIM dimension +% 1: STATS of columns +% 2: STATS of rows +% default or []: first DIMENSION, with more than 1 element +% +% features: +% - can deal with NaN's (missing values) +% - dimension argument +% - compatible to Matlab and Octave +% +% see also: SUMSKIPNAN, VAR, STD, +% +% REFERENCE(S): +% [1] http://mathworld.wolfram.com/MeanDeviation.html +% [2] L. Sachs, "Applied Statistics: A Handbook of Techniques", Springer-Verlag, 1984, page 253. +% +% [3] http://mathworld.wolfram.com/MeanAbsoluteDeviation.html +% [4] Kenney, J. F. and Keeping, E. S. "Mean Absolute Deviation." +% Chapter 6.4 in Mathematics of Statistics, Pt. 1, 3rd ed. Princeton, NJ: Van Nostrand, pp. 76-77 1962. + +% Copyright (C) 2000-2002,2010,2019 by Alois Schloegl +% This is part of the NaN-toolbox. For more details see +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . 
+
+% Default DIM: first dimension with more than one element (1 if there is none).
+if nargin==1,
+    DIM = find(size(i)>1,1);
+    if isempty(DIM),
+        DIM = 1;
+    end;
+end;
+
+
+% sum and number of the non-NaN elements along DIM
+[total,count] = sumskipnan(i,DIM);
+% replicate the mean along DIM and subtract it from the data
+mu = repmat(total./count, size(i)./size(total));
+dev = i - mu;
+% sum of the absolute deviations and number of non-NaN deviations
+[S,N] = sumskipnan(abs(dev),DIM);
+
+% normalization by N-1, clipped at zero
+% (the alternative normalization by N is kept disabled below)
+%if flag_implicit_unbiased_estim; %% ------- unbiased estimates -----------
+    n1 = max(N-1,0); % in case of n=0 and n=1, the (biased) variance, STD and STE are INF
+%else
+% n1 = N;
+%end;
+
+R = S./n1;
+
+
diff --git a/inst/mahal.m b/inst/mahal.m
new file mode 100644
index 0000000..7a68fa5
--- /dev/null
+++ b/inst/mahal.m
@@ -0,0 +1,54 @@
+function [d] = mahal(X,Y)
+% MAHAL return the Mahalanobis' D-square distance between the
+% multivariate samples x and y, which must have the same number
+% of components (columns), but may have a different number of observations (rows).
+%
+% d = mahal(X,Y)
+%
+% d(k) = (X(k,:)-MU)*inv(SIGMA)*(X(k,:)-MU)'
+%
+% where MU and SIGMA are the mean and the covariance matrix of Y
+%
+%
+% see also: TRAIN_SC, TEST_SC, COVM
+%
+% References:
+
+% Copyright (C) 2009,2014 by Alois Schloegl
+% This function is part of the NaN-toolbox
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+% This program is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public License
+% as published by the Free Software Foundation; either version 2
+% of the License, or (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; if not, write to the Free Software
+% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+
+sx = size(X);
+sy = size(Y);
+
+% X and Y must have the same number of columns (components)
+if sx(2)~=sy(2),
+    error('number of columns of X and Y do not fit');
+end;
+
+
+% compute mean of Y and remove it
+[Y,m] = center(Y,1);
+
+% compute inverse covariance matrix
+% (CC is divided element-wise by MM-1, clipped at zero, before inversion)
+[CC,MM] = covm(Y,'M');
+IR= inv(CC./max(0,MM-1));
+
+% subtract the mean of Y from every row of X
+X = X-m(ones(size(X,1),1),:);
+% row-wise quadratic form x*IR*x'; terminated with a semicolon so that the
+% result is returned without being echoed to the console on every call
+d = sum((X*IR).*X,2);
+
+
diff --git a/inst/mean.m b/inst/mean.m
new file mode 100644
index 0000000..b3a3d46
--- /dev/null
+++ b/inst/mean.m
@@ -0,0 +1,131 @@
+function [y]=mean(x,DIM,opt,W)
+% MEAN calculates the mean of data elements.
+%
+% y = mean(x [,DIM] [,opt] [, W])
+%
+% DIM dimension
+% 1 MEAN of columns
+% 2 MEAN of rows
+% N MEAN of N-th dimension
+% default or []: first DIMENSION, with more than 1 element
+%
+% opt options
+% 'A' arithmetic mean
+% 'G' geometric mean
+% 'H' harmonic mean
+%
+% W weights to compute weighted mean (default: [])
+% if W=[], all weights are 1.
+% number of elements in W must match size(x,DIM)
+%
+% usage:
+% mean(x)
+% mean(x,DIM)
+% mean(x,opt)
+% mean(x,opt,DIM)
+% mean(x,DIM,opt)
+% mean(x,DIM,W)
+% mean(x,DIM,opt,W); '
+%
+% features:
+% - can deal with NaN's (missing values)
+% - weighting of data
+% - dimension argument also in Octave
+% - compatible to Matlab and Octave
+%
+% see also: SUMSKIPNAN, MEAN, GEOMEAN, HARMMEAN
+%
+
+% $Id$
+% Copyright (C) 2000-2004,2008,2009,2011 by Alois Schloegl
+% This is part of the NaN-toolbox. For more details see
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+%
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; If not, see .
+
+% ---- argument parsing -------------------------------------------------
+% The second and third argument may be given in either order. An option
+% letter is told apart from a dimension by its value: the test (DIM>64)
+% is true for the character codes of letters ('A' is 65).
+if nargin==1,
+    %------ case: mean(x)
+    W = [];
+    DIM=[];
+    opt='a';
+elseif (nargin==2)
+    W = [];
+    %if ~isnumeric(DIM), %>=65;%abs('A'),
+    if (DIM>64) %abs('A'),
+        %------ case: mean(x,opt)
+        opt=DIM;
+        DIM=[];
+    elseif (DIM>length(size(x)))
+        % DIM exceeds the number of dimensions of x: x is returned unchanged
+        y=x;
+        return;
+    else
+        %------ case: mean(x,DIM)
+        opt='a';
+    end;
+elseif (nargin == 3),
+    if isnumeric(DIM) && isnumeric(opt)
+        %------ case: mean(x,DIM,W)
+        W = opt;
+        opt='a';
+    elseif (DIM>64) %abs('A'),
+        %------ case: mean(x,opt,DIM)
+        %if ~isnumeric(DIM), %>=65;%abs('A'),
+        % swap the two arguments into their canonical positions
+        tmp=opt;
+        opt=DIM;
+        DIM=tmp;
+        W = [];
+    elseif (DIM>length(size(x)))
+        % DIM exceeds the number of dimensions of x: x is returned unchanged
+        y=x;
+        return;
+    else
+        %------ case: mean(x,DIM,opt)
+        W = [];
+    end;
+elseif nargin==4,
+    %------ case: mean(x,DIM,opt,W)
+    ;
+else
+    % NOTE(review): after printing the help text execution continues with
+    % opt/DIM/W unset - presumably only reachable when called without arguments
+    help mean
+% fprintf(1,'usage: mean(x) or mean(x,DIM) or mean(x,opt,DIM) or mean(x,DIM,opt) or mean(x,DIM,W) or mean(x,DIM,opt,W); '
+end;
+
+% ---- normalize the option letter: empty means 'A', lower case is accepted
+if isempty(opt)
+    opt = 'A';
+elseif any(opt=='aAgGhH')
+    opt = upper(opt); % eliminate old version
+else
+    error('Error MEAN: invalid opt argument');
+end;
+
+% ---- compute the requested mean; sumskipnan returns the sum y and the
+% count n used as denominator
+if (opt == 'A')
+    % arithmetic mean: sum / count
+    [y, n] = sumskipnan(x,DIM,W);
+    y = y./n;
+elseif (opt == 'G')
+    % geometric mean: exp of the arithmetic mean of log(x)
+    [y, n] = sumskipnan(log(x),DIM,W);
+    y = exp (y./n);
+elseif (opt == 'H')
+    % harmonic mean: count / sum of reciprocals
+    [y, n] = sumskipnan(1./x,DIM,W);
+    y = n./y;
+else
+    fprintf (2,'mean: option `%s` not recognized', opt);
+end
+
+%!assert(mean([1,NaN],1),[1,NaN])
+%!assert(mean([1,NaN],2),1)
+%!assert(mean([+inf,-inf]),NaN)
+%!assert(mean([+0,-0],'h'),NaN)
+%!assert(mean([1,4,NaN],'g'),2)
+
+
+
diff --git a/inst/meandev.m b/inst/meandev.m
new file mode 100644
index 0000000..d9e0b82
--- /dev/null
+++ b/inst/meandev.m
@@ -0,0 +1,62 @@
+function R = meandev(i,DIM)
+% MEANDEV estimates the Mean deviation
+% (note that according to [1,2] this is the mean deviation;
+% not the mean absolute deviation)
+%
+% y = meandev(x,DIM)
+% calculates the mean deviation of x in dimension DIM
+%
+% DIM
dimension +% 1: STATS of columns +% 2: STATS of rows +% default or []: first DIMENSION, with more than 1 element +% +% features: +% - can deal with NaN's (missing values) +% - dimension argument +% - compatible to Matlab and Octave +% +% see also: SUMSKIPNAN, VAR, STD, MAD +% +% REFERENCE(S): +% [1] http://mathworld.wolfram.com/MeanDeviation.html +% [2] L. Sachs, "Applied Statistics: A Handbook of Techniques", Springer-Verlag, 1984, page 253. +% [3] http://mathworld.wolfram.com/MeanAbsoluteDeviation.html +% [4] Kenney, J. F. and Keeping, E. S. "Mean Absolute Deviation." +% Chapter 6.4 in Mathematics of Statistics, Pt. 1, 3rd ed. Princeton, NJ: Van Nostrand, pp. 76-77 1962. + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . 
+
+% Copyright (C) 2000-2002,2010,2019 by Alois Schloegl
+% This function is part of the NaN-toolbox for Octave and Matlab
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+% Default DIM: first dimension with more than one element (1 if there is none).
+if nargin==1,
+    DIM = find(size(i)>1,1);
+    if isempty(DIM),
+        DIM = 1;
+    end;
+end;
+
+% sum and number of the non-NaN elements along DIM
+[total,count] = sumskipnan(i,DIM);
+% replicate the mean along DIM and subtract it from the data
+centered = i - repmat(total./count, size(i)./size(total));
+% sum of the absolute deviations and number of non-NaN deviations
+[S,N] = sumskipnan(abs(centered),DIM);
+
+% normalization by N-1, clipped at zero
+% (the alternative normalization by N is kept disabled below)
+%if flag_implicit_unbiased_estim; %% ------- unbiased estimates -----------
+    n1 = max(N-1,0); % in case of n=0 and n=1, the (biased) variance, STD and STE are INF
+%else
+% n1 = N;
+%end;
+
+R = S./n1;
+
+
diff --git a/inst/meansq.m b/inst/meansq.m
new file mode 100644
index 0000000..67269bf
--- /dev/null
+++ b/inst/meansq.m
@@ -0,0 +1,53 @@
+function o=meansq(x,DIM,W)
+% MEANSQ calculates the mean of the squares
+%
+% y = meansq(x,DIM,W)
+%
+% DIM dimension
+% 1 MEANSQ of columns
+% 2 MEANSQ of rows
+% N MEANSQ of N-th dimension
+% default or []: first DIMENSION, with more than 1 element
+% W weights to compute weighted mean (default: [])
+% if W=[], all weights are 1.
+% number of elements in W must match size(x,DIM)
+%
+% features:
+% - can deal with NaN's (missing values)
+% - weighting of data
+% - dimension argument also in Octave
+% - compatible to Matlab and Octave
+%
+% see also: SUMSQ, SUMSKIPNAN, MEAN, VAR, STD, RMS
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 2 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; If not, see .
+
+% Copyright (C) 2000-2003,2009 by Alois Schloegl
+% $Id$
+% This function is part of the NaN-toolbox for Octave and Matlab
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+
+% Fill in the optional arguments; [] selects the defaults of sumskipnan.
+if nargin<3,
+    W = [];
+end;
+if nargin<2,
+    DIM = [];
+end;
+
+% Only the count and the sum of squares are needed; the plain sum
+% (first output) is discarded.
+[unused_sum,cnt,sumsq] = sumskipnan(x,DIM,W);
+
+o = sumsq./cnt;
+
+
diff --git a/inst/medAbsDev.m b/inst/medAbsDev.m
new file mode 100644
index 0000000..ba7a1b7
--- /dev/null
+++ b/inst/medAbsDev.m
@@ -0,0 +1,44 @@
+function [D, M] = medAbsDev(X, DIM)
+% medAbsDev calculates the median absolute deviation
+%
+% Usage: D = medAbsDev(X, DIM)
+% or: [D, M] = medAbsDev(X, DIM)
+% Input: X : data
+% DIM: dimension along which mad should be calculated (1=columns, 2=rows)
+% (optional, default=first dimension with more than 1 element
+% Output: D : median absolute deviations
+% M : medians (optional)
+
+
+% Copyright (C) 2003 Patrick Houweling % Copyright (C) 2009 Alois Schloegl
+% $Id$
+% This function is part of the NaN-toolbox
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+% This program is free software: you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation, either version 3 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program. If not, see .
+ + +% input checks +if any(size(X)==0), + return; +end; + +if nargin<2, + M = median(X); +else + M = median(X, DIM); +end; + +% median absolute deviation: median of absolute deviations to median +D = median(abs(X - repmat(M, size(X)./size(M))), DIM); \ No newline at end of file diff --git a/inst/median.m b/inst/median.m new file mode 100644 index 0000000..ab1f5f5 --- /dev/null +++ b/inst/median.m @@ -0,0 +1,94 @@ +function [y]=median(x,DIM) +% MEDIAN data elements, +% [y]=median(x [,DIM]) +% +% DIM dimension +% 1: median of columns +% 2: median of rows +% N: median of N-th dimension +% default or []: first DIMENSION, with more than 1 element +% +% features: +% - can deal with NaN's (missing values) +% - accepts dimension argument like in Matlab in Octave, too. +% - compatible to Matlab and Octave +% +% see also: SUMSKIPNAN + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . 
+ +% $Id$ +% Copyright (C) 2000-2003,2009 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +global FLAG_NANS_OCCURED; + +% check dimension of x +sz=size(x); + +% find the dimension for median +if nargin<2, + DIM=min(find(sz>1)); + if isempty(DIM), DIM=1; end; +end; + +if DIM>length(sz), + sz = [sz,ones(1,DIM-length(sz))]; +end; + +D1 = prod(sz(1:DIM-1)); +D2 = sz(DIM); +D3 = prod(sz(DIM+1:length(sz))); +D0 = [sz(1:DIM-1),1,sz(DIM+1:length(sz))]; +y = repmat(nan,D0); +flag_MexKthElement = exist('kth_element','file')==3; + +for k = 0:D1-1, +for l = 0:D3-1, + xi = k + l * D1*sz(DIM) + 1 ; + xo = k + l * D1 + 1; + t = x(xi+(0:sz(DIM)-1)*D1); + t = t(~isnan(t)); + n = length(t); + + if n==0, + y(xo) = nan; + elseif flag_MexKthElement, + if (D1==1) t = t+0.0; end; % make sure a real copy (not just a reference to x) is used + flag_KthE = 0; % fast kth_element can be used, because t does not contain any NaN and there is need to care about in-place sorting + if ~rem(n,2), + y(xo) = sum( kth_element( double(t), n/2 + [0,1], flag_KthE) ) / 2; + elseif rem(n,2), + y(xo) = kth_element(double(t), (n+1)/2, flag_KthE); + end; + else + t = sort(t); + if ~rem(n,2), + y(xo) = (t(n/2) + t(n/2+1)) / 2; + elseif rem(n,2), + y(xo) = t((n+1)/2); + end; + end + + if (n +% This functions is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. 
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; If not, see .
+
+% Body of MOMENT. The variables used below are the data I, the order P and
+% the optional arguments OPT and DIM.
+% NOTE(review): the function line itself is not visible in this excerpt;
+% the argument order (i,p,opt,DIM) is inferred from the nargin handling.
+if nargin==2,
+    DIM=[];
+    opt=[];
+elseif nargin==3,
+    DIM=[];
+elseif nargin==4,
+
+else
+    fprintf('Error MOMENT: invalid number of arguments\n');
+    return;
+end;
+
+if p<=0;
+    fprintf('Error MOMENT: invalid model order p=%f\n',p);
+    return;
+end;
+
+% OPT and DIM may be given in either order: swap them when the third
+% argument is numeric or the fourth one is not.
+if isnumeric(opt) || ~isnumeric(DIM),
+    tmp = DIM;
+    DIM = opt;
+    opt = tmp;
+end;
+if isempty(opt),
+    opt='r';
+end;
+% Default DIM: first dimension with more than one element (1 if there is none).
+if isempty(DIM),
+    DIM = find(size(i)>1,1);
+    if isempty(DIM), DIM=1; end;
+end;
+
+% N is the final normalization; NaN means "use the number of valid samples"
+N = nan;
+if isstruct(i),
+    % histogram input: the field X is weighted by the counts in the field H
+    if isfield(i,'HISTOGRAM'),
+        sz = size(i.H)./size(i.X);
+        X = repmat(i.X,sz);
+        if any(opt=='c'),
+            N = sumskipnan(i.H,1); % total count per column
+            S = sumskipnan(i.H.*X,1); % weighted sum
+            % The mean is sum/count, so it has to be removed BEFORE the
+            % count is reduced for the unbiased normalization.
+            X = X - repmat(S./N, size(X)./size(S)); % remove mean
+            N = max(N-1,0); % for unbiased estimation
+        end;
+        if any(opt=='a'),
+            X = abs(X);
+        end;
+        [M,n] = sumskipnan(X.^p.*i.H,1);
+    else
+        warning('invalid datatype')
+    end;
+else
+    if any(opt=='c'),
+        [S,N] = sumskipnan(i,DIM); % generate N and SUM
+        % The mean is sum/count, so it has to be removed BEFORE the
+        % count is reduced for the unbiased normalization.
+        i = i - repmat(S./N, size(i)./size(S)); % remove mean
+        N = max(N-1,0); % for unbiased estimation
+    end;
+    if any(opt=='a'),
+        i = abs(i);
+    end;
+    [M,n] = sumskipnan(i.^p,DIM);
+end;
+
+% without option 'c' normalize by the number of valid samples
+if isnan(N), N=n; end;
+M = M./N;
diff --git a/inst/nanconv.m b/inst/nanconv.m
new file mode 100644
index 0000000..39d46b0
--- /dev/null
+++ b/inst/nanconv.m
@@ -0,0 +1,59 @@
+function [C,N,c] = nanconv(X,Y,arg3)
+% NANCONV computes the convolution for data with missing values.
+% X and Y can contain missing values encoded with NaN.
+% NaN's are skipped, NaN do not result in a NaN output.
+% The output gives NaN only if there are insufficient input data
+%
+% [...] = NANCONV(X,Y);
+% calculates 2-dim convolution between X and Y
+% [C] = NANCONV(X,Y);
+%
+% WARNING: missing values can introduce aliasing - causing unintended results.
+% Moreover, the behavior of bandpass and highpass filters in case of missing values
+% is not fully understood, and might contain some pitfalls.
+%
+% see also: CONV, NANCONV2, NANFFT, NANFILTER
+
+% $Id: conv2nan.m 6973 2010-02-28 20:19:12Z schloegl $
+% Copyright (C) 2000-2005,2010,2011 by Alois Schloegl
+% This function is part of the NaN-toolbox
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/ and
+% http://octave.svn.sourceforge.net/viewvc/octave/trunk/octave-forge/extra/NaN/inst/
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; If not, see .
+
+
+warning('NANCONV is experimental. For more details see HELP NANCONV');
+
+
+% a wrong number of arguments is only reported; processing continues
+if nargin~=2,
+    fprintf(2,'Error NANCONV2: incorrect number of input arguments\n');
+end;
+
+% remember where the missing values are and replace them by zero
+missingX = isnan(X);
+missingY = isnan(Y);
+
+X(missingX) = 0;
+Y(missingY) = 0;
+
+C = conv(X,Y); % convolution of the zero-filled data
+N = conv(real(~missingX),real(~missingY)); % normalization term: convolution of the validity masks
+c = conv(ones(size(X)),ones(size(Y))); % correction of normalization: the same term without missing values
+
+% The normalization depends on how many outputs are requested.
+switch nargout
+    case 1
+        C = C.*c./N;
+    case 2
+        N = N./c;
+end;
+
diff --git a/inst/nanfft.m b/inst/nanfft.m
new file mode 100644
index 0000000..220d163
--- /dev/null
+++ b/inst/nanfft.m
@@ -0,0 +1,58 @@
+function [Y,N,N2] = nanfft(X,N,DIM);
+% NANFFT calculates the Fourier-Transform of X for data with missing values.
+% NANFFT is the same as FFT but X can contain missing values encoded with NaN.
+% NaN's are skipped, NaN do not result in a NaN output.
+%
+% Y = NANFFT(X)
+% Y = NANFFT(X,N)
+% Y = NANFFT(X,[],DIM)
+%
+% [Y,N] = NANFFT(...)
+% returns the number of valid samples N
+%
+%
+% WARNING: missing values can introduce aliasing - causing unintended results.
+% Moreover, the behavior of bandpass and highpass filters in case of missing values
+% is not fully understood, and might contain some pitfalls.
+%
+% see also: FFT, XCORR, NANCONV, NANFILTER
+
+% $Id$
+% Copyright (C) 2005,2011 by Alois Schloegl
+% This function is part of the NaN-toolbox available at
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/ and
+% http://octave.svn.sourceforge.net/viewvc/octave/trunk/octave-forge/extra/NaN/inst/
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; if not, write to the Free Software
+% Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+warning('NANFFT is experimental. For more details see HELP NANFFT');
+
+% mask of the missing values; they are replaced by zero before the transform
+NX = isnan(X);
+X(NX) = 0;
+
+% Y  : transform of the zero-filled data
+% N2 : number of non-NaN samples (summed along DIM when DIM is given)
+% N  : transform of the NaN mask (overwrites the input argument N)
+switch nargin
+    case 1
+        Y = fft(X);
+        N2 = sum(1-NX);
+        N = fft(NX);
+    case 2
+        % NOTE(review): the mask is transformed without the length argument
+        % here, unlike in the three-argument case - confirm that this is intended
+        Y = fft(X,N);
+        N2 = sum(1-NX);
+        N = fft(NX);
+    case 3
+        Y = fft(X,N,DIM);
+        N2 = sum(1-NX,DIM);
+        N = fft(NX,N,DIM);
+end;
+
diff --git a/inst/nanfilter.m b/inst/nanfilter.m
new file mode 100644
index 0000000..19e8077
--- /dev/null
+++ b/inst/nanfilter.m
@@ -0,0 +1,62 @@
+function [Y,Z] = nanfilter(B,A,X,z);
+% NANFILTER is able to filter data with missing values encoded as NaN.
+%
+% [Y,Z] = nanfilter(B,A,X [, Z]);
+%
+% If X contains no missing data, NANFILTER should behave like FILTER.
+% NaN-values are handled gracefully.
+%
+% WARNING: missing values can introduce aliasing - causing unintended results.
+% Moreover, the behavior of bandpass and highpass filters in case of missing values
+% is not fully understood, and might contain some pitfalls.
+%
+% see also: FILTER, SUMSKIPNAN, NANFFT, NANCONV, NANFILTER1UC
+
+% $Id$
+% Copyright (C) 2005,2011 by Alois Schloegl
+% This function is part of the NaN-toolbox available at
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/ and
+% http://octave.svn.sourceforge.net/viewvc/octave/trunk/octave-forge/extra/NaN/inst/
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; if not, write to the Free Software
+% Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+warning('NANFILTER is experimental. For more details see HELP NANFILTER');
+
+na = length(A);
+nb = length(B);
+% number of channels: 1 for vector input, otherwise the number of columns
+if any(size(X)==1)
+    nc = 1;
+else
+    nc = size(X,2);
+end;
+
+% The filter state Z is a struct with two fields: Z.S is the state of the
+% data filter, Z.N the state of the filter applied to the validity mask.
+% Missing states are obtained by filtering an all-zero signal.
+if nargin<4,
+    [t,Z.S] = filter(B,A,zeros(na+nb,nc));
+    [t,Z.N] = filter(B,A,zeros(na+nb,nc));
+elseif isnumeric(z),
+    % a numeric state is taken as the state of the data filter only
+    Z.S = z;
+    [t, Z.N] = filter(B, A, zeros(na+nb,nc));
+elseif isstruct(z),
+    Z = z;
+end;
+% NOTE(review): Z stays undefined when z is neither numeric nor a struct.
+
+% replace the missing values by zero and remember their positions
+NX = isnan(X);
+X(NX) = 0;
+
+% filter the zero-filled data and, with the same coefficients, the mask of
+% the valid samples; their ratio, scaled by sum(B)/sum(A), is the output
+[Y , Z.S] = filter(B, A, X, Z.S);
+[NY, Z.N] = filter(B, A, ~NX, Z.N);
+Y = (sum(B)/sum(A)) * Y./NY;
+
diff --git a/inst/nanfilter1uc.m b/inst/nanfilter1uc.m
new file mode 100644
index 0000000..181bef7
--- /dev/null
+++ b/inst/nanfilter1uc.m
@@ -0,0 +1,54 @@
+function [x,z] = nanfilter1uc(uc,x,z);
+% NANFILTER1UC is an adaptive filter for data with missing values encoded as NaN.
+%
+% [Y,Z] = nanfilter1uc(uc,X [, Z]);
+%
+% if X contains no missing data, NANFILTER behaves like FILTER(uc,[1,uc-1],X[,Z]).
+%
+% see also: FILTER, NANFILTER, SUMSKIPNAN
+
+% $Id$
+% Copyright (C) 2010,2011 by Alois Schloegl
+% This function is part of the NaN-toolbox available at
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/ and
+% http://octave.svn.sourceforge.net/viewvc/octave/trunk/octave-forge/extra/NaN/inst/
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; if not, write to the Free Software
+% Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+% na and nb are not used in the code below
+na = 2; %length(A);
+nb = 2; %length(B);
+% number of channels: 1 for vector input, otherwise the number of columns
+if any(size(x)==1)
+    nc = 1;
+else
+    nc = size(x,2);
+end;
+% NOTE(review): the loop below runs over the rows of x; for a row vector
+% nc is 1 although x(k,:) has several elements - confirm that only column
+% vectors and matrices are supported.
+
+% acN: per channel, 1 + number of consecutive missing samples seen so far
+acN = zeros(1,nc);
+if nargin<3,
+    z = zeros(1,nc);
+end;
+% acc holds the current output sample of each channel
+acc = NaN(1,nc);
+for k = 1:size(x,1),
+    ix = isnan(x(k,:));
+    % restart the counter at 1 for valid samples, increase it for missing ones
+    acN = acN.*ix+1;
+    % effective weight of the state after acN steps with update coefficient uc
+    UC1 = ((1-uc).^acN);
+    % valid samples: weighted input plus state; missing ones keep the previous acc
+    acc(~ix) = (1-UC1(~ix)) .* x(k,~ix) + z(~ix); % / A{1};
+    % channels without any output yet take the current input value
+    ix = isnan(acc);
+    acc(ix) = x(k,ix);
+    % new state and output of this row
+    z = (1-uc) * acc;
+    x(k,:) = acc;
+end;
+
diff --git a/inst/naninsttest.m b/inst/naninsttest.m
new file mode 100644
index 0000000..2270ad8
--- /dev/null
+++ b/inst/naninsttest.m
@@ -0,0 +1,186 @@
+% NANINSTTEST checks whether the functions from NaN-toolbox have been
+% correctly installed.
+%
+% see also: NANTEST
+
+% $Id$
+% Copyright (C) 2000-2003 by Alois Schloegl
+% This script is part of the NaN-toolbox
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; If not, see .
+
+
+r = zeros(42,2);
+
+x = [5,NaN,0,1,nan,-1:.4:1];
+
+% run test, k=1: with NaNs, k=2: all NaN's are removed
+% the result of both should be the same.
+ +%FLAG_WARNING = warning; +warning('off'); + +funlist = {'sumskipnan','mean','std','var','skewness','kurtosis','sem','median','mad','zscore','coefficient_of_variation','geomean','harmmean','meansq','moment','rms','','corrcoef','rankcorr','spearman','ranks','center','trimean','min','max','tpdf','tcdf','tinv','normpdf','normcdf','norminv','nansum','nanstd','histo_mex','sumskipnan_mex','covm_mex','svmtrain_mex','train','ttest','betapdf','betacdf','betainv','','','',''}; +for k=1:2, + if k==2, x(isnan(x))=[]; end; + r(1,k) =sumskipnan(x(1)); + r(2,k) =mean(x); + r(3,k) =std(x); + r(4,k) =var(x); + r(5,k) = skewness(x); + r(6,k) =kurtosis(x); + r(7,k) =sem(x); + r(8,k) =median(x); + r(9,k) =mad(x); + tmp = zscore(x); + r(10,k)=tmp(1); + if exist('coefficient_of_variation','file'), + r(11,k)=coefficient_of_variation(x); + end; + r(12,k)=geomean(x); + r(13,k)=harmmean(x); + if exist('meansq','file'), + r(14,k)=meansq(x); + end; + if exist('moment','file'), + r(15,k)=moment(x,6); + end; + if exist('rms','file'), + r(16,k)=rms(x); + end; + % r(17,k) is currently empty. 
+ tmp=corrcoef(x',(1:length(x))'); + r(18,k)=any(isnan(tmp(:))); + if exist('rankcorr','file'), + tmp=rankcorr(x',(1:length(x))'); + r(19,k)=any(isnan(tmp(:))); + end; + if exist('spearman','file'), + tmp=spearman(x',(1:length(x))'); + r(20,k)=any(isnan(tmp(:))); + end; + if exist('ranks','file'), + r(21,k)=any(isnan(ranks(x')))+k; + end; + if exist('center','file'), + tmp=center(x); + r(22,k)=tmp(1); + end; + if exist('trimean','file'), + r(23,k)=trimean(x); + end; + r(24,k)=min(x); + r(25,k)=max(x); + + r(26,k) = k+isnan(tpdf(x(2),4)); + + try + r(27,k) = k*(~isnan(tcdf(nan,4))); + catch + r(27,k) = k; + end; + + r(28,k) = k*(~isnan(tinv(NaN,4))); + + if exist('normpdf','file'), + fun='normpdf'; + elseif exist('normal_pdf','file'), + fun='normal_pdf'; + end; + r(29,k) = (feval(fun,k,k,0)~=Inf)*k; + if exist('normcdf','file'), + fun='normcdf'; + elseif exist('normal_cdf','file'), + fun='normal_cdf'; + end; + r(30,k) = feval(fun,4,4,0); + if exist('norminv','file'), + fun='norminv'; + elseif exist('normal_inv','file'), + fun='normal_inv'; + end; + r(31,k) = k*any(isnan(feval(fun,[0,1],4,0))); + if exist('nansum','file'), + r(32,k)=k*isnan(nansum(nan)); + end; + if exist('nanstd','file'), + r(33,k)=k*(~isnan(nanstd(0))); + end; + + try + histo_mex([1:5]'); + r(34,k)=0; + catch; + r(34,k)=k; + end; + try + sumskipnan_mex([1:5]'); + r(35,k)=0; + catch; + r(35,k)=k; + end; + try + covm_mex([1:5]'); + r(36,k)=0; + catch; + r(36,k)=k; + end; + if ~exist('svmtrain_mex','file'), + r(37,k)=k; + end; + if ~exist('train','file'), + r(38,k)=k; + end; + + if exist('ttest','file'), + r(39,k)=ttest([x,x,x],10); + end; + + if exist('betapdf','file'), + r(40,k)=sum(~isnan(betapdf([x,x,x],10,5))); + end; + if exist('betacdf','file'), + r(41,k)=sum(~isnan(betacdf([x,x,x],10,5))); % row 41 checks BETACDF (it previously re-ran BETAPDF) + end; + if exist('betainv','file'), + r(42,k)=sum(~isnan(betainv([x,x,x],10,5))); + end; +end; + +% check if result is correct +tmp = abs(r(:,1)-r(:,2)) +% This is part of the NaN-toolbox.
For more details see +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ +% +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + + +if nargin>1 + [o,n] = sumskipnan(i,DIM); +else + [o,n] = sumskipnan(i); +end; +o=o./n; diff --git a/inst/nanstd.m b/inst/nanstd.m new file mode 100644 index 0000000..5c1d996 --- /dev/null +++ b/inst/nanstd.m @@ -0,0 +1,71 @@ +function [y] = nanstd(x,FLAG,DIM) +% NANSTD same as STD but ignores NaN's. +% NANSTD is OBSOLETE; use NaN/STD instead. NANSTD is included +% to fix a bug in alternative implementations and to +% provide some compatibility. +% +% Y = nanstd(x, FLAG, [,DIM]) +% +% x data +% FLAG 0: [default] normalizes with (N-1), N = sample size +% FLAG 1: normalizes with N, N = sample size +% DIM dimension +% 1 sum of columns +% 2 sum of rows +% default or []: first DIMENSION with more than 1 element +% Y resulting standard deviation +% +% see also: SUM, SUMSKIPNAN, NANSUM, STD + +% $Id$ +% Copyright (C) 2000-2003,2006,2008,2009,2010 by Alois Schloegl +% This is part of the NaN-toolbox. For more details see +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. 
+% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + +if nargin<2, + FLAG = 0; +end; + +if nargin<3, + DIM = []; +end; +if isempty(FLAG), + FLAG = 0; +end; +if isempty(DIM), + DIM = find(size(x)>1,1); + if isempty(DIM), DIM=1; end; +end; + +[y,n,ssq] = sumskipnan(x,DIM); +if all(ssq(:).*n(:) > 2*(y(:).^2)), + %% rounding error is neglectable + y = ssq - y.*y./n; +else + %% rounding error is not neglectable + [y,n] = sumskipnan(center(x,DIM).^2,DIM); +end; + +if (FLAG==1) + y = sqrt(y./n); % normalize with N +else + % default method + y = sqrt(y./max(n-1,0)); % normalize with N-1 +end; + + +%!assert(nanstd(0),NaN) + diff --git a/inst/nansum.m b/inst/nansum.m new file mode 100644 index 0000000..ce73c47 --- /dev/null +++ b/inst/nansum.m @@ -0,0 +1,43 @@ +function [o] = nansum(i,DIM) +% NANSUM same as SUM but ignores NaN's. +% NANSUM is OBSOLETE; use SUMSKIPNAN instead. NANSUM is included +% to fix a bug in some other versions. +% +% Y = nansum(x [,DIM]) +% +% DIM dimension +% 1 sum of columns +% 2 sum of rows +% default or []: first DIMENSION with more than 1 element +% Y resulting sum +% +% +% see also: SUM, SUMSKIPNAN, NANSUM + +% $Id$ +% Copyright (C) 2000-2003,2008 by Alois Schloegl +% This is part of the NaN-toolbox. For more details see +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ +% +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. 
+% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + + +if nargin>1 + o = sumskipnan(i,DIM); +else + o = sumskipnan(i); +end; + +%!assert(nansum(NaN),0) + diff --git a/inst/nantest.m b/inst/nantest.m new file mode 100644 index 0000000..feb3352 --- /dev/null +++ b/inst/nantest.m @@ -0,0 +1,308 @@ +% NANTEST checks several mathematical operations and a few +% statistical functions for their correctness related to NaN's. +% e.g. it checks norminv, normcdf, normpdf, sort, matrix division and multiplication. +% +% +% see also: NANINSTTEST +% +% REFERENCE(S): +% [1] W. Kahan (1996) Lecture notes on the Status of "IEEE Standard 754 for +% Binary Floating-point Arithmetic. +% + + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + +% $Id$ +% Copyright (C) 2000-2004,2009 by Alois Schloegl +% This script is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +%FLAG_WARNING = warning; +%warning('off'); + +try + x = randn([3,4,5]); + x(~isnan(x)) = 0; +catch + fprintf(1,'WARNING: NANTEST fails for 3-DIM matrices. 
\n'); +end; +try + [s,n] = sumskipnan([nan,1,4,5]); +catch + fprintf(1,'WARNING: SUMSKIPNAN is not avaible. \n'); +end; + +% check NORMPDF, NORMCDF, NORMINV +x = [-inf,-2,-1,-.5,0,.5,1,2,3,inf,nan]'; +if exist('normpdf','file')==2, + q(1) = sum(isnan(normpdf(x,2,0)))>sum(isnan(x)); + if q(1), + fprintf(1,'NORMPDF cannot handle v=0.\n'); + fprintf(1,'-> NORMPDF should be replaced\n'); + end; +end; + +if exist('normcdf','file')==2, + q(2) = sum(isnan(normcdf(x,2,0)))>sum(isnan(x)); + if q(2), + fprintf(1,'NORMCDF cannot handle v=0.\n'); + fprintf(1,'-> NORMCDF should be replaced\n'); + end; +end; + +if ~(any(exist('erfinv') == [2,5])) + fprintf(1,'ERFINV is not available\n'); + +elseif exist('norminv','file')==2, + p = [-inf,-.2,0,.2,.5,1,2,inf,nan]; + q(3) = sum(~isnan(norminv(p,2,0)))<4; + if q(3), + fprintf(1,'NORMINV cannot handle correctly v=0.\n'); + fprintf(1,'-> NORMINV should be replaced\n'); + end; + q(4) = ~isnan(norminv(0,NaN,0)); + q(5) = any(norminv(0.5,[1 2 3],0)~=(1:3)); +end; + +if exist('tpdf','file')==2, + q(6) = ~isnan(tpdf(nan,4)); + if q(6), + fprintf(1,'TPDF(NaN,4) does not return NaN\n'); + fprintf(1,'-> TPDF should be replaced\n'); + end; +end; + +if exist('tcdf','file')==2, + try + q(7) = ~isnan(tcdf(nan,4)); + catch + q(7) = 1; + end; + if q(7), + fprintf(1,'TCDF(NaN,4) does not return NaN\n'); + fprintf(1,'-> TCDF should be replaced\n'); + end; +end; + +if exist('tinv','file')==2, + try + q(8) = ~isnan(tinv(nan,4)); + catch + q(8) = 1; + end; + if q(8), + fprintf(1,'TINV(NaN,4) does not return NaN\n'); + fprintf(1,'-> TINV should be replaced\n'); + end; +end; + +q(9) = isreal(double(2+3i)); +if q(9) + fprintf(1,'DOUBLE rejects imaginary part\n-> this can affect SUMSKIPNAN\n'); % fprintf (not Octave-only printf) so the script also runs in Matlab +end; + +try + x = reshape(1:6,3,2); + [cc,nn] = covm(x+i*x,'e'); + q(10) = 0; +catch + q(10) = 1; +end; + +if 0, +%%%%% MOD +if exist('mod')>1, + if (mod(5,0))~=0, + fprintf(1,'WARNING: MOD(x,0) does not return 0.\n'); + end; + if isnan(mod(5,0)), + fprintf(1,'WARNING:
MOD(x,0) returns NaN.\n'); + end; + if isnan(mod(5,inf)), + fprintf(1,'WARNING: MOD(x,INF) returns NaN.\n'); + end; +end; +%%%%% REM +if exist('rem')>1, + if (rem(5,0))~=0, + fprintf(1,'WARNING: REM(x,0) does not return 0.\n'); + end; + if isnan(rem(5,0)), + fprintf(1,'WARNING: REM(x,0) returns NaN.\n'); + end; + if isnan(rem(5,inf)), % test REM itself; this branch used to re-test MOD + fprintf(1,'WARNING: REM(x,INF) returns NaN.\n'); + end; +end; +end; + +%%%%% NANSUM(NAN) - this test addresses a problem in Matlab 5.3, 6.1 & 6.5 +if exist('nansum','file'), + if isnan(nansum(nan)), + fprintf(1,'Warning: NANSUM(NaN) returns NaN instead of 0\n'); + fprintf(1,'-> NANSUM should be replaced\n'); + end; +end; +%%%%% NANSTD(0) - the standard deviation of a scalar should be NaN, not 0 +if exist('nanstd','file'), + if ~isnan(nanstd(0)), + fprintf(1,'Warning: NANSTD(x) with isscalar(x) returns 0 instead of NaN\n'); + fprintf(1,'-> NANSTD should be replaced\n'); + end; +end; +%%%%% GEOMEAN - this test addresses a problem in Octave +if exist('geomean','file'), + if isnan(geomean((0:3)')), + fprintf(1,'Warning: GEOMEAN([0,1,2,3]) NaN instead of 0\n'); + fprintf(1,'-> GEOMEAN should be replaced\n'); + end; +end; +%%%%% HARMMEAN - this test addresses a problem in Octave +if exist('harmmean','file'), + if isnan(harmmean(0:3)), + fprintf(1,'Warning: HARMMEAN([0,1,2,3]) NaN instead of 0\n'); + fprintf(1,'-> HARMMEAN should be replaced\n'); + end; +end; +%%%%% BITAND - this test addresses a problem in Octave +if exist('bitand')>1, + if isnan(bitand(2^33-1,13)), + fprintf(1,'BITAND can return NaN.
\n'); + end; +end; +%%%%% BITSHIFT - this test addresses a problem in Octave +if exist('bitshift','file'), + if isnan(bitshift(5,30,32)), + fprintf(1,'BITSHIFT can return NaN.\n'); + end; +end; +%%%%% ALL - this test addresses a problem in some old Octave and FreeMat v3.5 +if any(NaN)==1, + fprintf(1,'WARNING: ANY(NaN) returns 1 instead of 0\n'); +end; +if any([])==1, + fprintf(1,'WARNING: ANY([]) returns 1 instead of 0\n'); +end; +%%%%% ALL - this test addresses a problem in some old Octave and FreeMat v3.5 +if all(NaN)==0, + fprintf(1,'WARNING: ALL(NaN) returns 0 instead of 1\n'); +end; +if all([])==0, + fprintf(1,'WARNING: ALL([]) returns 0 instead of 1\n'); +end; + +%%%%% SORT - this was once a problem in Octave Version < 2.1.36, and still is in FreeMat 4.0 %%%% +if ~all(isnan(sort([3,4,NaN,3,4,NaN]))==[0,0,0,0,1,1]), + warning('Warning: SORT does not properly handle NaN.'); +end; + +%%%%% commutativity of 0*NaN %%% This test adresses a problem in Octave +x=[-2:2;4:8]'; +y=x;y(2,1)=nan;y(4,2)=nan; +B=[1,0,2;0,3,1]; +if ~all(all(isnan(y*B)==isnan(B'*y')')), + fprintf(2,'WARNING: 0*NaN within matrix multiplication is not commutative\n'); +end; + +% from Kahan (1996) +tmp = (0-3*i)/inf; +if isnan(tmp) + fprintf(2,'WARNING: (0-3*i)/inf results in NaN instead of 0.\n'); +end; + +%(roots([5,0,0])-[0;0]) +%(roots([2,-10,12])-[3;2]) +%(roots([2e-37,-2,2])-[1e37;1]) +%%%%% check nan/nan %% this test addresses a problem in Matlab 5.3, 6.1 & 6.5 +p = 4; +tmp1 = repmat(nan, 4); +tmp2 = repmat(nan, 4); +if ispc + % Octave 4.0.0 on Windows crashes, therefore the test is disabled + warning('mrdivide (repmat(nan,4), repmat(nan,4)) and mldivide (repmat(nan,4), repmat(nan,4)) not tested because it might crash Octave on Windows.\n'); +else +try + tmp1 = repmat(nan,p) / repmat(nan,p); +catch % exception error in Octave 3.8.2 and later of debian wheezy + fprintf(2,'mrdivide (repmat(nan,4), repmat(nan,4)) fails with an exception\n'); +end; +try + tmp2 = repmat(nan,p) \ 
repmat(nan,p); +catch % exception error in Octave 3.8.2 and later of debian wheezy + fprintf(2,'mldivide (repmat(nan,4), repmat(nan,4)) fails with an exception\n'); +end +end; +tmp3 = repmat(0,p)/repmat(0,p); +tmp4 = repmat(0,p)\repmat(0,p); +tmp5 = repmat(0,p)*repmat(inf,p); +tmp6 = repmat(inf,p)*repmat(0,p); +x = randn(100,1)*ones(1,p); y=x'*x; +tmp7 = y/y; +tmp8 = y\y; + +if ~all(isnan(tmp1(:))), + fprintf(1,'WARNING: matrix division NaN/NaN does not result in NaN\n'); +end; +if ~all(isnan(tmp2(:))), + fprintf(1,'WARNING: matrix division NaN\\NaN does not result in NaN\n'); +end; +if ~all(isnan(tmp3(:))), + fprintf(2,'WARNING: matrix division 0/0 does not result in NaN\n'); +end; +if ~all(isnan(tmp4(:))), + fprintf(2,'WARNING: matrix division 0\\0 does not result in NaN\n'); +end; +if ~all(isnan(tmp5(:))), + fprintf(2,'WARNING: matrix multiplication 0*inf does not result in NaN\n'); +end; +if ~all(isnan(tmp6(:))), + fprintf(2,'WARNING: matrix multiplication inf*0 does not result in NaN\n'); +end; +if any(any(tmp7==inf)); + fprintf(2,'WARNING: right division of two singulare matrices return INF\n'); +end; +if any(any(tmp8==inf)); + fprintf(2,'WARNING: left division of two singulare matrices return INF\n'); +end; + +tmp = [tmp1;tmp2;tmp3;tmp4;tmp5;tmp6;tmp7;tmp8]; + + + +%warning(FLAG_WARNING); + + +%%%%% QUANTILE TEST +d = [1 1 2 2 4 4 10 700]'; +q = [-1,0,.05,.1,.25,.49,.5,.51,.75,.8, .999999,1,2]; +r = [ NaN, 1, 1, 1, 1.5, 2, 3, 4, 7, 10, 700, 700, NaN]; +if any( quantile(d, q)' - r>0) + fprintf(1,'Quantile(1): failed\n'); +else + fprintf(1,'Quantile(1): OK\n'); +end; +if exist('histo3','file') + H = histo3(d); +else + H.X = [1;2;4;10;700]; + H.H = [2;2;2;1;1]; + H.datatype = 'HISTOGRAM'; +end; +if any( quantile(H, q)' - r>0) + fprintf(1,'Quantile(2): failed\n'); +else + fprintf(1,'Quantile(2): OK\n'); +end; + diff --git a/inst/normcdf.m b/inst/normcdf.m new file mode 100644 index 0000000..aea377a --- /dev/null +++ b/inst/normcdf.m @@ -0,0 +1,60 @@ +function p 
= normcdf(x,m,s) +% NORMCDF returns normal cumulative distribution function +% +% cdf = normcdf(x,m,s); +% +% Computes the CDF of the normal distribution +% with mean m and standard deviation s +% default: m=0; s=1; +% x,m,s must be matrices of same size, or any one can be a scalar. +% +% see also: NORMPDF, NORMINV + +% Reference(s): + +% $Id$ +% Copyright (C) 2000-2003,2010,2011 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see http://www.gnu.org/licenses/. + + +if nargin==1, + m=0; s=1; +elseif nargin==2, + s=1; +end; + +% allocate output memory and check size of arguments +z = (x-m)./s; % if this line causes an error, input arguments do not fit. + +p = erfc(z/-sqrt(2))/2; + +z = (s==0); % degenerate case s==0: (x-m)./s is +-Inf or NaN, so the CDF is set explicitly as a step at m +p((x < m) & z) = 0; + +p((x == m) & z) = 0.5; + +p((x > m) & z) = 1; + +p(isnan(x) | isnan(m) | isnan(s) | (s<0)) = nan; + +%!assert(sum(isnan(normcdf([-inf,-2,-1,-.5,0,.5,1,2,3,inf,nan]',2,0))),1) + + + + + + diff --git a/inst/norminv.m b/inst/norminv.m new file mode 100644 index 0000000..847798d --- /dev/null +++ b/inst/norminv.m @@ -0,0 +1,60 @@ +function x = norminv(p,m,s) +% NORMINV returns inverse cumulative function of the normal distribution +% +% x = norminv(p,m,s); +% +% Computes the quantile (inverse of the CDF) of a the normal +% cumulative distribution with mean m and standard deviation s +% default: m=0; s=1; +% p,m,s must be matrices of same size, or any one can be a scalar.
+% +% see also: NORMPDF, NORMCDF + +% Reference(s): + +% $Id$ +% Copyright (C) 2000-2003,2010,2011 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + +if nargin==1, + m=0; s=1; +elseif nargin==2, + s=1; +end; + +% allocate output memory and check size of arguments +x = sqrt(2)*erfinv(2*p - 1).*s + m; % if this line causes an error, input arguments do not fit. + +x((p>1) | (p<0) | isnan(p) | isnan(m) | isnan(s) | (s<0)) = nan; + +k = (s==0) & ~isnan(m); % temporary variable, reduces number of tests. + +x((p==0) & k) = -inf; + +x((p==1) & k) = +inf; + +k = (p>0) & (p<1) & k; +if numel(m)==1, + x(k) = m; +else + x(k) = m(k); +end; + + +%!assert(sum(~isnan(norminv([-inf,-.2,0,.2,.5,1,2,inf,nan],2,0))),4) + + diff --git a/inst/normpdf.m b/inst/normpdf.m new file mode 100644 index 0000000..88d1be2 --- /dev/null +++ b/inst/normpdf.m @@ -0,0 +1,54 @@ +function p = normpdf(x,m,s) +% NORMPDF returns normal probability density +% +% pdf = normpdf(x,m,s); +% +% Computes the PDF of a the normal distribution +% with mean m and standard deviation s +% default: m=0; s=1; +% x,m,s must be matrices of same size, or any one can be a scalar. 
+% +% see also: NORMCDF, NORMINV + +% Reference(s): + +% $Id$ +% Copyright (C) 2000-2003,2010,2011 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + +if nargin==1, + m=0;s=1; +elseif nargin==2, + s=1; +end; + +% allocate output memory and check size of argument +z = (x-m)./s; % if this line causes an error, input arguments do not fit. + +%p = ((2*pi)^(-1/2))*exp(-z.^2/2)./s; +SQ2PI = 2.5066282746310005024157652848110; +p = exp(-z.^2/2)./(s*SQ2PI); + +p((x==m) & (s==0)) = inf; + +p(isinf(z)~=0) = 0; + +p(isnan(x) | isnan(m) | isnan(s) | (s<0)) = nan; + +%!assert(sum(isnan(normpdf([-inf,-2,-1,-.5,0,.5,1,2,3,inf,nan]',2,0))),1) + + diff --git a/inst/partcorrcoef.m b/inst/partcorrcoef.m new file mode 100644 index 0000000..24e408d --- /dev/null +++ b/inst/partcorrcoef.m @@ -0,0 +1,166 @@ +function [R,sig,ci1,ci2] = partcorrcoef(X,Y,Z,Mode) +% PARTCORRCOEF calculates the partial correlation between X and Y +% after removing the influence of Z. +% X, Y and Z can contain missing values encoded with NaN. +% NaN's are skipped, NaN do not result in a NaN output. +% (Its assumed that the occurence of NaN's is uncorrelated) +% The output gives NaN, only if there are insufficient input data. 
+% +% The partial correlation is defined as +% pcc(xy|z)=(cc(x,y)-cc(x,z)*cc(y,z))/sqrt((1-cc(x,z)^2)*(1-cc(y,z)^2)) +% +% +% PARTCORRCOEF(X [,Mode]); +% calculates the (auto-)correlation matrix of X +% PARTCORRCOEF(X,Y,Z); +% PARTCORRCOEF(X,Y,Z,[]); +% PARTCORRCOEF(X,Y,Z,'Pearson'); +% PARTCORRCOEF(X,Y,Z,'Rank'); +% PARTCORRCOEF(X,Y,Z,'Spearman'); +% +% Mode=[] [default] +% removes from X and Y the part that can be explained by Z +% and computes the correlation of the remaining part. +% Ideally, this is equivalent to Mode='Pearson', however, in practice +% this is more accurate. +% Mode='Pearson' or 'parametric' +% Mode='Spearman' +% Mode='Rank' +% computes the partial correlation based on cc(x,y),cc(x,z) and cc(y,z) +% with the respective mode. +% +% [R,p,ci1,ci2] = PARTCORRCOEF(...); +% r is the partial correlation matrix +% r(i,j) is the partial correlation coefficient r between X(:,i) and Y(:,j) +% when influence of Z is removed. +% p gives the significance of PCC +% It tests the null hypothesis that the product moment correlation coefficient is zero +% using Student's t-test on the statistic t = r sqrt(N-Nz-2)/sqrt(1-r^2) +% where N is the number of samples (Statistics, M. Spiegel, Schaum series). +% p > alpha: do not reject the Null hypothesis: "R is zero". +% p < alpha: The alternative hypothesis "R2 is larger than zero" is true with probability (1-alpha).
+% ci1 lower 0.95 confidence interval +% ci2 upper 0.95 confidence interval +% +% see also: SUMSKIPNAN, COVM, COV, COR, SPEARMAN, RANKCORR, RANKS, CORRCOEF +% +% REFERENCES: +% on the partial correlation coefficient +% [1] http://www.tufts.edu/~gdallal/partial.htm +% [2] http://www.nag.co.uk/numeric/fl/manual/pdf/G02/g02byf.pdf + +% $Id$ +% Copyright (C) 2000-2002,2009 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +% Features: +% + interprets NaN's as missing value +% + Pearson's correlation +% + Spearman's rank correlation +% + Rank correlation (non-parametric, non-Spearman) +% + is fast, using an efficient algorithm O(n.log(n)) for calculating the ranks +% + significance test for null-hypthesis: r=0 +% + confidence interval (0.99) included +% - rank correlation works for cell arrays, too (no check for missing values). 
+% + compatible with Octave and Matlab + + +if nargin==3 + Mode=[]; +elseif nargin==4, +else + error('Error PARTCORRCOEF: Missing argument(s)\n'); +end; + +if isempty(Z) + R = corrcoef(X,Y,Mode); + +elseif isempty(Mode) + if ~isempty(Z) + for j=1:size(X,2) + ix = ~any(isnan(Z),2) & ~isnan(X(:,j)); + X(:,j) = X(:,j) - Z*(Z(ix,:)\X(ix,j)); + end; + for j=1:size(Y,2) + ix = ~any(isnan(Z),2) & ~isnan(Y(:,j)); + Y(:,j) = Y(:,j) - Z*(Z(ix,:)\Y(ix,j)); + end; + end; + R = corrcoef(X,Y,Mode); + +else + rxy = corrcoef(X,Y,Mode); + rxz = corrcoef(X,Z,Mode); + if isempty(Y), + ryz = rxz; + else + ryz = corrcoef(Y,Z,Mode); + end; + + %rxy,rxz,ryz + R = (rxy-rxz*ryz')./sqrt((1-rxz.^2)*(1-ryz.^2)'); + +end; + +if nargout<2, + return, +end; + +% SIGNIFICANCE TEST +%warning off; % prevent division-by-zero warnings in Matlab. +NN=size(X,1)-size(Z,2); + +tmp = 1 - R.*R; +tmp(tmp<0) = 0; % prevent tmp<0 i.e. imag(t)~=0 +t = R.*sqrt(max(NN-2,0)./tmp); + +if exist('t_cdf','file') + sig = t_cdf(t,NN-2); +elseif exist('tcdf','file') + sig = tcdf(t,NN-2); +else + fprintf('Warning CORRCOEF: significance test not completed because of missing TCDF-function\n') + sig = repmat(nan,size(R)); +end; +sig = 2 * min(sig,1 - sig); + +if nargout<3, + return, +end; + + +% CONFIDENCE INTERVAL +if exist('flag_implicit_significance','file'), + alpha = flag_implicit_significance; +else + alpha = 0.01; +end; + +fprintf(1,'CORRCOEF: confidence interval is based on alpha=%f\n',alpha); + +tmp = R; +%tmp(ix1 | ix2) = nan; % avoid division-by-zero warning +z = log((1+tmp)./(1-tmp))/2; % Fisher's z-transform; +%sz = 1./sqrt(NN-3); % standard error of z +sz = sqrt(2)*erfinv(1-2*alpha)./sqrt(NN-3); % confidence interval for alpha of z + +ci1 = tanh(z-sz); +ci2 = tanh(z+sz); + + + diff --git a/inst/percentile.m b/inst/percentile.m new file mode 100644 index 0000000..ff3d55b --- /dev/null +++ b/inst/percentile.m @@ -0,0 +1,47 @@ +function Q=percentile(Y,q,DIM) +% PERCENTILE calculates the percentiles of histograms 
and sample arrays. +% +% Q = percentile(Y,q) +% Q = percentile(Y,q,DIM) +% returns the q-th percentile along dimension DIM of sample array Y. +% size(Q) is equal size(Y) except for dimension DIM which is size(Q,DIM)=length(Q) +% +% Q = percentile(HIS,q) +% returns the q-th percentile from the histogram HIS. +% HIS must be a HISTOGRAM struct as defined in HISTO2 or HISTO3. +% If q is a vector, the each row of Q returns the q(i)-th percentile +% +% see also: HISTO2, HISTO3, QUANTILE + +% $Id$ +% Copyright (C) 1996-2003,2005,2006,2007 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + + +if nargin==2, + Q = quantile(Y,q/100); + +elseif nargin==3, + Q = quantile(Y,q/100,DIM); + +else + help percentile + +end; + + + diff --git a/inst/prctile.m b/inst/prctile.m new file mode 100644 index 0000000..65bd09d --- /dev/null +++ b/inst/prctile.m @@ -0,0 +1,50 @@ +function Q=prctile(Y,q,DIM) +% PRCTILE calculates the percentiles of histograms and sample arrays. +% (its the same than PERCENTILE.M) +% +% Q = prctile(Y,q) +% Q = prctile(Y,q,DIM) +% returns the q-th percentile along dimension DIM of sample array Y. +% size(Q) is equal size(Y) except for dimension DIM which is size(Q,DIM)=length(Q) +% +% Q = prctile(HIS,q) +% returns the q-th percentile from the histogram HIS. 
+% HIS must be a HISTOGRAM struct as defined in HISTO2 or HISTO3. +% If q is a vector, the each row of Q returns the q(i)-th percentile +% +% see also: HISTO2, HISTO3, QUANTILE + +% $Id$ +% Copyright (C) 1996-2003,2005,2006,2007,2009 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see http://www.gnu.org/licenses/. + + +if nargin==2, + Q = quantile(Y,q/100); + +elseif nargin==3, + Q = quantile(Y,q/100,DIM); + +else + help prctile + +end; + +%!assert(prctile([1:3,NaN],[10,50,90])==[1,2,3]) +%!assert(quantile(1:10,[.2,.5]),[2.5, 5.5]) + + diff --git a/inst/quantile.m b/inst/quantile.m new file mode 100644 index 0000000..c630774 --- /dev/null +++ b/inst/quantile.m @@ -0,0 +1,152 @@ +function Q=quantile(Y,q,DIM,method) +% QUANTILE calculates the quantiles of histograms and sample arrays. +% +% Q = quantile(Y,q) +% Q = quantile(Y,q,DIM) +% returns the q-th quantile along dimension DIM of sample array Y. +% size(Q) is equal size(Y) except for dimension DIM which is size(Q,DIM)=length(Q) +% +% Q = quantile(HIS,q) +% returns the q-th quantile from the histogram HIS. +% HIS must be a HISTOGRAM struct as defined in HISTO2 or HISTO3.
+% If q is a vector, the each row of Q returns the q(i)-th quantile +% +% see also: HISTO2, HISTO3, PERCENTILE + + +% $Id$ +% Copyright (C) 1996-2003,2005,2006,2007,2009,2011 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see http://www.gnu.org/licenses/. + +if nargin<3, + DIM = []; +end; +if isempty(DIM), + DIM = find(size(Y)>1,1); + if isempty(DIM), DIM = 1; end; +end; + + +if nargin<2, + help quantile + +else + [q, rix] = sort(q(:)'); % sort quantile values + [tmp,rix] = sort(rix); % generate reverse index + + SW = isstruct(Y); + if SW, SW = isfield(Y,'datatype'); end; + if SW, SW = strcmp(Y.datatype,'HISTOGRAM'); end; + if SW, + [yr, yc] = size(Y.H); + Q = repmat(nan,length(q),yc); + if ~isfield(Y,'N'); + Y.N = sum(Y.H,1); + end; + + for k1 = 1:yc, + tmp = Y.H(:,k1)>0; + h = full(Y.H(tmp,k1)); + t = Y.X(tmp,min(size(Y.X,2),k1)); + + N = Y.N(k1); + t2 = []; t2(1:2:2*length(t)) = t; % rebuild t2 per column: entries left over from a longer previous column would corrupt t2(end) + t2(2:2:2*length(t)) = t; + x2 = cumsum(h); + x = []; x(1)=0; % same reset for the cumulative-count vector + x(2:2:2*length(t)) = x2; + x(3:2:2*length(t)) = x2(1:end-1); + + % Q(q < 0 | 1 < q,:) = NaN; % already done at initialization + Q(q==0,k1) = t2(1); + Q(q==1,k1) = t2(end); + n = 1; + for k2 = find( (0 < q) & (q < 1) ) + while (q(k2)*N > x(n)), + n=n+1; + end; + + if q(k2)*N==x(n) + % mean of upper and lower bound + Q(k2,k1) = (t2(n)+t2(n+1))/2; + else + Q(k2,k1) = t2(n); + end; + end; + end; + Q = Q(rix,:); % order resulting quantiles according to original input q (once, after all columns are filled)
+ + + elseif isnumeric(Y), + sz = size(Y); + if DIM>length(sz), + sz = [sz,ones(1,DIM-length(sz))]; + end; + + f = zeros(1,length(q)); + f( (q < 0) | (1 < q) ) = NaN; + D1 = prod(sz(1:DIM-1)); + D3 = prod(sz(DIM+1:length(sz))); + Q = repmat(nan,[sz(1:DIM-1),length(q),sz(DIM+1:length(sz))]); + for k = 0:D1-1, + for l = 0:D3-1, + xi = k + l * D1*sz(DIM) + 1 ; + xo = k + l * D1*length(q) + 1; + t = Y(xi:D1:xi+D1*sz(DIM)-1); + t = t(~isnan(t)); + N = length(t); + + if (N==0) + f(:) = NaN; + else + t = sort(t); + t2 = []; t2(1:2:2*length(t)) = t; % rebuild t2 per slice: a previous slice with more non-NaN samples must not leak into t2(end) + t2(2:2:2*length(t)) = t; + x = floor((1:2*length(t))/2); + %f(q < 0 | 1 < q) = NaN; % for efficiency its defined outside loop + f(q==0) = t2(1); + f(q==1) = t2(end); + + n = 1; + for k2 = find( (0 < q) & (q < 1) ) + while (q(k2)*N > x(n)), + n = n+1; + end; + + if q(k2)*N==x(n) + % mean of upper and lower bound + f(k2) = (t2(n) + t2(n+1))/2; + else + f(k2) = t2(n); + end; + end; + end; + Q(xo:D1:xo + D1*length(q) - 1) = f(rix); + end; + end; + + else + fprintf(2,'Error QUANTILES: invalid input argument\n'); + return; + end; + +end; + +%!assert(quantile(1:10,[.2,.5]),[2.5, 5.5]) +%!assert(quantile([1:3,NaN],[.10,.50,.90])==[1,2,3]) + + diff --git a/inst/range.m b/inst/range.m new file mode 100644 index 0000000..74701be --- /dev/null +++ b/inst/range.m @@ -0,0 +1,65 @@ +function Q=range(Y,DIM) +% RANGE calculates the range of Y +% Missing values (encoded as NaN) are ignored. +% +% Q = range(Y) +% Q = range(Y,DIM) +% returns the range along dimension DIM of sample array Y. +% +% Q = range(HIS) +% returns the RANGE from the histogram HIS. +% HIS must be a HISTOGRAM struct as defined in HISTO2 or HISTO3.
+% +% see also: IQR, MAD, HISTO2, HISTO3, PERCENTILE, QUANTILE + +% Copyright (C) 2009-2019 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + +if nargin<2, + DIM = []; +end; +if isempty(DIM), + DIM = find(size(Y)>1,1); + if isempty(DIM), DIM = 1; end; +end; + + +if nargin<1, + help range + +else + SW = isstruct(Y); + if SW, SW = isfield(Y,'datatype'); end; + if SW, SW = strcmp(Y.datatype,'HISTOGRAM'); end; + if SW, + Q = repmat(NaN,1,size(Y.H,2)); + for k=1:size(Y.H,2); + t = Y.X(find(Y.H(:,k)>0),min(size(Y.X,2),k)); + Q(1,k) = max(t)-min(t); + end; + elseif isnumeric(Y) && nargin==1, + Q = max(Y) - min(Y); + elseif isnumeric(Y) && nargin==2, + Q = max(Y,[],DIM) - min(Y,[],DIM); + else + help range + end; +end; + + +%!assert(range([1:5,NaN]) == 4) + diff --git a/inst/rankcorr.m b/inst/rankcorr.m new file mode 100644 index 0000000..ce2f73a --- /dev/null +++ b/inst/rankcorr.m @@ -0,0 +1,45 @@ +function r = rankcorr(X,Y) +% RANKCORR calculated the rank correlation coefficient. +% This function is replaced by CORRCOEF. +% Significance test and confidence intervals can be obtained from CORRCOEF, too. +% +% R = CORRCOEF(X, [Y, ] 'Rank'); +% +% The rank correlation r = corrcoef(ranks(x)). +% is often confused with Spearman's rank correlation. 
+% Spearman's correlation is defined as +% r(x,y) = 1-6*sum((ranks(x)-ranks(y)).^2)/(N*(N*N-1)) +% The results are different. Here, the former version is implemented. +% +% see also: CORRCOEF, SPEARMAN, RANKS +% +% REFERENCES: +% [1] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html +% [2] http://mathworld.wolfram.com/CorrelationCoefficient.html + +% $Id$ +% Copyright (C) 2000-2003 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + + +% warning('RANKCORR might become obsolete; use CORRCOEF(ranks(x)) or CORRCOEF(...,''Rank'') instead'); + +if nargin < 2 + r = corrcoef(ranks(X)); +else + r = corrcoef(ranks(X),ranks(Y)); +end \ No newline at end of file diff --git a/inst/ranks.m b/inst/ranks.m new file mode 100644 index 0000000..28015cb --- /dev/null +++ b/inst/ranks.m @@ -0,0 +1,199 @@ +function r = ranks(X,DIM,Mode) +% RANKS gives the rank of each element in a vector. +% This program uses an advanced algorithm with averge effort O(m.n.log(n)) +% NaN in the input yields NaN in the output. +% +% r = ranks(X[,DIM]) +% if X is a vector, return the vector of ranks of X adjusted for ties. +% if X is matrix, the rank is calculated along dimension DIM. +% if DIM is zero or empty, the lowest dimension with more then 1 element is used. 
+% r = ranks(X,DIM,'traditional') +% implements the traditional algorithm with O(n^2) computational +% and O(n^2) memory effort +% r = ranks(X,DIM,'mtraditional') +% implements the traditional algorithm with O(n^2) computational +% and O(n) memory effort +% r = ranks(X,DIM,'advanced ') +% implements an advanced algorithm with O(n*log(n)) computational +% and O(n.log(n)) memory effort +% r = ranks(X,DIM,'advanced-ties') +% implements an advanced algorithm with O(n*log(n)) computational +% and O(n.log(n)) memory effort +% but without correction for ties +% This is the fastest algorithm +% +% see also: CORRCOEF, SPEARMAN, RANKCORR +% +% REFERENCES: +% -- + + +% $Id$ +% Copyright (C) 2000-2002,2005,2010,2013 by Alois Schloegl +% This script is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + +% Features: +% + is fast, uses an efficient algorithm for the rank correlation +% + computational effort is O(n.log(n)) instead of O(n^2) +% + memory effort is O(n.log(n)), instead of O(n^2). +% Now, the ranks of 8000 elements can be easily calculated +% + NaNs in the input yield NaN in the output +% + compatible with Octave and Matlab +% + traditional method is also implemented for comparison. 
+ + +if nargin<2, DIM = 0; end; +if ischar(DIM), + Mode= DIM; + DIM = 0; +elseif (nargin<3), + Mode = ''; +end; +if isempty(Mode), + Mode='advanced '; +end; + +sz_orig = size (X); +X = squeeze (X); %remove singleton dimensions for convenience +nd = ndims (X); +if isempty(DIM) || ~DIM %default as documented: lowest dimension with more than 1 element (1 if there is none) + DIM = max([1, find(size(X)>1,1)]); +end +if DIM > 1 %shift the array so that the dimension to sort over is first + perm = [DIM 1:(DIM-1) (DIM+1):nd]; + X = permute (X, perm); +end +if nd > 2 %convert X to 2-D if it has >2 dimensions + sz = size(X); + N = sz(1); + M = prod(sz(2:end)); + X = reshape(X, N, M); +else + [N,M] = size(X); +end + +if strcmp(Mode(1:min(11,length(Mode))),'traditional'), % traditional, needs O(m.n^2) +% this method was originally implemented by: KH +% Comment of KH: This code is rather ugly, but is there an easy way to get the ranks adjusted for ties from sort? + +r = zeros(size(X)); + for i = 1:M; + p = X(:, i(ones(1,N))); + r(:,i) = (sum (p < p') + (sum (p == p') + 1) / 2)'; + end; + % r(r<1)=NaN; + +elseif strcmp(Mode(1:min(12,length(Mode))),'mtraditional'), + % + memory effort is lower + + r = zeros(size(X)); + for k = 1:N; + for i = 1:M; + r(k,i) = (sum (X(:,i) < X(k,i)) + (sum (X(:,i) == X(k,i)) + 1) / 2); + end; + end; + % r(r<1)=NaN; + +elseif strcmp(Mode(1:min(13,length(Mode))),'advanced-ties'), % advanced + % + uses sorting, hence needs only O(m.n.log(n)) computations + % - does not fix ties + + r = zeros(size(X)); + [sX, ix] = sort(X,1); + for k=1:M, + [tmp,r(:,k)] = sort(ix(:,k),1); % r yields the rank of each element + end; + r(isnan(X)) = nan; + + +elseif strcmp(Mode(1:min(8,length(Mode))),'advanced'), % advanced + % + uses sorting, hence needs only O(m.n.log(n)) computations + + % [tmp,ix] = sort([X,Y]); + % [tmp,r] = sort(ix); % r yields rank. 
+ % but because sort does not work accordingly for cell arrays, + % and DIM argument not supported by Octave + % and DIM argument does not work for cell-arrays in Matlab + % we sort each column separately: + + r = zeros(size(X)); + n = N; + for k = 1:M, + [sX,ix] = sort(X(:,k)); + [tmp,r(:,k)] = sort(ix); % r yields the rank of each element + + % identify multiple occurrences (ties), to be compatible with the traditional version + if isnumeric(X) + n=sum(~isnan(X(:,k))); % number of valid samples; no longer used as sentinel, kept to leave the hunk size unchanged + end; + x = [0;find([sX(1:N-1)~=sX(2:N);true])]; % end index of each run of equal values; the former sentinel n hid a tie whenever max(sX)==n. Cells are not implemented yet. + d = find(diff(x)>1); + + % correct rank of multiple occurring elements + for l = 1:length(d), + t = (x(d(l))+1:x(d(l)+1))'; + r(ix(t),k) = mean(t); + end; + end; + r(isnan(X)) = nan; + +elseif strcmp(Mode,'=='), +% the results of both algorithms are compared for testing. +% +% if the Mode-argument is omitted, both methods are applied and +% the results are compared. Once the advanced algorithm is confirmed, +% it will become the default Mode. + + r = ranks(X,1,'advanced '); % X is already arranged column-wise here, hence DIM=1 is passed explicitly + r(isnan(r)) = 1/2; + + if N>100, + r1 = ranks(X,1,'mtraditional'); % Memory effort is lower + else + r1 = ranks(X,1,'traditional'); + end; + if ~all(all(r==r1)), + fprintf(2,'WARNING RANKS: advanced algorithm does not agree with traditional one\n Please report to \n'); + r = r1; + end; + r(isnan(X)) = nan; +end; + +%reshape r to match the input X +if nd > 2 + r = reshape (r, sz); +end +if (DIM > 1) + r = ipermute (r, perm); +end +r = reshape (r, sz_orig); %restore any singleton dimensions + + +%!shared z, r1, r2 +%! z = magic (4); +%! r1 = [4 1 1 4; 2 3 3 2; 3 2 2 3; 1 4 4 1]; +%! r2 = [4 1 2 3; 1 4 3 2; 3 2 1 4; 2 3 4 1]; +%!assert (ranks(z), r1); +%!assert (ranks(z, 2), r2); +%! z = nan(2, 2, 2); +%! z(:, :, 1) = [1 2; 3 4]; +%! z(:, :, 2) = [4 3; 2 1]; +%! r1 = cat(3, [1 1; 2 2], [2 2; 1 1]); +%! 
r2 = cat(3, [1 2; 1 2], [2 1; 2 1]); +%!assert (ranks(z), r1); +%!assert (ranks(z, 2), r2); +%!assert (ranks(z, 3), r1); diff --git a/inst/rms.m b/inst/rms.m new file mode 100644 index 0000000..220d75a --- /dev/null +++ b/inst/rms.m @@ -0,0 +1,58 @@ +function o=rms(x,DIM,W) +% RMS calculates the root mean square +% can deal with complex data. +% +% y = rms(x,DIM,W) +% +% DIM dimension +% 1 STD of columns +% 2 STD of rows +% N STD of N-th dimension +% default or []: first DIMENSION, with more than 1 element +% W weights to compute weighted s.d. (default: []) +% if W=[], all weights are 1. +% number of elements in W must match size(x,DIM) +% +% y estimated standard deviation +% +% features: +% - can deal with NaN's (missing values) +% - weighting of data +% - dimension argument also in Octave +% - compatible to Matlab and Octave +% +% see also: SUMSKIPNAN, MEAN + + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 2 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . 
+ + +% Copyright (C) 2000-2019 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +if nargin<2, + [o,N,ssq] = sumskipnan(x); +elseif nargin<3 + [o,N,ssq] = sumskipnan(x,DIM); +else + [o,N,ssq] = sumskipnan(x,DIM,W); +end; + +o = sqrt(ssq./N); + +%!assert(rms([1,5,1,NaN]) == 3) +%!assert(rms([1,5,1,-Inf,NaN]) == Inf) + diff --git a/inst/roc.m b/inst/roc.m new file mode 100644 index 0000000..54833a0 --- /dev/null +++ b/inst/roc.m @@ -0,0 +1,357 @@ +function [argout1,argout2,argout3] = roc(d, c, varargin); +% ROC plots receiver operator curve and computes derived statistics. +% computes the ROC curve, and a number of derived paramaters include +% AUC, optimal threshold values, corresponding confusion matrices, etc. +% +% Remark: if the sample values in d are not unique, there is a certain +% ambiguity in the results; the results may vary depending on +% on the ordering of the samples. Usually, this is only an issue, +% if the number of unique data value is much smaller than the total +% number of samples. +% +% Tratitionally, ROC was defined in the "Biosig for Octave and matlab" +% toolbox, later an ROC function became available in Matlab's NNET +% (Deep Learning) toolbox with a different usage interface. +% Therfore, there are different usage-styles. +% +% Usage (traditional/biosig style): +% RES = roc(d, c); +% RES = roc(d1, d0); +% RES = roc(...); +% +% RES = roc(...,'flag_plot'); +% RES = roc(..., s); +% plot ROC curve, including suggested thresholds +% In order to speed up the plotting, no more than 10000 data +% points are displayed. If you need more, you need to change +% the source code). 
+% +% The ROC curve can be plotted with +% plot(RES.FPR*100, RES.TPR*100); +% +% Usage style compatible with matlab's roc implementation: +% [TPR, FPR, THRESHOLDS] = ROC(targets, outputs) +% matlab-style interface for compatibility with Matlab's ROC implementation; +% Note that the input arguments are reversed; +% targets correspond to c, and outputs correspond to d. +% +% INPUT: +% d DATA, +% c CLASS, vector with 0 and 1 +% d1 DATA of class 1 +% d0 DATA of class 0 +% s line style (as used in plot) +% targets CLASS (0 and 1), when using matlab-style ROC +% outputs DATA, when using matlab-style ROC +% +% OUTPUT: +% TPR true positive rate +% FPR false positive rate +% THRESHOLDS corresponding Threshold values +% ACC accuracy +% AUC area under ROC curve +% Yi max(SEN+SPEC-1), Youden index +% c TH(c) is the threshold that maximizes Yi +% +% RES is a structure and provides many more results +% including optimum threshold values, corresponding confusion matrices, etc. +% RES.THRESHOLD.FPR returns the threshold value to obtain +% the given FPR rate. +% RES.THRESHOLD.{maxYI,maxACC,maxKAPPA,maxMCC,maxMI,maxF1,maxPHI} return the +% threshold obtained from maximum Youden Index (YI), Accuracy, Cohen's Kappa [3], +% Matthews correlation coefficient [2] (also known as Phi coefficient [1]), +% Mutual information, and F1 score [4], resp. +% RES.TH([RES.THRESHOLD.maxYIix, RES.THRESHOLD.maxACCix, RES.THRESHOLD.maxKAPPAix, +% RES.THRESHOLD.maxMCCix, RES.THRESHOLD.maxMIix, RES.THRESHOLD.maxF1ix]) +% return the optimal threshold for the respective measure. +% RES.H_kappa: confusion matrix when Threshold of maximum Kappa is applied. +% RES.H_{yi,acc,kappa,mcc,mi,f1,phi}: confusion matrix when threshold of +% optimum {...} is applied. Its structure is [TN, FN; FP, TP]. 
+% +% see also: AUC, PLOT, ROC +% +% References: +% [0] https://en.wikipedia.org/wiki/ROC_curve +% [1] https://en.wikipedia.org/wiki/Phi_coefficient +% [2] https://en.wikipedia.org/wiki/Matthews_correlation_coefficient +% [3] https://en.wikipedia.org/wiki/Cohen%27s_kappa +% [4] https://en.wikipedia.org/wiki/F1_score +% [5] A. Schlögl, J. Kronegg, J.E. Huggins, S. G. Mason; +% Evaluation criteria in BCI research. +% (Eds.) G. Dornhege, J.R. Millan, T. Hinterberger, D.J. McFarland, K.-R.Müller; +% Towards Brain-Computer Interfacing, MIT Press, 2007, p.327-342 + +% Copyright (c) 1997-2021 Alois Schloegl +% This is part of the BIOSIG-toolbox http://biosig.sf.net/ +% +% This library is free software; you can redistribute it and/or +% modify it under the terms of the GNU Library General Public +% License as published by the Free Software Foundation; either +% version 3 of the License, or (at your option) any later version. +% +% This library is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +% Library General Public License for more details. +% +% You should have received a copy of the GNU Library General Public +% License along with this library; if not, write to the +% Free Software Foundation, Inc., 59 Temple Place - Suite 330, +% Boston, MA 02111-1307, USA. +% + +if all(size(d)==size(c)) && all(all((c==1) | (c==0) | isnan(c))), + MODE='biosig_traditional'; +elseif ( all(all( (d==1) | (d==0) )) && all(size(c)==size(d)) ) + MODE='matlab_style'; +elseif ( (size(d,2)==1) && (size(c,2)==1) ), + MODE='biosig_2class'; +else + error('can not identify input data style') +end + +if strcmp(MODE,'matlab_style') + warning('matlab style is not fully (bug-by-bug) compatible'); + % Matlab's roc functions seems to add (in certain circumstances) some weird + % 0 and 1 in its 'outputs' data (see thresholds). This seems wrong. 
+ % We do not aim for bug-compatibility but for correctness. + % Therefore, this does not provide the exact same results. + + [thresholds,I] = sort(c,2); + x = d(I); + + tpr = 1-[zeros(size(x,1),1),cumsum(x==1,2)]./sum(x==1,2); + fpr = 1-[zeros(size(x,1),1),cumsum(x==0,2)]./sum(x==0,2); + tpr = tpr(:,end:-1:1); + fpr = fpr(:,end:-1:1); + thresholds = thresholds(:,end:-1:1); + if size(c,1)>1, + tpr=num2cell(tpr,2); + fpr=num2cell(fpr,2); + thresholds=num2cell(thresholds,2); + end; + argout1 = tpr; + argout2 = fpr; + argout3 = thresholds; + return; + +elseif strcmp(MODE,'biosig_2class') + d=d(:); + c=c(:); + d2=c; + c=[ones(size(d));zeros(size(d2))]; + d=[d;d2]; + fprintf(2,'Warning ROC: XXX\n') +elseif strcmp(MODE,'biosig_traditional') + d=d(:); + c=c(:); + ix = ~any(isnan([d,c]),2); + c = c(ix); + d = d(ix); +end; + +% handle (ignore) NaN's +c = c(~isnan(d)); +d = d(~isnan(d)); + + + +plot_args={'-'}; +flag_plot_args = 1; +thFPR = NaN; + +FLAG_DISPLAY=0; +for k=1:length(varargin) + arg = varargin{k}; + if strcmp(arg,'FPR') + flag_plot_args = 0; + thFPR = varargin{k+1}; + end; + if strcmp(arg,'flag_display') || strcmp(arg,'flag_plot') + FLAG_DISPLAY=1; + end + if flag_plot_args, + plot_args{k} = arg; + end +end; + +[D,I] = sort(d); +x = c(I); + +FN = [0;cumsum(x==1)]; +TP = sum(x==1)-FN; + +TN = [0;cumsum(x==0)]; +FP = sum(x==0)-TN; + +FNR = FN/sum(x==1); +TPR = 1-FNR; + +TNR = TN/sum(x==0); +FPR = 1-TNR; + +PPV = TP./(TP+FP); +NPV = TN./(TN+FN); + +SEN = TP./(TP+FN); +SPEC= TN./(TN+FP); +ACC = (TP+TN)./(TP+TN+FP+FN); + +% SEN = [FN TP TN FP SEN SPEC ACC D]; + +%%% compute Cohen's kappa coefficient +N = size(d,1); + +% H =[TN, FN; FP, TP] +p_i = [TP+FP, FN+TN]; +pi_ = [TP+FN, FP+TN]; +pe = sum(p_i.*pi_,2)/(N*N); % estimate of change agreement +kap = (ACC - pe) ./ (1 - pe); +mcc = (TP .* TN - FN .* FP) ./ sqrt(prod( [p_i, pi_], 2)); + +% mutual information +pxi = pi_/N; % p(x_i) +pyj = p_i/N; % p(y_j) +log2pji = ([TP,FN,FP,TN]/N).*log2([TP,FN,FP,TN]./[p_i,p_i]); + +% 
replace sumskipnan in order to avoid dependency on NaN-toolbox +% RES.MI = -sumskipnan(pyj.*log2(pyj),2) + sumskipnan(log2pji,2); +tmp1 = pyj.*log2(pyj); +tmp2 = log2pji; +tmp1(isnan(tmp1))=0; +tmp2(isnan(tmp2))=0; +MI = -sum(tmp1,2) + sum(tmp2,2); + + +% area under the ROC curve +if numel(FPR)<2 + RES.AUC=NaN; +else + RES.AUC = -diff(FPR)' * (TPR(1:end-1)+TPR(2:end))/2; +end + +% Youden index +YI = SEN + SPEC - 1; +F1 = 2*PPV.*TPR./(PPV+TPR); + +LRP = TPR./FPR; +LRN = FNR./TNR; + +% reduce size of AUC, to about 2500 to 5000 samples) +len = length(FPR); +delta = max(1,ceil(len/5000)); +dix = [1:delta:len-1,len]; + +RES.YI = YI(dix); +RES.ACC = ACC(dix); +RES.KAPPA = kap(dix); +RES.MCC = mcc(dix); +% RES.TH = D(dix); % broken +RES.F1 = F1(dix); +RES.MI = MI(dix); + +RES.SEN = SEN(dix); +RES.SPEC= SPEC(dix); +RES.TP = TP(dix); +RES.FP = FP(dix); +RES.FN = FN(dix); +RES.TN = TN(dix); +RES.TPR = TPR(dix); +RES.FPR = FPR(dix); +RES.FNR = FNR(dix); +RES.TNR = TNR(dix); +RES.LRP = LRP(dix); +RES.LRN = LRN(dix); +RES.PPV = PPV(dix); +RES.NPV = NPV(dix); + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Find optimal threshold: +% there are different threshold based on different criteria +% currently, cohen's kappa is usually what you want. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Cohen's kappa is best tested for borderline cases, like, few +% samples only, or when multiple options are possible. +% others are not that sophisticated. 
+[RES.maxYI,ix] = max(SEN+SPEC-1); +ix = min(ix,length(D)); +RES.THRESHOLD.maxYI = D(ix); +RES.THRESHOLD.maxYIix = ix; +RES.H_yi = [TN(ix),FN(ix);FP(ix),TP(ix)]; + +[RES.maxKAPPA,ix] = max(kap); +if (RES.maxKAPPA < 1/length(d)) + % if there is no positive kappa, look for negative kappas + [RES.maxKAPPA,ix] = max(abs(kap)); +end +if (RES.maxKAPPA>2*eps) + ix = find(abs(kap)==RES.maxKAPPA,1); + RES.THRESHOLD.maxKAPPA = mean(D(ix+[-1:0])); + RES.THRESHOLD.maxKAPPAix = ix; + RES.H_kappa = [TN(ix),FN(ix);FP(ix),TP(ix)]; + if kap(ix)<0, + warning('ROC - negative maximum kappa found, data and classlabels are inversely correlated. You might want to switch the classlabels') + end; +end + +[RES.maxMCC,ix] = max(mcc); +ix = min(ix,length(D)); +RES.THRESHOLD.maxMCC = D(ix); +RES.THRESHOLD.maxMCCix = ix; +RES.H_mcc = [TN(ix),FN(ix);FP(ix),TP(ix)]; + +[RES.maxMI,ix] = max(MI); +ix = min(ix,length(D)); +RES.THRESHOLD.maxMI = D(ix); +RES.THRESHOLD.maxMIix = ix; +RES.H_mi = [TN(ix),FN(ix);FP(ix),TP(ix)]; + +[tmp,ix] = max(ACC); +ix = min(ix,length(D)); +RES.THRESHOLD.maxACC = D(ix); +RES.THRESHOLD.maxACCix = ix; +RES.H_acc = [TN(ix),FN(ix);FP(ix),TP(ix)]; + +[tmp,ix] = max(F1); +RES.THRESHOLD.maxF1 = D(ix); +RES.THRESHOLD.maxF1ix = ix; +RES.H_f1 = [TN(ix),FN(ix);FP(ix),TP(ix)]; + +RES.THRESHOLD.FPR = NaN; +if ~isnan(thFPR) + ix = max(1,min(N,round((1-thFPR)*N))); + RES.THRESHOLD.FPR = D(ix); + RES.THRESHOLD.FPRix = ix; + RES.H_fpr = [TN(ix),FN(ix);FP(ix),TP(ix)]; +end; + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%%% display only 10000 points at most. 
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +if FLAG_DISPLAY, + ix0 = RES.THRESHOLD.maxKAPPAix; + ix1 = RES.THRESHOLD.maxYIix ; + ix2 = RES.THRESHOLD.maxMCCix; + ix3 = RES.THRESHOLD.maxMIix; + ix4 = RES.THRESHOLD.maxACCix; + ix5 = RES.THRESHOLD.maxF1ix; + + plot(FPR(dix)*100,TPR(dix)*100, FPR(ix0)*100, TPR(ix0)*100,'ok', FPR(ix1)*100, TPR(ix1)*100, 'xb', FPR(ix2)*100, TPR(ix2)*100, 'xg', FPR(ix3)*100, TPR(ix3)*100, 'xr', FPR(ix4)*100, TPR(ix4)*100, 'xc', FPR(ix5)*100, TPR(ix5)*100, 'xm'); + ylabel('TPR [%]');xlabel('FPR [%]'); + legend({'ROC','maxKappa','maxYoudenIndex','maxMCC','maxMI','maxACC','maxF1'},'location','southeast'); + + %ylabel('Sensitivity (true positive ratio) [%]'); + %xlabel('1-Specificity (false positive ratio) [%]'); +end; + +argout1=RES; +argout2=FPR; +argout3=D; + +%%% here are examples of strange results observed in Matlab's roc version +%%! [tpr1,fpr1,thresholds1] = roc([0,1,1,1,0,0,0],-[0.5:7]-4); +%% R=roc([5;1],[0;1]) +%% R=roc([-5;1],[0;1]) +%% R=roc([1:4]',[0;0;1;1;]); R.THRESHOLD.maxKAPPA<3 + diff --git a/inst/row_col_deletion.m b/inst/row_col_deletion.m new file mode 100644 index 0000000..578687b --- /dev/null +++ b/inst/row_col_deletion.m @@ -0,0 +1,113 @@ +function [rix,cix] = row_col_deletion(d,c,w) +% ROW_COL_DELETION selects the rows and columns for removing any missing values. +% A heuristic based on maximizing the number of remaining sample values +% is used. In other words, if there are more rows than columns, it is +% more likely that a row-wise deletion will be applied and vice versa. +% +% [rix,cix] = row_col_deletion(d) +% [rix,cix] = row_col_deletion(d,c,w) +% +% Input: +% d data (each row is a sample, each column a feature) +% c classlabels (not really used) [OPTIONAL] +% w weight for each sample vector [OPTIONAL] +% Output: +% rix selected samples +% cix selected columns +% +% d(rix,cix) does not contain any NaN's i.e. 
missing values +% +% see also: TRAIN_SC, TEST_SC + +% $Id$ +% Copyright (C) 2009,2010 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + + +if nargin > 2, + if isempty(w) || all(w==w(1)), + ix = ~isnan(c); + else + ix = ~(isnan(c(:)) | isnan(w(:))); % per-sample mask; the former any() collapsed it to a single scalar + end; + d = d(ix,:); %% ignore samples with invalid c or w + if ~isempty(w), w = w(ix); end; % an empty weight vector stays empty instead of raising an index error + +elseif nargin > 1, + d = d(~isnan(c),:); %% ignore samples with invalid c or w + w = []; +else + w = []; +end; + + +if 0, + % decides whether row-wise or column-wise deletion removes less data. + % rix and cix are the resulting index vectors + % either row-wise or column-wise deletion, but not a combination of both, is used. 
+ % this is obsolete + + n = numel(d); + cix = find(~any(isnan(d),1)); + rix = find(~any(isnan(d),2)); + nr = length(rix)*size(d,2); % number of elements after row-wise deletion + nc = length(cix)*size(d,1); % number of elements after column-wise deletion + + if (nr>nc) + cix = 1:size(d,2); % select all columns + %fprintf(1,'row-wise deletion (%i,%i,%i)\n',n,nr,nc); + else + rix = 1:size(d,1); % select all rows + %fprintf(1,'column-wise deletion (%i,%i,%i)\n',n,nr,nc); + end; + +else + + %% a mix of row- and column-wise deletion is possible + [N,M] = size(d); % must precede the weight check below, which uses N for its tolerance + if ~isempty(w) && (abs(sum(w)-1) > log2(N)*eps || any(w<0) || any(~isfinite(w))) + error('weight vector must sum to 1 and contain only non-negative and finite values'); + end; + rix = ones(N,1); cix = ones(1,M); + while 1; + e = ~isnan(d(rix>0,cix>0)); + if ~isempty(w), + colCost = mean(e, 1, w(rix>0)/sum(w(rix>0)))'; % cost of deleting columns + else + colCost = mean(e, 1)'; % cost of deleting columns + end; + rowCost = mean(e, 2); % cost of deleting rows + [tmp,ix] = sort([colCost; rowCost]); + + if abs(tmp(1)-1) <= log2(N)*eps, break; end; % stopping criterion: no NaN left; "<=" because the tolerance is 0 for N==1 + + if diff(tmp(1:2))==0, warning('row/col deletion: arbitrary selection [%i,%i]',ix(1:2)); end; + ix = ix(1); + if (ix<=sum(cix)) + tmp = find(cix>0); + cix(tmp(ix)) = 0; + else + tmp = find(rix>0); + rix(tmp(ix-sum(cix))) = 0; + end; + end; + rix = find(rix); + cix = find(cix); + +end + diff --git a/inst/sem.m b/inst/sem.m new file mode 100644 index 0000000..3ebae98 --- /dev/null +++ b/inst/sem.m @@ -0,0 +1,61 @@ +function [SE,M]=sem(x,DIM, W) +% SEM calculates the standard error of the mean +% +% [SE,M] = SEM(x [, DIM [,W]]) +% calculates the standard error (SE) in dimension DIM +% the default DIM is the first non-single dimension +% M returns the mean. +% Can deal with complex data, too. 
+% +% DIM dimension +% 1: SEM of columns +% 2: SEM of rows +% N: SEM of N-th dimension +% default or []: first DIMENSION, with more than 1 element +% W weights to compute weighted mean and s.d. (default: []) +% if W=[], all weights are 1. +% number of elements in W must match size(x,DIM) +% +% features: +% - can deal with NaN's (missing values) +% - weighting of data +% - dimension argument +% - compatible to Matlab and Octave +% +% see also: SUMSKIPNAN, MEAN, VAR, STD + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . 
+ +% Copyright (C) 2000-2019 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +if nargin>2, + [S,N,SSQ] = sumskipnan(x,DIM,W); +elseif nargin>1, + [S,N,SSQ] = sumskipnan(x,DIM); +else + [S,N,SSQ] = sumskipnan(x); +end + +M = S./N; +SE = (SSQ.*N - real(S).^2 - imag(S).^2)./(N.*N.*(N-1)); +SE(SE<=0) = 0; % prevent negative value caused by round-off error +SE = sqrt(real(SE)); + +%!assert(sem([1,4,1,NaN]) == 1) +%!assert(sem([1,4,1,NaN]',1) == 1) +%!assert(sem([1,4,1,NaN] ,2) == 1) + diff --git a/inst/signrank.m b/inst/signrank.m new file mode 100644 index 0000000..523edee --- /dev/null +++ b/inst/signrank.m @@ -0,0 +1,154 @@ +function [pval, h, stats] = signrank (x, m, alpha, tail, DIM) +% SIGNRANK - Wilcoxon signed-rank test +% The Wilcoxon signed-rank test is a non-parametric statistical hypothesis +% test used to compare two related samples whether their population median +% ranks differ [1-3]. SIGNRANK treads NaNs as "Missing values" and ignores these. +% Octave's statistical package has also wilcoxon_test, however, this works only +% for data with N>25 samples, signrank is based on the works [1-3] and can +% be used also for smaller sample sizes. +% +% pval = signrank(x,m) +% tests Null-hypothesis that median of x is m. +% pval = signrank(x,y) +% size of x and size of y must match, it is tested whether the +% difference x-y is significantly different to m=0; +% pval = signrank(x,y,alpha) +% pval = signrank(x,y,alpha,tail) +% pval = signrank(x,y,alpha,tail,DIM) +% [pval,H,stats] = signrank(...) +% +% H=1 indicates a rejection of the Null-hypothesis at a significance +% level of alpha (default alpha = 0.05). +% +% With the optional argument string TAIL, the alternative of interest +% can be selected. If TAIL is '!=' or '<>' or 'both', the null is tested +% against the two-sided Alternative `mean (X) ~= mean (Y)'. If TAIL +% is '>' or 'right', the one-sided Alternative `mean (X) > mean (Y)' is used. 
+% Similarly for '<' or 'left', the one-sided Alternative `mean (X) < mean +% (Y)' is used. The default is the two-sided case. +% +% H returns whether the Null-Hypotheses must be rejected. +% The p-value of the test is returned in PVAL. +% +% signrank works on the first non-singleton dimension or on DIM. +% +% If no output argument is given, the p-value of the test is +% displayed. +% +% Reference(s): +% [1] Glenn A Walker, (2002) +% Common Statistical Methods for Clinical Research (with SAS examples), 2nd edition +% Chapter 12 The Wilcoxon Signed-Rank Test. +% [2] https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test +% [3] https://math.stackexchange.com/questions/1414794/wilcoxon-signed-rank-test + +% Copyright (C) 2010,2019 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see . 
+
+  if ((nargin < 2) || (nargin > 5) || nargout > 4)
+    print_usage ;
+  end
+
+  if (nargin < 3) || isempty(alpha)
+    alpha = .05;
+  end
+
+  if (nargin < 4) || isempty(tail)
+    tail = '~=';
+  end
+  if (~ ischar (tail))
+    error ('signrank: tail must be a string');
+  end
+  % DIM omitted or []: work on the first non-singleton dimension
+  if (nargin < 5) || isempty(DIM)
+    DIM = find(size(x)>1,1);
+  end;
+  if isempty(DIM), DIM=1; end;
+
+  % The second argument is either the hypothesised median m (singleton
+  % along DIM) or paired data y (same length as x along DIM). In both
+  % cases the test is applied to the differences x-m; bsxfun expands a
+  % scalar or singleton m along DIM.
+  if (size(m,DIM)~=1) && (size(x,DIM)~=size(m,DIM))
+    error ('signrank: dimension of X and Y do not fit');
+  end
+  x = bsxfun(@minus, x, m);
+
+  % Algorithm according to
+  % https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test
+
+  % Step 2: exclude 0, and get N_r
+  x(abs(x)==0)=NaN;
+  Nr=sum(~isnan(x),DIM);
+
+  % Step 3 and 4: Order and Rank the data along DIM
+  % (tiedrank(abs(x)) would always rank along the first non-singleton
+  % dimension, whatever DIM is)
+  Rix = ranks(abs(x),DIM);
+
+  % Glenn A Walker [1]
+  % compute correction for ties: DIM is moved to the front, the number of
+  % occurrences of each rank is counted per column, and only ranks that
+  % occur more than once contribute
+  sz = size(x);
+  P = [DIM,1:DIM-1,DIM+1:length(sz)];
+  HIS = histo2(reshape(permute(Rix,P),sz(DIM),sz([1:DIM-1,DIM+1:end])));
+  nties = HIS.H;
+  nties(HIS.H<=1) = NaN;
+  C = ipermute(reshape(sumskipnan(nties.*(nties-1).*(nties+1),1),[1,sz([1:DIM-1,DIM+1:end])]),P);
+
+  % Step 5: rank sums of the positive and of the negative differences
+  % https://math.stackexchange.com/questions/1414794/wilcoxon-signed-rank-test
+  Tplus = sumskipnan((x>0).*Rix, DIM);
+  Tminus = sumskipnan((x<0).*Rix, DIM);
+
+  stats.z = (max(Tplus,Tminus)-Nr.*(Nr+1)/4)./sqrt(Nr.*(Nr+1).*(2*Nr+1)./24);
+  stats.signedrank = max(Tplus,Tminus);
+
+  S = (Tplus - Tminus) / 2;
+  V = (Nr.* (Nr+1).*(2*Nr+1) - C/2) / 24;
+  t = S .* sqrt(max(Nr-1,0)) ./ sqrt(Nr.*V - S.*S);
+  cdf = tcdf(t, Nr);
+
+  % see also NaN/ttest
+  if (strcmp (tail, '~=') || strcmp (tail, '!=') || strcmp (tail, '<>')) || strcmp(tail,'both'),
+    pval = 2 * min (cdf, 1 - cdf);
+  elseif strcmp (tail, '>') || strcmp(tail,'right'),
+    pval = 1 - cdf;
+  elseif strcmp (tail, '<') || strcmp(tail,'left'),
+    pval = cdf;
+  else
+    error ('signrank: option %s not recognized', tail);
+  end
+
+  h = pval < alpha;
+  if (nargout == 0)
+    fprintf(1,' pval: %g\n', pval);
+  end
+  stats.t=t;
+
+%!test
+%! % example from [3]
+%! x = [15,8;10,3;6,7;5,13;10,2;15,12;7,14;5,8;8,13;12,3;4,9;13,3;8,10;10,2;11,4;13,7;6,1;6,11;,9,3; 5,5;10,2;9,8;11,5;8,8];
+%! [p,h,stats] = signrank( x(:,1), x(:,2) );
+%! assert ( abs(stats.t - 2.184) < .01)
+
+%!test
+%! % a scalar second argument is the hypothesised median
+%! x = [1.5;3.2;0.7;4.1;2.8;5.3;1.9;3.6];
+%! assert (signrank(x,2), signrank(x-2,0))
+
diff --git a/inst/skewness.m b/inst/skewness.m
new file mode 100644
index 0000000..e902421
--- /dev/null
+++ b/inst/skewness.m
@@ -0,0 +1,68 @@
+function R = skewness(i,DIM)
+% SKEWNESS estimates the skewness
+%
+% y = skewness(x,DIM)
+%   calculates skewness of x in dimension DIM
+%
+% DIM	dimension
+%	1: STATS of columns
+%	2: STATS of rows
+%	default or []: first DIMENSION, with more than 1 element
+%
+% features:
+% - can deal with NaN's (missing values)
+% - dimension argument
+% - compatible to Matlab and Octave
+%
+% see also: SUMSKIPNAN, STATISTIC
+%
+% REFERENCE(S):
+% http://mathworld.wolfram.com/
+
+% $Id$
+% Copyright (C) 2000-2003,2010 by Alois Schloegl
+% This function is part of the NaN-toolbox
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; If not, see <http://www.gnu.org/licenses/>.
+
+
+
+% check input arguments; DIM may be omitted or [] (as documented)
+
+if nargin<2 || isempty(DIM),
+	DIM = find(size(i)>1,1);
+	if isempty(DIM), DIM=1; end;
+end;
+
+[R.SUM,R.N,R.SSQ] = sumskipnan(i,DIM); % sum
+
+R.MEAN = R.SUM./R.N; % mean
+R.SSQ0 = R.SSQ - real(R.SUM).*real(R.MEAN) - imag(R.SUM).*imag(R.MEAN); % sum square with mean removed
+
+%if flag_implicit_unbiased_estim; %% ------- unbiased estimates -----------
+	n1 = max(R.N-1,0); % in case of n=0 and n=1, the (biased) variance, STD and SEM are INF
+%else
+%	n1 = R.N;
+%end;
+
+R.VAR = R.SSQ0./n1; % variance (unbiased)
+R.STD = sqrt(R.VAR); % standard deviation
+rep = ones(1,max(ndims(i),DIM)); rep(DIM) = size(i,DIM); % the mean is replicated along DIM only; size(i)./size(R.MEAN) fails once the reduction drops a trailing dimension
+i = i - repmat(R.MEAN,rep);
+R.CM3 = sumskipnan(i.^3,DIM)./n1;
+%R.CM4 = sumskipnan(i.^4,DIM)./n1;
+
+R = R.CM3./(R.STD.^3);
+%R = R.CM4./(R.VAR.^2)-3;
diff --git a/inst/spearman.m b/inst/spearman.m
new file mode 100644
index 0000000..8ec7ec0
--- /dev/null
+++ b/inst/spearman.m
@@ -0,0 +1,45 @@
+function r = spearman(x,y)
+% SPEARMAN Spearman's rank correlation coefficient.
+% This function is replaced by CORRCOEF.
+% Significance test and confidence intervals can be obtained from CORRCOEF.
+%
+% [R,p,ci1,ci2] = CORRCOEF(x, [y, ] 'Rank');
+%
+% For some (unknown) reason, in previous versions Spearman's rank correlation
+%   r = corrcoef(ranks(x)).
+% But according to [1], Spearman's correlation is defined as
+%   r = 1-6*sum((ranks(x)-ranks(y)).^2)/(N*(N*N-1))
+% The results are different. Here, the later version is implemented.
+%
+% see also: CORRCOEF, RANKCORR
+%
+% REFERENCES:
+% [1] http://mathworld.wolfram.com/SpearmanRankCorrelationCoefficient.html
+% [2] http://mathworld.wolfram.com/CorrelationCoefficient.html
+
+% $Id$
+% Copyright (C) 2000-2002 by Alois Schloegl
+% This function is part of the NaN-toolbox
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; If not, see <http://www.gnu.org/licenses/>.
+
+
+% warning('SPEARMAN might become obsolete; use CORRCOEF(...,''Spearman'') instead');
+
+% collect the data argument(s), then delegate to CORRCOEF in 'Spearman' mode
+data = {x};
+if nargin >= 2, data{2} = y; end
+r = corrcoef(data{:},'Spearman');
+
diff --git a/inst/statistic.m b/inst/statistic.m
new file mode 100644
index 0000000..306299d
--- /dev/null
+++ b/inst/statistic.m
@@ -0,0 +1,173 @@
+function [varargout]=statistic(i,DIM,fun)
+% STATISTIC estimates various statistics at once.
+% +% R = STATISTIC(x,DIM) +% calculates all statistic (see list of fun) in dimension DIM +% R is a struct with all statistics +% +% y = STATISTIC(x,fun) +% estimate of fun on dimension DIM +% y gives the statistic of fun +% +% DIM dimension +% 1: STATS of columns +% 2: STATS of rows +% N: STATS of N-th dimension +% default or []: first DIMENSION, with more than 1 element +% +% fun 'mean' mean +% 'std' standard deviation +% 'var' variance +% 'sem' standard error of the mean +% 'rms' root mean square +% 'meansq' mean of squares +% 'sum' sum +% 'sumsq' sum of squares +% 'CM#' central moment of order # +% 'skewness' skewness +% 'kurtosis' excess coefficient (Fisher kurtosis) +% 'mad' mean absolute deviation +% +% features: +% - can deal with NaN's (missing values) +% - dimension argument +% - compatible to Matlab and Octave +% +% see also: SUMSKIPNAN +% +% REFERENCE(S): +% [1] http://www.itl.nist.gov/ +% [2] http://mathworld.wolfram.com/ + +% $Id$ +% Copyright (C) 2000-2003,2010 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . 
+
+
+
+% second argument is either DIM (numeric) or FUN (string / cell of strings)
+if nargin==1,
+	DIM=[];
+	fun=[];
+elseif nargin==2,
+	if ~isnumeric(DIM),
+		fun=DIM;
+		DIM=[];
+	else
+		fun=[];
+	end
+end
+if isempty(DIM),
+	DIM = find(size(i)>1,1);
+	if isempty(DIM), DIM=1; end;
+end;
+
+%R.N = sumskipnan(~isnan(i),DIM); % number of elements
+[R.SUM,R.N,R.SSQ] = sumskipnan(i,DIM); % sum
+%R.S3P = sumskipnan(i.^3,DIM); % sum of 3rd power
+R.S4P = sumskipnan(i.^4,DIM); % sum of 4th power
+%R.S5P = sumskipnan(i.^5,DIM); % sum of 5th power
+
+R.MEAN = R.SUM./R.N; % mean
+R.MSQ = R.SSQ./R.N; % mean square
+R.RMS = sqrt(R.MSQ); % root mean square
+%R.SSQ0 = R.SSQ-R.SUM.*R.MEAN; % sum square of mean removed
+R.SSQ0 = R.SSQ - real(R.SUM).*real(R.MEAN) - imag(R.SUM).*imag(R.MEAN); % sum square of mean removed
+
+%if flag_implicit_unbiased_estim; %% ------- unbiased estimates -----------
+	n1 = max(R.N-1,0); % in case of n=0 and n=1, the (biased) variance, STD and SEM are INF
+%else
+%	n1 = R.N;
+%end;
+
+R.VAR = R.SSQ0./n1; % variance (unbiased)
+R.STD = sqrt(R.VAR); % standard deviation
+R.SEM = sqrt(R.SSQ0./(R.N.*n1)); % standard error of the mean
+R.SEV = sqrt(n1.*(n1.*R.S4P./R.N+(R.N.^2-2*R.N+3).*(R.SSQ./R.N).^2)./(R.N.^3)); % standard error of the variance
+R.COEFFICIENT_OF_VARIATION = R.STD./R.MEAN;
+
+% NOTE: quartiles were computed here with QUANTILE but never stored or
+% returned; the call has been dropped. The disabled draft below is kept.
+
+%sz=size(i);sz(DIM)=1;
+%Q0500=repmat(nan,sz);
+%Q0250=Q0500;
+%Q0750=Q0500;
+%MODE=Q0500;
+%for k=1:size(i,2),
+%	tmp = sort(i(:,k));
+	%ix = find(~~diff([-inf;tmp;inf]))
+	%ix2=diff(ix)
+	%MODE(k)= tmp(max(ix2)==ix2)
+%	Q0500(k) = flix(tmp,R.N(k)/2 + 0.5);
+%	Q0250(k) = flix(tmp,R.N(k)/4 + 0.5);
+%	Q0750(k) = flix(tmp,R.N(k)*3/4 + 0.5);
+%end;
+%R.MEDIAN = Q0500;
+%R.Quartiles = [Q0250; Q0750];
+
+%R.Skewness.Fisher = (R.CM3)./(R.STD.^3); %%% same as R.SKEWNESS
+
+%R.Skewness.Pearson_Mode = (R.MEAN-R.MODE)./R.STD;
+%R.Skewness.Pearson_coeff1 = (3*R.MEAN-R.MODE)./R.STD;
+%R.Skewness.Pearson_coeff2 = (3*R.MEAN-R.MEDIAN)./R.STD;
+%R.Skewness.Bowley = (Q0750+Q0250 - 2*Q0500)./(Q0750-Q0250); % quartile skewness coefficient
+
+R.CM2 = R.SSQ0./n1;
+% remove the mean; it is replicated along DIM only, which also works when
+% the reduction dropped trailing singleton dimensions of an N-D input
+rep = ones(1,max(ndims(i),DIM)); rep(DIM) = size(i,DIM);
+i = i - repmat(R.MEAN,rep);
+R.CM3 = sumskipnan(i.^3,DIM)./n1;
+R.CM4 = sumskipnan(i.^4,DIM)./n1;
+%R.CM5 = sumskipnan(i.^5,DIM)./n1;
+
+R.SKEWNESS = R.CM3./(R.STD.^3);
+R.KURTOSIS = R.CM4./(R.VAR.^2)-3;
+R.MAD = sumskipnan(abs(i),DIM); % mean absolute deviation
+R.MAD = R.MAD./n1;
+
+R.datatype = 'STAT Level 3';
+
+% Assemble the output(s): without FUN the whole struct R is returned,
+% otherwise one output per requested statistic. FUN is a string or a cell
+% array of strings and is matched case-insensitively.
+if ischar(fun),
+	fun = {fun};
+end;
+if ~iscell(fun),
+	varargout{1} = R;
+else
+	for k = 1:length(fun),
+		f = upper(fun{k});
+		if strncmp(f,'CM',2),
+			% 'CM#': central moment of order #; strncmp (unlike indexing
+			% fun(1:2)) does not fail on names shorter than 2 characters
+			oo = str2double(f(3:end));
+			varargout{k} = sumskipnan(i.^oo,DIM)./n1;
+		elseif strcmp(f,'SUMSQ'),
+			% documented name; the value is stored in R as SSQ
+			varargout{k} = R.SSQ;
+		elseif strcmp(f,'MEANSQ'),
+			% documented name; the value is stored in R as MSQ
+			varargout{k} = R.MSQ;
+		else
+			varargout{k} = getfield(R,f);
+		end;
+	end;
+end;
diff --git a/inst/std.m b/inst/std.m
new file mode 100644
index 0000000..6aad0e3
--- /dev/null
+++ b/inst/std.m
@@ -0,0 +1,124 @@
+function [o,v]=std(x,opt,DIM,W)
+% STD calculates the standard deviation.
+%
+% [y,v] = std(x [, opt[, DIM [, W]]])
+%
+% opt	option
+%	0: normalizes with N-1 [default]
+%		provides the square root of best unbiased estimator of the variance
+%	1: normalizes with N,
+%		this provides the square root of the second moment around the mean
+%	otherwise:
+%		best unbiased estimator of the standard deviation (see [1])
+%
+% DIM	dimension
+%	N STD of N-th dimension
+%	default or []: first DIMENSION, with more than 1 element
+% W	weights to compute weighted s.d. (default: [])
+%	if W=[], all weights are 1.
+% number of elements in W must match size(x,DIM) +% +% y estimated standard deviation +% +% features: +% - provides an unbiased estimation of the S.D. +% - can deal with NaN's (missing values) +% - weighting of data +% - dimension argument also in Octave +% - compatible to Matlab and Octave +% +% see also: RMS, SUMSKIPNAN, MEAN, VAR, MEANSQ, +% +% +% References(s): +% [1] http://mathworld.wolfram.com/StandardDeviationDistribution.html + + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + +% $Id$ +% Copyright (C) 2000-2003,2006,2009,2010 by Alois Schloegl +% This is part of the NaN-toolbox for Octave and Matlab +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +if nargin<4, + W = []; +end; +if nargin<3, + DIM = []; +end; +if isempty(DIM), + DIM = find(size(x)>1,1); + if isempty(DIM), DIM=1; end; +end; + + +[y,n,ssq] = sumskipnan(x,DIM,W); +if all(ssq(:).*n(:) > 2*(y(:).^2)) + %% rounding error is neglectable + y = ssq - y.*y./n; +else + %% rounding error is not neglectable + szx = size(x); + szy = size(y); + if length(szy)1, + v = y.*((max(n-1,0)./(n.*n))-1./(n.*ib.*ib)); % variance of the estimated S.D. ??? needs further checks +end; + + diff --git a/inst/sumskipnan.m b/inst/sumskipnan.m new file mode 100644 index 0000000..5b2e210 --- /dev/null +++ b/inst/sumskipnan.m @@ -0,0 +1,195 @@ +function [o,count,SSQ] = sumskipnan(x, DIM, W) +% SUMSKIPNAN adds all non-NaN values. 
+%
+% All NaN's are skipped; NaN's are considered as missing values.
+% SUMSKIPNAN of NaN's only gives 0; the number of valid elements is
+% returned as the second output argument.
+% SUMSKIPNAN is also the elementary function for calculating
+% various statistics (e.g. MEAN, STD, VAR, RMS, MEANSQ, SKEWNESS,
+% KURTOSIS, MOMENT, STATISTIC etc.) from data with missing values.
+% SUMSKIPNAN implements the DIMENSION-argument for data with missing values.
+%
+% Y = sumskipnan(x [,DIM])
+% [Y,N,SSQ] = sumskipnan(x [,DIM])
+% [...] = sumskipnan(x, DIM, W)
+%
+% x	input data
+% DIM	dimension (default: [])
+%	empty DIM sets DIM to first non singleton dimension
+% W	weight vector for weighted sum, numel(W) must fit size(x,DIM)
+% Y	resulting sum
+% N	number of valid (not missing) elements
+% SSQ	sum of squares
+%
+% the function FLAG_NANS_OCCURED() returns whether any value in x
+% is a not-a-number (NaN)
+%
+% features:
+% - can deal with NaN's (missing values)
+% - implements dimension argument.
+% - computes weighted sum
+% - compatible with Matlab and Octave
+%
+% see also: FLAG_NANS_OCCURED, SUM, NANSUM, MEAN, STD, VAR, RMS, MEANSQ,
+%	SSQ, MOMENT, SKEWNESS, KURTOSIS, SEM
+
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; If not, see <http://www.gnu.org/licenses/>.
+ +% $Id$ +% Copyright (C) 2000-2005,2009,2011 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +global FLAG_NANS_OCCURED; + +if nargin<2, + DIM = []; +end; +if nargin<3, + W = []; +end; + +% an efficient implementation in C of the following lines +% could significantly increase performance +% only one loop and only one check for isnan is needed +% An MEX-Implementation is available in sumskipnan.cpp +% +% Outline of the algorithm: +% for { k=1,o=0,count=0; k++; k1,1); + if isempty(DIM), DIM = 1; end; +end +if (DIM<1), DIM = 1; end; %% Hack, because min([])=0 for FreeMat v3.5 + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% non-float data +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +if (isempty(W) && (~(isa(x,'float') || isa(x,'double')))) || ~flag_implicit_skip_nan(), %%% skip always NaN's + if ~isempty(W) + error('SUMSKIPNAN: weighted sum of integers not supported, yet'); + end; + x = double(x); + o = sum(x,DIM); + if nargout>1 + sz = size(x); + N = sz(DIM); + sz(DIM) = 1; + count = repmat(N,sz); + if nargout>2 + x = x.*x; + SSQ = sum(x,DIM); + end; + end; + return; +end; + +if ~isempty(W) && (size(x,DIM)~=numel(W)) + error('SUMSKIPNAN: size of weight vector does not match size(x,DIM)'); +end; + +%% mex and oct files expect double +x = double(x); + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% use Matlab-MEX function when available +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%if 1, +try + + %% using sumskipnan_mex.mex + if issparse(x), + fprintf(2,'sumskipnan: sparse matrix converted to full matrix\n'); + x = full(x); + end; + + %% !!! hack: FLAG_NANS_OCCURED is an output argument, reserve memory !!! 
+ if isempty(FLAG_NANS_OCCURED), + FLAG_NANS_OCCURED = logical(0); % default value + end; + + if (nargout<2), + o = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W); + if (~isreal(x)) + io = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W); + o = o + i*io; + end; + return; + elseif (nargout==2), + [o,count] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W); + if (~isreal(x)) + [io,icount] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W); + if any(count(:)-icount(:)) + error('Number of NaNs differ for REAL and IMAG part'); + else + o = o+i*io; + end; + end; + return; + elseif (nargout>=3), + [o,count,SSQ] = sumskipnan_mex(real(x),DIM,FLAG_NANS_OCCURED,W); + if (~isreal(x)) + [io,icount,iSSQ] = sumskipnan_mex(imag(x),DIM,FLAG_NANS_OCCURED,W); + if any(count(:)-icount(:)) + error('Number of NaNs differ for REAL and IMAG part'); + else + o = o+i*io; + SSQ = SSQ+iSSQ; + end; + end; + return; + end; +end; + +if ~isempty(W) + error('weighted sumskipnan requires sumskipnan_mex'); +end; + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% count non-NaN's +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +if nargout>1, + count = sum(x==x,DIM); + FLAG_NANS_OCCURED = any(count(:)2, + x = real(x).^2 + imag(x).^2; + SSQ = sum(x,DIM); +end; + +%!assert(sumskipnan([1,2],1),[1,2]) +%!assert(sumskipnan([1,NaN],2),1) +%!assert(sumskipnan([1,NaN],2),1) +%!assert(sumskipnan([nan,1,4,5]),10) +%!assert(sumskipnan([nan,1,4,5]',1,[3;2;1;0]),6) + + + diff --git a/inst/sumsq.m b/inst/sumsq.m new file mode 100644 index 0000000..7ea9edd --- /dev/null +++ b/inst/sumsq.m @@ -0,0 +1,50 @@ +function [o]=sumsq(x,DIM) +% SUMSQ calculates the sum of squares. 
+%
+% [y] = sumsq(x [, DIM])
+%
+% DIM	dimension
+%	N: sum of squares along the N-th dimension
+%	default or []: first DIMENSION, with more than 1 element
+%
+% y	sum of squares (third output of SUMSKIPNAN)
+%
+% features:
+% - can deal with NaN's (missing values)
+% - dimension argument also in Octave
+% - compatible to Matlab and Octave
+%
+% see also: RMS, SUMSKIPNAN, MEAN, VAR, MEANSQ,
+%
+%
+% References(s):
+
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 2 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; If not, see <http://www.gnu.org/licenses/>.
+
+% $Id$
+% Copyright (C) 2009,2010 by Alois Schloegl
+% This function is part of the NaN-toolbox
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+if nargin<2,
+	DIM = [];
+end;
+% default: first dimension with more than one element, else 1
+if isempty(DIM),
+	DIM = find(size(x)>1,1);
+	if isempty(DIM), DIM=1; end;
+end;
+
+% only the third output (sum of squares) is returned; s and n are discarded
+[s,n,o] = sumskipnan(x,DIM);
+
diff --git a/inst/tcdf.m b/inst/tcdf.m
new file mode 100644
index 0000000..f7067e0
--- /dev/null
+++ b/inst/tcdf.m
@@ -0,0 +1,65 @@
+function p = tcdf(x,n)
+% TCDF returns student cumulative distribution function
+%
+% cdf = tcdf(x,DF);
+%
+% Computes the CDF of the students distribution
+% with DF degrees of freedom
+% x,DF must be matrices of same size, or any one can be a scalar.
+%
+% The result is NaN where x is NaN or DF is not positive.
+%
+% see also: NORMCDF, TPDF, TINV
+
+% Reference(s):
+
+% Copyright (C) 2000-2003,2009 by Alois Schloegl
+% This is part of the NaN-toolbox. For more details see
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; If not, see <http://www.gnu.org/licenses/>.
+
+
+% check size of arguments; a scalar argument is expanded to the size of the other
+if nargin~=2
+	error('invalid number of input arguments')
+elseif ~isreal(x) || ~isreal(n)
+	error('input arguments must be real')
+elseif all(size(x)==1)
+	x = repmat(x,size(n));
+elseif all(size(n)==1)
+	n = repmat(n,size(x));
+elseif all(size(x)==size(n))
+	; %% OK, do nothing
+else
+	error('size of input arguments must be equal or scalar')
+end;
+
+% allocate memory; zero is also the final value for x==-Inf with n>0
+p = zeros(size(x));
+p((x==Inf) & (n>0)) = 1;
+
+% workaround for invalid arguments in BETAINC
+ix = isnan(x) | ~(n>0);
+p(ix)= NaN;
+
+% finite x and valid n: half of the regularized incomplete beta function
+ix = (x > -Inf) & (x < Inf) & (n > 0);
+p(ix) = betainc (n(ix) ./ (n(ix) + x(ix).^2), n(ix)/2, 1/2) / 2;
+
+% for positive x the value computed above is replaced by its complement
+ix = ix & (x>0);
+p(ix) = 1 - p(ix);
+
+% shape output
+p = reshape(p,size(x));
+
+%!assert(tcdf(NaN,4),NaN)
+%!assert(tcdf(inf,3),1)
diff --git a/inst/test_sc.m b/inst/test_sc.m
new file mode 100644
index 0000000..b5aa2e9
--- /dev/null
+++ b/inst/test_sc.m
@@ -0,0 +1,298 @@
+function [R]=test_sc(CC,D,mode,classlabel)
+% TEST_SC: apply statistical and SVM classifier to test data
+%
+% R = test_sc(CC,D,TYPE [,target_Classlabel])
+% R.output output: "signed" distance for each class.
+%	This represents the distances between sample D and the separating hyperplane.
+%	The "signed distance" is positive if it matches the target class,
+%	and negative if it lies on the opposite side of the separating hyperplane.
+% R.classlabel	class for output data
+% The target class is optional. If it is provided, the following values are returned.
+% R.kappa	Cohen's kappa coefficient
+% R.ACC	Classification accuracy
+% R.H	Confusion matrix
+%
+% The classifier CC is typically obtained by TRAIN_SC. If a statistical
+% classifier is used, TYPE can be used to modify the classifier.
+% TYPE = 'MDA'	mahalanobis distance based classifier
+% TYPE = 'MD2'	mahalanobis distance based classifier
+% TYPE = 'MD3'	mahalanobis distance based classifier
+% TYPE = 'GRB'	Gaussian radial basis function
+% TYPE = 'QDA'	quadratic discriminant analysis
+% TYPE = 'LD2'	linear discriminant analysis
+% TYPE = 'LD3', 'LDA', 'FDA', 'FLDA' (Fisher's) linear discriminant analysis
+% TYPE = 'LD4'	linear discriminant analysis
+% TYPE = 'GDBC'	general distance based classifier
+%
+% see also: TRAIN_SC
+%
+% References:
+% [1] R. Duda, P. Hart, and D. Stork, Pattern Classification, second ed.
+%	John Wiley & Sons, 2001.
+
+% Copyright (C) 2005,2006,2008,2009,2010,2016 by Alois Schloegl
+% This function is part of the NaN-toolbox
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+% This program is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public License
+% as published by the Free Software Foundation; either version 3
+% of the License, or (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; if not, write to the Free Software
+% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+
+if nargin<3,
+	mode = [];
+end;
+% CC.datatype has the form 'classifier:<family>:<type>'
+[t1,t] = strtok(CC.datatype,':');
+[t2,t] = strtok(t,':');
+[t3] = strtok(t,':');
+% not a classifier: return an empty result instead of an undefined output
+if ~strcmp(t1,'classifier'), R = []; return; end;
+
+if isfield(CC,'prewhite')
+	D = D*CC.prewhite(2:end,:) + CC.prewhite(ones(size(D,1),1),:);
+	CC = rmfield(CC,'prewhite');
+end;
+
+POS1 = [strfind(CC.datatype,'/gsvd'),strfind(CC.datatype,'/sparse'),strfind(CC.datatype,'/delet')];
+
+if 0,
+
+
+elseif strcmp(CC.datatype,'classifier:nbpw')
+	%%%% Naive Bayesian Parzen Window Classifier %%%%
+	error('NBPW not implemented yet')
+
+
+elseif strcmp(CC.datatype,'classifier:nbc')
+	%%%% Naive Bayesian Classifier %%%%
+	d = repmat(NaN,size(D,1),size(CC.MEAN,1));
+	for k = 1:size(CC.MEAN,1)
+		z = (D - CC.MEAN(repmat(k,size(D,1),1),:)).^2 ./ (CC.VAR(repmat(k,size(D,1),1),:));
+		z = z + log(CC.VAR(repmat(k,size(D,1),1),:)); % + log(2*pi);
+		d(:,k) = sum(-z/2, 2) + log(mean(CC.N(k,:)));
+	end;
+	d = exp(d-log(mean(sum(CC.N,1)))-log(2*pi)/2);
+
+
+elseif strcmp(CC.datatype,'classifier:anbc')
+	%%%% Augmented Naive Bayesian Classifier %%%%
+	d = repmat(NaN,size(D,1),size(CC.MEAN,1));
+	for k = 1:size(CC.MEAN,1)
+		z = (D*CC.V - CC.MEAN(repmat(k,size(D,1),1),:)).^2 ./ (CC.VAR(repmat(k,size(D,1),1),:));
+		z = z + log(CC.VAR(repmat(k,size(D,1),1),:)); % + log(2*pi);
+		d(:,k) = sum(-z/2, 2) + log(mean(CC.N(k,:)));
+	end;
+	d = exp(d-log(mean(sum(CC.N,1)))-log(2*pi)/2);
+
+
+elseif strcmp(CC.datatype,'classifier:statistical:rda')
+	% Friedman (1989) Regularized Discriminant analysis
+	if isfield(CC,'hyperparameter') && isfield(CC.hyperparameter,'lambda') && isfield(CC.hyperparameter,'gamma')
+		D = [ones(size(D,1),1),D]; % add 1-column
+		lambda = CC.hyperparameter.lambda;
+		gamma = CC.hyperparameter.gamma;
+		d = repmat(NaN,size(D,1),size(CC.MD,3));
+		ECM = CC.MD./CC.NN;
+		NC = size(ECM);
+		ECM0 = squeeze(sum(ECM,3)); %decompose ECM
+		[M0,sd,COV0] = decovm(ECM0);
+		for k = 1:NC(3);
+			[M,sd,s,xc,N] = decovm(squeeze(ECM(:,:,k)));
+			s = ((1-lambda)*N*s+lambda*COV0)/((1-lambda)*N+lambda);
+			s = (1-gamma)*s+gamma*(trace(s))/(NC(2)-1)*eye(NC(2)-1);
+			ir = [-M;eye(NC(2)-1)]*inv(s)*[-M',eye(NC(2)-1)]; % inverse correlation matrix extended by mean
+			d(:,k) = -sum((D*ir).*D,2); % calculate distance of each data point to each class
+		end;
+	else
+		error('RDA: hyperparameters lambda and/or gamma not defined')
+	end;
+
+
+elseif strcmp(CC.datatype,'classifier:csp')
+	d = filtfilt(CC.FiltB,CC.FiltA,(D*CC.csp_w).^2);
+	R = test_sc(CC.CSP,log(d)); % LDA classifier of
+	d = R.output;
+
+
+elseif strcmp(CC.datatype,'classifier:svm:lib:1vs1') || strcmp(CC.datatype,'classifier:svm:lib:rbf');
+	nr = size(D,1);
+	[cl] = svmpredict_mex(ones(nr,1), D, CC.model, '-q'); %Use the classifier
+	%Create a pseudo tsd matrix for bci4eval
+	d = full(sparse(1:nr,cl,1,nr,CC.model.nr_class));
+
+elseif isfield(CC,'weights'); %strcmpi(t2,'svm') || (strcmpi(t2,'statistical') & strncmpi(t3,'ld',2)) ;
+	% linear classifiers like: LDA, SVM, LPM
+	%d = [ones(size(D,1),1), D] * CC.weights;
+	d = repmat(NaN,size(D,1),size(CC.weights,2));
+	for k = 1:size(CC.weights,2),
+		d(:,k) = D * CC.weights(2:end,k) + CC.weights(1,k);
+	end;
+
+
+elseif ~isempty(POS1) % GSVD, sparse & DELETION
+	CC.datatype = CC.datatype(1:POS1(1)-1);
+	r = test_sc(CC, D*sparse(CC.G));
+	d = r.output;
+
+
+elseif strcmp(t2,'statistical');
+	% TYPE defaults to the type encoded in CC.datatype
+	if isempty(mode)
+		mode.TYPE = upper(t3);
+	else
+		tmp=mode;
+		clear mode;
+		mode.TYPE=tmp;
+	end;
+	D = [ones(size(D,1),1),D]; % add 1-column
+	W = repmat(NaN, size(D,2), size(CC.MD,3));
+
+	if 0,
+	elseif strcmpi(mode.TYPE,'LD2'),
+		%d = ldbc2(CC,D);
+		ECM = CC.MD./CC.NN;
+		NC = size(ECM);
+		ECM0 = squeeze(sum(ECM,3)); %decompose ECM
+		[M0] = decovm(ECM0);
+		for k = 1:NC(3);
+			ecm = squeeze(ECM(:,:,k));
+			[M1,sd,COV1] = decovm(ECM0-ecm);
+			[M2,sd,COV2] = decovm(ecm);
+			w = (COV1+COV2)\(M2'-M1')*2;
+			w0 = -M0*w;
+			W(:,k) = [w0; w];
+		end;
+		d = D*W;
+	elseif strcmpi(mode.TYPE,'LD3') || strcmpi(mode.TYPE,'LDA') || strcmpi(mode.TYPE,'FDA') || strcmpi(mode.TYPE,'FLDA');
+		% 'LDA' and 'FDA' are documented as synonyms of 'LD3'/'FLDA'
+		%d = ldbc3(CC,D);
+		ECM = CC.MD./CC.NN;
+		NC = size(ECM);
+		ECM0 = squeeze(sum(ECM,3)); %decompose ECM
+		[M0,sd,COV0] = decovm(ECM0);
+		for k = 1:NC(3);
+			ecm = squeeze(ECM(:,:,k));
+			[M1] = decovm(ECM0-ecm);
+			[M2] = decovm(ecm);
+			w = COV0\(M2'-M1')*2;
+			w0 = -M0*w;
+			W(:,k) = [w0; w];
+		end;
+		d = D*W;
+	elseif strcmpi(mode.TYPE,'LD4');
+		%d = ldbc4(CC,D);
+		ECM = CC.MD./CC.NN;
+		NC = size(ECM);
+		ECM0 = squeeze(sum(ECM,3)); %decompose ECM
+		M0 = decovm(ECM0);
+		for k = 1:NC(3);
+			ecm = squeeze(ECM(:,:,k));
+			[M1,sd,COV1,xc,N1] = decovm(ECM0-ecm);
+			[M2,sd,COV2,xc,N2] = decovm(ecm);
+			w = (COV1*N1+COV2*N2)\((M2'-M1')*(N1+N2));
+			w0 = -M0*w;
+			W(:,k) = [w0; w];
+		end;
+		d = D*W;
+	elseif strcmpi(mode.TYPE,'MDA');
+		d = repmat(NaN,size(D,1),length(CC.IR));
+		for k = 1:length(CC.IR);
+			d(:,k) = -sum((D*CC.IR{k}).*D,2); % calculate distance of each data point to each class
+		end;
+	elseif strcmpi(mode.TYPE,'MD2');
+		d = repmat(NaN,size(D,1),length(CC.IR));
+		for k = 1:length(CC.IR);
+			d(:,k) = sum((D*CC.IR{k}).*D,2); % calculate distance of each data point to each class
+		end;
+		d = -sqrt(d);
+	elseif strcmpi(mode.TYPE,'GDBC');
+		d = repmat(NaN,size(D,1),length(CC.IR));
+		for k = 1:length(CC.IR);
+			d(:,k) = sum((D*CC.IR{k}).*D,2) + CC.logSF7(k); % calculate distance of each data point to each class
+		end;
+		d = exp(-d/2);
+	elseif strcmpi(mode.TYPE,'MD3');
+		d = repmat(NaN,size(D,1),length(CC.IR));
+		for k = 1:length(CC.IR);
+			d(:,k) = sum((D*CC.IR{k}).*D,2) + CC.logSF7(k); % calculate distance of each data point to each class
+		end;
+		d = exp(-d/2);
+		d = d./repmat(sum(d,2),1,size(d,2)); % class membership probability [1], p.601, equ (18.39)
+	elseif strcmpi(mode.TYPE,'QDA');
+		d = repmat(NaN,size(D,1),length(CC.IR));
+		for k = 1:length(CC.IR);
+			% [1] (18.33) QCF - quadratic classification function
+			d(:,k) = -(sum((D*CC.IR{k}).*D,2) - CC.logSF5(k));
+		end;
+	elseif strcmpi(mode.TYPE,'QDA2');
+		d = repmat(NaN,size(D,1),length(CC.IR));
+		for k = 1:length(CC.IR);
+			% [1] (18.33) QCF - quadratic classification function
+			d(:,k) = -(sum((D*(CC.IR{k})).*D,2) + CC.logSF4(k));
+		end;
+	elseif strcmpi(mode.TYPE,'GRB'); % Gaussian RBF
+		d = repmat(NaN,size(D,1),length(CC.IR));
+		for k = 1:length(CC.IR);
+			d(:,k) = sum((D*CC.IR{k}).*D,2); % calculate distance of each data point to each class
+		end;
+		d = exp(-sqrt(d)/2);
+	elseif strcmpi(mode.TYPE,'GRB2'); % Gaussian RBF
+		d = repmat(NaN,size(D,1),length(CC.IR));
+		for k = 1:length(CC.IR);
+			d(:,k) = sum((D*CC.IR{k}).*D,2); % calculate distance of each data point to each class
+		end;
+		d = exp(-d);
+	elseif strcmpi(mode.TYPE,'MQU'); % Multiquadratic
+		d = repmat(NaN,size(D,1),length(CC.IR));
+		for k = 1:length(CC.IR);
+			d(:,k) = sum((D*CC.IR{k}).*D,2); % calculate distance of each data point to each class
+		end;
+		d = -sqrt(1+d);
+	elseif strcmpi(mode.TYPE,'IMQ'); % Inverse Multiquadratic
+		d = repmat(NaN,size(D,1),length(CC.IR));
+		for k = 1:length(CC.IR);
+			d(:,k) = sum((D*CC.IR{k}).*D,2); % calculate distance of each data point to each class
+		end;
+		d = (1+d).^(-1/2);
+	elseif strcmpi(mode.TYPE,'Cauchy'); % Cauchy RBF
+		d = repmat(NaN,size(D,1),length(CC.IR));
+		for k = 1:length(CC.IR);
+			d(:,k) = sum((D*CC.IR{k}).*D,2); % calculate distance of each data point to each class
+		end;
+		d = 1./(1+d);
+	else
+		error('Classifier %s not supported. see HELP TRAIN_SC for supported classifiers.',mode.TYPE);
+	end;
+else
+	fprintf(2,'Error TEST_SC: unknown classifier\n');
+	R = [];		% keep the output defined for callers that request it
+	return;
+end;
+
+% class label: the column with the largest output, or for a single output
+% column 1 (d<0) / 2 (d>0); NaN where the output is NaN
+if size(d,2)>1,
+	[tmp,cl] = max(d,[],2);
+	cl = CC.Labels(cl);
+	cl(isnan(tmp)) = NaN;
+elseif size(d,2)==1,
+	cl = (d<0) + 2*(d>0);
+	cl(isnan(d)) = NaN;
+end;
+
+R.output = d;
+R.classlabel = cl;
+
+if nargin>3,
+	[R.kappa,R.sd,R.H,z,R.ACC] = kappa(classlabel(:),cl(:));
+end;
diff --git a/inst/tiedrank.m b/inst/tiedrank.m
new file mode 100644
index 0000000..802c1c4
--- /dev/null
+++ b/inst/tiedrank.m
@@ -0,0 +1,54 @@
+function R=tiedrank(X,flag1,flag2)
+% TIEDRANK compute rank of samples, the mean value is used in case of ties
+% this function is just a wrapper for RANKS, and provided for compatibility
+% with the statistics toolbox of matlab(tm)
+%
+% R = tiedrank(X)
+%	computes the rank R of vector X
+%
+% see also: RANKS
+
+
+% Copyright (C) 2009,2010,2017 by Alois Schloegl
+% This function is part of the NaN-toolbox
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; If not, see <http://www.gnu.org/licenses/>.
+
+
+if nargin>3,
+    error('more than 3 input argument is currently not supported ')
+end;
+if nargin<2,
+    flag1=0;
+end;
+if nargin<3,
+    flag2=0;
+end;
+
+if nargout>1,
+    warning('more than 1 output argument is currently not supported ')
+end;
+
+% flag1 and flag2 are accepted only so that calls written for the
+% statistics toolbox do not fail; they are not evaluated here and neither
+% of them is a dimension argument. DIM therefore must not depend on nargin
+% (it used to be left undefined for nargin>=2, which made every call with
+% a flag abort): always rank along the first non-singleton dimension of X.
+DIM = find(size(X)>1,1);
+if isempty(DIM), DIM = 1; end;
+if (DIM<1), DIM = 1; end; %% Hack, because min([])=0 for FreeMat v3.5
+
+R = ranks(X,DIM);
+
diff --git a/inst/tinv.m b/inst/tinv.m
new file mode 100644
index 0000000..bceb356
--- /dev/null
+++ b/inst/tinv.m
@@ -0,0 +1,54 @@
+function y = tinv(x,n)
+% TINV returns inverse cumulative function of the student distribution
+%
+% x = tinv(p,v);
+%
+% Computes the quantile (inverse of the CDF) of the Student
+% t-distribution with v degrees of freedom.
+% p,v must be matrices of same size, or any one can be a scalar.
+%
+% see also: TPDF, TCDF, NORMPDF, NORMCDF, NORMINV
+
+% Reference(s):
+
+% $Id$
+% Copyright (C) 2000-2003,2009 by Alois Schloegl
+% This function is part of the NaN-toolbox
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 2 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; If not, see .
+
+
+% allocate output memory and check size of arguments
+if all(size(x)==1)
+    x = repmat(x,size(n));
+elseif all(size(n)==1)
+    n = repmat(n,size(x));
+elseif isequal(size(x),size(n)) %% isequal also copes with a different number of dimensions
+    ; %% OK, do nothing
+else
+    error('size of input arguments must be equal or scalar')
+end;
+
+y = norminv(x); % do special cases, like x<=0, x>=1, isnan(x), n > 10000;
+y(~(n>0)) = NaN;
+
+ix = find(~isnan(x) & (n>0) & (n<10000)); % entries that get the quantile from the inverse beta function instead
+if ~isempty(ix)
+    y(ix) = (sign(x(ix) - 1/2).*sqrt(n(ix)./betainv(2*min(x(ix), 1-x(ix)), n(ix)/2, 1/2) - n(ix)));
+end;
+
+y = reshape(y,size(x));
+
+%!assert(tinv(NaN,4),NaN)
diff --git a/inst/tpdf.m b/inst/tpdf.m
new file mode 100644
index 0000000..67101cb
--- /dev/null
+++ b/inst/tpdf.m
@@ -0,0 +1,49 @@
+function p = tpdf(x,n)
+% TPDF returns student probability density
+%
+% pdf = tpdf(x,DF);
+%
+% Computes the PDF of the Student distribution
+% with DF degrees of freedom
+% x,DF must be matrices of same size, or any one can be a scalar.
+%
+% see also: TINV, TCDF, NORMPDF, NORMCDF, NORMINV
+
+% Reference(s):
+
+% $Id$
+% Copyright (C) 2000-2003,2008,2009,2010 by Alois Schloegl
+% This function is part of the NaN-toolbox
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 2 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; If not, see .
+
+% allocate memory and check size of arguments
+p = x+n; % if this line causes an error, size of input arguments do not fit.
+ix = (n>0) & (n~=inf) & ~isnan(x);
+
+% make size of x and n equal; adding zeros keeps the values exact, whereas
+% x+n-x turned n into NaN for infinite x and into 0 for very large |x|
+n = n + zeros(size(p));
+x = x + zeros(size(p));
+
+% workaround for invalid arguments in BETA
+% (ix(:) so that a matrix with one column lacking valid entries is still processed)
+if any(ix(:))
+p(ix) = (exp (-(n(ix)+1).*log(1+x(ix).^2./n(ix))/2) ./ (sqrt(n(ix)).* beta(n(ix)/2, 1/2)));
+end;
+p(~ix)= NaN;
+
+% shape output
+p = reshape(p,size(x));
+
+%!assert(tpdf(NaN,4),NaN)
diff --git a/inst/train_lda_sparse.m b/inst/train_lda_sparse.m
new file mode 100644
index 0000000..458003f
--- /dev/null
+++ b/inst/train_lda_sparse.m
@@ -0,0 +1,145 @@
+function [CC] = train_lda_sparse(X,G,par,tol)
+% Linear Discriminant Analysis for the Small Sample Size Problem as described in
+% Algorithm 1 of J. Duintjer Tebbens, P. Schlesinger: 'Improving
+% Implementation of Linear Discriminant Analysis for the High Dimension/Small Sample Size
+% Problem', Computational Statistics and Data Analysis, vol. 52, no. 1, pp. 423-437, 2007.
+% Input:
+% X ...... (sparse) training data matrix
+% G ...... group coding matrix of the training data
+% par ...... if par = 0 then classification exploits sparsity too
+% tol ...... tolerance to distinguish zero eigenvalues
+% Output:
+% CC ...... structure holding the trained model
+% CC.trafo ...... LDA transformation vectors
+% CC.C1, CC.M1 ...... quantities stored for the later classification step
+% CC.X ...... the training data, stored only if par = 0
+% Note: test data (test, Gtest) and the error rate (err) are not part of this interface.
+%
+% Reference(s):
+% J. Duintjer Tebbens, P. Schlesinger: 'Improving
+% Implementation of Linear Discriminant Analysis for the High Dimension/Small Sample Size
+% Problem', Computational Statistics and Data Analysis, vol. 52, no. 1,
+% pp. 423-437, 2007.
+%
+% Copyright (C) by J. Duintjer Tebbens, Institute of Computer Science of the Academy of Sciences of the Czech Republic,
+% Pod Vodarenskou vezi 2, 182 07 Praha 8 Liben, 18.July.2006.
+% This work was supported by the Program Information Society under project
+% 1ET400300415.
+%
+%
+% Modified for the use with Matlab6.5 by A. 
Schloegl, 22.Aug.2006
+%
+% $Id$
+% This function is part of the NaN-toolbox
+% http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+% This program is free software; you can redistribute it and/or
+% modify it under the terms of the GNU General Public License
+% as published by the Free Software Foundation; either version 3
+% of the License, or (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; if not, write to the Free Software
+% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Step (1)
+%p = length(X(1,:));n = length(X(:,1));g = length(G(1,:));
+G = sparse(G);
+[n,p]=size(X);
+g = size(G,2);
+
+% nj(j) is the squared Euclidean norm of column j of G; for a 0/1 group
+% coding this is presumably the number of samples in group j - TODO confirm
+for j=1:g
+    nj(j) = norm(G(:,j))^2;
+end
+% g-by-g sparse diagonal matrix holding 1./nj
+Dtild = spdiags(nj'.^(-1),0,g,g);
+% n-by-n matrix of inner products between the rows (samples) of X
+Xtild = X*X';
+Xtild1 = Xtild*ones(n,1);
+% 'matrix' is Xtild with its row sums/n and column sums/n subtracted and
+% the overall sum/n^2 added back
+help = ones(n,1)*Xtild1'/n - (ones(1,n)*Xtild'*ones(n,1))/(n^2);
+matrix = Xtild - Xtild1*ones(1,n)/n - help;
+% eliminate non-symmetry of matrix due to rounding error:
+matrix = (matrix+matrix')/2;
+[V0,S] = eig(matrix);
+% sort the eigenvalues in descending order without the 'descend' option:
+% [s,I] = sort(diag(S),'descend');
+[s,I] = sort(-diag(S)); s = -s;
+
+% NOTE(review): the text between "sum(s" and "0" on the next line was lost
+% when this patch was extracted (everything from a "<" up to a later ">"
+% appears to have been stripped). The missing part presumably contained the
+% rest of Step (1) and Steps (2)-(4), including the definitions of help2,
+% Dhalf, B1, V2, M1 and C that are used below. Restore it from the original
+% train_lda_sparse.m before using this code.
+cc = sum(s 0
+    [Q,R] = qr(V2,0);
+    matrix = B1*Dhalf*Q;
+    [V0,S] = eig(matrix'*matrix);
+    %[s,I] = sort(diag(S),'descend');
+    [s,I] = sort(-diag(S)); s = -s;
+    for j=1:cc
+        C(:,j) = Q*V0(:,I(j));
+    end
+end
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Step (5)
+C1 = help2*Dhalf*C;
+trafo(:,1:g-1) = X'*C1 - (X'*ones(n,1))*(ones(1,n)*C1/n);
+% scale every transformation vector to unit Euclidean norm
+for j=1:g-1
+    trafo(:,j) = trafo(:,j)/norm(trafo(:,j));
+end
+CC.trafo = trafo;
+
+% the classification itself (commented-out calls to classifs) is not done
+% here; only the quantities it would need are stored in CC
+if par == 0
+% X2 = full(test*X');
+% [pred] = classifs(C1,M1,X2);
+    CC.C1 = C1;
+    CC.M1 = M1;
+    CC.X = X;
+else
+% M = Dtild*G'*X;
+% [pred] = classifs(trafo,M,test); 
+ CC.C1 = trafo; + CC.M1 = Dtild*G'*X; +end diff --git a/inst/train_sc.m b/inst/train_sc.m new file mode 100644 index 0000000..5348105 --- /dev/null +++ b/inst/train_sc.m @@ -0,0 +1,965 @@ +function [CC]=train_sc(D,classlabel,MODE,W) +% Train a (statistical) classifier +% +% CC = train_sc(D,classlabel) +% CC = train_sc(D,classlabel,MODE) +% CC = train_sc(D,classlabel,MODE, W) +% weighting D(k,:) with weight W(k) (not all classifiers supported weighting) +% +% CC contains the model parameters of a classifier which can be applied +% to test data using test_sc. +% R = test_sc(CC,D,...) +% +% D training samples (each row is a sample, each column is a feature) +% classlabel labels of each sample, must have the same number of rows as D. +% Two different encodings are supported: +% {-1,1}-encoding (multiple classes with separate columns for each class) or +% 1..M encoding. +% So [1;2;3;1;4] is equivalent to +% [+1,-1,-1,-1; +% [-1,+1,-1,-1; +% [-1,-1,+1,-1; +% [+1,-1,-1,-1] +% [-1,-1,-1,+1] +% Note, samples with classlabel=0 are ignored. 
+% +% The following classifier types are supported MODE.TYPE +% 'MDA' mahalanobis distance based classifier [1] +% 'MD2' mahalanobis distance based classifier [1] +% 'MD3' mahalanobis distance based classifier [1] +% 'GRB' Gaussian radial basis function [1] +% 'QDA' quadratic discriminant analysis [1] +% 'LD2' linear discriminant analysis (see LDBC2) [1] +% MODE.hyperparameter.gamma: regularization parameter [default 0] +% 'LD3', 'FDA', 'LDA', 'FLDA' +% linear discriminant analysis (see LDBC3) [1] +% MODE.hyperparameter.gamma: regularization parameter [default 0] +% 'LD4' linear discriminant analysis (see LDBC4) [1] +% MODE.hyperparameter.gamma: regularization parameter [default 0] +% 'LD5' another LDA (motivated by CSP) +% MODE.hyperparameter.gamma: regularization parameter [default 0] +% 'RDA' regularized discriminant analysis [7] +% MODE.hyperparameter.gamma: regularization parameter +% MODE.hyperparameter.lambda = +% gamma = 0, lambda = 0 : MDA +% gamma = 0, lambda = 1 : LDA [default] +% Hint: hyperparameter are used only in test_sc.m, testing different +% the hyperparameters do not need repetitive calls to train_sc, +% it is sufficient to modify CC.hyperparameter before calling test_sc. 
+% 'GDBC' general distance based classifier [1] +% '' statistical classifier, requires Mode argument in TEST_SC +% '###/DELETION' if the data contains missing values (encoded as NaNs), +% a row-wise or column-wise deletion (depending on which method +% removes less data values) is applied; +% '###/GSVD' GSVD and statistical classifier [2,3], +% '###/sparse' sparse [5] +% '###' must be 'LDA' or any other classifier +% 'PLS' (linear) partial least squares regression +% 'REG' regression analysis; +% 'WienerHopf' Wiener-Hopf equation +% 'NBC' Naive Bayesian Classifier [6] +% 'aNBC' Augmented Naive Bayesian Classifier [6] +% 'NBPW' Naive Bayesian Parzen Window [9] +% +% 'PLA' Perceptron Learning Algorithm [11] +% MODE.hyperparameter.alpha = alpha [default: 1] +% w = w + alpha * e'*x +% 'LMS', 'AdaLine' Least mean squares, adaptive line element, Widrow-Hoff, delta rule +% MODE.hyperparameter.alpha = alpha [default: 1] +% 'Winnow2' Winnow2 algorithm [12] +% +% 'PSVM' Proximal SVM [8] +% MODE.hyperparameter.nu (default: 1.0) +% 'LPM' Linear Programming Machine +% uses and requires train_LPM of the iLog CPLEX optimizer +% MODE.hyperparameter.c_value = +% 'CSP' CommonSpatialPattern is very experimental and just a hack +% uses a smoothing window of 50 samples. 
+% 'SVM','SVM1r' support vector machines, one-vs-rest +% MODE.hyperparameter.c_value = +% 'SVM11' support vector machines, one-vs-one + voting +% MODE.hyperparameter.c_value = +% 'RBF' Support Vector Machines with RBF Kernel +% MODE.hyperparameter.c_value = +% MODE.hyperparameter.gamma = +% 'SVM:LIB' libSVM [default SVM algorithm) +% 'SVM:bioinfo' uses and requires svmtrain from the bioinfo toolbox +% 'SVM:OSU' uses and requires mexSVMTrain from the OSU-SVM toolbox +% 'SVM:LOO' uses and requires svcm_train from the LOO-SVM toolbox +% 'SVM:Gunn' uses and requires svc-functios from the Gunn-SVM toolbox +% 'SVM:KM' uses and requires svmclass-function from the KM-SVM toolbox +% 'SVM:LINz' LibLinear [10] (requires train.mex from LibLinear somewhere in the path) +% z=0 (default) LibLinear with -- L2-regularized logistic regression +% z=1 LibLinear with -- L2-loss support vector machines (dual) +% z=2 LibLinear with -- L2-loss support vector machines (primal) +% z=3 LibLinear with -- L1-loss support vector machines (dual) +% 'SVM:LIN4' LibLinear with -- multi-class support vector machines by Crammer and Singer +% 'DT' decision tree - not implemented yet. +% +% {'REG','MDA','MD2','QDA','QDA2','LD2','LD3','LD4','LD5','LD6','NBC','aNBC','WienerHopf','LDA/GSVD','MDA/GSVD', 'LDA/sparse','MDA/sparse', 'PLA', 'LMS','LDA/DELETION','MDA/DELETION','NBC/DELETION','RDA/DELETION','REG/DELETION','RDA','GDBC','SVM','RBF','PSVM','SVM11','SVM:LIN4','SVM:LIN0','SVM:LIN1','SVM:LIN2','SVM:LIN3','WINNOW', 'DT'}; +% +% CC contains the model parameters of a classifier. Some time ago, +% CC was a statistical classifier containing the mean +% and the covariance of the data of each class (encoded in the +% so-called "extended covariance matrices". Nowadays, also other +% classifiers are supported. +% +% see also: TEST_SC, COVM, ROW_COL_DELETION +% +% References: +% [1] R. Duda, P. Hart, and D. Stork, Pattern Classification, second ed. +% John Wiley & Sons, 2001. 
+% [2] Peg Howland and Haesun Park, +% Generalizing Discriminant Analysis Using the Generalized Singular Value Decomposition +% IEEE Transactions on Pattern Analysis and Machine Intelligence, 26(8), 2004. +% dx.doi.org/10.1109/TPAMI.2004.46 +% [3] http://www-static.cc.gatech.edu/~kihwan23/face_recog_gsvd.htm +% [4] Jieping Ye, Ravi Janardan, Cheong Hee Park, Haesun Park +% A new optimization criterion for generalized discriminant analysis on undersampled problems. +% The Third IEEE International Conference on Data Mining, Melbourne, Florida, USA +% November 19 - 22, 2003 +% [5] J.D. Tebbens and P. Schlesinger (2006), +% Improving Implementation of Linear Discriminant Analysis for the Small Sample Size Problem +% Computational Statistics & Data Analysis, vol 52(1): 423-437, 2007 +% http://www.cs.cas.cz/mweb/download/publi/JdtSchl2006.pdf +% [6] H. Zhang, The optimality of Naive Bayes, +% http://www.cs.unb.ca/profs/hzhang/publications/FLAIRS04ZhangH.pdf +% [7] J.H. Friedman. Regularized discriminant analysis. +% Journal of the American Statistical Association, 84:165–175, 1989. +% [8] G. Fung and O.L. Mangasarian, Proximal Support Vector Machine Classifiers, KDD 2001. +% Eds. F. Provost and R. Srikant, Proc. KDD-2001: Knowledge Discovery and Data Mining, August 26-29, 2001, San Francisco, CA. +% p. 77-86. +% [9] Kai Keng Ang, Zhang Yang Chin, Haihong Zhang, Cuntai Guan. +% Filter Bank Common Spatial Pattern (FBCSP) in Brain-Computer Interface. +% IEEE International Joint Conference on Neural Networks, 2008. IJCNN 2008. (IEEE World Congress on Computational Intelligence). +% 1-8 June 2008 Page(s):2390 - 2397 +% [10] R.-E. Fan, K.-W. Chang, C.-J. Hsieh, X.-R. Wang, and C.-J. Lin. +% LIBLINEAR: A Library for Large Linear Classification, Journal of Machine Learning Research 9(2008), 1871-1874. +% Software available at http://www.csie.ntu.edu.tw/~cjlin/liblinear +% [11] http://en.wikipedia.org/wiki/Perceptron#Learning_algorithm +% [12] Littlestone, N. 
(1988) +% "Learning Quickly When Irrelevant Attributes Abound: A New Linear-threshold Algorithm" +% Machine Learning 285-318(2) +% http://en.wikipedia.org/wiki/Winnow_(algorithm) + +% Copyright (C) 2005-2010,2019 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + + if nargin<2, + error('insufficient input arguments\n\tusage: train_sc(D,C,...)\n'); + end + if nargin<3, MODE = 'LDA'; end + if nargin<4, W = []; end + if ischar(MODE) + tmp = MODE; + clear MODE; + MODE.TYPE = tmp; + elseif ~isfield(MODE,'TYPE') + MODE.TYPE=''; + end + + if isfield(MODE,'hyperparameters') && ~isfield(MODE,'hyperparameter'), + %% for backwards compatibility, this might become obsolete + warning('MODE.hyperparameters are used, You should use MODE.hyperparameter instead!!!'); + MODE.hyperparameter = MODE.hyperparameters; + end + + sz = size(D); + if sz(1)~=size(classlabel,1), + error('length of data and classlabel does not fit'); + end + + % remove all NaN's + if 1, + % several classifier can deal with NaN's, there is no need to remove them. + elseif isempty(W) + %% TODO: some classifiers can deal with NaN's in D. Test whether this can be relaxed. 
+        %ix = any(isnan([classlabel]),2);
+        ix = any(isnan([D,classlabel]),2);
+        D(ix,:) = [];
+        classlabel(ix,:)=[];
+        W = [];
+    else
+        %ix = any(isnan([classlabel]),2);
+        ix = any(isnan([D,classlabel]),2);
+        D(ix,:)=[];
+        classlabel(ix,:)=[];
+        W(ix,:)=[];
+        warning('support for weighting of samples is still experimental');
+    end
+
+    sz = size(D);
+    if sz(1)~=length(classlabel),
+        error('length of data and classlabel does not fit');
+    end
+    if ~isfield(MODE,'hyperparameter')
+        MODE.hyperparameter = [];
+    end
+
+    if ~isfield(MODE,'RowDeletion')
+        MODE.RowDeletion = 0;
+    end
+    if MODE.RowDeletion,
+        % Drop every sample (row of D) that contains a NaN. The same rows
+        % have to be removed from classlabel and from the sample weights W,
+        % otherwise labels and weights no longer line up with the rows of D
+        % in the classifier branches below.
+        keep = ~any(isnan(D),2);
+        D = D(keep,:);
+        classlabel = classlabel(keep,:);
+        if ~isempty(W), W = W(keep); end
+        sz = size(D);
+    end
+
+    if 0,
+        ;
+    elseif ~isempty(strfind(lower(MODE.TYPE),'/delet'))
+        POS1 = find(MODE.TYPE=='/');
+        [rix,cix] = row_col_deletion(D);
+        if ~isempty(W), W=W(rix); end
+        CC = train_sc(D(rix,cix),classlabel(rix,:),MODE.TYPE(1:POS1(1)-1),W);
+        CC.G = sparse(cix, 1:length(cix), 1, size(D,2), length(cix));
+        if isfield(CC,'weights')
+            W = [CC.weights(1,:); CC.weights(2:end,:)];
+            CC.weights = sparse(size(D,2)+1, size(W,2));
+            CC.weights([1,cix+1],:) = W;
+            CC.datatype = ['classifier:statistical:',lower(MODE.TYPE)];
+        else
+            CC.datatype = [CC.datatype,'/delet'];
+        end
+
+    elseif ~isempty(strfind(lower(MODE.TYPE),'nbpw'))
+        error('NBPW not implemented yet')
+        %%%% Naive Bayesian Parzen Window Classifier.
+        [classlabel,CC.Labels] = CL1M(classlabel);
+        for k = 1:length(CC.Labels),
+            [d,CC.MEAN(k,:)] = center(D(classlabel==CC.Labels(k),:),1);
+            [CC.VAR(k,:),CC.N(k,:)] = sumskipnan(d.^2,1);
+            h2_opt = (4./(3*CC.N(k,:))).^(2/5).*CC.VAR(k,:);
+            %%% TODO
+        end
+
+
+    elseif ~isempty(strfind(lower(MODE.TYPE),'nbc'))
+        %%%% Naive Bayesian Classifier
+        if ~isempty(strfind(lower(MODE.TYPE),'anbc'))
+            %%%% Augmented Naive Bayesian classifier. 
+ [CC.V,L] = eig(covm(D,'M',W)); + D = D*CC.V; + else + CC.V = eye(size(D,2)); + end + [classlabel,CC.Labels] = CL1M(classlabel); + for k = 1:length(CC.Labels), + ix = classlabel==CC.Labels(k); + %% [d,CC.MEAN(k,:)] = center(D(ix,:),1); + if ~isempty(W) + [s,n] = sumskipnan(D(ix,:),1,W(ix)); + CC.MEAN(k,:) = s./n; + d = D(ix,:) - CC.MEAN(repmat(k,sum(ix),1),:); + [CC.VAR(k,:),CC.N(k,:)] = sumskipnan(d.^2,1,W(ix)); + else + [s,n] = sumskipnan(D(ix,:),1); + CC.MEAN(k,:) = s./n; + d = D(ix,:) - CC.MEAN(repmat(k,sum(ix),1),:); + [CC.VAR(k,:),CC.N(k,:)] = sumskipnan(d.^2,1); + end + end + CC.VAR = CC.VAR./max(CC.N-1,0); + CC.datatype = ['classifier:',lower(MODE.TYPE)]; + + + elseif ~isempty(strfind(lower(MODE.TYPE),'lpm')) + if ~isempty(W) + error('Error TRAIN_SC: Classifier (%s) does not support weighted samples.',MODE.TYPE); + end + % linear programming machine + % CPLEX optimizer: ILOG solver, ilog cplex 6.5 reference manual http://www.ilog.com + MODE.TYPE = 'LPM'; + if ~isfield(MODE.hyperparameter,'c_value') + MODE.hyperparameter.c_value = 1; + end + [classlabel,CC.Labels] = CL1M(classlabel); + + M = length(CC.Labels); + if M==2, M=1; end % For a 2-class problem, only 1 Discriminant is needed + for k = 1:M, + %LPM = train_LPM(D,(classlabel==CC.Labels(k)),'C',MODE.hyperparameter.c_value); + LPM = train_LPM(D',(classlabel'==CC.Labels(k))); + CC.weights(:,k) = [-LPM.b; LPM.w(:)]; + end + CC.hyperparameter.c_value = MODE.hyperparameter.c_value; + CC.datatype = ['classifier:',lower(MODE.TYPE)]; + + + elseif ~isempty(strfind(lower(MODE.TYPE),'pla')), + % Perceptron Learning Algorithm + + [rix,cix] = row_col_deletion(D); + [CL101,CC.Labels] = cl101(classlabel); + M = size(CL101,2); + weights = sparse(length(cix)+1,M); + + %ix = randperm(size(D,1)); %% randomize samples ??? 
+        if isempty(W) % no sample weights: every update uses the constant learning rate alpha
+            if isfield(MODE.hyperparameter,'alpha')
+                alpha = MODE.hyperparameter.alpha;
+            else
+                alpha = 1;
+            end
+            for k = rix(:)',
+                %e = ((classlabel(k)==(1:M))-.5) - sign([1, D(k,cix)] * weights)/2;
+                e = CL101(k,:) - sign([1, D(k,cix)] * weights);
+                weights = weights + alpha * [1,D(k,cix)]' * e ;
+            end
+
+        else %if ~isempty(W)
+            % sample weights given: alpha (if any) scales the per-sample weights
+            if isfield(MODE.hyperparameter,'alpha')
+                W = W*MODE.hyperparameter.alpha;
+            end
+            for k = rix(:)',
+                %e = ((classlabel(k)==(1:M))-.5) - sign([1, D(k,cix)] * weights)/2;
+                e = CL101(k,:) - sign([1, D(k,cix)] * weights);
+                weights = weights + W(k) * [1,D(k,cix)]' * e ;
+            end
+        end
+        CC.weights = sparse(size(D,2)+1,M);
+        CC.weights([1,cix+1],:) = weights;
+        CC.datatype = ['classifier:',lower(MODE.TYPE)];
+
+
+    elseif ~isempty(strfind(lower(MODE.TYPE),'adaline')) || ~isempty(strfind(lower(MODE.TYPE),'lms')),
+        % adaptive linear element, least mean squares, delta rule, Widrow-Hoff,
+
+        [rix,cix] = row_col_deletion(D);
+        [CL101,CC.Labels] = cl101(classlabel);
+        M = size(CL101,2);
+        weights = sparse(length(cix)+1,M);
+
+        %ix = randperm(size(D,1)); %% randomize samples ??? 
+ if isempty(W) + if isfield(MODE.hyperparameter,'alpha') + alpha = MODE.hyperparameter.alpha; + else + alpha = 1; + end + for k = rix(:)', + %e = (classlabel(k)==(1:M)) - [1, D(k,cix)] * weights; + e = CL101(k,:) - sign([1, D(k,cix)] * weights); + weights = weights + alpha * [1,D(k,cix)]' * e ; + end + + else %if ~isempty(W) + if isfield(MODE.hyperparameter,'alpha') + W = W*MODE.hyperparameter.alpha; + end + for k = rix(:)', + %e = (classlabel(k)==(1:M)) - [1, D(k,cix)] * weights; + e = CL101(k,:) - sign([1, D(k,cix)] * weights); + weights = weights + W(k) * [1,D(k,cix)]' * e ; + end + end + CC.weights = sparse(size(D,2)+1,M); + CC.weights([1,cix+1],:) = weights; + CC.datatype = ['classifier:',lower(MODE.TYPE)]; + + + elseif ~isempty(strfind(lower(MODE.TYPE),'winnow')) + % winnow algorithm + if ~isempty(W) + error('Classifier (%s) does not support weighted samples.',MODE.TYPE); + end + + [rix,cix] = row_col_deletion(D); + [CL101,CC.Labels] = cl101(classlabel); + M = size(CL101,2); + weights = ones(length(cix),M); + theta = size(D,2)/2; + + for k = rix(:)', + e = CL101(k,:) - sign(D(k,cix) * weights - theta); + weights = weights.* 2.^(D(k,cix)' * e); + end + + CC.weights = sparse(size(D,2)+1,M); + CC.weights(cix+1,:) = weights; + CC.datatype = ['classifier:',lower(MODE.TYPE)]; + + elseif ~isempty(strfind(lower(MODE.TYPE),'pls')) || ~isempty(strfind(lower(MODE.TYPE),'reg')) + % 4th version: support for weighted samples - work well with unequally distributed data: + % regression analysis, can handle sparse data, too. 
+ + if nargin<4, + W = []; + end + [rix, cix] = row_col_deletion(D); + wD = [ones(length(rix),1),D(rix,cix)]; + + if ~isempty(W) + %% wD = diag(W)*wD + W = W(:); + for k=1:size(wD,2) + wD(:,k) = W(rix).*wD(:,k); + end + end + [CL101, CC.Labels] = cl101(classlabel(rix,:)); + M = size(CL101,2); + CC.weights = sparse(sz(2)+1,M); + + %[rix, cix] = row_col_deletion(wD); + [q,r] = qr(wD,0); + + if isempty(W) + CC.weights([1,cix+1],:) = r\(q'*CL101); + else + CC.weights([1,cix+1],:) = r\(q'*(W(rix,ones(1,M)).*CL101)); + end + %for k = 1:M, + % CC.weights(cix,k) = r\(q'*(W.*CL101(rix,k))); + %end + CC.datatype = ['classifier:statistical:',lower(MODE.TYPE)]; + + + elseif ~isempty(strfind(MODE.TYPE,'WienerHopf')) + % Q: equivalent to LDA + % equivalent to Regression, except regression can not deal with NaN's + [CL101,CC.Labels] = cl101(classlabel); + M = size(CL101,2); + CC.weights = sparse(size(D,2)+1,M); + cc = covm(D,'E',W); + %c1 = classlabel(~isnan(classlabel)); + %c2 = ones(sum(~isnan(classlabel)),M); + %for k = 1:M, + % c2(:,k) = c1==CC.Labels(k); + %end + %CC.weights = cc\covm([ones(size(c2,1),1),D(~isnan(classlabel),:)],2*real(c2)-1,'M',W); + CC.weights = cc\covm([ones(size(D,1),1),D],CL101,'M',W); + CC.datatype = ['classifier:statistical:',lower(MODE.TYPE)]; + + + elseif ~isempty(strfind(lower(MODE.TYPE),'/gsvd')) + if ~isempty(W) + error('Classifier (%s) does not support weighted samples.',MODE.TYPE); + end + % [2] Peg Howland and Haesun Park, 2004 + % Generalizing Discriminant Analysis Using the Generalized Singular Value Decomposition + % IEEE Transactions on Pattern Analysis and Machine Intelligence, 26(8), 2004. 
+ % dx.doi.org/10.1109/TPAMI.2004.46 + % [3] http://www-static.cc.gatech.edu/~kihwan23/face_recog_gsvd.htm + + [classlabel,CC.Labels] = CL1M(classlabel); + [rix,cix] = row_col_deletion(D); + + Hw = zeros(length(rix)+length(CC.Labels), length(cix)); + Hb = []; + m0 = mean(D(rix,cix)); + K = length(CC.Labels); + N = zeros(1,K); + for k = 1:K, + ix = find(classlabel(rix)==CC.Labels(k)); + N(k) = length(ix); + [Hw(ix,:), mu] = center(D(rix(ix),cix)); + %Hb(k,:) = sqrt(N(k))*(mu(k,:)-m0); + Hw(length(rix)+k,:) = sqrt(N(k))*(mu-m0); % Hb(k,:) + end + try + [P,R,Q] = svd(Hw,'econ'); + catch % needed because SVD(..,'econ') not supported in Matlab 6.x + [P,R,Q] = svd(Hw,0); + end + t = rank(R); + + clear Hw Hb mu; + %[size(D);size(P);size(Q);size(R)] + R = R(1:t,1:t); + %P = P(1:size(D,1),1:t); + %Q = Q(1:t,:); + [U,E,W] = svd(P(1:length(rix),1:t),0); + %[size(U);size(E);size(W)] + clear U E P; + %[size(Q);size(R);size(W)] + + %G = Q(1:t,:)'*[R\W']; + G = Q(:,1:t)*(R\W'); % this works as well and needs only 'econ'-SVD + %G = G(:,1:t); % not needed + + % do not use this, gives very bad results for Medline database + %G = G(:,1:K); this seems to be a typo in [2] and [3]. + CC = train_sc(D(:,cix)*G,classlabel,MODE.TYPE(1:find(MODE.TYPE=='/')-1)); + CC.G = sparse(size(D,2),size(G,2)); + CC.G(cix,:) = G; + if isfield(CC,'weights') + CC.weights = sparse([CC.weights(1,:); CC.G*CC.weights(2:end,:)]); + CC.datatype = ['classifier:statistical:', lower(MODE.TYPE)]; + else + CC.datatype = [CC.datatype,'/gsvd']; + end + + + elseif ~isempty(strfind(lower(MODE.TYPE),'sparse')) + if ~isempty(W) + error('Classifier (%s) does not support weighted samples.',MODE.TYPE); + end + % [5] J.D. 
Tebbens and P.Schlesinger (2006), + % Improving Implementation of Linear Discriminant Analysis for the Small Sample Size Problem + % http://www.cs.cas.cz/mweb/download/publi/JdtSchl2006.pdf + + [classlabel,CC.Labels] = CL1M(classlabel); + [rix,cix] = row_col_deletion(D); + + warning('sparse LDA is sensitive to linear transformations') + M = length(CC.Labels); + G = sparse([],[],[],length(rix),M,length(rix)); + for k = 1:M, + G(classlabel(rix)==CC.Labels(k),k) = 1; + end + tol = 1e-10; + + G = train_lda_sparse(D(rix,cix),G,1,tol); + CC.datatype = 'classifier:slda'; + POS1 = find(MODE.TYPE=='/'); + %G = v(:,1:size(G.trafo,2)).*G.trafo; + %CC.weights = s * CC.weights(2:end,:) + sparse(1,1:M,CC.weights(1,:),sz(2)+1,M); + + CC = train_sc(D(rix,cix)*G.trafo,classlabel(rix),MODE.TYPE(1:POS1(1)-1)); + CC.G = sparse(size(D,2),size(G.trafo,2)); + CC.G(cix,:) = G.trafo; + if isfield(CC,'weights') + CC.weights = sparse([CC.weights(1,:); CC.G*CC.weights(2:end,:)]); + CC.datatype = ['classifier:statistical:',lower(MODE.TYPE)]; + else + CC.datatype = [CC.datatype,'/sparse']; + end + + elseif ~isempty(strfind(lower(MODE.TYPE),'rbf')) + if ~isempty(W) + error('Classifier (%s) does not support weighted samples.',MODE.TYPE); + end + + % Martin Hieden's RBF-SVM + if exist('svmpredict_mex','file')==3, + MODE.TYPE = 'SVM:LIB:RBF'; + else + error('No SVM training algorithm available. 
Install LibSVM for Matlab.\n');
+        end
+        CC.options = '-t 2 -q'; %use RBF kernel, set C, set gamma
+        % each option is appended only if the hyperparameter it reads is present
+        if isfield(MODE.hyperparameter,'c_value')
+            CC.options = sprintf('%s -c %g', CC.options, MODE.hyperparameter.c_value); % set C
+        end
+        if isfield(MODE.hyperparameter,'gamma')
+            CC.options = sprintf('%s -g %g', CC.options, MODE.hyperparameter.gamma); % set gamma
+        end
+
+        % pre-whitening
+        [D,r,m]=zscore(D,1);
+        CC.prewhite = sparse(2:sz(2)+1,1:sz(2),r,sz(2)+1,sz(2),2*sz(2));
+        CC.prewhite(1,:) = -m.*r;
+
+        [classlabel,CC.Labels] = CL1M(classlabel);
+        CC.model = svmtrain_mex(classlabel, sparse(D), CC.options); % Call the training mex File
+        CC.datatype = ['classifier:',lower(MODE.TYPE)];
+
+    elseif ~isempty(strfind(lower(MODE.TYPE),'svm11'))
+        if ~isempty(W)
+            error('Classifier (%s) does not support weighted samples.',MODE.TYPE);
+        end
+        % 1-versus-1 scheme
+        if ~isfield(MODE.hyperparameter,'c_value')
+            MODE.hyperparameter.c_value = 1;
+        end
+
+        CC.options=sprintf('-c %g -t 0 -q',MODE.hyperparameter.c_value); %use linear kernel, set C
+        CC.hyperparameter.c_value = MODE.hyperparameter.c_value;
+
+        % pre-whitening
+        [D,r,m]=zscore(D,1);
+        CC.prewhite = sparse(2:sz(2)+1,1:sz(2),r,sz(2)+1,sz(2),2*sz(2));
+        CC.prewhite(1,:) = -m.*r;
+
+        [classlabel,CC.Labels] = CL1M(classlabel);
+        CC.model = svmtrain_mex(classlabel, D, CC.options); % Call the training mex File
+
+        FUN = 'SVM:LIB:1vs1';
+        CC.datatype = ['classifier:',lower(FUN)];
+
+
+    elseif ~isempty(strfind(lower(MODE.TYPE),'psvm'))
+        if ~isempty(W)
+            %%% error('Classifier (%s) does not support weighted samples.',MODE.TYPE);
+            warning('Classifier (%s) in combination with weighted samples is not tested.',MODE.TYPE);
+        end
+        if ~isfield(MODE,'hyperparameter')
+            nu = 1;
+        elseif isfield(MODE.hyperparameter,'nu')
+            nu = MODE.hyperparameter.nu;
+        else
+            nu = 1;
+        end
+        [m,n] = size(D);
+        [CL101,CC.Labels] = cl101(classlabel);
+        CC.weights = sparse(n+1,size(CL101,2));
+        M = size(CL101,2);
+        for k = 1:M,
+            d = 
sparse(1:m,1:m,CL101(:,k)); + H = d * [ones(m,1),D]; + %%% r = sum(H,1)'; + r = sumskipnan(H,1,W)'; + %%% r = (speye(n+1)/nu + H' * H)\r; %solve (I/nu+H’*H)r=H’*e + [HTH, nn] = covm(H,H,'M',W); + r = (speye(n+1)/nu + HTH)\r; %solve (I/nu+H’*H)r=H’*e + u = nu*(1-(H*r)); + %%% CC.weights(:,k) = u'*H; + [c,nn] = covm(u,H,'M',W); + CC.weights(:,k) = c'; + end + CC.hyperparameter.nu = nu; + CC.datatype = ['classifier:',lower(MODE.TYPE)]; + + elseif ~isempty(strfind(lower(MODE.TYPE),'svm:lin4')) + if ~isfield(MODE.hyperparameter,'c_value') + MODE.hyperparameter.c_value = 1; + end + + [classlabel,CC.Labels] = CL1M(classlabel); + M = length(CC.Labels); + CC.weights = sparse(size(D,2)+1,M); + + [rix,cix] = row_col_deletion(D); + + % pre-whitening + [D,r,m]=zscore(D(rix,cix),1); + sz2 = length(cix); + s = sparse(2:sz2+1,1:sz2,r,sz2+1,sz2,2*sz2); + s(1,:) = -m.*r; + + CC.options = sprintf('-s 4 -B 1 -c %f -q', MODE.hyperparameter.c_value); % C-SVC, C=1, linear kernel, degree = 1, + % TODO: support for weight vector got lost + %model = train(W,classlabel, sparse(D), CC.options); % C-SVC, C=1, linear kernel, degree = 1, + model = train(classlabel, sparse(D), CC.options); % C-SVC, C=1, linear kernel, degree = 1, + weights = model.w([end,1:end-1],:)'; + + CC.weights([1,cix+1],:) = s * weights(2:end,:) + sparse(1,1:M,weights(1,:),sz2+1,M); % include pre-whitening transformation + CC.weights([1,cix+1],:) = s * CC.weights(cix+1,:) + sparse(1,1:M,CC.weights(1,:),sz2+1,M); % include pre-whitening transformation + CC.hyperparameter.c_value = MODE.hyperparameter.c_value; + CC.datatype = ['classifier:',lower(MODE.TYPE)]; + + + elseif ~isempty(strfind(lower(MODE.TYPE),'svm')) + + if ~isfield(MODE.hyperparameter,'c_value') + MODE.hyperparameter.c_value = 1; + end + if any(MODE.TYPE==':'), + % nothing to be done + elseif exist('train','file')==3, + MODE.TYPE = 'SVM:LIN'; %% liblinear + elseif exist('svmtrain_mex','file')==3, + MODE.TYPE = 'SVM:LIB'; + elseif (exist('svmtrain','file')==3), + 
MODE.TYPE = 'SVM:LIB'; + fprintf(1,'You need to rename %s to svmtrain_mex.mex !! \n Press any key to continue !!!\n',which('svmtrain.mex')); + elseif exist('svmtrain','file')==2, + MODE.TYPE = 'SVM:bioinfo'; + elseif exist('mexSVMTrain','file')==3, + MODE.TYPE = 'SVM:OSU'; + elseif exist('svcm_train','file')==2, + MODE.TYPE = 'SVM:LOO'; + elseif exist('svmclass','file')==2, + MODE.TYPE = 'SVM:KM'; + elseif exist('svc','file')==2, + MODE.TYPE = 'SVM:Gunn'; + else + error('No SVM training algorithm available. Install OSV-SVM, or LOO-SVM, or libSVM for Matlab.\n'); + end + + %%CC = train_svm(D,classlabel,MODE); + [CL101,CC.Labels] = cl101(classlabel); + M = size(CL101,2); + [rix,cix] = row_col_deletion(D); + CC.weights = sparse(sz(2)+1, M); + + % pre-whitening + [D,r,m]=zscore(D(rix,cix),1); + sz2 = length(cix); + s = sparse(2:sz2+1,1:sz2,r,sz2+1,sz2,2*sz2); + s(1,:) = -m.*r; + + for k = 1:M, + cl = CL101(rix,k); + if strncmp(MODE.TYPE, 'SVM:LIN',7); + if isfield(MODE,'options') + CC.options = MODE.options; + else + t = 1; + if length(MODE.TYPE)>7, t=str2num(MODE.TYPE(8:end)); end + if ((t<0) || (t>13)) t=0; end + CC.options = sprintf('-s %i -B 1 -c %f -q',t, MODE.hyperparameter.c_value); % C-SVC, C=1, linear kernel, degree = 1, + end + % TODO: support for weight vector got lost + %model = train(W, cl, sparse(D), CC.options); % C-SVC, C=1, linear kernel, degree = 1, + model = train(cl, sparse(D), CC.options); % C-SVC, C=1, linear kernel, degree = 1, + w = model.w(:,1:end-1)'; + Bias = model.w(:,end)'; + + elseif strcmp(MODE.TYPE, 'SVM:LIB'); %% tested with libsvm-mat-3.2.3 + if isfield(MODE,'options') + CC.options = MODE.options; + else + CC.options = sprintf('-s 0 -c %f -t 0 -d 1 -q', MODE.hyperparameter.c_value); % C-SVC, C=1, linear kernel, degree = 1, + end + model = svmtrain_mex(cl, sparse(D), CC.options); % C-SVC, C=1, linear kernel, degree = 1, + w = -cl(1) * model.SVs' * model.sv_coef; %Calculate decision hyperplane weight vector + % ensure correct sign of 
weight vector and Bias according to class label + Bias = -model.rho * cl(1); + + elseif strcmp(MODE.TYPE, 'SVM:bioinfo'); + % SVM classifier from bioinformatics toolbox. + % Settings suggested by Ian Daly, 2011-06-06 + options = optimset('Display','iter','maxiter',20000, 'largescale','off'); + CC.SVMstruct = svmtrain(D, cl, 'AUTOSCALE', 0, 'quadprog_opts', options, 'Method', 'LS', 'kernel_function', 'polynomial'); + Bias = -CC.SVMstruct.Bias; + w = -CC.SVMstruct.Alpha'*CC.SVMstruct.SupportVectors; + + elseif strcmp(MODE.TYPE, 'SVM:OSU'); + [AlphaY, SVs, Bias] = mexSVMTrain(D', cl', [0 1 1 1 MODE.hyperparameter.c_value]); % Linear Kernel, C=1; degree=1, c-SVM + w = -SVs * AlphaY'*cl(1); %Calculate decision hyperplane weight vector + % ensure correct sign of weight vector and Bias according to class label + Bias = -Bias * cl(1); + + elseif strcmp(MODE.TYPE, 'SVM:LOO'); + [a, Bias, g, inds] = svcm_train(D, cl, MODE.hyperparameter.c_value); % C = 1; + w = D(inds,:)' * (a(inds).*cl(inds)) ; + + elseif strcmp(MODE.TYPE, 'SVM:Gunn'); + [nsv, alpha, Bias,svi] = svc(D, cl, 1, MODE.hyperparameter.c_value); % linear kernel, C = 1; + w = D(svi,:)' * alpha(svi) * cl(1); + Bias = mean(D*w); + + elseif strcmp(MODE.TYPE, 'SVM:KM'); + [xsup,w1,Bias,inds] = svmclass(D, cl, MODE.hyperparameter.c_value, 1, 'poly', 1); % C = 1; + w = -D(inds,:)' * w1; + + else + fprintf(2,'Error TRAIN_SVM: no SVM training algorithm available\n'); + return; + end + + CC.weights(1,k) = -Bias; + CC.weights(cix+1,k) = w; + end + CC.weights([1,cix+1],:) = s * CC.weights(cix+1,:) + sparse(1,1:M,CC.weights(1,:),sz2+1,M); % include pre-whitening transformation + CC.hyperparameter.c_value = MODE.hyperparameter.c_value; + CC.datatype = ['classifier:',lower(MODE.TYPE)]; + + + elseif ~isempty(strfind(lower(MODE.TYPE),'csp')) + CC.datatype = ['classifier:',lower(MODE.TYPE)]; + [classlabel,CC.Labels] = CL1M(classlabel); + CC.MD = repmat(NaN,[sz(2)+[1,1],length(CC.Labels)]); + CC.NN = CC.MD; + for k = 
1:length(CC.Labels), + %% [CC.MD(k,:,:),CC.NN(k,:,:)] = covm(D(classlabel==CC.Labels(k),:),'E'); + ix = classlabel==CC.Labels(k); + if isempty(W) + [CC.MD(:,:,k),CC.NN(:,:,k)] = covm(D(ix,:), 'E'); + else + [CC.MD(:,:,k),CC.NN(:,:,k)] = covm(D(ix,:), 'E', W(ix)); + end + end + ECM = CC.MD./CC.NN; + W = csp(ECM,'CSP3'); + %%% ### This is a hack ### + CC.FiltA = 50; + CC.FiltB = ones(CC.FiltA,1); + d = filtfilt(CC.FiltB,CC.FiltA,(D*W).^2); + CC.csp_w = W; + CC.CSP = train_sc(log(d),classlabel); + + + else % Linear and Quadratic statistical classifiers + CC.datatype = ['classifier:statistical:',lower(MODE.TYPE)]; + [classlabel,CC.Labels] = CL1M(classlabel); + CC.MD = repmat(NaN,[sz(2)+[1,1],length(CC.Labels)]); + CC.NN = CC.MD; + for k = 1:length(CC.Labels), + ix = classlabel==CC.Labels(k); + if isempty(W) + [CC.MD(:,:,k),CC.NN(:,:,k)] = covm(D(ix,:), 'E'); + else + [CC.MD(:,:,k),CC.NN(:,:,k)] = covm(D(ix,:), 'E', W(ix)); + end + end + + ECM = CC.MD./CC.NN; + NC = size(CC.MD); + if strncmpi(MODE.TYPE,'LD',2) || strncmpi(MODE.TYPE,'FDA',3) || strncmpi(MODE.TYPE,'FLDA',3), + + %if NC(1)==2, NC(1)=1; end % linear two class problem needs only one discriminant + CC.weights = repmat(NaN,NC(2),NC(3)); % memory allocation + type = MODE.TYPE(3)-'0'; + + ECM0 = squeeze(sum(ECM,3)); %decompose ECM + for k = 1:NC(3); + ix = [1:k-1,k+1:NC(3)]; + dM = CC.MD(:,1,k)./CC.NN(:,1,k) - sum(CC.MD(:,1,ix),3)./sum(CC.NN(:,1,ix),3); + switch (type) + case 2 % LD2 + ecm0 = (sum(ECM(:,:,ix),3)/(NC(3)-1) + ECM(:,:,k)); + case 4 % LD4 + ecm0 = 2*(sum(ECM(:,:,ix),3) + ECM(:,:,k))/NC(3); + % ecm0 = sum(CC.MD,3)./sum(CC.NN,3); + case 5 % LD5 + ecm0 = ECM(:,:,k); + case 6 % LD6 + ecm0 = sum(CC.MD(:,:,ix),3)./sum(CC.NN(:,:,ix),3); + otherwise % LD3, LDA, FDA + ecm0 = ECM0; + end + if isfield(MODE.hyperparameter,'gamma') + ecm0 = ecm0 + mean(diag(ecm0))*eye(size(ecm0))*MODE.hyperparameter.gamma; + end + + CC.weights(:,k) = ecm0\dM; + + end + %CC.weights = sparse(CC.weights); + + elseif 
strcmpi(MODE.TYPE,'RDA'); + if isfield(MODE,'hyperparameter') + CC.hyperparameter = MODE.hyperparameter; + end + % default values + if ~isfield(CC.hyperparameter,'gamma') + CC.hyperparameter.gamma = 0; + end + if ~isfield(CC.hyperparameter,'lambda') + CC.hyperparameter.lambda = 1; + end + else + ECM0 = sum(ECM,3); + nn = ECM0(1,1,1); % number of samples in training set for class k + XC = squeeze(ECM0(:,:,1))/nn; % normalize correlation matrix + M = XC(1,2:NC(2)); % mean + S = XC(2:NC(2),2:NC(2)) - M'*M;% covariance matrix + + try + [v,d]=eig(S); + U0 = v(diag(d)==0,:); + CC.iS2 = U0*U0'; + end + + %M = M/nn; S=S/(nn-1); + v=eig(S); + if all(v > eps) + ICOV0 = inv(S); + else + ICOV0 = repmat(NaN,size(S)); + end + CC.iS0 = ICOV0; + % ICOV1 = zeros(size(S)); + for k = 1:NC(3), + %[M,sd,S,xc,N] = decovm(ECM{k}); %decompose ECM + %c = size(ECM,2); + nn = ECM(1,1,k);% number of samples in training set for class k + XC = squeeze(ECM(:,:,k))/nn;% normalize correlation matrix + M = XC(1,2:NC(2));% mean + S = XC(2:NC(2),2:NC(2)) - M'*M;% covariance matrix + %M = M/nn; S=S/(nn-1); + + %ICOV(1) = ICOV(1) + (XC(2:NC(2),2:NC(2)) - )/nn + %if all(eig(S) > eps) % only when S is positive definite + v=eig(S); + if all(v > eps) + iS = inv(S); + detS = det(S); + else + iS = repmat(NaN,size(S)); + detS = NaN; + end + + CC.M{k} = M; + CC.IR{k} = [-M;eye(NC(2)-1)]*iS*[-M',eye(NC(2)-1)]; % inverse correlation matrix extended by mean + CC.IR0{k} = [-M;eye(NC(2)-1)]*ICOV0*[-M',eye(NC(2)-1)]; % inverse correlation matrix extended by mean + d = NC(2)-1; + if exist('OCTAVE_VERSION','builtin') + S = full(S); + end + CC.logSF(k) = log(nn) - d/2*log(2*pi) - detS/2; + CC.logSF2(k) = -2*log(nn/sum(ECM(:,1,1))); + CC.logSF3(k) = d*log(2*pi) + log(det(S)); + CC.logSF4(k) = log(detS) + 2*log(nn); + CC.logSF5(k) = log(detS); + CC.logSF6(k) = log(detS) - 2*log(nn/sum(ECM(:,1,1))); + CC.logSF7(k) = log(detS) + d*log(2*pi) - 2*log(nn/sum(ECM(:,1,1))); + CC.logSF8(k) = sum(log(svd(S))) + log(nn) - 
log(sum(ECM(:,1,1)));
+                CC.SF(k) = nn/sqrt((2*pi)^d * detS);
+                %CC.datatype='LLBC';
+            end
+        end
+    end
+end
+
+function [CL101,Labels] = cl101(classlabel)
+    %% CL101 converts classlabels to {-1,+1} one-versus-rest encoding (one column per class); Labels is 1:M
+
+    if (size(classlabel,2)==1) && all(classlabel>=0) && all(classlabel==fix(classlabel))   % column test first: keeps every && operand scalar
+        M = max(classlabel);   % single column of non-negative integers: 1..M encoding
+        if 0, % M==2;
+            % this approach is disabled for now because it currently
+            % is not compatible with all supported classifiers (e.g. NBC).
+            CL101 = (classlabel==2)-(classlabel==1);
+        else
+            CL101 = zeros(size(classlabel,1),M);
+            for k=1:M,
+                %% One-versus-Rest scheme
+                CL101(:,k) = 2*real(classlabel==k) - 1;
+            end
+        end
+        CL101(isnan(classlabel),:) = NaN; %% or zero ???
+
+    elseif all((classlabel==1) | (classlabel==-1) | (classlabel==0) )
+        CL101 = classlabel;    % already {-1,0,+1} encoded: passed through unchanged
+        M = size(CL101,2);
+    else
+        classlabel,
+        error('format of classlabel unsupported');
+    end
+    Labels = 1:M;
+    return;
+end
+
+
+function [cl1m, Labels] = CL1M(classlabel)
+    %% CL1M converts classlabels to 1..M encoding (0 = no class has +1); Labels is 1:max(cl1m)
+    if (size(classlabel,2)==1) && all(classlabel>=0) && all(classlabel==fix(classlabel))   % column test first: keeps every && operand scalar
+        cl1m = classlabel;
+
+    elseif all((classlabel==1) | (classlabel==-1) | (classlabel==0) )
+        % {-1,0,+1} encoding, one column per class
+        M = size(classlabel,2);
+        if any(sum(classlabel==1,2)>1)
+            warning('invalid format of classlabel - at most one category may have +1');
+        end
+        if (M==1),
+            cl1m = (classlabel==-1) + 2*(classlabel==+1);   % single column: -1 -> class 1, +1 -> class 2, 0 -> 0
+        else
+            [tmp, cl1m] = max(classlabel,[],2);             % index of the largest entry in each row
+            if any(tmp ~= 1)
+                warning('some class might not be properly represented - you might want to add another column to classlabel = [max(classlabel,[],2)<1,classlabel]');
+            end
+            cl1m(tmp<1)= 0; %% or NaN ???
+        end
+    else
+        classlabel
+        error('format of classlabel unsupported');
+    end
+    Labels = 1:max(cl1m);
+    return;
+end
diff --git a/inst/trimean.m b/inst/trimean.m
new file mode 100644
index 0000000..4a3c61e
--- /dev/null
+++ b/inst/trimean.m
@@ -0,0 +1,82 @@
+function y=trimean(x,DIM)
+% TRIMEAN yields the weighted mean of the median and the quartiles
+% m = TRIMEAN(y).
+%
+% The trimean is m = (Q1+2*MED+Q3)/4
+% with quartile Q1 and Q3 and median MED
+%
+% N-dimensional data is supported
+%
+% REFERENCES:
+% [1] http://mathworld.wolfram.com/Trimean.html
+
+
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 2 of the License, or
+% (at your option) any later version.
+%
+% This program is distributed in the hope that it will be useful,
+% but WITHOUT ANY WARRANTY; without even the implied warranty of
+% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+% GNU General Public License for more details.
+%
+% You should have received a copy of the GNU General Public License
+% along with this program; If not, see <http://www.gnu.org/licenses/>.
+ +% Copyright (C) 1996-2019 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +global FLAG_NANS_OCCURED; + +% check dimension +sz=size(x); + +% find the dimension +if nargin==1, + DIM = find(size(x)>1,1); + if isempty(DIM), DIM=1; end; +end; + +if DIM>length(sz), + sz = [sz,ones(1,DIM-length(sz))]; +end; + +D1 = prod(sz(1:DIM-1)); +D2 = sz(DIM); +D3 = prod(sz(DIM+1:length(sz))); +D0 = [sz(1:DIM-1),1,sz(DIM+1:length(sz))]; +y = repmat(nan,D0); +q = repmat(nan,3,1); +for k = 0:D1-1, +for l = 0:D3-1, + xi = k + l * D1*sz(DIM) + 1 ; + xo = k + l * D1 + 1; + t = x(xi+(0:sz(DIM)-1)*D1); + t = sort(t(~isnan(t))); + t = t(:); + n = length(t); + if (n +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . 
+ +if nargin<3, + DIM = []; +end; +if isempty(DIM), + DIM = find(size(Y)>1,1); + if isempty(DIM), DIM = 1; end; +end; + +if nargin<2, + help trimmean + +else + sz = size(Y); + if DIM > length(sz), + sz = [sz,ones(1,DIM-length(sz))]; + end; + + D1 = prod(sz(1:DIM-1)); + D2 = length(p); + D3 = prod(sz(DIM+1:length(sz))); + Q = repmat(nan,[sz(1:DIM-1),D2,sz(DIM+1:length(sz))]); + for k = 0:D1-1, + for l = 0:D3-1, + xi = k + l * D1*sz(DIM) + 1 ; + xo = k + l * D1*D2; + t = Y(xi:D1:xi+D1*sz(DIM)-1); + t = sort(t(~isnan(t))); + N = length(t); + for m=1:D2, + n = floor(N*p(m)/2); + f = sum(t(1+n:N-n))/(N-2*n); + Q(xo + 1 + (m-1)*D1) = f; + end; + end; + end; +end; + +%!assert(trimmean([11.4, 17.3, 21.3, 25.9, 40.1],.2),23.2) + diff --git a/inst/ttest.m b/inst/ttest.m new file mode 100644 index 0000000..d19140e --- /dev/null +++ b/inst/ttest.m @@ -0,0 +1,161 @@ +function [h, pval, ci, stats] = ttest (x, m, varargin) +% TTEST (paired) t-test +% For a sample X from a normal distribution with unknown mean and +% variance, perform a t-test of the null hypothesis `mean (X) == M'. +% Under the null, the test statistic T follows a Student +% distribution with `DF = length (X) - 1' degrees of freedom. +% +% TTEST treads NaNs as "Missing values" and ignores these. +% +% H = ttest(x,m) +% tests Null-hypothesis that mean of x is m. +% H = ttest(x,y) +% size of x and size of y must match, it is tested whether the +% difference x-y is significantly different to m=0; +% H = ttest(x,y,alpha) +% H = ttest(x,y,alpha,tail) +% H = ttest(x,y,alpha,tail,DIM) +% [H,PVAL] = ttest(...) +% [H,PVAL,CI] = ttest(...) +% [H,PVAL,CI,stats] = ttest(...) +% +% H=1 indicates a rejection of the Null-hypothesis at a significance +% level of alpha (default alpha = 0.05). +% +% With the optional argument string TAIL, the alternative of interest +% can be selected. If TAIL is '!=' or '<>' or 'both', the null is tested +% against the two-sided Alternative `mean (X) ~= mean (Y)'. 
If TAIL +% is '>' or 'right', the one-sided Alternative `mean (X) > mean (Y)' is used. +% Similarly for '<' or 'left', the one-sided Alternative `mean (X) < mean +% (Y)' is used. The default is the two-sided case. +% +% H returns whether the Null-Hypothesis must be rejected. +% The p-value of the test is returned in PVAL. +% +% TTEST works on the first non-singleton dimension or on DIM. +% +% If no output argument is given, the p-value of the test is +% displayed. +% + +% Copyright (C) 2014 Tony Richardson +% Copyright (C) 2010,2020 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ + % Set default arguments + my_default = 0; + alpha = 0.05; + tail = 'both'; + + % Find the first non-singleton dimension of x + DIM = min(find(size(x)~=1)); + if isempty(DIM), DIM = 1; end + + if (nargin == 1) + m = my_default; + end + + i = 1; + while ( i <= length(varargin) ) + switch lower(varargin{i}) + case 'alpha' + i = i + 1; + alpha = varargin{i}; + case 'tail' + i = i + 1; + tail = varargin{i}; + case 'dim' + i = i + 1; + DIM = varargin{i}; + otherwise + error('Invalid Name argument.',[]); + end + i = i + 1; + end + + if ~isa(tail, 'char') + error('tail argument to ttest must be a string\n',[]); + end + + if any(and(~isscalar(m),size(x)~=size(m))) + error('Arrays in paired test must be the same size.'); + end + + % Set default values if arguments are present but empty + if isempty(m) + m = my_default; + end + + % This adjustment allows everything else to remain the + % same for both the one-sample t test and paired tests. + x = x - m; + + szx = size(x); + szm = size(m); + szx(DIM) = 1; + szm(DIM) = 1; + + [S, N] = sumskipnan(x, DIM); + x_bar = S./N; + stats.df = N - 1; + stats.sd = std (x, 0, DIM); + x_bar_std = stats.sd./sqrt(N); + tval = (x_bar)./x_bar_std; + stats.tstat = tval; + + if (strcmp (tail, '~=') || strcmp (tail, '!=') || strcmp (tail, '<>')) || strcmp(tail,'both'), + pval = 2*(1 - tcdf(abs(tval), N-1)); + tcrit = -tinv(alpha/2,N-1); + ci = [x_bar-tcrit.*x_bar_std; x_bar+tcrit.*x_bar_std] + m; + elseif strcmp (tail, '>') || strcmp(tail,'right'), + pval = tcdf(tval, N-1); + tcrit = -tinv(alpha, N-1); + ci = [m+x_bar-tcrit.*x_bar_std; inf*ones(size(x_bar))]; + elseif strcmp (tail, '<') || strcmp(tail,'left'), + pval = tcdf(tval, N-1); + tcrit = -tinv(alpha,N-1); + ci = [-inf*ones(size(x_bar)); m+x_bar+tcrit.*x_bar_std]; + else + error ('ttest: option %s not recognized', tail); + end + + % Reshape the ci array to match MATLAB shaping + if and(isscalar(x_bar), DIM==2) + ci = ci(:)'; + elseif size(x_bar,2)' or 'both', the null is tested +% 
against the two-sided Alternative `mean (X) ~= mean (Y)'. If TAIL +% is '>' or 'right', the one-sided Alternative `mean (X) > mean (Y)' is used. +% Similarly for '<' or 'left', the one-sided Alternative `mean (X) < mean +% (Y)' is used. The default is the two-sided case. +% +% vartype supports only 'equal' (default value); the value 'unequal' is not supported. +% +% H returns whether the Null-Hypothesis must be rejected. +% The p-value of the test is returned in PVAL. +% +% TTEST2 works on the first non-singleton dimension or on DIM. +% +% If no output argument is given, the p-value of the test is +% displayed. +% + +%%% not supported yet +% [h,p,ci] = ttest2(...) +% [h,p,ci,stats] = ttest2(...) + +% $Id$ +% Copyright (C) 1995, 1996, 1997, 1998, 2000, 2002, 2005, 2006, 2007 +% Kurt Hornik +% Copyright (C) 2010 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software: you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation, either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ + if ((nargin < 2) || (nargin > 6) || nargout > 4) + print_usage ; + end + + if (nargin < 3) || isempty(alpha) + alpha = .05; + end + + if (nargin < 4) || isempty(tail) + tail = '~='; + end + if (~ ischar (tail)) + error ('ttest2: tail must be a string'); + end + if (nargin < 5) || isempty(vartype) + vartype = 'equal'; + end + if ~strcmp(vartype,'equal') + error ('test: vartype not supported') + end + if nargin<6, + DIM = find(size(x)>1,1); + end; + if isempty(DIM), DIM=1; end; + + szx = size(x); + szy = size(y); + szy(DIM) = 1; + szx(DIM) = 1; + + if (any(szx-szy)) + error ('ttest2: dimension of X and Y do not fit'); + end + + [SX, NX] = sumskipnan(x, DIM); + [SY, NY] = sumskipnan(y, DIM); + stats.df = NX + NY - 2; + MX = SX ./ NX; + MY = SY ./ NY; + + if any(size(x)==0) || any(size(y)==0) + v = NaN; + else + v = sumsq(x-repmat(MX,size(x)./size(MX))) + sumsq(y-repmat(MY,size(y)./size(MY))); + end; + stats.sd = sqrt(v/stats.df); + stats.tstat = (MX - MY) .* sqrt ((NX .* NY .* stats.df) ./ (v .* (NX + NY))); + cdf = tcdf (stats.tstat, stats.df); + + if (strcmp (tail, '~=') || strcmp (tail, '!=') || strcmp (tail, '<>')) || strcmp(tail,'both'), + pval = 2 * min (cdf, 1 - cdf); + elseif strcmp (tail, '>') || strcmp(tail,'right'), + pval = 1 - cdf; + elseif strcmp (tail, '<') || strcmp(tail,'left'), + pval = cdf; + else + error ('ttest2: option %s not recognized', tail); + end + + h = pval < alpha; + + if (nargout == 0) + fprintf(1,' pval: %g\n', pval); + end + diff --git a/inst/var.m b/inst/var.m new file mode 100644 index 0000000..f83e914 --- /dev/null +++ b/inst/var.m @@ -0,0 +1,102 @@ +function y=var(x,opt,DIM,W) +% VAR calculates the variance. 
+% +% y = var(x [, opt[, DIM]]) +% calculates the variance in dimension DIM +% the default DIM is the first non-singleton dimension +% +% opt 0: normalizes with N-1 [default] +% 1: normalizes with N +% DIM dimension +% 1: VAR of columns +% 2: VAR of rows +% N: VAR of N-th dimension +% default or []: first DIMENSION, with more than 1 element +% W weights to compute weighted variance (default: []) +% if W=[], all weights are 1. +% number of elements in W must match size(x,DIM) +% +% usage: +% var(x) +% var(x, opt, DIM) +% var(x, [], DIM) +% var(x, W, DIM) +% var(x, opt, DIM, W) +% +% features: +% - can deal with NaN's (missing values) +% - weighting of data +% - dimension argument +% - compatible with Matlab and Octave +% +% see also: MEANSQ, SUMSQ, SUMSKIPNAN, MEAN, RMS, STD, + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see <http://www.gnu.org/licenses/>. 
+ +% $Id$ +% Copyright (C) 2000-2003,2006,2009,2010 by Alois Schloegl +% This is part of the NaN-toolbox for Octave and Matlab +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +if nargin<3, + DIM = []; +end; + +if nargin==1, + W = []; + opt = []; + +elseif any(nargin==[2,3]) + if (numel(opt)<2), + W = []; + else + W = opt; + opt = []; + end; +elseif (nargin==4) && (numel(opt)<2) && (numel(DIM)<2), + ; +else + fprintf(1,'Error VAR: incorrect usage\n'); + help var; + return; +end; + +if isempty(opt), + opt = 0; +end; + +if isempty(DIM), + DIM = find(size(x)>1,1); + if isempty(DIM), DIM=1; end; +end; + +[y,n,ssq] = sumskipnan(x,DIM,W); +if all(ssq(:).*n(:) > 2*(y(:).^2)), + %% rounding error is neglectable + y = ssq - y.*y./n; +else + %% rounding error is not neglectable + szx = size(x); + szy = size(y); + if length(szy) +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . 
+ +if nargin<2, + Y = []; + MAXLAG = []; + SCALEOPT = 'none'; +elseif ischar(Y), + MAXLAG = Y; + SCALEOPT=MAXLAG; + Y=[]; +elseif all(size(Y)==1), + if nargin<3 + SCALEOPT = 'none'; + else + SCALEOPT = MAXLAG; + end; + MAXLAG = Y; + Y = []; +end; + +if 0, + +elseif isempty(Y) && isempty(MAXLAG) + NX = isnan(X); + X(NX) = 0; + [C,LAGS] = xcorr(X,'none'); + [N,LAGS] = xcorr(1-NX,'none'); +elseif ~isempty(Y) && isempty(MAXLAG) + NX = isnan(X); + NY = isnan(Y); + X(NX) = 0; + Y(NY) = 0; + [C,LAGS] = xcorr(X,Y,'none'); + [N,LAGS] = xcorr(1-NX,1-NY,'none'); +elseif isempty(Y) && ~isempty(MAXLAG) + NX = isnan(X); + X(NX) = 0; + [C,LAGS] = xcorr(X,MAXLAG,'none'); + [N,LAGS] = xcorr(1-NX,MAXLAG,'none'); +elseif ~isempty(Y) && ~isempty(MAXLAG) + NX = isnan(X); + NY = isnan(Y); + X(NX) = 0; + Y(NY) = 0; + [C,LAGS] = xcorr(X,Y,MAXLAG,'none'); + [N,LAGS] = xcorr(1-NX,1-NY,MAXLAG,'none'); +end; + +if 0, + +elseif strcmp(SCALEOPT,'none') + % done + +elseif strcmp(SCALEOPT,'coeff') + ix = find(LAGS==0); + if ~any(size(X)==1), %% ~isvector(X) + c = C(ix,1:size(X,2)+1:end); %% diagonal elements + v = c.^-0.5; % sqrt(1./c(:)); + v = v'*v; + C = C.*repmat(v(:).',size(C,1),1); + elseif isempty(Y) + C = C/C(ix); + else + C = C/sqrt(sumsq(X)*sumsq(Y)); + end; + +elseif strcmp(SCALEOPT,'biased') + C = C./repmat(max(N),size(C,1),1); + +elseif strcmp(SCALEOPT,'unbiased') + C = C./(repmat(max(N),size(C,1),1)-repmat(LAGS,1,size(C,2))); + +else + warning('invalid SCALEOPT - not supported'); +end; + diff --git a/inst/xptopen.m b/inst/xptopen.m new file mode 100644 index 0000000..e57b3f8 --- /dev/null +++ b/inst/xptopen.m @@ -0,0 +1,31 @@ +% XPTOPEN read of several file formats and writing of the SAS Transport Format (*.xpt) +% +% X = xptopen(filename) +% X = xptopen(filename,'r') +% read filename and return variables in struct X +% Supported are ARFF, SAS-XPT and STATA files. +% +% X = xptopen(filename,'w',X) +% save fields of struct X in filename. 
+% +% The fields of X must be column vectors of equal length. +% Each vector is either a numeric vector or a cell array of strings. +% +% The SAS-XPT format stores Date/Time as numeric value counting the number of days since 1960-01-01. + +% Copyright (C) 2015 by Alois Schloegl +% This is part of the NaN-toolbox. For more details see +% https://pub.ist.ac.at/~schloegl/matlab/NaN/ +% +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . diff --git a/inst/xval.m b/inst/xval.m new file mode 100644 index 0000000..a9a7da0 --- /dev/null +++ b/inst/xval.m @@ -0,0 +1,209 @@ +function [R,CC]=xval(D,classlabel,MODE,arg4) +% XVAL is used for crossvalidation +% +% [R,CC] = xval(D,classlabel) +% .. = xval(D,classlabel,CLASSIFIER) +% .. = xval(D,classlabel,CLASSIFIER,type) +% .. = xval(D,{classlabel,W},CLASSIFIER) +% .. = xval(D,{classlabel,W,NG},CLASSIFIER) +% +% example: +% load_fisheriris; %builtin iris dataset +% C = species; +% K = 5; NG = [1:length(C)]'*K/length(C); +% [R,CC] = xval(meas,{C,[],NG},'NBC'); +% +% Input: +% D: data features (one feature per column, one sample per row) +% classlabel labels of each sample, must have the same number of rows as D. +% Two different encodings are supported: +% {-1,1}-encoding (multiple classes with separate columns for each class) or +% 1..M encoding. 
+% So [1;2;3;1;4] is equivalent to +% [+1,-1,-1,-1; +% [-1,+1,-1,-1; +% [-1,-1,+1,-1; +% [+1,-1,-1,-1] +% [-1,-1,-1,+1] +% Note, samples with classlabel=0 are ignored. +% +% CLASSIFIER can be any classifier supported by train_sc (default='LDA') +% {'REG','MDA','MD2','QDA','QDA2','LD2','LD3','LD4','LD5','LD6','NBC','aNBC','WienerHopf', 'RDA','GDBC', +% 'SVM','RBF','PSVM','SVM11','SVM:LIN4','SVM:LIN0','SVM:LIN1','SVM:LIN2','SVM:LIN3','WINNOW'} +% these can be modified by ###/GSVD, ###/sparse and ###/DELETION. +% /DELETION removes in case of NaN's either the rows or the columns (which removes less data values) with any NaN +% /sparse and /GSVD preprocess the data an reduce it to some lower-dimensional space. +% Hyperparameters (like alpha for PLA, gamma/lambda for RDA, c_value for SVM, etc) can be defined as +% CLASSIFIER.hyperparameter.alpha, etc. and +% CLASSIFIER.TYPE = 'PLA' (as listed above). +% See train_sc for details. +% W: weights for each sample (row) in D. +% default: [] (i.e. all weights are 1) +% number of elements in W must match the number of rows of D +% NG: used to define the type of cross-valdiation +% Leave-One-Out-Method (LOOM): NG = [1:length(classlabel)]' (default) +% Leave-K-Out-Method: NG = ceil([1:length(classlabel)]'/K) +% K-fold XV: NG = ceil([1:length(classlabel)]'*K/length(classlabel)) +% group-wise XV (if samples are not indepentent) can be also defined here +% samples from the same group (dependent samples) get the same identifier +% samples from different groups get different classifiers +% TYPE: defines the type of cross-validation procedure if NG is not specified +% 'LOOM' leave-one-out-method +% k k-fold crossvalidation +% +% OUTPUT: +% R contains the resulting performance metric +% R.OUTPUT classifier output +% R.CL class labels +% R.H confusion matrix +% R.kappa Cohen's kappa coefficient +% R.ACC accuracy +% R.ERR error rate +% ... 
and a number of addition parameters obtained from the confusionm matrix [2] +% R.noXV.{...} contains the result w/o crossvalidation +% CC contains the classifier +% +% plota(R) shows the confusion matrix of the results +% +% see also: TRAIN_SC, TEST_SC, CLASSIFY, PLOTA +% +% References: +% [1] R. Duda, P. Hart, and D. Stork, Pattern Classification, second ed. +% John Wiley & Sons, 2001. +% [2] A. Schlögl, J. Kronegg, J.E. Huggins, S. G. Mason; +% Evaluation criteria in BCI research. +% (Eds.) G. Dornhege, J.R. Millan, T. Hinterberger, D.J. McFarland, K.-R.Müller; +% Towards Brain-Computer Interfacing, MIT Press, 2007, p.327-342 + +% Copyright (C) 2008,2009,2010,2016 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + +if (nargin<3) || isempty(MODE), + MODE = 'LDA'; +end; +if ischar(MODE) + tmp = MODE; + clear MODE; + MODE.TYPE = tmp; +elseif ~isfield(MODE,'TYPE') + MODE.TYPE=''; +end; + +sz = size(D); +NG = []; +W = []; + +if iscell(classlabel) + % hack to handle NaN's in unique(...) 
+ c = classlabel{:,1}; + ix = find(~isnan(c)); + C = c; + [b,i,C(ix)] = unique(c(ix)); + if size(classlabel,2)>1, + W = [classlabel{:,2}]; + end; + if size(classlabel,2)>2, + [Label,tmp1,NG] = unique(classlabel{:,3}); + end; +elseif size(classlabel,2)>1, + %% group-wise classvalidation + C = classlabel(:,1); + W = classlabel(:,2); + if size(classlabel,2)==2, + warning('This option defines W and NG in an ambigous way - use instead xval(D,{C,[],NG},...) or xval(D,{C,W},...)'); + else + [Label,tmp1,NG] = unique(classlabel(:,3)); + end; +else + C = classlabel; +end; +if all(W==1), W = []; end; +if sz(1)~=size(C,1), + error('length of data and classlabel does not fit'); +end; + + +if isempty(NG) +if (nargin<4) || strcmpi(arg4,'LOOM') + %% LOOM + NG = (1:sz(1))'; + +elseif isnumeric(arg4) + if isscalar(arg4) + % K-fold XV + NG = ceil((1:length(C))'*arg4/length(C)); + elseif length(arg4)==2, + NG = ceil((1:length(C))'*arg4(1)/length(C)); + end; + +end; +end; + +sz = size(D); +if sz(1)~=length(C), + error('length of data and classlabel does not fit'); +end; +if ~isfield(MODE,'hyperparameter') + MODE.hyperparameter = []; +end + +cl = repmat(NaN,size(classlabel,1),1); +output = repmat(NaN,size(classlabel,1),max(C)); +for k = 1:max(NG), + ix = find(~any(isnan(C),2) & (NG~=k)); + if isempty(W), + CC = train_sc(D(ix,:), C(ix), MODE); + else + CC = train_sc(D(ix,:), C(ix), MODE, W(ix)); + end; + ix = find(NG==k); + r = test_sc(CC, D(ix,:)); + cl(ix,1) = r.classlabel; + output(ix,:) = r.output; +end; + +%R = kappa(C,cl,'notIgnoreNAN',W); +R = kappa(C,cl,[],W); +%R2 = kappa(R.H); + +R.OUTPUT=output; +R.CL=cl; +R.ERR = 1-R.ACC; +if isnumeric(R.Label) + R.Label = cellstr(int2str(R.Label)); +end; + +if nargout>1, + % final classifier + ix = find(~any(isnan(C),2)); + if isempty(W), + CC = train_sc(D(ix,:), C(ix), MODE); + else + CC = train_sc(D(ix,:), C(ix), MODE,W); + end; + CC.Labels = 1:max(C); + %CC.Labels = unique(C); + + % test w/o cross-validation + r = test_sc(CC, D(ix,:)); + 
R.noXV = kappa(C(ix), r.classlabel,[],W); + R.noXV.OUTPUT = r.output; + R.noXV.CL = r.classlabel; + R.noXV.CC = CC; +end; +
diff --git a/inst/zScoreMedian.m b/inst/zScoreMedian.m
new file mode 100644
index 0000000..29c6c58
--- /dev/null
+++ b/inst/zScoreMedian.m
@@ -0,0 +1,48 @@
+function Z = zScoreMedian(X, DIM)
+% zScoreMedian removes the median and standardizes by the 1.483*median absolute deviation
+%
+% Usage:  Z = zScoreMedian(X, DIM)
+% Input:  X  : data
+%         DIM: dimension along which z-score should be calculated (1=columns, 2=rows)
+%              (optional, default=first dimension with more than 1 element)
+% Output: Z  : z-scores; an empty X is returned unchanged
+
+%	Copyright (C) 2003 Patrick Houweling
+%	Copyright (C) 2009 Alois Schloegl
+%	$Id$
+%	This function is part of the NaN-toolbox
+%	http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+%    This program is free software: you can redistribute it and/or modify
+%    it under the terms of the GNU General Public License as published by
+%    the Free Software Foundation, either version 3 of the License, or
+%    (at your option) any later version.
+%
+%    This program is distributed in the hope that it will be useful,
+%    but WITHOUT ANY WARRANTY; without even the implied warranty of
+%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+%    GNU General Public License for more details.
+%
+%    You should have received a copy of the GNU General Public License
+%    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+
+% input checks: an empty input has nothing to standardize, but Z must still be
+% assigned, otherwise "Z = zScoreMedian([])" fails with an undefined output
+if any(size(X)==0),
+	Z = X;
+	return;
+end;
+
+% robust moment estimators:
+% - mean: median
+% - standard deviation: 1.483 * median absolute deviation (medAbsDev);
+%   the factor 1.483 is the ratio of the standard deviation of a normal random variable to its MAD.
+if nargin<2,
+	[D, M] = medAbsDev(X);
+else
+	[D, M] = medAbsDev(X, DIM);
+end;
+
+% z-score: subtract M and divide by 1.483*D
+Z = (X - repmat(M, size(X)./size(M))) ./ repmat(1.483*D, size(X)./size(D));
diff --git a/inst/zscore.m b/inst/zscore.m
new file mode 100644
index 0000000..23fafb3
--- /dev/null
+++ b/inst/zscore.m
@@ -0,0 +1,90 @@
+function [i,m,s] = zscore(i,OPT, DIM, W)
+% ZSCORE removes the mean and normalizes data
+% to a variance of 1. Can be used for pre-whitening of data, too.
+%
+% [z,mu,sigma] = zscore(x [,OPT [,DIM [,W]]])
+%   z      z-score of x along dimension DIM
+%   sigma  is the standard deviation of x (a zero deviation is replaced by 1)
+%   mu     is the mean of x
+%
+% The data x can be reconstructed with (column-wise case, DIM=1)
+%     x = z*diag(sigma) + repmat(mu, size(z)./size(mu))
+%     z = x*diag(1./sigma) - repmat(mu./sigma, size(z)./size(mu))
+%
+% OPT   option
+%	0: normalizes with N-1 [default] when computing sigma
+%		provides the square root of best unbiased estimator of the variance [1]
+%	1: normalizes with N, when computing sigma
+%		this provides the square root of the second moment around the mean
+%	any other non-empty value raises an error
+%
+% DIM	dimension
+%	1: STATS of columns
+%	2: STATS of rows
+%	default or []: first DIMENSION, with more than 1 element
+%
+% W	weights, passed on to SUMSKIPNAN and STD (optional, default: [])
+%
+% An empty x is returned unchanged, with mu = [] and sigma = [].
+%
+% see also: SUMSKIPNAN, MEAN, STD, DETREND
+%
+% REFERENCE(S):
+% [1] http://mathworld.wolfram.com/z-Score.html
+
+%    This program is free software; you can redistribute it and/or modify
+%    it under the terms of the GNU General Public License as published by
+%    the Free Software Foundation; either version 3 of the License, or
+%    (at your option) any later version.
+%
+%    This program is distributed in the hope that it will be useful,
+%    but WITHOUT ANY WARRANTY; without even the implied warranty of
+%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+%    GNU General Public License for more details.
+%
+%    You should have received a copy of the GNU General Public License
+%    along with this program; If not, see <http://www.gnu.org/licenses/>.
+
+%	Copyright (C) 2000-2019 by Alois Schloegl
+%	This function is part of the NaN-toolbox
+%	http://pub.ist.ac.at/~schloegl/matlab/NaN/
+
+
+if any(size(i)==0),
+	% nothing to normalize; still define every output the caller may request
+	m = [];
+	s = [];
+	return;
+end;
+
+if nargin<2
+	OPT=[];
+end
+if nargin<3
+	DIM=[];
+end
+if nargin<4
+	W = [];
+end
+if ~isempty(OPT) && ~any(OPT==[0,1])
+	error('OPT must be 0, 1 or empty.')
+end
+if isempty(DIM),
+	DIM = find(size(i)>1, 1);	% first dimension with more than 1 element
+	if isempty(DIM), DIM=1; end;
+end;
+
+
+% pre-whitening
+[S,N] = sumskipnan(i, DIM, W);
+m = S./N;
+i = i-repmat(m, size(i)./size(m));		% remove mean
+s = std (i, OPT, DIM, W);
+s(s==0)=1;				% constant data: avoid division by zero
+i = i ./ repmat(s,size(i)./size(s));	% scale to var=1
+
+%!assert(isequaln(zscore([1,1,-1,-1,0,NaN]),[1,1,-1,-1,0,NaN]))
+%!test
+%! [z,m,s] = zscore([]);
+%! assert(isempty(z) && isempty(m) && isempty(s))
+
diff --git a/src/Makefile.in b/src/Makefile.in new file mode 100644 index 0000000..93c3fab --- /dev/null +++ b/src/Makefile.in @@ -0,0 +1,250 @@ +#################################################### +# Copyright 2010,2011,2012,2015,2019 Alois Schloegl +# This is part of the NaN-toolbox - a statistics and machine learning toolbox for data with and without missing values. +# http://pub.ist.ac.at/~schloegl/matlab/NaN/ +#################################################### +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; either version 3 of the License, or (at your option) any later +# version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., 59 Temple +# Place - Suite 330, Boston, MA 02111-1307, USA. + + + +### modify directories according to your needs + +# Define non-default octave-version +# Octave - global install (e.g. from debian package) +# OCTAVE_VERSION= +# Better alternative: define an OCTAVE_VERSION bash variable (or in .bashrc or .profile) +# OCTAVE_VERSION=-3.6.3 + +# Matlab configuration +#MATLABDIR = /usr/local/MATLAB/R2010b +# comment the following line if you use MATLAB on 32-bit operating system +#MEX_OPTION += -largeArrayDims +MEX_OPTION += -R2017b + +# Mingw crosscompiler: available at http://www.nongnu.org/mingw-cross-env/ +CROSS = $(HOME)/src/mxe/usr/bin/i686-w64-mingw32.static- +CROSS64 = $(HOME)/src/mxe/usr/bin/x86_64-w64-mingw32.static- +# include directory for Win32-Matlab include +W32MAT_INC = $(HOME)/bin/win32/Matlab/R2010b/extern/include/ +W64MAT_INC = $(HOME)/bin/win64/Matlab/R2010b/extern/include/ +# path to GNUMEX libraries, available from here http://sourceforge.net/projects/gnumex/ +GNUMEX = $(HOME)/bin/win32/gnumex +GNUMEX64 = $(HOME)/bin/win64/gnumex +# building gnumex64 was difficult, these hints were quite useful: +# http://sourceforge.net/mailarchive/forum.php?thread_name=AANLkTinZvxgC9ezp2P3UCX_a7TAUYuVsp2U40MQUV6qr%40mail.gmail.com&forum_name=gnumex-users +# Instead of building "mex shortpath.c" and "mex uigetpath.c", I used empty m-functions within argout=argin; +#################################################### +MKOCTFILE ?= mkoctfile$(OCTAVE_VERSION) +CC ?= gcc +CXX ?= g++ +CFLAGS += -Wall -Wextra -Wconversion -O2 +OCTMEX = $(MKOCTFILE) --mex +RM = rm + + +### in case libsvm is installed and should be used +# CFLAGS += -DHAVE_EXTERNAL_LIBSVM +### in case liblinear is installed and should be used +# CFLAGS += -DHAVE_EXTERNAL_LIBLINEAR + + +# Support Debian Hardening flags +# 
https://wiki.debian.org/HardeningWalkthrough#Handling_dpkg-buildflags_in_your_upstream_build_system +CFLAGS += $(CPPFLAGS) + +ifeq ($(OS),Windows_NT) + # CC need to be defined on Windows, see also http://savannah.gnu.org/bugs/?47559 + CC = gcc +else ifeq (Darwin,$(shell uname)) + # no OpenMP on MacOSX, neither for Matlab nor Octave +else + # enable OpenMP support + CFLAGS += -fPIC -fopenmp + OCTMEX += -lgomp + MATLABMEX += -lgomp + #MEX_OPTION += CC\#$(CXX) CXX\#$(CXX) CFLAGS\#"$(CFLAGS) " CXXFLAGS\#"$(CFLAGS) " +endif + +###REMOVE### MATMEX = $(MATLABDIR)/bin/mex $(MEX_OPTION) + +PROGS = histo_mex.mex covm_mex.mex kth_element.mex sumskipnan_mex.mex str2array.mex train.mex svmtrain_mex.mex svmpredict_mex.mex xptopen.mex + +### per default only the mex-files for octave are built +mex4o octave: $(PROGS) + + +### Matlab configuration - search for a matlab directory if not defined above +ifeq (,$(MATLABDIR)) + ifneq (,$(shell ls -1 /usr/local/ |grep MATLAB)) + # use oldest, typically mex-files are compatible with newer Matlab versions + MATLABDIR=/usr/local/MATLAB/$(shell ls -1rt /usr/local/MATLAB/ |grep "^R20*" |head -1) + endif +endif + + +### if MATLABDIR has been found or defined +ifneq (,$(MATLABDIR)) + ifneq (,$(shell ls -1 $(MATLABDIR)/bin/mexext)) + MEX_EXT=$(shell $(MATLABDIR)/bin/mexext) + mex4m matlab: $(patsubst %.mex, %.$(MEX_EXT), $(PROGS)) + endif +endif + + +mexw32 win32: $(patsubst %.mex, %.mexw32, $(PROGS)) +mexw64 win64: $(patsubst %.mex, %.mexw64, $(PROGS)) +all: octave win32 win64 mex4m + +clean: + -$(RM) *.o *.obj *.o64 core octave-core *.oct *~ *.mex* + +#$(PROGS): Makefile + + +######################################################### +# Matlab 2016b and later +######################################################### +# https://se.mathworks.com/matlabcentral/answers/377799-compiling-mex-files-without-the-mex-command +DEFINES := -D_GNU_SOURCE +MATLABMEX := -DMATLAB_MEX_FILE +# Matrix APIs: +# -DMX_COMPAT_32: compatibleArrayDims +# 
-DMATLAB_DEFAULT_RELEASE=R2017b: largeArrayDims +# -DMATLAB_DEFAULT_RELEASE=R2018a: Interleaved Complex +MATLABMEX += -DMATLAB_DEFAULT_RELEASE=R2017b + +CFLAGS += -fexceptions -fPIC -fno-omit-frame-pointer -pthread +COPTIMFLAGS := -O -DNDEBUG +CDEBUGFLAGS := -g +INCLUDE := -I"$(MWROOT)/extern/include" -I"$(MWROOT)/simulink/include" +LD := gcc +LDFLAGS += -pthread +LDTYPE := -shared +LINKEXPORTVER := -Wl,--version-script,"$(MWROOT)/extern/lib/glnxa64/c_exportsmexfileversion.map" +LINKLIBS := -Wl,--as-needed -Wl,-rpath-link,$(MWROOT)/bin/glnxa64 -L"$(MWROOT)/bin/glnxa64" -Wl,-rpath-link,$(MWROOT)/extern/bin/glnxa64 -L"$(MWROOT)/extern/bin/glnxa64" -lmx -lmex -lmat -lm -lstdc++ +BLAS_LIBS := $(shell $(MKOCTFILE) -p BLAS_LIBS) + + +c_mexapi_version.o : $(MWROOT)/extern/version/c_mexapi_version.c Makefile + $(CC) -c $< -o $@ $(MATLABMEX) $(DEFINES) $(CFLAGS) $(COPTIMFLAGS) $(INCLUDE) +%.$(MEX_EXT) : %.o c_mexapi_version.o + $(LD) $(LDFLAGS) $(LDTYPE) $(LINKEXPORTVER) $< $(LINKLIBS) -o $@ +%.o : %.cpp Makefile + $(CC) -c $< -o $@ $(MATLABMEX) $(DEFINES) $(CFLAGS) $(COPTIMFLAGS) $(INCLUDE) +%.o : %.c Makefile + $(CC) -c $< -o $@ $(MATLABMEX) $(DEFINES) $(CFLAGS) $(COPTIMFLAGS) $(INCLUDE) +train.o : train.c tron.cpp Makefile + $(CC) -c $< -o $@ $(MATLABMEX) $(DEFINES) $(CFLAGS) $(COPTIMFLAGS) $(INCLUDE) +svm%_mex.$(MEX_EXT) : svm%_mex.o svm.o svm_model_matlab.o c_mexapi_version.o + $(LD) $(LDFLAGS) $(LDTYPE) $(LINKEXPORTVER) $^ $(LINKLIBS) -o $@ + + +######################################################### +# Octave, MATLAB on Linux +######################################################### +ifneq (,@HAVE_EXTERNAL_LIBSVM@) +svm%_mex.mex: svm%_mex.cpp svm_model_octave.o + $(OCTMEX) $(CFLAGS) $< svm_model_octave.o -lsvm -o $@ +else +svm.o: svm.cpp + $(CC) $(CFLAGS) -c svm.cpp +svm%_mex.mex: svm%_mex.cpp svm_model_octave.o svm.o + $(OCTMEX) $^ -o $@ +endif + +svm_model_octave.o: svm_model_matlab.c + $(MKOCTFILE) -o "$@" -c "$<" +svm_model_matlab.o: svm_model_matlab.c + 
$(CXX) $(CFLAGS) -I $(MATLABDIR)/extern/include -o "$@" -c "$<"
+# liblinear: MATMEX is no longer defined in this file, so the Matlab mex command is spelled out in the recipes
+ifneq (,@HAVE_EXTERNAL_LIBLINEAR@)
+train.$(MEX_EXT) predict.$(MEX_EXT): train.c tron.o linear_model_matlab.c
+	$(CXX) $(CFLAGS) -I $(MATLABDIR)/extern/include -c linear_model_matlab.c
+	$(MATLABDIR)/bin/mex $(MEX_OPTION) -lblas train.c tron.o -llinear linear_model_matlab.o
+	#$(MATMEX) -lblas predict.c tron.o linear.o linear_model_matlab.o
+train.mex predict.mex: train.c tron.o linear_model_matlab.c
+	$(OCTMEX) $(BLAS_LIBS) train.c tron.o -llinear linear_model_matlab.c
+else
+train.$(MEX_EXT) predict.$(MEX_EXT): train.c tron.o linear.o linear_model_matlab.c
+	$(CXX) $(CFLAGS) -I $(MATLABDIR)/extern/include -c linear_model_matlab.c
+	$(MATLABDIR)/bin/mex $(MEX_OPTION) -lblas train.c tron.o linear.o linear_model_matlab.o
+	#$(MATMEX) -lblas predict.c tron.o linear.o linear_model_matlab.o
+train.mex predict.mex: train.c tron.o linear.o linear_model_matlab.c
+	$(OCTMEX) $(BLAS_LIBS) train.c tron.o linear.o linear_model_matlab.c
+linear.o: linear.cpp
+	$(CXX) $(CFLAGS) -c linear.cpp
+endif
+tron.o: tron.cpp tron.h
+	$(CXX) $(CFLAGS) -c tron.cpp
+# oct-files: go through MKOCTFILE so that OCTAVE_VERSION and user overrides are honoured
+%.oct: %.cc
+	$(MKOCTFILE) "$<"
+
+%.mex: %.cpp
+	$(OCTMEX) "$<"
+
+# optional TensorFlow interface; entries of PROGS carry the .mex suffix used by the patsubst rules
+ifneq (,@HAVE_EXTERNAL_LIBTENSORFLOW@)
+PROGS += mexTF.mex
+
+mexTF.mex: mexTF.c
+	$(MKOCTFILE) --mex $< -ltensorflow -o $@
+
+mexTF.$(MEX_EXT): mexTF.c
+	$(MATLABDIR)/bin/mex $(MEX_OPTION) $< -ltensorflow -o $@
+
+endif
+
+#########################################################
+#	MATLAB/WIN32
+#########################################################
+%.obj: %.cpp
+	$(CROSS)$(CXX) -fopenmp -c -DMATLAB_MEX_FILE -x c++ -o "$@" -I$(W32MAT_INC) -O2 -DMX_COMPAT_32 "$<"
+%.obj: %.c
+	$(CROSS)$(CXX) -fopenmp -c -DMATLAB_MEX_FILE -x c++ -o "$@" -I$(W32MAT_INC) -O2 -DMX_COMPAT_32 "$<"
+
+train.mexw32 predict.mexw32: train.obj linear.obj linear_model_matlab.obj tron.obj
+	$(CROSS)$(CXX) -shared $(GNUMEX)/mex.def -o "$@" -L$(GNUMEX) -s "$<" linear_model_matlab.obj linear.obj tron.obj -llibmx -llibmex -llibmat -lblas
+
+svmpredict_mex.mexw32 : svmpredict_mex.obj
svm.obj svm_model_matlab.obj + $(CROSS)$(CXX) -shared $(GNUMEX)/mex.def -o "$@" -L$(GNUMEX) -s "$<" svm_model_matlab.obj svm.obj -llibmx -llibmex -llibmat +svmtrain_mex.mexw32 : svmtrain_mex.obj svm.obj svm_model_matlab.obj + $(CROSS)$(CXX) -shared $(GNUMEX)/mex.def -o "$@" -L$(GNUMEX) -s "$<" svm_model_matlab.obj svm.obj -llibmx -llibmex -llibmat + +%.mexw32: %.obj + $(CROSS)$(CXX) -shared $(GNUMEX)/mex.def -o "$@" -L$(GNUMEX) -s "$<" -llibmx -llibmex -llibmat -lgomp -lpthread + + +######################################################### +# MATLAB/WIN64 +######################################################### + +## ToDO: fix OpenMP support: currently -fopenmp causes Matlab to crash +%.o64: %.cpp + $(CROSS64)$(CXX) -c -DMATLAB_MEX_FILE -x c++ -o "$@" -I$(W64MAT_INC) -O2 "$<" +%.o64: %.c + $(CROSS64)$(CXX) -c -DMATLAB_MEX_FILE -x c++ -o "$@" -I$(W64MAT_INC) -O2 "$<" + +train.mexw64 predict.mexw64: train.o64 linear.o64 linear_model_matlab.o64 tron.o64 + $(CROSS64)$(CXX) -shared $(GNUMEX64)/mex.def -o "$@" -L$(GNUMEX64) -s "$<" linear_model_matlab.o64 linear.o64 tron.o64 -llibmx -llibmex -llibmat -lblas + +svmpredict_mex.mexw64 : svmpredict_mex.o64 svm.o64 svm_model_matlab.o64 + $(CROSS64)$(CXX) -shared $(GNUMEX64)/mex.def -o "$@" -L$(GNUMEX64) -s "$<" svm_model_matlab.o64 svm.o64 -llibmx -llibmex -llibmat +svmtrain_mex.mexw64 : svmtrain_mex.o64 svm.o64 svm_model_matlab.o64 + $(CROSS64)$(CXX) -shared $(GNUMEX64)/mex.def -o "$@" -L$(GNUMEX64) -s "$<" svm_model_matlab.o64 svm.o64 -llibmx -llibmex -llibmat + +%.mexw64: %.o64 + $(CROSS64)$(CXX) -shared $(GNUMEX64)/mex.def -o "$@" -L$(GNUMEX64) -s "$<" -llibmx -llibmex -llibmat -lgomp -lpthread + diff --git a/src/config.h.in b/src/config.h.in new file mode 100644 index 0000000..3b9c584 --- /dev/null +++ b/src/config.h.in @@ -0,0 +1,209 @@ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* liblinear-dev is installed. */ +#undef HAVE_EXTERNAL_LIBLINEAR + +/* libsvm-dev is installed. 
*/ +#undef HAVE_EXTERNAL_LIBSVM + +/* Define to 1 if you have the <float.h> header file. */ +#undef HAVE_FLOAT_H + +/* Define to 1 if you have the `floor' function. */ +#undef HAVE_FLOOR + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the `blas' library (-lblas). */ +#undef HAVE_LIBBLAS + +/* Define to 1 if you have the `gomp' library (-lgomp). */ +#undef HAVE_LIBGOMP + +/* Define to 1 if you have the `libmat' library (-llibmat). */ +#undef HAVE_LIBLIBMAT + +/* Define to 1 if you have the `libmex' library (-llibmex). */ +#undef HAVE_LIBLIBMEX + +/* Define to 1 if you have the `libmx' library (-llibmx). */ +#undef HAVE_LIBLIBMX + +/* Define to 1 if you have the `libtensorflow' library (-llibtensorflow). */ +#undef HAVE_LIBLIBTENSORFLOW + +/* Define to 1 if you have the `linear' library (-llinear). */ +#undef HAVE_LIBLINEAR + +/* Define to 1 if you have the `pthread' library (-lpthread). */ +#undef HAVE_LIBPTHREAD + +/* Define to 1 if you have the `svm' library (-lsvm). */ +#undef HAVE_LIBSVM + +/* Define to 1 if you have the <libsvm/svm.h> header file. */ +#undef HAVE_LIBSVM_SVM_H + +/* Define to 1 if you have the <limits.h> header file. */ +#undef HAVE_LIMITS_H + +/* Define to 1 if you have the <linear.h> header file. */ +#undef HAVE_LINEAR_H + +/* Define to 1 if you have the <locale.h> header file. */ +#undef HAVE_LOCALE_H + +/* Define to 1 if your system has a GNU libc compatible `malloc' function, and + to 0 otherwise. */ +#undef HAVE_MALLOC + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have the `memset' function. */ +#undef HAVE_MEMSET + +/* Define to 1 if you have the `mkdir' function. */ +#undef HAVE_MKDIR + +/* Define to 1 if your system has a GNU libc compatible `realloc' function, + and to 0 otherwise. */ +#undef HAVE_REALLOC + +/* Define to 1 if you have the `setlocale' function. */ +#undef HAVE_SETLOCALE + +/* Define to 1 if you have the `sqrt' function.
*/ +#undef HAVE_SQRT + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the `strchr' function. */ +#undef HAVE_STRCHR + +/* Define to 1 if you have the `strcspn' function. */ +#undef HAVE_STRCSPN + +/* Define to 1 if you have the `strdup' function. */ +#undef HAVE_STRDUP + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if you have the `strncasecmp' function. */ +#undef HAVE_STRNCASECMP + +/* Define to 1 if you have the `strrchr' function. */ +#undef HAVE_STRRCHR + +/* Define to 1 if you have the `strtol' function. */ +#undef HAVE_STRTOL + +/* Define to 1 if you have the `strtoul' function. */ +#undef HAVE_STRTOUL + +/* Define to 1 if you have the <sys/param.h> header file. */ +#undef HAVE_SYS_PARAM_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if the system has the type `_Bool'. */ +#undef HAVE__BOOL + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>, + <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the + #define below would cause a syntax error.
*/ +#undef _UINT32_T + +/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>, + <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the + #define below would cause a syntax error. */ +#undef _UINT64_T + +/* Define for Solaris 2.5.1 so the uint8_t typedef from <sys/synch.h>, + <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the + #define below would cause a syntax error. */ +#undef _UINT8_T + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#undef inline +#endif + +/* Define to the type of a signed integer type of width exactly 16 bits if + such a type exists and the standard includes do not define it. */ +#undef int16_t + +/* Define to the type of a signed integer type of width exactly 32 bits if + such a type exists and the standard includes do not define it. */ +#undef int32_t + +/* Define to the type of a signed integer type of width exactly 64 bits if + such a type exists and the standard includes do not define it. */ +#undef int64_t + +/* Define to the type of a signed integer type of width exactly 8 bits if such + a type exists and the standard includes do not define it. */ +#undef int8_t + +/* Define to rpl_malloc if the replacement function should be used. */ +#undef malloc + +/* Define to rpl_realloc if the replacement function should be used. */ +#undef realloc + +/* Define to `unsigned int' if <sys/types.h> does not define. */ +#undef size_t + +/* Define to the type of an unsigned integer type of width exactly 16 bits if + such a type exists and the standard includes do not define it. */ +#undef uint16_t + +/* Define to the type of an unsigned integer type of width exactly 32 bits if + such a type exists and the standard includes do not define it. */ +#undef uint32_t + +/* Define to the type of an unsigned integer type of width exactly 64 bits if + such a type exists and the standard includes do not define it.
*/ +#undef uint64_t + +/* Define to the type of an unsigned integer type of width exactly 8 bits if + such a type exists and the standard includes do not define it. */ +#undef uint8_t diff --git a/src/configure b/src/configure new file mode 100755 index 0000000..30ab5a5 --- /dev/null +++ b/src/configure @@ -0,0 +1,6017 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.69 for nan-toolbox 3.2.0. +# +# Report bugs to . +# +# +# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## -------------------- ## +## M4sh Initialization. ## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. 
+if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! 
-f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# Use a proper internal environment variable to ensure we don't fall + # into an infinite loop, continuously re-executing ourselves. + if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then + _as_can_reexec=no; export _as_can_reexec; + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +as_fn_exit 255 + fi + # We don't want this to propagate to other subprocesses. + { _as_can_reexec=; unset _as_can_reexec;} +if test "x$CONFIG_SHELL" = x; then + as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which + # is contrary to our usage. 
Disable this feature. + alias -g '\${1+\"\$@\"}'='\"\$@\"' + setopt NO_GLOB_SUBST +else + case \`(set -o) 2>/dev/null\` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi +" + as_required="as_fn_return () { (exit \$1); } +as_fn_success () { as_fn_return 0; } +as_fn_failure () { as_fn_return 1; } +as_fn_ret_success () { return 0; } +as_fn_ret_failure () { return 1; } + +exitcode=0 +as_fn_success || { exitcode=1; echo as_fn_success failed.; } +as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } +as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } +as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } +if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : + +else + exitcode=1; echo positional parameters were not saved. +fi +test x\$exitcode = x0 || exit 1 +test -x / || exit 1" + as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO + as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO + eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && + test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1 +test \$(( 1 + 1 )) = 2 || exit 1" + if (eval "$as_required") 2>/dev/null; then : + as_have_required=yes +else + as_have_required=no +fi + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +as_found=false +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + as_found=: + case $as_dir in #( + /*) + for as_base in sh bash ksh sh5; do + # Try only shells that exist, to save several forks. 
+ as_shell=$as_dir/$as_base + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + CONFIG_SHELL=$as_shell as_have_required=yes + if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + break 2 +fi +fi + done;; + esac + as_found=false +done +$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi; } +IFS=$as_save_IFS + + + if test "x$CONFIG_SHELL" != x; then : + export CONFIG_SHELL + # We cannot yet assume a decent shell, so we have to provide a +# neutralization value for shells without unset; and this also +# works around shells that cannot unset nonexistent variables. +# Preserve -v and -x to the replacement shell. +BASH_ENV=/dev/null +ENV=/dev/null +(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV +case $- in # (((( + *v*x* | *x*v* ) as_opts=-vx ;; + *v* ) as_opts=-v ;; + *x* ) as_opts=-x ;; + * ) as_opts= ;; +esac +exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} +# Admittedly, this is quite paranoid, since all the known shells bail +# out after a failed `exec'. +$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 +fi + + if test x$as_have_required = xno; then : + $as_echo "$0: This script requires a shell more modern than all" + $as_echo "$0: the shells that I found on your system." + if test x${ZSH_VERSION+set} = xset ; then + $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" + $as_echo "$0: be upgraded to zsh 4.3.4 or later." + else + $as_echo "$0: Please tell bug-autoconf@gnu.org and +$0: alois.schloegl@gmail.com about your system, including +$0: any error possibly output before this message. Then +$0: install a modern shell, or manually run the script +$0: under such a shell if you do have one." 
+ fi + exit 1 +fi +fi +fi +SHELL=${CONFIG_SHELL-/bin/sh} +export SHELL +# Unset more variables known to interfere with behavior of common tools. +CLICOLOR_FORCE= GREP_OPTIONS= +unset CLICOLOR_FORCE GREP_OPTIONS + +## --------------------- ## +## M4sh Shell Functions. ## +## --------------------- ## +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. 
+as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? -eq 1` + } +fi # as_fn_arith + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. Then exit the +# script with STATUS, using 1 if that was 0. 
+as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + + + as_lineno_1=$LINENO as_lineno_1a=$LINENO + as_lineno_2=$LINENO as_lineno_2a=$LINENO + eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && + test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { + # Blame Lee E. McMahon (1931-1989) for sed's syntax. 
:-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + + # If we had to re-execute with $CONFIG_SHELL, we're ensured to have + # already done that, so ensure we don't try to do so again and fall + # in an infinite loop. This has already happened in practice. + _as_can_reexec=no; export _as_can_reexec + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 
2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +test -n "$DJDIR" || exec 7<&0 &1 + +# Name of the host. +# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= + +# Identity of this package. +PACKAGE_NAME='nan-toolbox' +PACKAGE_TARNAME='nan-toolbox' +PACKAGE_VERSION='3.2.0' +PACKAGE_STRING='nan-toolbox 3.2.0' +PACKAGE_BUGREPORT='alois.schloegl@gmail.com' +PACKAGE_URL='' + +ac_unique_file="train.c" +# Factoring default headers for most tests. 
+ac_includes_default="\ +#include +#ifdef HAVE_SYS_TYPES_H +# include +#endif +#ifdef HAVE_SYS_STAT_H +# include +#endif +#ifdef STDC_HEADERS +# include +# include +#else +# ifdef HAVE_STDLIB_H +# include +# endif +#endif +#ifdef HAVE_STRING_H +# if !defined STDC_HEADERS && defined HAVE_MEMORY_H +# include +# endif +# include +#endif +#ifdef HAVE_STRINGS_H +# include +#endif +#ifdef HAVE_INTTYPES_H +# include +#endif +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='LTLIBOBJS +POW_LIB +LIBOBJS +HAVE_EXTERNAL_LIBLINEAR +HAVE_EXTERNAL_LIBSVM +EGREP +GREP +CPP +ac_ct_CC +CFLAGS +CC +OBJEXT +EXEEXT +ac_ct_CXX +CPPFLAGS +LDFLAGS +CXXFLAGS +CXX +target_alias +host_alias +build_alias +LIBS +ECHO_T +ECHO_N +ECHO_C +DEFS +mandir +localedir +libdir +psdir +pdfdir +dvidir +htmldir +infodir +docdir +oldincludedir +includedir +runstatedir +localstatedir +sharedstatedir +sysconfdir +datadir +datarootdir +libexecdir +sbindir +bindir +program_transform_name +prefix +exec_prefix +PACKAGE_URL +PACKAGE_BUGREPORT +PACKAGE_STRING +PACKAGE_VERSION +PACKAGE_TARNAME +PACKAGE_NAME +PATH_SEPARATOR +SHELL' +ac_subst_files='' +ac_user_opts=' +enable_option_checking +' + ac_precious_vars='build_alias +host_alias +target_alias +CXX +CXXFLAGS +LDFLAGS +LIBS +CPPFLAGS +CCC +CC +CFLAGS +CPP' + + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +ac_unrecognized_opts= +ac_unrecognized_sep= +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. 
+# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +runstatedir='${localstatedir}/run' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *=) ac_optarg= ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid feature name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"enable_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval enable_$ac_useropt=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. 
+ with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | 
--program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -runstatedir | --runstatedir | --runstatedi | --runstated \ + | --runstate | --runstat | --runsta | --runst | --runs \ + | --run | --ru | --r) + ac_prev=runstatedir ;; + -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ + | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ + | --run=* | --ru=* | --r=*) + runstatedir=$ac_optarg ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | 
--shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? "invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=\$ac_optarg ;; + + -without-* | --without-*) + ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && + as_fn_error $? 
"invalid package name: $ac_useropt" + ac_useropt_orig=$ac_useropt + ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + case $ac_user_opts in + *" +"with_$ac_useropt" +"*) ;; + *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" + ac_unrecognized_sep=', ';; + esac + eval with_$ac_useropt=no ;; + + --x) + # Obsolete; use --with-x. + with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) as_fn_error $? "unrecognized option: \`$ac_option' +Try \`$0 --help' for more information" + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + case $ac_envvar in #( + '' | [0-9]* | *[!_$as_cr_alnum]* ) + as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; + esac + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + as_fn_error $? 
"missing argument to $ac_option" +fi + +if test -n "$ac_unrecognized_opts"; then + case $enable_option_checking in + no) ;; + fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; + *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + esac +fi + +# Check all directory arguments for consistency. +for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir runstatedir +do + eval ac_val=\$$ac_var + # Remove trailing slashes. + case $ac_val in + */ ) + ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` + eval $ac_var=\$ac_val;; + esac + # Be sure to have absolute directory names. + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + as_fn_error $? "working directory cannot be determined" +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + as_fn_error $? "pwd does not report name of working directory" + + +# Find the source files, if location was not specified. 
+if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$as_myself" || +$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_myself" : 'X\(//\)[^/]' \| \ + X"$as_myself" : 'X\(//\)$' \| \ + X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_myself" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures nan-toolbox 3.2.0 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... 
+ +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking ...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. 
+ +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/nan-toolbox] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of nan-toolbox 3.2.0:";; + esac + cat <<\_ACEOF + +Some influential environment variables: + CXX C++ compiler command + CXXFLAGS C++ compiler flags + LDFLAGS linker flags, e.g. -L if you have libraries in a + nonstandard directory + LIBS libraries to pass to the linker, e.g. -l + CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if + you have headers in a nonstandard directory + CC C compiler command + CFLAGS C compiler flags + CPP C preprocessor + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to . +_ACEOF +ac_status=$? 
+fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || + { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || + continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +nan-toolbox configure 3.2.0 +generated by GNU Autoconf 2.69 + +Copyright (C) 2012 Free Software Foundation, Inc. 
+This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. +_ACEOF + exit +fi + +## ------------------------ ## +## Autoconf initialization. ## +## ------------------------ ## + +# ac_fn_cxx_try_compile LINENO +# ---------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_cxx_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_cxx_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_cxx_try_compile + +# ac_fn_c_try_compile LINENO +# -------------------------- +# Try to compile conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext + if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>conftest.err + ac_status=$? 
+ if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest.$ac_objext; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_compile + +# ac_fn_c_try_link LINENO +# ----------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_link () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + rm -f conftest.$ac_objext conftest$ac_exeext + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + test -x conftest$ac_exeext + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information + # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would + # interfere with the next link command; also delete a directory that is + # left behind by Apple's compiler. We do this before executing the actions. 
+ rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_link + +# ac_fn_c_try_cpp LINENO +# ---------------------- +# Try to preprocess conftest.$ac_ext, and return whether this succeeded. +ac_fn_c_try_cpp () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + grep -v '^ *+' conftest.err >conftest.er1 + cat conftest.er1 >&5 + mv -f conftest.er1 conftest.err + fi + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } > conftest.i && { + test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || + test ! -s conftest.err + }; then : + ac_retval=0 +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=1 +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_cpp + +# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists, giving a warning if it cannot be compiled using +# the include files in INCLUDES and setting the cache variable VAR +# accordingly. +ac_fn_c_check_header_mongrel () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if eval \${$3+:} false; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... 
" >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +else + # Is the header compilable? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5 +$as_echo_n "checking $2 usability... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_header_compiler=yes +else + ac_header_compiler=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5 +$as_echo "$ac_header_compiler" >&6; } + +# Is the header present? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5 +$as_echo_n "checking $2 presence... " >&6; } +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <$2> +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + ac_header_preproc=yes +else + ac_header_preproc=no +fi +rm -f conftest.err conftest.i conftest.$ac_ext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5 +$as_echo "$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #(( + yes:no: ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5 +$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} + ;; + no:yes:* ) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5 +$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" 
>&5 +$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5 +$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5 +$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5 +$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;} +( $as_echo "## --------------------------------------- ## +## Report this to alois.schloegl@gmail.com ## +## --------------------------------------- ##" + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=\$ac_header_compiler" +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +fi + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_mongrel + +# ac_fn_c_try_run LINENO +# ---------------------- +# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes +# that executables *can* be run. +ac_fn_c_try_run () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then : + ac_retval=0 +else + $as_echo "$as_me: program exited with status $ac_status" >&5 + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_retval=$ac_status +fi + rm -rf conftest.dSYM conftest_ipa8_conftest.oo + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + as_fn_set_status $ac_retval + +} # ac_fn_c_try_run + +# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES +# ------------------------------------------------------- +# Tests whether HEADER exists and can be compiled using the include files in +# INCLUDES, setting the cache variable VAR accordingly. +ac_fn_c_check_header_compile () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +#include <$2> +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_header_compile + +# ac_fn_c_check_type LINENO TYPE VAR INCLUDES +# ------------------------------------------- +# Tests whether TYPE exists after having included INCLUDES, setting cache +# variable VAR accordingly. 
+ac_fn_c_check_type () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=no" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +if (sizeof ($2)) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$4 +int +main () +{ +if (sizeof (($2))) + return 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + eval "$3=yes" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_type + +# ac_fn_c_find_intX_t LINENO BITS VAR +# ----------------------------------- +# Finds a signed integer type with width BITS, setting cache variable VAR +# accordingly. +ac_fn_c_find_intX_t () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for int$2_t" >&5 +$as_echo_n "checking for int$2_t... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=no" + # Order is important - never check a type that is potentially smaller + # than half of the expected target width. + for ac_type in int$2_t 'int' 'long int' \ + 'long long int' 'short int' 'signed char'; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_includes_default + enum { N = $2 / 2 - 1 }; +int +main () +{ +static int test_array [1 - 2 * !(0 < ($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 1))]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_includes_default + enum { N = $2 / 2 - 1 }; +int +main () +{ +static int test_array [1 - 2 * !(($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 1) + < ($ac_type) ((((($ac_type) 1 << N) << N) - 1) * 2 + 2))]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + case $ac_type in #( + int$2_t) : + eval "$3=yes" ;; #( + *) : + eval "$3=\$ac_type" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + if eval test \"x\$"$3"\" = x"no"; then : + +else + break +fi + done +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_find_intX_t + +# ac_fn_c_find_uintX_t LINENO BITS VAR +# ------------------------------------ +# Finds an unsigned integer type with width BITS, setting cache variable VAR +# accordingly. +ac_fn_c_find_uintX_t () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for uint$2_t" >&5 +$as_echo_n "checking for uint$2_t... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + eval "$3=no" + # Order is important - never check a type that is potentially smaller + # than half of the expected target width. + for ac_type in uint$2_t 'unsigned int' 'unsigned long int' \ + 'unsigned long long int' 'unsigned short int' 'unsigned char'; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +$ac_includes_default +int +main () +{ +static int test_array [1 - 2 * !((($ac_type) -1 >> ($2 / 2 - 1)) >> ($2 / 2 - 1) == 3)]; +test_array [0] = 0; +return test_array [0]; + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + case $ac_type in #( + uint$2_t) : + eval "$3=yes" ;; #( + *) : + eval "$3=\$ac_type" ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + if eval test \"x\$"$3"\" = x"no"; then : + +else + break +fi + done +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_find_uintX_t + +# ac_fn_c_check_func LINENO FUNC VAR +# ---------------------------------- +# Tests whether FUNC exists, setting the cache variable VAR accordingly +ac_fn_c_check_func () +{ + as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5 +$as_echo_n "checking for $2... " >&6; } +if eval \${$3+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +/* Define $2 to an innocuous variant, in case declares $2. + For example, HP-UX 11i declares gettimeofday. */ +#define $2 innocuous_$2 + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $2 (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $2 + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $2 (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. 
Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$2 || defined __stub___$2 +choke me +#endif + +int +main () +{ +return $2 (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + eval "$3=yes" +else + eval "$3=no" +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +eval ac_res=\$$3 + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } + eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno + +} # ac_fn_c_check_func +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by nan-toolbox $as_me 3.2.0, which was +generated by GNU Autoconf 2.69. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ $as_echo "PATH: $as_dir" + done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. +# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; + 2) + as_fn_append ac_configure_args1 " '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + as_fn_append ac_configure_args " '$ac_arg'" + ;; + esac + done +done +{ ac_configure_args0=; unset ac_configure_args0;} +{ ac_configure_args1=; unset ac_configure_args1;} + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. 
+trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + $as_echo "## ---------------- ## +## Cache variables. ## +## ---------------- ##" + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + $as_echo "## ----------------- ## +## Output variables. ## +## ----------------- ##" + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + $as_echo "## ------------------- ## +## File substitutions. ## +## ------------------- ##" + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + $as_echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + $as_echo "## ----------- ## +## confdefs.h. 
## +## ----------- ##" + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + $as_echo "$as_me: caught signal $ac_signal" + $as_echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +$as_echo "/* confdefs.h */" > confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_URL "$PACKAGE_URL" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer an explicitly selected file to automatically selected ones. +ac_site_file1=NONE +ac_site_file2=NONE +if test -n "$CONFIG_SITE"; then + # We do not want a PATH search for config.site. 
+ case $CONFIG_SITE in #(( + -*) ac_site_file1=./$CONFIG_SITE;; + */*) ac_site_file1=$CONFIG_SITE;; + *) ac_site_file1=./$CONFIG_SITE;; + esac +elif test "x$prefix" != xNONE; then + ac_site_file1=$prefix/share/config.site + ac_site_file2=$prefix/etc/config.site +else + ac_site_file1=$ac_default_prefix/share/config.site + ac_site_file2=$ac_default_prefix/etc/config.site +fi +for ac_site_file in "$ac_site_file1" "$ac_site_file2" +do + test "x$ac_site_file" = xNONE && continue + if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +$as_echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" \ + || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "failed to load site script $ac_site_file +See \`config.log' for more details" "$LINENO" 5; } + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special files + # actually), so we avoid doing that. DJGPP emulates it as a regular file. + if test /dev/null != "$cache_file" && test -f "$cache_file"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +$as_echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +$as_echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + # differences in whitespace do not lead to failure. + ac_old_val_w=`echo x $ac_old_val` + ac_new_val_w=`echo x $ac_new_val` + if test "$ac_old_val_w" != "$ac_new_val_w"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + ac_cache_corrupted=: + else + { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + eval $ac_var=\$ac_old_val + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. 
Use of quotes ensures accuracy. + *) as_fn_append ac_configure_args " '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 +fi +## -------------------- ## +## Main body of script. ## +## -------------------- ## + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +ac_config_headers="$ac_config_headers config.h" + + +# Checks for programs. +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +if test -z "$CXX"; then + if test -n "$CCC"; then + CXX=$CCC + else + if test -n "$ac_tool_prefix"; then + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CXX"; then + ac_cv_prog_CXX="$CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CXX=$ac_cv_prog_CXX +if test -n "$CXX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 +$as_echo "$CXX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CXX" && break + done +fi +if test -z "$CXX"; then + ac_ct_CXX=$CXX + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CXX+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CXX"; then + ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CXX="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CXX=$ac_cv_prog_ac_ct_CXX +if test -n "$ac_ct_CXX"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5 +$as_echo "$ac_ct_CXX" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CXX" && break +done + + if test "x$ac_ct_CXX" = x; then + CXX="g++" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CXX=$ac_ct_CXX + fi +fi + + fi +fi +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... + 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C++ compiler works" >&5 +$as_echo_n "checking whether the C++ compiler works... " >&6; } +ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` + +# The possible output files: +ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" + +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { { ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then : + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. +for ac_file in $ac_files '' +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. 
+ break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + ac_file='' +fi +if test -z "$ac_file"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +$as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C++ compiler cannot create executables +See \`config.log' for more details" "$LINENO" 5; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C++ compiler default output file name" >&5 +$as_echo_n "checking for C++ compiler default output file name... " >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +$as_echo "$ac_file" >&6; } +ac_exeext=$ac_cv_exeext + +rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +$as_echo_n "checking for suffix of executables... " >&6; } +if { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then : + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest conftest$ac_cv_exeext +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +$as_echo "$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} +_ACEOF +ac_clean_files="$ac_clean_files conftest.out" +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +$as_echo_n "checking whether we are cross compiling... " >&6; } +if test "$cross_compiling" != yes; then + { { ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; } + if { ac_try='./conftest$ac_cv_exeext' + { { case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run C++ compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details" "$LINENO" 5; } + fi + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +$as_echo "$cross_compiling" >&6; } + +rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out +ac_clean_files=$ac_clean_files_save +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +$as_echo_n "checking for suffix of object files... " >&6; } +if ${ac_cv_objext+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { { ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; then : + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot compute suffix of object files: cannot compile +See \`config.log' for more details" "$LINENO" 5; } +fi +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +$as_echo "$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5 +$as_echo_n "checking whether we are using the GNU C++ compiler... " >&6; } +if ${ac_cv_cxx_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_cxx_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5 +$as_echo "$ac_cv_cxx_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GXX=yes +else + GXX= +fi +ac_test_CXXFLAGS=${CXXFLAGS+set} +ac_save_CXXFLAGS=$CXXFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5 +$as_echo_n "checking whether $CXX accepts -g... 
" >&6; } +if ${ac_cv_prog_cxx_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_cxx_werror_flag=$ac_cxx_werror_flag + ac_cxx_werror_flag=yes + ac_cv_prog_cxx_g=no + CXXFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_cv_prog_cxx_g=yes +else + CXXFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + +else + ac_cxx_werror_flag=$ac_save_cxx_werror_flag + CXXFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + ac_cv_prog_cxx_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_cxx_werror_flag=$ac_save_cxx_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5 +$as_echo "$ac_cv_prog_cxx_g" >&6; } +if test "$ac_test_CXXFLAGS" = set; then + CXXFLAGS=$ac_save_CXXFLAGS +elif test $ac_cv_prog_cxx_g = yes; then + if test "$GXX" = yes; then + CXXFLAGS="-g -O2" + else + CXXFLAGS="-g" + fi +else + if test "$GXX" = yes; then + CXXFLAGS="-O2" + else + CXXFLAGS= + fi +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a 
program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 +$as_echo "$CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +$as_echo_n "checking for $ac_word... " >&6; } +if ${ac_cv_prog_ac_ct_CC+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done + done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 +$as_echo "$ac_ct_CC" >&6; } +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "no acceptable C compiler found in \$PATH +See \`config.log' for more details" "$LINENO" 5; } + +# Provide some information about the compiler. +$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +set X $ac_compile +ac_compiler=$2 +for ac_option in --version -v -V -qversion; do + { { ac_try="$ac_compiler $ac_option >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" +$as_echo "$ac_try_echo"; } >&5 + (eval "$ac_compiler $ac_option >&5") 2>conftest.err + ac_status=$? + if test -s conftest.err; then + sed '10a\ +... rest of stderr output deleted ... 
+ 10q' conftest.err >conftest.er1 + cat conftest.er1 >&5 + fi + rm -f conftest.er1 conftest.err + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 +$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } +if ${ac_cv_c_compiler_gnu+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_compiler_gnu=yes +else + ac_compiler_gnu=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 +$as_echo "$ac_cv_c_compiler_gnu" >&6; } +if test $ac_compiler_gnu = yes; then + GCC=yes +else + GCC= +fi +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 +$as_echo_n "checking whether $CC accepts -g... " >&6; } +if ${ac_cv_prog_cc_g+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +else + CFLAGS="" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + +else + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_g=yes +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 +$as_echo "$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 +$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } +if ${ac_cv_prog_cc_c89+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +struct stat; +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 
1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_prog_cc_c89=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +$as_echo "none needed" >&6; } ;; + xno) + { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +$as_echo "unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 +$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; +esac +if test "x$ac_cv_prog_cc_c89" != xno; then : + +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +# Checks for libraries. + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for caxpy in -lblas" >&5 +$as_echo_n "checking for caxpy in -lblas... " >&6; } +if ${ac_cv_lib_blas_caxpy+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lblas $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. 
+ Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char caxpy (); +int +main () +{ +return caxpy (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_blas_caxpy=yes +else + ac_cv_lib_blas_caxpy=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_blas_caxpy" >&5 +$as_echo "$ac_cv_lib_blas_caxpy" >&6; } +if test "x$ac_cv_lib_blas_caxpy" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBBLAS 1 +_ACEOF + + LIBS="-lblas $LIBS" + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for omp_get_num_procs in -lgomp" >&5 +$as_echo_n "checking for omp_get_num_procs in -lgomp... " >&6; } +if ${ac_cv_lib_gomp_omp_get_num_procs+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgomp $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char omp_get_num_procs (); +int +main () +{ +return omp_get_num_procs (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_gomp_omp_get_num_procs=yes +else + ac_cv_lib_gomp_omp_get_num_procs=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gomp_omp_get_num_procs" >&5 +$as_echo "$ac_cv_lib_gomp_omp_get_num_procs" >&6; } +if test "x$ac_cv_lib_gomp_omp_get_num_procs" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBGOMP 1 +_ACEOF + + LIBS="-lgomp $LIBS" + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for matClose in -llibmat" >&5 +$as_echo_n "checking for matClose in -llibmat... " >&6; } +if ${ac_cv_lib_libmat_matClose+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-llibmat $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char matClose (); +int +main () +{ +return matClose (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_libmat_matClose=yes +else + ac_cv_lib_libmat_matClose=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_libmat_matClose" >&5 +$as_echo "$ac_cv_lib_libmat_matClose" >&6; } +if test "x$ac_cv_lib_libmat_matClose" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBLIBMAT 1 +_ACEOF + + LIBS="-llibmat $LIBS" + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for mexPrintf in -llibmex" >&5 +$as_echo_n "checking for mexPrintf in -llibmex... 
" >&6; } +if ${ac_cv_lib_libmex_mexPrintf+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-llibmex $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char mexPrintf (); +int +main () +{ +return mexPrintf (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_libmex_mexPrintf=yes +else + ac_cv_lib_libmex_mexPrintf=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_libmex_mexPrintf" >&5 +$as_echo "$ac_cv_lib_libmex_mexPrintf" >&6; } +if test "x$ac_cv_lib_libmex_mexPrintf" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBLIBMEX 1 +_ACEOF + + LIBS="-llibmex $LIBS" + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for mxGetNumberOfFields in -llibmx" >&5 +$as_echo_n "checking for mxGetNumberOfFields in -llibmx... " >&6; } +if ${ac_cv_lib_libmx_mxGetNumberOfFields+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-llibmx $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char mxGetNumberOfFields (); +int +main () +{ +return mxGetNumberOfFields (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_libmx_mxGetNumberOfFields=yes +else + ac_cv_lib_libmx_mxGetNumberOfFields=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_libmx_mxGetNumberOfFields" >&5 +$as_echo "$ac_cv_lib_libmx_mxGetNumberOfFields" >&6; } +if test "x$ac_cv_lib_libmx_mxGetNumberOfFields" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBLIBMX 1 +_ACEOF + + LIBS="-llibmx $LIBS" + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for TF_NewTensor in -llibtensorflow" >&5 +$as_echo_n "checking for TF_NewTensor in -llibtensorflow... " >&6; } +if ${ac_cv_lib_libtensorflow_TF_NewTensor+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-llibtensorflow $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char TF_NewTensor (); +int +main () +{ +return TF_NewTensor (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_libtensorflow_TF_NewTensor=yes +else + ac_cv_lib_libtensorflow_TF_NewTensor=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_libtensorflow_TF_NewTensor" >&5 +$as_echo "$ac_cv_lib_libtensorflow_TF_NewTensor" >&6; } +if test "x$ac_cv_lib_libtensorflow_TF_NewTensor" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBLIBTENSORFLOW 1 +_ACEOF + + LIBS="-llibtensorflow $LIBS" + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_exit in -lpthread" >&5 +$as_echo_n "checking for pthread_exit in -lpthread... " >&6; } +if ${ac_cv_lib_pthread_pthread_exit+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpthread $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char pthread_exit (); +int +main () +{ +return pthread_exit (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_pthread_pthread_exit=yes +else + ac_cv_lib_pthread_pthread_exit=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_exit" >&5 +$as_echo "$ac_cv_lib_pthread_pthread_exit" >&6; } +if test "x$ac_cv_lib_pthread_pthread_exit" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBPTHREAD 1 +_ACEOF + + LIBS="-lpthread $LIBS" + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for train in -llinear" >&5 +$as_echo_n "checking for train in -llinear... " >&6; } +if ${ac_cv_lib_linear_train+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-llinear $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char train (); +int +main () +{ +return train (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_linear_train=yes +else + ac_cv_lib_linear_train=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_linear_train" >&5 +$as_echo "$ac_cv_lib_linear_train" >&6; } +if test "x$ac_cv_lib_linear_train" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBLINEAR 1 +_ACEOF + + LIBS="-llinear $LIBS" + +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for svm_train in -lsvm" >&5 +$as_echo_n "checking for svm_train in -lsvm... 
" >&6; } +if ${ac_cv_lib_svm_svm_train+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsvm $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char svm_train (); +int +main () +{ +return svm_train (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_svm_svm_train=yes +else + ac_cv_lib_svm_svm_train=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svm_svm_train" >&5 +$as_echo "$ac_cv_lib_svm_svm_train" >&6; } +if test "x$ac_cv_lib_svm_svm_train" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBSVM 1 +_ACEOF + + LIBS="-lsvm $LIBS" + +fi + + + +# Checks for header files. +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C preprocessor" >&5 +$as_echo_n "checking how to run the C preprocessor... " >&6; } +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if ${ac_cv_prog_CPP+:} false; then : + $as_echo_n "(cached) " >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. 
+ # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5 +$as_echo "$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + +else + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.i conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + # Broken: success on invalid input. +continue +else + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.i conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.i conftest.err conftest.$ac_ext +if $ac_preproc_ok; then : + +else + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details" "$LINENO" 5; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5 +$as_echo_n "checking for grep that handles long lines and -e... " >&6; } +if ${ac_cv_path_GREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test -z "$GREP"; then + ac_path_GREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_GREP" || continue +# Check for GNU ac_path_GREP and select it if it is found. 
+ # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_GREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_GREP"; then + as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_GREP=$GREP +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5 +$as_echo "$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5 +$as_echo_n "checking for egrep... " >&6; } +if ${ac_cv_path_EGREP+:} false; then : + $as_echo_n "(cached) " >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + if test -z "$EGREP"; then + ac_path_EGREP_found=false + # Loop through the user's path and test for each of PROGNAME-LIST + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + as_fn_executable_p "$ac_path_EGREP" || continue +# Check for GNU ac_path_EGREP and select it if it is found. + # Check for GNU $ac_path_EGREP +case `"$ac_path_EGREP" --version 2>&1` in +*GNU*) + ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; +*) + ac_count=0 + $as_echo_n 0123456789 >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + $as_echo 'EGREP' >> "conftest.nl" + "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + as_fn_arith $ac_count + 1 && ac_count=$as_val + if test $ac_count -gt ${ac_path_EGREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_EGREP="$ac_path_EGREP" + ac_path_EGREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + $ac_path_EGREP_found && break 3 + done + done + done +IFS=$as_save_IFS + if test -z "$ac_cv_path_EGREP"; then + as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5 + fi +else + ac_cv_path_EGREP=$EGREP +fi + + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5 +$as_echo "$ac_cv_path_EGREP" >&6; } + EGREP="$ac_cv_path_EGREP" + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5 +$as_echo_n "checking for ANSI C header files... " >&6; } +if ${ac_cv_header_stdc+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include +#include +#include +#include + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdc=yes +else + ac_cv_header_stdc=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then : + +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then : + : +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +#include +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? 
((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + return 2; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + +else + ac_cv_header_stdc=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5 +$as_echo "$ac_cv_header_stdc" >&6; } +if test $ac_cv_header_stdc = yes; then + +$as_echo "#define STDC_HEADERS 1" >>confdefs.h + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default +" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in float.h limits.h locale.h stdint.h stdlib.h string.h sys/param.h +do : + as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` +ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" +if eval test \"x\$"$as_ac_Header"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + +for ac_header in libsvm/svm.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "libsvm/svm.h" "ac_cv_header_libsvm_svm_h" "$ac_includes_default" +if test "x$ac_cv_header_libsvm_svm_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBSVM_SVM_H 1 +_ACEOF + +$as_echo "#define HAVE_EXTERNAL_LIBSVM 1" >>confdefs.h + + HAVE_EXTERNAL_LIBSVM="1" + + +fi + +done + +for ac_header in linear.h +do : + ac_fn_c_check_header_mongrel 
"$LINENO" "linear.h" "ac_cv_header_linear_h" "$ac_includes_default" +if test "x$ac_cv_header_linear_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_LINEAR_H 1 +_ACEOF + +$as_echo "#define HAVE_EXTERNAL_LIBLINEAR 1" >>confdefs.h + + HAVE_EXTERNAL_LIBLINEAR="1" + + +fi + +done + +# disable use of external libsvm and liblinear +HAVE_EXTERNAL_LIBSVM="" + +HAVE_EXTERNAL_LIBLINEAR="" + + +# Checks for typedefs, structures, and compiler characteristics. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for stdbool.h that conforms to C99" >&5 +$as_echo_n "checking for stdbool.h that conforms to C99... " >&6; } +if ${ac_cv_header_stdbool_h+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + + #include + #ifndef bool + "error: bool is not defined" + #endif + #ifndef false + "error: false is not defined" + #endif + #if false + "error: false is not 0" + #endif + #ifndef true + "error: true is not defined" + #endif + #if true != 1 + "error: true is not 1" + #endif + #ifndef __bool_true_false_are_defined + "error: __bool_true_false_are_defined is not defined" + #endif + + struct s { _Bool s: 1; _Bool t; } s; + + char a[true == 1 ? 1 : -1]; + char b[false == 0 ? 1 : -1]; + char c[__bool_true_false_are_defined == 1 ? 1 : -1]; + char d[(bool) 0.5 == true ? 1 : -1]; + /* See body of main program for 'e'. */ + char f[(_Bool) 0.0 == false ? 1 : -1]; + char g[true]; + char h[sizeof (_Bool)]; + char i[sizeof s.t]; + enum { j = false, k = true, l = false * true, m = true * 256 }; + /* The following fails for + HP aC++/ANSI C B3910B A.05.55 [Dec 04 2003]. */ + _Bool n[m]; + char o[sizeof n == m * sizeof n[0] ? 1 : -1]; + char p[-1 - (_Bool) 0 < 0 && -1 - (bool) 0 < 0 ? 1 : -1]; + /* Catch a bug in an HP-UX C compiler. 
See + http://gcc.gnu.org/ml/gcc-patches/2003-12/msg02303.html + http://lists.gnu.org/archive/html/bug-coreutils/2005-11/msg00161.html + */ + _Bool q = true; + _Bool *pq = &q; + +int +main () +{ + + bool e = &s; + *pq |= q; + *pq |= ! q; + /* Refer to every declared value, to avoid compiler optimizations. */ + return (!a + !b + !c + !d + !e + !f + !g + !h + !i + !!j + !k + !!l + + !m + !n + !o + !p + !q + !pq); + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_header_stdbool_h=yes +else + ac_cv_header_stdbool_h=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdbool_h" >&5 +$as_echo "$ac_cv_header_stdbool_h" >&6; } + ac_fn_c_check_type "$LINENO" "_Bool" "ac_cv_type__Bool" "$ac_includes_default" +if test "x$ac_cv_type__Bool" = xyes; then : + +cat >>confdefs.h <<_ACEOF +#define HAVE__BOOL 1 +_ACEOF + + +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for inline" >&5 +$as_echo_n "checking for inline... " >&6; } +if ${ac_cv_c_inline+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_cv_c_inline=no +for ac_kw in inline __inline__ __inline; do + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#ifndef __cplusplus +typedef int foo_t; +static $ac_kw foo_t static_foo () {return 0; } +$ac_kw foo_t foo () {return 0; } +#endif + +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + ac_cv_c_inline=$ac_kw +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + test "$ac_cv_c_inline" != no && break +done + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_inline" >&5 +$as_echo "$ac_cv_c_inline" >&6; } + +case $ac_cv_c_inline in + inline | yes) ;; + *) + case $ac_cv_c_inline in + no) ac_val=;; + *) ac_val=$ac_cv_c_inline;; + esac + cat >>confdefs.h <<_ACEOF +#ifndef __cplusplus +#define inline $ac_val +#endif +_ACEOF + ;; +esac + +ac_fn_c_find_intX_t "$LINENO" "16" "ac_cv_c_int16_t" +case $ac_cv_c_int16_t in #( + no|yes) ;; #( + *) + +cat >>confdefs.h <<_ACEOF +#define int16_t $ac_cv_c_int16_t +_ACEOF +;; +esac + +ac_fn_c_find_intX_t "$LINENO" "32" "ac_cv_c_int32_t" +case $ac_cv_c_int32_t in #( + no|yes) ;; #( + *) + +cat >>confdefs.h <<_ACEOF +#define int32_t $ac_cv_c_int32_t +_ACEOF +;; +esac + +ac_fn_c_find_intX_t "$LINENO" "64" "ac_cv_c_int64_t" +case $ac_cv_c_int64_t in #( + no|yes) ;; #( + *) + +cat >>confdefs.h <<_ACEOF +#define int64_t $ac_cv_c_int64_t +_ACEOF +;; +esac + +ac_fn_c_find_intX_t "$LINENO" "8" "ac_cv_c_int8_t" +case $ac_cv_c_int8_t in #( + no|yes) ;; #( + *) + +cat >>confdefs.h <<_ACEOF +#define int8_t $ac_cv_c_int8_t +_ACEOF +;; +esac + +ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" "$ac_includes_default" +if test "x$ac_cv_type_size_t" = xyes; then : + +else + +cat >>confdefs.h <<_ACEOF +#define size_t unsigned int +_ACEOF + +fi + +ac_fn_c_find_uintX_t "$LINENO" "16" "ac_cv_c_uint16_t" +case $ac_cv_c_uint16_t in #( + no|yes) ;; #( + *) + + +cat >>confdefs.h <<_ACEOF +#define uint16_t $ac_cv_c_uint16_t +_ACEOF +;; + esac + +ac_fn_c_find_uintX_t "$LINENO" "32" "ac_cv_c_uint32_t" +case $ac_cv_c_uint32_t in #( + no|yes) ;; #( + *) + +$as_echo "#define _UINT32_T 1" >>confdefs.h + + +cat >>confdefs.h <<_ACEOF 
+#define uint32_t $ac_cv_c_uint32_t +_ACEOF +;; + esac + +ac_fn_c_find_uintX_t "$LINENO" "64" "ac_cv_c_uint64_t" +case $ac_cv_c_uint64_t in #( + no|yes) ;; #( + *) + +$as_echo "#define _UINT64_T 1" >>confdefs.h + + +cat >>confdefs.h <<_ACEOF +#define uint64_t $ac_cv_c_uint64_t +_ACEOF +;; + esac + +ac_fn_c_find_uintX_t "$LINENO" "8" "ac_cv_c_uint8_t" +case $ac_cv_c_uint8_t in #( + no|yes) ;; #( + *) + +$as_echo "#define _UINT8_T 1" >>confdefs.h + + +cat >>confdefs.h <<_ACEOF +#define uint8_t $ac_cv_c_uint8_t +_ACEOF +;; + esac + + +# Checks for library functions. +for ac_header in stdlib.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "stdlib.h" "ac_cv_header_stdlib_h" "$ac_includes_default" +if test "x$ac_cv_header_stdlib_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_STDLIB_H 1 +_ACEOF + +fi + +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU libc compatible malloc" >&5 +$as_echo_n "checking for GNU libc compatible malloc... " >&6; } +if ${ac_cv_func_malloc_0_nonnull+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + ac_cv_func_malloc_0_nonnull=no +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#if defined STDC_HEADERS || defined HAVE_STDLIB_H +# include +#else +char *malloc (); +#endif + +int +main () +{ +return ! 
malloc (0); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ac_cv_func_malloc_0_nonnull=yes +else + ac_cv_func_malloc_0_nonnull=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_malloc_0_nonnull" >&5 +$as_echo "$ac_cv_func_malloc_0_nonnull" >&6; } +if test $ac_cv_func_malloc_0_nonnull = yes; then : + +$as_echo "#define HAVE_MALLOC 1" >>confdefs.h + +else + $as_echo "#define HAVE_MALLOC 0" >>confdefs.h + + case " $LIBOBJS " in + *" malloc.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS malloc.$ac_objext" + ;; +esac + + +$as_echo "#define malloc rpl_malloc" >>confdefs.h + +fi + + +for ac_header in stdlib.h +do : + ac_fn_c_check_header_mongrel "$LINENO" "stdlib.h" "ac_cv_header_stdlib_h" "$ac_includes_default" +if test "x$ac_cv_header_stdlib_h" = xyes; then : + cat >>confdefs.h <<_ACEOF +#define HAVE_STDLIB_H 1 +_ACEOF + +fi + +done + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for GNU libc compatible realloc" >&5 +$as_echo_n "checking for GNU libc compatible realloc... " >&6; } +if ${ac_cv_func_realloc_0_nonnull+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + ac_cv_func_realloc_0_nonnull=no +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#if defined STDC_HEADERS || defined HAVE_STDLIB_H +# include +#else +char *realloc (); +#endif + +int +main () +{ +return ! 
realloc (0, 0); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ac_cv_func_realloc_0_nonnull=yes +else + ac_cv_func_realloc_0_nonnull=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_realloc_0_nonnull" >&5 +$as_echo "$ac_cv_func_realloc_0_nonnull" >&6; } +if test $ac_cv_func_realloc_0_nonnull = yes; then : + +$as_echo "#define HAVE_REALLOC 1" >>confdefs.h + +else + $as_echo "#define HAVE_REALLOC 0" >>confdefs.h + + case " $LIBOBJS " in + *" realloc.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS realloc.$ac_objext" + ;; +esac + + +$as_echo "#define realloc rpl_realloc" >>confdefs.h + +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working strtod" >&5 +$as_echo_n "checking for working strtod... " >&6; } +if ${ac_cv_func_strtod+:} false; then : + $as_echo_n "(cached) " >&6 +else + if test "$cross_compiling" = yes; then : + ac_cv_func_strtod=no +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +$ac_includes_default +#ifndef strtod +double strtod (); +#endif +int +main() +{ + { + /* Some versions of Linux strtod mis-parse strings with leading '+'. */ + char *string = " +69"; + char *term; + double value; + value = strtod (string, &term); + if (value != 69 || term != (string + 4)) + return 1; + } + + { + /* Under Solaris 2.4, strtod returns the wrong value for the + terminating character under some conditions. 
*/ + char *string = "NaN"; + char *term; + strtod (string, &term); + if (term != string && *(term - 1) == 0) + return 1; + } + return 0; +} + +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + ac_cv_func_strtod=yes +else + ac_cv_func_strtod=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_func_strtod" >&5 +$as_echo "$ac_cv_func_strtod" >&6; } +if test $ac_cv_func_strtod = no; then + case " $LIBOBJS " in + *" strtod.$ac_objext "* ) ;; + *) LIBOBJS="$LIBOBJS strtod.$ac_objext" + ;; +esac + +ac_fn_c_check_func "$LINENO" "pow" "ac_cv_func_pow" +if test "x$ac_cv_func_pow" = xyes; then : + +fi + +if test $ac_cv_func_pow = no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pow in -lm" >&5 +$as_echo_n "checking for pow in -lm... " >&6; } +if ${ac_cv_lib_m_pow+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lm $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char pow (); +int +main () +{ +return pow (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_m_pow=yes +else + ac_cv_lib_m_pow=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_pow" >&5 +$as_echo "$ac_cv_lib_m_pow" >&6; } +if test "x$ac_cv_lib_m_pow" = xyes; then : + POW_LIB=-lm +else + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cannot find library containing definition of pow" >&5 +$as_echo "$as_me: WARNING: cannot find library containing definition of pow" >&2;} +fi + +fi + +fi + +for ac_func in floor memset mkdir setlocale sqrt strchr strcspn strdup strncasecmp strrchr strtol strtoul +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + +ac_config_files="$ac_config_files Makefile" + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. 
+# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( + *) { eval $ac_var=; unset $ac_var;} ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes: double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \. + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + if test "x$cache_file" != "x/dev/null"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +$as_echo "$as_me: updating cache $cache_file" >&6;} + if test ! 
-f "$cache_file" || test -h "$cache_file"; then + cat confcache >"$cache_file" + else + case $cache_file in #( + */* | ?:*) + mv -f confcache "$cache_file"$$ && + mv -f "$cache_file"$$ "$cache_file" ;; #( + *) + mv -f confcache "$cache_file" ;; + esac + fi + fi + else + { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +U= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" + as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + + +: "${CONFIG_STATUS=./config.status}" +ac_write_fail=0 +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +as_write_fail=0 +cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. +# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false + +SHELL=\${CONFIG_SHELL-$SHELL} +export SHELL +_ASEOF +cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 +## -------------------- ## +## M4sh Initialization. 
## +## -------------------- ## + +# Be more Bourne compatible +DUALCASE=1; export DUALCASE # for MKS sh +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : + emulate sh + NULLCMD=: + # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in #( + *posix*) : + set -o posix ;; #( + *) : + ;; +esac +fi + + +as_nl=' +' +export as_nl +# Printing a long string crashes Solaris 7 /usr/bin/printf. +as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo +as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo +# Prefer a ksh shell builtin over an external printf program on Solaris, +# but without wasting forks for bash or zsh. +if test -z "$BASH_VERSION$ZSH_VERSION" \ + && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='print -r --' + as_echo_n='print -rn --' +elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then + as_echo='printf %s\n' + as_echo_n='printf %s' +else + if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then + as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' + as_echo_n='/usr/ucb/echo -n' + else + as_echo_body='eval expr "X$1" : "X\\(.*\\)"' + as_echo_n_body='eval + arg=$1; + case $arg in #( + *"$as_nl"*) + expr "X$arg" : "X\\(.*\\)$as_nl"; + arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; + esac; + expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" + ' + export as_echo_n_body + as_echo_n='sh -c $as_echo_n_body as_echo' + fi + export as_echo_body + as_echo='sh -c $as_echo_body as_echo' +fi + +# The user is always right. 
+if test "${PATH_SEPARATOR+set}" != set; then + PATH_SEPARATOR=: + (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { + (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || + PATH_SEPARATOR=';' + } +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +as_myself= +case $0 in #(( + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + exit 1 +fi + +# Unset variables that we do not need and which cause bugs (e.g. in +# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" +# suppresses any "Segmentation fault" message there. '((' could +# trigger a bug in pdksh 5.2.14. +for as_var in BASH_ENV ENV MAIL MAILPATH +do eval test x\${$as_var+set} = xset \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# CDPATH. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + + +# as_fn_error STATUS ERROR [LINENO LOG_FD] +# ---------------------------------------- +# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are +# provided, also output the error to LOG_FD, referencing LINENO. 
Then exit the +# script with STATUS, using 1 if that was 0. +as_fn_error () +{ + as_status=$1; test $as_status -eq 0 && as_status=1 + if test "$4"; then + as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack + $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + fi + $as_echo "$as_me: error: $2" >&2 + as_fn_exit $as_status +} # as_fn_error + + +# as_fn_set_status STATUS +# ----------------------- +# Set $? to STATUS, without forking. +as_fn_set_status () +{ + return $1 +} # as_fn_set_status + +# as_fn_exit STATUS +# ----------------- +# Exit the shell with STATUS, even in a "trap 0" or "set -e" context. +as_fn_exit () +{ + set +e + as_fn_set_status $1 + exit $1 +} # as_fn_exit + +# as_fn_unset VAR +# --------------- +# Portably unset VAR. +as_fn_unset () +{ + { eval $1=; unset $1;} +} +as_unset=as_fn_unset +# as_fn_append VAR VALUE +# ---------------------- +# Append the text in VALUE to the end of the definition contained in VAR. Take +# advantage of any shell optimizations that allow amortized linear growth over +# repeated appends, instead of the typical quadratic growth present in naive +# implementations. +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : + eval 'as_fn_append () + { + eval $1+=\$2 + }' +else + as_fn_append () + { + eval $1=\$$1\$2 + } +fi # as_fn_append + +# as_fn_arith ARG... +# ------------------ +# Perform arithmetic evaluation on the ARGs, and store the result in the +# global $as_val. Take advantage of shells that can avoid forks. The arguments +# must be portable across $(()) and expr. +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : + eval 'as_fn_arith () + { + as_val=$(( $* )) + }' +else + as_fn_arith () + { + as_val=`expr "$@" || test $? 
-eq 1` + } +fi # as_fn_arith + + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in #((((( +-n*) + case `echo 'xy\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + xy) ECHO_C='\c';; + *) echo `echo ksh88 bug on AIX 6.1` > /dev/null + ECHO_T=' ';; + esac;; +*) + ECHO_N='-n';; +esac + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir 2>/dev/null +fi +if (echo >conf$$.file) 2>/dev/null; then + if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -pR'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! 
-f conf$$.exe || + as_ln_s='cp -pR' + elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln + else + as_ln_s='cp -pR' + fi +else + as_ln_s='cp -pR' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + + +# as_fn_mkdir_p +# ------------- +# Create "$as_dir" as a directory, including parents if necessary. +as_fn_mkdir_p () +{ + + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || eval $as_mkdir_p || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +$as_echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" + + +} # as_fn_mkdir_p +if mkdir -p . 2>/dev/null; then + as_mkdir_p='mkdir -p "$as_dir"' +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + + +# as_fn_executable_p FILE +# ----------------------- +# Test if FILE is an executable regular file. +as_fn_executable_p () +{ + test -f "$1" && test -x "$1" +} # as_fn_executable_p +as_test_x='test -x' +as_executable_p=as_fn_executable_p + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 +## ----------------------------------- ## +## Main body of $CONFIG_STATUS script. 
## +## ----------------------------------- ## +_ASEOF +test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# Save the log message, to keep $0 and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by nan-toolbox $as_me 3.2.0, which was +generated by GNU Autoconf 2.69. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +case $ac_config_files in *" +"*) set x $ac_config_files; shift; ac_config_files=$*;; +esac + +case $ac_config_headers in *" +"*) set x $ac_config_headers; shift; ac_config_headers=$*;; +esac + + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +ac_cs_usage="\ +\`$as_me' instantiates files and other configuration actions +from templates according to the current configuration. Unless the files +and actions are specified as TAGs, all are instantiated by default. + +Usage: $0 [OPTION]... [TAG]... + + -h, --help print this help, then exit + -V, --version print version number and configuration settings, then exit + --config print configuration, then exit + -q, --quiet, --silent + do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Report bugs to ." 
+ +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_version="\\ +nan-toolbox config.status 3.2.0 +configured by $0, generated by GNU Autoconf 2.69, + with options \\"\$ac_cs_config\\" + +Copyright (C) 2012 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +test -n "\$AWK" || AWK=awk +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# The default lists apply if the user does not specify any file. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=?*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + --*=) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg= + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + $as_echo "$ac_cs_version"; exit ;; + --config | --confi | --conf | --con | --co | --c ) + $as_echo "$ac_cs_config"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + '') as_fn_error $? "missing file argument" ;; + esac + as_fn_append CONFIG_FILES " '$ac_optarg'" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + case $ac_optarg in + *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append CONFIG_HEADERS " '$ac_optarg'" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + as_fn_error $? 
"ambiguous option: \`$1' +Try \`$0 --help' for more information.";; + --help | --hel | -h ) + $as_echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) as_fn_error $? "unrecognized option: \`$1' +Try \`$0 --help' for more information." ;; + + *) as_fn_append ac_config_targets " $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +if \$ac_cs_recheck; then + set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion + shift + \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + CONFIG_SHELL='$SHELL' + export CONFIG_SHELL + exec "\$@" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + $as_echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; + + *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. 
+if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= ac_tmp= + trap 'exit_status=$? + : "${ac_tmp:=$tmp}" + { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status +' 0 + trap 'as_fn_exit 1' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 +ac_tmp=$tmp + +# Set up the scripts for CONFIG_FILES section. +# No need to generate them if there are no CONFIG_FILES. +# This happens for instance with `./config.status config.h'. +if test -n "$CONFIG_FILES"; then + + +ac_cr=`echo X | tr X '\015'` +# On cygwin, bash can eat \r inside `` if the user requested igncr. +# But we know of no other shell where ac_cr would be empty at this +# point, so we can use a bashism as a fallback. +if test "x$ac_cr" = x; then + eval ac_cr=\$\'\\r\' +fi +ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` +if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then + ac_cs_awk_cr='\\r' +else + ac_cs_awk_cr=$ac_cr +fi + +echo 'BEGIN {' >"$ac_tmp/subs1.awk" && +_ACEOF + + +{ + echo "cat >conf$$subs.awk <<_ACEOF" && + echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && + echo "_ACEOF" +} >conf$$subs.sh || + as_fn_error $? 
"could not make $CONFIG_STATUS" "$LINENO" 5 +ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + . ./conf$$subs.sh || + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + + ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` + if test $ac_delim_n = $ac_delim_num; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done +rm -f conf$$subs.sh + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && +_ACEOF +sed -n ' +h +s/^/S["/; s/!.*/"]=/ +p +g +s/^[^!]*!// +:repl +t repl +s/'"$ac_delim"'$// +t delim +:nl +h +s/\(.\{148\}\)..*/\1/ +t more1 +s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ +p +n +b repl +:more1 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t nl +:delim +h +s/\(.\{148\}\)..*/\1/ +t more2 +s/["\\]/\\&/g; s/^/"/; s/$/"/ +p +b +:more2 +s/["\\]/\\&/g; s/^/"/; s/$/"\\/ +p +g +s/.\{148\}// +t delim +' >$CONFIG_STATUS || ac_write_fail=1 +rm -f conf$$subs.awk +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +_ACAWK +cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && + for (key in S) S_is_set[key] = 1 + FS = "" + +} +{ + line = $ 0 + nfields = split(line, field, "@") + substed = 0 + len = length(field[1]) + for (i = 2; i < nfields; i++) { + key = field[i] + keylen = length(key) + if (S_is_set[key]) { + value = S[key] + line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) + len += length(value) + length(field[++i]) + substed = 1 + } else + len += 1 + keylen + } + + print line +} + +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then + sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" +else + cat +fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ + || as_fn_error $? 
"could not setup config files machinery" "$LINENO" 5 +_ACEOF + +# VPATH may cause trouble with some makes, so we remove sole $(srcdir), +# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ +h +s/// +s/^/:/ +s/[ ]*$/:/ +s/:\$(srcdir):/:/g +s/:\${srcdir}:/:/g +s/:@srcdir@:/:/g +s/^:*// +s/:*$// +x +s/\(=[ ]*\).*/\1/ +G +s/\n// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +fi # test -n "$CONFIG_FILES" + +# Set up the scripts for CONFIG_HEADERS section. +# No need to generate them if there are no CONFIG_HEADERS. +# This happens for instance with `./config.status Makefile'. +if test -n "$CONFIG_HEADERS"; then +cat >"$ac_tmp/defines.awk" <<\_ACAWK || +BEGIN { +_ACEOF + +# Transform confdefs.h into an awk script `defines.awk', embedded as +# here-document in config.status, that substitutes the proper values into +# config.h.in to produce config.h. + +# Create a delimiter string that does not exist in confdefs.h, to ease +# handling of long lines. +ac_delim='%!_!# ' +for ac_last_try in false false :; do + ac_tt=`sed -n "/$ac_delim/p" confdefs.h` + if test -z "$ac_tt"; then + break + elif $ac_last_try; then + as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5 + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +# For the awk script, D is an array of macro values keyed by name, +# likewise P contains macro parameters if any. Preserve backslash +# newline sequences. 
+ +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +sed -n ' +s/.\{148\}/&'"$ac_delim"'/g +t rset +:rset +s/^[ ]*#[ ]*define[ ][ ]*/ / +t def +d +:def +s/\\$// +t bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3"/p +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2"/p +d +:bsnl +s/["\\]/\\&/g +s/^ \('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/P["\1"]="\2"\ +D["\1"]=" \3\\\\\\n"\\/p +t cont +s/^ \('"$ac_word_re"'\)[ ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p +t cont +d +:cont +n +s/.\{148\}/&'"$ac_delim"'/g +t clear +:clear +s/\\$// +t bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/"/p +d +:bsnlc +s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p +b cont +' >$CONFIG_STATUS || ac_write_fail=1 + +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + for (key in D) D_is_set[key] = 1 + FS = "" +} +/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ { + line = \$ 0 + split(line, arg, " ") + if (arg[1] == "#") { + defundef = arg[2] + mac1 = arg[3] + } else { + defundef = substr(arg[1], 2) + mac1 = arg[2] + } + split(mac1, mac2, "(") #) + macro = mac2[1] + prefix = substr(line, 1, index(line, defundef) - 1) + if (D_is_set[macro]) { + # Preserve the white space surrounding the "#". + print prefix "define", macro P[macro] D[macro] + next + } else { + # Replace #undef with comments. This is necessary, for example, + # in the case of _POSIX_SOURCE, which is predefined and required + # on some systems where configure will not decide to define it. + if (defundef == "undef") { + print "/*", prefix defundef, macro, "*/" + next + } + } +} +{ print } +_ACAWK +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 + as_fn_error $? "could not setup config headers machinery" "$LINENO" 5 +fi # test -n "$CONFIG_HEADERS" + + +eval set X " :F $CONFIG_FILES :H $CONFIG_HEADERS " +shift +for ac_tag +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) as_fn_error $? 
"invalid tag \`$ac_tag'" "$LINENO" 5;; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$ac_tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; + esac + case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + as_fn_append ac_file_inputs " '$ac_f'" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input='Generated from '` + $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + `' by configure.' + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +$as_echo "$as_me: creating $ac_file" >&6;} + fi + # Neutralize special characters interpreted by sed in replacement strings. + case $configure_input in #( + *\&* | *\|* | *\\* ) + ac_sed_conf_input=`$as_echo "$configure_input" | + sed 's/[\\\\&|]/\\\\&/g'`;; #( + *) ac_sed_conf_input=$configure_input;; + esac + + case $ac_tag in + *:-:* | *:-) cat >"$ac_tmp/stdin" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +$as_echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + as_dir="$ac_dir"; as_fn_mkdir_p + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. 
+ac_datarootdir_hack=; ac_datarootdir_seen= +ac_sed_dataroot=' +/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p' +case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? +cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 +ac_sed_extra="$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s|@configure_input@|$ac_sed_conf_input|;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@top_build_prefix@&$ac_top_build_prefix&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +$ac_datarootdir_hack +" +eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ + >$ac_tmp/out || as_fn_error $? 
"could not create $ac_file" "$LINENO" 5 + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ + "$ac_tmp/out"`; test -z "$ac_out"; } && + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&5 +$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined" >&2;} + + rm -f "$ac_tmp/stdin" + case $ac_file in + -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; + *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; + esac \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + ;; + :H) + # + # CONFIG_HEADER + # + if test x"$ac_file" != x-; then + { + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" + } >"$ac_tmp/config.h" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then + { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5 +$as_echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f "$ac_file" + mv "$ac_tmp/config.h" "$ac_file" \ + || as_fn_error $? "could not create $ac_file" "$LINENO" 5 + fi + else + $as_echo "/* $configure_input */" \ + && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \ + || as_fn_error $? "could not create -" "$LINENO" 5 + fi + ;; + + + esac + +done # for ac_tag + + +as_fn_exit 0 +_ACEOF +ac_clean_files=$ac_clean_files_save + +test $ac_write_fail = 0 || + as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. 
+# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || as_fn_exit 1 +fi +if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} +fi + diff --git a/src/configure.ac b/src/configure.ac new file mode 100644 index 0000000..9856d00 --- /dev/null +++ b/src/configure.ac @@ -0,0 +1,59 @@ +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +AC_PREREQ([2.69]) +AC_INIT([nan-toolbox], [3.5.3], [alois.schloegl@gmail.com]) +AC_CONFIG_SRCDIR([train.c]) +AC_CONFIG_HEADERS([config.h]) + +# Checks for programs. +AC_PROG_CXX +AC_PROG_CC + +# Checks for libraries. +AC_CHECK_LIB([blas], [caxpy]) +AC_CHECK_LIB([gomp], [omp_get_num_procs]) +AC_CHECK_LIB([libmat], [matClose]) +AC_CHECK_LIB([libmex], [mexPrintf]) +AC_CHECK_LIB([libmx], [mxGetNumberOfFields]) +AC_CHECK_LIB([libtensorflow], [TF_NewTensor]) +AC_CHECK_LIB([pthread], [pthread_exit]) +AC_CHECK_LIB([linear], [train]) +AC_CHECK_LIB([svm], [svm_train]) + + +# Checks for header files. 
+AC_CHECK_HEADERS([float.h limits.h locale.h stdint.h stdlib.h string.h sys/param.h]) +AC_CHECK_HEADERS([libsvm/svm.h], + [AC_DEFINE([HAVE_EXTERNAL_LIBSVM],[1],[libsvm-dev is installed.]) + AC_SUBST([HAVE_EXTERNAL_LIBSVM],"1") + ]) +AC_CHECK_HEADERS([linear.h], + [AC_DEFINE([HAVE_EXTERNAL_LIBLINEAR],[1],[liblinear-dev is installed.]) + AC_SUBST([HAVE_EXTERNAL_LIBLINEAR],"1") + ]) +# disable use of external libsvm and liblinear +AC_SUBST([HAVE_EXTERNAL_LIBSVM],"") +AC_SUBST([HAVE_EXTERNAL_LIBLINEAR],"") + +# Checks for typedefs, structures, and compiler characteristics. +AC_CHECK_HEADER_STDBOOL +AC_C_INLINE +AC_TYPE_INT16_T +AC_TYPE_INT32_T +AC_TYPE_INT64_T +AC_TYPE_INT8_T +AC_TYPE_SIZE_T +AC_TYPE_UINT16_T +AC_TYPE_UINT32_T +AC_TYPE_UINT64_T +AC_TYPE_UINT8_T + +# Checks for library functions. +AC_FUNC_MALLOC +AC_FUNC_REALLOC +AC_FUNC_STRTOD +AC_CHECK_FUNCS([floor memset mkdir setlocale sqrt strchr strcspn strdup strncasecmp strrchr strtol strtoul]) + +AC_CONFIG_FILES([Makefile]) +AC_OUTPUT diff --git a/src/covm_mex.cpp b/src/covm_mex.cpp new file mode 100644 index 0000000..d6ec5b2 --- /dev/null +++ b/src/covm_mex.cpp @@ -0,0 +1,843 @@ +/* +//------------------------------------------------------------------- +// C-MEX implementation of COVM - this function is part of the NaN-toolbox. +// +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, see <http://www.gnu.org/licenses/>. +// +// +// covm: in-product of matrices, NaN are skipped. 
+// usage: +// [cc,nn] = covm_mex(X,Y,flag,W); +// +// Input: +// - X: +// - Y: [optional], if empty, Y=X; +// - flag: if not empty, it is set to 1 if some NaN was observed +// - W: weight vector to compute weighted correlation +// +// Output: +// - CC = X' * sparse(diag(W)) * Y while NaN's are skipped +// - NN = real(~ISNAN(X)')*sparse(diag(W))*real(~ISNAN(Y)) count of valid (non-NaN) elements +// computed more efficiently +// +// $Id$ +// Copyright (C) 2009,2010,2011 Alois Schloegl +// This function is part of the NaN-toolbox +// http://pub.ist.ac.at/~schloegl/matlab/NaN/ +// +//------------------------------------------------------------------- +*/ + +#ifdef __GNUC__ + #include +#endif +#include +#include "mex.h" + +/*#define NO_FLAG*/ + +/* + math.h has isnan() defined for all sizes of floating point numbers, + but c++ assumes isnan(double), causing possible conversions for float and long double + */ +#define ISNAN(a) (a!=a) + +#ifndef typeof +#define typeof __typeof__ +#endif + +void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) +{ + double *X0=NULL, *Y0=NULL, *W=NULL; + double *CC; + double *NN = NULL; + + size_t rX,cX,rY,cY; + size_t i; + char flag_isNaN = 0; + int ACC_LEVEL; + + /*********** check input arguments *****************/ + + // check for proper number of input and output arguments + if ((PInputCount <= 0) || (PInputCount > 5)) { + mexPrintf("usage: [CC,NN] = covm_mex(X [,Y [,flag [,W [,'E']]]])\n\n"); + mexPrintf("Do not use COVM_MEX directly, use COVM instead. \n"); +/* + mexPrintf("\nCOVM_MEX computes the covariance matrix of real matrices and skips NaN's\n"); + mexPrintf("\t[CC,NN] = covm_mex(...)\n\t\t computes CC=X'*Y, NN contains the number of not-NaN elements\n"); + mexPrintf("\t\t CC./NN is the unbiased covariance matrix\n"); + mexPrintf("\t... = covm_mex(X,Y,...)\n\t\t computes CC=X'*sparse(diag(W))*Y, number of rows of X and Y must match\n"); + mexPrintf("\t... 
= covm_mex(X,[], ...)\n\t\t computes CC=X'*sparse(diag(W))*X\n"); + mexPrintf("\t... = covm_mex(...,flag,...)\n\t\t if flag is not empty, it is set to 1 if some NaN occured in X or Y\n"); + mexPrintf("\t... = covm_mex(...,W)\n\t\t W to compute weighted covariance, number of elements must match the number of rows of X\n"); + mexPrintf("\t\t if isempty(W), all weights are 1\n"); + mexPrintf("\t[CC,NN]=covm_mex(X,Y,flag,W)\n"); +*/ return; + } + if (POutputCount > 2) + mexErrMsgTxt("covm.MEX has 1 to 2 output arguments."); + + + // get 1st argument + if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0]) && !mxIsSparse(PInputs[0]) ) + X0 = mxGetPr(PInputs[0]); + else + mexErrMsgTxt("First argument must be non-sparse REAL/DOUBLE."); + rX = mxGetM(PInputs[0]); + cX = mxGetN(PInputs[0]); + + // get 2nd argument + if (PInputCount > 1) { + if (!mxGetNumberOfElements(PInputs[1])) + ; // Y0 = NULL; + + else if (mxIsDouble(PInputs[1]) && !mxIsComplex(PInputs[1])) + Y0 = mxGetPr(PInputs[1]); + + else + mexErrMsgTxt("Second argument must be REAL/DOUBLE."); + } + + + // get weight vector for weighted sumskipnan + if (PInputCount > 3) { + // get 4th argument + size_t nW = mxGetNumberOfElements(PInputs[3]); + if (!nW) + ; + else if (nW == rX) + W = mxGetPr(PInputs[3]); + else + mexErrMsgTxt("number of elements in W must match numbers of rows in X"); + } + +#ifdef __GNUC__ + ACC_LEVEL = 0; + { + mxArray *LEVEL = NULL; + int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); + if (!s) { + ACC_LEVEL = (int) mxGetScalar(LEVEL); + } + mxDestroyArray(LEVEL); + } + // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); +#endif + if (Y0==NULL) { + Y0 = X0; + rY = rX; + cY = cX; + } + else { + rY = mxGetM(PInputs[1]); + cY = mxGetN(PInputs[1]); + } + if (rX != rY) + mexErrMsgTxt("number of rows in X and Y do not match"); + + /*********** create output arguments *****************/ + + POutput[0] = mxCreateDoubleMatrix(cX, cY, mxREAL); + CC = mxGetPr(POutput[0]); + + if (POutputCount > 1) 
{ + POutput[1] = mxCreateDoubleMatrix(cX, cY, mxREAL); + NN = mxGetPr(POutput[1]); + } + + + /*********** compute covariance *****************/ + +#if 0 + /*------ version 1 --------------------- + this solution is slower than the alternative solution below + for transposed matrices, this might be faster. + */ + for (k=0; k 2) && mxGetNumberOfElements(PInputs[2])) { + // set FLAG_NANS_OCCURED + switch (mxGetClassID(PInputs[2])) { + case mxDOUBLE_CLASS: + *(double*)mxGetData(PInputs[2]) = 1.0; + break; + case mxSINGLE_CLASS: + *(float*)mxGetData(PInputs[2]) = 1.0; + break; + case mxLOGICAL_CLASS: + case mxCHAR_CLASS: + case mxINT8_CLASS: + case mxUINT8_CLASS: + *(char*)mxGetData(PInputs[2]) = 1; + break; +#ifdef __GNUC__ + case mxINT16_CLASS: + case mxUINT16_CLASS: + *(uint16_t*)mxGetData(PInputs[2]) = 1; + break; + case mxINT32_CLASS: + case mxUINT32_CLASS: + *(uint32_t*)mxGetData(PInputs[2])= 1; + break; + case mxINT64_CLASS: + case mxUINT64_CLASS: + *(uint64_t*)mxGetData(PInputs[2]) = 1; + break; + case mxFUNCTION_CLASS: + case mxUNKNOWN_CLASS: + case mxCELL_CLASS: + case mxSTRUCT_CLASS: +#endif + default: + mexPrintf("Type of 3rd input argument cannot be used to return status of NaN occurrence."); + } + } +#endif +#endif +} + diff --git a/src/histo_mex.cpp b/src/histo_mex.cpp new file mode 100644 index 0000000..c6e05f3 --- /dev/null +++ b/src/histo_mex.cpp @@ -0,0 +1,435 @@ +//------------------------------------------------------------------- +// C-MEX implementation of Histogram - this function is part of the NaN-toolbox. +// +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, see <http://www.gnu.org/licenses/>. +// +// +// histo_mex: computes histogram +// +// Input: +// - data matrix +// - flag for row-wise histogram +// +// Output: +// - histogram +// HIS.X +// HIS.H +// +// $Id$ +// Copyright (C) 2009,2010,2011 Alois Schloegl +// This function is part of the NaN-toolbox +// http://pub.ist.ac.at/~schloegl/matlab/NaN/ +// +//------------------------------------------------------------------- + +/* TODO: + speed: it is slower than the m-functions histo2/3/4 + |-> use a more efficient sorting function + resembling of histo3 for multicolumn data. + support of complex data and char-strings +*/ + +#include +#include +#include +#include "mex.h" + +/* + math.h has isnan() defined for all sizes of floating point numbers, + but c++ assumes isnan(double), causing possible conversions for float and long double +*/ +#define ISNAN(a) (a!=a) + + +#ifdef tmwtypes_h + #if (MX_API_VER<=0x07020000) + typedef int mwSize; + #endif +#endif + +struct sort_t { + uint8_t *Table; // data table + size_t Size; // sizeof elements e.g.
4 for single + size_t Stride; // for multicolumn data + size_t N; // number of rows + mxClassID Type; // data type +} Sort; + +//inline int compare(const sqize_t *a, const size_t *b) { +int compare(const void *a, const void *b) { + int z = 0; + size_t i = 0; + size_t ix1 = *(size_t*)a; + size_t ix2 = *(size_t*)b; + + while ((if2) z = 1; + break; + } + case mxUINT32_CLASS: { + uint32_t f1,f2; + f1 = ((uint32_t*)Sort.Table)[ix1]; + f2 = ((uint32_t*)Sort.Table)[ix2]; + if (f1f2) z = 1; + break; + } + case mxINT64_CLASS: { + int64_t f1,f2; + f1 = ((int64_t*)Sort.Table)[ix1]; + f2 = ((int64_t*)Sort.Table)[ix2]; + if (f1f2) z = 1; + break; + } + case mxUINT64_CLASS: { + uint64_t f1,f2; + f1 = ((uint64_t*)Sort.Table)[ix1]; + f2 = ((uint64_t*)Sort.Table)[ix2]; + if (f1f2) z = 1; + break; + } + case mxSINGLE_CLASS: { + float f1,f2; + f1 = ((float*)Sort.Table)[ix1]; + f2 = ((float*)Sort.Table)[ix2]; + z = ISNAN(f1) - ISNAN(f2); + if (z) break; + + if (f1f2) z = 1; + // else f1==f2 || (isnan(f1) && isnan(f2)) + break; + } + case mxDOUBLE_CLASS: { + double f1,f2; + f1 = ((double*)Sort.Table)[ix1]; + f2 = ((double*)Sort.Table)[ix2]; + z = ISNAN(f1) - ISNAN(f2); + if (z) break; + + if (f1f2) z = 1; + // else f1==f2 || (isnan(f1) && isnan(f2)) + break; + } + case mxINT16_CLASS: { + int16_t f1,f2; + f1 = ((int16_t*)Sort.Table)[ix1]; + f2 = ((int16_t*)Sort.Table)[ix2]; + if (f1f2) z = 1; + break; + } + case mxUINT16_CLASS: { + uint16_t f1,f2; + f1 = ((uint16_t*)Sort.Table)[ix1]; + f2 = ((uint16_t*)Sort.Table)[ix2]; + if (f1f2) z = 1; + break; + } + case mxINT8_CLASS: { + int8_t f1,f2; + f1 = ((int8_t*)Sort.Table)[ix1]; + f2 = ((int8_t*)Sort.Table)[ix2]; + if (f1f2) z = 1; + break; + } + case mxUINT8_CLASS: { + uint8_t f1,f2; + f1 = ((uint8_t*)Sort.Table)[ix1]; + f2 = ((uint8_t*)Sort.Table)[ix2]; + if (f1f2) z = 1; + break; + } + default: + mexErrMsgTxt("unsupported input type"); + } + i++; + ix1 += Sort.Stride; + ix2 += Sort.Stride; + } + return(z); +} + + +void mexFunction(int 
POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) +{ + + const mwSize *SZ; + char flag_rows = 0; + char done = 0; + size_t j, k, l; // running indices + const mxArray *W = NULL; + double *w = NULL; + + // check for proper number of input and output arguments + if ((PInputCount <= 0) || (PInputCount > 3)) { + mexPrintf("HISTO_MEX computes histogram from vector or column matrices\n\n"); + mexPrintf("usage:\tHIS = histo_mex(Y)\n\t\tComputes histogram from each column\n"); + mexPrintf("\t[HIS,tix] = histo_mex(Y,'rows')\n\t\tComputes row-wise histogram, tix is useful for data compression.\n\t\t Y = HIS.X(tix,:); \n\n"); + + mexPrintf("see also: HISTO2, HISTO3, HISTO4\n\n"); + mexErrMsgTxt("HISTO_MEX requires 1 or 2 input arguments\n"); + } + if (POutputCount > 2) + mexErrMsgTxt("histo.MEX has 1 output arguments."); + + // get 1st argument + if (mxIsComplex(PInputs[0])) + mexErrMsgTxt("complex argument not supported (yet). "); + // TODO: support complex argument! + + if (PInputCount==1) + ; // histo_mex(X) + else if (mxIsChar(PInputs[1])) { + // histo_mex(X,'rows') + char *t = mxArrayToString(PInputs[1]); + flag_rows = !strcmp(t,"rows"); + mxFree(t); + // histo_mex(X,'rows',W) + if ((PInputCount>2) && mxIsDouble(PInputs[2])) W = PInputs[2]; + } + // histo_mex(X,W) + else if (mxIsDouble(PInputs[1])) { + W = PInputs[1]; + } + else + mexErrMsgTxt("Weight vector must be REAL/DOUBLE."); + + if (W != NULL) { + if (mxGetM(PInputs[0])==mxGetM(W) ) + w = (double*)mxGetData(W); + else + mexErrMsgTxt("number of rows in X and W do not match."); + + for (k=0; (k=0.0); k++); + if (k2) + mexErrMsgTxt("Error HISTO.MEX: input must be vector or matrix (no more than two dimensions)"); + + size_t n = SZ[0]; + + const char *fnames[] = {"datatype","X","H"}; + mxArray *HIS = mxCreateStructMatrix(1, 1, 3, fnames); + mxSetField(HIS,0,"datatype",mxCreateString("HISTOGRAM")); + + if (flag_rows || (SZ[1]==1)) { + + ///***** SORT each column: initialize sorting algorithm + 
size_t *idx = NULL; + idx = (size_t*) mxMalloc(SZ[0]*sizeof(size_t)); + for (n=0; n1) { + POutput[1] = mxCreateNumericMatrix(SZ[0], 1, mxUINT64_CLASS,mxREAL); + tix = (uint64_t*)mxGetData(POutput[1]); + } + + // fill HIS.H and HIS.X + mxArray *H = mxCreateNumericMatrix(n, 1, mxDOUBLE_CLASS,mxREAL); + mxArray *X = mxCreateNumericMatrix(n, SZ[1], mxGetClassID(PInputs[0]),mxREAL); + mxSetField(HIS,0,"H",H); + mxSetField(HIS,0,"X",X); + double *h = (double*)mxGetData(H); + uint8_t *x = (uint8_t*)mxGetData(X); + + l = 0; + if (tix) tix[idx[0]] = 1; + for (k=0; k. +// +// +// Input: +// X data vector, must be double/real +// k which element should be selected +// flag [optional]: +// 0: data in X might be reorded (partially sorted) in-place and +// is slightly faster because no local copy is generated +// data with NaN is not correctly handled. +// 1: data in X is never modified in-place, but a local copy is used. +// data with NaN is not correctly handled. +// 2: copies data and excludes all NaN's, the copying might be slower +// than 1, but it enables a faster selection algorithm. 
+//    	This is the safe but slowest option
+//
+// Output:
+//      x = sort(X)(k)
+//
+//    $Id$
+//    Copyright (C) 2010,2011 Alois Schloegl
+//    This function is part of the NaN-toolbox
+//    http://pub.ist.ac.at/~schloegl/matlab/NaN/
+//
+//-------------------------------------------------------------------
+
+
+// NOTE(review): the header names on the next four lines were lost in
+// extraction and have been restored from what the code below uses
+// (memcpy, uintN_t, size_t) - confirm against the upstream file.
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include "mex.h"
+
+
+#ifdef tmwtypes_h
+  #if (MX_API_VER<=0x07020000)
+    typedef int mwSize;
+    typedef int mwIndex;
+  #endif
+#endif
+
+/*
+   math.h has isnan() defined for all sizes of floating point numbers,
+   but c++ assumes isnan(double), causing possible conversions for float and long double
+*/
+#define ISNAN(a) (a!=a)
+
+
+// Exchanges a and b; expects a variable `temp` of matching type in the
+// enclosing scope.
+#define SWAP(a,b) {temp = a; a=b; b=temp;}
+
+/*
+   Partially sorts array[left..right] in place so that array[k] holds the
+   element that a full ascending sort would put at position k.
+
+   array  data to be reordered in place; must not contain NaN, because the
+          "<=" comparison used for partitioning is always false for NaN
+   left   first index of the range (zero-based, inclusive)
+   right  last index of the range (zero-based, inclusive)
+   k      zero-based index of the wanted element, left <= k <= right
+
+   On return every element before index k (within the range) is <= array[k]
+   and every element after it is >= array[k].
+*/
+static void findFirstK(double *array, size_t left, size_t right, size_t k)
+{
+	while (right > left) {
+		// midpoint written as left + half the distance so that the sum
+		// of two large indices cannot wrap around
+		size_t pivotIndex = left + (right - left) / 2;
+
+		/* partition: park the pivot at the right end, then move every
+		   element <= pivot to the front of the range */
+		double temp;
+		double pivotValue = array[pivotIndex];
+		SWAP(array[pivotIndex], array[right]);
+		pivotIndex = left;
+		for (size_t i = left; i < right; ++i ) {
+			// if (array[i] <= pivotValue || isnan(pivotValue))    // needed if data contains NaN's
+			if (array[i] <= pivotValue)
+			{
+				SWAP(array[i], array[pivotIndex]);
+				++pivotIndex;
+			}
+		}
+		// put the pivot at its final position
+		SWAP(array[pivotIndex], array[right]);
+
+		// continue only in the part that contains index k;
+		// pivotIndex > k >= 0 guarantees that pivotIndex-1 does not underflow
+		if (pivotIndex > k)
+			right = pivotIndex - 1;
+		else if (pivotIndex < k)
+			left = pivotIndex + 1;
+		else break;
+	}
+}
+
+
+/*
+   MEX gateway:  x = kth_element(X,k [,flag])
+
+   PInputs[0]  X     real, full, double data vector
+   PInputs[1]  k     real, full, double array of one-based ranks
+   PInputs[2]  flag  optional scalar: 0 (default) works on X in place,
+                     1 works on a copy, 2 works on a copy without the NaN's
+   POutput[0]  array of the same size as k; element i is the k(i)-th smallest
+               value of X, or NaN if k(i) is NaN or outside 1..numel(X)
+               (numel after NaN removal when flag is 2)
+
+   Raises a MEX error for a wrong number of arguments, for complex, sparse
+   or non-double X or k, and for an unsupported flag type.
+*/
+void mexFunction(int POutputCount,  mxArray* POutput[], int PInputCount, const mxArray *PInputs[])
+{
+	size_t k, n;	// running indices
+	size_t szK, szX;
+	double *T,*X,*Y,*K;
+	char flag = 0; // default value
+
+	// check for proper number of input and output arguments
+	if ( PInputCount < 2 || PInputCount > 3 ) {
+		mexPrintf("KTH_ELEMENT returns the K-th smallest element of vector X\n");
+		mexPrintf("\nusage:\tx = kth_element(X,k)\n");
+		mexPrintf("\nusage:\tx = kth_element(X,k,flag)\n");
+		mexPrintf("\nflag=0: the elements in X can be modified in-place, and data with NaN's is not correctly handled. This can be useful for performance reasons, but it might modify data in-place and is not save for data with NaN's. You are warned.\n");
+		mexPrintf("flag=1: prevents in-place modification of X using a local copy of the data, but does not handle data with NaN in the correct way.\n");
+		mexPrintf("flag=2: prevents in-place modification of X using a local copy of the data and handles NaN's correctly. This is the save but slowest option.\n");
+
+		mexPrintf("\nsee also: median, quantile\n\n");
+		mexErrMsgTxt("KTH_ELEMENT requires two or three input arguments\n");
+	}
+	else if (PInputCount == 3) {
+		// check value of flag
+		size_t N = mxGetNumberOfElements(PInputs[2]);
+		if (N>1)
+			mexErrMsgTxt("KTH_ELEMENT: flag argument must be scalar\n");
+		else if (N==1) {
+			switch (mxGetClassID(PInputs[2])) {
+			case mxLOGICAL_CLASS:
+			case mxCHAR_CLASS:
+			case mxINT8_CLASS:
+			case mxUINT8_CLASS:
+				flag = (char)*(uint8_t*)mxGetData(PInputs[2]);
+				break;
+			case mxDOUBLE_CLASS:
+				flag = (char)*(double*)mxGetData(PInputs[2]);
+				break;
+			case mxSINGLE_CLASS:
+				flag = (char)*(float*)mxGetData(PInputs[2]);
+				break;
+			case mxINT16_CLASS:
+			case mxUINT16_CLASS:
+				flag = (char)*(uint16_t*)mxGetData(PInputs[2]);
+				break;
+			case mxINT32_CLASS:
+			case mxUINT32_CLASS:
+				flag = (char)*(uint32_t*)mxGetData(PInputs[2]);
+				break;
+			case mxINT64_CLASS:
+			case mxUINT64_CLASS:
+				flag = (char)*(uint64_t*)mxGetData(PInputs[2]);
+				break;
+			case mxFUNCTION_CLASS:
+			case mxUNKNOWN_CLASS:
+			case mxCELL_CLASS:
+			case mxSTRUCT_CLASS:
+			default:
+				mexErrMsgTxt("KTH_ELEMENT: Type of 3rd input argument not supported.");
+			}
+		}
+		// else flag = default value
+	}
+	// else flag = default value
+
+	// only POutput[0] is ever assigned, so a second output cannot be served
+	if (POutputCount > 1)
+		mexErrMsgTxt("KTH_ELEMENT has only one output arguments.");
+
+	// get 1st argument
+	if (mxIsComplex(PInputs[0]) || mxIsComplex(PInputs[1]))
+		mexErrMsgTxt("complex argument not supported (yet). ");
+	if (!mxIsDouble(PInputs[0]) || !mxIsDouble(PInputs[1]))
+		mexErrMsgTxt("input arguments must be of type double . ");
+	// a sparse array stores only its non-zero values, so reading
+	// mxGetNumberOfElements() values from mxGetData() would overrun it
+	if (mxIsSparse(PInputs[0]) || mxIsSparse(PInputs[1]))
+		mexErrMsgTxt("sparse argument not supported. ");
+	// TODO: support of complex, and integer data
+
+
+	szK = mxGetNumberOfElements(PInputs[1]);
+	K = (double*)mxGetData(PInputs[1]);
+
+	szX = mxGetNumberOfElements(PInputs[0]);
+	X = (double*)mxGetData(PInputs[0]);
+
+	if (flag==0)
+		T = X;		// reorder the caller's data in place
+	else {
+		//***** create temporary copy for avoiding unintended side effects (in-place sort of input data) */
+		T = (double*)mxMalloc(szX*sizeof(double));
+		if (flag==1)
+			memcpy(T,X,szX*sizeof(double));
+		else  {
+			/* do not copy NaN's */
+			for (k=0,n=0; k < szX; k++) {
+				if (!ISNAN(X[k])) T[n++]=X[k];
+			}
+			szX = n;	// from here on only the NaN-free part counts
+		}
+	}
+
+	/*********** create output arguments *****************/
+	POutput[0] = mxCreateDoubleMatrix(mxGetM(PInputs[1]),mxGetN(PInputs[1]),mxREAL);
+	Y = (double*) mxGetData(POutput[0]);
+	for (k=0; k < szK; k++) {
+		// The test is written positively so that a NaN rank fails it as
+		// well: every comparison with NaN is false, and converting NaN
+		// to size_t below would be undefined.
+		if (!(K[k] >= 1.0 && K[k] <= (double)szX))
+			Y[k] = mxGetNaN();	// result undefined
+		else {
+			n = (size_t)(K[k]-1);         // convert to zero-based indexing, round towards 0
+			findFirstK(T, 0, szX-1, n);
+			Y[k] = T[n];
+		}
+	}
+
+	if (flag) mxFree(T);
+
+	return;
+}
+
diff --git a/src/linear.cpp b/src/linear.cpp
new file mode 100644
index 0000000..a178393
--- /dev/null
+++ b/src/linear.cpp
@@ -0,0 +1,3128 @@
+/*
+   This code was extracted from liblinear-2.2.1 in Feb 2019 and
+   modified for the use with Octave and Matlab
+
+Copyright (c) 2007-2019 The LIBLINEAR Project.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3.
Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include +#include +#include +#include +#include +#include "linear.h" +#include "tron.h" +int liblinear_version = LIBLINEAR_VERSION; +typedef signed char schar; +template static inline void swap(T& x, T& y) { T t=x; x=y; y=t; } +#ifndef min +template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } +#endif +template static inline void clone(T*& dst, S* src, int n) +{ + dst = new T[n]; + memcpy((void *)dst,(void *)src,sizeof(T)*n); +} +#define Malloc(type,n) (type *)malloc((n)*sizeof(type)) +#define INF HUGE_VAL + +static void print_string_stdout(const char *s) +{ + fputs(s,stdout); + fflush(stdout); +} +static void print_null(const char*) {} + +static void (*liblinear_print_string) (const char *) = &print_string_stdout; + +#if 1 +static void info(const char *fmt,...) +{ + char buf[BUFSIZ]; + va_list ap; + va_start(ap,fmt); + vsprintf(buf,fmt,ap); + va_end(ap); + (*liblinear_print_string)(buf); +} +#else +static void info(const char *fmt,...) 
{} +#endif +class sparse_operator +{ +public: + static double nrm2_sq(const feature_node *x) + { + double ret = 0; + while(x->index != -1) + { + ret += x->value*x->value; + x++; + } + return (ret); + } + + static double dot(const double *s, const feature_node *x) + { + double ret = 0; + while(x->index != -1) + { + ret += s[x->index-1]*x->value; + x++; + } + return (ret); + } + + static void axpy(const double a, const feature_node *x, double *y) + { + while(x->index != -1) + { + y[x->index-1] += a*x->value; + x++; + } + } +}; + +class l2r_lr_fun: public function +{ +public: + l2r_lr_fun(const problem *prob, double *C); + ~l2r_lr_fun(); + + double fun(double *w); + void grad(double *w, double *g); + void Hv(double *s, double *Hs); + + int get_nr_variable(void); + void get_diag_preconditioner(double *M); + +private: + void Xv(double *v, double *Xv); + void XTv(double *v, double *XTv); + + double *C; + double *z; + double *D; + const problem *prob; +}; + +l2r_lr_fun::l2r_lr_fun(const problem *prob, double *C) +{ + int l=prob->l; + + this->prob = prob; + + z = new double[l]; + D = new double[l]; + this->C = C; +} + +l2r_lr_fun::~l2r_lr_fun() +{ + delete[] z; + delete[] D; +} + + +double l2r_lr_fun::fun(double *w) +{ + int i; + double f=0; + double *y=prob->y; + int l=prob->l; + int w_size=get_nr_variable(); + + Xv(w, z); + + for(i=0;i= 0) + f += C[i]*log(1 + exp(-yz)); + else + f += C[i]*(-yz+log(1 + exp(yz))); + } + + return(f); +} + +void l2r_lr_fun::grad(double *w, double *g) +{ + int i; + double *y=prob->y; + int l=prob->l; + int w_size=get_nr_variable(); + + for(i=0;in; +} + +void l2r_lr_fun::get_diag_preconditioner(double *M) +{ + int i; + int l = prob->l; + int w_size=get_nr_variable(); + feature_node **x = prob->x; + + for (i=0; iindex!=-1) + { + M[s->index-1] += s->value*s->value*C[i]*D[i]; + s++; + } + } +} + +void l2r_lr_fun::Hv(double *s, double *Hs) +{ + int i; + int l=prob->l; + int w_size=get_nr_variable(); + feature_node **x=prob->x; + + for(i=0;il; + 
feature_node **x=prob->x; + + for(i=0;il; + int w_size=get_nr_variable(); + feature_node **x=prob->x; + + for(i=0;il; + + this->prob = prob; + + z = new double[l]; + I = new int[l]; + this->C = C; +} + +l2r_l2_svc_fun::~l2r_l2_svc_fun() +{ + delete[] z; + delete[] I; +} + +double l2r_l2_svc_fun::fun(double *w) +{ + int i; + double f=0; + double *y=prob->y; + int l=prob->l; + int w_size=get_nr_variable(); + + Xv(w, z); + + for(i=0;i 0) + f += C[i]*d*d; + } + + return(f); +} + +void l2r_l2_svc_fun::grad(double *w, double *g) +{ + int i; + double *y=prob->y; + int l=prob->l; + int w_size=get_nr_variable(); + + sizeI = 0; + for (i=0;in; +} + +void l2r_l2_svc_fun::get_diag_preconditioner(double *M) +{ + int i; + int w_size=get_nr_variable(); + feature_node **x = prob->x; + + for (i=0; iindex!=-1) + { + M[s->index-1] += s->value*s->value*C[idx]*2; + s++; + } + } +} + +void l2r_l2_svc_fun::Hv(double *s, double *Hs) +{ + int i; + int w_size=get_nr_variable(); + feature_node **x=prob->x; + + for(i=0;il; + feature_node **x=prob->x; + + for(i=0;ix; + + for(i=0;ip = p; +} + +double l2r_l2_svr_fun::fun(double *w) +{ + int i; + double f=0; + double *y=prob->y; + int l=prob->l; + int w_size=get_nr_variable(); + double d; + + Xv(w, z); + + for(i=0;i p) + f += C[i]*(d-p)*(d-p); + } + + return(f); +} + +void l2r_l2_svr_fun::grad(double *w, double *g) +{ + int i; + double *y=prob->y; + int l=prob->l; + int w_size=get_nr_variable(); + double d; + + sizeI = 0; + for(i=0;i p) + { + z[sizeI] = C[i]*(d-p); + I[sizeI] = i; + sizeI++; + } + + } + subXTv(z, g); + + for(i=0;iy[i]) +// To support weights for instances, use GETI(i) (i) + +class Solver_MCSVM_CS +{ + public: + Solver_MCSVM_CS(const problem *prob, int nr_class, double *C, double eps=0.1, int max_iter=100000); + ~Solver_MCSVM_CS(); + void Solve(double *w); + private: + void solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new); + bool be_shrunk(int i, int m, int yi, double alpha_i, double minG); + 
double *B, *C, *G; + int w_size, l; + int nr_class; + int max_iter; + double eps; + const problem *prob; +}; + +Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *prob, int nr_class, double *weighted_C, double eps, int max_iter) +{ + this->w_size = prob->n; + this->l = prob->l; + this->nr_class = nr_class; + this->eps = eps; + this->max_iter = max_iter; + this->prob = prob; + this->B = new double[nr_class]; + this->G = new double[nr_class]; + this->C = weighted_C; +} + +Solver_MCSVM_CS::~Solver_MCSVM_CS() +{ + delete[] B; + delete[] G; +} + +int compare_double(const void *a, const void *b) +{ + if(*(double *)a > *(double *)b) + return -1; + if(*(double *)a < *(double *)b) + return 1; + return 0; +} + +void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new) +{ + int r; + double *D; + + clone(D, B, active_i); + if(yi < active_i) + D[yi] += A_i*C_yi; + qsort(D, active_i, sizeof(double), compare_double); + + double beta = D[0] - A_i*C_yi; + for(r=1;ry[i] == m + // alpha[i*nr_class+m] <= 0 if prob->y[i] != m + // If initial alpha isn't zero, uncomment the for loop below to initialize w + for(i=0;ix[i]; + QD[i] = 0; + while(xi->index != -1) + { + double val = xi->value; + QD[i] += val*val; + + // Uncomment the for loop if initial alpha isn't zero + // for(m=0; mindex-1)*nr_class+m] += alpha[i*nr_class+m]*val; + xi++; + } + active_size_i[i] = nr_class; + y_index[i] = (int)prob->y[i]; + index[i] = i; + } + + while(iter < max_iter) + { + double stopping = -INF; + for(i=0;i 0) + { + for(m=0;mx[i]; + while(xi->index!= -1) + { + double *w_i = &w[(xi->index-1)*nr_class]; + for(m=0;mvalue); + xi++; + } + + double minG = INF; + double maxG = -INF; + for(m=0;m maxG) + maxG = G[m]; + } + if(y_index[i] < active_size_i[i]) + if(alpha_i[(int) prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG) + minG = G[y_index[i]]; + + for(m=0;mm) + { + if(!be_shrunk(i, active_size_i[i], y_index[i], + alpha_i[alpha_index_i[active_size_i[i]]], minG)) + { + 
swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]); + swap(G[m], G[active_size_i[i]]); + if(y_index[i] == active_size_i[i]) + y_index[i] = m; + else if(y_index[i] == m) + y_index[i] = active_size_i[i]; + break; + } + active_size_i[i]--; + } + } + } + + if(active_size_i[i] <= 1) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + + if(maxG-minG <= 1e-12) + continue; + else + stopping = max(maxG - minG, stopping); + + for(m=0;m= 1e-12) + { + d_ind[nz_d] = alpha_index_i[m]; + d_val[nz_d] = d; + nz_d++; + } + } + + xi = prob->x[i]; + while(xi->index != -1) + { + double *w_i = &w[(xi->index-1)*nr_class]; + for(m=0;mvalue; + xi++; + } + } + } + + iter++; + if(iter % 10 == 0) + { + info("."); + } + + if(stopping < eps_shrink) + { + if(stopping < eps && start_from_all == true) + break; + else + { + active_size = l; + for(i=0;i= max_iter) + info("\nWARNING: reaching max number of iterations\n"); + + // calculate objective value + double v = 0; + int nSV = 0; + for(i=0;i 0) + nSV++; + } + for(i=0;iy[i]]; + info("Objective value = %lf\n",v); + info("nSV = %d\n",nSV); + + delete [] alpha; + delete [] alpha_new; + delete [] index; + delete [] QD; + delete [] d_ind; + delete [] d_val; + delete [] alpha_index; + delete [] y_index; + delete [] active_size_i; +} + +// A coordinate descent algorithm for +// L1-loss and L2-loss SVM dual problems +// +// min_\alpha 0.5(\alpha^T (Q + D)\alpha) - e^T \alpha, +// s.t. 
0 <= \alpha_i <= upper_bound_i, +// +// where Qij = yi yj xi^T xj and +// D is a diagonal matrix +// +// In L1-SVM case: +// upper_bound_i = Cp if y_i = 1 +// upper_bound_i = Cn if y_i = -1 +// D_ii = 0 +// In L2-SVM case: +// upper_bound_i = INF +// D_ii = 1/(2*Cp) if y_i = 1 +// D_ii = 1/(2*Cn) if y_i = -1 +// +// Given: +// x, y, Cp, Cn +// eps is the stopping tolerance +// +// solution will be put in w +// +// See Algorithm 3 of Hsieh et al., ICML 2008 + +#undef GETI +#define GETI(i) (y[i]+1) +// To support weights for instances, use GETI(i) (i) + +static void solve_l2r_l1l2_svc( + const problem *prob, double *w, double eps, + double Cp, double Cn, int solver_type) +{ + int l = prob->l; + int w_size = prob->n; + int i, s, iter = 0; + double C, d, G; + double *QD = new double[l]; + int max_iter = 1000; + int *index = new int[l]; + double *alpha = new double[l]; + schar *y = new schar[l]; + int active_size = l; + + // PG: projected gradient, for shrinking and stopping + double PG; + double PGmax_old = INF; + double PGmin_old = -INF; + double PGmax_new, PGmin_new; + + // default solver_type: L2R_L2LOSS_SVC_DUAL + double diag[3] = {0.5/Cn, 0, 0.5/Cp}; + double upper_bound[3] = {INF, 0, INF}; + if(solver_type == L2R_L1LOSS_SVC_DUAL) + { + diag[0] = 0; + diag[2] = 0; + upper_bound[0] = Cn; + upper_bound[2] = Cp; + } + + for(i=0; iy[i] > 0) + { + y[i] = +1; + } + else + { + y[i] = -1; + } + } + + // Initial alpha can be set here. 
Note that + // 0 <= alpha[i] <= upper_bound[GETI(i)] + for(i=0; ix[i]; + QD[i] += sparse_operator::nrm2_sq(xi); + sparse_operator::axpy(y[i]*alpha[i], xi, w); + + index[i] = i; + } + + while (iter < max_iter) + { + PGmax_new = -INF; + PGmin_new = INF; + + for (i=0; ix[i]; + + G = yi*sparse_operator::dot(w, xi)-1; + + C = upper_bound[GETI(i)]; + G += alpha[i]*diag[GETI(i)]; + + PG = 0; + if (alpha[i] == 0) + { + if (G > PGmax_old) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + else if (G < 0) + PG = G; + } + else if (alpha[i] == C) + { + if (G < PGmin_old) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + else if (G > 0) + PG = G; + } + else + PG = G; + + PGmax_new = max(PGmax_new, PG); + PGmin_new = min(PGmin_new, PG); + + if(fabs(PG) > 1.0e-12) + { + double alpha_old = alpha[i]; + alpha[i] = min(max(alpha[i] - G/QD[i], 0.0), C); + d = (alpha[i] - alpha_old)*yi; + sparse_operator::axpy(d, xi, w); + } + } + + iter++; + if(iter % 10 == 0) + info("."); + + if(PGmax_new - PGmin_new <= eps) + { + if(active_size == l) + break; + else + { + active_size = l; + info("*"); + PGmax_old = INF; + PGmin_old = -INF; + continue; + } + } + PGmax_old = PGmax_new; + PGmin_old = PGmin_new; + if (PGmax_old <= 0) + PGmax_old = INF; + if (PGmin_old >= 0) + PGmin_old = -INF; + } + + info("\noptimization finished, #iter = %d\n",iter); + if (iter >= max_iter) + info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n"); + + // calculate objective value + + double v = 0; + int nSV = 0; + for(i=0; i 0) + ++nSV; + } + info("Objective value = %lf\n",v/2); + info("nSV = %d\n",nSV); + + delete [] QD; + delete [] alpha; + delete [] y; + delete [] index; +} + + +// A coordinate descent algorithm for +// L1-loss and L2-loss epsilon-SVR dual problem +// +// min_\beta 0.5\beta^T (Q + diag(lambda)) \beta - p \sum_{i=1}^l|\beta_i| + \sum_{i=1}^l yi\beta_i, +// s.t. 
-upper_bound_i <= \beta_i <= upper_bound_i, +// +// where Qij = xi^T xj and +// D is a diagonal matrix +// +// In L1-SVM case: +// upper_bound_i = C +// lambda_i = 0 +// In L2-SVM case: +// upper_bound_i = INF +// lambda_i = 1/(2*C) +// +// Given: +// x, y, p, C +// eps is the stopping tolerance +// +// solution will be put in w +// +// See Algorithm 4 of Ho and Lin, 2012 + +#undef GETI +#define GETI(i) (0) +// To support weights for instances, use GETI(i) (i) + +static void solve_l2r_l1l2_svr( + const problem *prob, double *w, const parameter *param, + int solver_type) +{ + int l = prob->l; + double C = param->C; + double p = param->p; + int w_size = prob->n; + double eps = param->eps; + int i, s, iter = 0; + int max_iter = 1000; + int active_size = l; + int *index = new int[l]; + + double d, G, H; + double Gmax_old = INF; + double Gmax_new, Gnorm1_new; + double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration + double *beta = new double[l]; + double *QD = new double[l]; + double *y = prob->y; + + // L2R_L2LOSS_SVR_DUAL + double lambda[1], upper_bound[1]; + lambda[0] = 0.5/C; + upper_bound[0] = INF; + + if(solver_type == L2R_L1LOSS_SVR_DUAL) + { + lambda[0] = 0; + upper_bound[0] = C; + } + + // Initial beta can be set here. 
Note that + // -upper_bound <= beta[i] <= upper_bound + for(i=0; ix[i]; + QD[i] = sparse_operator::nrm2_sq(xi); + sparse_operator::axpy(beta[i], xi, w); + + index[i] = i; + } + + + while(iter < max_iter) + { + Gmax_new = 0; + Gnorm1_new = 0; + + for(i=0; ix[i]; + G += sparse_operator::dot(w, xi); + + double Gp = G+p; + double Gn = G-p; + double violation = 0; + if(beta[i] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + else if(Gp>Gmax_old && Gn<-Gmax_old) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(beta[i] >= upper_bound[GETI(i)]) + { + if(Gp > 0) + violation = Gp; + else if(Gp < -Gmax_old) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(beta[i] <= -upper_bound[GETI(i)]) + { + if(Gn < 0) + violation = -Gn; + else if(Gn > Gmax_old) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(beta[i] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + + Gmax_new = max(Gmax_new, violation); + Gnorm1_new += violation; + + // obtain Newton direction d + if(Gp < H*beta[i]) + d = -Gp/H; + else if(Gn > H*beta[i]) + d = -Gn/H; + else + d = -beta[i]; + + if(fabs(d) < 1.0e-12) + continue; + + double beta_old = beta[i]; + beta[i] = min(max(beta[i]+d, -upper_bound[GETI(i)]), upper_bound[GETI(i)]); + d = beta[i]-beta_old; + + if(d != 0) + sparse_operator::axpy(d, xi, w); + } + + if(iter == 0) + Gnorm1_init = Gnorm1_new; + iter++; + if(iter % 10 == 0) + info("."); + + if(Gnorm1_new <= eps*Gnorm1_init) + { + if(active_size == l) + break; + else + { + active_size = l; + info("*"); + Gmax_old = INF; + continue; + } + } + + Gmax_old = Gmax_new; + } + + info("\noptimization finished, #iter = %d\n", iter); + if(iter >= max_iter) + info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n"); + + // calculate objective value + double v = 0; + int nSV = 0; + for(i=0; il; + int w_size = 
prob->n; + int i, s, iter = 0; + double *xTx = new double[l]; + int max_iter = 1000; + int *index = new int[l]; + double *alpha = new double[2*l]; // store alpha and C - alpha + schar *y = new schar[l]; + int max_inner_iter = 100; // for inner Newton + double innereps = 1e-2; + double innereps_min = min(1e-8, eps); + double upper_bound[3] = {Cn, 0, Cp}; + + for(i=0; iy[i] > 0) + { + y[i] = +1; + } + else + { + y[i] = -1; + } + } + + // Initial alpha can be set here. Note that + // 0 < alpha[i] < upper_bound[GETI(i)] + // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)] + for(i=0; ix[i]; + xTx[i] = sparse_operator::nrm2_sq(xi); + sparse_operator::axpy(y[i]*alpha[2*i], xi, w); + index[i] = i; + } + + while (iter < max_iter) + { + for (i=0; ix[i]; + ywTx = yi*sparse_operator::dot(w, xi); + double a = xisq, b = ywTx; + + // Decide to minimize g_1(z) or g_2(z) + int ind1 = 2*i, ind2 = 2*i+1, sign = 1; + if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0) + { + ind1 = 2*i+1; + ind2 = 2*i; + sign = -1; + } + + // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old) + double alpha_old = alpha[ind1]; + double z = alpha_old; + if(C - z < 0.5 * C) + z = 0.1*z; + double gp = a*(z-alpha_old)+sign*b+log(z/(C-z)); + Gmax = max(Gmax, fabs(gp)); + + // Newton method on the sub-problem + const double eta = 0.1; // xi in the paper + int inner_iter = 0; + while (inner_iter <= max_inner_iter) + { + if(fabs(gp) < innereps) + break; + double gpp = a + C/(C-z)/z; + double tmpz = z - gp/gpp; + if(tmpz <= 0) + z *= eta; + else // tmpz in (0, C) + z = tmpz; + gp = a*(z-alpha_old)+sign*b+log(z/(C-z)); + newton_iter++; + inner_iter++; + } + + if(inner_iter > 0) // update w + { + alpha[ind1] = z; + alpha[ind2] = C-z; + sparse_operator::axpy(sign*(z-alpha_old)*yi, xi, w); + } + } + + iter++; + if(iter % 10 == 0) + info("."); + + if(Gmax < eps) + break; + + if(newton_iter <= l/10) + innereps = max(innereps_min, 0.1*innereps); + + } + + info("\noptimization finished, #iter = 
%d\n",iter); + if (iter >= max_iter) + info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n"); + + // calculate objective value + + double v = 0; + for(i=0; il; + int w_size = prob_col->n; + int j, s, iter = 0; + int max_iter = 1000; + int active_size = w_size; + int max_num_linesearch = 20; + + double sigma = 0.01; + double d, G_loss, G, H; + double Gmax_old = INF; + double Gmax_new, Gnorm1_new; + double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration + double d_old, d_diff; + double loss_old, loss_new; + double appxcond, cond; + + int *index = new int[w_size]; + schar *y = new schar[l]; + double *b = new double[l]; // b = 1-ywTx + double *xj_sq = new double[w_size]; + feature_node *x; + + double C[3] = {Cn,0,Cp}; + + // Initial w can be set here. + for(j=0; jy[j] > 0) + y[j] = 1; + else + y[j] = -1; + } + for(j=0; jx[j]; + while(x->index != -1) + { + int ind = x->index-1; + x->value *= y[ind]; // x->value stores yi*xij + double val = x->value; + b[ind] -= w[j]*val; + xj_sq[j] += C[GETI(ind)]*val*val; + x++; + } + } + + while(iter < max_iter) + { + Gmax_new = 0; + Gnorm1_new = 0; + + for(j=0; jx[j]; + while(x->index != -1) + { + int ind = x->index-1; + if(b[ind] > 0) + { + double val = x->value; + double tmp = C[GETI(ind)]*val; + G_loss -= tmp*b[ind]; + H += tmp*val; + } + x++; + } + G_loss *= 2; + + G = G_loss; + H *= 2; + H = max(H, 1e-12); + + double Gp = G+1; + double Gn = G-1; + double violation = 0; + if(w[j] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(w[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + + Gmax_new = max(Gmax_new, violation); + Gnorm1_new += violation; + + // obtain Newton direction d + if(Gp < H*w[j]) + d = -Gp/H; + else if(Gn > H*w[j]) + d = -Gn/H; + else + d = -w[j]; + + if(fabs(d) < 
1.0e-12) + continue; + + double delta = fabs(w[j]+d)-fabs(w[j]) + G*d; + d_old = 0; + int num_linesearch; + for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) + { + d_diff = d_old - d; + cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta; + + appxcond = xj_sq[j]*d*d + G_loss*d + cond; + if(appxcond <= 0) + { + x = prob_col->x[j]; + sparse_operator::axpy(d_diff, x, b); + break; + } + + if(num_linesearch == 0) + { + loss_old = 0; + loss_new = 0; + x = prob_col->x[j]; + while(x->index != -1) + { + int ind = x->index-1; + if(b[ind] > 0) + loss_old += C[GETI(ind)]*b[ind]*b[ind]; + double b_new = b[ind] + d_diff*x->value; + b[ind] = b_new; + if(b_new > 0) + loss_new += C[GETI(ind)]*b_new*b_new; + x++; + } + } + else + { + loss_new = 0; + x = prob_col->x[j]; + while(x->index != -1) + { + int ind = x->index-1; + double b_new = b[ind] + d_diff*x->value; + b[ind] = b_new; + if(b_new > 0) + loss_new += C[GETI(ind)]*b_new*b_new; + x++; + } + } + + cond = cond + loss_new - loss_old; + if(cond <= 0) + break; + else + { + d_old = d; + d *= 0.5; + delta *= 0.5; + } + } + + w[j] += d; + + // recompute b[] if line search takes too many steps + if(num_linesearch >= max_num_linesearch) + { + info("#"); + for(int i=0; ix[i]; + sparse_operator::axpy(-w[i], x, b); + } + } + } + + if(iter == 0) + Gnorm1_init = Gnorm1_new; + iter++; + if(iter % 10 == 0) + info("."); + + if(Gnorm1_new <= eps*Gnorm1_init) + { + if(active_size == w_size) + break; + else + { + active_size = w_size; + info("*"); + Gmax_old = INF; + continue; + } + } + + Gmax_old = Gmax_new; + } + + info("\noptimization finished, #iter = %d\n", iter); + if(iter >= max_iter) + info("\nWARNING: reaching max number of iterations\n"); + + // calculate objective value + + double v = 0; + int nnz = 0; + for(j=0; jx[j]; + while(x->index != -1) + { + x->value *= prob_col->y[x->index-1]; // restore x->value + x++; + } + if(w[j] != 0) + { + v += fabs(w[j]); + nnz++; + } + } + for(j=0; j 0) + v += C[GETI(j)]*b[j]*b[j]; 
+ + info("Objective value = %lf\n", v); + info("#nonzeros/#features = %d/%d\n", nnz, w_size); + + delete [] index; + delete [] y; + delete [] b; + delete [] xj_sq; +} + +// A coordinate descent algorithm for +// L1-regularized logistic regression problems +// +// min_w \sum |wj| + C \sum log(1+exp(-yi w^T xi)), +// +// Given: +// x, y, Cp, Cn +// eps is the stopping tolerance +// +// solution will be put in w +// +// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008) + +#undef GETI +#define GETI(i) (y[i]+1) +// To support weights for instances, use GETI(i) (i) + +static void solve_l1r_lr( + const problem *prob_col, double *w, double eps, + double Cp, double Cn) +{ + int l = prob_col->l; + int w_size = prob_col->n; + int j, s, newton_iter=0, iter=0; + int max_newton_iter = 100; + int max_iter = 1000; + int max_num_linesearch = 20; + int active_size; + int QP_active_size; + + double nu = 1e-12; + double inner_eps = 1; + double sigma = 0.01; + double w_norm, w_norm_new; + double z, G, H; + double Gnorm1_init = -1.0; // Gnorm1_init is initialized at the first iteration + double Gmax_old = INF; + double Gmax_new, Gnorm1_new; + double QP_Gmax_old = INF; + double QP_Gmax_new, QP_Gnorm1_new; + double delta, negsum_xTd, cond; + + int *index = new int[w_size]; + schar *y = new schar[l]; + double *Hdiag = new double[w_size]; + double *Grad = new double[w_size]; + double *wpd = new double[w_size]; + double *xjneg_sum = new double[w_size]; + double *xTd = new double[l]; + double *exp_wTx = new double[l]; + double *exp_wTx_new = new double[l]; + double *tau = new double[l]; + double *D = new double[l]; + feature_node *x; + + double C[3] = {Cn,0,Cp}; + + // Initial w can be set here. 
+ for(j=0; jy[j] > 0) + y[j] = 1; + else + y[j] = -1; + + exp_wTx[j] = 0; + } + + w_norm = 0; + for(j=0; jx[j]; + while(x->index != -1) + { + int ind = x->index-1; + double val = x->value; + exp_wTx[ind] += w[j]*val; + if(y[ind] == -1) + xjneg_sum[j] += C[GETI(ind)]*val; + x++; + } + } + for(j=0; jx[j]; + while(x->index != -1) + { + int ind = x->index-1; + Hdiag[j] += x->value*x->value*D[ind]; + tmp += x->value*tau[ind]; + x++; + } + Grad[j] = -tmp + xjneg_sum[j]; + + double Gp = Grad[j]+1; + double Gn = Grad[j]-1; + double violation = 0; + if(w[j] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + //outer-level shrinking + else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) + { + active_size--; + swap(index[s], index[active_size]); + s--; + continue; + } + } + else if(w[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + + Gmax_new = max(Gmax_new, violation); + Gnorm1_new += violation; + } + + if(newton_iter == 0) + Gnorm1_init = Gnorm1_new; + + if(Gnorm1_new <= eps*Gnorm1_init) + break; + + iter = 0; + QP_Gmax_old = INF; + QP_active_size = active_size; + + for(int i=0; ix[j]; + G = Grad[j] + (wpd[j]-w[j])*nu; + while(x->index != -1) + { + int ind = x->index-1; + G += x->value*D[ind]*xTd[ind]; + x++; + } + + double Gp = G+1; + double Gn = G-1; + double violation = 0; + if(wpd[j] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + //inner-level shrinking + else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l) + { + QP_active_size--; + swap(index[s], index[QP_active_size]); + s--; + continue; + } + } + else if(wpd[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); + + QP_Gmax_new = max(QP_Gmax_new, violation); + QP_Gnorm1_new += violation; + + // obtain solution of one-variable problem + if(Gp < H*wpd[j]) + z = -Gp/H; + else if(Gn > H*wpd[j]) + z = -Gn/H; + else + z = -wpd[j]; + + if(fabs(z) < 1.0e-12) + continue; + z = min(max(z,-10.0),10.0); + + wpd[j] += z; + + x = prob_col->x[j]; + 
sparse_operator::axpy(z, x, xTd); + } + + iter++; + + if(QP_Gnorm1_new <= inner_eps*Gnorm1_init) + { + //inner stopping + if(QP_active_size == active_size) + break; + //active set reactivation + else + { + QP_active_size = active_size; + QP_Gmax_old = INF; + continue; + } + } + + QP_Gmax_old = QP_Gmax_new; + } + + if(iter >= max_iter) + info("WARNING: reaching max number of inner iterations\n"); + + delta = 0; + w_norm_new = 0; + for(j=0; j= max_num_linesearch) + { + for(int i=0; ix[i]; + sparse_operator::axpy(w[i], x, exp_wTx); + } + + for(int i=0; i= max_newton_iter) + info("WARNING: reaching max number of iterations\n"); + + // calculate objective value + + double v = 0; + int nnz = 0; + for(j=0; jl; + int n = prob->n; + size_t nnz = 0; + size_t *col_ptr = new size_t [n+1]; + feature_node *x_space; + prob_col->l = l; + prob_col->n = n; + prob_col->y = new double[l]; + prob_col->x = new feature_node*[n]; + + for(i=0; iy[i] = prob->y[i]; + + for(i=0; ix[i]; + while(x->index != -1) + { + nnz++; + col_ptr[x->index]++; + x++; + } + } + for(i=1; ix[i] = &x_space[col_ptr[i]]; + + for(i=0; ix[i]; + while(x->index != -1) + { + int ind = x->index-1; + x_space[col_ptr[ind]].index = i+1; // starts from 1 + x_space[col_ptr[ind]].value = x->value; + col_ptr[ind]++; + x++; + } + } + for(i=0; il; + int max_nr_class = 16; + int nr_class = 0; + int *label = Malloc(int,max_nr_class); + int *count = Malloc(int,max_nr_class); + int *data_label = Malloc(int,l); + int i; + + for(i=0;iy[i]; + int j; + for(j=0;jeps; + double eps_cg = 0.1; + if(param->init_sol != NULL) + eps_cg = 0.5; + + int pos = 0; + int neg = 0; + for(int i=0;il;i++) + if(prob->y[i] > 0) + pos++; + neg = prob->l - pos; + double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l; + + function *fun_obj=NULL; + switch(param->solver_type) + { + case L2R_LR: + { + double *C = new double[prob->l]; + for(int i = 0; i < prob->l; i++) + { + if(prob->y[i] > 0) + C[i] = Cp; + else + C[i] = Cn; + } + fun_obj=new 
l2r_lr_fun(prob, C); + TRON tron_obj(fun_obj, primal_solver_tol, eps_cg); + tron_obj.set_print_string(liblinear_print_string); + tron_obj.tron(w); + delete fun_obj; + delete[] C; + break; + } + case L2R_L2LOSS_SVC: + { + double *C = new double[prob->l]; + for(int i = 0; i < prob->l; i++) + { + if(prob->y[i] > 0) + C[i] = Cp; + else + C[i] = Cn; + } + fun_obj=new l2r_l2_svc_fun(prob, C); + TRON tron_obj(fun_obj, primal_solver_tol, eps_cg); + tron_obj.set_print_string(liblinear_print_string); + tron_obj.tron(w); + delete fun_obj; + delete[] C; + break; + } + case L2R_L2LOSS_SVC_DUAL: + solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL); + break; + case L2R_L1LOSS_SVC_DUAL: + solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL); + break; + case L1R_L2LOSS_SVC: + { + problem prob_col; + feature_node *x_space = NULL; + transpose(prob, &x_space ,&prob_col); + solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn); + delete [] prob_col.y; + delete [] prob_col.x; + delete [] x_space; + break; + } + case L1R_LR: + { + problem prob_col; + feature_node *x_space = NULL; + transpose(prob, &x_space ,&prob_col); + solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn); + delete [] prob_col.y; + delete [] prob_col.x; + delete [] x_space; + break; + } + case L2R_LR_DUAL: + solve_l2r_lr_dual(prob, w, eps, Cp, Cn); + break; + case L2R_L2LOSS_SVR: + { + double *C = new double[prob->l]; + for(int i = 0; i < prob->l; i++) + C[i] = param->C; + + fun_obj=new l2r_l2_svr_fun(prob, C, param->p); + TRON tron_obj(fun_obj, param->eps); + tron_obj.set_print_string(liblinear_print_string); + tron_obj.tron(w); + delete fun_obj; + delete[] C; + break; + + } + case L2R_L1LOSS_SVR_DUAL: + solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL); + break; + case L2R_L2LOSS_SVR_DUAL: + solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL); + break; + default: + fprintf(stderr, "ERROR: unknown solver_type\n"); + break; + } +} + +// Calculate the initial C for parameter selection 
+static double calc_start_C(const problem *prob, const parameter *param) +{ + int i; + double xTx,max_xTx; + max_xTx = 0; + for(i=0; il; i++) + { + xTx = 0; + feature_node *xi=prob->x[i]; + while(xi->index != -1) + { + double val = xi->value; + xTx += val*val; + xi++; + } + if(xTx > max_xTx) + max_xTx = xTx; + } + + double min_C = 1.0; + if(param->solver_type == L2R_LR) + min_C = 1.0 / (prob->l * max_xTx); + else if(param->solver_type == L2R_L2LOSS_SVC) + min_C = 1.0 / (2 * prob->l * max_xTx); + + return pow( 2, floor(log(min_C) / log(2.0)) ); +} + + +// +// Interface functions +// +model* train(const problem *prob, const parameter *param) +{ + int i,j; + int l = prob->l; + int n = prob->n; + int w_size = prob->n; + model *model_ = Malloc(model,1); + + if(prob->bias>=0) + model_->nr_feature=n-1; + else + model_->nr_feature=n; + model_->param = *param; + model_->bias = prob->bias; + + if(check_regression_model(model_)) + { + model_->w = Malloc(double, w_size); + for(i=0; iw[i] = 0; + model_->nr_class = 2; + model_->label = NULL; + train_one(prob, param, model_->w, 0, 0); + } + else + { + int nr_class; + int *label = NULL; + int *start = NULL; + int *count = NULL; + int *perm = Malloc(int,l); + + // group training data of the same class + group_classes(prob,&nr_class,&label,&start,&count,perm); + + model_->nr_class=nr_class; + model_->label = Malloc(int,nr_class); + for(i=0;ilabel[i] = label[i]; + + // calculate weighted C + double *weighted_C = Malloc(double, nr_class); + for(i=0;iC; + for(i=0;inr_weight;i++) + { + for(j=0;jweight_label[i] == label[j]) + break; + if(j == nr_class) + fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]); + else + weighted_C[j] *= param->weight[i]; + } + + // constructing the subproblem + feature_node **x = Malloc(feature_node *,l); + for(i=0;ix[perm[i]]; + + int k; + problem sub_prob; + sub_prob.l = l; + sub_prob.n = n; + sub_prob.x = Malloc(feature_node *,sub_prob.l); + sub_prob.y = 
Malloc(double,sub_prob.l); + + for(k=0; ksolver_type == MCSVM_CS) + { + model_->w=Malloc(double, n*nr_class); + for(i=0;ieps); + Solver.Solve(model_->w); + } + else + { + if(nr_class == 2) + { + model_->w=Malloc(double, w_size); + + int e0 = start[0]+count[0]; + k=0; + for(; kinit_sol != NULL) + for(i=0;iw[i] = param->init_sol[i]; + else + for(i=0;iw[i] = 0; + + train_one(&sub_prob, param, model_->w, weighted_C[0], weighted_C[1]); + } + else + { + model_->w=Malloc(double, w_size*nr_class); + double *w=Malloc(double, w_size); + for(i=0;iinit_sol != NULL) + for(j=0;jinit_sol[j*nr_class+i]; + else + for(j=0;jC); + + for(j=0;jw[j*nr_class+i] = w[j]; + } + free(w); + } + + } + + free(x); + free(label); + free(start); + free(count); + free(perm); + free(sub_prob.x); + free(sub_prob.y); + free(weighted_C); + } + return model_; +} + +void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target) +{ + int i; + int *fold_start; + int l = prob->l; + int *perm = Malloc(int,l); + if (nr_fold > l) + { + nr_fold = l; + fprintf(stderr,"WARNING: # folds > # data. 
Will use # folds = # data instead (i.e., leave-one-out cross validation)\n"); + } + fold_start = Malloc(int,nr_fold+1); + for(i=0;ibias; + subprob.n = prob->n; + subprob.l = l-(end-begin); + subprob.x = Malloc(struct feature_node*,subprob.l); + subprob.y = Malloc(double,subprob.l); + + k=0; + for(j=0;jx[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + ++k; + } + for(j=end;jx[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + ++k; + } + struct model *submodel = train(&subprob,param); + for(j=begin;jx[perm[j]]); + free_and_destroy_model(&submodel); + free(subprob.x); + free(subprob.y); + } + free(fold_start); + free(perm); +} + +void find_parameter_C(const problem *prob, const parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate) +{ + // variables for CV + int i; + int *fold_start; + int l = prob->l; + int *perm = Malloc(int, l); + double *target = Malloc(double, prob->l); + struct problem *subprob = Malloc(problem,nr_fold); + + // variables for warm start + double ratio = 2; + double **prev_w = Malloc(double*, nr_fold); + for(i = 0; i < nr_fold; i++) + prev_w[i] = NULL; + int num_unchanged_w = 0; + struct parameter param1 = *param; + void (*default_print_string) (const char *) = liblinear_print_string; + + if (nr_fold > l) + { + nr_fold = l; + fprintf(stderr,"WARNING: # folds > # data. 
Will use # folds = # data instead (i.e., leave-one-out cross validation)\n"); + } + fold_start = Malloc(int,nr_fold+1); + for(i=0;ibias; + subprob[i].n = prob->n; + subprob[i].l = l-(end-begin); + subprob[i].x = Malloc(struct feature_node*,subprob[i].l); + subprob[i].y = Malloc(double,subprob[i].l); + + k=0; + for(j=0;jx[perm[j]]; + subprob[i].y[k] = prob->y[perm[j]]; + ++k; + } + for(j=end;jx[perm[j]]; + subprob[i].y[k] = prob->y[perm[j]]; + ++k; + } + + } + + *best_rate = 0; + if(start_C <= 0) + start_C = calc_start_C(prob,param); + param1.C = start_C; + + while(param1.C <= max_C) + { + //Output disabled for running CV at a particular C + set_print_string_function(&print_null); + + for(i=0; inr_class == 2) + total_w_size = subprob[i].n; + else + total_w_size = subprob[i].n * submodel->nr_class; + + if(prev_w[i] == NULL) + { + prev_w[i] = Malloc(double, total_w_size); + for(j=0; jw[j]; + } + else if(num_unchanged_w >= 0) + { + double norm_w_diff = 0; + for(j=0; jw[j] - prev_w[i][j])*(submodel->w[j] - prev_w[i][j]); + prev_w[i][j] = submodel->w[j]; + } + norm_w_diff = sqrt(norm_w_diff); + + if(norm_w_diff > 1e-15) + num_unchanged_w = -1; + } + else + { + for(j=0; jw[j]; + } + + for(j=begin; jx[perm[j]]); + + free_and_destroy_model(&submodel); + } + set_print_string_function(default_print_string); + + int total_correct = 0; + for(i=0; il; i++) + if(target[i] == prob->y[i]) + ++total_correct; + double current_rate = (double)total_correct/prob->l; + if(current_rate > *best_rate) + { + *best_C = param1.C; + *best_rate = current_rate; + } + + info("log2c=%7.2f\trate=%g\n",log(param1.C)/log(2.0),100.0*current_rate); + num_unchanged_w++; + if(num_unchanged_w == 3) + break; + param1.C = param1.C*ratio; + } + + if(param1.C > max_C && max_C > start_C) + info("warning: maximum C reached.\n"); + free(fold_start); + free(perm); + free(target); + for(i=0; ibias>=0) + n=model_->nr_feature+1; + else + n=model_->nr_feature; + double *w=model_->w; + int nr_class=model_->nr_class; + 
int i; + int nr_w; + if(nr_class==2 && model_->param.solver_type != MCSVM_CS) + nr_w = 1; + else + nr_w = nr_class; + + const feature_node *lx=x; + for(i=0;iindex)!=-1; lx++) + { + // the dimension of testing data may exceed that of training + if(idx<=n) + for(i=0;ivalue; + } + + if(nr_class==2) + { + if(check_regression_model(model_)) + return dec_values[0]; + else + return (dec_values[0]>0)?model_->label[0]:model_->label[1]; + } + else + { + int dec_max_idx = 0; + for(i=1;i dec_values[dec_max_idx]) + dec_max_idx = i; + } + return model_->label[dec_max_idx]; + } +} + +double predict(const model *model_, const feature_node *x) +{ + double *dec_values = Malloc(double, model_->nr_class); + double label=predict_values(model_, x, dec_values); + free(dec_values); + return label; +} + +double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates) +{ + if(check_probability_model(model_)) + { + int i; + int nr_class=model_->nr_class; + int nr_w; + if(nr_class==2) + nr_w = 1; + else + nr_w = nr_class; + + double label=predict_values(model_, x, prob_estimates); + for(i=0;inr_feature; + int n; + const parameter& param = model_->param; + + if(model_->bias>=0) + n=nr_feature+1; + else + n=nr_feature; + int w_size = n; + FILE *fp = fopen(model_file_name,"w"); + if(fp==NULL) return -1; + + char *old_locale = setlocale(LC_ALL, NULL); + if (old_locale) + { + old_locale = strdup(old_locale); + } + setlocale(LC_ALL, "C"); + + int nr_w; + if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) + nr_w=1; + else + nr_w=model_->nr_class; + + fprintf(fp, "solver_type %s\n", solver_type_table[param.solver_type]); + fprintf(fp, "nr_class %d\n", model_->nr_class); + + if(model_->label) + { + fprintf(fp, "label"); + for(i=0; inr_class; i++) + fprintf(fp, " %d", model_->label[i]); + fprintf(fp, "\n"); + } + + fprintf(fp, "nr_feature %d\n", nr_feature); + + fprintf(fp, "bias %.17g\n", model_->bias); + + fprintf(fp, "w\n"); + for(i=0; 
iw[i*nr_w+j]); + fprintf(fp, "\n"); + } + + setlocale(LC_ALL, old_locale); + free(old_locale); + + if (ferror(fp) != 0 || fclose(fp) != 0) return -1; + else return 0; +} + +// +// FSCANF helps to handle fscanf failures. +// Its do-while block avoids the ambiguity when +// if (...) +// FSCANF(); +// is used +// +#define FSCANF(_stream, _format, _var)do\ +{\ + if (fscanf(_stream, _format, _var) != 1)\ + {\ + fprintf(stderr, "ERROR: fscanf failed to read the model\n");\ + EXIT_LOAD_MODEL()\ + }\ +}while(0) +// EXIT_LOAD_MODEL should NOT end with a semicolon. +#define EXIT_LOAD_MODEL()\ +{\ + setlocale(LC_ALL, old_locale);\ + free(model_->label);\ + free(model_);\ + free(old_locale);\ + return NULL;\ +} +struct model *load_model(const char *model_file_name) +{ + FILE *fp = fopen(model_file_name,"r"); + if(fp==NULL) return NULL; + + int i; + int nr_feature; + int n; + int nr_class; + double bias; + model *model_ = Malloc(model,1); + parameter& param = model_->param; + // parameters for training only won't be assigned, but arrays are assigned as NULL for safety + param.nr_weight = 0; + param.weight_label = NULL; + param.weight = NULL; + param.init_sol = NULL; + + model_->label = NULL; + + char *old_locale = setlocale(LC_ALL, NULL); + if (old_locale) + { + old_locale = strdup(old_locale); + } + setlocale(LC_ALL, "C"); + + char cmd[81]; + while(1) + { + FSCANF(fp,"%80s",cmd); + if(strcmp(cmd,"solver_type")==0) + { + FSCANF(fp,"%80s",cmd); + int i; + for(i=0;solver_type_table[i];i++) + { + if(strcmp(solver_type_table[i],cmd)==0) + { + param.solver_type=i; + break; + } + } + if(solver_type_table[i] == NULL) + { + fprintf(stderr,"unknown solver type.\n"); + EXIT_LOAD_MODEL() + } + } + else if(strcmp(cmd,"nr_class")==0) + { + FSCANF(fp,"%d",&nr_class); + model_->nr_class=nr_class; + } + else if(strcmp(cmd,"nr_feature")==0) + { + FSCANF(fp,"%d",&nr_feature); + model_->nr_feature=nr_feature; + } + else if(strcmp(cmd,"bias")==0) + { + FSCANF(fp,"%lf",&bias); + model_->bias=bias; 
+ } + else if(strcmp(cmd,"w")==0) + { + break; + } + else if(strcmp(cmd,"label")==0) + { + int nr_class = model_->nr_class; + model_->label = Malloc(int,nr_class); + for(int i=0;ilabel[i]); + } + else + { + fprintf(stderr,"unknown text in model file: [%s]\n",cmd); + EXIT_LOAD_MODEL() + } + } + + nr_feature=model_->nr_feature; + if(model_->bias>=0) + n=nr_feature+1; + else + n=nr_feature; + int w_size = n; + int nr_w; + if(nr_class==2 && param.solver_type != MCSVM_CS) + nr_w = 1; + else + nr_w = nr_class; + + model_->w=Malloc(double, w_size*nr_w); + for(i=0; iw[i*nr_w+j]); + } + + setlocale(LC_ALL, old_locale); + free(old_locale); + + if (ferror(fp) != 0 || fclose(fp) != 0) return NULL; + + return model_; +} + +int get_nr_feature(const model *model_) +{ + return model_->nr_feature; +} + +int get_nr_class(const model *model_) +{ + return model_->nr_class; +} + +void get_labels(const model *model_, int* label) +{ + if (model_->label != NULL) + for(int i=0;inr_class;i++) + label[i] = model_->label[i]; +} + +// use inline here for better performance (around 20% faster than the non-inline one) +static inline double get_w_value(const struct model *model_, int idx, int label_idx) +{ + int nr_class = model_->nr_class; + int solver_type = model_->param.solver_type; + const double *w = model_->w; + + if(idx < 0 || idx > model_->nr_feature) + return 0; + if(check_regression_model(model_)) + return w[idx]; + else + { + if(label_idx < 0 || label_idx >= nr_class) + return 0; + if(nr_class == 2 && solver_type != MCSVM_CS) + { + if(label_idx == 0) + return w[idx]; + else + return -w[idx]; + } + else + return w[idx*nr_class+label_idx]; + } +} + +// feat_idx: starting from 1 to nr_feature +// label_idx: starting from 0 to nr_class-1 for classification models; +// for regression models, label_idx is ignored. 
+double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx) +{ + if(feat_idx > model_->nr_feature) + return 0; + return get_w_value(model_, feat_idx-1, label_idx); +} + +double get_decfun_bias(const struct model *model_, int label_idx) +{ + int bias_idx = model_->nr_feature; + double bias = model_->bias; + if(bias <= 0) + return 0; + else + return bias*get_w_value(model_, bias_idx, label_idx); +} + +void free_model_content(struct model *model_ptr) +{ + if(model_ptr->w != NULL) + free(model_ptr->w); + if(model_ptr->label != NULL) + free(model_ptr->label); +} + +void free_and_destroy_model(struct model **model_ptr_ptr) +{ + struct model *model_ptr = *model_ptr_ptr; + if(model_ptr != NULL) + { + free_model_content(model_ptr); + free(model_ptr); + } +} + +void destroy_param(parameter* param) +{ + if(param->weight_label != NULL) + free(param->weight_label); + if(param->weight != NULL) + free(param->weight); + if(param->init_sol != NULL) + free(param->init_sol); +} + +const char *check_parameter(const problem*, const parameter *param) +{ + if(param->eps <= 0) + return "eps <= 0"; + + if(param->C <= 0) + return "C <= 0"; + + if(param->p < 0) + return "p < 0"; + + if(param->solver_type != L2R_LR + && param->solver_type != L2R_L2LOSS_SVC_DUAL + && param->solver_type != L2R_L2LOSS_SVC + && param->solver_type != L2R_L1LOSS_SVC_DUAL + && param->solver_type != MCSVM_CS + && param->solver_type != L1R_L2LOSS_SVC + && param->solver_type != L1R_LR + && param->solver_type != L2R_LR_DUAL + && param->solver_type != L2R_L2LOSS_SVR + && param->solver_type != L2R_L2LOSS_SVR_DUAL + && param->solver_type != L2R_L1LOSS_SVR_DUAL) + return "unknown solver type"; + + if(param->init_sol != NULL + && param->solver_type != L2R_LR && param->solver_type != L2R_L2LOSS_SVC) + return "Initial-solution specification supported only for solver L2R_LR and L2R_L2LOSS_SVC"; + + return NULL; +} + +int check_probability_model(const struct model *model_) +{ + return 
(model_->param.solver_type==L2R_LR || + model_->param.solver_type==L2R_LR_DUAL || + model_->param.solver_type==L1R_LR); +} + +int check_regression_model(const struct model *model_) +{ + return (model_->param.solver_type==L2R_L2LOSS_SVR || + model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || + model_->param.solver_type==L2R_L2LOSS_SVR_DUAL); +} + +void set_print_string_function(void (*print_func)(const char*)) +{ + if (print_func == NULL) + liblinear_print_string = &print_string_stdout; + else + liblinear_print_string = print_func; +} + diff --git a/src/linear.h b/src/linear.h new file mode 100644 index 0000000..8f2f67e --- /dev/null +++ b/src/linear.h @@ -0,0 +1,121 @@ +/* + This code was extracted from liblinear-2.2.1 in Feb 2019 and + modified for the use with Octave and Matlab + + +Copyright (c) 2007-2019 The LIBLINEAR Project. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef _LIBLINEAR_H +#define _LIBLINEAR_H + +#define LIBLINEAR_VERSION 221 + +#ifdef __cplusplus +extern "C" { +#endif + +extern int liblinear_version; + +struct feature_node +{ + int index; + double value; +}; + +struct problem +{ + int l, n; + double *y; + struct feature_node **x; + double bias; /* < 0 if no bias term */ +}; + +enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL }; /* solver_type */ + +struct parameter +{ + int solver_type; + + /* these are for training only */ + double eps; /* stopping criteria */ + double C; + int nr_weight; + int *weight_label; + double* weight; + double p; + double *init_sol; +}; + +struct model +{ + struct parameter param; + int nr_class; /* number of classes */ + int nr_feature; + double *w; + int *label; /* label of each class */ + double bias; +}; + +struct model* train(const struct problem *prob, const struct parameter *param); +void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target); +void find_parameter_C(const struct problem *prob, const struct parameter *param, int nr_fold, double start_C, double max_C, double *best_C, double *best_rate); + +double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values); +double predict(const struct model *model_, const struct feature_node *x); +double 
predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates); + +int save_model(const char *model_file_name, const struct model *model_); +struct model *load_model(const char *model_file_name); + +int get_nr_feature(const struct model *model_); +int get_nr_class(const struct model *model_); +void get_labels(const struct model *model_, int* label); +double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx); +double get_decfun_bias(const struct model *model_, int label_idx); + +void free_model_content(struct model *model_ptr); +void free_and_destroy_model(struct model **model_ptr_ptr); +void destroy_param(struct parameter *param); + +const char *check_parameter(const struct problem *prob, const struct parameter *param); +int check_probability_model(const struct model *model); +int check_regression_model(const struct model *model); +void set_print_string_function(void (*print_func) (const char*)); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBLINEAR_H */ + diff --git a/src/linear_model_matlab.c b/src/linear_model_matlab.c new file mode 100644 index 0000000..8d5b7ec --- /dev/null +++ b/src/linear_model_matlab.c @@ -0,0 +1,212 @@ +/* + This code was extracted from liblinear-2.2.1 in Feb 2019 and + modified for the use with Octave and Matlab + +Copyright (c) 2007-2019 The LIBLINEAR Project. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include +#include "linear.h" +#include "linear_model_matlab.h" + +#ifdef MX_API_VER +#if MX_API_VER < 0x07030000 +typedef int mwIndex; +#endif +#endif + +#define Malloc(type,n) (type *)malloc((n)*sizeof(type)) + +#define NUM_OF_RETURN_FIELD 6 + +static const char *field_names[] = { + "Parameters", + "nr_class", + "nr_feature", + "bias", + "Label", + "w", +}; + +const char *model_to_matlab_structure(mxArray *plhs[], struct model *model_) +{ + int i; + int nr_w; + double *ptr; + mxArray *return_model, **rhs; + int out_id = 0; + int n, w_size; + + rhs = (mxArray **)mxMalloc(sizeof(mxArray *)*NUM_OF_RETURN_FIELD); + + // Parameters + // for now, only solver_type is needed + rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + ptr[0] = model_->param.solver_type; + out_id++; + + // nr_class + rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + ptr[0] = model_->nr_class; + out_id++; + + if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) + nr_w=1; + 
else + nr_w=model_->nr_class; + + // nr_feature + rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + ptr[0] = model_->nr_feature; + out_id++; + + // bias + rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + ptr[0] = model_->bias; + out_id++; + + if(model_->bias>=0) + n=model_->nr_feature+1; + else + n=model_->nr_feature; + + w_size = n; + // Label + if(model_->label) + { + rhs[out_id] = mxCreateDoubleMatrix(model_->nr_class, 1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + for(i = 0; i < model_->nr_class; i++) + ptr[i] = model_->label[i]; + } + else + rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL); + out_id++; + + // w + rhs[out_id] = mxCreateDoubleMatrix(nr_w, w_size, mxREAL); + ptr = mxGetPr(rhs[out_id]); + for(i = 0; i < w_size*nr_w; i++) + ptr[i]=model_->w[i]; + out_id++; + + /* Create a struct matrix contains NUM_OF_RETURN_FIELD fields */ + return_model = mxCreateStructMatrix(1, 1, NUM_OF_RETURN_FIELD, field_names); + + /* Fill struct matrix with input arguments */ + for(i = 0; i < NUM_OF_RETURN_FIELD; i++) + mxSetField(return_model,0,field_names[i],mxDuplicateArray(rhs[i])); + /* return */ + plhs[0] = return_model; + mxFree(rhs); + + return NULL; +} + +const char *matlab_matrix_to_model(struct model *model_, const mxArray *matlab_struct) +{ + int i, num_of_fields; + int nr_w; + double *ptr; + int id = 0; + int n, w_size; + mxArray **rhs; + + num_of_fields = mxGetNumberOfFields(matlab_struct); + rhs = (mxArray **) mxMalloc(sizeof(mxArray *)*num_of_fields); + + for(i=0;inr_class=0; + nr_w=0; + model_->nr_feature=0; + model_->w=NULL; + model_->label=NULL; + + // Parameters + ptr = mxGetPr(rhs[id]); + model_->param.solver_type = (int)ptr[0]; + id++; + + // nr_class + ptr = mxGetPr(rhs[id]); + model_->nr_class = (int)ptr[0]; + id++; + + if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) + nr_w=1; + else + nr_w=model_->nr_class; + + // nr_feature + ptr = mxGetPr(rhs[id]); + model_->nr_feature 
= (int)ptr[0]; + id++; + + // bias + ptr = mxGetPr(rhs[id]); + model_->bias = ptr[0]; + id++; + + if(model_->bias>=0) + n=model_->nr_feature+1; + else + n=model_->nr_feature; + w_size = n; + + // Label + if(mxIsEmpty(rhs[id]) == 0) + { + model_->label = Malloc(int, model_->nr_class); + ptr = mxGetPr(rhs[id]); + for(i=0;inr_class;i++) + model_->label[i] = (int)ptr[i]; + } + id++; + + ptr = mxGetPr(rhs[id]); + model_->w=Malloc(double, w_size*nr_w); + for(i = 0; i < w_size*nr_w; i++) + model_->w[i]=ptr[i]; + id++; + mxFree(rhs); + + return NULL; +} + diff --git a/src/linear_model_matlab.h b/src/linear_model_matlab.h new file mode 100644 index 0000000..7acfec0 --- /dev/null +++ b/src/linear_model_matlab.h @@ -0,0 +1,50 @@ +/* + This code was extracted from liblinear-2.2.1 in Feb 2019 and + modified for the use with Octave and Matlab + +Copyright (c) 2007-2019 The LIBLINEAR Project. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "mex.h" + +#ifdef __cplusplus +extern "C" { +#endif + +const char *model_to_matlab_structure(mxArray *plhs[], struct model *model_); +const char *matlab_matrix_to_model(struct model *model_, const mxArray *matlab_struct); + +#ifdef __cplusplus +} +#endif + diff --git a/src/make.m b/src/make.m new file mode 100644 index 0000000..3dc20e5 --- /dev/null +++ b/src/make.m @@ -0,0 +1,76 @@ +function make(arg1) +% This make.m is used for Matlab under Windows + +% Copyright (C) 2010,2011 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . 
+ +% add -largeArrayDims on 64-bit machines + +if (nargin>0 && strcmp(arg1,'clean')), + if strcmp(computer,'PCWIN') + dos('del *.obj'); + dos('del *.mex*'); + else + unix('rm *.o'); + unix('rm *.mex*'); + end; + return; +end; + +mex covm_mex.cpp +mex sumskipnan_mex.cpp +mex histo_mex.cpp +mex kth_element.cpp +mex str2array.cpp +mex xptopen.cpp +mex -c svm.cpp +mex -c svm_model_matlab.c +mex -c tron.cpp +mex -c linear.cpp +mex -c linear_model_matlab.c +if strcmp(computer,'PCWIN') && ~exist('OCTAVE_VERSION','builtin'), + mex svmtrain_mex.cpp svm.obj svm_model_matlab.obj + mex svmpredict_mex.cpp svm.obj svm_model_matlab.obj + + if ~exist('LAPACK/daxpy.f','file') || ~exist('LAPACK/ddot.f','file') || ~exist('LAPACK/dscal.f','file') || ~exist('LAPACK/dnrm2.f','file'), + fprintf(1,'The lapack functions daxpy, ddot, dscal, and dnrm2 are required.\n'); + fprintf(1,'If some functions are missing, get them from here:\n'); + if ~exist('LAPACK','dir') mkdir('LAPACK'); end; + fprintf(1,'Get http://www.netlib.org/blas/daxpy.f and save to %s',fullfile(pwd,'LAPACK')); + fprintf(1,'Get http://www.netlib.org/blas/ddot.f and save to %s',fullfile(pwd,'LAPACK')); + fprintf(1,'Get http://www.netlib.org/blas/dscal.f and save to %s',fullfile(pwd,'LAPACK')); + fprintf(1,'Get http://www.netlib.org/blas/dnrm2.f and save to %s',fullfile(pwd,'LAPACK')); + fprintf(1,'Press any key to continue ... 
'\n); + pause; + end; + mex -c LAPACK/daxpy.f + mex -c LAPACK/ddot.f + mex -c LAPACK/dscal.f + mex -c LAPACK/dnrm2.f + dos('copy train.c train.cpp'); + mex('train.cpp','tron.obj','linear.obj','linear_model_matlab.obj','daxpy.obj','ddot.obj','dscal.obj','dnrm2.obj') + dos('del *.obj'); + +else + mex svmtrain_mex.cpp svm.o svm_model_matlab.o + mex svmpredict_mex.cpp svm.o svm_model_matlab.o + unix('cp train.c train.cpp'); + mex train.cpp tron.o linear.o linear_model_matlab.o + unix('rm *.o'); +end + diff --git a/src/mexTF.c b/src/mexTF.c new file mode 100644 index 0000000..a5f513c --- /dev/null +++ b/src/mexTF.c @@ -0,0 +1,285 @@ +/* + + Copyright (C) 2019 Alois Schloegl + This file is part of the "BioSig for C/C++" repository + (biosig4c++) at http://biosig.sf.net/ + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 3 + of the License, or (at your option) any later version. 
+ + +References: + https://stackoverflow.com/questions/44378764/hello-tensorflow-using-the-c-api + https://stackoverflow.com/questions/41688217/how-to-load-a-graph-with-tensorflow-so-and-c-api-h-in-c-language + https://tebesu.github.io/posts/Training-a-TensorFlow-graph-in-C++-API + */ + + +#include +#include +#include +#include +#include "mex.h" +//#include "matrix.h" + + +#ifdef tmwtypes_h + #if (MX_API_VER<=0x07020000) + typedef int mwSize; + #endif +#endif + + +TF_Buffer* read_file(const char* file); + +void free_buffer(void* data, size_t length) { + free(data); +} + +TF_Buffer* read_file(const char* file) { + FILE *f = fopen(file, "rb"); + if (f==NULL) return NULL; + fseek(f, 0, SEEK_END); + long fsize = ftell(f); + fseek(f, 0, SEEK_SET); //same as rewind(f); + + void* data = malloc(fsize); + fread(data, fsize, 1, f); + fclose(f); + + TF_Buffer* buf = TF_NewBuffer(); + buf->data = data; + buf->length = fsize; + buf->data_deallocator = free_buffer; + return buf; +} + + +void deallocTFData(void* data, size_t len, void* arg) { + return; +}; + +void mexFunction( + int nlhs, /* number of expected outputs */ + mxArray *plhs[], /* array of pointers to output arguments */ + int nrhs, /* number of inputs */ + const mxArray *prhs[] /* array of pointers to input arguments */ +) + +{ + const mxArray *arg; + TF_Buffer* graph_def = NULL; + TF_Tensor * tensor = NULL; + + if (nrhs<1) { + mexPrintf("mexTF (mexTensorflow) is in a very experimental state.\n"); + mexPrintf(" Usage of mexTF:\n"); + mexPrintf("\tv = mexTF()\n\t\treturns tensorflow version\n"); + mexPrintf("\t[v, graph_def] = mexTF('graph_def')\n\t\treads graph definition file\n"); + mexPrintf("\t[v, graph_def2] = mexTF(graph_def)\n\t\treads graph definition\n"); + mexPrintf("\t[v, graph_def2, class] = mexTF(graph_def, data)\n\t\treads graph definition\n"); + mexPrintf(" Input:\n"); + mexPrintf(" Output:\nTensorflow version\n"); + } + + mexPrintf("%s line %d: %d %d\n",__FILE__,__LINE__,nrhs,nlhs); + for (int k = 0; k 
< nrhs; k++) { + arg = prhs[k]; + mxClassID argtype = mxGetClassID(arg); + + if (mxIsEmpty(arg) && (k>0)) { + mexPrintf("%s line %d\n",__FILE__,__LINE__); + } + + else if ( mxIsChar(arg) && (k==0) ) { + mexPrintf("%s line %d\n",__FILE__,__LINE__); + char *tmp = mxArrayToString(arg); + graph_def = read_file(tmp); + mxFree(tmp); + } + + else if ( ((argtype==mxINT8_CLASS) || (argtype==mxUINT8_CLASS)) && (k==0) ) { + mexPrintf("%s line %d\n",__FILE__,__LINE__); + if (!graph_def) { + graph_def = TF_NewBuffer(); + graph_def->data = mxGetData(arg); + graph_def->length = mxGetNumberOfElements(arg); + graph_def->data_deallocator = NULL; + }; + } + + else if ( (k==1) && mxIsNumeric(arg) ) { + mexPrintf("%s line %d\n",__FILE__,__LINE__); + TF_DataType tf_type; + + mxClassID typ = mxGetClassID(arg); + switch (argtype) { + case mxDOUBLE_CLASS: + tf_type = TF_DOUBLE; + break; + case mxSINGLE_CLASS: + tf_type = TF_FLOAT; + break; + + case mxINT64_CLASS: + tf_type = TF_INT64; + break; + case mxINT32_CLASS: + tf_type = TF_INT32; + break; + case mxINT16_CLASS: + tf_type = TF_INT16; + break; + case mxINT8_CLASS: + tf_type = TF_INT8; + break; + + case mxUINT64_CLASS: + tf_type = TF_UINT64; + break; + case mxUINT32_CLASS: + tf_type = TF_UINT32; + break; + case mxUINT16_CLASS: + tf_type = TF_UINT16; + break; + case mxUINT8_CLASS: + tf_type = TF_UINT8; + break; + + default: + mexPrintf("Error: data type %s of arg1 not supported\n",mxGetClassName(arg)); + return; + ; + } + + int ndims = mxGetNumberOfDimensions(arg); + int64_t *dims = calloc(ndims, sizeof(int64_t)); + for (int k=0; k < ndims; k++) { + dims[k] = *(mxGetDimensions(arg) + k); + mexPrintf("%s line %d: dim[%d]= %d \n", __FILE__, __LINE__, k, dims[k]); + } + + mexPrintf("%s line %d: going to converted to tensor [%d,%d,%d] \n", __FILE__, __LINE__, ndims, mxGetNumberOfElements(arg), TF_DataTypeSize(tf_type)); + + tensor = TF_NewTensor( tf_type, dims, ndims, (void*)mxGetData(arg), mxGetNumberOfElements(arg) * 
TF_DataTypeSize(tf_type), &deallocTFData, NULL); + + mexPrintf("%s line %d: input converted to tensor %p\n", __FILE__, __LINE__, tensor); + mexPrintf("%s line %d: input converted to tensor %d %d %d %d \n", __FILE__, __LINE__, TF_NumDims(tensor), TF_TensorByteSize(tensor), TF_Dim(tensor, 0), TF_Dim(tensor, 1)); + + free(dims); + } + } + + mexPrintf("%s line %d\n",__FILE__,__LINE__); + plhs[0] = mxCreateString(TF_Version()); + if ( (nlhs > 1) && graph_def ) { + mexPrintf("%s line %d\n",__FILE__,__LINE__); + const int ndim = 2; + mwSize dims[ndim]; + dims[0] = 1; + dims[1] = graph_def->length; + plhs[1] = mxCreateNumericArray(ndim, dims, mxUINT8_CLASS, mxREAL); + void *p = mxMalloc(dims[1]); + memcpy(p, graph_def->data, dims[1]); + mxSetData(plhs[1], p); + } + mexPrintf("%s line %d\n",__FILE__,__LINE__); + + /*********************************************** + load graph + ***********************************************/ + // Graph definition from unzipped https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip + // which is used in the Go, Java and Android examples + // TF_Buffer* graph_def = read_file("inception5h/tensorflow_inception_graph.pb"); + TF_Graph* graph = TF_NewGraph(); + + // Import graph_def into graph + TF_Status* status = TF_NewStatus(); + TF_ImportGraphDefOptions* opts = TF_NewImportGraphDefOptions(); + TF_GraphImportGraphDef(graph, graph_def, opts, status); + TF_DeleteImportGraphDefOptions(opts); + TF_DeleteBuffer(graph_def); + + if (TF_GetCode(status) != TF_OK) { + fprintf(stderr, "ERROR: Unable to import graph <%s>\n", TF_Message(status)); + TF_DeleteStatus(status); + return; + } + fprintf(stdout, "Successfully imported graph\n"); + + +if (tensor==NULL) { + // Use the graph + TF_DeleteGraph(graph); + return; +} + + /*********************************************** + run session + ***********************************************/ + TF_SessionOptions * options = TF_NewSessionOptions(); + TF_Session * session = TF_NewSession( graph, 
options, status ); + +mexPrintf("%s line %d: %s\n",__FILE__,__LINE__, TF_Message(status)); + + char hello[] = "Hello TensorFlow!"; +// if (tensor==NULL) tensor = TF_AllocateTensor( TF_STRING, 0, 0, 8 + TF_StringEncodedSize( strlen( hello ) ) ); + + TF_Tensor * tensorOutput; + +mexPrintf("%s line %d: %s\n",__FILE__,__LINE__, TF_Message(status)); + + TF_OperationDescription * operationDescription = TF_NewOperation( graph, "Const", "hello" ); + +mexPrintf("%s line %d: %s\n",__FILE__,__LINE__, TF_Message(status)); + + TF_Operation * operation; + struct TF_Output output; + +mexPrintf("%s line %d: %s\n",__FILE__,__LINE__, TF_Message(status)); + +// TF_StringEncode( hello, strlen( hello ), 8 + ( char * ) TF_TensorData( tensor ), TF_StringEncodedSize( strlen( hello ) ), status ); +// memset( TF_TensorData( tensor ), 0, 8 ); + +mexPrintf("%s line %d: %s\n",__FILE__,__LINE__, TF_Message(status)); + + TF_SetAttrTensor( operationDescription, "value", tensor, status ); + +mexPrintf("%s line %d: %s\n",__FILE__,__LINE__, TF_Message(status)); + + TF_SetAttrType( operationDescription, "dtype", TF_TensorType( tensor ) ); + +mexPrintf("%s line %d: %s\n",__FILE__,__LINE__, TF_Message(status)); + + operation = TF_FinishOperation( operationDescription, status ); + + +mexPrintf("%s line %d: %s\n",__FILE__,__LINE__, TF_Message(status)); + + output.oper = operation; + output.index = 0; + + TF_SessionRun( session, 0, + 0, 0, 0, // Inputs + &output, &tensorOutput, 1, // Outputs + &operation, 1, // Operations + 0, status ); + + +mexPrintf("%s line %d: %s\n",__FILE__,__LINE__, TF_Message(status)); + + printf( "status code: %i\n", TF_GetCode( status ) ); + printf( "%s\n", ( ( char * ) TF_TensorData( tensorOutput ) ) + 9 ); + + TF_CloseSession( session, status ); + TF_DeleteSession( session, status ); + TF_DeleteStatus( status ); + TF_DeleteSessionOptions( options ); + TF_DeleteGraph(graph); + +} + + diff --git a/src/predict.c b/src/predict.c new file mode 100644 index 0000000..b7eb11a --- 
/dev/null +++ b/src/predict.c @@ -0,0 +1,376 @@ +/* + +Copyright (c) 2007-2019 The LIBLINEAR Project. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include +#include +#include "linear.h" + +#include "mex.h" +#include "linear_model_matlab.h" + +#ifdef MX_API_VER +#if MX_API_VER < 0x07030000 +typedef int mwIndex; +#endif +#endif + +#define CMD_LEN 2048 + +#define Malloc(type,n) (type *)malloc((n)*sizeof(type)) + +int print_null(const char *s,...) 
{} +int (*info)(const char *fmt,...); + +int col_format_flag; + +void read_sparse_instance(const mxArray *prhs, int index, struct feature_node *x, int feature_number, double bias) +{ + int j; + mwIndex *ir, *jc, low, high, i; + double *samples; + + ir = mxGetIr(prhs); + jc = mxGetJc(prhs); + samples = mxGetPr(prhs); + + // each column is one instance + j = 0; + low = jc[index], high = jc[index+1]; + for(i=low; i=0) + { + x[j].index = feature_number+1; + x[j].value = bias; + j++; + } + x[j].index = -1; +} + +static void fake_answer(int nlhs, mxArray *plhs[]) +{ + int i; + for(i=0;iparam.solver_type!=MCSVM_CS) + nr_w=1; + else + nr_w=nr_class; + + // prhs[1] = testing instance matrix + feature_number = get_nr_feature(model_); + testing_instance_number = (int) mxGetM(prhs[1]); + if(col_format_flag) + { + feature_number = (int) mxGetM(prhs[1]); + testing_instance_number = (int) mxGetN(prhs[1]); + } + + label_vector_row_num = (int) mxGetM(prhs[0]); + label_vector_col_num = (int) mxGetN(prhs[0]); + + if(label_vector_row_num!=testing_instance_number) + { + mexPrintf("Length of label vector does not match # of instances.\n"); + fake_answer(nlhs, plhs); + return; + } + if(label_vector_col_num!=1) + { + mexPrintf("label (1st argument) should be a vector (# of column is 1).\n"); + fake_answer(nlhs, plhs); + return; + } + + ptr_label = mxGetPr(prhs[0]); + + // transpose instance matrix + if(col_format_flag) + pplhs[0] = (mxArray *)prhs[1]; + else + { + mxArray *pprhs[1]; + pprhs[0] = mxDuplicateArray(prhs[1]); + if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose")) + { + mexPrintf("Error: cannot transpose testing instance matrix\n"); + fake_answer(nlhs, plhs); + return; + } + } + + + prob_estimates = Malloc(double, nr_class); + + tplhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL); + if(predict_probability_flag) + tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL); + else + tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_w, 
mxREAL); + + ptr_predict_label = mxGetPr(tplhs[0]); + ptr_prob_estimates = mxGetPr(tplhs[2]); + ptr_dec_values = mxGetPr(tplhs[2]); + x = Malloc(struct feature_node, feature_number+2); + for(instance_index=0;instance_indexbias); + + if(predict_probability_flag) + { + predict_label = predict_probability(model_, x, prob_estimates); + ptr_predict_label[instance_index] = predict_label; + for(i=0;i 3 || nrhs > 5 || nrhs < 3) + { + exit_with_help(); + fake_answer(nlhs, plhs); + return; + } + if(nrhs == 5) + { + mxGetString(prhs[4], cmd, mxGetN(prhs[4])+1); + if(strcmp(cmd, "col") == 0) + { + col_format_flag = 1; + } + } + + if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) { + mexPrintf("Error: label vector and instance matrix must be double\n"); + fake_answer(nlhs, plhs); + return; + } + + if(mxIsStruct(prhs[2])) + { + const char *error_msg; + + // parse options + if(nrhs>=4) + { + int i, argc = 1; + char *argv[CMD_LEN/2]; + + // put options in argv[] + mxGetString(prhs[3], cmd, mxGetN(prhs[3]) + 1); + if((argv[argc] = strtok(cmd, " ")) != NULL) + while((argv[++argc] = strtok(NULL, " ")) != NULL) + ; + + for(i=1;i=argc && argv[i-1][1] != 'q') + { + exit_with_help(); + fake_answer(nlhs, plhs); + return; + } + switch(argv[i-1][1]) + { + case 'b': + prob_estimate_flag = atoi(argv[i]); + break; + case 'q': + info = &print_null; + i--; + break; + default: + mexPrintf("unknown option\n"); + exit_with_help(); + fake_answer(nlhs, plhs); + return; + } + } + } + + model_ = Malloc(struct model, 1); + error_msg = matlab_matrix_to_model(model_, prhs[2]); + if(error_msg) + { + mexPrintf("Error: can't read model: %s\n", error_msg); + free_and_destroy_model(&model_); + fake_answer(nlhs, plhs); + return; + } + + if(prob_estimate_flag) + { + if(!check_probability_model(model_)) + { + mexPrintf("probability output is only supported for logistic regression\n"); + prob_estimate_flag=0; + } + } + + if(mxIsSparse(prhs[1])) + do_predict(nlhs, plhs, prhs, model_, prob_estimate_flag); + else + { 
+ mexPrintf("Testing_instance_matrix must be sparse; " + "use sparse(Testing_instance_matrix) first\n"); + fake_answer(nlhs, plhs); + } + + // destroy model_ + free_and_destroy_model(&model_); + } + else + { + mexPrintf("model file should be a struct array\n"); + fake_answer(nlhs, plhs); + } + + return; +} diff --git a/src/str2array.cpp b/src/str2array.cpp new file mode 100644 index 0000000..d084462 --- /dev/null +++ b/src/str2array.cpp @@ -0,0 +1,338 @@ +//------------------------------------------------------------------- +// C-MEX implementation of STR2ARRAY - this function is part of the NaN-toolbox. +// Actually, it also fixes a problem in STR2ARRAY.m described here: +// http://www-old.cae.wisc.edu/pipermail/help-octave/2007-December/007325.html +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, see . +// +// +// usage: +// [...] = STR2ARRAY(s) +// [...] = STR2ARRAY(sa) +// [...] = STR2ARRAY(s,cdelim) +// [...] = STR2ARRAY(s,cdelim,rdelim) +// [...] = STR2ARRAY(s,cdelim,rdelim,ddelim) +// [num,status,strarray] = STR2ARRAY(...) 
+// +// Input: +// s char string +// sa cell array of strings +// cdelim column delimiter +// rdelim row delimiter +// ddelim decimal delimiter +// +// Output: +// $Id: STR2ARRAY.cpp 7142 2010-03-30 18:48:06Z schloegl $ +// Copyright (C) 2010,2011 Alois Schloegl +// This function is part of the NaN-toolbox +// http://pub.ist.ac.at/~schloegl/matlab/NaN/ +// +//------------------------------------------------------------------- + + + +#include +#include +#include +#include +#include "mex.h" + +#ifdef tmwtypes_h + #if (MX_API_VER<=0x07020000) + typedef int mwSize; + #endif +#endif + + +int str2val(char *s, double *r, double *i) +{ +/* + str2val converts string into numeric value. real and complex numbers are supported. + complex numbers are "3.4 + 5.6i" or "3.4 + i * 5.6" (spaces are optional) + + input: + s char string + output: + *r real value + *i imaginary value + return values: + 0: conversion failed + 1: real number returned: + 2: complex number returned + +*/ + char *endptr = NULL; + double val = strtod(s, &endptr); // conversion + + while (isspace(*endptr)) endptr++; +#ifdef DEBUG + mexPrintf("123<%s>\t,%f,\t[%s]\n",s,val,endptr); +#endif + if (!*endptr) { + // conversion successful + *r = val; + return(1); + } + else if ((*endptr=='+') || (*endptr=='-')) { + // imaginary part + double sgn = (*endptr=='+') ? 
1.0 : -1.0; + double ival; + while (isspace(*(++endptr))); + + if (*endptr=='i') { + // case " a + i * b " + while (isspace(*(++endptr))); + + if (*endptr=='*') { + ival = strtod(endptr+1, &endptr); // conversion + if (*endptr && !isspace(*endptr)) { + return(0); // failed + } + else { + *r = val; + *i = sgn*ival; + return(2); // + } + } + else + return(0); //failed + } + else { + // case " a + bi " + ival = strtod(endptr, &endptr); // conversion + if (*endptr != 'i') return(0); + endptr++; + while (*endptr) { + if (!isspace(*endptr)) return(0); + endptr++; + } + *r = val; + *i = sgn*ival; + return(2); + } + } + else //if (*endptr && !isspace(*endptr)) + { + // conversion failed + return(0); + } +} + + + +void mexFunction( + int nlhs, /* number of expected outputs */ + mxArray *plhs[], /* array of pointers to output arguments */ + int nrhs, /* number of inputs */ + const mxArray *prhs[] /* array of pointers to input arguments */ +) + +{ + char *s = NULL; + const char *cdelim = "\x09,"; + const char *rdelim = "\x0a;"; + const char *ddelim = NULL; + const char *valid_delim = " ()[]{},;:\"|/\x21\x22\x09\0x0a\0x0b\0x0c\0x0d\x00"; // valid delimiter + uint8_t *u; + size_t slen = 0,k; + size_t maxcol=0, maxrow=0, nr, nc; + + if (nrhs<1) { + mexPrintf(" STR2ARRAY.MEX converts delimiter text files into arrays of numerics and cell-strings\n"); + mexPrintf(" STR2ARRAY.MEX converts delimiter text files into numeric arrays\n"); + mexPrintf(" It fixes a problem of the old STR2DOUBLE discussed here: http://www-old.cae.wisc.edu/pipermail/help-octave/2007-December/007325.html\n"); + mexPrintf(" at avoids using the insecure STR2NUM using EVAL\n"); + mexPrintf("\n Usage of STR2ARRAY:\n"); + mexPrintf("\t[...] = STR2ARRAY(s)\n"); + mexPrintf("\t[...] = STR2ARRAY(sa)\n"); + mexPrintf("\t[...] = STR2ARRAY(s,cdelim)\n"); + mexPrintf("\t[...] = STR2ARRAY(s,cdelim,rdelim)\n"); + mexPrintf("\t[...] 
= STR2ARRAY(s,cdelim,rdelim,ddelim)\n"); + mexPrintf("\t[num,status,strarray] = STR2ARRAY(...)\n"); + mexPrintf(" Input:\n\ts\tstring\n\tsa\tcell array of strings\n\tcdelim\tlist of column delimiters (default: \",\"\n\trdelim\tlist of row delimiter (default: \";\")"); + mexPrintf("\n\tddelim\tdecimal delimiter (default: \".\"). This is useful if decimal delimiter is a comma (e.g. after Excel export in Europe)\n"); + mexPrintf(" Output:\n\tnum\tnumeric array\n\tstatus\tflag failing conversion\n\tstrarray\tcell array of strings contains strings of failed conversions\n"); + mexPrintf("\nExamples:\n\tSTR2ARRAY('4.12')\n\tSTR2ARRAY('1.2 - 3.4e2i') complex numbers\n\tSTR2ARRAY('101.01 , 0-i4; 1.2 - i * 3.4, abc')\n\tSTR2ARRAY({'101.01', '0-i4'; '1.2 - i * 3.4', 'abc'})\n\tSTR2ARRAY('1,2;a,b,c;3,4')\n"); + mexPrintf("\tSTR2ARRAY('1;2,3;4',';',',') exchange row- and column delimiter\n\tSTR2ARRAY('1,200 4;3,400 5',' ',';',',') replace decimal delimter\n"); + return; + } + + /* sanity check of input arguments */ + + if ((nrhs==1) && mxIsCell(prhs[0])) { + // cell array of strings + maxrow = mxGetM(prhs[0]); + maxcol = mxGetN(prhs[0]); + + /* allocate output memory */ + if (nlhs>2) plhs[2] = mxCreateCellMatrix(maxrow, maxcol); + uint8_t *v = NULL; + if (nlhs>1) { + plhs[1] = mxCreateLogicalMatrix(maxrow, maxcol); + v = (uint8_t*)mxGetData(plhs[1]); + memset(v, 1, maxrow*maxcol); + } + plhs[0] = mxCreateDoubleMatrix(maxrow, maxcol, mxREAL); + double *o = (double*)mxGetData(plhs[0]); + double *oi= NULL; + for (k=0; k2) mxSetCell(plhs[2], k, a); + */ + } + else { + int typ = str2val(s, o+k, &ival); + if ((nlhs>2) && (typ==0)) mxSetCell(plhs[2], (mwSize)k, mxCreateString(s)); + if ((nlhs>1) && (typ> 0)) v[k] = 0; + if (typ==2) { + if (mxGetPi(plhs[0])==NULL) { + oi = (double*) mxCalloc(maxrow*maxcol, sizeof(double)); + mxSetPi(plhs[0], oi); + } + oi[k] = ival; + } + } + } + // cell-array input is finished + return; + } + + if (nrhs>0) { + if (mxIsChar(prhs[0])) { + s = 
mxArrayToString(prhs[0]); + slen = mxGetNumberOfElements(prhs[0]); + } + else + mexErrMsgTxt("arg1 is not a char array"); + } + if (nrhs>1) { + if (mxIsChar(prhs[1])) + cdelim = mxArrayToString(prhs[1]); + else + mexErrMsgTxt("arg2 is not a char array"); + } + if (nrhs>2) { + if (mxIsChar(prhs[2])) + rdelim = mxArrayToString(prhs[2]); + else + mexErrMsgTxt("arg3 is not a char array"); + } + if (nrhs>3) { + if (mxIsChar(prhs[3]) && (mxGetNumberOfElements(prhs[3])==1) ) { + ddelim = mxArrayToString(prhs[3]); + for (k=0; k0); + u[slen] = 2; + } + for (k = 0; k < slen; ) { + if (u[k]==2) { + s[k] = 0; + nr++; + if (nc > maxcol) maxcol=nc; + nc = 0; + } + else if (u[k]==1) { + s[k] = 0; + nc++; + } + k++; + } + if (nc > maxcol) maxcol=nc; + maxcol += (slen>0); + maxrow = nr; + + /* allocate output memory */ + if (nlhs>2) plhs[2] = mxCreateCellMatrix(maxrow, maxcol); + uint8_t *v = NULL; + if (nlhs>1) { + plhs[1] = mxCreateLogicalMatrix(maxrow, maxcol); + v = (uint8_t*)mxGetData(plhs[1]); + memset(v,1,maxrow*maxcol); + } + plhs[0] = mxCreateDoubleMatrix(maxrow, maxcol, mxREAL); + double *o = (double*)mxGetData(plhs[0]); + double *oi = NULL; + for (k=0; k2) && (typ==0)) mxSetCell(plhs[2], idx, mxCreateString(s+last)); + if ((nlhs>1) && (typ> 0)) v[idx] = 0; + if (typ==2) { + if (oi==NULL) { + oi = (double*) mxCalloc(maxrow*maxcol, sizeof(double)); + mxSetPi(plhs[0], oi); + } + oi[idx] = ival; + } + } + + nc++; // next element + if (u[k]==2) { + nr++; // next row + nc = 0; + } + last = k+1; + } + } + mxFree(u); +}; + diff --git a/src/sumskipnan_mex.cpp b/src/sumskipnan_mex.cpp new file mode 100644 index 0000000..9973e00 --- /dev/null +++ b/src/sumskipnan_mex.cpp @@ -0,0 +1,1026 @@ + +//------------------------------------------------------------------- +// C-MEX implementation of SUMSKIPNAN - this function is part of the NaN-toolbox. 
+// +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, see . +// +// +// sumskipnan: sums all non-NaN values +// usage: +// [o,count,SSQ] = sumskipnan_mex(x,DIM,flag,W); +// +// SUMSKIPNAN uses two techniques to reduce errors: +// 1) long double (80bit) instead of 64-bit double is used internally +// 2) The Kahan Summation formula is used to reduce the error margin from N*eps to 2*eps +// The latter is only implemented in case of stride=1 (column vectors only, summation along 1st dimension). 
+// +// Input: +// - x data array +// - DIM (optional) dimension to sum +// - flag (optional) is actually an output argument telling whether some NaN was observed +// - W (optional) weight vector to compute weighted sum (default 1) +// +// Output: +// - o (weighted) sum along dimension DIM +// - count of valid elements +// - sums of squares +// +// +// $Id$ +// Copyright (C) 2009,2010,2011 Alois Schloegl +// This function is part of the NaN-toolbox +// http://pub.ist.ac.at/~schloegl/matlab/NaN/ +// +//------------------------------------------------------------------- + + + + +#include +#include +#include "mex.h" + +/* + math.h has isnan() defined for all sizes of floating point numbers, + but c++ assumes isnan(double), causing possible conversions for float and long double +*/ +#define ISNAN(a) (a!=a) + +#ifndef typeof +#define typeof __typeof__ +#endif + +inline void __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); +inline void __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); +inline void __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); +inline void __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); +inline void __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); +inline void __sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); +inline void __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W); +inline void __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W); + +//#define NO_FLAG + +#ifdef tmwtypes_h + #if (MX_API_VER<=0x07020000) + typedef int mwSize; + #endif +#endif + + +void mexFunction(int POutputCount, mxArray* POutput[], int 
PInputCount, const mxArray *PInputs[]) +{ + const mwSize *SZ; + double* LInput; + double* LOutputSum; + double* LOutputCount; + double* LOutputSum2; + long double* LongOutputSum = NULL; + long double* LongOutputCount = NULL; + long double* LongOutputSum2 = NULL; + double x; + double* W = NULL; // weight vector + + size_t DIM = 0; + size_t D1, D2, D3; // NN; // + size_t ND, ND2; // number of dimensions: input, output + size_t ix0, ix1, ix2; // index to input and output + size_t j, l; // running indices + mwIndex *SZ2; // size of output + char flag_isNaN = 0; + + // check for proper number of input and output arguments + if ((PInputCount <= 0) || (PInputCount > 4)) + mexErrMsgTxt("SUMSKIPNAN.MEX requires between 1 and 4 arguments."); + if (POutputCount > 4) + mexErrMsgTxt("SUMSKIPNAN.MEX has 1 to 3 output arguments."); + + // get 1st argument + if(mxIsDouble(PInputs[0]) && !mxIsComplex(PInputs[0]) && !mxIsSparse(PInputs[0]) ) + LInput = mxGetPr(PInputs[0]); + else + mexErrMsgTxt("First argument must be double and not sparse REAL/DOUBLE."); + + // get 2nd argument + if (PInputCount > 1) { + switch (mxGetNumberOfElements(PInputs[1])) { + case 0: x = 0.0; // accept empty element + break; + case 1: x = (mxIsNumeric(PInputs[1]) ? mxGetScalar(PInputs[1]) : -1.0); + break; + default:x = -1.0; // invalid + } + if ((x < 0) || (x > 65535) || (x != floor(x))) + mexErrMsgTxt("Error SUMSKIPNAN.MEX: DIM-argument must be a positive integer scalar"); + + DIM = (unsigned)floor(x); + } + + // get size + ND = mxGetNumberOfDimensions(PInputs[0]); + // NN = mxGetNumberOfElements(PInputs[0]); + SZ = mxGetDimensions(PInputs[0]); + + // if DIM==0 (undefined), look for first dimension with more than 1 element. + for (j = 0; (DIM < 1) && (j < ND); j++) + if (SZ[j]>1) DIM = j+1; + + if (DIM < 1) DIM=1; // in case DIM is still undefined + + ND2 = (ND>DIM ? 
ND : DIM); // number of dimensions of output + + SZ2 = (mwSize*)mxCalloc(ND2, sizeof(mwSize)); // allocate memory for output size + + for (j=0; j ND, add extra elements 1 + SZ2[j] = 1; + + for (j=0, D1=1; j 3) { + if (!mxGetNumberOfElements(PInputs[3])) + ; // empty weight vector - no weighting + else if (mxGetNumberOfElements(PInputs[3])==D2) + W = mxGetPr(PInputs[3]); + else + mexErrMsgTxt("Error SUMSKIPNAN.MEX: length of weight vector does not match size of dimension"); + } + + int ACC_LEVEL = 0; + { + mxArray *LEVEL = NULL; + int s = mexCallMATLAB(1, &LEVEL, 0, NULL, "flag_accuracy_level"); + if (!s) { + ACC_LEVEL = (int) mxGetScalar(LEVEL); + if ((D1>1) && (ACC_LEVEL>2)) + mexWarnMsgTxt("Warning: Kahan summation not supported with stride > 1 !"); + } + mxDestroyArray(LEVEL); + } + // mexPrintf("Accuracy Level=%i\n",ACC_LEVEL); + + // create outputs + #define TYP mxDOUBLE_CLASS + + POutput[0] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); + LOutputSum = mxGetPr(POutput[0]); + if (D1!=1 && D2>0) LongOutputSum = (long double*) mxCalloc(D1*D3,sizeof(long double)); + if (POutputCount >= 2) { + POutput[1] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); + LOutputCount = mxGetPr(POutput[1]); + if (D1!=1 && D2>0) LongOutputCount = (long double*) mxCalloc(D1*D3,sizeof(long double)); + } + if (POutputCount >= 3) { + POutput[2] = mxCreateNumericArray(ND2, SZ2, TYP, mxREAL); + LOutputSum2 = mxGetPr(POutput[2]); + if (D1!=1 && D2>0) LongOutputSum2 = (long double*) mxCalloc(D1*D3,sizeof(long double)); + } + mxFree(SZ2); + + + if (!D1 || !D2 || !D3) // zero size array + ; // do nothing + else if (D1==1) { + if (ACC_LEVEL<1) { + // double accuray, naive summation, error = N*2^-52 + switch (POutputCount) { + case 0: + case 1: + #pragma omp parallel for schedule(dynamic) + for (l = 0; l DIM + for (l = 0; l DIM + for (l = 0; l DIM + for (l = 0; l 2) && mxGetNumberOfElements(PInputs[2])) { + // set FLAG_NANS_OCCURED + switch (mxGetClassID(PInputs[2])) { + case mxLOGICAL_CLASS: + 
case mxCHAR_CLASS: + case mxINT8_CLASS: + case mxUINT8_CLASS: + *(uint8_t*)mxGetData(PInputs[2]) = 1; + break; + case mxDOUBLE_CLASS: + *(double*)mxGetData(PInputs[2]) = 1.0; + break; + case mxSINGLE_CLASS: + *(float*)mxGetData(PInputs[2]) = 1.0; + break; + case mxINT16_CLASS: + case mxUINT16_CLASS: + *(uint16_t*)mxGetData(PInputs[2]) = 1; + break; + case mxINT32_CLASS: + case mxUINT32_CLASS: + *(uint32_t*)mxGetData(PInputs[2])= 1; + break; + case mxINT64_CLASS: + case mxUINT64_CLASS: + *(uint64_t*)mxGetData(PInputs[2]) = 1; + break; + case mxFUNCTION_CLASS: + case mxUNKNOWN_CLASS: + case mxCELL_CLASS: + case mxSTRUCT_CLASS: + default: + mexPrintf("Type of 3rd input argument not supported."); + } + } +#endif +} + +#define stride 1 +inline void __sumskipnan2w__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) +{ + long double sum=0; + char flag=0; + // LOOP along dimension DIM + + double *end = data + stride*Ni; + if (W) { + // with weight vector + long double count = 0.0; + do { + long double x = *data; + if (!ISNAN(x)) + { + count += *W; + sum += *W*x; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + + data++; // stride=1 + W++; + } + while (data < end); + *No = (typeof(*No))count; + } else { + // w/o weight vector + size_t countI = 0; + do { + long double x = *data; + if (!ISNAN(x)) + { + countI++; + sum += x; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + } + while (data < end); + *No = (typeof(*No))countI; + } + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = (typeof(*s))sum; + +} + + +inline void __sumskipnan3w__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) +{ + long double sum=0; + long double msq=0; + char flag=0; + // LOOP along dimension DIM + + double *end = data + stride*Ni; + if (W) { + // with weight vector + long double count = 0.0; + do { + long double x = *data; + if (!ISNAN(x)) { + count += *W; + long 
double t = *W*x; + sum += t; + msq += x*t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + W++; + } + while (data < end); + *No = (typeof(*No))count; + } else { + // w/o weight vector + size_t countI = 0; + do { + long double x = *data; + if (!ISNAN(x)) { + countI++; + sum += x; + msq += x*x; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + } + while (data < end); + *No = (typeof(*No))countI; + } + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = (typeof(*s))sum; + *s2 = (typeof(*s2))msq; +} + +inline void __sumskipnan2wr__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) +{ + double sum=0; + char flag=0; + // LOOP along dimension DIM + + double *end = data + stride*Ni; + if (W) { + // with weight vector + double count = 0.0; + do { + double x = *data; + if (!ISNAN(x)) + { + count += *W; + sum += *W*x; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + + data++; // stride=1 + W++; + } + while (data < end); + *No = (typeof(*No))count; + } else { + // w/o weight vector + size_t countI = 0; + do { + double x = *data; + if (!ISNAN(x)) + { + countI++; + sum += x; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + } + while (data < end); + *No = (typeof(*No))countI; + } + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = (typeof(*s))sum; + +} + + +inline void __sumskipnan3wr__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) +{ + double sum=0; + double msq=0; + char flag=0; + // LOOP along dimension DIM + + double *end = data + stride*Ni; + if (W) { + // with weight vector + double count = 0.0; + do { + double x = *data; + if (!ISNAN(x)) { + count += *W; + double t = *W*x; + sum += t; + msq += x*t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + W++; + } + while (data < end); + *No = count; + } else { + // w/o weight vector + 
size_t countI = 0; + do { + double x = *data; + if (!ISNAN(x)) { + countI++; + sum += x; + msq += x*x; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + } + while (data < end); + *No = (typeof(*No))countI; + } + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = (typeof(*s))sum; + *s2 = (typeof(*s2))msq; +} + + + +/*************************************** + using Kahan's summation formula [1] + this gives more accurate results while the computational effort within the loop is about 4x as high + First tests show a penalty of about 40% in terms of computational time. + + [1] David Goldberg, + What Every Computer Scientist Should Know About Floating-Point Arithmetic + ACM Computing Surveys, Vol 23, No 1, March 1991. + ****************************************/ + +inline void __sumskipnan2we__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) +{ + long double sum=0; + char flag=0; + // LOOP along dimension DIM + + double *end = data + stride*Ni; + if (W) { + // with weight vector + long double count = 0.0; + long double rc=0.0, rn=0.0; + do { + long double x = *data; + long double t,y; + if (!ISNAN(x)) + { + //count += *W; [1] + y = *W-rn; + t = count+y; + rn= (t-count)-y; + count= t; + + //sum += *W*x; [1] + y = *W*x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + + data++; // stride=1 + W++; + } + while (data < end); + *No = (typeof(*No))count; + } else { + // w/o weight vector + size_t countI = 0; + long double rc=0.0; + do { + long double x = *data; + long double t,y; + if (!ISNAN(x)) + { + countI++; + // sum += x; [1] + y = x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + } + while (data < end); + *No = (typeof(*No))countI; + } + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = (typeof(*s))sum; + +} + + +inline void 
__sumskipnan3we__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) +{ + long double sum=0; + long double msq=0; + char flag=0; + // LOOP along dimension DIM + + double *end = data + stride*Ni; + if (W) { + // with weight vector + long double count = 0.0; + long double rc=0.0, rn=0.0, rq=0.0; + do { + long double x = *data; + long double t,y; + if (!ISNAN(x)) { + //count += *W; [1] + y = *W-rn; + t = count+y; + rn= (t-count)-y; + count= t; + + long double w = *W*x; + //sum += *W*x; [1] + y = *W*x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + + // msq += x*w; + y = w*x-rq; + t = msq+y; + rq= (t-msq)-y; + msq= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + W++; + } + while (data < end); + *No = (typeof(*No))count; + } else { + // w/o weight vector + size_t countI = 0; + long double rc=0.0, rq=0.0; + do { + long double x = *data; + long double t,y; + if (!ISNAN(x)) { + countI++; + //sum += x; [1] + y = x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + + // msq += x*x; + y = x*x-rq; + t = msq+y; + rq= (t-msq)-y; + msq= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + } + while (data < end); + *No = (typeof(*No))countI; + } + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = (typeof(*s))sum; + *s2 = (typeof(*s))msq; +} + +inline void __sumskipnan2wer__(double *data, size_t Ni, double *s, double *No, char *flag_anyISNAN, double *W) +{ + double sum=0; + char flag=0; + // LOOP along dimension DIM + + double *end = data + stride*Ni; + if (W) { + // with weight vector + double count = 0.0; + double rc=0.0, rn=0.0; + do { + double x = *data; + double t,y; + if (!ISNAN(x)) + { + //count += *W; [1] + y = *W-rn; + t = count+y; + rn= (t-count)-y; + count= t; + + //sum += *W*x; [1] + y = *W*x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + + data++; // stride=1 + W++; + } + while (data < end); + *No = 
(typeof(*No))count; + } else { + // w/o weight vector + size_t countI = 0; + double rc=0.0; + do { + double x = *data; + double t,y; + if (!ISNAN(x)) + { + countI++; + // sum += x; [1] + y = x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + } + while (data < end); + *No = (typeof(*No))countI; + } + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = (typeof(*s))sum; + +} + + +inline void __sumskipnan3wer__(double *data, size_t Ni, double *s, double *s2, double *No, char *flag_anyISNAN, double *W) +{ + double sum=0; + double msq=0; + char flag=0; + // LOOP along dimension DIM + + double *end = data + stride*Ni; + if (W) { + // with weight vector + double count = 0.0; + double rc=0.0, rn=0.0, rq=0.0; + do { + double x = *data; + double t,y; + if (!ISNAN(x)) { + //count += *W; [1] + y = *W-rn; + t = count+y; + rn= (t-count)-y; + count= t; + + double w = *W*x; + //sum += *W*x; [1] + y = *W*x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + + // msq += x*w; + y = w*x-rq; + t = msq+y; + rq= (t-msq)-y; + msq= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + W++; + } + while (data < end); + *No = (typeof(*No))count; + } else { + // w/o weight vector + size_t countI = 0; + double rc=0.0, rq=0.0; + do { + double x = *data; + double t,y; + if (!ISNAN(x)) { + countI++; + //sum += x; [1] + y = x-rc; + t = sum+y; + rc= (t-sum)-y; + sum= t; + + // msq += x*x; + y = x*x-rq; + t = msq+y; + rq= (t-msq)-y; + msq= t; + } +#ifndef NO_FLAG + else + flag = 1; +#endif + data++; // stride=1 + } + while (data < end); + *No = (typeof(*No))countI; + } + +#ifndef NO_FLAG + if (flag && (flag_anyISNAN != NULL)) *flag_anyISNAN = 1; +#endif + *s = (typeof(*s))sum; + *s2 = (typeof(*s))msq; +} + diff --git a/src/svm.cpp b/src/svm.cpp new file mode 100644 index 0000000..a46f105 --- /dev/null +++ b/src/svm.cpp @@ -0,0 +1,3219 @@ +/* + + This code was extracted from libsvm 
3.2.3 in Feb 2019 and + modified for the use with Octave and Matlab + + +Copyright (c) 2000-2019 Chih-Chung Chang and Chih-Jen Lin +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "svm.h" +int libsvm_version = LIBSVM_VERSION; +typedef float Qfloat; +typedef signed char schar; +#ifndef min +template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } +#endif +template static inline void swap(T& x, T& y) { T t=x; x=y; y=t; } +template static inline void clone(T*& dst, S* src, int n) +{ + dst = new T[n]; + memcpy((void *)dst,(void *)src,sizeof(T)*n); +} +static inline double powi(double base, int times) +{ + double tmp = base, ret = 1.0; + + for(int t=times; t>0; t/=2) + { + if(t%2==1) ret*=tmp; + tmp = tmp * tmp; + } + return ret; +} +#define INF HUGE_VAL +#define TAU 1e-12 +#define Malloc(type,n) (type *)malloc((n)*sizeof(type)) + +static void print_string_stdout(const char *s) +{ + fputs(s,stdout); + fflush(stdout); +} +static void (*svm_print_string) (const char *) = &print_string_stdout; +#if 1 +static void info(const char *fmt,...) +{ + char buf[BUFSIZ]; + va_list ap; + va_start(ap,fmt); + vsprintf(buf,fmt,ap); + va_end(ap); + (*svm_print_string)(buf); +} +#else +static void info(const char *fmt,...) 
{} +#endif + +// +// Kernel Cache +// +// l is the number of total data items +// size is the cache size limit in bytes +// +class Cache +{ +public: + Cache(int l,long int size); + ~Cache(); + + // request data [0,len) + // return some position p where [p,len) need to be filled + // (p >= len if nothing needs to be filled) + int get_data(const int index, Qfloat **data, int len); + void swap_index(int i, int j); +private: + int l; + long int size; + struct head_t + { + head_t *prev, *next; // a circular list + Qfloat *data; + int len; // data[0,len) is cached in this entry + }; + + head_t *head; + head_t lru_head; + void lru_delete(head_t *h); + void lru_insert(head_t *h); +}; + +Cache::Cache(int l_,long int size_):l(l_),size(size_) +{ + head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0 + size /= sizeof(Qfloat); + size -= l * sizeof(head_t) / sizeof(Qfloat); + size = max(size, 2 * (long int) l); // cache must be large enough for two columns + lru_head.next = lru_head.prev = &lru_head; +} + +Cache::~Cache() +{ + for(head_t *h = lru_head.next; h != &lru_head; h=h->next) + free(h->data); + free(head); +} + +void Cache::lru_delete(head_t *h) +{ + // delete from current location + h->prev->next = h->next; + h->next->prev = h->prev; +} + +void Cache::lru_insert(head_t *h) +{ + // insert to last position + h->next = &lru_head; + h->prev = lru_head.prev; + h->prev->next = h; + h->next->prev = h; +} + +int Cache::get_data(const int index, Qfloat **data, int len) +{ + head_t *h = &head[index]; + if(h->len) lru_delete(h); + int more = len - h->len; + + if(more > 0) + { + // free old space + while(size < more) + { + head_t *old = lru_head.next; + lru_delete(old); + free(old->data); + size += old->len; + old->data = 0; + old->len = 0; + } + + // allocate new space + h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len); + size -= more; + swap(h->len,len); + } + + lru_insert(h); + *data = h->data; + return len; +} + +void Cache::swap_index(int i, int j) +{ + 
if(i==j) return; + + if(head[i].len) lru_delete(&head[i]); + if(head[j].len) lru_delete(&head[j]); + swap(head[i].data,head[j].data); + swap(head[i].len,head[j].len); + if(head[i].len) lru_insert(&head[i]); + if(head[j].len) lru_insert(&head[j]); + + if(i>j) swap(i,j); + for(head_t *h = lru_head.next; h!=&lru_head; h=h->next) + { + if(h->len > i) + { + if(h->len > j) + swap(h->data[i],h->data[j]); + else + { + // give up + lru_delete(h); + free(h->data); + size += h->len; + h->data = 0; + h->len = 0; + } + } + } +} + +// +// Kernel evaluation +// +// the static method k_function is for doing single kernel evaluation +// the constructor of Kernel prepares to calculate the l*l kernel matrix +// the member function get_Q is for getting one column from the Q Matrix +// +class QMatrix { +public: + virtual Qfloat *get_Q(int column, int len) const = 0; + virtual double *get_QD() const = 0; + virtual void swap_index(int i, int j) const = 0; + virtual ~QMatrix() {} +}; + +class Kernel: public QMatrix { +public: + Kernel(int l, svm_node * const * x, const svm_parameter& param); + virtual ~Kernel(); + + static double k_function(const svm_node *x, const svm_node *y, + const svm_parameter& param); + virtual Qfloat *get_Q(int column, int len) const = 0; + virtual double *get_QD() const = 0; + virtual void swap_index(int i, int j) const // no so const... 
+ { + swap(x[i],x[j]); + if(x_square) swap(x_square[i],x_square[j]); + } +protected: + + double (Kernel::*kernel_function)(int i, int j) const; + +private: + const svm_node **x; + double *x_square; + + // svm_parameter + const int kernel_type; + const int degree; + const double gamma; + const double coef0; + + static double dot(const svm_node *px, const svm_node *py); + double kernel_linear(int i, int j) const + { + return dot(x[i],x[j]); + } + double kernel_poly(int i, int j) const + { + return powi(gamma*dot(x[i],x[j])+coef0,degree); + } + double kernel_rbf(int i, int j) const + { + return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j]))); + } + double kernel_sigmoid(int i, int j) const + { + return tanh(gamma*dot(x[i],x[j])+coef0); + } + double kernel_precomputed(int i, int j) const + { + return x[i][(int)(x[j][0].value)].value; + } +}; + +Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param) +:kernel_type(param.kernel_type), degree(param.degree), + gamma(param.gamma), coef0(param.coef0) +{ + switch(kernel_type) + { + case LINEAR: + kernel_function = &Kernel::kernel_linear; + break; + case POLY: + kernel_function = &Kernel::kernel_poly; + break; + case RBF: + kernel_function = &Kernel::kernel_rbf; + break; + case SIGMOID: + kernel_function = &Kernel::kernel_sigmoid; + break; + case PRECOMPUTED: + kernel_function = &Kernel::kernel_precomputed; + break; + } + + clone(x,x_,l); + + if(kernel_type == RBF) + { + x_square = new double[l]; + for(int i=0;iindex != -1 && py->index != -1) + { + if(px->index == py->index) + { + sum += px->value * py->value; + ++px; + ++py; + } + else + { + if(px->index > py->index) + ++py; + else + ++px; + } + } + return sum; +} + +double Kernel::k_function(const svm_node *x, const svm_node *y, + const svm_parameter& param) +{ + switch(param.kernel_type) + { + case LINEAR: + return dot(x,y); + case POLY: + return powi(param.gamma*dot(x,y)+param.coef0,param.degree); + case RBF: + { + double sum = 0; + while(x->index 
!= -1 && y->index !=-1) + { + if(x->index == y->index) + { + double d = x->value - y->value; + sum += d*d; + ++x; + ++y; + } + else + { + if(x->index > y->index) + { + sum += y->value * y->value; + ++y; + } + else + { + sum += x->value * x->value; + ++x; + } + } + } + + while(x->index != -1) + { + sum += x->value * x->value; + ++x; + } + + while(y->index != -1) + { + sum += y->value * y->value; + ++y; + } + + return exp(-param.gamma*sum); + } + case SIGMOID: + return tanh(param.gamma*dot(x,y)+param.coef0); + case PRECOMPUTED: //x: test (validation), y: SV + return x[(int)(y->value)].value; + default: + return 0; // Unreachable + } +} + +// An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918 +// Solves: +// +// min 0.5(\alpha^T Q \alpha) + p^T \alpha +// +// y^T \alpha = \delta +// y_i = +1 or -1 +// 0 <= alpha_i <= Cp for y_i = 1 +// 0 <= alpha_i <= Cn for y_i = -1 +// +// Given: +// +// Q, p, y, Cp, Cn, and an initial feasible point \alpha +// l is the size of vectors and matrices +// eps is the stopping tolerance +// +// solution will be put in \alpha, objective value will be put in obj +// +class Solver { +public: + Solver() {}; + virtual ~Solver() {}; + + struct SolutionInfo { + double obj; + double rho; + double upper_bound_p; + double upper_bound_n; + double r; // for Solver_NU + }; + + void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_, + double *alpha_, double Cp, double Cn, double eps, + SolutionInfo* si, int shrinking); +protected: + int active_size; + schar *y; + double *G; // gradient of objective function + enum { LOWER_BOUND, UPPER_BOUND, FREE }; + char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE + double *alpha; + const QMatrix *Q; + const double *QD; + double eps; + double Cp,Cn; + double *p; + int *active_set; + double *G_bar; // gradient, if we treat free variables as 0 + int l; + bool unshrink; // XXX + + double get_C(int i) + { + return (y[i] > 0)? 
Cp : Cn; + } + void update_alpha_status(int i) + { + if(alpha[i] >= get_C(i)) + alpha_status[i] = UPPER_BOUND; + else if(alpha[i] <= 0) + alpha_status[i] = LOWER_BOUND; + else alpha_status[i] = FREE; + } + bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; } + bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; } + bool is_free(int i) { return alpha_status[i] == FREE; } + void swap_index(int i, int j); + void reconstruct_gradient(); + virtual int select_working_set(int &i, int &j); + virtual double calculate_rho(); + virtual void do_shrinking(); +private: + bool be_shrunk(int i, double Gmax1, double Gmax2); +}; + +void Solver::swap_index(int i, int j) +{ + Q->swap_index(i,j); + swap(y[i],y[j]); + swap(G[i],G[j]); + swap(alpha_status[i],alpha_status[j]); + swap(alpha[i],alpha[j]); + swap(p[i],p[j]); + swap(active_set[i],active_set[j]); + swap(G_bar[i],G_bar[j]); +} + +void Solver::reconstruct_gradient() +{ + // reconstruct inactive elements of G from G_bar and free variables + + if(active_size == l) return; + + int i,j; + int nr_free = 0; + + for(j=active_size;j 2*active_size*(l-active_size)) + { + for(i=active_size;iget_Q(i,active_size); + for(j=0;jget_Q(i,l); + double alpha_i = alpha[i]; + for(j=active_size;jl = l; + this->Q = &Q; + QD=Q.get_QD(); + clone(p, p_,l); + clone(y, y_,l); + clone(alpha,alpha_,l); + this->Cp = Cp; + this->Cn = Cn; + this->eps = eps; + unshrink = false; + + // initialize alpha_status + { + alpha_status = new char[l]; + for(int i=0;iINT_MAX/100 ? 
INT_MAX : 100*l); + int counter = min(l,1000)+1; + + while(iter < max_iter) + { + // show progress and do shrinking + + if(--counter == 0) + { + counter = min(l,1000); + if(shrinking) do_shrinking(); + info("."); + } + + int i,j; + if(select_working_set(i,j)!=0) + { + // reconstruct the whole gradient + reconstruct_gradient(); + // reset active set size and check + active_size = l; + info("*"); + if(select_working_set(i,j)!=0) + break; + else + counter = 1; // do shrinking next iteration + } + + ++iter; + + // update alpha[i] and alpha[j], handle bounds carefully + + const Qfloat *Q_i = Q.get_Q(i,active_size); + const Qfloat *Q_j = Q.get_Q(j,active_size); + + double C_i = get_C(i); + double C_j = get_C(j); + + double old_alpha_i = alpha[i]; + double old_alpha_j = alpha[j]; + + if(y[i]!=y[j]) + { + double quad_coef = QD[i]+QD[j]+2*Q_i[j]; + if (quad_coef <= 0) + quad_coef = TAU; + double delta = (-G[i]-G[j])/quad_coef; + double diff = alpha[i] - alpha[j]; + alpha[i] += delta; + alpha[j] += delta; + + if(diff > 0) + { + if(alpha[j] < 0) + { + alpha[j] = 0; + alpha[i] = diff; + } + } + else + { + if(alpha[i] < 0) + { + alpha[i] = 0; + alpha[j] = -diff; + } + } + if(diff > C_i - C_j) + { + if(alpha[i] > C_i) + { + alpha[i] = C_i; + alpha[j] = C_i - diff; + } + } + else + { + if(alpha[j] > C_j) + { + alpha[j] = C_j; + alpha[i] = C_j + diff; + } + } + } + else + { + double quad_coef = QD[i]+QD[j]-2*Q_i[j]; + if (quad_coef <= 0) + quad_coef = TAU; + double delta = (G[i]-G[j])/quad_coef; + double sum = alpha[i] + alpha[j]; + alpha[i] -= delta; + alpha[j] += delta; + + if(sum > C_i) + { + if(alpha[i] > C_i) + { + alpha[i] = C_i; + alpha[j] = sum - C_i; + } + } + else + { + if(alpha[j] < 0) + { + alpha[j] = 0; + alpha[i] = sum; + } + } + if(sum > C_j) + { + if(alpha[j] > C_j) + { + alpha[j] = C_j; + alpha[i] = sum - C_j; + } + } + else + { + if(alpha[i] < 0) + { + alpha[i] = 0; + alpha[j] = sum; + } + } + } + + // update G + + double delta_alpha_i = alpha[i] - old_alpha_i; + 
double delta_alpha_j = alpha[j] - old_alpha_j; + + for(int k=0;k= max_iter) + { + if(active_size < l) + { + // reconstruct the whole gradient to calculate objective value + reconstruct_gradient(); + active_size = l; + info("*"); + } + fprintf(stderr,"\nWARNING: reaching max number of iterations\n"); + } + + // calculate rho + + si->rho = calculate_rho(); + + // calculate objective value + { + double v = 0; + int i; + for(i=0;iobj = v/2; + } + + // put back the solution + { + for(int i=0;iupper_bound_p = Cp; + si->upper_bound_n = Cn; + + info("\noptimization finished, #iter = %d\n",iter); + + delete[] p; + delete[] y; + delete[] alpha; + delete[] alpha_status; + delete[] active_set; + delete[] G; + delete[] G_bar; +} + +// return 1 if already optimal, return 0 otherwise +int Solver::select_working_set(int &out_i, int &out_j) +{ + // return i,j such that + // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) + // j: minimizes the decrease of obj value + // (if quadratic coefficeint <= 0, replace it with tau) + // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) + + double Gmax = -INF; + double Gmax2 = -INF; + int Gmax_idx = -1; + int Gmin_idx = -1; + double obj_diff_min = INF; + + for(int t=0;t= Gmax) + { + Gmax = -G[t]; + Gmax_idx = t; + } + } + else + { + if(!is_lower_bound(t)) + if(G[t] >= Gmax) + { + Gmax = G[t]; + Gmax_idx = t; + } + } + + int i = Gmax_idx; + const Qfloat *Q_i = NULL; + if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1 + Q_i = Q->get_Q(i,active_size); + + for(int j=0;j= Gmax2) + Gmax2 = G[j]; + if (grad_diff > 0) + { + double obj_diff; + double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j]; + if (quad_coef > 0) + obj_diff = -(grad_diff*grad_diff)/quad_coef; + else + obj_diff = -(grad_diff*grad_diff)/TAU; + + if (obj_diff <= obj_diff_min) + { + Gmin_idx=j; + obj_diff_min = obj_diff; + } + } + } + } + else + { + if (!is_upper_bound(j)) + { + double grad_diff= Gmax-G[j]; + if (-G[j] >= Gmax2) + Gmax2 = -G[j]; + if (grad_diff > 0) + { + double 
obj_diff; + double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j]; + if (quad_coef > 0) + obj_diff = -(grad_diff*grad_diff)/quad_coef; + else + obj_diff = -(grad_diff*grad_diff)/TAU; + + if (obj_diff <= obj_diff_min) + { + Gmin_idx=j; + obj_diff_min = obj_diff; + } + } + } + } + } + + if(Gmax+Gmax2 < eps || Gmin_idx == -1) + return 1; + + out_i = Gmax_idx; + out_j = Gmin_idx; + return 0; +} + +bool Solver::be_shrunk(int i, double Gmax1, double Gmax2) +{ + if(is_upper_bound(i)) + { + if(y[i]==+1) + return(-G[i] > Gmax1); + else + return(-G[i] > Gmax2); + } + else if(is_lower_bound(i)) + { + if(y[i]==+1) + return(G[i] > Gmax2); + else + return(G[i] > Gmax1); + } + else + return(false); +} + +void Solver::do_shrinking() +{ + int i; + double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) } + double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) } + + // find maximal violating pair first + for(i=0;i= Gmax1) + Gmax1 = -G[i]; + } + if(!is_lower_bound(i)) + { + if(G[i] >= Gmax2) + Gmax2 = G[i]; + } + } + else + { + if(!is_upper_bound(i)) + { + if(-G[i] >= Gmax2) + Gmax2 = -G[i]; + } + if(!is_lower_bound(i)) + { + if(G[i] >= Gmax1) + Gmax1 = G[i]; + } + } + } + + if(unshrink == false && Gmax1 + Gmax2 <= eps*10) + { + unshrink = true; + reconstruct_gradient(); + active_size = l; + info("*"); + } + + for(i=0;i i) + { + if (!be_shrunk(active_size, Gmax1, Gmax2)) + { + swap_index(i,active_size); + break; + } + active_size--; + } + } +} + +double Solver::calculate_rho() +{ + double r; + int nr_free = 0; + double ub = INF, lb = -INF, sum_free = 0; + for(int i=0;i0) + r = sum_free/nr_free; + else + r = (ub+lb)/2; + + return r; +} + +// +// Solver for nu-svm classification and regression +// +// additional constraint: e^T \alpha = constant +// +class Solver_NU: public Solver +{ +public: + Solver_NU() {} + void Solve(int l, const QMatrix& Q, const double *p, const schar *y, + double *alpha, double Cp, double Cn, double eps, + SolutionInfo* si, int shrinking) + { + 
this->si = si; + Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking); + } +private: + SolutionInfo *si; + int select_working_set(int &i, int &j); + double calculate_rho(); + bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4); + void do_shrinking(); +}; + +// return 1 if already optimal, return 0 otherwise +int Solver_NU::select_working_set(int &out_i, int &out_j) +{ + // return i,j such that y_i = y_j and + // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) + // j: minimizes the decrease of obj value + // (if quadratic coefficeint <= 0, replace it with tau) + // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) + + double Gmaxp = -INF; + double Gmaxp2 = -INF; + int Gmaxp_idx = -1; + + double Gmaxn = -INF; + double Gmaxn2 = -INF; + int Gmaxn_idx = -1; + + int Gmin_idx = -1; + double obj_diff_min = INF; + + for(int t=0;t= Gmaxp) + { + Gmaxp = -G[t]; + Gmaxp_idx = t; + } + } + else + { + if(!is_lower_bound(t)) + if(G[t] >= Gmaxn) + { + Gmaxn = G[t]; + Gmaxn_idx = t; + } + } + + int ip = Gmaxp_idx; + int in = Gmaxn_idx; + const Qfloat *Q_ip = NULL; + const Qfloat *Q_in = NULL; + if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1 + Q_ip = Q->get_Q(ip,active_size); + if(in != -1) + Q_in = Q->get_Q(in,active_size); + + for(int j=0;j= Gmaxp2) + Gmaxp2 = G[j]; + if (grad_diff > 0) + { + double obj_diff; + double quad_coef = QD[ip]+QD[j]-2*Q_ip[j]; + if (quad_coef > 0) + obj_diff = -(grad_diff*grad_diff)/quad_coef; + else + obj_diff = -(grad_diff*grad_diff)/TAU; + + if (obj_diff <= obj_diff_min) + { + Gmin_idx=j; + obj_diff_min = obj_diff; + } + } + } + } + else + { + if (!is_upper_bound(j)) + { + double grad_diff=Gmaxn-G[j]; + if (-G[j] >= Gmaxn2) + Gmaxn2 = -G[j]; + if (grad_diff > 0) + { + double obj_diff; + double quad_coef = QD[in]+QD[j]-2*Q_in[j]; + if (quad_coef > 0) + obj_diff = -(grad_diff*grad_diff)/quad_coef; + else + obj_diff = -(grad_diff*grad_diff)/TAU; + + if (obj_diff <= obj_diff_min) + { + Gmin_idx=j; + obj_diff_min = 
obj_diff; + } + } + } + } + } + + if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps || Gmin_idx == -1) + return 1; + + if (y[Gmin_idx] == +1) + out_i = Gmaxp_idx; + else + out_i = Gmaxn_idx; + out_j = Gmin_idx; + + return 0; +} + +bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4) +{ + if(is_upper_bound(i)) + { + if(y[i]==+1) + return(-G[i] > Gmax1); + else + return(-G[i] > Gmax4); + } + else if(is_lower_bound(i)) + { + if(y[i]==+1) + return(G[i] > Gmax2); + else + return(G[i] > Gmax3); + } + else + return(false); +} + +void Solver_NU::do_shrinking() +{ + double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) } + double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) } + double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) } + double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) } + + // find maximal violating pair first + int i; + for(i=0;i Gmax1) Gmax1 = -G[i]; + } + else if(-G[i] > Gmax4) Gmax4 = -G[i]; + } + if(!is_lower_bound(i)) + { + if(y[i]==+1) + { + if(G[i] > Gmax2) Gmax2 = G[i]; + } + else if(G[i] > Gmax3) Gmax3 = G[i]; + } + } + + if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) + { + unshrink = true; + reconstruct_gradient(); + active_size = l; + } + + for(i=0;i i) + { + if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4)) + { + swap_index(i,active_size); + break; + } + active_size--; + } + } +} + +double Solver_NU::calculate_rho() +{ + int nr_free1 = 0,nr_free2 = 0; + double ub1 = INF, ub2 = INF; + double lb1 = -INF, lb2 = -INF; + double sum_free1 = 0, sum_free2 = 0; + + for(int i=0;i 0) + r1 = sum_free1/nr_free1; + else + r1 = (ub1+lb1)/2; + + if(nr_free2 > 0) + r2 = sum_free2/nr_free2; + else + r2 = (ub2+lb2)/2; + + si->r = (r1+r2)/2; + return (r1-r2)/2; +} + +// +// Q matrices for various formulations +// +class SVC_Q: public Kernel +{ +public: + SVC_Q(const svm_problem& prob, const svm_parameter& param, const 
schar *y_) + :Kernel(prob.l, prob.x, param) + { + clone(y,y_,prob.l); + cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20))); + QD = new double[prob.l]; + for(int i=0;i*kernel_function)(i,i); + } + + Qfloat *get_Q(int i, int len) const + { + Qfloat *data; + int start, j; + if((start = cache->get_data(i,&data,len)) < len) + { + for(j=start;j*kernel_function)(i,j)); + } + return data; + } + + double *get_QD() const + { + return QD; + } + + void swap_index(int i, int j) const + { + cache->swap_index(i,j); + Kernel::swap_index(i,j); + swap(y[i],y[j]); + swap(QD[i],QD[j]); + } + + ~SVC_Q() + { + delete[] y; + delete cache; + delete[] QD; + } +private: + schar *y; + Cache *cache; + double *QD; +}; + +class ONE_CLASS_Q: public Kernel +{ +public: + ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param) + :Kernel(prob.l, prob.x, param) + { + cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20))); + QD = new double[prob.l]; + for(int i=0;i*kernel_function)(i,i); + } + + Qfloat *get_Q(int i, int len) const + { + Qfloat *data; + int start, j; + if((start = cache->get_data(i,&data,len)) < len) + { + for(j=start;j*kernel_function)(i,j); + } + return data; + } + + double *get_QD() const + { + return QD; + } + + void swap_index(int i, int j) const + { + cache->swap_index(i,j); + Kernel::swap_index(i,j); + swap(QD[i],QD[j]); + } + + ~ONE_CLASS_Q() + { + delete cache; + delete[] QD; + } +private: + Cache *cache; + double *QD; +}; + +class SVR_Q: public Kernel +{ +public: + SVR_Q(const svm_problem& prob, const svm_parameter& param) + :Kernel(prob.l, prob.x, param) + { + l = prob.l; + cache = new Cache(l,(long int)(param.cache_size*(1<<20))); + QD = new double[2*l]; + sign = new schar[2*l]; + index = new int[2*l]; + for(int k=0;k*kernel_function)(k,k); + QD[k+l] = QD[k]; + } + buffer[0] = new Qfloat[2*l]; + buffer[1] = new Qfloat[2*l]; + next_buffer = 0; + } + + void swap_index(int i, int j) const + { + swap(sign[i],sign[j]); + swap(index[i],index[j]); + 
swap(QD[i],QD[j]); + } + + Qfloat *get_Q(int i, int len) const + { + Qfloat *data; + int j, real_i = index[i]; + if(cache->get_data(real_i,&data,l) < l) + { + for(j=0;j*kernel_function)(real_i,j); + } + + // reorder and copy + Qfloat *buf = buffer[next_buffer]; + next_buffer = 1 - next_buffer; + schar si = sign[i]; + for(j=0;jl; + double *minus_ones = new double[l]; + schar *y = new schar[l]; + + int i; + + for(i=0;iy[i] > 0) y[i] = +1; else y[i] = -1; + } + + Solver s; + s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y, + alpha, Cp, Cn, param->eps, si, param->shrinking); + + double sum_alpha=0; + for(i=0;il)); + + for(i=0;il; + double nu = param->nu; + + schar *y = new schar[l]; + + for(i=0;iy[i]>0) + y[i] = +1; + else + y[i] = -1; + + double sum_pos = nu*l/2; + double sum_neg = nu*l/2; + + for(i=0;ieps, si, param->shrinking); + double r = si->r; + + info("C = %f\n",1/r); + + for(i=0;irho /= r; + si->obj /= (r*r); + si->upper_bound_p = 1/r; + si->upper_bound_n = 1/r; + + delete[] y; + delete[] zeros; +} + +static void solve_one_class( + const svm_problem *prob, const svm_parameter *param, + double *alpha, Solver::SolutionInfo* si) +{ + int l = prob->l; + double *zeros = new double[l]; + schar *ones = new schar[l]; + int i; + + int n = (int)(param->nu*prob->l); // # of alpha's at upper bound + + for(i=0;il) + alpha[n] = param->nu * prob->l - n; + for(i=n+1;ieps, si, param->shrinking); + + delete[] zeros; + delete[] ones; +} + +static void solve_epsilon_svr( + const svm_problem *prob, const svm_parameter *param, + double *alpha, Solver::SolutionInfo* si) +{ + int l = prob->l; + double *alpha2 = new double[2*l]; + double *linear_term = new double[2*l]; + schar *y = new schar[2*l]; + int i; + + for(i=0;ip - prob->y[i]; + y[i] = 1; + + alpha2[i+l] = 0; + linear_term[i+l] = param->p + prob->y[i]; + y[i+l] = -1; + } + + Solver s; + s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, + alpha2, param->C, param->C, param->eps, si, param->shrinking); + + double sum_alpha = 0; + 
for(i=0;iC*l)); + + delete[] alpha2; + delete[] linear_term; + delete[] y; +} + +static void solve_nu_svr( + const svm_problem *prob, const svm_parameter *param, + double *alpha, Solver::SolutionInfo* si) +{ + int l = prob->l; + double C = param->C; + double *alpha2 = new double[2*l]; + double *linear_term = new double[2*l]; + schar *y = new schar[2*l]; + int i; + + double sum = C * param->nu * l / 2; + for(i=0;iy[i]; + y[i] = 1; + + linear_term[i+l] = prob->y[i]; + y[i+l] = -1; + } + + Solver_NU s; + s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, + alpha2, C, C, param->eps, si, param->shrinking); + + info("epsilon = %f\n",-si->r); + + for(i=0;il); + Solver::SolutionInfo si; + switch(param->svm_type) + { + case C_SVC: + solve_c_svc(prob,param,alpha,&si,Cp,Cn); + break; + case NU_SVC: + solve_nu_svc(prob,param,alpha,&si); + break; + case ONE_CLASS: + solve_one_class(prob,param,alpha,&si); + break; + case EPSILON_SVR: + solve_epsilon_svr(prob,param,alpha,&si); + break; + case NU_SVR: + solve_nu_svr(prob,param,alpha,&si); + break; + } + + info("obj = %f, rho = %f\n",si.obj,si.rho); + + // output SVs + + int nSV = 0; + int nBSV = 0; + for(int i=0;il;i++) + { + if(fabs(alpha[i]) > 0) + { + ++nSV; + if(prob->y[i] > 0) + { + if(fabs(alpha[i]) >= si.upper_bound_p) + ++nBSV; + } + else + { + if(fabs(alpha[i]) >= si.upper_bound_n) + ++nBSV; + } + } + } + + info("nSV = %d, nBSV = %d\n",nSV,nBSV); + + decision_function f; + f.alpha = alpha; + f.rho = si.rho; + return f; +} + +// Platt's binary SVM Probablistic Output: an improvement from Lin et al. 
+static void sigmoid_train( + int l, const double *dec_values, const double *labels, + double& A, double& B) +{ + double prior1=0, prior0 = 0; + int i; + + for (i=0;i 0) prior1+=1; + else prior0+=1; + + int max_iter=100; // Maximal number of iterations + double min_step=1e-10; // Minimal step taken in line search + double sigma=1e-12; // For numerically strict PD of Hessian + double eps=1e-5; + double hiTarget=(prior1+1.0)/(prior1+2.0); + double loTarget=1/(prior0+2.0); + double *t=Malloc(double,l); + double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize; + double newA,newB,newf,d1,d2; + int iter; + + // Initial Point and Initial Fun Value + A=0.0; B=log((prior0+1.0)/(prior1+1.0)); + double fval = 0.0; + + for (i=0;i0) t[i]=hiTarget; + else t[i]=loTarget; + fApB = dec_values[i]*A+B; + if (fApB>=0) + fval += t[i]*fApB + log(1+exp(-fApB)); + else + fval += (t[i] - 1)*fApB +log(1+exp(fApB)); + } + for (iter=0;iter= 0) + { + p=exp(-fApB)/(1.0+exp(-fApB)); + q=1.0/(1.0+exp(-fApB)); + } + else + { + p=1.0/(1.0+exp(fApB)); + q=exp(fApB)/(1.0+exp(fApB)); + } + d2=p*q; + h11+=dec_values[i]*dec_values[i]*d2; + h22+=d2; + h21+=dec_values[i]*d2; + d1=t[i]-p; + g1+=dec_values[i]*d1; + g2+=d1; + } + + // Stopping Criteria + if (fabs(g1)= min_step) + { + newA = A + stepsize * dA; + newB = B + stepsize * dB; + + // New function value + newf = 0.0; + for (i=0;i= 0) + newf += t[i]*fApB + log(1+exp(-fApB)); + else + newf += (t[i] - 1)*fApB +log(1+exp(fApB)); + } + // Check sufficient decrease + if (newf=max_iter) + info("Reaching maximal iterations in two-class probability estimates\n"); + free(t); +} + +static double sigmoid_predict(double decision_value, double A, double B) +{ + double fApB = decision_value*A+B; + // 1-p used later; avoid catastrophic cancellation + if (fApB >= 0) + return exp(-fApB)/(1.0+exp(-fApB)); + else + return 1.0/(1+exp(fApB)) ; +} + +// Method 2 from the multiclass_prob paper by Wu, Lin, and Weng +static void multiclass_probability(int k, double **r, 
double *p) +{ + int t,j; + int iter = 0, max_iter=max(100,k); + double **Q=Malloc(double *,k); + double *Qp=Malloc(double,k); + double pQp, eps=0.005/k; + + for (t=0;tmax_error) + max_error=error; + } + if (max_error=max_iter) + info("Exceeds max_iter in multiclass_prob\n"); + for(t=0;tl); + double *dec_values = Malloc(double,prob->l); + + // random shuffle + for(i=0;il;i++) perm[i]=i; + for(i=0;il;i++) + { + int j = i+rand()%(prob->l-i); + swap(perm[i],perm[j]); + } + for(i=0;il/nr_fold; + int end = (i+1)*prob->l/nr_fold; + int j,k; + struct svm_problem subprob; + + subprob.l = prob->l-(end-begin); + subprob.x = Malloc(struct svm_node*,subprob.l); + subprob.y = Malloc(double,subprob.l); + + k=0; + for(j=0;jx[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + ++k; + } + for(j=end;jl;j++) + { + subprob.x[k] = prob->x[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + ++k; + } + int p_count=0,n_count=0; + for(j=0;j0) + p_count++; + else + n_count++; + + if(p_count==0 && n_count==0) + for(j=begin;j 0 && n_count == 0) + for(j=begin;j 0) + for(j=begin;jx[perm[j]],&(dec_values[perm[j]])); + // ensure +1 -1 order; reason not using CV subroutine + dec_values[perm[j]] *= submodel->label[0]; + } + svm_free_and_destroy_model(&submodel); + svm_destroy_param(&subparam); + } + free(subprob.x); + free(subprob.y); + } + sigmoid_train(prob->l,dec_values,prob->y,probA,probB); + free(dec_values); + free(perm); +} + +// Return parameter of a Laplace distribution +static double svm_svr_probability( + const svm_problem *prob, const svm_parameter *param) +{ + int i; + int nr_fold = 5; + double *ymv = Malloc(double,prob->l); + double mae = 0; + + svm_parameter newparam = *param; + newparam.probability = 0; + svm_cross_validation(prob,&newparam,nr_fold,ymv); + for(i=0;il;i++) + { + ymv[i]=prob->y[i]-ymv[i]; + mae += fabs(ymv[i]); + } + mae /= prob->l; + double std=sqrt(2*mae*mae); + int count=0; + mae=0; + for(i=0;il;i++) + if (fabs(ymv[i]) > 5*std) + count=count+1; + else + mae+=fabs(ymv[i]); + 
mae /= (prob->l-count); + info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae); + free(ymv); + return mae; +} + + +// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data +// perm, length l, must be allocated before calling this subroutine +static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm) +{ + int l = prob->l; + int max_nr_class = 16; + int nr_class = 0; + int *label = Malloc(int,max_nr_class); + int *count = Malloc(int,max_nr_class); + int *data_label = Malloc(int,l); + int i; + + for(i=0;iy[i]; + int j; + for(j=0;jparam = *param; + model->free_sv = 0; // XXX + + if(param->svm_type == ONE_CLASS || + param->svm_type == EPSILON_SVR || + param->svm_type == NU_SVR) + { + // regression or one-class-svm + model->nr_class = 2; + model->label = NULL; + model->nSV = NULL; + model->probA = NULL; model->probB = NULL; + model->sv_coef = Malloc(double *,1); + + if(param->probability && + (param->svm_type == EPSILON_SVR || + param->svm_type == NU_SVR)) + { + model->probA = Malloc(double,1); + model->probA[0] = svm_svr_probability(prob,param); + } + + decision_function f = svm_train_one(prob,param,0,0); + model->rho = Malloc(double,1); + model->rho[0] = f.rho; + + int nSV = 0; + int i; + for(i=0;il;i++) + if(fabs(f.alpha[i]) > 0) ++nSV; + model->l = nSV; + model->SV = Malloc(svm_node *,nSV); + model->sv_coef[0] = Malloc(double,nSV); + model->sv_indices = Malloc(int,nSV); + int j = 0; + for(i=0;il;i++) + if(fabs(f.alpha[i]) > 0) + { + model->SV[j] = prob->x[i]; + model->sv_coef[0][j] = f.alpha[i]; + model->sv_indices[j] = i+1; + ++j; + } + + free(f.alpha); + } + else + { + // classification + int l = prob->l; + int nr_class; + int *label = NULL; + int *start = NULL; + int *count = NULL; + int *perm = Malloc(int,l); + + // group training data of the same class 
+ svm_group_classes(prob,&nr_class,&label,&start,&count,perm); + if(nr_class == 1) + info("WARNING: training data in only one class. See README for details.\n"); + + svm_node **x = Malloc(svm_node *,l); + int i; + for(i=0;ix[perm[i]]; + + // calculate weighted C + + double *weighted_C = Malloc(double, nr_class); + for(i=0;iC; + for(i=0;inr_weight;i++) + { + int j; + for(j=0;jweight_label[i] == label[j]) + break; + if(j == nr_class) + fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]); + else + weighted_C[j] *= param->weight[i]; + } + + // train k*(k-1)/2 models + + bool *nonzero = Malloc(bool,l); + for(i=0;iprobability) + { + probA=Malloc(double,nr_class*(nr_class-1)/2); + probB=Malloc(double,nr_class*(nr_class-1)/2); + } + + int p = 0; + for(i=0;iprobability) + svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]); + + f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]); + for(k=0;k 0) + nonzero[si+k] = true; + for(k=0;k 0) + nonzero[sj+k] = true; + free(sub_prob.x); + free(sub_prob.y); + ++p; + } + + // build output + + model->nr_class = nr_class; + + model->label = Malloc(int,nr_class); + for(i=0;ilabel[i] = label[i]; + + model->rho = Malloc(double,nr_class*(nr_class-1)/2); + for(i=0;irho[i] = f[i].rho; + + if(param->probability) + { + model->probA = Malloc(double,nr_class*(nr_class-1)/2); + model->probB = Malloc(double,nr_class*(nr_class-1)/2); + for(i=0;iprobA[i] = probA[i]; + model->probB[i] = probB[i]; + } + } + else + { + model->probA=NULL; + model->probB=NULL; + } + + int total_sv = 0; + int *nz_count = Malloc(int,nr_class); + model->nSV = Malloc(int,nr_class); + for(i=0;inSV[i] = nSV; + nz_count[i] = nSV; + } + + info("Total nSV = %d\n",total_sv); + + model->l = total_sv; + model->SV = Malloc(svm_node *,total_sv); + model->sv_indices = Malloc(int,total_sv); + p = 0; + for(i=0;iSV[p] = x[i]; + model->sv_indices[p++] = perm[i] + 1; + } + + int *nz_start = 
Malloc(int,nr_class); + nz_start[0] = 0; + for(i=1;isv_coef = Malloc(double *,nr_class-1); + for(i=0;isv_coef[i] = Malloc(double,total_sv); + + p = 0; + for(i=0;isv_coef[j-1][q++] = f[p].alpha[k]; + q = nz_start[j]; + for(k=0;ksv_coef[i][q++] = f[p].alpha[ci+k]; + ++p; + } + + free(label); + free(probA); + free(probB); + free(count); + free(perm); + free(start); + free(x); + free(weighted_C); + free(nonzero); + for(i=0;il; + int *perm = Malloc(int,l); + int nr_class; + if (nr_fold > l) + { + nr_fold = l; + fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n"); + } + fold_start = Malloc(int,nr_fold+1); + // stratified cv may not give leave-one-out rate + // Each class to l folds -> some folds may have zero elements + if((param->svm_type == C_SVC || + param->svm_type == NU_SVC) && nr_fold < l) + { + int *start = NULL; + int *label = NULL; + int *count = NULL; + svm_group_classes(prob,&nr_class,&label,&start,&count,perm); + + // random shuffle and then data grouped by fold using the array perm + int *fold_count = Malloc(int,nr_fold); + int c; + int *index = Malloc(int,l); + for(i=0;ix[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + ++k; + } + for(j=end;jx[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + ++k; + } + struct svm_model *submodel = svm_train(&subprob,param); + if(param->probability && + (param->svm_type == C_SVC || param->svm_type == NU_SVC)) + { + double *prob_estimates=Malloc(double,svm_get_nr_class(submodel)); + for(j=begin;jx[perm[j]],prob_estimates); + free(prob_estimates); + } + else + for(j=begin;jx[perm[j]]); + svm_free_and_destroy_model(&submodel); + free(subprob.x); + free(subprob.y); + } + free(fold_start); + free(perm); +} + + +int svm_get_svm_type(const svm_model *model) +{ + return model->param.svm_type; +} + +int svm_get_nr_class(const svm_model *model) +{ + return model->nr_class; +} + +void svm_get_labels(const svm_model *model, int* label) +{ + if (model->label != NULL) + 
for(int i=0;inr_class;i++) + label[i] = model->label[i]; +} + +void svm_get_sv_indices(const svm_model *model, int* indices) +{ + if (model->sv_indices != NULL) + for(int i=0;il;i++) + indices[i] = model->sv_indices[i]; +} + +int svm_get_nr_sv(const svm_model *model) +{ + return model->l; +} + +double svm_get_svr_probability(const svm_model *model) +{ + if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) && + model->probA!=NULL) + return model->probA[0]; + else + { + fprintf(stderr,"Model doesn't contain information for SVR probability inference\n"); + return 0; + } +} + +double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values) +{ + int i; + if(model->param.svm_type == ONE_CLASS || + model->param.svm_type == EPSILON_SVR || + model->param.svm_type == NU_SVR) + { + double *sv_coef = model->sv_coef[0]; + double sum = 0; + for(i=0;il;i++) + sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param); + sum -= model->rho[0]; + *dec_values = sum; + + if(model->param.svm_type == ONE_CLASS) + return (sum>0)?1:-1; + else + return sum; + } + else + { + int nr_class = model->nr_class; + int l = model->l; + + double *kvalue = Malloc(double,l); + for(i=0;iSV[i],model->param); + + int *start = Malloc(int,nr_class); + start[0] = 0; + for(i=1;inSV[i-1]; + + int *vote = Malloc(int,nr_class); + for(i=0;inSV[i]; + int cj = model->nSV[j]; + + int k; + double *coef1 = model->sv_coef[j-1]; + double *coef2 = model->sv_coef[i]; + for(k=0;krho[p]; + dec_values[p] = sum; + + if(dec_values[p] > 0) + ++vote[i]; + else + ++vote[j]; + p++; + } + + int vote_max_idx = 0; + for(i=1;i vote[vote_max_idx]) + vote_max_idx = i; + + free(kvalue); + free(start); + free(vote); + return model->label[vote_max_idx]; + } +} + +double svm_predict(const svm_model *model, const svm_node *x) +{ + int nr_class = model->nr_class; + double *dec_values; + if(model->param.svm_type == ONE_CLASS || + model->param.svm_type == EPSILON_SVR || + 
model->param.svm_type == NU_SVR) + dec_values = Malloc(double, 1); + else + dec_values = Malloc(double, nr_class*(nr_class-1)/2); + double pred_result = svm_predict_values(model, x, dec_values); + free(dec_values); + return pred_result; +} + +double svm_predict_probability( + const svm_model *model, const svm_node *x, double *prob_estimates) +{ + if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) && + model->probA!=NULL && model->probB!=NULL) + { + int i; + int nr_class = model->nr_class; + double *dec_values = Malloc(double, nr_class*(nr_class-1)/2); + svm_predict_values(model, x, dec_values); + + double min_prob=1e-7; + double **pairwise_prob=Malloc(double *,nr_class); + for(i=0;iprobA[k],model->probB[k]),min_prob),1-min_prob); + pairwise_prob[j][i]=1-pairwise_prob[i][j]; + k++; + } + if (nr_class == 2) + { + prob_estimates[0] = pairwise_prob[0][1]; + prob_estimates[1] = pairwise_prob[1][0]; + } + else + multiclass_probability(nr_class,pairwise_prob,prob_estimates); + + int prob_max_idx = 0; + for(i=1;i prob_estimates[prob_max_idx]) + prob_max_idx = i; + for(i=0;ilabel[prob_max_idx]; + } + else + return svm_predict(model, x); +} + +static const char *svm_type_table[] = +{ + "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL +}; + +static const char *kernel_type_table[]= +{ + "linear","polynomial","rbf","sigmoid","precomputed",NULL +}; + +int svm_save_model(const char *model_file_name, const svm_model *model) +{ + FILE *fp = fopen(model_file_name,"w"); + if(fp==NULL) return -1; + + char *old_locale = setlocale(LC_ALL, NULL); + if (old_locale) { + old_locale = strdup(old_locale); + } + setlocale(LC_ALL, "C"); + + const svm_parameter& param = model->param; + + fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]); + fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]); + + if(param.kernel_type == POLY) + fprintf(fp,"degree %d\n", param.degree); + + if(param.kernel_type == POLY || param.kernel_type == RBF || 
param.kernel_type == SIGMOID) + fprintf(fp,"gamma %.17g\n", param.gamma); + + if(param.kernel_type == POLY || param.kernel_type == SIGMOID) + fprintf(fp,"coef0 %.17g\n", param.coef0); + + int nr_class = model->nr_class; + int l = model->l; + fprintf(fp, "nr_class %d\n", nr_class); + fprintf(fp, "total_sv %d\n",l); + + { + fprintf(fp, "rho"); + for(int i=0;irho[i]); + fprintf(fp, "\n"); + } + + if(model->label) + { + fprintf(fp, "label"); + for(int i=0;ilabel[i]); + fprintf(fp, "\n"); + } + + if(model->probA) // regression has probA only + { + fprintf(fp, "probA"); + for(int i=0;iprobA[i]); + fprintf(fp, "\n"); + } + if(model->probB) + { + fprintf(fp, "probB"); + for(int i=0;iprobB[i]); + fprintf(fp, "\n"); + } + + if(model->nSV) + { + fprintf(fp, "nr_sv"); + for(int i=0;inSV[i]); + fprintf(fp, "\n"); + } + + fprintf(fp, "SV\n"); + const double * const *sv_coef = model->sv_coef; + const svm_node * const *SV = model->SV; + + for(int i=0;ivalue)); + else + while(p->index != -1) + { + fprintf(fp,"%d:%.8g ",p->index,p->value); + p++; + } + fprintf(fp, "\n"); + } + + setlocale(LC_ALL, old_locale); + free(old_locale); + + if (ferror(fp) != 0 || fclose(fp) != 0) return -1; + else return 0; +} + +static char *line = NULL; +static int max_line_len; + +static char* readline(FILE *input) +{ + int len; + + if(fgets(line,max_line_len,input) == NULL) + return NULL; + + while(strrchr(line,'\n') == NULL) + { + max_line_len *= 2; + line = (char *) realloc(line,max_line_len); + len = (int) strlen(line); + if(fgets(line+len,max_line_len-len,input) == NULL) + break; + } + return line; +} + +// +// FSCANF helps to handle fscanf failures. +// Its do-while block avoids the ambiguity when +// if (...) 
+// FSCANF(); +// is used +// +#define FSCANF(_stream, _format, _var) do{ if (fscanf(_stream, _format, _var) != 1) return false; }while(0) +bool read_model_header(FILE *fp, svm_model* model) +{ + svm_parameter& param = model->param; + // parameters for training only won't be assigned, but arrays are assigned as NULL for safety + param.nr_weight = 0; + param.weight_label = NULL; + param.weight = NULL; + + char cmd[81]; + while(1) + { + FSCANF(fp,"%80s",cmd); + + if(strcmp(cmd,"svm_type")==0) + { + FSCANF(fp,"%80s",cmd); + int i; + for(i=0;svm_type_table[i];i++) + { + if(strcmp(svm_type_table[i],cmd)==0) + { + param.svm_type=i; + break; + } + } + if(svm_type_table[i] == NULL) + { + fprintf(stderr,"unknown svm type.\n"); + return false; + } + } + else if(strcmp(cmd,"kernel_type")==0) + { + FSCANF(fp,"%80s",cmd); + int i; + for(i=0;kernel_type_table[i];i++) + { + if(strcmp(kernel_type_table[i],cmd)==0) + { + param.kernel_type=i; + break; + } + } + if(kernel_type_table[i] == NULL) + { + fprintf(stderr,"unknown kernel function.\n"); + return false; + } + } + else if(strcmp(cmd,"degree")==0) + FSCANF(fp,"%d",¶m.degree); + else if(strcmp(cmd,"gamma")==0) + FSCANF(fp,"%lf",¶m.gamma); + else if(strcmp(cmd,"coef0")==0) + FSCANF(fp,"%lf",¶m.coef0); + else if(strcmp(cmd,"nr_class")==0) + FSCANF(fp,"%d",&model->nr_class); + else if(strcmp(cmd,"total_sv")==0) + FSCANF(fp,"%d",&model->l); + else if(strcmp(cmd,"rho")==0) + { + int n = model->nr_class * (model->nr_class-1)/2; + model->rho = Malloc(double,n); + for(int i=0;irho[i]); + } + else if(strcmp(cmd,"label")==0) + { + int n = model->nr_class; + model->label = Malloc(int,n); + for(int i=0;ilabel[i]); + } + else if(strcmp(cmd,"probA")==0) + { + int n = model->nr_class * (model->nr_class-1)/2; + model->probA = Malloc(double,n); + for(int i=0;iprobA[i]); + } + else if(strcmp(cmd,"probB")==0) + { + int n = model->nr_class * (model->nr_class-1)/2; + model->probB = Malloc(double,n); + for(int i=0;iprobB[i]); + } + else 
if(strcmp(cmd,"nr_sv")==0) + { + int n = model->nr_class; + model->nSV = Malloc(int,n); + for(int i=0;inSV[i]); + } + else if(strcmp(cmd,"SV")==0) + { + while(1) + { + int c = getc(fp); + if(c==EOF || c=='\n') break; + } + break; + } + else + { + fprintf(stderr,"unknown text in model file: [%s]\n",cmd); + return false; + } + } + + return true; + +} + +svm_model *svm_load_model(const char *model_file_name) +{ + FILE *fp = fopen(model_file_name,"rb"); + if(fp==NULL) return NULL; + + char *old_locale = setlocale(LC_ALL, NULL); + if (old_locale) { + old_locale = strdup(old_locale); + } + setlocale(LC_ALL, "C"); + + // read parameters + + svm_model *model = Malloc(svm_model,1); + model->rho = NULL; + model->probA = NULL; + model->probB = NULL; + model->sv_indices = NULL; + model->label = NULL; + model->nSV = NULL; + + // read header + if (!read_model_header(fp, model)) + { + fprintf(stderr, "ERROR: fscanf failed to read model\n"); + setlocale(LC_ALL, old_locale); + free(old_locale); + free(model->rho); + free(model->label); + free(model->nSV); + free(model); + return NULL; + } + + // read sv_coef and SV + + int elements = 0; + long pos = ftell(fp); + + max_line_len = 1024; + line = Malloc(char,max_line_len); + char *p,*endptr,*idx,*val; + + while(readline(fp)!=NULL) + { + p = strtok(line,":"); + while(1) + { + p = strtok(NULL,":"); + if(p == NULL) + break; + ++elements; + } + } + elements += model->l; + + fseek(fp,pos,SEEK_SET); + + int m = model->nr_class - 1; + int l = model->l; + model->sv_coef = Malloc(double *,m); + int i; + for(i=0;isv_coef[i] = Malloc(double,l); + model->SV = Malloc(svm_node*,l); + svm_node *x_space = NULL; + if(l>0) x_space = Malloc(svm_node,elements); + + int j=0; + for(i=0;iSV[i] = &x_space[j]; + + p = strtok(line, " \t"); + model->sv_coef[0][i] = strtod(p,&endptr); + for(int k=1;ksv_coef[k][i] = strtod(p,&endptr); + } + + while(1) + { + idx = strtok(NULL, ":"); + val = strtok(NULL, " \t"); + + if(val == NULL) + break; + x_space[j].index = 
(int) strtol(idx,&endptr,10); + x_space[j].value = strtod(val,&endptr); + + ++j; + } + x_space[j++].index = -1; + } + free(line); + + setlocale(LC_ALL, old_locale); + free(old_locale); + + if (ferror(fp) != 0 || fclose(fp) != 0) + return NULL; + + model->free_sv = 1; // XXX + return model; +} + +void svm_free_model_content(svm_model* model_ptr) +{ + if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL) + free((void *)(model_ptr->SV[0])); + if(model_ptr->sv_coef) + { + for(int i=0;inr_class-1;i++) + free(model_ptr->sv_coef[i]); + } + + free(model_ptr->SV); + model_ptr->SV = NULL; + + free(model_ptr->sv_coef); + model_ptr->sv_coef = NULL; + + free(model_ptr->rho); + model_ptr->rho = NULL; + + free(model_ptr->label); + model_ptr->label= NULL; + + free(model_ptr->probA); + model_ptr->probA = NULL; + + free(model_ptr->probB); + model_ptr->probB= NULL; + + free(model_ptr->sv_indices); + model_ptr->sv_indices = NULL; + + free(model_ptr->nSV); + model_ptr->nSV = NULL; +} + +void svm_free_and_destroy_model(svm_model** model_ptr_ptr) +{ + if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL) + { + svm_free_model_content(*model_ptr_ptr); + free(*model_ptr_ptr); + *model_ptr_ptr = NULL; + } +} + +void svm_destroy_param(svm_parameter* param) +{ + free(param->weight_label); + free(param->weight); +} + +const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param) +{ + // svm_type + + int svm_type = param->svm_type; + if(svm_type != C_SVC && + svm_type != NU_SVC && + svm_type != ONE_CLASS && + svm_type != EPSILON_SVR && + svm_type != NU_SVR) + return "unknown svm type"; + + // kernel_type, degree + + int kernel_type = param->kernel_type; + if(kernel_type != LINEAR && + kernel_type != POLY && + kernel_type != RBF && + kernel_type != SIGMOID && + kernel_type != PRECOMPUTED) + return "unknown kernel type"; + + if(param->gamma < 0) + return "gamma < 0"; + + if(param->degree < 0) + return "degree of polynomial kernel < 0"; + + // 
cache_size,eps,C,nu,p,shrinking + + if(param->cache_size <= 0) + return "cache_size <= 0"; + + if(param->eps <= 0) + return "eps <= 0"; + + if(svm_type == C_SVC || + svm_type == EPSILON_SVR || + svm_type == NU_SVR) + if(param->C <= 0) + return "C <= 0"; + + if(svm_type == NU_SVC || + svm_type == ONE_CLASS || + svm_type == NU_SVR) + if(param->nu <= 0 || param->nu > 1) + return "nu <= 0 or nu > 1"; + + if(svm_type == EPSILON_SVR) + if(param->p < 0) + return "p < 0"; + + if(param->shrinking != 0 && + param->shrinking != 1) + return "shrinking != 0 and shrinking != 1"; + + if(param->probability != 0 && + param->probability != 1) + return "probability != 0 and probability != 1"; + + if(param->probability == 1 && + svm_type == ONE_CLASS) + return "one-class SVM probability output not supported yet"; + + + // check whether nu-svc is feasible + + if(svm_type == NU_SVC) + { + int l = prob->l; + int max_nr_class = 16; + int nr_class = 0; + int *label = Malloc(int,max_nr_class); + int *count = Malloc(int,max_nr_class); + + int i; + for(i=0;iy[i]; + int j; + for(j=0;jnu*(n1+n2)/2 > min(n1,n2)) + { + free(label); + free(count); + return "specified nu is infeasible"; + } + } + } + free(label); + free(count); + } + + return NULL; +} + +int svm_check_probability_model(const svm_model *model) +{ + return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) && + model->probA!=NULL && model->probB!=NULL) || + ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) && + model->probA!=NULL); +} + +void svm_set_print_string_function(void (*print_func)(const char *)) +{ + if(print_func == NULL) + svm_print_string = &print_string_stdout; + else + svm_print_string = print_func; +} diff --git a/src/svm.h b/src/svm.h new file mode 100644 index 0000000..d4dbee9 --- /dev/null +++ b/src/svm.h @@ -0,0 +1,141 @@ +/* + This code was extracted from libsvm 3.2.3 in Feb 2019 and + modified for the use with Octave and Matlab + + +Copyright (c) 2000-2019 Chih-Chung 
Chang and Chih-Jen Lin +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef _LIBSVM_H +#define _LIBSVM_H + +#define LIBSVM_VERSION 323 + +#ifdef __cplusplus +extern "C" { +#endif + +extern int libsvm_version; + +struct svm_node +{ + int index; + double value; +}; + +struct svm_problem +{ + int l; + double *y; + struct svm_node **x; +}; + +enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR }; /* svm_type */ +enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */ + +struct svm_parameter +{ + int svm_type; + int kernel_type; + int degree; /* for poly */ + double gamma; /* for poly/rbf/sigmoid */ + double coef0; /* for poly/sigmoid */ + + /* these are for training only */ + double cache_size; /* in MB */ + double eps; /* stopping criteria */ + double C; /* for C_SVC, EPSILON_SVR and NU_SVR */ + int nr_weight; /* for C_SVC */ + int *weight_label; /* for C_SVC */ + double* weight; /* for C_SVC */ + double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */ + double p; /* for EPSILON_SVR */ + int shrinking; /* use the shrinking heuristics */ + int probability; /* do probability estimates */ +}; + +// +// svm_model +// +struct svm_model +{ + struct svm_parameter param; /* parameter */ + int nr_class; /* number of classes, = 2 in regression/one class svm */ + int l; /* total #SV */ + struct svm_node **SV; /* SVs (SV[l]) */ + double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */ + double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */ + double *probA; /* pariwise probability information */ + double *probB; + int *sv_indices; /* sv_indices[0,...,nSV-1] are values in [1,...,num_traning_data] to indicate SVs in the training set */ + + /* for classification only */ + + int *label; /* label of each class (label[k]) */ + int *nSV; /* number of SVs for each class (nSV[k]) */ + /* nSV[0] + nSV[1] + ... 
+ nSV[k-1] = l */ + /* XXX */ + int free_sv; /* 1 if svm_model is created by svm_load_model*/ + /* 0 if svm_model is created by svm_train */ +}; + +struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param); +void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target); + +int svm_save_model(const char *model_file_name, const struct svm_model *model); +struct svm_model *svm_load_model(const char *model_file_name); + +int svm_get_svm_type(const struct svm_model *model); +int svm_get_nr_class(const struct svm_model *model); +void svm_get_labels(const struct svm_model *model, int *label); +void svm_get_sv_indices(const struct svm_model *model, int *sv_indices); +int svm_get_nr_sv(const struct svm_model *model); +double svm_get_svr_probability(const struct svm_model *model); + +double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values); +double svm_predict(const struct svm_model *model, const struct svm_node *x); +double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates); + +void svm_free_model_content(struct svm_model *model_ptr); +void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr); +void svm_destroy_param(struct svm_parameter *param); + +const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param); +int svm_check_probability_model(const struct svm_model *model); + +void svm_set_print_string_function(void (*print_func)(const char *)); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBSVM_H */ diff --git a/src/svm_model_matlab.c b/src/svm_model_matlab.c new file mode 100644 index 0000000..ba02385 --- /dev/null +++ b/src/svm_model_matlab.c @@ -0,0 +1,411 @@ +/* + This code was extracted from libsvm 3.2.3 in Feb 2019 and + modified for the use with Octave and Matlab + + +Copyright (c) 2000-2019 Chih-Chung Chang and Chih-Jen Lin 
+All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include +#include +#include "svm.h" +#include "svm_model_matlab.h" + + +#ifdef MX_API_VER +#if MX_API_VER < 0x07030000 +typedef int mwIndex; +#endif +#endif + +#define NUM_OF_RETURN_FIELD 11 + +#define Malloc(type,n) (type *)malloc((n)*sizeof(type)) + +static const char *field_names[] = { + "Parameters", + "nr_class", + "totalSV", + "rho", + "Label", + "sv_indices", + "ProbA", + "ProbB", + "nSV", + "sv_coef", + "SVs" +}; + +const char *model_to_matlab_structure(mxArray *plhs[], int num_of_feature, struct svm_model *model) +{ + int i, j, n; + double *ptr; + mxArray *return_model, **rhs; + int out_id = 0; + + rhs = (mxArray **)mxMalloc(sizeof(mxArray *)*NUM_OF_RETURN_FIELD); + + // Parameters + rhs[out_id] = mxCreateDoubleMatrix(5, 1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + ptr[0] = model->param.svm_type; + ptr[1] = model->param.kernel_type; + ptr[2] = model->param.degree; + ptr[3] = model->param.gamma; + ptr[4] = model->param.coef0; + out_id++; + + // nr_class + rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + ptr[0] = model->nr_class; + out_id++; + + // total SV + rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + ptr[0] = model->l; + out_id++; + + // rho + n = model->nr_class*(model->nr_class-1)/2; + rhs[out_id] = mxCreateDoubleMatrix(n, 1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + for(i = 0; i < n; i++) + ptr[i] = model->rho[i]; + out_id++; + + // Label + if(model->label) + { + rhs[out_id] = mxCreateDoubleMatrix(model->nr_class, 1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + for(i = 0; i < model->nr_class; i++) + ptr[i] = model->label[i]; + } + else + rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL); + out_id++; + + // sv_indices + if(model->sv_indices) + { + rhs[out_id] = mxCreateDoubleMatrix(model->l, 1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + for(i = 0; i < model->l; i++) + ptr[i] = model->sv_indices[i]; + } + else + rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL); + out_id++; + + // probA 
+ if(model->probA != NULL) + { + rhs[out_id] = mxCreateDoubleMatrix(n, 1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + for(i = 0; i < n; i++) + ptr[i] = model->probA[i]; + } + else + rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL); + out_id ++; + + // probB + if(model->probB != NULL) + { + rhs[out_id] = mxCreateDoubleMatrix(n, 1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + for(i = 0; i < n; i++) + ptr[i] = model->probB[i]; + } + else + rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL); + out_id++; + + // nSV + if(model->nSV) + { + rhs[out_id] = mxCreateDoubleMatrix(model->nr_class, 1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + for(i = 0; i < model->nr_class; i++) + ptr[i] = model->nSV[i]; + } + else + rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL); + out_id++; + + // sv_coef + rhs[out_id] = mxCreateDoubleMatrix(model->l, model->nr_class-1, mxREAL); + ptr = mxGetPr(rhs[out_id]); + for(i = 0; i < model->nr_class-1; i++) + for(j = 0; j < model->l; j++) + ptr[(i*(model->l))+j] = model->sv_coef[i][j]; + out_id++; + + // SVs + { + int ir_index, nonzero_element; + mwIndex *ir, *jc; + mxArray *pprhs[1], *pplhs[1]; + + if(model->param.kernel_type == PRECOMPUTED) + { + nonzero_element = model->l; + num_of_feature = 1; + } + else + { + nonzero_element = 0; + for(i = 0; i < model->l; i++) { + j = 0; + while(model->SV[i][j].index != -1) + { + nonzero_element++; + j++; + } + } + } + + // SV in column, easier accessing + rhs[out_id] = mxCreateSparse(num_of_feature, model->l, nonzero_element, mxREAL); + ir = mxGetIr(rhs[out_id]); + jc = mxGetJc(rhs[out_id]); + ptr = mxGetPr(rhs[out_id]); + jc[0] = ir_index = 0; + for(i = 0;i < model->l; i++) + { + if(model->param.kernel_type == PRECOMPUTED) + { + // make a (1 x model->l) matrix + ir[ir_index] = 0; + ptr[ir_index] = model->SV[i][0].value; + ir_index++; + jc[i+1] = jc[i] + 1; + } + else + { + int x_index = 0; + while (model->SV[i][x_index].index != -1) + { + ir[ir_index] = model->SV[i][x_index].index - 1; + ptr[ir_index] = 
model->SV[i][x_index].value; + ir_index++, x_index++; + } + jc[i+1] = jc[i] + x_index; + } + } + // transpose back to SV in row + pprhs[0] = rhs[out_id]; + if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose")) + return "cannot transpose SV matrix"; + rhs[out_id] = pplhs[0]; + out_id++; + } + + /* Create a struct matrix contains NUM_OF_RETURN_FIELD fields */ + return_model = mxCreateStructMatrix(1, 1, NUM_OF_RETURN_FIELD, field_names); + + /* Fill struct matrix with input arguments */ + for(i = 0; i < NUM_OF_RETURN_FIELD; i++) + mxSetField(return_model,0,field_names[i],mxDuplicateArray(rhs[i])); + /* return */ + plhs[0] = return_model; + mxFree(rhs); + + return NULL; +} + +struct svm_model *matlab_matrix_to_model(const mxArray *matlab_struct, const char **msg) +{ + int i, j, n, num_of_fields; + double *ptr; + int id = 0; + struct svm_node *x_space; + struct svm_model *model; + mxArray **rhs; + + num_of_fields = mxGetNumberOfFields(matlab_struct); + if(num_of_fields != NUM_OF_RETURN_FIELD) + { + *msg = "number of return field is not correct"; + return NULL; + } + rhs = (mxArray **) mxMalloc(sizeof(mxArray *)*num_of_fields); + + for(i=0;irho = NULL; + model->probA = NULL; + model->probB = NULL; + model->label = NULL; + model->sv_indices = NULL; + model->nSV = NULL; + model->free_sv = 1; // XXX + + ptr = mxGetPr(rhs[id]); + model->param.svm_type = (int)ptr[0]; + model->param.kernel_type = (int)ptr[1]; + model->param.degree = (int)ptr[2]; + model->param.gamma = ptr[3]; + model->param.coef0 = ptr[4]; + id++; + + ptr = mxGetPr(rhs[id]); + model->nr_class = (int)ptr[0]; + id++; + + ptr = mxGetPr(rhs[id]); + model->l = (int)ptr[0]; + id++; + + // rho + n = model->nr_class * (model->nr_class-1)/2; + model->rho = (double*) malloc(n*sizeof(double)); + ptr = mxGetPr(rhs[id]); + for(i=0;irho[i] = ptr[i]; + id++; + + // label + if(mxIsEmpty(rhs[id]) == 0) + { + model->label = (int*) malloc(model->nr_class*sizeof(int)); + ptr = mxGetPr(rhs[id]); + for(i=0;inr_class;i++) + 
model->label[i] = (int)ptr[i]; + } + id++; + + // sv_indices + if(mxIsEmpty(rhs[id]) == 0) + { + model->sv_indices = (int*) malloc(model->l*sizeof(int)); + ptr = mxGetPr(rhs[id]); + for(i=0;il;i++) + model->sv_indices[i] = (int)ptr[i]; + } + id++; + + // probA + if(mxIsEmpty(rhs[id]) == 0) + { + model->probA = (double*) malloc(n*sizeof(double)); + ptr = mxGetPr(rhs[id]); + for(i=0;iprobA[i] = ptr[i]; + } + id++; + + // probB + if(mxIsEmpty(rhs[id]) == 0) + { + model->probB = (double*) malloc(n*sizeof(double)); + ptr = mxGetPr(rhs[id]); + for(i=0;iprobB[i] = ptr[i]; + } + id++; + + // nSV + if(mxIsEmpty(rhs[id]) == 0) + { + model->nSV = (int*) malloc(model->nr_class*sizeof(int)); + ptr = mxGetPr(rhs[id]); + for(i=0;inr_class;i++) + model->nSV[i] = (int)ptr[i]; + } + id++; + + // sv_coef + ptr = mxGetPr(rhs[id]); + model->sv_coef = (double**) malloc((model->nr_class-1)*sizeof(double)); + for( i=0 ; i< model->nr_class -1 ; i++ ) + model->sv_coef[i] = (double*) malloc((model->l)*sizeof(double)); + for(i = 0; i < model->nr_class - 1; i++) + for(j = 0; j < model->l; j++) + model->sv_coef[i][j] = ptr[i*(model->l)+j]; + id++; + + // SV + { + int sr, elements; + int num_samples; + mwIndex *ir, *jc; + mxArray *pprhs[1], *pplhs[1]; + + // transpose SV + pprhs[0] = rhs[id]; + if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose")) + { + svm_free_and_destroy_model(&model); + *msg = "cannot transpose SV matrix"; + return NULL; + } + rhs[id] = pplhs[0]; + + sr = (int)mxGetN(rhs[id]); + + ptr = mxGetPr(rhs[id]); + ir = mxGetIr(rhs[id]); + jc = mxGetJc(rhs[id]); + + num_samples = (int)mxGetNzmax(rhs[id]); + + elements = num_samples + sr; + + model->SV = (struct svm_node **) malloc(sr * sizeof(struct svm_node *)); + x_space = (struct svm_node *)malloc(elements * sizeof(struct svm_node)); + + // SV is in column + for(i=0;iSV[i] = &x_space[low+i]; + for(j=low;jSV[i][x_index].index = (int)ir[j] + 1; + model->SV[i][x_index].value = ptr[j]; + x_index++; + } + model->SV[i][x_index].index = -1; 
+ } + + id++; + } + mxFree(rhs); + + return model; +} diff --git a/src/svm_model_matlab.h b/src/svm_model_matlab.h new file mode 100644 index 0000000..3c9cc1c --- /dev/null +++ b/src/svm_model_matlab.h @@ -0,0 +1,52 @@ +/* + This code was extracted from libsvm 3.2.3 in Feb 2019 and + modified for the use with Octave and Matlab + + +Copyright (c) 2000-2019 Chih-Chung Chang and Chih-Jen Lin +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "mex.h" + +#ifdef __cplusplus +extern "C" { +#endif + +const char *model_to_matlab_structure(mxArray *plhs[], int num_of_feature, struct svm_model *model); +struct svm_model *matlab_matrix_to_model(const mxArray *matlab_struct, const char **error_message); + + +#ifdef __cplusplus +} +#endif + diff --git a/src/svmpredict_mex.cpp b/src/svmpredict_mex.cpp new file mode 100644 index 0000000..2fbe1a6 --- /dev/null +++ b/src/svmpredict_mex.cpp @@ -0,0 +1,408 @@ +/* + This code was extracted from libsvm 3.2.3 in Feb 2019 and + modified for the use with Octave and Matlab + + +Copyright (c) 2000-2019 Chih-Chung Chang and Chih-Jen Lin +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include +#include +#include "svm.h" + +#include "mex.h" +#include "svm_model_matlab.h" + +#ifdef MX_API_VER +#if MX_API_VER < 0x07030000 +typedef int mwIndex; +#endif +#endif + +#define CMD_LEN 2048 + +int print_null(const char *s,...) {return 0;} +int (*info)(const char *fmt,...) = &mexPrintf; + +void read_sparse_instance(const mxArray *prhs, int index, struct svm_node *x) +{ + int i, j, low, high; + mwIndex *ir, *jc; + double *samples; + + ir = mxGetIr(prhs); + jc = mxGetJc(prhs); + samples = mxGetPr(prhs); + + // each column is one instance + j = 0; + low = (int)jc[index], high = (int)jc[index+1]; + for(i=low;iparam.kernel_type == PRECOMPUTED) + { + // precomputed kernel requires dense matrix, so we make one + mxArray *rhs[1], *lhs[1]; + rhs[0] = mxDuplicateArray(prhs[1]); + if(mexCallMATLAB(1, lhs, 1, rhs, "full")) + { + mexPrintf("Error: cannot full testing instance matrix\n"); + fake_answer(nlhs, plhs); + return; + } + ptr_instance = mxGetPr(lhs[0]); + mxDestroyArray(rhs[0]); + } + else + { + mxArray *pprhs[1]; + pprhs[0] = mxDuplicateArray(prhs[1]); + if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose")) + { + mexPrintf("Error: cannot transpose testing instance matrix\n"); + fake_answer(nlhs, plhs); + return; + } + } + } + + if(predict_probability) + { + if(svm_type==NU_SVR || svm_type==EPSILON_SVR) + info("Prob. 
model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model)); + else + prob_estimates = (double *) malloc(nr_class*sizeof(double)); + } + + tplhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL); + if(predict_probability) + { + // prob estimates are in plhs[2] + if(svm_type==C_SVC || svm_type==NU_SVC) + tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL); + else + tplhs[2] = mxCreateDoubleMatrix(0, 0, mxREAL); + } + else + { + // decision values are in plhs[2] + if(svm_type == ONE_CLASS || + svm_type == EPSILON_SVR || + svm_type == NU_SVR || + nr_class == 1) // if only one class in training data, decision values are still returned. + tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL); + else + tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class*(nr_class-1)/2, mxREAL); + } + + ptr_predict_label = mxGetPr(tplhs[0]); + ptr_prob_estimates = mxGetPr(tplhs[2]); + ptr_dec_values = mxGetPr(tplhs[2]); + x = (struct svm_node*)malloc((feature_number+1)*sizeof(struct svm_node) ); + for(instance_index=0;instance_indexparam.kernel_type != PRECOMPUTED) // prhs[1]^T is still sparse + read_sparse_instance(pplhs[0], instance_index, x); + else + { + for(i=0;i 3 || nrhs > 4 || nrhs < 3) + { + exit_with_help(); + fake_answer(nlhs, plhs); + return; + } + + if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) { + mexPrintf("Error: label vector and instance matrix must be double\n"); + fake_answer(nlhs, plhs); + return; + } + + if(mxIsStruct(prhs[2])) + { + const char *error_msg; + + // parse options + if(nrhs==4) + { + int i, argc = 1; + char cmd[CMD_LEN], *argv[CMD_LEN/2]; + + // put options in argv[] + mxGetString(prhs[3], cmd, mxGetN(prhs[3]) + 1); + if((argv[argc] = strtok(cmd, " ")) != NULL) + while((argv[++argc] = strtok(NULL, " ")) != NULL) + ; + + for(i=1;i=argc) && argv[i-1][1] != 'q') + { + exit_with_help(); + fake_answer(nlhs, 
plhs); + return; + } + switch(argv[i-1][1]) + { + case 'b': + prob_estimate_flag = atoi(argv[i]); + break; + case 'q': + i--; + info = &print_null; + break; + default: + mexPrintf("Unknown option: -%c\n", argv[i-1][1]); + exit_with_help(); + fake_answer(nlhs, plhs); + return; + } + } + } + + model = matlab_matrix_to_model(prhs[2], &error_msg); + if (model == NULL) + { + mexPrintf("Error: can't read model: %s\n", error_msg); + fake_answer(nlhs, plhs); + return; + } + + if(prob_estimate_flag) + { + if(svm_check_probability_model(model)==0) + { + mexPrintf("Model does not support probabiliy estimates\n"); + fake_answer(nlhs, plhs); + svm_free_and_destroy_model(&model); + return; + } + } + else + { + if(svm_check_probability_model(model)!=0) + info("Model supports probability estimates, but disabled in predicton.\n"); + } + + predict(nlhs, plhs, prhs, model, prob_estimate_flag); + // destroy model + svm_free_and_destroy_model(&model); + } + else + { + mexPrintf("model file should be a struct array\n"); + fake_answer(nlhs, plhs); + } + + return; +} diff --git a/src/svmtrain_mex.cpp b/src/svmtrain_mex.cpp new file mode 100644 index 0000000..e2b11b8 --- /dev/null +++ b/src/svmtrain_mex.cpp @@ -0,0 +1,533 @@ +/* + This code was extracted from libsvm 3.2.3 in Feb 2019 and + modified for the use with Octave and Matlab + + +Copyright (c) 2000-2019 Chih-Chung Chang and Chih-Jen Lin +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include +#include +#include +#include "svm.h" + +#include "mex.h" +#include "svm_model_matlab.h" + +#ifdef MX_API_VER +#if MX_API_VER < 0x07030000 +typedef int mwIndex; +#endif +#endif + +#define CMD_LEN 2048 +#define Malloc(type,n) (type *)malloc((n)*sizeof(type)) + +void print_null(const char *s) {} +void print_string_matlab(const char *s) {mexPrintf(s);} + +void exit_with_help() +{ + mexPrintf( + "Usage: model = svmtrain(training_label_vector, training_instance_matrix, 'libsvm_options');\n" + "libsvm_options:\n" + "-s svm_type : set type of SVM (default 0)\n" + " 0 -- C-SVC (multi-class classification)\n" + " 1 -- nu-SVC (multi-class classification)\n" + " 2 -- one-class SVM\n" + " 3 -- epsilon-SVR (regression)\n" + " 4 -- nu-SVR (regression)\n" + "-t kernel_type : set type of kernel function (default 2)\n" + " 0 -- linear: u'*v\n" + " 1 -- polynomial: (gamma*u'*v + coef0)^degree\n" + " 2 -- radial basis function: exp(-gamma*|u-v|^2)\n" + " 3 -- sigmoid: tanh(gamma*u'*v + coef0)\n" + " 4 -- precomputed kernel 
(kernel values in training_instance_matrix)\n" + "-d degree : set degree in kernel function (default 3)\n" + "-g gamma : set gamma in kernel function (default 1/num_features)\n" + "-r coef0 : set coef0 in kernel function (default 0)\n" + "-c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)\n" + "-n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)\n" + "-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n" + "-m cachesize : set cache memory size in MB (default 100)\n" + "-e epsilon : set tolerance of termination criterion (default 0.001)\n" + "-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n" + "-b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)\n" + "-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)\n" + "-v n: n-fold cross validation mode\n" + "-q : quiet mode (no outputs)\n" + ); +} + +// svm arguments +struct svm_parameter param; // set by parse_command_line +struct svm_problem prob; // set by read_problem +struct svm_model *model; +struct svm_node *x_space; +int cross_validation; +int nr_fold; + + +double do_cross_validation() +{ + int i; + int total_correct = 0; + double total_error = 0; + double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0; + double *target = Malloc(double,prob.l); + double retval = 0.0; + + svm_cross_validation(&prob,¶m,nr_fold,target); + if(param.svm_type == EPSILON_SVR || + param.svm_type == NU_SVR) + { + for(i=0;i 2) + { + // put options in argv[] + mxGetString(prhs[2], cmd, mxGetN(prhs[2]) + 1); + if((argv[argc] = strtok(cmd, " ")) != NULL) + while((argv[++argc] = strtok(NULL, " ")) != NULL) + ; + } + + // parse options + for(i=1;i=argc && argv[i-1][1] != 'q') // since option -q has no parameter + return 1; + switch(argv[i-1][1]) + { + case 's': + param.svm_type = atoi(argv[i]); + break; + case 't': + param.kernel_type = atoi(argv[i]); + 
break; + case 'd': + param.degree = atoi(argv[i]); + break; + case 'g': + param.gamma = atof(argv[i]); + break; + case 'r': + param.coef0 = atof(argv[i]); + break; + case 'n': + param.nu = atof(argv[i]); + break; + case 'm': + param.cache_size = atof(argv[i]); + break; + case 'c': + param.C = atof(argv[i]); + break; + case 'e': + param.eps = atof(argv[i]); + break; + case 'p': + param.p = atof(argv[i]); + break; + case 'h': + param.shrinking = atoi(argv[i]); + break; + case 'b': + param.probability = atoi(argv[i]); + break; + case 'q': + print_func = &print_null; + i--; + break; + case 'v': + cross_validation = 1; + nr_fold = atoi(argv[i]); + if(nr_fold < 2) + { + mexPrintf("n-fold cross validation: n must >= 2\n"); + return 1; + } + break; + case 'w': + ++param.nr_weight; + param.weight_label = (int *)realloc(param.weight_label,sizeof(int)*param.nr_weight); + param.weight = (double *)realloc(param.weight,sizeof(double)*param.nr_weight); + param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]); + param.weight[param.nr_weight-1] = atof(argv[i]); + break; + default: + mexPrintf("Unknown option -%c\n", argv[i-1][1]); + return 1; + } + } + + svm_set_print_string_function(print_func); + + return 0; +} + +// read in a problem (in svmlight format) +int read_problem_dense(const mxArray *label_vec, const mxArray *instance_mat) +{ + // using size_t due to the output type of matlab functions + size_t i, j, k, l; + size_t elements, max_index, sc, label_vector_row_num; + double *samples, *labels; + + prob.x = NULL; + prob.y = NULL; + x_space = NULL; + + labels = mxGetPr(label_vec); + samples = mxGetPr(instance_mat); + sc = mxGetN(instance_mat); + + elements = 0; + // number of instances + l = mxGetM(instance_mat); + label_vector_row_num = mxGetM(label_vec); + prob.l = (int)l; + + if(label_vector_row_num!=l) + { + mexPrintf("Length of label vector does not match # of instances.\n"); + return -1; + } + + if(param.kernel_type == PRECOMPUTED) + elements = l * (sc + 1); + else 
+ { + for(i = 0; i < l; i++) + { + for(k = 0; k < sc; k++) + if(samples[k * l + i] != 0) + elements++; + // count the '-1' element + elements++; + } + } + + prob.y = Malloc(double,l); + prob.x = Malloc(struct svm_node *,l); + x_space = Malloc(struct svm_node, elements); + + max_index = sc; + j = 0; + for(i = 0; i < l; i++) + { + prob.x[i] = &x_space[j]; + prob.y[i] = labels[i]; + + for(k = 0; k < sc; k++) + { + if(param.kernel_type == PRECOMPUTED || samples[k * l + i] != 0) + { + x_space[j].index = (int)k + 1; + x_space[j].value = samples[k * l + i]; + j++; + } + } + x_space[j++].index = -1; + } + + if(param.gamma == 0 && max_index > 0) + param.gamma = (double)(1.0/max_index); + + if(param.kernel_type == PRECOMPUTED) + for(i=0;i (int)max_index) + { + mexPrintf("Wrong input format: sample_serial_number out of range\n"); + return -1; + } + } + + return 0; +} + +int read_problem_sparse(const mxArray *label_vec, const mxArray *instance_mat) +{ + mwIndex *ir, *jc, low, high, k; + // using size_t due to the output type of matlab functions + size_t i, j, l, elements, max_index, label_vector_row_num; + mwSize num_samples; + double *samples, *labels; + mxArray *instance_mat_col; // transposed instance sparse matrix + + prob.x = NULL; + prob.y = NULL; + x_space = NULL; + + // transpose instance matrix + { + mxArray *prhs[1], *plhs[1]; + prhs[0] = mxDuplicateArray(instance_mat); + if(mexCallMATLAB(1, plhs, 1, prhs, "transpose")) + { + mexPrintf("Error: cannot transpose training instance matrix\n"); + return -1; + } + instance_mat_col = plhs[0]; + mxDestroyArray(prhs[0]); + } + + // each column is one instance + labels = mxGetPr(label_vec); + samples = mxGetPr(instance_mat_col); + ir = mxGetIr(instance_mat_col); + jc = mxGetJc(instance_mat_col); + + num_samples = mxGetNzmax(instance_mat_col); + + // number of instances + l = mxGetN(instance_mat_col); + label_vector_row_num = mxGetM(label_vec); + prob.l = (int) l; + + if(label_vector_row_num!=l) + { + mexPrintf("Length of label 
vector does not match # of instances.\n"); + return -1; + } + + elements = num_samples + l; + max_index = mxGetM(instance_mat_col); + + prob.y = Malloc(double,l); + prob.x = Malloc(struct svm_node *,l); + x_space = Malloc(struct svm_node, elements); + + j = 0; + for(i=0;i 0) + param.gamma = (double)(1.0/max_index); + + return 0; +} + +static void fake_answer(int nlhs, mxArray *plhs[]) +{ + int i; + for(i=0;i 1) + { + exit_with_help(); + fake_answer(nlhs, plhs); + return; + } + + // Transform the input Matrix to libsvm format + if(nrhs > 1 && nrhs < 4) + { + int err; + + if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) + { + mexPrintf("Error: label vector and instance matrix must be double\n"); + fake_answer(nlhs, plhs); + return; + } + + if(mxIsSparse(prhs[0])) + { + mexPrintf("Error: label vector should not be in sparse format\n"); + fake_answer(nlhs, plhs); + return; + } + + if(parse_command_line(nrhs, prhs, NULL)) + { + exit_with_help(); + svm_destroy_param(¶m); + fake_answer(nlhs, plhs); + return; + } + + if(mxIsSparse(prhs[1])) + { + if(param.kernel_type == PRECOMPUTED) + { + // precomputed kernel requires dense matrix, so we make one + mxArray *rhs[1], *lhs[1]; + + rhs[0] = mxDuplicateArray(prhs[1]); + if(mexCallMATLAB(1, lhs, 1, rhs, "full")) + { + mexPrintf("Error: cannot generate a full training instance matrix\n"); + svm_destroy_param(¶m); + fake_answer(nlhs, plhs); + return; + } + err = read_problem_dense(prhs[0], lhs[0]); + mxDestroyArray(lhs[0]); + mxDestroyArray(rhs[0]); + } + else + err = read_problem_sparse(prhs[0], prhs[1]); + } + else + err = read_problem_dense(prhs[0], prhs[1]); + + // svmtrain's original code + error_msg = svm_check_parameter(&prob, ¶m); + + if(err || error_msg) + { + if (error_msg != NULL) + mexPrintf("Error: %s\n", error_msg); + svm_destroy_param(¶m); + free(prob.y); + free(prob.x); + free(x_space); + fake_answer(nlhs, plhs); + return; + } + + if(cross_validation) + { + double *ptr; + plhs[0] = mxCreateDoubleMatrix(1, 1, 
mxREAL); + ptr = mxGetPr(plhs[0]); + ptr[0] = do_cross_validation(); + } + else + { + int nr_feat = (int)mxGetN(prhs[1]); + const char *error_msg; + model = svm_train(&prob, ¶m); + error_msg = model_to_matlab_structure(plhs, nr_feat, model); + if(error_msg) + mexPrintf("Error: can't convert libsvm model to matrix structure: %s\n", error_msg); + svm_free_and_destroy_model(&model); + } + svm_destroy_param(¶m); + free(prob.y); + free(prob.x); + free(x_space); + } + else + { + exit_with_help(); + fake_answer(nlhs, plhs); + return; + } +} diff --git a/src/train.c b/src/train.c new file mode 100644 index 0000000..7589bb2 --- /dev/null +++ b/src/train.c @@ -0,0 +1,528 @@ +/* + +Copyright (c) 2007-2019 The LIBLINEAR Project. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +*/ + +#include +#include +#include +#include +#include "linear.h" + +#include "mex.h" +#include "linear_model_matlab.h" + +#ifdef MX_API_VER +#if MX_API_VER < 0x07030000 +typedef int mwIndex; +#endif +#endif + +#define CMD_LEN 2048 +#define Malloc(type,n) (type *)malloc((n)*sizeof(type)) +#define INF HUGE_VAL + +void print_null(const char *s) {} +void print_string_matlab(const char *s) {mexPrintf(s);} + +void exit_with_help() +{ + mexPrintf( + "Usage: model = train(training_label_vector, training_instance_matrix, 'liblinear_options', 'col');\n" + "liblinear_options:\n" + "-s type : set type of solver (default 1)\n" + " for multi-class classification\n" + " 0 -- L2-regularized logistic regression (primal)\n" + " 1 -- L2-regularized L2-loss support vector classification (dual)\n" + " 2 -- L2-regularized L2-loss support vector classification (primal)\n" + " 3 -- L2-regularized L1-loss support vector classification (dual)\n" + " 4 -- support vector classification by Crammer and Singer\n" + " 5 -- L1-regularized L2-loss support vector classification\n" + " 6 -- L1-regularized logistic regression\n" + " 7 -- L2-regularized logistic regression (dual)\n" + " for regression\n" + " 11 -- L2-regularized L2-loss support vector regression (primal)\n" + " 12 -- L2-regularized L2-loss support vector regression (dual)\n" + " 13 -- L2-regularized L1-loss support vector regression (dual)\n" + "-c cost : set the parameter C (default 1)\n" + "-p epsilon : set the epsilon in 
loss function of SVR (default 0.1)\n" + "-e epsilon : set tolerance of termination criterion\n" + " -s 0 and 2\n" + " |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n" + " where f is the primal function and pos/neg are # of\n" + " positive/negative data (default 0.01)\n" + " -s 11\n" + " |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)\n" + " -s 1, 3, 4 and 7\n" + " Dual maximal violation <= eps; similar to libsvm (default 0.1)\n" + " -s 5 and 6\n" + " |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n" + " where f is the primal function (default 0.01)\n" + " -s 12 and 13\n" + " |f'(alpha)|_1 <= eps |f'(alpha0)|,\n" + " where f is the dual function (default 0.1)\n" + "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n" + "-wi weight: weights adjust the parameter C of different classes (see README for details)\n" + "-v n: n-fold cross validation mode\n" + "-C : find parameter C (only for -s 0 and 2)\n" + "-q : quiet mode (no outputs)\n" + "col:\n" + " if 'col' is set, training_instance_matrix is parsed in column format, otherwise is in row format\n" + ); +} + +// liblinear arguments +struct parameter param; // set by parse_command_line +struct problem prob; // set by read_problem +struct model *model_; +struct feature_node *x_space; +int flag_cross_validation; +int flag_find_C; +int flag_C_specified; +int flag_solver_specified; +int col_format_flag; +int nr_fold; +double bias; + + +void do_find_parameter_C(double *best_C, double *best_rate) +{ + double start_C; + double max_C = 1024; + if (flag_C_specified) + start_C = param.C; + else + start_C = -1.0; + find_parameter_C(&prob, ¶m, nr_fold, start_C, max_C, best_C, best_rate); + mexPrintf("Best C = %lf CV accuracy = %g%%\n", *best_C, 100.0**best_rate); +} + + +double do_cross_validation() +{ + int i; + int total_correct = 0; + double total_error = 0; + double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0; + double *target = Malloc(double, prob.l); + double retval = 0.0; + + 
cross_validation(&prob,¶m,nr_fold,target); + if(param.solver_type == L2R_L2LOSS_SVR || + param.solver_type == L2R_L1LOSS_SVR_DUAL || + param.solver_type == L2R_L2LOSS_SVR_DUAL) + { + for(i=0;i 2) + { + mxGetString(prhs[2], cmd, mxGetN(prhs[2]) + 1); + if((argv[argc] = strtok(cmd, " ")) != NULL) + while((argv[++argc] = strtok(NULL, " ")) != NULL) + ; + } + + // parse options + for(i=1;i=argc && argv[i-1][1] != 'q' && argv[i-1][1] != 'C') // since options -q and -C have no parameter + return 1; + switch(argv[i-1][1]) + { + case 's': + param.solver_type = atoi(argv[i]); + flag_solver_specified = 1; + break; + case 'c': + param.C = atof(argv[i]); + flag_C_specified = 1; + break; + case 'p': + param.p = atof(argv[i]); + break; + case 'e': + param.eps = atof(argv[i]); + break; + case 'B': + bias = atof(argv[i]); + break; + case 'v': + flag_cross_validation = 1; + nr_fold = atoi(argv[i]); + if(nr_fold < 2) + { + mexPrintf("n-fold cross validation: n must >= 2\n"); + return 1; + } + break; + case 'w': + ++param.nr_weight; + param.weight_label = (int *) realloc(param.weight_label,sizeof(int)*param.nr_weight); + param.weight = (double *) realloc(param.weight,sizeof(double)*param.nr_weight); + param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]); + param.weight[param.nr_weight-1] = atof(argv[i]); + break; + case 'q': + print_func = &print_null; + i--; + break; + case 'C': + flag_find_C = 1; + i--; + break; + default: + mexPrintf("unknown option\n"); + return 1; + } + } + + set_print_string_function(print_func); + + // default solver for parameter selection is L2R_L2LOSS_SVC + if(flag_find_C) + { + if(!flag_cross_validation) + nr_fold = 5; + if(!flag_solver_specified) + { + mexPrintf("Solver not specified. 
Using -s 2\n"); + param.solver_type = L2R_L2LOSS_SVC; + } + else if(param.solver_type != L2R_LR && param.solver_type != L2R_L2LOSS_SVC) + { + mexPrintf("Warm-start parameter search only available for -s 0 and -s 2\n"); + return 1; + } + } + + if(param.eps == INF) + { + switch(param.solver_type) + { + case L2R_LR: + case L2R_L2LOSS_SVC: + param.eps = 0.01; + break; + case L2R_L2LOSS_SVR: + param.eps = 0.001; + break; + case L2R_L2LOSS_SVC_DUAL: + case L2R_L1LOSS_SVC_DUAL: + case MCSVM_CS: + case L2R_LR_DUAL: + param.eps = 0.1; + break; + case L1R_L2LOSS_SVC: + case L1R_LR: + param.eps = 0.01; + break; + case L2R_L1LOSS_SVR_DUAL: + case L2R_L2LOSS_SVR_DUAL: + param.eps = 0.1; + break; + } + } + return 0; +} + +static void fake_answer(int nlhs, mxArray *plhs[]) +{ + int i; + for(i=0;i=0) + { + x_space[j].index = (int) max_index+1; + x_space[j].value = prob.bias; + j++; + } + x_space[j++].index = -1; + } + + if(prob.bias>=0) + prob.n = (int) max_index+1; + else + prob.n = (int) max_index; + + return 0; +} + +// Interface function of matlab +// now assume prhs[0]: label prhs[1]: features +void mexFunction( int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[] ) +{ + const char *error_msg; + // fix random seed to have same results for each run + // (for cross validation) + srand(1); + + if(nlhs > 1) + { + exit_with_help(); + fake_answer(nlhs, plhs); + return; + } + + // Transform the input Matrix to libsvm format + if(nrhs > 1 && nrhs < 5) + { + int err=0; + + if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) + { + mexPrintf("Error: label vector and instance matrix must be double\n"); + fake_answer(nlhs, plhs); + return; + } + + if(mxIsSparse(prhs[0])) + { + mexPrintf("Error: label vector should not be in sparse format"); + fake_answer(nlhs, plhs); + return; + } + + if(parse_command_line(nrhs, prhs, NULL)) + { + exit_with_help(); + destroy_param(¶m); + fake_answer(nlhs, plhs); + return; + } + + if(mxIsSparse(prhs[1])) + err = read_problem_sparse(prhs[0], prhs[1]); 
+ else + { + mexPrintf("Training_instance_matrix must be sparse; " + "use sparse(Training_instance_matrix) first\n"); + destroy_param(¶m); + fake_answer(nlhs, plhs); + return; + } + + // train's original code + error_msg = check_parameter(&prob, ¶m); + + if(err || error_msg) + { + if (error_msg != NULL) + mexPrintf("Error: %s\n", error_msg); + destroy_param(¶m); + free(prob.y); + free(prob.x); + free(x_space); + fake_answer(nlhs, plhs); + return; + } + + if (flag_find_C) + { + double best_C, best_rate, *ptr; + + do_find_parameter_C(&best_C, &best_rate); + + plhs[0] = mxCreateDoubleMatrix(2, 1, mxREAL); + ptr = mxGetPr(plhs[0]); + ptr[0] = best_C; + ptr[1] = best_rate; + } + else if(flag_cross_validation) + { + double *ptr; + plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL); + ptr = mxGetPr(plhs[0]); + ptr[0] = do_cross_validation(); + } + else + { + const char *error_msg; + + model_ = train(&prob, ¶m); + error_msg = model_to_matlab_structure(plhs, model_); + if(error_msg) + mexPrintf("Error: can't convert libsvm model to matrix structure: %s\n", error_msg); + free_and_destroy_model(&model_); + } + destroy_param(¶m); + free(prob.y); + free(prob.x); + free(x_space); + } + else + { + exit_with_help(); + fake_answer(nlhs, plhs); + return; + } +} diff --git a/src/tron.cpp b/src/tron.cpp new file mode 100644 index 0000000..2407d22 --- /dev/null +++ b/src/tron.cpp @@ -0,0 +1,321 @@ +/* + This code was extracted from liblinear-2.2.1 in Feb 2019 and + modified for the use with Octave and Matlab + +Copyright (c) 2007-2019 The LIBLINEAR Project. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include +#include +#include +#include +#include "tron.h" + +#ifndef min +template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +extern double dnrm2_(int *, double *, int *); +extern double ddot_(int *, double *, int *, double *, int *); +extern int daxpy_(int *, double *, double *, int *, double *, int *); +extern int dscal_(int *, double *, double *, int *); + +#ifdef __cplusplus +} +#endif + +static void default_print(const char *buf) +{ + fputs(buf,stdout); + fflush(stdout); +} + +static double uTMv(int n, double *u, double *M, double *v) +{ + const int m = n-4; + double res = 0; + int i; + for (i=0; ifun_obj=const_cast(fun_obj); + this->eps=eps; + this->eps_cg=eps_cg; + this->max_iter=max_iter; + tron_print_string = default_print; +} + +TRON::~TRON() +{ +} + +void TRON::tron(double *w) +{ + // Parameters for updating the iterates. + double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75; + + // Parameters for updating the trust region size delta. + double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4; + + int n = fun_obj->get_nr_variable(); + int i, cg_iter; + double delta=0, sMnorm, one=1.0; + double alpha, f, fnew, prered, actred, gs; + int search = 1, iter = 1, inc = 1; + double *s = new double[n]; + double *r = new double[n]; + double *g = new double[n]; + + const double alpha_pcg = 0.01; + double *M = new double[n]; + + // calculate gradient norm at w=0 for stopping condition. + double *w0 = new double[n]; + for (i=0; ifun(w0); + fun_obj->grad(w0, g); + double gnorm0 = dnrm2_(&n, g, &inc); + delete [] w0; + + f = fun_obj->fun(w); + fun_obj->grad(w, g); + double gnorm = dnrm2_(&n, g, &inc); + + if (gnorm <= eps*gnorm0) + search = 0; + + fun_obj->get_diag_preconditioner(M); + for(i=0; ifun(w_new); + + // Compute the actual reduction. + actred = f - fnew; + + // On the first iteration, adjust the initial step bound. 
+ sMnorm = sqrt(uTMv(n, s, M, s)); + if (iter == 1 && !delta_adjusted) + { + delta = min(delta, sMnorm); + delta_adjusted = true; + } + + // Compute prediction alpha*sMnorm of the step. + if (fnew - f - gs <= 0) + alpha = sigma3; + else + alpha = max(sigma1, -0.5*(gs/(fnew - f - gs))); + + // Update the trust region bound according to the ratio of actual to predicted reduction. + if (actred < eta0*prered) + delta = min(alpha*sMnorm, sigma2*delta); + else if (actred < eta1*prered) + delta = max(sigma1*delta, min(alpha*sMnorm, sigma2*delta)); + else if (actred < eta2*prered) + delta = max(sigma1*delta, min(alpha*sMnorm, sigma3*delta)); + else + { + if (reach_boundary) + delta = sigma3*delta; + else + delta = max(delta, min(alpha*sMnorm, sigma3*delta)); + } + + info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter); + + if (actred > eta0*prered) + { + iter++; + memcpy(w, w_new, sizeof(double)*n); + f = fnew; + fun_obj->grad(w, g); + fun_obj->get_diag_preconditioner(M); + for(i=0; iget_nr_variable(); + double one = 1; + double *d = new double[n]; + double *Hd = new double[n]; + double zTr, znewTrnew, alpha, beta, cgtol; + double *z = new double[n]; + + *reach_boundary = false; + for (i=0; iHv(d, Hd); + + alpha = zTr/ddot_(&n, d, &inc, Hd, &inc); + daxpy_(&n, &alpha, d, &inc, s, &inc); + + double sMnorm = sqrt(uTMv(n, s, M, s)); + if (sMnorm > delta) + { + info("cg reaches trust region boundary\n"); + *reach_boundary = true; + alpha = -alpha; + daxpy_(&n, &alpha, d, &inc, s, &inc); + + double sTMd = uTMv(n, s, M, d); + double sTMs = uTMv(n, s, M, s); + double dTMd = uTMv(n, d, M, d); + double dsq = delta*delta; + double rad = sqrt(sTMd*sTMd + dTMd*(dsq-sTMs)); + if (sTMd >= 0) + alpha = (dsq - sTMs)/(sTMd + rad); + else + alpha = (rad - sTMd)/dTMd; + daxpy_(&n, &alpha, d, &inc, s, &inc); + alpha = -alpha; + daxpy_(&n, &alpha, Hd, &inc, r, &inc); + break; + } + alpha = -alpha; + daxpy_(&n, &alpha, Hd, 
&inc, r, &inc); + + for (i=0; i= dmax) + dmax = fabs(x[i]); + return(dmax); +} + +void TRON::set_print_string(void (*print_string) (const char *buf)) +{ + tron_print_string = print_string; +} diff --git a/src/tron.h b/src/tron.h new file mode 100644 index 0000000..7bf6f03 --- /dev/null +++ b/src/tron.h @@ -0,0 +1,74 @@ +/* + This code was extracted from liblinear-2.2.1 in Feb 2019 and + modified for the use with Octave and Matlab + + +Copyright (c) 2007-2019 The LIBLINEAR Project. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither name of copyright holders nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef _TRON_H +#define _TRON_H + +class function +{ +public: + virtual double fun(double *w) = 0 ; + virtual void grad(double *w, double *g) = 0 ; + virtual void Hv(double *s, double *Hs) = 0 ; + + virtual int get_nr_variable(void) = 0 ; + virtual void get_diag_preconditioner(double *M) = 0 ; + virtual ~function(void){} +}; + +class TRON +{ +public: + TRON(const function *fun_obj, double eps = 0.1, double eps_cg = 0.1, int max_iter = 1000); + ~TRON(); + + void tron(double *w); + void set_print_string(void (*i_print) (const char *buf)); + +private: + int trpcg(double delta, double *g, double *M, double *s, double *r, bool *reach_boundary); + double norm_inf(int n, double *x); + + double eps; + double eps_cg; + int max_iter; + function *fun_obj; + void info(const char *fmt,...); + void (*tron_print_string)(const char *buf); +}; +#endif diff --git a/src/xptopen.cpp b/src/xptopen.cpp new file mode 100644 index 0000000..2af935d --- /dev/null +++ b/src/xptopen.cpp @@ -0,0 +1,1155 @@ +//------------------------------------------------------------------- +// XPTOPEN is C-MEX implementation for reading various +// statistical data formats including SAS/XPT, SPSS/PASW, +// STATA and ARFF data formats. Basic support for writing +// SAS/XPT is also supported. +// Endian conversion is done automatically. +// +// usage: x = xptopen(filename) +// usage: x = xptopen(filename,'r') +// read filename and return variables in struct x +// usage: xptopen(filename,'w',x) +// save fields of struct x in filename +// usage: x = xptopen(filename,'a',x) +// append fields of struct x to filename +// +// References: +// + +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, see . +// +// $Id$ +// Copyright (C) 2010,2011,2012,2013 Alois Schloegl +// This function is part of the NaN-toolbox +// http://pub.ist.ac.at/~schloegl/matlab/NaN/ +// +// References: +// [1] TS-140 THE RECORD LAYOUT OF A DATA SET IN SAS TRANSPORT (XPORT) FORMAT +// http://support.sas.com/techsup/technote/ts140.html +// [2] IBM floating point format +// http://en.wikipedia.org/wiki/IBM_Floating_Point_Architecture +// [3] see http://old.nabble.com/Re%3A-IBM-integer-and-double-formats-p20428979.html +// [4] STATA File Format +// http://www.stata.com/help.cgi?dta +// http://www.stata.com/help.cgi?dta_113 +//------------------------------------------------------------------- + +/* +SPSS file format +// http://cvs.savannah.gnu.org/pspp/doc/data-file-format.texi?root=pspp&content-type=text%2Fplain +*/ + +#define TEST_CONVERSION 0 // 0: ieee754, 1: SAS converter (big endian bug), 2: experimental +#define DEBUG 1 + +#include +#include +//#include +#include +#include +#include +#include +#include +#include "mex.h" + +#ifndef typeof +#define typeof __typeof__ +#endif + +#ifdef tmwtypes_h + #if (MX_API_VER<=0x07020000) + typedef int mwSize; + typedef int mwIndex; + #endif +#endif + +#define NaN (0.0/0.0) +#define fix(m) (m<0 ? ceil(m) : floor(m)) +#define max(a,b) (((a) > (b)) ? (a) : (b)) +#define min(a,b) (((a) < (b)) ? 
(a) : (b)) + + +#if 0 + +#elif defined(__linux__) +# include +# include + +#elif defined(__CYGWIN__) +# include +# include + +#elif defined(__GLIBC__) // for Hurd +# include +# include + +#elif defined(__MINGW32__) + /* use local version because MINGW does not provide byteswap.h */ +# define __BIG_ENDIAN 4321 +# define __LITTLE_ENDIAN 1234 +# define __BYTE_ORDER __LITTLE_ENDIAN + +#elif defined(__NetBSD__) +# include +# define __BIG_ENDIAN _BIG_ENDIAN +# define __LITTLE_ENDIAN _LITTLE_ENDIAN +# define __BYTE_ORDER _BYTE_ORDER +# define bswap_16(x) bswap16(x) +# define bswap_32(x) bswap32(x) +# define bswap_64(x) bswap64(x) + +#elif defined(_APPLE_) && defined(_MACH_) +# include +# define _BYTE_ORDER __DARWIN_BYTE_ORDER +# define _LITTLE_ENDIAN __DARWIN_LITTLE_ENDIAN +# define _BIG_ENDIAN __DARWIN_BIG_ENDIAN +# define bswap_16(x) __bswap16(x) +# define bswap_32(x) __bswap32(x) +# define bswap_64(x) __bswap64(x) + +#elif defined(__APPLE__) +# include +# define __BIG_ENDIAN 4321 +# define __LITTLE_ENDIAN 1234 +#if (defined(__LITTLE_ENDIAN__) && (__LITTLE_ENDIAN__ == 1)) + #define __BYTE_ORDER __LITTLE_ENDIAN +#else + #define __BYTE_ORDER __BIG_ENDIAN +#endif +# define bswap_16(x) CFSwapInt16(x) +# define bswap_32(x) CFSwapInt32(x) +# define bswap_64(x) CFSwapInt64(x) + +#elif (defined(BSD) && (BSD >= 199103)) && !defined(__GLIBC__) +# include +# define __BIG_ENDIAN _BIG_ENDIAN +# define __LITTLE_ENDIAN _LITTLE_ENDIAN +# define __BYTE_ORDER _BYTE_ORDER +# define bswap_16(x) __bswap16(x) +# define bswap_32(x) __bswap32(x) +# define bswap_64(x) __bswap64(x) + +#elif defined(__GNUC__) + /* use byteswap macros from the host system, hopefully optimized ones ;-) */ +# include +# include +# define bswap_16(x) __bswap_16 (x) +# define bswap_32(x) __bswap_32 (x) +# define bswap_64(x) __bswap_64 (x) + +#elif defined(__sparc__) +# define __BIG_ENDIAN 4321 +# define __LITTLE_ENDIAN 1234 +# define __BYTE_ORDER __BIG_ENDIAN + +#else +# error Unknown platform +#endif + +#if 
defined(__MINGW32__) || defined(__sparc__) + +# ifndef bswap_16 +# define bswap_16(x) \ + ((((x) & 0xff00) >> 8) | (((x) & 0x00ff) << 8)) +# endif + +# ifndef bswap_32 +# define bswap_32(x) \ + ((((x) & 0xff000000) >> 24) \ + | (((x) & 0x00ff0000) >> 8) \ + | (((x) & 0x0000ff00) << 8) \ + | (((x) & 0x000000ff) << 24)) + +# endif + +# ifndef bswap_64 +# define bswap_64(x) \ + ((((x) & 0xff00000000000000ull) >> 56) \ + | (((x) & 0x00ff000000000000ull) >> 40) \ + | (((x) & 0x0000ff0000000000ull) >> 24) \ + | (((x) & 0x000000ff00000000ull) >> 8) \ + | (((x) & 0x00000000ff000000ull) << 8) \ + | (((x) & 0x0000000000ff0000ull) << 24) \ + | (((x) & 0x000000000000ff00ull) << 40) \ + | (((x) & 0x00000000000000ffull) << 56)) +# endif + +#endif + + +#if !defined(__BIG_ENDIAN) && !defined(__LITTLE_ENDIAN) +#error ENDIANITY is not known +#endif + + +#if __BYTE_ORDER == __BIG_ENDIAN +#define l_endian_u16(x) ((uint16_t)bswap_16((uint16_t)(x))) +#define l_endian_u32(x) ((uint32_t)bswap_32((uint32_t)(x))) +#define l_endian_u64(x) ((uint64_t)bswap_64((uint64_t)(x))) +#define l_endian_i16(x) ((int16_t)bswap_16((int16_t)(x))) +#define l_endian_i32(x) ((int32_t)bswap_32((int32_t)(x))) +#define l_endian_i64(x) ((int64_t)bswap_64((int64_t)(x))) + +#define b_endian_u16(x) ((uint16_t)(x)) +#define b_endian_u32(x) ((uint32_t)(x)) +#define b_endian_u64(x) ((uint64_t)(x)) +#define b_endian_i16(x) ((int16_t)(x)) +#define b_endian_i32(x) ((int32_t)(x)) +#define b_endian_i64(x) ((int64_t)(x)) + +#elif __BYTE_ORDER==__LITTLE_ENDIAN +#define l_endian_u16(x) ((uint16_t)(x)) +#define l_endian_u32(x) ((uint32_t)(x)) +#define l_endian_u64(x) ((uint64_t)(x)) +#define l_endian_i16(x) ((int16_t)(x)) +#define l_endian_i32(x) ((int32_t)(x)) +#define l_endian_i64(x) ((int64_t)(x)) + +#define b_endian_u16(x) ((uint16_t)bswap_16((uint16_t)(x))) +#define b_endian_u32(x) ((uint32_t)bswap_32((uint32_t)(x))) +#define b_endian_u64(x) ((uint64_t)bswap_64((uint64_t)(x))) +#define b_endian_i16(x) 
((int16_t)bswap_16((int16_t)(x))) +#define b_endian_i32(x) ((int32_t)bswap_32((int32_t)(x))) +#define b_endian_i64(x) ((int64_t)bswap_64((int64_t)(x))) + +#endif /* __BYTE_ORDER */ + + +/* + Including ZLIB enables reading gzipped files (they are decompressed on-the-fly) + The output files can be zipped, too. + */ + +#ifdef WITH_ZLIB +#include +#endif + + +double xpt2d(uint64_t x); +uint64_t d2xpt(double x); +double tm_time2gdf_time(struct tm *t); + +void mexFunction(int POutputCount, mxArray* POutput[], int PInputCount, const mxArray *PInputs[]) +{ + const char L1[] = "HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!000000000000000000000000000000 "; + const char L2[] = "SAS SAS SASLIB 6.06 bsd4.2 13APR89:10:20:06"; + //const char L3[] = ""; + const char L4[] = "HEADER RECORD*******MEMBER HEADER RECORD!!!!!!!000000000000000001600000000140 "; + const char L5[] = "HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!000000000000000000000000000000 "; + const char L6[] = "SAS ABC SASLIB 6.06 bsd4.2 13APR89:10:20:06"; + //const char L7[] = ""; + const char L8[] = "HEADER RECORD*******NAMESTR HEADER RECORD!!!!!!!000000000200000000000000000000 "; + const char LO[] = "HEADER RECORD*******OBS HEADER RECORD!!!!!!!000000000000000000000000000000 "; + + const char DATEFORMAT[] = "%d%b%y:%H:%M:%S"; + char *fn = NULL; + char Mode[3] = "r"; + size_t count = 0, HeadLen0=80*8, HeadLen2=0, sz2 = 0; + uint32_t NS = 0; + char H0[HeadLen0]; + char *H2 = NULL; + char SWAP = 0; + +#ifndef ZLIB_H + FILE *fid; +#else + gzFile fid; + #define fopen gzopen + #define fread(a,b,c,d) (gzread(d,a,b*c)/b) + #define fwrite(a,b,c,d) (gzwrite(d,a,b*c)/b) + #define feof gzeof + #define fseek gzseek + #define fclose gzclose + #define rewind(fid) (gzseek(fid,0,SEEK_SET)) +#endif + + // check for proper number of input and output arguments + if ( PInputCount > 0 && mxGetClassID(PInputs[0])==mxCHAR_CLASS) { + size_t buflen = (mxGetM(PInputs[0]) * mxGetN(PInputs[0]) * sizeof(mxChar)) + 1; + fn = 
(char*)malloc(buflen); + mxGetString(PInputs[0], fn, buflen); + } + else { + mexPrintf("XPTOPEN read of several file formats and writing of the SAS Transport Format (*.xpt)\n"); + mexPrintf("\n\tX = xptopen(filename)\n"); + mexPrintf("\tX = xptopen(filename,'r')\n"); + mexPrintf("\t\tread filename and return variables in struct X\n"); +#ifdef ZLIB_H + mexPrintf("\tSupported are ARFF, SAS-XPT and STATA files with or w/o zlib/gzip compression.\n"); +#else + mexPrintf("\tSupported are ARFF, SAS-XPT and STATA files.\n"); +#endif + mexPrintf("\n\tX = xptopen(filename,'w',X)\n"); + mexPrintf("\t\tsave fields of struct X in filename.\n\n"); + mexPrintf("\tThe fields of X must be column vectors of equal length.\n"); + mexPrintf("\tEach vector is either a numeric vector or a cell array of strings.\n"); + mexPrintf("\nThe SAS-XPT format stores Date/Time as numeric value counting the number of days since 1960-01-01.\n\n"); + return; + } + + if (PInputCount > 1) + if (mxGetClassID(PInputs[1])==mxCHAR_CLASS && mxGetNumberOfElements(PInputs[1])) { + mxGetString(PInputs[1],Mode,3); + Mode[2]=0; + } + + fid = fopen(fn,Mode); + if (fid == NULL) { + mexErrMsgTxt("Can not open file!\n"); + } + + if (Mode[0]=='r' || Mode[0]=='a' ) { + count += fread(H0,1,80*8,fid); + enum FileFormat { + noFile, unknown, ARFF, SASXPT, SPSS, SQLite, STATA + }; + enum FileFormat TYPE; /* type of file format */ + uint8_t LittleEndian; /* 1 if file is LittleEndian data format and 0 for big endian data format*/ + + TYPE = unknown; + if (!memcmp(H0,"$FL2@(#) SPSS DATA FILE",23) || !memcmp(H0,"$FL2@(#) PASW STATISTICS DATA FILE",27)) { + /* + SPSS file format + */ + uint32_t M=0; + + mexWarnMsgTxt("XPTOPEN: support of for SPSS file format is very experimental (do not use it for production use)\n"); + + TYPE = SPSS; + switch (*(uint32_t*)(H0+64)) { + case 0x00000002: + case 0x00000003: + LittleEndian = 1; + SWAP = __BYTE_ORDER==__BIG_ENDIAN; + NS = l_endian_u32(*(uint32_t*)(H0+68)); + M = 
l_endian_u32(*(uint32_t*)(H0+80)); + break; + case 0x02000000: + case 0x03000000: + SWAP = __BYTE_ORDER==__LITTLE_ENDIAN; + LittleEndian = 0; + NS = b_endian_u32(*(uint32_t*)(H0+68)); + M = b_endian_u32(*(uint32_t*)(H0+80)); + break; + default: + TYPE = unknown; + } + NS = *(int32_t*)(H0+80); + M = *(int32_t*)(H0+80); + if (SWAP) { + NS = bswap_32(NS); + M = bswap_32(M); + } + HeadLen0 = 184; + char *H2 = (char*)malloc(NS*32); + size_t c2 = 0; + + /* + Read Variable SPSS header + */ + int ns = 0; + const char **ListOfVarNames = (const char**)malloc((NS+1) * sizeof(char*)); + char *VarNames = (char*)malloc((NS+1) * sizeof(char) * 9); + double *MISSINGS = (double*)malloc((NS+1) * sizeof(double)); + for (uint32_t k=0; k=0x6e || H0[0]<=114) && (H0[1]==1 || H0[1]==2) && H0[2]==1 && H0[3]==0) { + /* + STATA File Format + http://www.stata.com/help.cgi?dta + http://www.stata.com/help.cgi?dta_113 + Stata files written by R start with 0x6e + */ + uint32_t M=0; + + TYPE = STATA; + // Header 119 bytes + LittleEndian = H0[1]==2; + if (LittleEndian) { + NS = l_endian_u16(*(uint16_t*)(H0+4)); + M = l_endian_u32(*(uint32_t*)(H0+6)); + } + else { + NS = b_endian_u16(*(uint16_t*)(H0+4)); + M = b_endian_u32(*(uint32_t*)(H0+6)); + } + + // Descriptors + int fmtlen = (H0[0]==113) ? 
12 : 49; + fseek(fid,109,SEEK_SET); + size_t HeadLen2 = 2+NS*(1+33+2+fmtlen+33+81); + char *H1 = (char*)malloc(HeadLen2); + HeadLen2 = fread(H1,1,HeadLen2,fid); + + // expansion fields + char typ; int32_t len; + char flagSWAP = (((__BYTE_ORDER == __BIG_ENDIAN) && LittleEndian) || ((__BYTE_ORDER == __LITTLE_ENDIAN) && !LittleEndian)); + do { + if (!fread(&typ,1,1,fid)) break; + if (!fread(&len,4,1,fid)) break; + if (flagSWAP) bswap_32(len); + fseek(fid,len,SEEK_CUR); + } while (len); + uint8_t *typlist = (uint8_t*)H1; + +/* + char *varlist = H1+NS; + char *srtlist; + char *fmtlist = H1+NS*36+2; + char *lbllist = H1+NS*(36+fmtlen)+2; +*/ + + mxArray **R = (mxArray**) mxMalloc(NS*sizeof(mxArray*)); + size_t *bi = (size_t*) malloc((NS+1)*sizeof(size_t*)); + const char **ListOfVarNames = (const char**)malloc(NS * sizeof(char*)); + bi[0] = 0; + for (size_t k = 0; k < NS; k++) { + size_t sz; + ListOfVarNames[k] = H1+NS+33*k; + switch (typlist[k]) { + case 0xfb: sz = 1; break; + case 0xfc: sz = 2; break; + case 0xfd: sz = 4; break; + case 0xfe: sz = 4; break; + case 0xff: sz = 8; break; + default: sz = typlist[k]; + } + bi[k+1] = bi[k]+sz; + } + + // data + uint8_t *data = (uint8_t *) malloc(bi[NS] * M); + fread(data, bi[NS], M, fid); + + char *f = (char*)malloc(bi[NS]+1); + for (size_t k = 0; k < NS; k++) { + switch (typlist[k]) { + case 0xfb: + R[k] = mxCreateDoubleMatrix(M, 1, mxREAL); + for (typeof(M) m = 0; m < M; m++) { + int8_t d = *(int8_t*)(data+bi[k]+m*bi[NS]); + ((double*)mxGetData(R[k]))[m] = (d>100) ? NaN : d; + } + break; + case 0xfc: + R[k] = mxCreateDoubleMatrix(M, 1, mxREAL); + if (flagSWAP) for (size_t m = 0; m < M; m++) { + int16_t d = (int16_t) bswap_16(*(uint16_t*)(data+bi[k]+m*bi[NS])); + ((double*)mxGetData(R[k]))[m] = (d>32740) ? NaN : d; + } + else for (typeof(M) m = 0; m < M; m++) { + int16_t d = *(int16_t*)(data+bi[k]+m*bi[NS]); + ((double*)mxGetData(R[k]))[m] = (d>32740) ? 
NaN : d; + } + break; + case 0xfd: + R[k] = mxCreateDoubleMatrix(M, 1, mxREAL); + if (flagSWAP) for (size_t m = 0; m < M; m++) { + int32_t d = (int32_t)bswap_32(*(uint32_t*)(data+bi[k]+m*bi[NS])); + ((double*)mxGetData(R[k]))[m] = (d>2147483620) ? NaN : d; + } + else for (typeof(M) m = 0; m < M; m++) { + int32_t d = *(int32_t*)(data+bi[k]+m*bi[NS]); + ((double*)mxGetData(R[k]))[m] = (d>2147483620) ? NaN : d; + } + break; + case 0xfe: + R[k] = mxCreateNumericMatrix(M, 1, mxSINGLE_CLASS, mxREAL); + if (flagSWAP) for (size_t m = 0; m < M; m++) { + ((uint32_t*)mxGetData(R[k]))[m] = bswap_32(*(uint32_t*)(data+bi[k]+m*bi[NS]));; + } + else for (size_t m = 0; m < M; m++) { + ((uint32_t*)mxGetData(R[k]))[m] = *(uint32_t*)(data+bi[k]+m*bi[NS]); + } + break; + case 0xff: + R[k] = mxCreateDoubleMatrix(M, 1, mxREAL); + if (flagSWAP) for (typeof(M) m = 0; m < M; m++) { + ((uint64_t*)mxGetData(R[k]))[m] = bswap_64(*(uint64_t*)(data+bi[k]+m*bi[NS])); + } + else for (typeof(M) m = 0; m < M; m++) { + ((uint64_t*)mxGetData(R[k]))[m] = *(uint64_t*)(data+bi[k]+m*bi[NS]); + } + break; + default: + R[k] = mxCreateCellMatrix(M, 1); + size_t sz = typlist[k]; + for (typeof(M) m = 0; m < M; m++) { + memcpy(f, data+bi[k]+m*bi[NS], sz); + f[sz] = 0; + mxSetCell(R[k], m, mxCreateString(f)); + } + } + } + if (f) free(f); + if (H1) free(H1); + if (bi) free(bi); + + /* convert into output */ + POutput[0] = mxCreateStructMatrix(1, 1, NS, ListOfVarNames); + for (size_t k = 0; k < NS; k++) { + mxSetField(POutput[0], 0, ListOfVarNames[k], R[k]); + } + + if (ListOfVarNames) free(ListOfVarNames); + } + + else if (H0[0]=='%' || H0[0]=='@') { + /* + ARFF + */ + uint32_t M=0; + + TYPE = ARFF; + rewind(fid); + + char *H1 = NULL; + count = 0; + size_t ns = 0; + char *vartyp = NULL; + char **datestr = NULL; + const char **ListOfVarNames = NULL; + mxArray **R = NULL; + size_t m = 0; + + while (!feof(fid)) { + HeadLen0 = max(1024,HeadLen0*2); + H1 = (char*)realloc(H1,HeadLen0); + count += 
fread(H1+count,1,HeadLen0-count-1,fid); + } + H1[count] = 0; + + switch (H1[count-1]) { + case 0x0a: + case 0x0d: + H1[count] = 0; + break; + default: + H1[count] = 0x0a; + } + H1[count+1] = 0; + + char *line = strtok(H1,"\x0a\0x0d"); + + int status = 0; + while (line) { + + if (!strncasecmp(line,"@relation",9)) { + status = 1; + } + + else if (status == 1 && !strncasecmp(line,"@attribute",10)) { + if (ns<=NS) { + ns = max(16, ns*2); + ListOfVarNames = (const char**)realloc(ListOfVarNames,ns*sizeof(char*)); + vartyp = (char*)realloc(vartyp,ns*sizeof(char)); + R = (mxArray**) mxRealloc(R,ns*sizeof(mxArray*)); + } + size_t k = 10; + char *p1, *p2; + while (isspace(line[k])) k++; + p1 = line+k; + while (!isspace(line[k])) k++; + line[k++]=0; + while (isspace(line[k])) k++; + p2 = line+k; + + ListOfVarNames[NS] = p1; + if (!strncasecmp(p2,"numeric",7)) { + vartyp[NS] = 1; + } + else if (!strncasecmp(p2,"integer",7)) { + vartyp[NS] = 2; + } + else if (!strncasecmp(p2,"real",4)) { + vartyp[NS] = 3; + } + else if (!strncasecmp(p2,"string",6)) { + vartyp[NS] = 4; + } + else if (!strncasecmp(p2,"{",1)) { + vartyp[NS] = 5; + } + else if (!strncasecmp(p2,"date",4)) { + vartyp[NS] = 6; + datestr = (char**)realloc(datestr,(NS+1)*sizeof(char*)); + p2+=4; + while (isspace(*p2)) p2++; + datestr[NS] = p2; + if (p2[0]==34) { + p2++; + while (p2[0]!=34 && p2[0]) p2++; + p2[1]=0; + } + } + else if (!strncasecmp(p2,"relational",10)) { + vartyp[NS] = 7; + } + else vartyp[NS] = 99; + + NS++; + } + + else if (status == 1 && !strncasecmp(line,"@data",5)) { + status = 2; + char *p = line; + while (*p) p++; // goto end of current line + p++; // skip \x00 + M = 0; + while (*p) { + if (p[0]==0x0a || p[0]==0x0d) { + // count number of + M++; + // skip next char (deals with ) + p+=2; + } + else p++; + } + for (size_t k=0; k0x40 && c<0x5b)) && !u ) + return(NaN); + + int s,e; + s = *(((char*)&x) + 7) & 0x80; // sign + e = (*(((char*)&x) + 7) & 0x7f) - 64; // exponent + *(((char*)&x) + 7) = 0; // 
mantisse x
+
+#if DEBUG
+    mexPrintf("%x %x %016Lx\n",s,e,x);
+#endif
+
+    double y = ldexp((double)x, e*4-56);
+    if (s) return(-y);
+    else return( y);
+
+#endif
+}
+
+
+/*
+    D2XPT converts from little-endian IEEE to little-endian IBM format
+
+    The result is returned as a 64-bit integer with the layout
+        bit  63      sign
+        bits 62..56  exponent to base 16, biased by 64
+        bits 55..0   fraction F, so that |x| = F/2^56 * 16^(exponent-64)
+
+    Special cases:
+        NaN                         -> 0x2e << 56
+        +-infinity, and magnitudes
+        too large for the format    -> 0x5f << 56
+        0.0                         -> 0
+        magnitudes too small for
+        the format                  -> fraction is shifted right (round to
+                                       nearest, ties to even) with exponent 0,
+                                       ending in a zero fraction
+
+    On big-endian hosts the conversion is not implemented; an error is
+    raised through mexErrMsgTxt.
+*/
+
+uint64_t d2xpt(double x) {
+#if __BYTE_ORDER == __BIG_ENDIAN
+
+    mexErrMsgTxt("IEEE-to-IBM conversion on big-endian platform not supported, yet");
+    return(0x2eLL << 56);    // NaN - not a number
+
+#elif __BYTE_ORDER==__LITTLE_ENDIAN
+    uint64_t s,m;
+    int e;
+
+
+    if (x != x) return(0x2eLL << 56);    // NaN - not a number
+
+    if (fabs(x) == 1.0/0.0) return(0x5fLL << 56);    // +-infinity
+
+    if (x == 0.0) return(0);
+
+    // sign is kept separately (already in its final position, bit 63), so
+    // that it can never leak into the fraction when the fraction is shifted
+    if (x > 0.0) s = 0;
+    else s = ((uint64_t)1) << 63;
+
+    // x = f * 2^e with 0.5 <= |f| < 1; x is neither 0, NaN nor Inf here,
+    // hence f is always a normalized double (also for subnormal input)
+    x = frexp(x,&e);
+
+#if DEBUG
+    mexPrintf("d2xpt(%f)\n",x);
+#endif
+    // see http://old.nabble.com/Re%3A-IBM-integer-and-double-formats-p20428979.html
+    memcpy(&m, &x, 8);
+    m &= 0x000fffffffffffffULL;    // keep the 52 stored fraction bits, drop sign and exponent
+    // reconstruct the implicit leading '1'. This must be unconditional:
+    // e is the frexp exponent (not the raw IEEE exponent field), and e==0
+    // is a perfectly normal value for 0.5 <= |x| < 1.
+    m |= 0x0010000000000000ULL;
+    m <<= (3-(-e & 3));            // align the binary exponent to the next multiple of 4
+    e = (e + (-e & 3)) / 4 + 64;   // exponent to base 16, biased by 64
+
+    if (e >= 128) return(0x5fLL << 56);    // overflow: same code as +-infinity
+    if (e < 0) {
+        // underflow: denormalize by shifting the fraction 4*(-e) bits to
+        // the right, rounding to nearest (ties to even)
+        int shift = 4 * -e;
+        if (shift >= 64) {
+            // everything is shifted out (the fraction has only 56 bits)
+            m = 0;
+        }
+        else {
+            uint64_t h = ((uint64_t)1) << (shift - 1);    // weight of the first discarded bit
+            m = m / (2*h) + (((m & h) && (m & (3*h-1))) ? 1 : 0);
+        }
+        e = 0;
+    }
+    return (s | ((uint64_t)e)<<56 | m);
+
+#endif
+
+}
+
+
+double tm_time2gdf_time(struct tm *t) {
+    /* based Octave's datevec.m
+    it referes Peter Baum's algorithm at http://vsg.cape.com/~pbaum/date/date0.htm
+    but the link is not working anymore as of 2008-12-03.
+
+    Other links to Peter Baum's algorithm are
+    http://www.rexswain.com/b2mmddyy.rex
+    http://www.dpwr.net/forums/index.php?s=ecfa72e38be61327403126e23aeea7e5&showtopic=4309
+    */
+
+    int Y,M,s; //h,m,
+    double D;
+
+    D = t->tm_mday;
+    M = t->tm_mon+1;
+    Y = t->tm_year+1900;
+
+    // Set start of year to March by moving Jan. and Feb. to previous year.
+    // Correct for months > 12 by moving to subsequent years.
+ Y += (int)fix ((M-14.0)/12); + + const int monthstart[] = {306, 337, 0, 31, 61, 92, 122, 153, 184, 214, 245, 275}; + // Lookup number of days since start of the current year. + D += monthstart[t->tm_mon % 12] + 60; + + // Add number of days to the start of the current year. Correct + // for leap year every 4 years except centuries not divisible by 400. + D += 365*Y + floor (Y/4) - floor (Y/100) + floor (Y/400); + + // Add fraction representing current second of the day. + s = t->tm_hour*3600 + t->tm_min*60 + t->tm_sec; + + // s -= timezone; + return(D + s/86400.0); +} + diff --git a/test/test_classify.m b/test/test_classify.m new file mode 100644 index 0000000..b549194 --- /dev/null +++ b/test/test_classify.m @@ -0,0 +1,48 @@ +% TEST_CLASSIFY tests and compares NaN/CLASSIFY.M with the matlab version of CLASSIFY + +% Copyright (C) 2009,2010,2019 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ + +clear +load_fisheriris +SL = meas(51:end,1); +SW = meas(51:end,2); +group = species(51:end); +h1 = gscatter(SL,SW,group,'rb','v^',[],'off'); +set(h1,'LineWidth',2) +legend('Fisher versicolor','Fisher virginica','Location','northwest') + +[X,Y] = meshgrid(linspace(4.5,8),linspace(2,4)); +X = X(:); Y = Y(:); + +classifiers={'linear','quadratic','diagLinear','diagQuadratic','mahalanobis'}; + +p = which('train_sc.m'); +p = fileparts(p); +rmpath(p); +for k=1:length(classifiers) + [C1,err(1,k),P1,logp1,coeff1] = classify([X Y],[SL SW],group,classifiers{k}); +end; + +addpath(p); +for k=1:length(classifiers) + [C2,err(2,k),P2,logp2,coeff2] = classify([X Y],[SL SW],group,classifiers{k}); +end; + + diff --git a/test/test_fss.m b/test/test_fss.m new file mode 100644 index 0000000..7751d9e --- /dev/null +++ b/test/test_fss.m @@ -0,0 +1,77 @@ +% TEST_FSS test of fss.m + +% Copyright (C) 2009,2010,2016 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ +if 1, +clear +if ~exist('ue6.mat','file') + if strncmp(computer,'PCWIN',5) + fprintf(1,'Download http://pub.ist.ac.at/~schloegl/LV/SMBS/UE6/ue6.mat and save in local directory %s\nPress any key to continue ...\n',pwd); + pause; + else + system('wget http://pub.ist.ac.at/~schloegl/LV/SMBS/UE6/ue6.mat'); + end; +end +load ue6; + +N = 10; % select N highest ranked features +[ix,score] = fss(data, C, N); +end; + +classifier= {'REG','MDA','MD2','QDA','QDA2','LD2','LD3','LD4','LD5','LD6','NBC','aNBC','WienerHopf','LDA/GSVD','MDA/GSVD', 'LDA/sparse','MDA/sparse', 'PLA', 'LMS','LDA/DELETION','MDA/DELETION','NBC/DELETION','RDA/DELETION','REG/DELETION','RDA','GDBC','SVM','PSVM','SVM11','SVM:LIN4','SVM:LIN0','SVM:LIN1','SVM:LIN2','SVM:LIN3'};%,'RBF' + +%% compute cross-validated result; +for k=1:N + [R1{k},CC1{k}]=xval(data(:,ix(1:k)),C); +end; +for k=1:length(classifier), + fprintf(1,'%i:\t%s\n',k,classifier{k}); + [R2{k},CC2{k}]=xval(data(:,ix(1:5)),C,classifier{k}); +end; + +fprintf(1,'#\tFeature\tN\tACC [%%]\tKappa+-se\t I [bit]\n'); +R=R1; +for k=1:length(R); + n(k)=sum(R{k}.data(:)); + ACC(k)=R{k}.ACC; + KAP(k)=R{k}.kappa; + KAP_Se(k)=R{k}.kappa_se; + MI(k)=R{k}.MI; + + fprintf(1,'%3i:\t%4i\t%i\t%5.2f\t%5.2f+-%5.2f\t%4.2f\n',k,ix(k),n(k),ACC(k),KAP(k),KAP_Se(k),MI(k)); +end +R=R2; +for k=1:length(R); + n(k)=sum(R{k}.data(:)); + ACC(k)=R{k}.ACC; + KAP(k)=R{k}.kappa; + KAP_Se(k)=R{k}.kappa_se; + MI(k)=R{k}.MI; + + fprintf(1,'%3i:\t%8s\t%i\t%5.2f\t%5.2f+-%5.2f\t%4.2f\n',k,classifier{k},n(k),ACC(k),KAP(k),KAP_Se(k),MI(k)); +end + + +%% display +plot(ACC*100,'x'); +set(gca,'YLim',[0,100]) +ylabel('Accuracy [%]') +title('selection of N out of 2540 features') + + diff --git a/test/test_mex_accuracy.m b/test/test_mex_accuracy.m new file mode 100644 index 0000000..547ea07 --- /dev/null +++ b/test/test_mex_accuracy.m @@ -0,0 +1,95 @@ +% TEST_MEX_ACCURACY evaluates the accuracy and speed of +% different accuracy levels in SUMSKIPNAN_MEX and COVM_MEX +% +% see also: 
FLAG_ACCURACY_LEVEL, SUMSKIPNAN_MEX, COVM_MEX +% +% Reference: +% [1] David Goldberg, +% What Every Computer Scientist Should Know About Floating-Point Arithmetic +% ACM Computing Surveys, Vol 23, No 1, March 1991. + +% Copyright (C) 2009,2010,2019 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +clear +flag=0; +N = 1e7; +x=randn(N,10)+1e6; + +level = flag_accuracy_level; %% backup original level +flag_accuracy_level(0); +tic,t=cputime();[cc0,nn0]=covm_mex(x,[],flag);t0=[cputime-t,toc]; + +flag_accuracy_level(1); +tic,t=cputime();[cc1,nn1]=covm_mex(x,[],flag);t1=[cputime-t,toc]; + +flag_accuracy_level(2); +tic,t=cputime();[cc2,nn2]=covm_mex(x,[],flag);t2=[cputime-t,toc]; + +flag_accuracy_level(3); +tic,t=cputime();[cc3,nn3]=covm_mex(x,[],flag);t3=[cputime-t,toc]; + +tic,t=cputime();cc4=x'*x;nn4=size(x,1);t4=[cputime-t,toc]; + +flag_accuracy_level(0); +tic,t=cputime();[c0,n0]=sumskipnan_mex(x,1,flag);t0s=[cputime-t,toc]; + +flag_accuracy_level(1); +tic,t=cputime();[c1,n1]=sumskipnan_mex(x,1,flag);t1s=[cputime-t,toc]; + +flag_accuracy_level(2); +tic,t=cputime();[c2,n2]=sumskipnan_mex(x,1,flag);t2s=[cputime-t,toc]; + +flag_accuracy_level(3); 
+tic,t=cputime();[c3,n3]=sumskipnan_mex(x,1,flag);t3s=[cputime-t,toc]; + +tic,t=cputime();c4=sum(x,1);n4=size(x,1);t4s=[cputime-t,toc]; + +flag_accuracy_level(level); %% restore original level + +cc = {cc0,cc1,cc2,cc3}; +c = {c0,c1,c2,c3}; +tt = [t0;t1;t2;t3;t4]; +t = [t0s;t1s;t2s;t3s;t4s]; + +[status, host] = system ('hostname'); +fprintf('hostname: %s\n',host); +fprintf('Sum squared differences between accuracy levels:\n'); +fprintf('Level:\t|(0) naive-dou\t|(1) naive-ext\t|(2) kahan-dou \t| (3) kahan-ext\n') +fprintf('error:\t|N*2^-52\t|N*2^-64\t| 2^-52 \t| 2^-64\n') +fprintf('COVM_MEX:\ntime:\t|%f\t|%f\t| %f \t| %f \t| %f',tt(:,2)) +fprintf('\nntime:\t|%f\t|%f\t| %f \t| %f ',tt(1:4,2)./tt(5,2)) +for K1=1:4, +fprintf('\n(%i)\t',K1-1); +for K2=1:4, + EE(K1,K2)=sum(sum((cc{K1}-cc{K2}).^2)); + E(K1,K2) =sum(sum((c{K1}-c{K2}).^2)); + fprintf('|%8g\t',EE(K1,K2)/nn1(1)); +end; +end; +fprintf('\nSUMSKIPNAN_MEX:\n') +fprintf('time:\t|%f\t|%f\t| %f \t| %f \t| %f',t(:,2)) +fprintf('\nntime:\t|%f\t|%f\t| %f \t| %f ',t(1:4,2)./t(5,2)) +for K1=1:4, +fprintf('\n(%i)\t',K1-1); +for K2=1:4, + fprintf('|%8g\t',E(K1,K2)/n1(1)); +end; +end; +fprintf('\n'); + diff --git a/test/test_perf_skipnan.m b/test/test_perf_skipnan.m new file mode 100644 index 0000000..009cdca --- /dev/null +++ b/test/test_perf_skipnan.m @@ -0,0 +1,56 @@ + +%% TEST_PERF_NANTB + +% Copyright (C) 2009,2010 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. 
+% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 +% USA + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Performance +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% SUMSKIPNAN_MEX with and w/o OpenMP +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% Generate Test data +y = randn(4e5,32); + +flag=0; +N = 10; % repeat the tests 10 times +t1=repmat(N,2); +t2=repmat(N,2); +for k=1:N; + tic;t=cputime(); + [s,n]=sumskipnan_mex(y,1); + t1(k,1)=cputime()-t; t1(k,2)=toc; + + tic;t=cputime(); + [c,n]=covm_mex(y,[],flag); + t2(k,1)=cputime()-t; t2(k,2)=toc; +end; +[[t1,t2];mean([t1,t2]);std([t1,t2])] +exp(-diff(log([mean(t1)',mean(t2)']))) + + + + + diff --git a/test/test_str2array.csv b/test/test_str2array.csv new file mode 100644 index 0000000..6637d08 --- /dev/null +++ b/test/test_str2array.csv @@ -0,0 +1,9 @@ +"remarks";"id";"type";"first";"last";"excluding";"list";"timestamp" +;"JK130515a";1;8;9;;"[8:9]"; +;"JK130612b";1;3;4;;"[3:4]"; +;"JK130925b";1;3;4;;"[3:4]"; +;"JK131004b";1;3;4;;"[3:4]"; +;"JK131010a";1;9;10;;"[9:10]"; +;;;;;;; +;;;;;;; +"cell type: 1=CA1";;;;;;; diff --git a/test/test_str2array.m b/test/test_str2array.m new file mode 100644 index 0000000..1184fbd --- /dev/null +++ b/test/test_str2array.m @@ -0,0 +1,8 @@ +fid = fopen('test_str2array.csv','r'); %% corrected directory +if fid<0, return; end; +s = fread(fid,[1,inf],'uint8=>char'); +fclose(fid); +s(s==10)=[]; +[n,v,c]=str2array(s,[';',char(9)],char([10,13])) + + diff --git a/test/test_train_sc.m b/test/test_train_sc.m new file mode 100644 
index 0000000..406fe8a --- /dev/null +++ b/test/test_train_sc.m @@ -0,0 +1,95 @@ +% Test train_sc and test_sc, weighted samples + + + +% $Id$ +% Copyright (C) 2010 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + + +clear +classifier= {'REG','REG2','MDA','MD2','QDA','QDA2','LD2','LD3','LD4','LD5','LD6','NBC','aNBC','WienerHopf','PLA', 'LMS','LDA/DELETION','MDA/DELETION','NBC/DELETION','RDA/DELETION','RDA','GDBC','SVM','RBF'};% 'LDA/GSVD','MDA/GSVD', 'LDA/GSVD','MDA/GSVD', 'LDA/sparse','MDA/sparse', + +N=1e2; +c=[1:N]'*2>N; + +W3 = [ones(1,N/2)/5,ones(1,N/10)]; +for l=1:length(classifier), + fprintf(1,'%s\n',classifier{l}); +for k=1:10, + +x=randn(N,2); +x=x+[c,c]; + +ix = 1:0.6*N; + +try, +CC = train_sc(x(ix,:),c(ix)+1,classifier{l}); +R1 = test_sc(CC,x,[],c+1); + +CC = train_sc(x,c+1,classifier{l}); +R2 = test_sc(CC,x,[],c+1); + +CC = train_sc(x(ix,:),c(ix)+1,classifier{l},W3); +R3 = test_sc(CC,x,[],c+1); + +acc1(k,l)=[R1.ACC]; +kap1(k,l)=[R1.kappa]; +acc2(k,l)=[R2.ACC]; +kap2(k,l)=[R2.kappa]; +acc3(k,l)=[R3.ACC]; +kap3(k,l)=[R3.kappa]; +end; + +end; +end; + +[se,m]=sem(acc1);m +[se,m]=sem(acc2);m +[se,m]=sem(acc3);m + +%[diff(m),diff(m)/sqrt(sum(se.^2))] 
+%[se,m]=sem(kap);[diff(m),diff(m)/sqrt(sum(se.^2))] + +%These are tests to compare varios classiers + +return + + +N=1e2; +c=[1:N]'*2>N; + +for k=1:1000,k + +x=randn(N,2); +x=x+[c,c]; + +ix = 1:0.6*N; +[R1,CC]=xval(x(ix,:),c(ix)+1,'REG'); +[R2,CC]=xval(x,c+1,'REG'); +[R3,CC]=xval(x(ix,:),c(ix)+1,'LDA'); +[R4,CC]=xval(x,c+1,'LDA'); + +acc(k,1:4)=[R1.ACC,R2.ACC,R3.ACC,R4.ACC]; +kap(k,1:4)=[R1.kappa,R2.kappa,R3.kappa,R4.kappa]; + +end; + +[se,m]=sem(acc),%[diff(m),diff(m)/sqrt(sum(se.^2))] +%[se,m]=sem(kap);[diff(m),diff(m)/sqrt(sum(se.^2))] + diff --git a/test/test_xptopen.m b/test/test_xptopen.m new file mode 100644 index 0000000..aceb86c --- /dev/null +++ b/test/test_xptopen.m @@ -0,0 +1,75 @@ +% TEST_XPTOPEN tests XPTOPEN + +% $Id$ +% Copyright (C) 2010 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://biosig-consulting.com/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ +%x.c = [-1000,-2,-1,0,1,2,NaN,10,100,1000,10000,1e6,1e7,1e8]'; +%y.Y = [1,2,NaN,1]'+10; + +if 1, +X.a = [-2,-0,NaN,10,444,-pi]';%,100,1000,10000,1e6,1e7,1e8]'; +X.d = [1,2,NaN,1,Inf,-Inf]'+10; +X.b = {'a','B',' ','*','Z','zzz'}'; + +fn = 'test.xpt'; +Y = xptopen(fn,'w',X) +Z = xptopen(fn,'r') + + +end; + +fn = {'buy','humid','prdsale'}; +for k1 = 1:length(fn); + X = xptopen(fn{k1},'r'); + xptopen([fn{k1},'.xpt'],'w',X); + f = fieldnames(X); + + fid = fopen([fn{k1},'.csv'],'w'); + for k1=1:length(f) + if k1>1, fprintf(fid,';'); end; + fprintf(fid,'%s',f{k1}); + end; + fprintf(fid,'\n'); + + for k2=1:length(X.(f{1})); + for k1=1:length(f) + if k1>1, fprintf(fid,';'); end; + v = X.(f{k1})(k2); + if isnumeric(v) + if strcmp(f{k1},'DATE'), + fprintf(fid,'%s',datestr(v + datenum([1960,1,1]),1)); + elseif strcmp(f{k1},'MONTH'), + fprintf(fid,'%s',datestr(v + datenum([1960,1,1]),3)); + elseif v==ceil(v), + fprintf(fid,'%i',v); + else + fprintf(fid,'%f',v); + end + elseif iscell(v) && ischar(v{1}) + fprintf(fid,'%s',v{1}); + else + fprintf(fid,'--'); + end; + end; + fprintf(fid,'\n'); + end; + + fclose(fid); +end; + diff --git a/test/test_xval.m b/test/test_xval.m new file mode 100644 index 0000000..56ee933 --- /dev/null +++ b/test/test_xval.m @@ -0,0 +1,61 @@ +% test_classifier; + +% $Id$ +% Copyright (C) 2010 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. 
+% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + + +if 1, +clear +N=100; % number of samples +M=10; % number of features +classifier = {'SVM:LIB', 'REG', 'MDA', 'MD2', 'QDA', 'QDA2', 'LD2', 'LD3', 'LD4','LD5', 'LD6', 'NBC', 'aNBC', 'WienerHopf', 'LDA/GSVD', 'MDA/GSVD', 'LDA/sparse', 'MDA/sparse', 'PLA', 'LMS', 'LDA/DELETION', 'MDA/DELETION', 'NBC/DELETION', 'RDA/DELETION', 'REG/DELETION', 'RDA', 'GDBC', 'SVM', 'RBF', 'PSVM', 'SVM11', 'SVM:LIN4', 'SVM:LIN0', 'SVM:LIN1', 'SVM:LIN2', 'SVM:LIN3', 'SVM:LIN5', 'SVM:LIN6', 'SVM:LIN7', 'SVM:LIN11', 'SVM:LIN12', 'SVM:LIN13', 'WINNOW' }; + +x = randn(N,M); % data +c = ([1:N]'>(N/2))+1; % classlabel +%w = [ones(1,N/2)/5,ones(1,N/10),zeros(1,4*N/10)]; +w = []; % no weightening + +x = randn(N,M); +x = x+c*ones(1,M); + +if 1, +%x(2:2:N/2,2) = NaN; +x(2:2:N,2) = NaN; +x(3,2:2:end) = NaN; +end; +end; + +for k = 1:length(classifier); + try, + [R{k},CC{k}] = xval(x, {c,w}, classifier{k}); + fprintf(1,'%8s\t%i\t%5.2f\t%5.2f+-%5.2f\n',classifier{k},sum(R{k}.data(:)),R{k}.ACC*100,R{k}.kappa,R{k}.kappa_se); + save -v6 debug.mat + catch, + R{k} = []; + end; +end; + +for k = 1:length(R) + if isempty(R{k}) + fprintf(1,'%8s \t failed\n',classifier{k}); + else + fprintf(1,'%8s\t%i\t%5.2f\t%5.2f+-%5.2f\n',classifier{k},sum(R{k}.data(:)),R{k}.ACC*100,R{k}.kappa,R{k}.kappa_se); + end; +end + -- cgit v1.2.3 From 9e30cf8e4dcf915fb0a77531d170afcc5d56f830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafael=20Laboissi=C3=A8re?= Date: Sun, 15 Aug 2021 05:03:13 -0300 Subject: Import octave-nan_3.6.0-2.debian.tar.xz [dgit import tarball octave-nan 3.6.0-2 octave-nan_3.6.0-2.debian.tar.xz] --- README.Debian | 15 ++ changelog | 402 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ check.m | 20 +++ clean | 7 + control | 36 +++++ copyright | 117 ++++++++++++++++ gbp.conf | 4 + 
octave-nan.docs | 1 + rules | 10 ++ source/format | 1 + upstream/metadata | 3 + watch | 2 + 12 files changed, 618 insertions(+) create mode 100644 README.Debian create mode 100644 changelog create mode 100644 check.m create mode 100644 clean create mode 100644 control create mode 100644 copyright create mode 100644 gbp.conf create mode 100644 octave-nan.docs create mode 100755 rules create mode 100644 source/format create mode 100644 upstream/metadata create mode 100644 watch diff --git a/README.Debian b/README.Debian new file mode 100644 index 0000000..3bbbf5a --- /dev/null +++ b/README.Debian @@ -0,0 +1,15 @@ +NaN package for Octave +====================== + +Since this package changes the way NaNs are handled in Octave (by overloading +several core functions), it is not loaded by default upon Octave startup. + +In order to use its functions, the user must explicitly ask Octave to load the +package with the following command: + + pkg load nan + +In case the user wants to have the package loaded by default in every session, +this command can be added to ~/.octaverc. 
+ + -- Sébastien Villemot , Thu, 22 Mar 2012 20:54:24 +0100 diff --git a/changelog b/changelog new file mode 100644 index 0000000..55c94a4 --- /dev/null +++ b/changelog @@ -0,0 +1,402 @@ +octave-nan (3.6.0-2) unstable; urgency=medium + + * Upload to unstable + * d/rules: Use execute_before_dh_installdeb instead of override_dh_installdeb + + -- Rafael Laboissière Sun, 15 Aug 2021 05:03:13 -0300 + +octave-nan (3.6.0-1) experimental; urgency=medium + + * New upstream version 3.6.0 + + -- Rafael Laboissière Fri, 30 Jul 2021 05:50:27 -0300 + +octave-nan (3.5.3-1) experimental; urgency=medium + + * New upstream version 3.5.3 + * d/control: Bump Standards-Version to 4.5.1 (no changes needed) + * d/copyright: Reflect upstream changes + + -- Rafael Laboissière Sat, 20 Feb 2021 02:51:16 -0300 + +octave-nan (3.5.2-1) unstable; urgency=medium + + * New upstream version 3.5.2 + * d/p/xtest-in-load-fisheriris.patch: Drop patch (applied upstream) + * d/p/depends-statistics.patch: Drop patch (fixed upstream) + * d/control: Drop build-dependency on octave-statistics + * d/p/honour-cppflags-from-environment.patch: New patch + + -- Rafael Laboissière Sat, 07 Nov 2020 18:03:05 -0300 + +octave-nan (3.5.0-1) unstable; urgency=medium + + * New upstream version 3.5.0 + * d/copyright: Reflect upstream changes + * d/control: Build-depends on octave-statistics + * d/rules: Fix wrong permission of file fishers_exact_test.m + * d/p/depends-statistics.patch: New patch + + -- Rafael Laboissière Wed, 29 Jul 2020 05:42:46 -0300 + +octave-nan (3.4.5-3) unstable; urgency=medium + + * d/u/metadata: New file + * d/control: Bump debhelper compatibitlity level to 13 + * d/u/metadata: Drop Name and Contact fields + * d/p/xtest-in-load-fisheriris.patch: New patch (Closes: #964210) + + -- Rafael Laboissière Mon, 13 Jul 2020 07:13:06 -0300 + +octave-nan (3.4.5-2) unstable; urgency=medium + + [ Sébastien Villemot ] + * Add missing Build-Depends on wget + * d/copyright: use correct license short name (FSFAP) 
for Makefile + + [ Rafael Laboissière ] + * d/control: Bump Standards-Version to 4.5.0 (no changes needed) + + -- Sébastien Villemot Thu, 13 Feb 2020 14:54:17 +0100 + +octave-nan (3.4.5-1) unstable; urgency=medium + + * New upstream version 3.4.5 + * Drop patches (applied upstream): + + d/p/spelling-fix-in-train-help-str.patch + + d/p/drop-no-undefined-ldflags.patch + + d/p/fix-spelling-str2array.patch + + -- Rafael Laboissière Sat, 14 Dec 2019 14:22:06 -0300 + +octave-nan (3.4.3-1) unstable; urgency=medium + + * New upstream version 3.4.3 + * d/copyright: Reflect upstream changes + * d/p/spelling-fix-in-train-help-str.patch: Refresh for new upstream version + * Drop patches (fixed upstream): + + d/p/workaround-for-mkoctfile-bug.patch + + d/p/build-against-libsvm-dev.patch + + d/p/reduce_test_memory_footprint.patch + * d/control: + + Bump Standards-Version to 4.4.1 (no changes needed) + + Bump dependency on dh-octave to >= 0.7.1 + This allows the injection of the virtual package octave-abi-N + into the package's list of dependencies. + + Build-Depends on liblinear-dev + * d/clean: Clean files left over after building the package + * d/rules: Drop obsolete target override_dh_auto_clean + * d/s/lintian-overrides: Drop unused override + * d/p/drop-no-undefined-ldflags.patch: New patch + * d/p/fix-spelling-str2array.patch: New patch + + -- Rafael Laboissiere Wed, 13 Nov 2019 11:20:05 -0300 + +octave-nan (3.1.4-5) unstable; urgency=medium + + * d/control: + + Add Rules-Requires-Root: no + + Bump Standards-Version to 4.3.0 + + Bump to debhelper compat level 12 + * Build-depend on debhelper-compat instead of using d/compat + + -- Rafael Laboissiere Wed, 02 Jan 2019 22:56:43 -0200 + +octave-nan (3.1.4-4) unstable; urgency=medium + + [ Mike Miller ] + * d/control, d/copyright: Use secure URL for upstream source. 
+ + [ Rafael Laboissiere ] + * d/control: Bump Standards-Version to 4.1.4 (no changes needed) + * d/rules: Ensures that rm command will not fail + + [ Sébastien Villemot ] + * workaround-for-mkoctfile-bug.patch: new patch, fixes FTBFS against + Octave 4.4 + + -- Sébastien Villemot Wed, 13 Jun 2018 21:45:32 +0200 + +octave-nan (3.1.4-3) unstable; urgency=medium + + * Use dh-octave for building the package + * d/control: + + Use Debian's GitLab URLs in Vcs-* headers + + Change Maintainer to team+pkg-octave-team@tracker.debian.org + + -- Rafael Laboissiere Sat, 10 Feb 2018 07:34:18 -0200 + +octave-nan (3.1.4-2) unstable; urgency=medium + + * Use the dh-based version of octave-pkg-dev + * Set debhelper compatibility level to >= 11 + * d/control: Bump Standards-Version to 4.1.3 (no changes needed) + + -- Rafael Laboissiere Fri, 29 Dec 2017 22:14:05 -0200 + +octave-nan (3.1.4-1) unstable; urgency=medium + + [ Rafael Laboissiere ] + * New upstream version 3.1.4 + * d/control: Add Testsuite field + * d/control: Bump Standards-Version to 4.1.1 (no changes needed) + + [ Sébastien Villemot ] + * d/copyright: reflect upstream changes. + + -- Sébastien Villemot Thu, 19 Oct 2017 10:40:06 +0200 + +octave-nan (3.1.2-2) unstable; urgency=medium + + [ Sébastien Villemot ] + * Upload to unstable. + * d/copyright: use secure URL for format. + * d/control: remove unnecessary versioned dependency on octave-pkg-dev. + + [ Rafael Laboissiere ] + * d/control: Use cgit instead of gitweb in Vcs-Browser URL + + -- Sébastien Villemot Mon, 12 Jun 2017 14:48:19 +0200 + +octave-nan (3.1.2-1) experimental; urgency=medium + + * New upstream version 3.1.2 + * d/copyright: Reflect upstream changes + * d/p/spelling-fix-in-train-help-str.patch: New patch + + -- Rafael Laboissiere Tue, 07 Feb 2017 19:07:54 -0200 + +octave-nan (3.1.1-1) unstable; urgency=medium + + * New upstream version 3.1.1 + * d/copyright: reflect upstream changes. + * Bump to debhelper compat level 10. 
+ * d/watch: upgrade to format version 4. + * build-against-libsvm-dev.patch: update patch. + Now rely on the new HAVE_EXTERNAL_LIBSVM flag. Also remove completely + svm.{cpp,h} from the source tree, to be sure to pick up the external + library. + * Add a lintian source override for removed svm.h. + + -- Sébastien Villemot Fri, 13 Jan 2017 16:43:02 +0100 + +octave-nan (3.0.3-1) unstable; urgency=medium + + * Imported Upstream version 3.0.3 + * d/p/build-against-libsvm-dev.patch: Adjust for new API of libsvm + + -- Rafael Laboissiere Sun, 14 Aug 2016 11:55:47 -0300 + +octave-nan (3.0.2-2) unstable; urgency=medium + + * Build against libsvm-dev (Closes: 833940) + + d/control: Build-depends on libsvm-dev + + d/p/build-against-libsvm-dev.patch: New patch + + -- Rafael Laboissiere Sat, 13 Aug 2016 08:52:45 -0300 + +octave-nan (3.0.2-1) unstable; urgency=medium + + * Imported Upstream version 3.0.2 + * Bump Standards-Version to 3.9.8 (no changes needed) + + -- Rafael Laboissiere Thu, 04 Aug 2016 11:52:07 -0300 + +octave-nan (3.0.1-1) unstable; urgency=medium + + * Imported Upstream version 3.0.1 + + -- Rafael Laboissiere Thu, 17 Mar 2016 13:37:05 +0100 + +octave-nan (3.0-1) unstable; urgency=medium + + [ Sébastien Villemot ] + * Imported Upstream version 3.0 + + [ Rafael Laboissiere ] + * d/control: Use secure URIs in the Vcs-* fields + * d/control: Bump Standards-Version to 3.9.7 (no changes needed) + + -- Sébastien Villemot Fri, 19 Feb 2016 22:23:51 +0100 + +octave-nan (2.8.1-1) unstable; urgency=medium + + [ Sébastien Villemot ] + * Imported Upstream version 2.8.1 + * d/copyright: reflect upstream changes. + * Add Rafael Laboissiere and Mike Miller to Uploaders. + + [ Rafael Laboissiere ] + * Bump Standards-Version to 3.9.6 (no changes needed) + + -- Sébastien Villemot Mon, 20 Jul 2015 10:56:25 +0200 + +octave-nan (2.5.9-2) unstable; urgency=medium + + * reduce_test_memory_footprint.patch: new patch, reduces memory usage of + tests. 
Should fix FTBFS on mips and mipsel. Thanks to Thomas Weber for + investigating and finding the solution. + + -- Sébastien Villemot Sat, 01 Mar 2014 15:55:19 +0100 + +octave-nan (2.5.9-1) unstable; urgency=medium + + [ Rafael Laboissiere ] + * Imported Upstream version 2.5.9 + * debian/copyright: Adjust for new upstream release + * Drop patches hurd.patch and fix_makefile (applied upstream) + * Remove obsolete DM-Upload-Allowed flag + * Bump to Standards-Version 3.9.5, no changes needed + + [ Sébastien Villemot ] + * Use my @debian.org email address + + [ Thomas Weber ] + * debian/control: Use canonical URLs in Vcs-* fields + + -- Sébastien Villemot Mon, 27 Jan 2014 19:08:39 +0100 + +octave-nan (2.5.5-2) unstable; urgency=low + + * hurd.patch: new patch (Closes: #670073) + + -- Sébastien Villemot Mon, 23 Apr 2012 23:02:12 +0200 + +octave-nan (2.5.5-1) unstable; urgency=low + + * Imported Upstream version 2.5.5 + * debian/control: mention in long description that package is not autoloaded + * Describe how to load the package in README.Debian + * debian/copyright: reflect upstream changes + + -- Sébastien Villemot Tue, 03 Apr 2012 19:43:39 +0200 + +octave-nan (2.5.2-1) unstable; urgency=low + + * Imported Upstream version 2.5.2 + * debian/copyright: update to machine-readable format 1.0 + * Bump to debhelper compat level 9 + * Build-depend on octave-pkg-dev >= 1.0.0, to compile against Octave 3.6 + * Bump to Standards-Version 3.9.3, no changes needed + * debian/patches/fprintf_does_not_like_cells: remove patch, applied upstream + + -- Sébastien Villemot Mon, 12 Mar 2012 21:02:58 +0000 + +octave-nan (2.4.4-1) unstable; urgency=low + + [ Sébastien Villemot ] + * Imported Upstream version 2.4.4 + * Bump Standards-Version to 3.9.2, no changes needed + * debian/copyright: reflect changes in the upstream package, upgrade to DEP5 + * debian/watch: use qa.debian.org's sf.net redirector + * debian/patches/fix_makefile: refresh patch + * debian/rules: + + use our own clean 
rule to make the package idempotent + + do not include MATLAB build script in package + * Bump to debhelper compatibility level 8 + * Add myself to Uploaders + + -- Thomas Weber Thu, 06 Oct 2011 23:12:35 +0200 + +octave-nan (2.4.1-2) unstable; urgency=low + + * Add patch fix_makefile, for adapting the values to Octave 3.2 + + -- Thomas Weber Sun, 15 May 2011 13:19:17 +0200 + +octave-nan (2.4.1-1) unstable; urgency=low + + * New upstream release + * Bump standards version to 3.9.1, no changes required + + -- Thomas Weber Tue, 26 Apr 2011 11:30:23 +0200 + +octave-nan (2.3.1-2) experimental; urgency=low + + * Run test suite via debian/check.m + * New patch: fprintf_does_not_like_cells, fixes a problem with fprintf in + the test suite + + -- Thomas Weber Sat, 02 Oct 2010 23:48:41 +0200 + +octave-nan (2.3.1-1) experimental; urgency=low + + * New upstream version (closes: 595811) + * Adapt watch file for individual package releases + * Dropped patches: fix_empty_opt_argument_for_var (applied upstream) + + -- Thomas Weber Sun, 26 Sep 2010 19:31:17 +0200 + +octave-nan (1.0.9-2) unstable; urgency=low + + * debian/control: + - Remove Rafael Laboissiere from Uploaders (Closes: #571853) + - Remove Ólafur Jens Sigurðsson from Uploaders + * New patch: fix_empty_opt_argument_for_var: correctly handle the case of an + empty second argument for var() (closes: #594906). + * Switch to dpkg-source 3.0 (quilt) format + + -- Thomas Weber Wed, 01 Sep 2010 22:52:55 +0200 + +octave-nan (1.0.9-1) unstable; urgency=low + + [ Rafael Laboissiere ] + * debian/patches/autoload-yes.diff: Drop this patch. This package + should not be autoloaded since it overrides the standard functions + provided in octave3.0. 
+ * debian/control, debian/rules: Adjust to not use quilt anymore + * debian/README.source: Removed file + * debian/NEWS: Add a note explaining the change above + * debian/control: Build-depend on octave-pkg-dev >= 0.7.0, such that the + package is built against octave3.2 + + [ Thomas Weber ] + * New upstream release. + * Use debian/octave-nan.docs for installation, following the change to + octave-pkg-dev 0.7.1. Adjust debian/rules for this change. + + -- Thomas Weber Tue, 29 Dec 2009 23:45:06 +0100 + +octave-nan (1.0.8-1) unstable; urgency=low + + * New upstream release + * debian/control: + + (Standards-Version): Bump to 3.8.1 (add file debian/README.source) + + (Depends): Add ${misc:Depends} + + (Vcs-Git, Vcs-Browser): Adjust to new Git repository + * debian/copyright: Use DEP5 URL in Format-Specification + * debian/patches/autoload-yes.diff: Add description + + -- Rafael Laboissiere Sun, 24 May 2009 15:19:15 +0200 + +octave-nan (1.0.7-2) unstable; urgency=low + + [ Rafael Laboissiere ] + * debian/copyright: Add header + * debian/control: Bump build-dependency on octave-pkg-dev to >= 0.6.4, + such that the package is built with the versioned packages directory + + [ Thomas Weber ] + * Upload to unstable (closes: #501016) + + -- Thomas Weber Sun, 05 Apr 2009 21:20:57 +0200 + +octave-nan (1.0.7-1) experimental; urgency=low + + [ Ólafur Jens Sigurðsson ] + * debian/control: Bumped Standards-Version to 3.8.0 (no changes + needed) + + [ Thomas Weber ] + * New upstream release, handles empty second argument to std() correctly + (closes: #500252) + * Bump dependency on octave-pkg-dev to 0.6.1, to get the experimental + version + + -- Thomas Weber Thu, 11 Dec 2008 00:32:40 +0100 + +octave-nan (1.0.6-1) unstable; urgency=low + + * Initial release (closes: #481484) + + -- Rafael Laboissiere Fri, 16 May 2008 14:09:37 +0000 diff --git a/check.m b/check.m new file mode 100644 index 0000000..632b0fe --- /dev/null +++ b/check.m @@ -0,0 +1,20 @@ +# run the tests in the test 
directory (that is, files that start with 'test_') +# only run tests that do not require network access for downloading files +cd test + +# No for-loop, as the test files call clean() +file = 'test_mex_accuracy.m'; +disp(['Testing ', file]); +source(file); + +file = 'test_train_sc.m'; +disp(['Testing ', file]); +source(file); + +%file = 'test_xptopen.m'; +%disp(['Testing ', file]); +%source(file); + +file = 'test_xval.m'; +disp(['Testing ', file]); +source(file); diff --git a/clean b/clean new file mode 100644 index 0000000..6cf24ab --- /dev/null +++ b/clean @@ -0,0 +1,7 @@ +iris.data +inst/make.m +src/config.status +src/Makefile +src/config.h +src/config.log +test/debug.mat diff --git a/control b/control new file mode 100644 index 0000000..4fb3b05 --- /dev/null +++ b/control @@ -0,0 +1,36 @@ +Source: octave-nan +Maintainer: Debian Octave Group <team+pkg-octave-team@tracker.debian.org> +Uploaders: Thomas Weber , + Rafael Laboissière , + Sébastien Villemot , + Mike Miller +Section: math +Priority: optional +Build-Depends: debhelper-compat (= 13), + dh-octave (>= 0.7.1), + liblinear-dev, + libsvm-dev, + wget +Standards-Version: 4.5.1 +Vcs-Browser: https://salsa.debian.org/pkg-octave-team/octave-nan +Vcs-Git: https://salsa.debian.org/pkg-octave-team/octave-nan.git +Homepage: https://octave.sourceforge.io/nan/ +Testsuite: autopkgtest-pkg-octave +Rules-Requires-Root: no + +Package: octave-nan +Architecture: any +Depends: ${misc:Depends}, + ${shlibs:Depends}, + ${octave:Depends} +Description: handles data with and without missing values in Octave + This package allows the handling of data with and without missing + values by using NaN in Octave. Features of this package include: + implementation of the statistical tools, support for the DIM argument, + unbiased estimation, significance tests, confidence intervals and + Spearman's rank correlation included in CORRCOEF. + . + This package changes the way NaNs are handled in Octave. 
In order to use + its functions, the user must explicitly ask Octave to load the package. + . + This Octave add-on package is part of the Octave-Forge project. diff --git a/copyright b/copyright new file mode 100644 index 0000000..1efa934 --- /dev/null +++ b/copyright @@ -0,0 +1,117 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: NaN package for Octave +Upstream-Contact: Alois Schloegl +Source: https://octave.sourceforge.io/nan/ + +Files: * +Copyright: 1997-2012, 2014-2021 Alois Schloegl + 2007-2019 The LIBLINEAR Project + 2000-2019 Chih-Chung Chang and Chih-Jen Lin + 2006 J. Duintjer Tebbens + 2003 Patrick Houweling +License: GPL-3+ + +Files: inst/mahal.m inst/kurtosis.m inst/center.m inst/trimean.m inst/cov.m + inst/cor.m inst/rankcorr.m inst/tinv.m inst/mad.m inst/norminv.m + inst/nantest.m inst/normcdf.m inst/coefficient_of_variation.m + inst/meansq.m inst/median.m inst/percentile.m + inst/tpdf.m inst/zscore.m inst/hist2res.m inst/meandev.m inst/rms.m + inst/sumsq.m inst/normpdf.m inst/harmmean.m inst/prctile.m +Copyright: 1996-2014 Alois Schloegl +License: GPL-2+ + +Files: Makefile +Copyright: 2015-2016 Carnë Draug + 2015-2016 Oliver Heimlich + 2017 Julien Bect + 2017 Olaf Till +License: FSFAP + Copying and distribution of this file, with or without modification, + are permitted in any medium without royalty provided the copyright + notice and this notice are preserved. This file is offered as-is, + without any warranty. 
+ +Files: src/svm.h src/svm_model_matlab.h +Copyright: 2000-2009 Chih-Chung Chang and Chih-Jen Lin + 2010 Alois Schloegl +License: BSD-3-clause + +Files: src/svm_model_matlab.c +Copyright: 2000-2009 Chih-Chung Chang and Chih-Jen Lin + 2010,2016 Alois Schloegl +License: GPL-3+ and BSD-3-clause + +Files: inst/detrend.m inst/ttest.m inst/ttest2.m +Copyright: 1995-2007 Kurt Hornik + 2001-2010 by Alois Schloegl +License: GPL-2+ + +Files: debian/* +Copyright: 2008-2009, 2014-2021 Rafael Laboissiere + 2008-2011 Thomas Weber + 2011-2017 Sébastien Villemot +License: GPL-3+ + +License: BSD-3-clause + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + . + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + . + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + . + 3. Neither name of copyright holders nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + . + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +License: GPL-2+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + . + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + . + You should have received a copy of the GNU General Public License + along with this program; If not, see <https://www.gnu.org/licenses/>. + . + On Debian systems, the complete text of the GNU General Public + License, version 2, can be found in the file + `/usr/share/common-licenses/GPL-2'. + +License: GPL-3+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + . + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + . + You should have received a copy of the GNU General Public License + along with this program; if not, see <https://www.gnu.org/licenses/>. + . 
+ On Debian systems, the complete text of the GNU General Public + License, version 3, can be found in the file + `/usr/share/common-licenses/GPL-3'. diff --git a/gbp.conf b/gbp.conf new file mode 100644 index 0000000..6b65fe0 --- /dev/null +++ b/gbp.conf @@ -0,0 +1,4 @@ +[DEFAULT] +debian-branch = debian/latest +upstream-branch = upstream/latest +pristine-tar = True diff --git a/octave-nan.docs b/octave-nan.docs new file mode 100644 index 0000000..e25ee51 --- /dev/null +++ b/octave-nan.docs @@ -0,0 +1 @@ +doc/README.TXT diff --git a/rules b/rules new file mode 100755 index 0000000..536e6cb --- /dev/null +++ b/rules @@ -0,0 +1,10 @@ +#!/usr/bin/make -f +# -*- makefile -*- + +%: + dh $@ --buildsystem=octave --with=octave + +execute_before_dh_installdeb: + # Fix wrong permission of a file in the upstream tarball + # Reported upstream: https://savannah.gnu.org/bugs/index.php?58848 + chmod -x $$(find debian/octave-nan -name fishers_exact_test.m) diff --git a/source/format b/source/format new file mode 100644 index 0000000..163aaf8 --- /dev/null +++ b/source/format @@ -0,0 +1 @@ +3.0 (quilt) diff --git a/upstream/metadata b/upstream/metadata new file mode 100644 index 0000000..02c3396 --- /dev/null +++ b/upstream/metadata @@ -0,0 +1,3 @@ +Bug-Database: https://savannah.gnu.org/bugs/?group=octave +Bug-Submit: https://savannah.gnu.org/bugs/?func=additem&group=octave +Repository-Browse: https://octave.sourceforge.io/pkg-repository/nan/ diff --git a/watch b/watch new file mode 100644 index 0000000..a2c8f6e --- /dev/null +++ b/watch @@ -0,0 +1,2 @@ +version=4 +http://sf.net/octave/nan-(.+)\.tar\.gz -- cgit v1.2.3