diff options
66 files changed, 7333 insertions, 0 deletions
diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..d24b5aa --- /dev/null +++ b/.clang-format @@ -0,0 +1,56 @@ +--- +Language: Cpp +AccessModifierOffset: -4 +ConstructorInitializerIndentWidth: 4 +AlignEscapedNewlinesLeft: false +AlignTrailingComments: false +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AlwaysBreakTemplateDeclarations: false +AlwaysBreakBeforeMultilineStrings: false +BreakBeforeBinaryOperators: true +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: true +BinPackParameters: true +ColumnLimit: 0 +ConstructorInitializerAllOnOneLineOrOnePerLine: false +DerivePointerAlignment: false +ExperimentalAutoDetectBinPacking: false +IndentCaseLabels: false +IndentWrappedFunctionNames: false +IndentFunctionDeclarationAfterType: false +MaxEmptyLinesToKeep: 1 +KeepEmptyLinesAtTheStartOfBlocks: true +NamespaceIndentation: Inner +ObjCSpaceAfterProperty: true +ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakString: 1000 +PenaltyBreakFirstLessLess: 120 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +SpacesBeforeTrailingComments: 1 +Cpp11BracedListStyle: false +Standard: Cpp11 +IndentWidth: 4 +TabWidth: 8 +UseTab: Never +BreakBeforeBraces: Linux +SpacesInParentheses: false +SpacesInAngles: false +SpaceInEmptyParentheses: false +SpacesInCStyleCastParentheses: false +SpacesInContainerLiterals: true +SpaceBeforeAssignmentOperators: true +ContinuationIndentWidth: 4 +CommentPragmas: '^ IWYU pragma:' +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +SpaceBeforeParens: ControlStatements +DisableFormat: false +... + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e4e5f6c --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*~
\ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..237c68f --- /dev/null +++ b/.travis.yml @@ -0,0 +1,49 @@ +# +# Available repositories are listed here: +# https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json +# + +sudo: false + +language: cpp + +matrix: + include: + - env: COMPILER_VERSION=4.8 + os: linux + compiler: g++ + addons: + apt: + sources: + - ubuntu-toolchain-r-test + - kalakris-cmake + - ubuntu-sdk-team + packages: + - g++-4.8 + - cmake + - libglib2.0-dev + - jq + # - env: COMPILER_VERSION=3.5 + # os: linux + # compiler: clang++ + # addons: + # apt: + # sources: + # - ubuntu-toolchain-r-test + # - llvm-toolchain-precise-3.5 + # packages: + # - clang-3.5 + # - cmake + # - libglib2.0-dev + + +before_script: + - mkdir build + - cd build + - CC=$CC-${COMPILER_VERSION} CXX=$CXX-${COMPILER_VERSION} cmake -DBUILD_TESTS=True .. + - cd .. + +script: + - cd build + - make -k -j2 VERBOSE=1 + - ctest --output-on-failure @@ -0,0 +1,4 @@ +author of stardict: + Hu Zheng <huzheng_001@163.com> http://forlinux.yeah.net +author of sdcv: + Evgeniy Dushistov <dushistov@mail.ru> diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..a34e357 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,156 @@ +project(sdcv) + +# Older versions have a different signature for CMAKE_MINIMUM_REQUIRED, +# check it manually just to make sure +if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 2.8) + message(FATAL_ERROR "${PROJECT_NAME} requires at least CMake v2.8." + " You are running v${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}." + " Please upgrade." ) +endif(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 2.8) + +# If we get this far, use the modern signature. 
This will also cause newer +# CMake versions to try to be backwards-compatible with the desired version +cmake_minimum_required(VERSION 2.8 FATAL_ERROR) +cmake_policy(VERSION 2.8) + +include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/compiler.cmake") + +# Ask FindZLIB to treat a missing zlib as an error. +set(ZLIB_FIND_REQUIRED True) +include(FindZLIB) + +# NOTE: GLIB2_REQ is only used in the error message below; FindGLIB2.cmake +# reads GLIB2_MIN_VERSION, which is not set here, so the minimum version is +# not enforced by the search itself. +set(GLIB2_REQ "'glib-2.0 >= 2.6.1'") +set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") +include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/FindGLIB2.cmake") + +if (NOT GLIB2_FOUND) + message(FATAL_ERROR "sdcv require ${GLIB2_REQ}, " + "make sure that you install it") +endif() + +# Optional readline support: enabled by default, switched off automatically +# when the header or the library cannot be found. +set(WITH_READLINE True CACHE BOOL "Use readline library") + +if (WITH_READLINE) + find_path(READLINE_INCLUDE_DIR readline/readline.h) + find_library(READLINE_LIBRARY NAMES readline) + if (NOT (READLINE_INCLUDE_DIR AND READLINE_LIBRARY)) + # The CACHE signature needs a type and a docstring before FORCE; + # without them set() creates a normal variable holding the list + # "False;CACHE;FORCE" and the option is never switched off. + set(WITH_READLINE False CACHE BOOL "Use readline library" FORCE) + endif () +endif (WITH_READLINE) + +if (NOT WITH_READLINE) + # Shadow the (possibly *-NOTFOUND) cache entries so that the + # include_directories() and target_link_libraries() calls below + # receive empty values instead of NOTFOUND paths. + set(READLINE_INCLUDE_DIR "") + set(READLINE_LIBRARY "") +endif () + +option(ENABLE_NLS "Enable NLS support" True) + +set(sdcv_SRCS + src/sdcv.cpp + src/readline.cpp + src/readline.hpp + src/libwrapper.cpp + src/libwrapper.hpp + src/utils.cpp + src/utils.hpp + + src/stardict_lib.cpp + src/stardict_lib.hpp + src/dictziplib.cpp + src/dictziplib.hpp + src/distance.cpp + src/distance.hpp + src/mapfile.hpp +) + +# With NLS enabled, define the "lang" target: it takes the stock +# translations from po/ and writes the compiled catalogs to +# <build dir>/locale. +if (ENABLE_NLS) + find_package(GettextTools REQUIRED) + set(gettext_stockDir "${CMAKE_CURRENT_SOURCE_DIR}/po") + set(gettext_langDir "${CMAKE_CURRENT_BINARY_DIR}/lang") + set(gettext_outDir "${CMAKE_CURRENT_BINARY_DIR}/locale") + set(GETTEXT_TRANSLATIONS_PATH "${CMAKE_INSTALL_PREFIX}/share/locale") + gettext_make_target("lang" + HIERARCHY "{1}/{2}/{3}/{4}.mo" + KEYWORDS "_" + DOMAIN "sdcv" + STOCK_DIR ${gettext_stockDir} + LANG_DIR ${gettext_langDir} + OUT_DIR ${gettext_outDir} + SOURCE ${sdcv_SRCS}) + + list(APPEND makeCleanFiles ${gettext_outDir}) +endif () + +include(CheckFunctionExists) +check_function_exists(mmap HAVE_MMAP) + +include(CheckIncludeFile) +check_include_file(locale.h HAVE_LOCALE_H) + +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake + 
${CMAKE_CURRENT_BINARY_DIR}/config.h) + + +include_directories( + ${ZLIB_INCLUDE_DIR} + ${GLIB2_INCLUDE_DIRS} + ${READLINE_INCLUDE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/src/lib + ${CMAKE_CURRENT_BINARY_DIR} +) + +# +# Packing stuff +# +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "console version of StarDict program") +set(CPACK_PACKAGE_VENDOR "Evgeniy Dushistov <dushistov@mail.ru>") +set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.org") +set(CPACK_PACKAGE_VERSION_MAJOR "0") +set(CPACK_PACKAGE_VERSION_MINOR "5") +set(CPACK_PACKAGE_VERSION_PATCH "2") + +# The program version is derived from the CPack version components and +# passed to the compiler as the VERSION macro. +set(sdcv_VERSION + "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}") + +add_definitions(-DVERSION="${sdcv_VERSION}" -DHAVE_CONFIG_H) + +add_executable(sdcv ${sdcv_SRCS}) + +target_link_libraries(sdcv + ${GLIB2_LIBRARIES} + ${ZLIB_LIBRARIES} + ${READLINE_LIBRARY} +) +if (ENABLE_NLS) + set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "locale") +endif () + +include(CPack) + +install(TARGETS sdcv DESTINATION "bin") +install(FILES doc/sdcv.1 DESTINATION "share/man/man1") +install(FILES doc/uk/sdcv.1 DESTINATION "share/man/uk/man1") + +if (ENABLE_NLS) + install(DIRECTORY "${gettext_outDir}" DESTINATION "share") +endif () + +option(BUILD_TESTS "Enable automatic testing" False) + +if (BUILD_TESTS) + find_program(SHELL_CMD NAMES sh bash + DOC "Shell scripts interpretator command") + message(STATUS "Build tests") + enable_testing() # required for "make test" to work + + # Register tests/<test_name> as a CTest test. The script is run by + # ${SHELL_CMD} and receives the path of the built sdcv binary and the + # tests directory as its two arguments. + macro(add_sdcv_shell_test test_name) + add_test(NAME ${test_name} + COMMAND "${SHELL_CMD}" "${CMAKE_CURRENT_SOURCE_DIR}/tests/${test_name}" $<TARGET_FILE:sdcv> "${CMAKE_CURRENT_SOURCE_DIR}/tests") + endmacro() + + add_sdcv_shell_test(t_list) + add_sdcv_shell_test(t_use) + add_sdcv_shell_test(t_only_data_dir) + add_sdcv_shell_test(t_synonyms) + add_sdcv_shell_test(t_json) + add_sdcv_shell_test(t_exact) + add_sdcv_shell_test(t_interactive) + add_sdcv_shell_test(t_utf8output) + 
add_sdcv_shell_test(t_utf8input) + add_sdcv_shell_test(t_datadir) + +endif (BUILD_TESTS) @@ -0,0 +1,341 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. @@ -0,0 +1,79 @@ +Version 0.5.2 +Synonyms index support (.syn files) by Peter <craven@gmx.net> +Add support of json output by Peter <craven@gmx.net> (--json-output) +Add -e for exact searches (no fuzzy matches) by Peter <craven@gmx.net> +Fix build with clang 3.4.1 +fix FSF address in license by Tomáš Čech <sleep_walker@suse.com> + +Version 0.5.1 + Fix usage of SDCV_PAGER by Anton Yuzhaninov + Fix build without readline + specify dictionary order by -u switches or ~/.sdcv_ordering by Cong Gu + +Version 0.5 +- add option to colorize output +- Roman Imankulov's patch to better use of readline +- Fix build with modern compilers +- Fix work on CPU with align issues, like ARM and SPARC (thanks to Michal Čihař ) +- Migrate to C++11 and CMake (so at now archive with code 180K->54K) +- Update Russian translation + +Version 0.4.3 +- fix build problems with gcc 4.x +- French translation, thanks to Vincent Petry <PVince81@yahoo.fr> +- Slovak translation update, thanks to Ivan Masár <helix84@centrum.sk> +- Add Czech translation, thanks to Michal Čihař <michal@cihar.com> + +Version 0.4.2 +* Cache scheme has been rewritten, size of cache has been reduced +* Merge with stardict, speedup of lookup, and add full-text search +* Russian translation update + +Version 0.4.1 +* Recreate cache if idx file was modified +* Ability to use pager(SDCV_PAGER) +* Add Chinese (traditional) translation +* Add Ukrainian translation + +Version 0.4 +* Fix several bugs +* libstardict by default linked statically with sdcv +* Add a Simplified Chinese translation +* documentation update +* add 
ability to use readline library with sdcv +* add ability to hold history in ~/.sdcv_history + +Version 0.3.4 +* Fix bug of handling of fuzzy search +* More clever showing search results + +Version 0.3.3 +* Fix bug in using -u option +* Update Slovak translation + +Version 0.3.2 +* Fix bug in Dictionary::load for correct work with gzip'ed idx files +* Add Slovak translation +* Change format of output for -l option + +Version 0.3.1 +* Fix bug in Lib::Lookup, which cause SF and other problems in search +* Change installation procedure to install with libstardict headers files, +* to help compile any application with libstardict +* Documentation update + +Version 0.3 +* update libstardict, make search in dictionaries and loading of dictionaries +faster +* add --data-dir option +* -u, --use-dict now take bookname as argument, not a name of 'ifo' file +* documentation update and improve + +Version 0.2 + +* Add autoconf, automake, gettext support. +* Divide package into two parts: libstardict and sdcv by it self. +* Add options for: +* search word in special dictionary +* get list of dictionaries +* for using in scripts diff --git a/README.org b/README.org new file mode 100644 index 0000000..d041fb6 --- /dev/null +++ b/README.org @@ -0,0 +1,40 @@ +#+OPTIONS: ^:nil +[[https://travis-ci.org/Dushistov/sdcv][https://travis-ci.org/Dushistov/sdcv.svg?branch=master]] +[[https://github.com/Dushistov/sdcv/blob/master/LICENSE][https://img.shields.io/badge/license-GPL%202-brightgreen.svg]] +* How to compile and install +#+BEGIN_SRC sh +mkdir /tmp/build-sdcv +cd /tmp/build-sdcv +cmake path/to/source/code/of/sdcv +make +#+END_SRC +if you enable nls then you should also type +#+BEGIN_SRC sh +make lang +#+END_SRC +** to install type +#+BEGIN_SRC sh +make install +#+END_SRC +you can use "DESTDIR" variable to change installation path + +* Documentation +See sdcv man page for usage description. + +* Bugs +If you find a bug, report it via email to dushistov at mail dot ru. 
+Be sure to include the word "sdcv" somewhere in the "Subject:" field. + +* Notes to developer +** make source code release +#+BEGIN_SRC sh +make package_source +#+END_SRC +** update translation +#+BEGIN_SRC sh +cd po +xgettext -k_ ../src/*.cpp -o new.pot +msgmerge -U sdcv.pot new.pot +rm new.pot +for i in `ls *.po`; do msgmerge -U $i sdcv.pot; done +#+END_SRC diff --git a/cmake/FindGLIB2.cmake b/cmake/FindGLIB2.cmake new file mode 100644 index 0000000..db8d759 --- /dev/null +++ b/cmake/FindGLIB2.cmake @@ -0,0 +1,217 @@ +# - Try to find GLib2 +# Once done this will define +# +# GLIB2_FOUND - system has GLib2 +# GLIB2_INCLUDE_DIRS - the GLib2 include directory +# GLIB2_LIBRARIES - Link these to use GLib2 +# +# HAVE_GLIB_GREGEX_H glib has gregex.h header and +# supports g_regex_match_simple +# +# Copyright (c) 2006 Andreas Schneider <mail@cynapses.org> +# Copyright (c) 2006 Philippe Bernery <philippe.bernery@gmail.com> +# Copyright (c) 2007 Daniel Gollub <dgollub@suse.de> +# Copyright (c) 2007 Alban Browaeys <prahal@yahoo.com> +# Copyright (c) 2008 Michael Bell <michael.bell@web.de> +# Copyright (c) 2008 Bjoern Ricks <bjoern.ricks@googlemail.com> +# +# Redistribution and use is allowed according to the terms of the New +# BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. 
+# + + +IF (GLIB2_LIBRARIES AND GLIB2_INCLUDE_DIRS ) + # in cache already + SET(GLIB2_FOUND TRUE) +ELSE (GLIB2_LIBRARIES AND GLIB2_INCLUDE_DIRS ) + + INCLUDE(FindPkgConfig) + + ## Glib + # Forward this module's REQUIRED flag to the pkg-config lookup. + IF ( GLIB2_FIND_REQUIRED ) + SET( _pkgconfig_REQUIRED "REQUIRED" ) + ELSE ( GLIB2_FIND_REQUIRED ) + SET( _pkgconfig_REQUIRED "" ) + ENDIF ( GLIB2_FIND_REQUIRED ) + + # Primary search: ask pkg-config, adding a version constraint when + # GLIB2_MIN_VERSION is set by the caller. + IF ( GLIB2_MIN_VERSION ) + PKG_SEARCH_MODULE( GLIB2 ${_pkgconfig_REQUIRED} glib-2.0>=${GLIB2_MIN_VERSION} ) + ELSE ( GLIB2_MIN_VERSION ) + PKG_SEARCH_MODULE( GLIB2 ${_pkgconfig_REQUIRED} glib-2.0 ) + ENDIF ( GLIB2_MIN_VERSION ) + IF ( PKG_CONFIG_FOUND ) + IF ( GLIB2_FOUND ) + SET ( GLIB2_CORE_FOUND TRUE ) + ELSE ( GLIB2_FOUND ) + SET ( GLIB2_CORE_FOUND FALSE ) + ENDIF ( GLIB2_FOUND ) + ENDIF ( PKG_CONFIG_FOUND ) + + # Look for glib2 include dir and libraries w/o pkgconfig + IF ( NOT GLIB2_FOUND AND NOT PKG_CONFIG_FOUND ) + # glibconfig.h is looked up as <dir>/glib-2.0/include/glibconfig.h, + # so the roots listed here are library directories; /usr/local/lib64 + # and /usr/local/lib cover installations under /usr/local. + FIND_PATH( + _glibconfig_include_DIR + NAMES + glibconfig.h + PATHS + /opt/gnome/lib64 + /opt/gnome/lib + /opt/lib/ + /opt/local/lib + /sw/lib/ + /usr/lib64 + /usr/lib + /usr/local/lib64 + /usr/local/lib + /usr/local/include + ${CMAKE_LIBRARY_PATH} + PATH_SUFFIXES + glib-2.0/include + ) + + FIND_PATH( + _glib2_include_DIR + NAMES + glib.h + PATHS + /opt/gnome/include + /opt/local/include + /sw/include + /usr/include + /usr/local/include + PATH_SUFFIXES + glib-2.0 + ) + + #MESSAGE(STATUS "Glib headers: ${_glib2_include_DIR}") + + FIND_LIBRARY( + _glib2_link_DIR + NAMES + glib-2.0 + glib + PATHS + /opt/gnome/lib + /opt/local/lib + /sw/lib + /usr/lib + /usr/local/lib + ) + # All three pieces are required: without glibconfig.h a *-NOTFOUND + # entry would be stored in GLIB2_INCLUDE_DIRS below. + IF ( _glib2_include_DIR AND _glibconfig_include_DIR AND _glib2_link_DIR ) + SET ( _glib2_FOUND TRUE ) + ENDIF ( _glib2_include_DIR AND _glibconfig_include_DIR AND _glib2_link_DIR ) + + + IF ( _glib2_FOUND ) + SET ( GLIB2_INCLUDE_DIRS ${_glib2_include_DIR} ${_glibconfig_include_DIR} ) + SET ( GLIB2_LIBRARIES ${_glib2_link_DIR} ) + SET ( GLIB2_CORE_FOUND TRUE ) + ELSE ( _glib2_FOUND ) + SET ( GLIB2_CORE_FOUND FALSE ) + ENDIF ( _glib2_FOUND ) + + # Handle dependencies + # libintl + IF ( NOT LIBINTL_FOUND ) + 
FIND_PATH(LIBINTL_INCLUDE_DIR + NAMES + libintl.h + PATHS + /opt/gnome/include + /opt/local/include + /sw/include + /usr/include + /usr/local/include + ) + + FIND_LIBRARY(LIBINTL_LIBRARY + NAMES + intl + PATHS + /opt/gnome/lib + /opt/local/lib + /sw/lib + /usr/local/lib + /usr/lib + ) + + IF (LIBINTL_LIBRARY AND LIBINTL_INCLUDE_DIR) + SET (LIBINTL_FOUND TRUE) + ENDIF (LIBINTL_LIBRARY AND LIBINTL_INCLUDE_DIR) + ENDIF ( NOT LIBINTL_FOUND ) + + # libiconv + IF ( NOT LIBICONV_FOUND ) + FIND_PATH(LIBICONV_INCLUDE_DIR + NAMES + iconv.h + PATHS + /opt/gnome/include + /opt/local/include + /sw/include + /usr/local/include + /usr/include + PATH_SUFFIXES + glib-2.0 + ) + + FIND_LIBRARY(LIBICONV_LIBRARY + NAMES + iconv + PATHS + /opt/gnome/lib + /opt/local/lib + /sw/lib + /usr/lib + /usr/local/lib + ) + + IF (LIBICONV_LIBRARY AND LIBICONV_INCLUDE_DIR) + SET (LIBICONV_FOUND TRUE) + ENDIF (LIBICONV_LIBRARY AND LIBICONV_INCLUDE_DIR) + ENDIF ( NOT LIBICONV_FOUND ) + + # Append the optional dependencies to the result variables. + IF (LIBINTL_FOUND) + SET (GLIB2_LIBRARIES ${GLIB2_LIBRARIES} ${LIBINTL_LIBRARY}) + SET (GLIB2_INCLUDE_DIRS ${GLIB2_INCLUDE_DIRS} ${LIBINTL_INCLUDE_DIR}) + ENDIF (LIBINTL_FOUND) + + IF (LIBICONV_FOUND) + SET (GLIB2_LIBRARIES ${GLIB2_LIBRARIES} ${LIBICONV_LIBRARY}) + SET (GLIB2_INCLUDE_DIRS ${GLIB2_INCLUDE_DIRS} ${LIBICONV_INCLUDE_DIR}) + ENDIF (LIBICONV_FOUND) + + ENDIF ( NOT GLIB2_FOUND AND NOT PKG_CONFIG_FOUND ) + ## + + IF (GLIB2_CORE_FOUND AND GLIB2_INCLUDE_DIRS AND GLIB2_LIBRARIES) + SET (GLIB2_FOUND TRUE) + ENDIF (GLIB2_CORE_FOUND AND GLIB2_INCLUDE_DIRS AND GLIB2_LIBRARIES) + + IF (GLIB2_FOUND) + IF (NOT GLIB2_FIND_QUIETLY) + MESSAGE (STATUS "Found GLib2: ${GLIB2_LIBRARIES} ${GLIB2_INCLUDE_DIRS}") + ENDIF (NOT GLIB2_FIND_QUIETLY) + ELSE (GLIB2_FOUND) + IF (GLIB2_FIND_REQUIRED) + MESSAGE (SEND_ERROR "Could not find GLib2") + ENDIF (GLIB2_FIND_REQUIRED) + ENDIF (GLIB2_FOUND) + + # show the GLIB2_INCLUDE_DIRS and GLIB2_LIBRARIES variables only in the advanced view + 
MARK_AS_ADVANCED(GLIB2_INCLUDE_DIRS GLIB2_LIBRARIES) + MARK_AS_ADVANCED(LIBICONV_INCLUDE_DIR LIBICONV_LIBRARY) + MARK_AS_ADVANCED(LIBINTL_INCLUDE_DIR LIBINTL_LIBRARY) + +ENDIF (GLIB2_LIBRARIES AND GLIB2_INCLUDE_DIRS) + +IF ( GLIB2_FOUND ) + # Check if system has a newer version of glib + # which supports g_regex_match_simple + INCLUDE( CheckIncludeFiles ) + SET( CMAKE_REQUIRED_INCLUDES ${GLIB2_INCLUDE_DIRS} ) + CHECK_INCLUDE_FILES ( glib/gregex.h HAVE_GLIB_GREGEX_H ) + # Reset CMAKE_REQUIRED_INCLUDES + SET( CMAKE_REQUIRED_INCLUDES "" ) +ENDIF( GLIB2_FOUND ) diff --git a/cmake/FindGettextTools.cmake b/cmake/FindGettextTools.cmake new file mode 100644 index 0000000..231813b --- /dev/null +++ b/cmake/FindGettextTools.cmake @@ -0,0 +1,152 @@ +# - Finds GNU gettext and provides tools +# This module looks for the GNU gettext tools. This module defines the +# following values: +# GETTEXT_XGETTEXT_EXECUTABLE: The full path to the xgettext tool. +# GETTEXT_MSGMERGE_EXECUTABLE: The full path to the msgmerge tool. +# GETTEXT_MSGFMT_EXECUTABLE: The full path to the msgfmt tool. +# GETTEXT_FOUND: True if gettext has been found. +# GETTEXT_VERSION_STRING: The version of gettext found (since CMake 2.8.8) +# +# It provides the following macro: +# +# GETTEXT_MAKE_TARGET ( +# targetName +# HIERARCHY <HIERARCHY_FORMAT> +# KEYWORDS keyword1 ... keywordN +# DOMAIN <TRANSLATION_DOMAIN> +# STOCK_DIR <DIR> +# LANG_DIR <DIR> +# OUT_DIR <DIR> +# SOURCE sourceFile1 ... sourceFileN ) +# +# Creates a target that will take a set of translatable source files, +# create a Gettext pot file then copy stock translations in to the build +# directory to allow user editing, then compiles them in to mo files in a +# directory hierarchy to be used in the application. +# +# USAGE: +# targetName (e.g., "lang") +# The name of the target that will be created to generate translations. +# +# HIERARCHY (e.g., "{1}/{2}/{3}/{4}.mo") +# This is the format in which compiled message catalogs are placed. 
+# {1}: The path prefix. (e.g., "/my-repo/build/locale/") +# {2}: The language name. (e.g., "en") +# {3}: The catalog category. (e.g., "LC_MESSAGES") +# {4}: The domain. (e.g., "my-app") +# +# KEYWORDS (e.g., "_") +# A list of keywords used by xgettext to find translatable strings in the +# source files. +# +# DOMAIN (e.g., "my-app") +# The Gettext domain. It should be unique to your application. +# +# STOCK_DIR (e.g., "/my-repo/stock-lang/") +# The path to the initial translations to be copied to the LANG_DIR. +# If you have a set of official translations in your source repository, +# you'd want to set STOCK_DIR to this. +# +# LANG_DIR (e.g., "lang") +# The name of the directory to be created in the build folder, containing +# editable translations and updated templates. +# +# OUT_DIR (e.g., "locale") +# The directory that compiled catalogs will be placed in, according to +# the HIERARCHY format. +# +# SOURCE (e.g., "main.c") +# A list of source files to read translatable strings from. Usually this +# could be the same list you pass to add_executable. +# +# If you use the examples above and have a structure like this: +# /my-repo/stock-lang/en.po +# +# You may end up with this structure: +# /my-repo/stock-lang/en.po +# /my-repo/build/lang/my-app.pot +# /my-repo/build/lang/en.po +# /my-repo/build/locale/en/LC_MESSAGES/my-app.mo + +# This nasty set of tools is divided up in to three files: +# FindGettextTools.cmake +# This is the file you're reading right now. It provides a neat macro. +# FindGettextTools/config.cmake.in +# This is used as the bridge to transfer arguments from the macro to the +# actual script used to do Gettext stuff. A copy is created and filled in by +# FindGettextTools.cmake and read by FindGettextTools/script.cmake. +# The copy is found in the target's directory in the CMakeFiles directory +# under the name 'gettext.cmake'. 
+# FindGettextTools/script.cmake +# Does Gettext things based on the bridge config file whenever the target +# created using FindGettextTools.cmake is run. + +FIND_PROGRAM(GETTEXT_XGETTEXT_EXECUTABLE xgettext) +FIND_PROGRAM(GETTEXT_MSGMERGE_EXECUTABLE msgmerge) +FIND_PROGRAM(GETTEXT_MSGFMT_EXECUTABLE msgfmt) + +SET(_gettextScript "${CMAKE_CURRENT_LIST_DIR}/FindGettextTools/script.cmake") +SET(_gettextConfig "${CMAKE_CURRENT_LIST_DIR}/FindGettextTools/config.cmake.in") + +IF(GETTEXT_XGETTEXT_EXECUTABLE) + EXECUTE_PROCESS(COMMAND ${GETTEXT_XGETTEXT_EXECUTABLE} --version + OUTPUT_VARIABLE gettext_version + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + IF(gettext_version MATCHES "^xgettext \\(.*\\) [0-9]") + STRING(REGEX REPLACE "^xgettext \\([^\\)]*\\) ([0-9\\.]+[^ \n]*).*" "\\1" + GETTEXT_VERSION_STRING "${gettext_version}") + ENDIF() + SET(gettext_version) +ENDIF() + +INCLUDE(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(Gettext + REQUIRED_VARS + GETTEXT_XGETTEXT_EXECUTABLE + GETTEXT_MSGMERGE_EXECUTABLE + GETTEXT_MSGFMT_EXECUTABLE + VERSION_VAR GETTEXT_VERSION_STRING) + +INCLUDE(CMakeParseArguments) + +FUNCTION(GETTEXT_MAKE_TARGET _targetName) + SET(_oneValueArgs HIERARCHY DOMAIN STOCK_DIR LANG_DIR OUT_DIR) + SET(_multiValueArgs KEYWORDS SOURCE) + + CMAKE_PARSE_ARGUMENTS(_parsedArguments + "" + "${_oneValueArgs}" + "${_multiValueArgs}" + "${ARGN}") + + IF(NOT ( + _parsedArguments_HIERARCHY AND + _parsedArguments_KEYWORDS AND + _parsedArguments_DOMAIN AND + _parsedArguments_STOCK_DIR AND + _parsedArguments_LANG_DIR AND + _parsedArguments_OUT_DIR AND + _parsedArguments_SOURCE)) + MESSAGE(FATAL_ERROR "Wrong usage!") + ENDIF() + + SET(_config + "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${_targetName}.dir/gettext.cmake") + + CONFIGURE_FILE(${_gettextConfig} ${_config}) + + ADD_CUSTOM_TARGET(${_targetName} ${CMAKE_COMMAND} "-P" ${_gettextScript} + ${_config} VERBATIM) # VERBATIM: pass script/config paths through unmangled, even with spaces +ENDFUNCTION() + +IF(GETTEXT_MSGMERGE_EXECUTABLE AND + GETTEXT_MSGFMT_EXECUTABLE AND + 
GETTEXT_XGETTEXT_EXECUTABLE) + SET(GETTEXT_FOUND TRUE) +ELSE() + SET(GETTEXT_FOUND FALSE) + IF(GettextTools_FIND_REQUIRED OR GETTEXT_REQUIRED) # find_package(GettextTools REQUIRED) sets GettextTools_FIND_REQUIRED; GETTEXT_REQUIRED kept for callers that set it by hand + MESSAGE(FATAL_ERROR "Gettext not found") + ENDIF() +ENDIF() diff --git a/cmake/FindGettextTools/config.cmake.in b/cmake/FindGettextTools/config.cmake.in new file mode 100644 index 0000000..57e3739 --- /dev/null +++ b/cmake/FindGettextTools/config.cmake.in @@ -0,0 +1,14 @@ +# This file is automatically generated by GettextTools. +# It WILL be overwritten by CMake, so editing it is futile. Sorry! + +set(hierarchy "${_parsedArguments_HIERARCHY}") +set(keywords "${_parsedArguments_KEYWORDS}") +set(domain "${_parsedArguments_DOMAIN}") +set(sourcePrefix "${CMAKE_CURRENT_SOURCE_DIR}") +set(stockDir "${_parsedArguments_STOCK_DIR}") +set(langDir "${_parsedArguments_LANG_DIR}") +set(outDir "${_parsedArguments_OUT_DIR}") +set(XGETTEXT_EXECUTABLE "${GETTEXT_XGETTEXT_EXECUTABLE}") +set(MSGMERGE_EXECUTABLE "${GETTEXT_MSGMERGE_EXECUTABLE}") +set(MSGFMT_EXECUTABLE "${GETTEXT_MSGFMT_EXECUTABLE}") +set(sourceFiles "${_parsedArguments_SOURCE}") diff --git a/cmake/FindGettextTools/script.cmake b/cmake/FindGettextTools/script.cmake new file mode 100644 index 0000000..364c5ea --- /dev/null +++ b/cmake/FindGettextTools/script.cmake @@ -0,0 +1,103 @@ +# ----- Set up variables. + +# Read variables from the generated config. +include(${CMAKE_ARGV3}) + +# Transform keywords in to flags. +set(keywordArgs "") +foreach(keyword ${keywords}) + list(APPEND keywordArgs "--keyword=${keyword}") +endforeach() + +# ----- Make the pot file. + +message("Creating translation template...") + +file(MAKE_DIRECTORY ${langDir}) + +set(potFile "${langDir}/${domain}.pot") + +execute_process(COMMAND ${XGETTEXT_EXECUTABLE} + "--output=${potFile}" + "--omit-header" "--add-comments" + ${keywordArgs} + ${sourceFiles} + WORKING_DIRECTORY ${sourcePrefix}) + +message(" '${domain}.pot' done.") + +# ----- Copy and merge across the po files that come with the source. 
+ +message("Copying and updating stock translations...") + +file(GLOB poFiles "${stockDir}/*.po") + +foreach(file ${poFiles}) + # Get the language name, like en_US or zh_CN from the name of the po file, so + # 'en_US.po' or 'zh_CN.po' become 'en_US' or 'zh_CN' + get_filename_component(langName ${file} NAME_WE) + + set(newFile "${langDir}/${langName}.po") + + if(NOT EXISTS ${newFile}) + execute_process(COMMAND ${MSGMERGE_EXECUTABLE} + "--output-file" ${newFile} ${file} ${potFile} + OUTPUT_QUIET ERROR_VARIABLE error RESULT_VARIABLE ret) + + if(ret) # Have to do this hack as msgmerge prints to stderr. + message(SEND_ERROR "${error}") + endif() + + message(" '${langName}' copied.") + elseif(${file} IS_NEWER_THAN ${newFile}) + execute_process(COMMAND ${MSGMERGE_EXECUTABLE} + "--update" ${newFile} ${file} + OUTPUT_QUIET ERROR_VARIABLE error RESULT_VARIABLE ret) + + if(ret) # Have to do this hack as msgmerge prints to stderr. + message(SEND_ERROR "${error}") + endif() + + message(" '${langName}' merged.") + endif() +endforeach() + +# ----- Process the files in to mo files. + +message("Compiling translations...") + +file(GLOB localPoFiles "${langDir}/*.po") + +foreach(file ${localPoFiles}) + execute_process(COMMAND ${MSGMERGE_EXECUTABLE} + "--update" ${file} ${potFile} + OUTPUT_QUIET ERROR_VARIABLE error RESULT_VARIABLE ret) + + if(ret) # Have to do this hack as msgmerge prints to stderr. 
+ message(SEND_ERROR "${error}") + endif() + + get_filename_component(langName ${file} NAME_WE) + + set(binaryFile "${hierarchy}") + string(REPLACE "{1}" "${outDir}" binaryFile "${binaryFile}") + string(REPLACE "{2}" "${langName}" binaryFile "${binaryFile}") + string(REPLACE "{3}" "LC_MESSAGES" binaryFile "${binaryFile}") + string(REPLACE "{4}" "${domain}" binaryFile "${binaryFile}") + + if(${file} IS_NEWER_THAN ${binaryFile}) + get_filename_component(binaryDir ${binaryFile} PATH) + + file(MAKE_DIRECTORY ${binaryDir}) + + execute_process(COMMAND ${MSGFMT_EXECUTABLE} + ${file} "--output-file" ${binaryFile} + OUTPUT_QUIET ERROR_VARIABLE error RESULT_VARIABLE ret) + + if(ret) # Have to do this hack as msgfmt prints to stderr. + message(SEND_ERROR "${error}") + endif() + + message(" '${langName}' done.") + endif() +endforeach() diff --git a/cmake/FindIconv.cmake b/cmake/FindIconv.cmake new file mode 100644 index 0000000..092b357 --- /dev/null +++ b/cmake/FindIconv.cmake @@ -0,0 +1,64 @@ +# - Try to find Iconv +# Once done this will define +# +# ICONV_FOUND - system has Iconv +# ICONV_INCLUDE_DIR - the Iconv include directory +# ICONV_LIBRARIES - Link these to use Iconv +# ICONV_SECOND_ARGUMENT_IS_CONST - the second argument for iconv() is const +# +include(CheckCCompilerFlag) +include(CheckCSourceCompiles) + +IF (ICONV_INCLUDE_DIR AND ICONV_LIBRARIES) + # Already in cache, be silent + SET(ICONV_FIND_QUIETLY TRUE) +ENDIF (ICONV_INCLUDE_DIR AND ICONV_LIBRARIES) + +FIND_PATH(ICONV_INCLUDE_DIR iconv.h HINTS /sw/include/ PATHS /opt/local) + +FIND_LIBRARY(ICONV_LIBRARIES NAMES iconv libiconv c PATHS /opt/local) + +IF(ICONV_INCLUDE_DIR AND ICONV_LIBRARIES) + SET(ICONV_FOUND TRUE) +ENDIF(ICONV_INCLUDE_DIR AND ICONV_LIBRARIES) + +set(CMAKE_REQUIRED_INCLUDES ${ICONV_INCLUDE_DIR}) +set(CMAKE_REQUIRED_LIBRARIES ${ICONV_LIBRARIES}) +IF(ICONV_FOUND) + check_c_compiler_flag("-Werror" ICONV_HAVE_WERROR) + set (CMAKE_C_FLAGS_BACKUP "${CMAKE_C_FLAGS}") + if(ICONV_HAVE_WERROR) + set 
(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror") + endif(ICONV_HAVE_WERROR) + check_c_source_compiles(" + #include <iconv.h> + int main(){ + iconv_t conv = 0; + const char* in = 0; + size_t ilen = 0; + char* out = 0; + size_t olen = 0; + iconv(conv, &in, &ilen, &out, &olen); + return 0; + } +" ICONV_SECOND_ARGUMENT_IS_CONST ) + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS_BACKUP}") +ENDIF(ICONV_FOUND) +set(CMAKE_REQUIRED_INCLUDES) +set(CMAKE_REQUIRED_LIBRARIES) + +IF(ICONV_FOUND) + IF(NOT ICONV_FIND_QUIETLY AND NOT Iconv_FIND_QUIETLY) # find_package(Iconv QUIET) sets Iconv_FIND_QUIETLY; ICONV_FIND_QUIETLY is this module's own "already cached" flag + MESSAGE(STATUS "Found Iconv: ${ICONV_LIBRARIES}") + ENDIF() +ELSE(ICONV_FOUND) + IF(Iconv_FIND_REQUIRED) + MESSAGE(FATAL_ERROR "Could not find Iconv") + ENDIF(Iconv_FIND_REQUIRED) +ENDIF(ICONV_FOUND) + +MARK_AS_ADVANCED( + ICONV_INCLUDE_DIR + ICONV_LIBRARIES + ICONV_SECOND_ARGUMENT_IS_CONST +) diff --git a/cmake/FindLibintl.cmake b/cmake/FindLibintl.cmake new file mode 100644 index 0000000..7fec58b --- /dev/null +++ b/cmake/FindLibintl.cmake @@ -0,0 +1,56 @@ +# Try to find the libintl library. Explicit searching is currently +# only required for Win32, though it might be useful for some UNIX +# variants, too. Therefore code for searching common UNIX include +# directories is included, too. 
+# + +# Once done this will define +# +# LIBINTL_FOUND - system has libintl +# LIBINTL_LIBRARIES - libraries needed for linking + +IF (LIBINTL_FOUND) + SET(LIBINTL_FIND_QUIETLY TRUE) +ENDIF () + +# for Windows we rely on the environment variables +# %INCLUDE% and %LIB%; FIND_LIBRARY checks %LIB% +# automatically on Windows +IF(WIN32) + FIND_LIBRARY(LIBINTL_LIBRARIES + NAMES intl + ) + IF(LIBINTL_LIBRARIES) + SET(LIBINTL_FOUND TRUE) + ELSE(LIBINTL_LIBRARIES) + SET(LIBINTL_FOUND FALSE) + ENDIF(LIBINTL_LIBRARIES) +ELSE() + include(CheckFunctionExists) + check_function_exists(dgettext LIBINTL_LIBC_HAS_DGETTEXT) + if (LIBINTL_LIBC_HAS_DGETTEXT) + find_library(LIBINTL_LIBRARIES NAMES c) + set(LIBINTL_FOUND TRUE) + else (LIBINTL_LIBC_HAS_DGETTEXT) + find_library(LIBINTL_LIBRARIES + NAMES intl libintl + PATHS /usr/lib /usr/local/lib + ) + IF(LIBINTL_LIBRARIES) + SET(LIBINTL_FOUND TRUE) + ELSE(LIBINTL_LIBRARIES) + SET(LIBINTL_FOUND FALSE) + ENDIF(LIBINTL_LIBRARIES) + ENDIF (LIBINTL_LIBC_HAS_DGETTEXT) +ENDIF() + +IF (LIBINTL_FOUND) + IF (NOT LIBINTL_FIND_QUIETLY AND NOT Libintl_FIND_QUIETLY) # find_package(Libintl QUIET) sets Libintl_FIND_QUIETLY + MESSAGE(STATUS "Found libintl: ${LIBINTL_LIBRARIES}") + ENDIF () +ELSE () + IF (LIBINTL_FIND_REQUIRED OR Libintl_FIND_REQUIRED) # find_package(Libintl REQUIRED) sets Libintl_FIND_REQUIRED + MESSAGE(FATAL_ERROR "Could NOT find libintl") + ENDIF () +ENDIF () + +MARK_AS_ADVANCED(LIBINTL_LIBRARIES LIBINTL_LIBC_HAS_DGETTEXT) diff --git a/cmake/FindPackageHandleStandardArgs.cmake b/cmake/FindPackageHandleStandardArgs.cmake new file mode 100644 index 0000000..151d812 --- /dev/null +++ b/cmake/FindPackageHandleStandardArgs.cmake @@ -0,0 +1,58 @@ +# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... ) +# This macro is intended to be used in FindXXX.cmake modules files. +# It handles the REQUIRED and QUIET argument to FIND_PACKAGE() and +# it also sets the <UPPERCASED_NAME>_FOUND variable. +# The package is found if all variables listed are TRUE. 
+# Example: +# +# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR) +# +# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and +# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE. +# If it is not found and REQUIRED was used, it fails with FATAL_ERROR, +# independent whether QUIET was used or not. +# If it is found, the location is reported using the VAR1 argument, so +# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out. +# If the second argument is DEFAULT_MSG, the message in the failure case will +# be "Could NOT find LibXml2", if you don't like this message you can specify +# your own custom failure message there. + +MACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1 ) + + IF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") + IF (${_NAME}_FIND_REQUIRED) + SET(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}") + ELSE (${_NAME}_FIND_REQUIRED) + SET(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}") + ENDIF (${_NAME}_FIND_REQUIRED) + ELSE("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") + SET(_FAIL_MESSAGE "${_FAIL_MSG}") + ENDIF("${_FAIL_MSG}" STREQUAL "DEFAULT_MSG") + + STRING(TOUPPER ${_NAME} _NAME_UPPER) + + SET(${_NAME_UPPER}_FOUND TRUE) + IF(NOT ${_VAR1}) + SET(${_NAME_UPPER}_FOUND FALSE) + ENDIF(NOT ${_VAR1}) + + FOREACH(_CURRENT_VAR ${ARGN}) + IF(NOT ${_CURRENT_VAR}) + SET(${_NAME_UPPER}_FOUND FALSE) + ENDIF(NOT ${_CURRENT_VAR}) + ENDFOREACH(_CURRENT_VAR) + + IF (${_NAME_UPPER}_FOUND) + IF (NOT ${_NAME}_FIND_QUIETLY) + MESSAGE(STATUS "Found ${_NAME}: ${${_VAR1}}") + ENDIF (NOT ${_NAME}_FIND_QUIETLY) + ELSE (${_NAME_UPPER}_FOUND) + IF (${_NAME}_FIND_REQUIRED) + MESSAGE(FATAL_ERROR "${_FAIL_MESSAGE}") + ELSE (${_NAME}_FIND_REQUIRED) + IF (NOT ${_NAME}_FIND_QUIETLY) + MESSAGE(STATUS "${_FAIL_MESSAGE}") + ENDIF (NOT ${_NAME}_FIND_QUIETLY) + ENDIF (${_NAME}_FIND_REQUIRED) + ENDIF (${_NAME_UPPER}_FOUND) +ENDMACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS) diff --git 
a/cmake/compiler.cmake b/cmake/compiler.cmake new file mode 100644 index 0000000..cf94ed3 --- /dev/null +++ b/cmake/compiler.cmake @@ -0,0 +1,40 @@ +# append(<value> <var>...): append " <value>" to every variable named after it, +# writing the result into the caller's scope. <value> is exactly ONE argument, +# so several flags must be passed as a single quoted, space-separated string. +function(append value) + foreach(variable ${ARGN}) + set(${variable} "${${variable}} ${value}" PARENT_SCOPE) + endforeach(variable) +endfunction() + +include(CheckCXXCompilerFlag) + +if (NOT DEFINED SDCV_COMPILER_IS_GCC_COMPATIBLE) + if (CMAKE_COMPILER_IS_GNUCXX) + set(SDCV_COMPILER_IS_GCC_COMPATIBLE ON) + elseif (MSVC) + set(SDCV_COMPILER_IS_GCC_COMPATIBLE OFF) + elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + set (SDCV_COMPILER_IS_GCC_COMPATIBLE ON) + endif() +endif() + +if (MSVC AND (MSVC_VERSION LESS 1900)) + message(FATAL_ERROR "MSVC version ${MSVC_VERSION} have no full c++11 support") +elseif (MSVC) + add_definitions(-DNOMINMAX) +elseif (NOT MSVC) + check_cxx_compiler_flag("-std=c++11" CXX_SUPPORTS_CXX11) + if (CXX_SUPPORTS_CXX11) + append("-std=c++11" CMAKE_CXX_FLAGS) + else () + message(FATAL_ERROR "sdcv requires C++11 support but the '-std=c++11' flag isn't supported.") + endif() +endif () + +if (SDCV_COMPILER_IS_GCC_COMPATIBLE) + # Pass all warning flags as ONE value: append() treats every argument after + # the first as a variable name, so separately quoted flags never reach the flags variable. + append("-Wall -Wextra -Wformat-security -Wcast-align -Werror=format -Wcast-qual" CMAKE_C_FLAGS) + append("-Wall -pedantic -Wextra -Wformat-security -Wcast-align -Werror=format -Wcast-qual" CMAKE_CXX_FLAGS) +endif () diff --git a/config.h.cmake b/config.h.cmake new file mode 100644 index 0000000..e67be0c --- /dev/null +++ b/config.h.cmake @@ -0,0 +1,5 @@ +#cmakedefine HAVE_MMAP 1 +#cmakedefine HAVE_LOCALE_H 1 +#cmakedefine WITH_READLINE 1 +#cmakedefine ENABLE_NLS 1 +#cmakedefine GETTEXT_TRANSLATIONS_PATH "${GETTEXT_TRANSLATIONS_PATH}"
\ No newline at end of file diff --git a/doc/DICTFILE_FORMAT b/doc/DICTFILE_FORMAT new file mode 100644 index 0000000..d1b1d9d --- /dev/null +++ b/doc/DICTFILE_FORMAT @@ -0,0 +1,352 @@ +Format for StarDict dictionary files +------------------------------------ + +StarDict homepage: http://stardict.sourceforge.net + +{0}. Number and Byte-order Conventions +When you record the numbers that identify sizes, offsets, etc., you +should use 32-bit numbers, such as you might represent with a glong. + +In order to make StarDict work on different platforms, these numbers +must be in network byte order. You can ensure the correct byte order +by using the g_htonl() function when creating dictionary files. +Conversely, you should use g_ntohl() when reading dictionary files. + +Strings should be encoded in UTF-8. + + +{1}. Files +Every dictionary consists of three files: +(1). somedict.ifo +(2). somedict.idx or somedict.idx.gz +(3). somedict.dict or somedict.dict.dz + +You can use gzip -9 to compress the .idx file. If the .idx file is not +compressed, loading is fast and uses less memory; if it is compressed, the +whole .idx file is loaded into memory, which makes querying fast. + +You can use dictzip to compress the .dict file. +"dictzip" uses the same compression algorithm and file format as does gzip, +but provides a table that can be used to randomly access compressed blocks +in the file. The use of 50-64kB blocks for compression typically degrades +compression by less than 10%, while maintaining acceptable random access +capabilities for all data in the file. As an added benefit, files +compressed with dictzip can be decompressed with gunzip. +For more information about dictzip, refer to DICT project, please see: +http://www.dict.org + +Stardict will search for the .ifo file, then open the .idx or +.idx.gz file and the .dict.dz or .dict file which is in the same directory and +has the same base name. + + + +{2}. The ".ifo" file's format. 
+The .ifo file has the following format: + +StarDict's dict ifo file +version=2.4.2 +[options] + +Note that the current "version" string must be "2.4.2". If it's not, +then StarDict will refuse to read the file. + +[options] +--------- +In the example above, [options] expands to any of the following lines +specifying information about the dictionary. Each option is a keyword +followed by an equal sign, then the value of that option, then a +newline. The options may appear in any order. + +Note that the dictionary must have at least a bookname, a wordcount and an +idxfilesize, or the load will fail. All other information is optional. All +strings should be encoded in UTF-8. + +Available options: + +bookname= // required +wordcount= // required +idxfilesize= // required +author= +email= +website= +description= +date= +sametypesequence= // very important. + + +wordcount is the count of word entries in .idx file, it must be right. + +idxfilesize is the size(in bytes) of the .idx file, even if the .idx is compressed +to a .idx.gz file, this entry must record the original .idx file's size, and it +must be right too. The .gz file doesn't contain its original size information, +but knowing the original size can speed up the extraction to memory, as you +don't need to call realloc() many times. + + +The "sametypesequence" option is described in further detail below. + +*** +sametypesequence + +You should first familiarize yourself with the .dict file format +described in the next section so that you can understand what effect +this option has on the .dict file. + +If the sametypesequence option is set, it tells StarDict that each +word's data in the .dict file will have the same sequence of datatypes. +In this case, we expect a .dict file that's been optimized in two +ways: the type identifiers should be omitted, and the size marker for +the last data entry of each word should be omitted. + +Let's consider some concrete examples of the sametypesequence option. 
+ +Suppose that a dictionary records many .wav files, and so sets: + sametypesequence=W +In this case, each word's entry in the .dict file consists solely of a +wav file. In the .dict file, you would leave out the 'W' character +before each entry, and you would also omit the 32-bit integer at the +front of each .wav entry that would normally give the entry's length. +You can do this since the length is known from the information in the +idx file. + +As another example, suppose a dictionary contains phonetic information +and a meaning for each word. The sametypesequence option for this +dictionary would be: + sametypesequence=tm +Once again, you can omit the 't' and 'm' characters before each data +entry in the .dict file. In addition, you should omit the terminating +'\0' for the 'm' entry for each word in the .dict file, as the length +of the meaning string can be inferred from the length of the phonetic +string (still indicated by a terminating '\0') and the length of the +entire word entry (listed in the .idx file). + +So for cases where the last data entry for each word normally requires +a terminating '\0' character, you should omit this character in the +dict file. And for cases where the last data entry for each word +normally requires an initial 32-bit number giving the length of the +field (such as WAV and PNG entries), you must omit this number in the +dictionary. + +Every dictionary should try to use the sametypesequence feature to +save disk space. +*** + + +{3}. The ".idx" file's format. +The .idx file is just a word list. + +The word list is a sorted list of word entries. + +Each entry in the word list contains three fields, one after the other: + word_str; // a utf-8 string terminated by '\0'. + word_data_offset; // word data's offset in .dict file + word_data_size; // word data's total size in .dict file + +word_str gives the string representing this word. It's the string +that is "looked up" by the StarDict. 
+ +word_data_offset and word_data_size should both be 32-bit numbers in +network byte order. + +No two entries should have the same "word_str". In other words, +(strcmp(s1, s2) != 0). + +The length of "word_str" should be less than 256. In other words, +(strlen(word) < 256). + +The word list must be sorted by calling stardict_strcmp() on the "word_str" +fields. If the word list order is wrong, StarDict will fail to function +correctly! + +============ +gint stardict_strcmp(const gchar *s1, const gchar *s2) +{ + gint a; + a = g_ascii_strcasecmp(s1, s2); + if (a == 0) + return strcmp(s1, s2); + else + return a; +} +============ +g_ascii_strcasecmp() is a glib function: +Unlike the BSD strcasecmp() function, this only recognizes standard +ASCII letters and ignores the locale, treating all non-ASCII characters +as if they are not letters. + +stardict_strcmp() works fine with English characters, but the other +locale characters' sorting is not so good. There should be a _strcmp +function which handles the utf-8 string sorting better. If you know +one, email me :) + +g_utf8_collate()? This is a locale-dependent function. So if you look +up Chinese characters while in the Chinese locale, it works fine. But +if you are in some other locale then the lookup will fail, as the +order is not the same as in the Chinese locale (which was used when +creating the dictionary). + +g_utf8_to_ucs4() then do comparing? This sounds like a good solution, but.. + +The complete solution can be found in "Unicode Technical Standard #10: Unicode +Collation Algorithm", http://www.unicode.org/reports/tr10/ + +I hope glib will provide a locale-independent g_utf8_collate() soon. +http://bugzilla.gnome.org/show_bug.cgi?id=112798 + + + +{4}. The ".dict" file's format. +The .dict file is a pure data sequence, as the offset and size of each +word is recorded in the corresponding .idx file. 
+ +If the "sametypesequence" option is not used in the .ifo file, then +the .dict file has fields in the following order: +============== +word_1_data_1_type; // a single char identifying the data type +word_1_data_1_data; // the data +word_1_data_2_type; +word_1_data_2_data; +...... // the number of data entries for each word is determined by + // word_data_size in .idx file +word_2_data_1_type; +word_2_data_1_data; +...... +============== +It's important to note that each field in each word indicates its +own length, as described below. The number of possible fields per +word is also not fixed, and is determined by simply reading data until +you've read word_data_size bytes for that word. + + +Suppose the "sametypesequence" option is used in the .ifo file, and +the option is set like this: +sametypesequence=tm +Then the .dict file will look like this: +============== +word_1_data_1_data +word_1_data_2_data +word_2_data_1_data +word_2_data_2_data +...... +============== +The first data entry for each word will have a terminating '\0', but +the second entry will not have a terminating '\0'. The omissions of +the type chars and of the last field's size information are the +optimizations required by the "sametypesequence" option described +above. + + +Type identifiers +---------------- +Here are the single-character type identifiers that may be used with +the "sametypesequence" option in the .ifo file, or may appear in the +dict file itself if the "sametypesequence" option is not used. + +Lower-case characters signify that a field's size is determined by a +terminating '\0', while upper-case characters indicate that the data +begins with a 32-bit integer that gives the length of the data field. + +'m' +Word's pure text meaning. +The data should be a utf-8 string ending with '\0'. + +'l' +Word's pure text meaning. +The data is NOT a utf-8 string, but is instead a string in locale +encoding, ending with '\0'. 
Sometimes using this type will save disk +space, but its use is discouraged. + +'g' +A utf-8 string which is marked up with the Pango text markup language. +For more information about this markup language, see the "Pango +Reference Manual." +You might have it installed locally at: +file:///usr/share/gtk-doc/html/pango/PangoMarkupFormat.html + +'t' +English phonetic string. +The data should be a utf-8 string ending with '\0'. + +Here are some utf-8 phonetic characters: +θʃŋʧðʒæıʌʊɒɛəɑɜɔˌˈːˑ +æɑɒʌәєŋvθðʃʒːɡˏˊˋ + +'y' +Chinese YinBiao. +The data should be a utf-8 string ending with '\0'. + + +'W' +wav file. +The data begins with a network byte-ordered glong to identify the wav +file's size, immediately followed by the file's content. + +'P' +png file. +The data begins with a network byte-ordered glong to identify the png +file's size, immediately followed by the file's content. + +'X' +this type identifier is reserved for experimental extensions. + + +{5}. Tree Dictionary +The tree dictionary support is used for information viewing, etc. + +A tree dictionary contains three files: sometreedict.ifo, sometreedict.tdx.gz +and sometreedict.dict.dz. + +It is better to compress the .tdx file, as it is always loaded into memory. + +The .ifo file has the following format: + +StarDict's treedict ifo file +version=2.4.2 +[options] + +Available options: + +bookname= // required +tdxfilesize= // required +wordcount= +author= +email= +website= +description= +date= +sametypesequence= + +wordcount is only used for info view in the dict manage dialog, so it is not +important in tree dictionary. + +The .tdx file is just the word list. +----------- +The word list is a tree list of word entries. + +Each entry in the word list contains four fields, one after the other: + word_str; // a utf-8 string terminated by '\0'. + word_data_offset; // word data's offset in .dict file + word_data_size; // word data's total size in .dict file. it can be 0. 
+ word_subentry_count; // how many sub words this entry has, 0 means none. + +Subentries immediately follow their parent entry. This makes the order the +same as that of a tree list with all its nodes expanded, read from top to +bottom. + +The .dict file's format is the same as the normal dictionary. + + + +{6}. More information. +You can read "src/lib.cpp", "src/dictmanagedlg.cpp" and +"src/tools/*.cpp" for more information. + +If you have any questions, email me. :) + +Thanks to Will Robinson <wsr23@stanford.edu> for cleaning up this file's +English. + +Hu Zheng <huzheng_001@163.com> +http://forlinux.yeah.net +2003.11.11 diff --git a/doc/sdcv.1 b/doc/sdcv.1 new file mode 100644 index 0000000..86351b7 --- /dev/null +++ b/doc/sdcv.1 @@ -0,0 +1,104 @@ +.TH SDCV 1 "2006-04-24" "sdcv-0.4.2" +.SH NAME +sdcv \- console version of StarDict program +.SH SYNOPSIS +.B sdcv +[ +.BI options +] +[list of words] +.SH DESCRIPTION +.I sdcv +is a simple, cross-platform text-based utility +for working with dictionaries in StarDict format. +Each word from "list of words" may be a string +with a leading '/' for using a Fuzzy search algorithm, +with a leading '|' for using full-text search, +and the string may contain '?' and '*' for regexp search. +It works in interactive and non-interactive mode. +To exit from interactive mode press Ctrl+D. +In interactive mode, +if sdcv was compiled with readline library support, +you can use the UP and DOWN keys to cycle through history. 
+.SH OPTIONS +.TP 8 +.B "\-h \-\-help" +Display help message and exit +.TP 8 +.B "\-v \-\-version" +Display version and exit +.TP 8 +.B "\-l \-\-list\-dicts" +Display list of available dictionaries and exit +.TP 8 +.B "\-u \-\-use\-dict bookname" +For search use only dictionary with this bookname +.TP 8 +.B "\-n \-\-non\-interactive" +For use in scripts +.TP 8 +.B "\-x \-\-only\-data\-dir" +For use in scripts: only use the dictionaries in data-dir, do not search in user and system directories +.TP 8 +.B "\-e \-\-exact\-search" +Do not fuzzy-search for similar words, only return exact matches +.TP 8 +.B "\-j \-\-json" +Print the results of list-dicts and searches as json, not as plain text. +For use in automatically processing the results of a dictionary lookup. +.TP 8 +.B "\-\-utf8\-output" +Force sdcv to not convert to locale charset, output in utf8 +.TP 8 +.B "\-\-utf8\-input" +Force sdcv to not convert from locale charset, assume that +input is in utf8 +.TP 8 +.B "\-\-data\-dir path/to/directory" +Use this directory as the path to the stardict data directory. This means that +sdcv searches for dictionaries in the data-dir/dic directory. +.TP 8 +.B "\-\-color" +Use ANSI escape codes for colorizing sdcv output (does not work with json output). +.SH FILES +.TP +/usr/share/stardict/dic +.TP +$(HOME)/.stardict/dic + +Place where sdcv expects to find dictionaries. +Instead of /usr/share/stardict/dic you can use any directory +you want, just set the STARDICT_DATA_DIR environment variable. +For example, if you have dictionaries in /mnt/data/stardict-dicts/dic, +set STARDICT_DATA_DIR to /mnt/data/stardict-dicts. +.TP +$(HOME)/.sdcv_history + +This file includes the last $(SDCV_HISTSIZE) words, which you looked up with sdcv. +SDCV uses this file only if it was compiled with readline library support. +.TP +$(HOME)/.sdcv_ordering + +This is a text file containing one dictionary bookname per line. +It specifies in which order the results of a search should be shown. 
+.SH ENVIRONMENT +Environment Variables Used By \fIsdcv\fR: +.TP 20 +.B STARDICT_DATA_DIR +If set, sdcv uses this variable as the data directory; this means that sdcv +searches for dictionaries in $\fBSTARDICT_DATA_DIR\fR/dic +.TP 20 +.B SDCV_HISTSIZE +If set, sdcv writes in $(HOME)/.sdcv_history the last $(SDCV_HISTSIZE) words, +which you look up using sdcv. If it is not set, then the last 2000 words are saved in $(HOME)/.sdcv_history. +.TP 20 +.B SDCV_PAGER +If SDCV_PAGER is set, its value is used as the name of the program +to use to display the dictionary article. +.SH BUGS +Email bug reports to dushistov at mail dot ru. Be sure to include the word +"sdcv" somewhere in the "Subject:" field. +.SH AUTHORS +Evgeniy A. Dushistov, Hu Zheng +.SH SEE ALSO +stardict(1), http://sdcv.sourceforge.net/, http://stardict.sourceforge.net diff --git a/doc/uk/sdcv.1 b/doc/uk/sdcv.1 new file mode 100644 index 0000000..ff3b270 --- /dev/null +++ b/doc/uk/sdcv.1 @@ -0,0 +1,84 @@ +.TH SDCV 1 "2004-12-06" "sdcv-0.4" +.SH NAME +sdcv \- консольна версія Зоряного словника [Stardict] +.SH SYNOPSIS +.B sdcv +[ +.BI options +] +[list of words] +.SH DESCRIPTION +.I sdcv +sdcv проста, міжплатформена текстова утиліта для роботи із +словниками у форматі Зоряного словника [StarDict]. +Слово зі "списку слів", може бути рядком з початковим слешем '/' +щоб задіяти нечіткий пошуковий алгоритм, рядок, може +містити '?' і '*' для використання пошуку з регулярними виразами. +Утиліта працює в діалоговому та не в інтерактивному режимах. +Щоб вийти з діалогового режиму натискають Ctrl+D. +У діалоговому режимі, якщо sdcv був скомпільований з підтримкою +бібліотеки readline, Ви можете використовувати клавіші ДОГОРИ +та ВНИЗ для роботи з хронологією. 
+.SH OPTIONS +.TP 8 +.B "\-h \-\-help" +відображає повідомлення довідки та виходить +.TP 8 +.B "\-v \-\-version" +відображає версію та виходить +.TP 8 +.B "\-l \-\-list\-dicts" +відображає список доступних словників та виходить +.TP 8 +.B "\-u \-\-use\-dict filename" +для пошуку з використанням лише словника з цим іменем (bookname) +.TP 8 +.B "\-n \-\-non\-interactive" +для використання в скриптах +.TP 8 +.B "\-\-utf8\-output" +Заставити sdcv розмовляти не в системному кодуванні locale, а робити вивід в utf8 +.TP 8 +.B "\-\-utf8\-input" +Заставити sdcv слухати не в системному кодуванні locale, а припускати, що це +ввід в utf8 +.TP 8 +.B "\-\-data\-dir path/to/directory" +Використовуйте цю теку як шлях до теки даних зоряного словника [stardict]. +Це означає, що sdcv шукає словники у теці data-dir/dic. +.SH FILES +.TP +/usr/share/stardict/dic +.TP +$(HOME)/.stardict/dic + +Місце, де sdcv очікує знайти словники. +Замість шляху /usr/share/stardict/dic Ви можете використовувати все, +що Ви хочете, лише встановіть змінну оточення STARDICT_DATA_DIR. +Наприклад, якщо Ви маєте словники у теці /mnt/data/stardict-dicts/dic, +встановіть STARDICT_DATA_DIR у /mnt/data/stardict-dicts. +.TP +$(HOME)/.sdcv_history + +Цей файл містить останні $(SDCV_HISTSIZE) слова, які Ви шукали з sdcv. +SDCV використовує цей файл при умові, якщо sdcv був скомпільований +з підтримкою бібліотеки readline. + +.SH ENVIRONMENT +Змінні оточення для \fIsdcv\fR: +.TP 20 +.B STARDICT_DATA_DIR +Якщо встановлена, sdcv використає цю змінну як теку даних, це означає, +що sdcv шукатиме словники у $\fBSTARDICT_DATA_DIR\fR/dic +.TP 20 +.B SDCV_HISTSIZE +Якщо встановлена, sdcv писатиме у $(HOME)/.sdcv_history лише +останні $(SDCV_HISTSIZE) слова, які Ви шукали з sdcv. Якщо не встановлена, +то зберігатиметься останніх 2000 слів у $(HOME)/.sdcv_history. +.SH BUGS +Звіти про помилки висилайте на адресу dushistov на mail крапка ru. +Не забувайте включати слово "sdcv" десь у полі "Тема:". +.SH AUTHORS +Эвгений А. 
Душистов, Hu Zheng +.SH SEE ALSO +stardict(1), http://sdcv.sourceforge.net/, http://stardict.sourceforge.net diff --git a/po/cs.po b/po/cs.po new file mode 100644 index 0000000..0d8c3c1 --- /dev/null +++ b/po/cs.po @@ -0,0 +1,165 @@ +# Czech translations for sdcv package. +# Copyright (C) 2008 Free Software Foundation, Inc. +# This file is distributed under the same license as the sdcv package. +# Michal Čihař <michal@cihar.com>, 2008. +msgid "" +msgstr "" +"Project-Id-Version: sdcv 0.4.2\n" +"Report-Msgid-Bugs-To: dushistov@mail.ru\n" +"POT-Creation-Date: 2017-08-16 09:52+0300\n" +"PO-Revision-Date: 2008-09-24 10:42+0200\n" +"Last-Translator: Michal Čihař <michal@cihar.com>\n" +"Language-Team: Czech <cs@li.org>\n" +"Language: cs\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=3; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2;\n" + +#: ../src/libwrapper.cpp:300 +msgid "popen failed" +msgstr "popen selhalo" + +#: ../src/libwrapper.cpp:340 +#, c-format +msgid "Can not convert %s to utf8.\n" +msgstr "Nepodařilo se převést %s do utf8.\n" + +#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432 +#, fuzzy, c-format +msgid "Found %zu items, similar to %s.\n" +msgstr "Nalezeno %d záznamů podobných %s.\n" + +#: ../src/libwrapper.cpp:416 +msgid "Your choice[-1 to abort]: " +msgstr "Vaše volba [-1 pro ukončení]: " + +#: ../src/libwrapper.cpp:426 +#, fuzzy, c-format +msgid "" +"Invalid choice.\n" +"It must be from 0 to %zu or -1.\n" +msgstr "" +"Chybná volba.\n" +"Musí být mezi 0 a %d nebo -1.\n" + +#: ../src/libwrapper.cpp:445 +#, c-format +msgid "Nothing similar to %s, sorry :(\n" +msgstr "Nic podobného %s nenalezeno, promiň :(\n" + +#: ../src/sdcv.cpp:88 +#, fuzzy +msgid "display version information and exit" +msgstr "-v, --version zobrazí informace o verzi a skončí\n" + +#: ../src/sdcv.cpp:90 +#, fuzzy +msgid "display list of available dictionaries and exit" +msgstr "-l, --list-dicts zobrazí 
seznam dostupných slovníků a skončí\n" + +#: ../src/sdcv.cpp:92 +#, fuzzy +msgid "for search use only dictionary with this bookname" +msgstr "-u, --use-dict jméno vyhledávat jen v zadaném slovníku\n" + +#: ../src/sdcv.cpp:93 +msgid "bookname" +msgstr "" + +#: ../src/sdcv.cpp:95 +msgid "for use in scripts" +msgstr "" + +#: ../src/sdcv.cpp:97 +msgid "print the result formatted as JSON" +msgstr "" + +#: ../src/sdcv.cpp:99 +msgid "do not fuzzy-search for similar words, only return exact matches" +msgstr "" + +#: ../src/sdcv.cpp:101 +#, fuzzy +msgid "output must be in utf8" +msgstr "--utf8-output výstup musí být v utf8\n" + +#: ../src/sdcv.cpp:103 +#, fuzzy +msgid "input of sdcv in utf8" +msgstr "--utf8-input vstup musí být v utf8\n" + +#: ../src/sdcv.cpp:105 +#, fuzzy +msgid "use this directory as path to stardict data directory" +msgstr "" +"--data-dir cesta/někam použít tento adresář jako cestu ke slovníkům " +"stardict\n" + +#: ../src/sdcv.cpp:106 +msgid "path/to/dir" +msgstr "" + +#: ../src/sdcv.cpp:108 +msgid "" +"only use the dictionaries in data-dir, do not search in user and system " +"directories" +msgstr "" + +#: ../src/sdcv.cpp:110 +msgid "colorize the output" +msgstr "" + +#: ../src/sdcv.cpp:115 +msgid " words" +msgstr "" + +#: ../src/sdcv.cpp:121 +#, c-format +msgid "Invalid command line arguments: %s\n" +msgstr "" + +#: ../src/sdcv.cpp:127 +#, c-format +msgid "Console version of Stardict, version %s\n" +msgstr "Konzolová verze Stardictu, verze %s\n" + +#: ../src/sdcv.cpp:202 +#, c-format +msgid "g_mkdir failed: %s\n" +msgstr "g_mkdir selhalo: %s\n" + +#: ../src/sdcv.cpp:217 +msgid "Enter word or phrase: " +msgstr "Zadejte slovo nebo frázi: " + +#: ../src/sdcv.cpp:225 +#, c-format +msgid "There are no words/phrases to translate.\n" +msgstr "Nebyla zadáno nic k přeložení.\n" + +#: ../src/sdcv.cpp:237 +#, c-format +msgid "Dictionary's name Word count\n" +msgstr "Jméno slovníku Počet slov\n" + +#: ../src/utils.cpp:48 +#, fuzzy, c-format +msgid "Can not 
convert %s to current locale.\n" +msgstr "Nepodařilo se převést %s do utf8.\n" + +#~ msgid "" +#~ "Unknown option.\n" +#~ "Try '%s --help' for more information.\n" +#~ msgstr "" +#~ "Neznámý parametr.\n" +#~ "Zkuste '%s --help' pro nápovědu.\n" + +#~ msgid "Usage: %s [OPTIONS] words\n" +#~ msgstr "Použití: %s [PARAMETRY] slova\n" + +#~ msgid "-h, --help display this help and exit\n" +#~ msgstr "-h, --help zobrazí tuto nápovědu a skončí\n" + +#~ msgid "-n, --non-interactive for use in scripts\n" +#~ msgstr "-n, --non-interactive pro použití ve skriptech\n" diff --git a/po/en@boldquot.header b/po/en@boldquot.header new file mode 100644 index 0000000..fedb6a0 --- /dev/null +++ b/po/en@boldquot.header @@ -0,0 +1,25 @@ +# All this catalog "translates" are quotation characters. +# The msgids must be ASCII and therefore cannot contain real quotation +# characters, only substitutes like grave accent (0x60), apostrophe (0x27) +# and double quote (0x22). These substitutes look strange; see +# http://www.cl.cam.ac.uk/~mgk25/ucs/quotes.html +# +# This catalog translates grave accent (0x60) and apostrophe (0x27) to +# left single quotation mark (U+2018) and right single quotation mark (U+2019). +# It also translates pairs of apostrophe (0x27) to +# left single quotation mark (U+2018) and right single quotation mark (U+2019) +# and pairs of quotation mark (0x22) to +# left double quotation mark (U+201C) and right double quotation mark (U+201D). +# +# When output to an UTF-8 terminal, the quotation characters appear perfectly. +# When output to an ISO-8859-1 terminal, the single quotation marks are +# transliterated to apostrophes (by iconv in glibc 2.2 or newer) or to +# grave/acute accent (by libiconv), and the double quotation marks are +# transliterated to 0x22. +# When output to an ASCII terminal, the single quotation marks are +# transliterated to apostrophes, and the double quotation marks are +# transliterated to 0x22. 
+# +# This catalog furthermore displays the text between the quotation marks in +# bold face, assuming the VT100/XTerm escape sequences. +# diff --git a/po/en@quot.header b/po/en@quot.header new file mode 100644 index 0000000..a9647fc --- /dev/null +++ b/po/en@quot.header @@ -0,0 +1,22 @@ +# All this catalog "translates" are quotation characters. +# The msgids must be ASCII and therefore cannot contain real quotation +# characters, only substitutes like grave accent (0x60), apostrophe (0x27) +# and double quote (0x22). These substitutes look strange; see +# http://www.cl.cam.ac.uk/~mgk25/ucs/quotes.html +# +# This catalog translates grave accent (0x60) and apostrophe (0x27) to +# left single quotation mark (U+2018) and right single quotation mark (U+2019). +# It also translates pairs of apostrophe (0x27) to +# left single quotation mark (U+2018) and right single quotation mark (U+2019) +# and pairs of quotation mark (0x22) to +# left double quotation mark (U+201C) and right double quotation mark (U+201D). +# +# When output to an UTF-8 terminal, the quotation characters appear perfectly. +# When output to an ISO-8859-1 terminal, the single quotation marks are +# transliterated to apostrophes (by iconv in glibc 2.2 or newer) or to +# grave/acute accent (by libiconv), and the double quotation marks are +# transliterated to 0x22. +# When output to an ASCII terminal, the single quotation marks are +# transliterated to apostrophes, and the double quotation marks are +# transliterated to 0x22. +# diff --git a/po/fr.po b/po/fr.po new file mode 100644 index 0000000..c07403d --- /dev/null +++ b/po/fr.po @@ -0,0 +1,175 @@ +# French translations for sdcv package +# Traduction française pour le paquet sdcv. +# Copyright (C) 2004 THE sdcv'S COPYRIGHT HOLDER +# This file is distributed under the same license as the sdcv package. +# <dushistov@mail.ru>, 2004. 
+# +msgid "" +msgstr "" +"Project-Id-Version: sdcv 0.4.2\n" +"Report-Msgid-Bugs-To: dushistov@mail.ru\n" +"POT-Creation-Date: 2017-08-16 09:52+0300\n" +"PO-Revision-Date: 2009-06-15 23:20+0800\n" +"Language-Team: Vincent Petry <PVince81@yahoo.fr>\n" +"Language: fr\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=2; " +"plural=(n > 1);\n" + +#: ../src/libwrapper.cpp:300 +msgid "popen failed" +msgstr "Échec de popen" + +#: ../src/libwrapper.cpp:340 +#, c-format +msgid "Can not convert %s to utf8.\n" +msgstr "Ne peut convertir %s au format utf8.\n" + +#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432 +#, fuzzy, c-format +msgid "Found %zu items, similar to %s.\n" +msgstr "Trouvé %d éléments similaires à %s.\n" + +#: ../src/libwrapper.cpp:416 +msgid "Your choice[-1 to abort]: " +msgstr "Votre choix[-1 pour abandonner] : " + +#: ../src/libwrapper.cpp:426 +#, fuzzy, c-format +msgid "" +"Invalid choice.\n" +"It must be from 0 to %zu or -1.\n" +msgstr "" +"Sélection invalide.\n" +"Veuillez choisir un nombre entre 0 et %d, ou -1.\n" + +#: ../src/libwrapper.cpp:445 +#, c-format +msgid "Nothing similar to %s, sorry :(\n" +msgstr "Aucun mot/phrase similaire à %s, désolé :(\n" + +#: ../src/sdcv.cpp:88 +#, fuzzy +msgid "display version information and exit" +msgstr "" +"-v, --version afficher les informations de version et sortir\n" + +#: ../src/sdcv.cpp:90 +#, fuzzy +msgid "display list of available dictionaries and exit" +msgstr "" +"-l, --list-dicts afficher la liste des dictionnaires disponibles et " +"sortir\n" + +#: ../src/sdcv.cpp:92 +#, fuzzy +msgid "for search use only dictionary with this bookname" +msgstr "" +"-u, --use-dict nom_dict pour chercher seulement en utilisant le " +"dictionnaire spécifié\n" + +#: ../src/sdcv.cpp:93 +msgid "bookname" +msgstr "" + +#: ../src/sdcv.cpp:95 +msgid "for use in scripts" 
+msgstr "" + +#: ../src/sdcv.cpp:97 +msgid "print the result formatted as JSON" +msgstr "" + +#: ../src/sdcv.cpp:99 +msgid "do not fuzzy-search for similar words, only return exact matches" +msgstr "" + +#: ../src/sdcv.cpp:101 +#, fuzzy +msgid "output must be in utf8" +msgstr "--utf8-output force la sortie au format utf8\n" + +#: ../src/sdcv.cpp:103 +#, fuzzy +msgid "input of sdcv in utf8" +msgstr "--utf8-input force l'entrée de sdcv au format utf8\n" + +#: ../src/sdcv.cpp:105 +#, fuzzy +msgid "use this directory as path to stardict data directory" +msgstr "" +"--data-dir chemin utiliser ce chemin pour trouver les données de " +"stardict\n" + +#: ../src/sdcv.cpp:106 +msgid "path/to/dir" +msgstr "" + +#: ../src/sdcv.cpp:108 +msgid "" +"only use the dictionaries in data-dir, do not search in user and system " +"directories" +msgstr "" + +#: ../src/sdcv.cpp:110 +msgid "colorize the output" +msgstr "" + +#: ../src/sdcv.cpp:115 +msgid " words" +msgstr "" + +#: ../src/sdcv.cpp:121 +#, c-format +msgid "Invalid command line arguments: %s\n" +msgstr "" + +#: ../src/sdcv.cpp:127 +#, c-format +msgid "Console version of Stardict, version %s\n" +msgstr "Version console de Stardict, version %s\n" + +#: ../src/sdcv.cpp:202 +#, c-format +msgid "g_mkdir failed: %s\n" +msgstr "Échec de g_mkdir : %s\n" + +#: ../src/sdcv.cpp:217 +msgid "Enter word or phrase: " +msgstr "Entrez un mot ou une phrase: " + +#: ../src/sdcv.cpp:225 +#, c-format +msgid "There are no words/phrases to translate.\n" +msgstr "Il n'y a pas de mots/phrases à traduire.\n" + +#: ../src/sdcv.cpp:237 +#, c-format +msgid "Dictionary's name Word count\n" +msgstr "Nom dictionnaire Nombre de mots\n" + +#: ../src/utils.cpp:48 +#, c-format +msgid "Can not convert %s to current locale.\n" +msgstr "Ne peut pas convertir %s dans la locale courante.\n" + +#~ msgid "" +#~ "Unknown option.\n" +#~ "Try '%s --help' for more information.\n" +#~ msgstr "" +#~ "Option inconnue.\n" +#~ "Essayez '%s --help' pour plus d'informations.\n" + 
+#~ msgid "Usage: %s [OPTIONS] words\n" +#~ msgstr "Usage: %s [OPTIONS] mots\n" + +#~ msgid "-h, --help display this help and exit\n" +#~ msgstr "-h, --help afficher cette aide et sortir\n" + +#~ msgid "-n, --non-interactive for use in scripts\n" +#~ msgstr "-n, --non-interactive pour l'utilisation dans des scripts\n" + +#~ msgid "There is no dictionary with this bookname: %s.\n" +#~ msgstr "Pas de dictionnaire : %s.\n" diff --git a/po/ru.po b/po/ru.po new file mode 100644 index 0000000..abbb597 --- /dev/null +++ b/po/ru.po @@ -0,0 +1,145 @@ +# Russian translations for sdcv package +# Русские переводы для пакета sdcv. +# Copyright (C) 2004 THE sdcv'S COPYRIGHT HOLDER +# This file is distributed under the same license as the sdcv package. +# <dushistov@mail.ru>, 2004. +# +msgid "" +msgstr "" +"Project-Id-Version: sdcv 0.5\n" +"Report-Msgid-Bugs-To: dushistov@mail.ru\n" +"POT-Creation-Date: 2017-08-16 09:52+0300\n" +"PO-Revision-Date: 2017-08-16 10:05+0300\n" +"Last-Translator: Evgeniy Dushistov <dushistov@mail.ru>\n" +"Language-Team: Russian <ru@li.org>\n" +"Language: ru\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n" +"%10<=4 && (n%100<10 || n%100>=20) ? 
1 : 2);\n" + +#: ../src/libwrapper.cpp:300 +msgid "popen failed" +msgstr "функция popen завершилась с ошибкой" + +#: ../src/libwrapper.cpp:340 +#, c-format +msgid "Can not convert %s to utf8.\n" +msgstr "Не могу преобразовать %s в utf8.\n" + +#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432 +#, c-format +msgid "Found %zu items, similar to %s.\n" +msgstr "Найдено %zu слов, похожих на %s.\n" + +#: ../src/libwrapper.cpp:416 +msgid "Your choice[-1 to abort]: " +msgstr "Ваш выбор[-1 - отмена]: " + +#: ../src/libwrapper.cpp:426 +#, c-format +msgid "" +"Invalid choice.\n" +"It must be from 0 to %zu or -1.\n" +msgstr "" +"Неправильный выбор.\n" +"Должно быть от 0 до %zu или -1.\n" + +#: ../src/libwrapper.cpp:445 +#, c-format +msgid "Nothing similar to %s, sorry :(\n" +msgstr "Ничего похожего на %s, извините :(\n" + +#: ../src/sdcv.cpp:88 +msgid "display version information and exit" +msgstr "показать номер версии и завершить работу" + +#: ../src/sdcv.cpp:90 +msgid "display list of available dictionaries and exit" +msgstr "показать список доступных словарей и завершить работу" + +#: ../src/sdcv.cpp:92 +msgid "for search use only dictionary with this bookname" +msgstr "для поиска использовать только этот словарь с таким именем" + +#: ../src/sdcv.cpp:93 +msgid "bookname" +msgstr "имя_словаря" + +#: ../src/sdcv.cpp:95 +msgid "for use in scripts" +msgstr "для использования в скриптах" + +#: ../src/sdcv.cpp:97 +msgid "print the result formatted as JSON" +msgstr "выдать результат в JSON формате" + +#: ../src/sdcv.cpp:99 +msgid "do not fuzzy-search for similar words, only return exact matches" +msgstr "не использовать нечеткий поиск похожих слов, вернуть только точные совпадения" + +#: ../src/sdcv.cpp:101 +msgid "output must be in utf8" +msgstr "вывод программы должен быть в utf8" + +#: ../src/sdcv.cpp:103 +msgid "input of sdcv in utf8" +msgstr "ввод программы в utf8" + +#: ../src/sdcv.cpp:105 +msgid "use this directory as path to stardict data directory" +msgstr "" 
+"использовать эту директорию в качестве пути к \"stardict data\" директории" + +#: ../src/sdcv.cpp:106 +msgid "path/to/dir" +msgstr "путь/до/директории" + +#: ../src/sdcv.cpp:108 +msgid "" +"only use the dictionaries in data-dir, do not search in user and system " +"directories" +msgstr "использовать словари только из data-dir, не искать в пользовательских и системных каталогах" + +#: ../src/sdcv.cpp:110 +msgid "colorize the output" +msgstr "раскрашивать вывод в разные цвета" + +#: ../src/sdcv.cpp:115 +msgid " words" +msgstr "слова" + +#: ../src/sdcv.cpp:121 +#, c-format +msgid "Invalid command line arguments: %s\n" +msgstr "Неправильный аргумент командой строки: %s\n" + +#: ../src/sdcv.cpp:127 +#, c-format +msgid "Console version of Stardict, version %s\n" +msgstr "Консольная версия StarDict, версия %s\n" + +#: ../src/sdcv.cpp:202 +#, c-format +msgid "g_mkdir failed: %s\n" +msgstr "g_mkdir завершился с ошибкой: %s\n" + +#: ../src/sdcv.cpp:217 +msgid "Enter word or phrase: " +msgstr "Введите слово или фразу: " + +#: ../src/sdcv.cpp:225 +#, c-format +msgid "There are no words/phrases to translate.\n" +msgstr "Не задано слова/фразы для перевода.\n" + +#: ../src/sdcv.cpp:237 +#, c-format +msgid "Dictionary's name Word count\n" +msgstr "Название словаря Количество слов\n" + +#: ../src/utils.cpp:48 +#, c-format +msgid "Can not convert %s to current locale.\n" +msgstr "Не могу преобразовать %s в локальную кодировку.\n" diff --git a/po/sdcv.pot b/po/sdcv.pot new file mode 100644 index 0000000..5348adb --- /dev/null +++ b/po/sdcv.pot @@ -0,0 +1,139 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR Free Software Foundation, Inc. +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. 
+# +msgid "" +msgstr "" +"Project-Id-Version: sdcv 0.5\n" +"Report-Msgid-Bugs-To: dushistov@mail.ru\n" +"POT-Creation-Date: 2017-08-16 09:52+0300\n" +"PO-Revision-Date: 2017-08-16 10:01+0300\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: 8bit\n" + +#: ../src/libwrapper.cpp:300 +msgid "popen failed" +msgstr "" + +#: ../src/libwrapper.cpp:340 +#, c-format +msgid "Can not convert %s to utf8.\n" +msgstr "" + +#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432 +#, c-format +msgid "Found %zu items, similar to %s.\n" +msgstr "" + +#: ../src/libwrapper.cpp:416 +msgid "Your choice[-1 to abort]: " +msgstr "" + +#: ../src/libwrapper.cpp:426 +#, c-format +msgid "" +"Invalid choice.\n" +"It must be from 0 to %zu or -1.\n" +msgstr "" + +#: ../src/libwrapper.cpp:445 +#, c-format +msgid "Nothing similar to %s, sorry :(\n" +msgstr "" + +#: ../src/sdcv.cpp:88 +msgid "display version information and exit" +msgstr "" + +#: ../src/sdcv.cpp:90 +msgid "display list of available dictionaries and exit" +msgstr "" + +#: ../src/sdcv.cpp:92 +msgid "for search use only dictionary with this bookname" +msgstr "" + +#: ../src/sdcv.cpp:93 +msgid "bookname" +msgstr "" + +#: ../src/sdcv.cpp:95 +msgid "for use in scripts" +msgstr "" + +#: ../src/sdcv.cpp:97 +msgid "print the result formatted as JSON" +msgstr "" + +#: ../src/sdcv.cpp:99 +msgid "do not fuzzy-search for similar words, only return exact matches" +msgstr "" + +#: ../src/sdcv.cpp:101 +msgid "output must be in utf8" +msgstr "" + +#: ../src/sdcv.cpp:103 +msgid "input of sdcv in utf8" +msgstr "" + +#: ../src/sdcv.cpp:105 +msgid "use this directory as path to stardict data directory" +msgstr "" + +#: ../src/sdcv.cpp:106 +msgid "path/to/dir" +msgstr "" + +#: ../src/sdcv.cpp:108 +msgid "" +"only use the dictionaries in data-dir, do not search in user and system " +"directories" 
+msgstr "" + +#: ../src/sdcv.cpp:110 +msgid "colorize the output" +msgstr "" + +#: ../src/sdcv.cpp:115 +msgid " words" +msgstr "" + +#: ../src/sdcv.cpp:121 +#, c-format +msgid "Invalid command line arguments: %s\n" +msgstr "" + +#: ../src/sdcv.cpp:127 +#, c-format +msgid "Console version of Stardict, version %s\n" +msgstr "" + +#: ../src/sdcv.cpp:202 +#, c-format +msgid "g_mkdir failed: %s\n" +msgstr "" + +#: ../src/sdcv.cpp:217 +msgid "Enter word or phrase: " +msgstr "" + +#: ../src/sdcv.cpp:225 +#, c-format +msgid "There are no words/phrases to translate.\n" +msgstr "" + +#: ../src/sdcv.cpp:237 +#, c-format +msgid "Dictionary's name Word count\n" +msgstr "" + +#: ../src/utils.cpp:48 +#, c-format +msgid "Can not convert %s to current locale.\n" +msgstr "" diff --git a/po/sk.po b/po/sk.po new file mode 100644 index 0000000..c248cec --- /dev/null +++ b/po/sk.po @@ -0,0 +1,172 @@ +# translation of sdcv.po to Slovak +# Copyright (C) +# Zdenko Podobný <zdpo@mailbox.sk>, 2004, 2005. +# Ivan Masár <helix84@centrum.sk>, 2007. +msgid "" +msgstr "" +"Project-Id-Version: sdcv 0.3.2\n" +"Report-Msgid-Bugs-To: dushistov@mail.ru\n" +"POT-Creation-Date: 2017-08-16 09:52+0300\n" +"PO-Revision-Date: 2007-09-11 00:22+0100\n" +"Last-Translator: Ivan Masár <helix84@centrum.sk>\n" +"Language-Team: Slovak <sk-i18n@lists.linux.sk>\n" +"Language: sk\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"X-Generator: KBabel 1.9.1\n" +"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n" +"%10<=4 && (n%100<10 || n%100>=20) ? 
1 : 2);\n" + +#: ../src/libwrapper.cpp:300 +msgid "popen failed" +msgstr "popen zlyhalo" + +#: ../src/libwrapper.cpp:340 +#, c-format +msgid "Can not convert %s to utf8.\n" +msgstr "nie je možné konvertovať %s na utf8.\n" + +#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432 +#, fuzzy, c-format +msgid "Found %zu items, similar to %s.\n" +msgstr "Nájdené %d položiek, podobných %s,\n" + +#: ../src/libwrapper.cpp:416 +#, fuzzy +msgid "Your choice[-1 to abort]: " +msgstr "Vaša voľba[-1 zruší]: " + +#: ../src/libwrapper.cpp:426 +#, fuzzy, c-format +msgid "" +"Invalid choice.\n" +"It must be from 0 to %zu or -1.\n" +msgstr "" +"Neplatná voľba.\n" +"Musí byť od 0 do %d alebo -1.\n" + +#: ../src/libwrapper.cpp:445 +#, c-format +msgid "Nothing similar to %s, sorry :(\n" +msgstr "Ľutujem, nič sa nepodobá na %s :(\n" + +#: ../src/sdcv.cpp:88 +#, fuzzy +msgid "display version information and exit" +msgstr "-v, --version zobrazí informácie o verzii a skončí\n" + +#: ../src/sdcv.cpp:90 +#, fuzzy +msgid "display list of available dictionaries and exit" +msgstr "" +"-l, --list-dicts zobrazí zoznam dostupných slovníkov a skončí\n" + +#: ../src/sdcv.cpp:92 +#, fuzzy +msgid "for search use only dictionary with this bookname" +msgstr "-u, --use-dict názov použiť pre hľadanie iba zvolený slovník\n" + +#: ../src/sdcv.cpp:93 +msgid "bookname" +msgstr "" + +#: ../src/sdcv.cpp:95 +msgid "for use in scripts" +msgstr "" + +#: ../src/sdcv.cpp:97 +msgid "print the result formatted as JSON" +msgstr "" + +#: ../src/sdcv.cpp:99 +msgid "do not fuzzy-search for similar words, only return exact matches" +msgstr "" + +#: ../src/sdcv.cpp:101 +#, fuzzy +msgid "output must be in utf8" +msgstr "--utf8-output výstup musí byť v utf8\n" + +#: ../src/sdcv.cpp:103 +#, fuzzy +msgid "input of sdcv in utf8" +msgstr "--utf8-input vstup pre sdcv je v utf8\n" + +#: ../src/sdcv.cpp:105 +#, fuzzy +msgid "use this directory as path to stardict data directory" +msgstr "" +"--data-dir path/to/dir použiť tento 
priečinok ako cestu pre stardict " +"dátový priečinok\n" + +#: ../src/sdcv.cpp:106 +msgid "path/to/dir" +msgstr "" + +#: ../src/sdcv.cpp:108 +msgid "" +"only use the dictionaries in data-dir, do not search in user and system " +"directories" +msgstr "" + +#: ../src/sdcv.cpp:110 +msgid "colorize the output" +msgstr "" + +#: ../src/sdcv.cpp:115 +msgid " words" +msgstr "" + +#: ../src/sdcv.cpp:121 +#, c-format +msgid "Invalid command line arguments: %s\n" +msgstr "" + +#: ../src/sdcv.cpp:127 +#, c-format +msgid "Console version of Stardict, version %s\n" +msgstr "Konzolová verzia StarDict, verzia %s\n" + +#: ../src/sdcv.cpp:202 +#, c-format +msgid "g_mkdir failed: %s\n" +msgstr "g_mkdir zlyhalo: %s\n" + +#: ../src/sdcv.cpp:217 +msgid "Enter word or phrase: " +msgstr "Vložte slovo alebo frázu: " + +#: ../src/sdcv.cpp:225 +#, c-format +msgid "There are no words/phrases to translate.\n" +msgstr "Nie je čo preložiť.\n" + +#: ../src/sdcv.cpp:237 +#, c-format +msgid "Dictionary's name Word count\n" +msgstr "názov slovníka počet slov\n" + +#: ../src/utils.cpp:48 +#, c-format +msgid "Can not convert %s to current locale.\n" +msgstr "Nie je možné konvertovať %s na aktuálne locale.\n" + +#~ msgid "" +#~ "Unknown option.\n" +#~ "Try '%s --help' for more information.\n" +#~ msgstr "" +#~ "Neznáma voľba.\n" +#~ "Skúste „%s --help“.\n" + +#~ msgid "Usage: %s [OPTIONS] words\n" +#~ msgstr "Použitie: %s [VOĽBY] slová\n" + +#~ msgid "-h, --help display this help and exit\n" +#~ msgstr "-h, --help zobrazí tento text a skončí\n" + +#~ msgid "-n, --non-interactive for use in scripts\n" +#~ msgstr "-n, --non-interactive pre použitie v skriptoch\n" + +#~ msgid "There is no dictionary with this bookname: %s.\n" +#~ msgstr "Neexistuje slovník s takýmto názvom: %s.\n" diff --git a/po/uk.po b/po/uk.po new file mode 100644 index 0000000..577b640 --- /dev/null +++ b/po/uk.po @@ -0,0 +1,176 @@ +# Ukrainian translations for sdcv package +# Українські переклади для пакету sdcv. 
+# Copyright (C) 2004 THE sdcv'S COPYRIGHT HOLDER +# This file is distributed under the same license as the sdcv package. +# <dushistov@mail.ru>, 2004. +# +msgid "" +msgstr "" +"Project-Id-Version: sdcv 0.3\n" +"Report-Msgid-Bugs-To: dushistov@mail.ru\n" +"POT-Creation-Date: 2017-08-16 09:52+0300\n" +"PO-Revision-Date: 2004-12-14 17:54+0300\n" +"Last-Translator: <dubyk@lsl.lviv.ua>\n" +"Language-Team: Ukrainian <dubyk@lsl.lviv.ua>\n" +"Language: uk\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n" +"%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);\n" + +#: ../src/libwrapper.cpp:300 +msgid "popen failed" +msgstr "" + +#: ../src/libwrapper.cpp:340 +#, c-format +msgid "Can not convert %s to utf8.\n" +msgstr "Не можу перетворити %s у utf8.\n" + +#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432 +#, fuzzy, c-format +msgid "Found %zu items, similar to %s.\n" +msgstr "Знайдено %d слів, схожих на %s.\n" + +#: ../src/libwrapper.cpp:416 +#, fuzzy +msgid "Your choice[-1 to abort]: " +msgstr "Ваш вибір: " + +#: ../src/libwrapper.cpp:426 +#, fuzzy, c-format +msgid "" +"Invalid choice.\n" +"It must be from 0 to %zu or -1.\n" +msgstr "" +"Неправильний вибір.\n" +"Повинно бути від 0 до %d.\n" + +#: ../src/libwrapper.cpp:445 +#, c-format +msgid "Nothing similar to %s, sorry :(\n" +msgstr "Нічого схожого на %s, даруйте :(\n" + +#: ../src/sdcv.cpp:88 +#, fuzzy +msgid "display version information and exit" +msgstr "-v, --version показати номер версії і завершити роботу\n" + +#: ../src/sdcv.cpp:90 +#, fuzzy +msgid "display list of available dictionaries and exit" +msgstr "" +"-l, --list-dicts показати список доступних словників і завершити " +"роботу\n" + +#: ../src/sdcv.cpp:92 +#, fuzzy +msgid "for search use only dictionary with this bookname" +msgstr "" +"-u, --use-dict ім`я словника для пошуку використовувати лише цей словник\n" + +#: 
../src/sdcv.cpp:93 +msgid "bookname" +msgstr "" + +#: ../src/sdcv.cpp:95 +msgid "for use in scripts" +msgstr "" + +#: ../src/sdcv.cpp:97 +msgid "print the result formatted as JSON" +msgstr "" + +#: ../src/sdcv.cpp:99 +msgid "do not fuzzy-search for similar words, only return exact matches" +msgstr "" + +#: ../src/sdcv.cpp:101 +#, fuzzy +msgid "output must be in utf8" +msgstr "--utf8-output вивід програми повинен бути в utf8\n" + +#: ../src/sdcv.cpp:103 +#, fuzzy +msgid "input of sdcv in utf8" +msgstr "--utf8-input ввід програми в utf8\n" + +#: ../src/sdcv.cpp:105 +#, fuzzy +msgid "use this directory as path to stardict data directory" +msgstr "" +"--data-dir тека використовувати цю теку як шлях до stardict data " +"directory\n" + +#: ../src/sdcv.cpp:106 +msgid "path/to/dir" +msgstr "" + +#: ../src/sdcv.cpp:108 +msgid "" +"only use the dictionaries in data-dir, do not search in user and system " +"directories" +msgstr "" + +#: ../src/sdcv.cpp:110 +msgid "colorize the output" +msgstr "" + +#: ../src/sdcv.cpp:115 +msgid " words" +msgstr "" + +#: ../src/sdcv.cpp:121 +#, c-format +msgid "Invalid command line arguments: %s\n" +msgstr "" + +#: ../src/sdcv.cpp:127 +#, c-format +msgid "Console version of Stardict, version %s\n" +msgstr "Консольна версія Зоряного словника [Stardict], номер версії %s\n" + +#: ../src/sdcv.cpp:202 +#, c-format +msgid "g_mkdir failed: %s\n" +msgstr "" + +#: ../src/sdcv.cpp:217 +msgid "Enter word or phrase: " +msgstr "Введіть слово або фразу: " + +#: ../src/sdcv.cpp:225 +#, c-format +msgid "There are no words/phrases to translate.\n" +msgstr "Не задано слова/фрази для перекладу.\n" + +#: ../src/sdcv.cpp:237 +#, fuzzy, c-format +msgid "Dictionary's name Word count\n" +msgstr "назва словника кількість слів\n" + +#: ../src/utils.cpp:48 +#, c-format +msgid "Can not convert %s to current locale.\n" +msgstr "Не можу перетворити %s у локальне кодування.\n" + +#~ msgid "" +#~ "Unknown option.\n" +#~ "Try '%s --help' for more information.\n" +#~ msgstr ""
+#~ "НевЁдома опцЁя.\n" +#~ "Спробуйте '%s --help' для отримання докладнішої інформації.\n" + +#~ msgid "Usage: %s [OPTIONS] words\n" +#~ msgstr "Використання: %s [OPTIONS] слова\n" + +#~ msgid "-h, --help display this help and exit\n" +#~ msgstr "" +#~ "-h, --help показати це повідомлення і завершити роботу\n" + +#~ msgid "-n, --non-interactive for use in scripts\n" +#~ msgstr "-n, --non-interactive для використання у 'скриптах'\n" + +#~ msgid "There is no dictionary with this bookname: %s.\n" +#~ msgstr "Словника з таким іменем не існує: %s.\n" diff --git a/po/zh_CN.po b/po/zh_CN.po new file mode 100644 index 0000000..98db0a9 --- /dev/null +++ b/po/zh_CN.po @@ -0,0 +1,166 @@ +# Simplified Chinese translation for sdcv +# Copyright (C) 2005 Free Software Foundation, Inc. +# Cai Qian <caiqian@gnome.org>, 2005. +# +msgid "" +msgstr "" +"Project-Id-Version: sdcv 0.3\n" +"Report-Msgid-Bugs-To: dushistov@mail.ru\n" +"POT-Creation-Date: 2017-08-16 09:52+0300\n" +"PO-Revision-Date: 2005-1-17 00:58+0800\n" +"Last-Translator: Cai Qian <caiqian@gnome.org>\n" +"Language-Team: Simplified Chinese\n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#: ../src/libwrapper.cpp:300 +msgid "popen failed" +msgstr "" + +#: ../src/libwrapper.cpp:340 +#, c-format +msgid "Can not convert %s to utf8.\n" +msgstr "无法将 %s 转换为 UTF-8。\n" + +#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432 +#, fuzzy, c-format +msgid "Found %zu items, similar to %s.\n" +msgstr "发现 %d 条记录和 %s 相似。\n" + +#: ../src/libwrapper.cpp:416 +#, fuzzy +msgid "Your choice[-1 to abort]: " +msgstr "您的选择为:" + +#: ../src/libwrapper.cpp:426 +#, fuzzy, c-format +msgid "" +"Invalid choice.\n" +"It must be from 0 to %zu or -1.\n" +msgstr "" +"无效的选择。\n" +"必须是 0 到 %d。\n" + +#: ../src/libwrapper.cpp:445 +#, c-format +msgid "Nothing similar to %s, sorry :(\n" +msgstr "对不起,没有发现和 %s 相似的 :(\n" + +#: ../src/sdcv.cpp:88 +#, fuzzy +msgid "display version 
information and exit" +msgstr "-v, --version 显示版本信息并退出\n" + +#: ../src/sdcv.cpp:90 +#, fuzzy +msgid "display list of available dictionaries and exit" +msgstr "-l, --list-dicts 显示可用的字典列表并退出\n" + +#: ../src/sdcv.cpp:92 +#, fuzzy +msgid "for search use only dictionary with this bookname" +msgstr "-u, --use-dict 字典名 只使用指定的字典进行单词搜索\n" + +#: ../src/sdcv.cpp:93 +msgid "bookname" +msgstr "" + +#: ../src/sdcv.cpp:95 +msgid "for use in scripts" +msgstr "" + +#: ../src/sdcv.cpp:97 +msgid "print the result formatted as JSON" +msgstr "" + +#: ../src/sdcv.cpp:99 +msgid "do not fuzzy-search for similar words, only return exact matches" +msgstr "" + +#: ../src/sdcv.cpp:101 +#, fuzzy +msgid "output must be in utf8" +msgstr "--utf8-output 输出必须是 UTF-8\n" + +#: ../src/sdcv.cpp:103 +#, fuzzy +msgid "input of sdcv in utf8" +msgstr "--utf8-input sdcv 的输入为 UTF-8\n" + +#: ../src/sdcv.cpp:105 +#, fuzzy +msgid "use this directory as path to stardict data directory" +msgstr "--data-dir 目录路径 指定 Stardict 数据所在目录的路径\n" + +#: ../src/sdcv.cpp:106 +msgid "path/to/dir" +msgstr "" + +#: ../src/sdcv.cpp:108 +msgid "" +"only use the dictionaries in data-dir, do not search in user and system " +"directories" +msgstr "" + +#: ../src/sdcv.cpp:110 +msgid "colorize the output" +msgstr "" + +#: ../src/sdcv.cpp:115 +msgid " words" +msgstr "" + +#: ../src/sdcv.cpp:121 +#, c-format +msgid "Invalid command line arguments: %s\n" +msgstr "" + +#: ../src/sdcv.cpp:127 +#, c-format +msgid "Console version of Stardict, version %s\n" +msgstr "Stardict 的控制台版本,版本为 %s\n" + +#: ../src/sdcv.cpp:202 +#, c-format +msgid "g_mkdir failed: %s\n" +msgstr "" + +#: ../src/sdcv.cpp:217 +msgid "Enter word or phrase: " +msgstr "请输入单词或短语:" + +#: ../src/sdcv.cpp:225 +#, c-format +msgid "There are no words/phrases to translate.\n" +msgstr "没有供翻译的单词或短语。\n" + +#: ../src/sdcv.cpp:237 +#, fuzzy, c-format +msgid "Dictionary's name Word count\n" +msgstr "字典名 单词量\n" + +#: ../src/utils.cpp:48 +#, c-format +msgid "Can not convert %s to current 
locale.\n" +msgstr "无法将 %s 转换为当前 Locale。\n" + +#~ msgid "" +#~ "Unknown option.\n" +#~ "Try '%s --help' for more information.\n" +#~ msgstr "" +#~ "未知选项。\n" +#~ "更多信息请看 '%s --help'。\n" + +#~ msgid "Usage: %s [OPTIONS] words\n" +#~ msgstr "用法:%s [选项] 单词\n" + +#~ msgid "-h, --help display this help and exit\n" +#~ msgstr "-h, --help 显示本帮助并退出\n" + +#~ msgid "-n, --non-interactive for use in scripts\n" +#~ msgstr "-n, --non-interactive 在脚本中使用\n" + +#~ msgid "There is no dictionary with this bookname: %s.\n" +#~ msgstr "没有与该字典名 ‘%s’ 相同的字典。\n" diff --git a/po/zh_TW.po b/po/zh_TW.po new file mode 100644 index 0000000..a722c78 --- /dev/null +++ b/po/zh_TW.po @@ -0,0 +1,165 @@ +# Traditional Chinese Messages for sdcv +# Copyright (C) 2005 Free Software Foundation, Inc. +# This file is distributed under the same license as the sdcv package. +# Cai Qian <caiqian@gnome.org>, 2005 +# Wei-Lun Chao <bluebat@member.fsf.org>, 2005, 2013. +# +msgid "" +msgstr "" +"Project-Id-Version: sdcv 0.4.2\n" +"Report-Msgid-Bugs-To: dushistov@mail.ru\n" +"POT-Creation-Date: 2017-08-16 09:52+0300\n" +"PO-Revision-Date: 2013-06-12 14:11+0800\n" +"Last-Translator: Wei-Lun Chao <bluebat@member.fsf.org>\n" +"Language-Team: Chinese (traditional) <zh-l10n@linux.org.tw>\n" +"Language: zh_TW\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=1; plural=0;\n" + +#: ../src/libwrapper.cpp:300 +msgid "popen failed" +msgstr "popen 失敗" + +#: ../src/libwrapper.cpp:340 +#, c-format +msgid "Can not convert %s to utf8.\n" +msgstr "無法將 %s 轉換為 UTF-8。\n" + +#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432 +#, fuzzy, c-format +msgid "Found %zu items, similar to %s.\n" +msgstr "找到 %d 項紀錄和 %s 相似。\n" + +#: ../src/libwrapper.cpp:416 +msgid "Your choice[-1 to abort]: " +msgstr "您的選擇是[-1 表示放棄]:" + +#: ../src/libwrapper.cpp:426 +#, fuzzy, c-format +msgid "" +"Invalid choice.\n" +"It must be from 0 to %zu or -1.\n" +msgstr "" +"無效的選擇。\n" 
+"必須是 0 到 %d 之間或 -1。\n" + +#: ../src/libwrapper.cpp:445 +#, c-format +msgid "Nothing similar to %s, sorry :(\n" +msgstr "抱歉,沒有和 %s 相似者 :(\n" + +#: ../src/sdcv.cpp:88 +#, fuzzy +msgid "display version information and exit" +msgstr "-v, --version 顯示版本資訊並離開\n" + +#: ../src/sdcv.cpp:90 +#, fuzzy +msgid "display list of available dictionaries and exit" +msgstr "-l, --list-dicts 顯示可用的字典清單並離開\n" + +#: ../src/sdcv.cpp:92 +#, fuzzy +msgid "for search use only dictionary with this bookname" +msgstr "-u, --use-dict 字典名 只使用指定的字典進行單字搜尋\n" + +#: ../src/sdcv.cpp:93 +msgid "bookname" +msgstr "" + +#: ../src/sdcv.cpp:95 +msgid "for use in scripts" +msgstr "" + +#: ../src/sdcv.cpp:97 +msgid "print the result formatted as JSON" +msgstr "" + +#: ../src/sdcv.cpp:99 +msgid "do not fuzzy-search for similar words, only return exact matches" +msgstr "" + +#: ../src/sdcv.cpp:101 +#, fuzzy +msgid "output must be in utf8" +msgstr "--utf8-output 輸出必須是 UTF-8\n" + +#: ../src/sdcv.cpp:103 +#, fuzzy +msgid "input of sdcv in utf8" +msgstr "--utf8-input sdcv 的輸入為 UTF-8\n" + +#: ../src/sdcv.cpp:105 +#, fuzzy +msgid "use this directory as path to stardict data directory" +msgstr "--data-dir 目錄路徑 指定 Stardict 資料所在目錄的路徑\n" + +#: ../src/sdcv.cpp:106 +msgid "path/to/dir" +msgstr "" + +#: ../src/sdcv.cpp:108 +msgid "" +"only use the dictionaries in data-dir, do not search in user and system " +"directories" +msgstr "" + +#: ../src/sdcv.cpp:110 +msgid "colorize the output" +msgstr "" + +#: ../src/sdcv.cpp:115 +msgid " words" +msgstr "" + +#: ../src/sdcv.cpp:121 +#, c-format +msgid "Invalid command line arguments: %s\n" +msgstr "" + +#: ../src/sdcv.cpp:127 +#, c-format +msgid "Console version of Stardict, version %s\n" +msgstr "Stardict 的主控臺版本,版本為 %s\n" + +#: ../src/sdcv.cpp:202 +#, c-format +msgid "g_mkdir failed: %s\n" +msgstr "g_mkdir 失敗:%s\n" + +#: ../src/sdcv.cpp:217 +msgid "Enter word or phrase: " +msgstr "請輸入單字或片語:" + +#: ../src/sdcv.cpp:225 +#, c-format +msgid "There are no words/phrases to 
translate.\n" +msgstr "沒有可供翻譯的單字或片語。\n" + +#: ../src/sdcv.cpp:237 +#, c-format +msgid "Dictionary's name Word count\n" +msgstr "字典名稱 單字數量\n" + +#: ../src/utils.cpp:48 +#, fuzzy, c-format +msgid "Can not convert %s to current locale.\n" +msgstr "無法將 %s 轉換為 UTF-8。\n" + +#~ msgid "" +#~ "Unknown option.\n" +#~ "Try '%s --help' for more information.\n" +#~ msgstr "" +#~ "不明選項。\n" +#~ "更多資訊請看 '%s --help'。\n" + +#~ msgid "Usage: %s [OPTIONS] words\n" +#~ msgstr "用法:%s [選項] 單字…\n" + +#~ msgid "-h, --help display this help and exit\n" +#~ msgstr "-h, --help 顯示本輔助並離開\n" + +#~ msgid "-n, --non-interactive for use in scripts\n" +#~ msgstr "-n, --non-interactive 在指令稿中使用\n" diff --git a/src/dictziplib.cpp b/src/dictziplib.cpp new file mode 100644 index 0000000..e8716bb --- /dev/null +++ b/src/dictziplib.cpp @@ -0,0 +1,479 @@ +/* dictziplib.c -- + * http://stardict.sourceforge.net + * Copyright (C) 2003-2003 Hu Zheng <huzheng_001@163.com> + * This file is a modify version of dictd-1.9.7's data.c + * + * data.c -- + * Created: Tue Jul 16 12:45:41 1996 by faith@dict.org + * Revised: Sat Mar 30 10:46:06 2002 by faith@dict.org + * Copyright 1996, 1997, 1998, 2000, 2002 Rickard E. Faith (faith@dict.org) + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +//#define HAVE_MMAP //it will defined in config.h. this can be done by configure.in with a AC_FUNC_MMAP. +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <cassert> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <fcntl.h> +#include <limits.h> +#include <unistd.h> + +#include <sys/stat.h> + +#include "dictziplib.hpp" + +#define USE_CACHE 1 + +#define BUFFERSIZE 10240 + +/* + * Output buffer must be greater than or + * equal to 110% of input buffer size, plus + * 12 bytes. +*/ +#define OUT_BUFFER_SIZE 0xffffL + +#define IN_BUFFER_SIZE ((unsigned long)((double)(OUT_BUFFER_SIZE - 12) * 0.89)) + +/* For gzip-compatible header, as defined in RFC 1952 */ + +/* Magic for GZIP (rfc1952) */ +#define GZ_MAGIC1 0x1f /* First magic byte */ +#define GZ_MAGIC2 0x8b /* Second magic byte */ + +/* FLaGs (bitmapped), from rfc1952 */ +#define GZ_FTEXT 0x01 /* Set for ASCII text */ +#define GZ_FHCRC 0x02 /* Header CRC16 */ +#define GZ_FEXTRA 0x04 /* Optional field (random access index) */ +#define GZ_FNAME 0x08 /* Original name */ +#define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */ +#define GZ_MAX 2 /* Maximum compression */ +#define GZ_FAST 4 /* Fasted compression */ + +/* These are from rfc1952 */ +#define GZ_OS_FAT 0 /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */ +#define GZ_OS_AMIGA 1 /* Amiga */ +#define GZ_OS_VMS 2 /* VMS (or OpenVMS) */ +#define GZ_OS_UNIX 3 /* Unix */ +#define GZ_OS_VMCMS 4 /* VM/CMS */ +#define GZ_OS_ATARI 5 /* Atari TOS */ +#define GZ_OS_HPFS 6 /* HPFS filesystem (OS/2, NT) */ +#define GZ_OS_MAC 7 /* Macintosh */ +#define GZ_OS_Z 8 /* Z-System */ +#define GZ_OS_CPM 9 /* CP/M */ +#define GZ_OS_TOPS20 10 /* TOPS-20 */ +#define GZ_OS_NTFS 11 /* NTFS filesystem (NT) */ +#define GZ_OS_QDOS 
12 /* QDOS */ +#define GZ_OS_ACORN 13 /* Acorn RISCOS */ +#define GZ_OS_UNKNOWN 255 /* unknown */ + +#define GZ_RND_S1 'R' /* First magic for random access format */ +#define GZ_RND_S2 'A' /* Second magic for random access format */ + +#define GZ_ID1 0 /* GZ_MAGIC1 */ +#define GZ_ID2 1 /* GZ_MAGIC2 */ +#define GZ_CM 2 /* Compression Method (Z_DEFALTED) */ +#define GZ_FLG 3 /* FLaGs (see above) */ +#define GZ_MTIME 4 /* Modification TIME */ +#define GZ_XFL 8 /* eXtra FLags (GZ_MAX or GZ_FAST) */ +#define GZ_OS 9 /* Operating System */ +#define GZ_XLEN 10 /* eXtra LENgth (16bit) */ +#define GZ_FEXTRA_START 12 /* Start of extra fields */ +#define GZ_SI1 12 /* Subfield ID1 */ +#define GZ_SI2 13 /* Subfield ID2 */ +#define GZ_SUBLEN 14 /* Subfield length (16bit) */ +#define GZ_VERSION 16 /* Version for subfield format */ +#define GZ_CHUNKLEN 18 /* Chunk length (16bit) */ +#define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */ +#define GZ_RNDDATA 22 /* Random access data (16bit) */ + +#define DICT_UNKNOWN 0 +#define DICT_TEXT 1 +#define DICT_GZIP 2 +#define DICT_DZIP 3 + +int DictData::read_header(const std::string &fname, int computeCRC) +{ + FILE *str; + int id1, id2, si1, si2; + char buffer[BUFFERSIZE]; + int extraLength, subLength; + int i; + char *pt; + int c; + struct stat sb; + unsigned long crc = crc32(0L, Z_NULL, 0); + int count; + unsigned long offset; + + if (!(str = fopen(fname.c_str(), "rb"))) { + //err_fatal_errno( __FUNCTION__, + // "Cannot open data file \"%s\" for read\n", filename ); + return -1; + } + + this->headerLength = GZ_XLEN - 1; + this->type = DICT_UNKNOWN; + + id1 = getc(str); + id2 = getc(str); + + if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2) { + this->type = DICT_TEXT; + fstat(fileno(str), &sb); + this->compressedLength = this->length = sb.st_size; + this->origFilename = fname; + this->mtime = sb.st_mtime; + if (computeCRC) { + rewind(str); + while (!feof(str)) { + if ((count = fread(buffer, 1, BUFFERSIZE, str))) { + crc = crc32(crc, (Bytef 
*)buffer, count); + } + } + } + this->crc = crc; + fclose(str); + return 0; + } + this->type = DICT_GZIP; + + this->method = getc(str); + this->flags = getc(str); + this->mtime = getc(str) << 0; + this->mtime |= getc(str) << 8; + this->mtime |= getc(str) << 16; + this->mtime |= getc(str) << 24; + this->extraFlags = getc(str); + this->os = getc(str); + + if (this->flags & GZ_FEXTRA) { + extraLength = getc(str) << 0; + extraLength |= getc(str) << 8; + this->headerLength += extraLength + 2; + si1 = getc(str); + si2 = getc(str); + + /* The dictzip random-access subfield is identified by BOTH id bytes ('R','A'); matching only one of them would parse an unrelated gzip extra subfield as a chunk table. */ + if (si1 == GZ_RND_S1 && si2 == GZ_RND_S2) { + subLength = getc(str) << 0; + subLength |= getc(str) << 8; + this->version = getc(str) << 0; + this->version |= getc(str) << 8; + + if (this->version != 1) { + //err_internal( __FUNCTION__, + // "dzip header version %d not supported\n", + // this->version ); + } + + this->chunkLength = getc(str) << 0; + this->chunkLength |= getc(str) << 8; + this->chunkCount = getc(str) << 0; + this->chunkCount |= getc(str) << 8; + + if (this->chunkCount <= 0) { + fclose(str); + return 5; + } + this->chunks = (int *)malloc(sizeof(this->chunks[0]) + * this->chunkCount); + for (i = 0; i < this->chunkCount; i++) { + this->chunks[i] = getc(str) << 0; + this->chunks[i] |= getc(str) << 8; + } + this->type = DICT_DZIP; + } else { + fseek(str, this->headerLength, SEEK_SET); + } + } + + if (this->flags & GZ_FNAME) { /* FIXME! Add checking against header len */ + /* Store at most BUFFERSIZE - 1 bytes so an over-long name cannot overrun buffer, but keep consuming and counting the rest so headerLength still reflects the bytes actually present in the file. */ + size_t nameLength = 0; + pt = buffer; + while ((c = getc(str)) && c != EOF) { + if (pt < buffer + BUFFERSIZE - 1) + *pt++ = c; + ++nameLength; + } + *pt = '\0'; + + this->origFilename = buffer; + this->headerLength += nameLength + 1; + } else { + this->origFilename = ""; + } + + if (this->flags & GZ_COMMENT) { /* FIXME!
Add checking for header len */ + pt = buffer; + while ((c = getc(str)) && c != EOF) + *pt++ = c; + *pt = '\0'; + comment = buffer; + headerLength += comment.length() + 1; + } else { + comment = ""; + } + + if (this->flags & GZ_FHCRC) { + getc(str); + getc(str); + this->headerLength += 2; + } + + if (ftell(str) != this->headerLength + 1) { + //err_internal( __FUNCTION__, + // "File position (%lu) != header length + 1 (%d)\n", + // ftell( str ), this->headerLength + 1 ); + } + + fseek(str, -8, SEEK_END); + this->crc = getc(str) << 0; + this->crc |= getc(str) << 8; + this->crc |= getc(str) << 16; + this->crc |= getc(str) << 24; + this->length = getc(str) << 0; + this->length |= getc(str) << 8; + this->length |= getc(str) << 16; + this->length |= getc(str) << 24; + this->compressedLength = ftell(str); + + /* Compute offsets */ + this->offsets = (unsigned long *)malloc(sizeof(this->offsets[0]) + * this->chunkCount); + for (offset = this->headerLength + 1, i = 0; + i < this->chunkCount; + i++) { + this->offsets[i] = offset; + offset += this->chunks[i]; + } + + fclose(str); + return 0; +} + +bool DictData::open(const std::string &fname, int computeCRC) +{ + struct stat sb; + int fd; + + this->initialized = 0; + + if (stat(fname.c_str(), &sb) || !S_ISREG(sb.st_mode)) { + //err_warning( __FUNCTION__, + // "%s is not a regular file -- ignoring\n", fname ); + return false; + } + + if (read_header(fname, computeCRC)) { + //err_fatal( __FUNCTION__, + // "\"%s\" not in text or dzip format\n", fname ); + return false; + } + + if ((fd = ::open(fname.c_str(), O_RDONLY)) < 0) { + //err_fatal_errno( __FUNCTION__, + // "Cannot open data file \"%s\"\n", fname ); + return false; + } + if (fstat(fd, &sb)) { + //err_fatal_errno( __FUNCTION__, + // "Cannot stat data file \"%s\"\n", fname ); + return false; + } + + this->size = sb.st_size; + ::close(fd); + if (!mapfile.open(fname.c_str(), size)) + return false; + + this->start = mapfile.begin(); + this->end = this->start + this->size; + + 
for (size_t j = 0; j < DICT_CACHE_SIZE; j++) { + cache[j].chunk = -1; + cache[j].stamp = -1; + cache[j].inBuffer = nullptr; + cache[j].count = 0; + } + + return true; +} + +void DictData::close() +{ + if (this->chunks) + free(this->chunks); + if (this->offsets) + free(this->offsets); + + if (this->initialized) { + if (inflateEnd(&this->zStream)) { + //err_internal( __FUNCTION__, + // "Cannot shut down inflation engine: %s\n", + // this->zStream.msg ); + } + } + + for (size_t i = 0; i < DICT_CACHE_SIZE; ++i) { + if (this->cache[i].inBuffer) + free(this->cache[i].inBuffer); + } +} + +void DictData::read(char *buffer, unsigned long start, unsigned long size) +{ + char *pt; + unsigned long end; + int count; + char *inBuffer; + char outBuffer[OUT_BUFFER_SIZE]; + int firstChunk, lastChunk; + int firstOffset, lastOffset; + int i; + int found, target, lastStamp; + static int stamp = 0; + + end = start + size; + + //buffer = malloc( size + 1 ); + + //PRINTF(DBG_UNZIP, + // ("dict_data_read( %p, %lu, %lu )\n", + //h, start, size )); + + switch (this->type) { + case DICT_GZIP: + //err_fatal( __FUNCTION__, + // "Cannot seek on pure gzip format files.\n" + // "Use plain text (for performance)" + // " or dzip format (for space savings).\n" ); + break; + case DICT_TEXT: + memcpy(buffer, this->start + start, size); + //buffer[size] = '\0'; + break; + case DICT_DZIP: + if (!this->initialized) { + ++this->initialized; + this->zStream.zalloc = nullptr; + this->zStream.zfree = nullptr; + this->zStream.opaque = nullptr; + this->zStream.next_in = 0; + this->zStream.avail_in = 0; + this->zStream.next_out = nullptr; + this->zStream.avail_out = 0; + if (inflateInit2(&this->zStream, -15) != Z_OK) { + //err_internal( __FUNCTION__, + // "Cannot initialize inflation engine: %s\n", + //this->zStream.msg ); + } + } + firstChunk = start / this->chunkLength; + firstOffset = start - firstChunk * this->chunkLength; + lastChunk = end / this->chunkLength; + lastOffset = end - lastChunk * 
this->chunkLength; + //PRINTF(DBG_UNZIP, + // (" start = %lu, end = %lu\n" + //"firstChunk = %d, firstOffset = %d," + //" lastChunk = %d, lastOffset = %d\n", + //start, end, firstChunk, firstOffset, lastChunk, lastOffset )); + for (pt = buffer, i = firstChunk; i <= lastChunk; i++) { + + /* Access cache */ + found = 0; + target = 0; + lastStamp = INT_MAX; + for (size_t j = 0; j < DICT_CACHE_SIZE; j++) { +#if USE_CACHE + if (this->cache[j].chunk == i) { + found = 1; + target = j; + break; + } +#endif + if (this->cache[j].stamp < lastStamp) { + lastStamp = this->cache[j].stamp; + target = j; + } + } + + this->cache[target].stamp = ++stamp; + if (found) { + count = this->cache[target].count; + inBuffer = this->cache[target].inBuffer; + } else { + this->cache[target].chunk = i; + if (!this->cache[target].inBuffer) + this->cache[target].inBuffer = (char *)malloc(IN_BUFFER_SIZE); + inBuffer = this->cache[target].inBuffer; + + if (this->chunks[i] >= OUT_BUFFER_SIZE) { + //err_internal( __FUNCTION__, + // "this->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n", + // i, this->chunks[i], OUT_BUFFER_SIZE ); + } + memcpy(outBuffer, this->start + this->offsets[i], this->chunks[i]); + + this->zStream.next_in = (Bytef *)outBuffer; + this->zStream.avail_in = this->chunks[i]; + this->zStream.next_out = (Bytef *)inBuffer; + this->zStream.avail_out = IN_BUFFER_SIZE; + if (inflate(&this->zStream, Z_PARTIAL_FLUSH) != Z_OK) { + //err_fatal( __FUNCTION__, "inflate: %s\n", this->zStream.msg ); + } + if (this->zStream.avail_in) { + //err_internal( __FUNCTION__, + // "inflate did not flush (%d pending, %d avail)\n", + // this->zStream.avail_in, this->zStream.avail_out ); + } + + count = IN_BUFFER_SIZE - this->zStream.avail_out; + + this->cache[target].count = count; + } + + if (i == firstChunk) { + if (i == lastChunk) { + memcpy(pt, inBuffer + firstOffset, lastOffset - firstOffset); + pt += lastOffset - firstOffset; + } else { + if (count != this->chunkLength) { + //err_internal( __FUNCTION__, + 
// "Length = %d instead of %d\n", + //count, this->chunkLength ); + } + memcpy(pt, inBuffer + firstOffset, + this->chunkLength - firstOffset); + pt += this->chunkLength - firstOffset; + } + } else if (i == lastChunk) { + memcpy(pt, inBuffer, lastOffset); + pt += lastOffset; + } else { + assert(count == this->chunkLength); + memcpy(pt, inBuffer, this->chunkLength); + pt += this->chunkLength; + } + } + //*pt = '\0'; + break; + case DICT_UNKNOWN: + //err_fatal( __FUNCTION__, "Cannot read unknown file type\n" ); + break; + } +} diff --git a/src/dictziplib.hpp b/src/dictziplib.hpp new file mode 100644 index 0000000..859c295 --- /dev/null +++ b/src/dictziplib.hpp @@ -0,0 +1,56 @@ +#pragma once + +#include <ctime> +#include <string> +#include <zlib.h> + +#include "mapfile.hpp" + +/* One slot of the decompressed-chunk cache used by DictData::read(). */ +struct DictCache { + int chunk = -1; + char *inBuffer = nullptr; /* stays null until read() allocates it, so close() can test and free it safely */ + int stamp = -1; + int count = 0; +}; + +/* Read-only accessor for a plain-text or dictzip (.dz) dictionary data file. */ +class DictData +{ +public: + static const size_t DICT_CACHE_SIZE = 5; + + /* The destructor calls close() even when open() never ran or bailed out early, so every member close() inspects carries an in-class default below. */ + DictData() {} + ~DictData() { close(); } + bool open(const std::string &filename, int computeCRC); + void close(); + void read(char *buffer, unsigned long start, unsigned long size); + +private: + const char *start; /* start of mmap'd area */ + const char *end; /* end of mmap'd area */ + unsigned long size; /* size of mmap */ + + int type = 0; /* 0 == DICT_UNKNOWN until read_header() classifies the file */ + z_stream zStream; + int initialized = 0; + + int headerLength; + int method; + int flags; + time_t mtime; + int extraFlags; + int os; + int version; + int chunkLength; + int chunkCount = 0; + int *chunks = nullptr; + unsigned long *offsets = nullptr; /* Sum-scan of chunks.
*/ + std::string origFilename; + std::string comment; + unsigned long crc; + unsigned long length; + unsigned long compressedLength; + DictCache cache[DICT_CACHE_SIZE]; + MapFile mapfile; + + int read_header(const std::string &filename, int computeCRC); +}; diff --git a/src/distance.cpp b/src/distance.cpp new file mode 100644 index 0000000..8e5b553 --- /dev/null +++ b/src/distance.cpp @@ -0,0 +1,145 @@ +/* + writer : Opera Wang + E-Mail : wangvisual AT sohu DOT com + License: GPL +*/ + +/* filename: distance.cc */ +/* +http://www.merriampark.com/ld.htm +What is Levenshtein Distance? + +Levenshtein distance (LD) is a measure of the similarity between two strings, +which we will refer to as the source string (s) and the target string (t). +The distance is the number of deletions, insertions, or substitutions required + to transform s into t. For example, + + * If s is "test" and t is "test", then LD(s,t) = 0, because no transformations are needed. + The strings are already identical. + * If s is "test" and t is "tent", then LD(s,t) = 1, because one substitution + (change "s" to "n") is sufficient to transform s into t. + +The greater the Levenshtein distance, the more different the strings are. + +Levenshtein distance is named after the Russian scientist Vladimir Levenshtein, + who devised the algorithm in 1965. If you can't spell or pronounce Levenshtein, + the metric is also sometimes called edit distance. + +The Levenshtein distance algorithm has been used in: + + * Spell checking + * Speech recognition + * DNA analysis + * Plagiarism detection +*/ + +#include <cstdlib> +#include <cstring> + +#include "distance.hpp" + +/* +Cover transposition, in addition to deletion, +insertion and substitution. 
This step is taken from: +Berghel, Hal ; Roach, David : "An Extension of Ukkonen's +Enhanced Dynamic Programming ASM Algorithm" +(http://www.acm.org/~hlb/publications/asm/asm.html) +*/ +#define COVER_TRANSPOSITION + +/****************************************/ +/*Implementation of Levenshtein distance*/ +/****************************************/ + +/*Gets the minimum of three values */ +static inline int minimum(const int a, const int b, const int c) +{ + int min = a; + if (b < min) + min = b; + if (c < min) + min = c; + return min; +} + +int EditDistance::CalEditDistance(const gunichar *s, const gunichar *t, const int limit) +/*Compute levenshtein distance between s and t, this is using QUICK algorithm*/ +{ + int n = 0, m = 0, iLenDif, k, i, j, cost; + // Remove leftmost matching portion of strings + while (*s && (*s == *t)) { + s++; + t++; + } + + while (s[n]) { + n++; + } + while (t[m]) { + m++; + } + + // Remove rightmost matching portion of strings by decrement n and m. + while (n && m && (*(s + n - 1) == *(t + m - 1))) { + n--; + m--; + } + if (m == 0 || n == 0 || d == nullptr) + return (m + n); + if (m < n) { + const gunichar *temp = s; + int itemp = n; + s = t; + t = temp; + n = m; + m = itemp; + } + iLenDif = m - n; + if (iLenDif >= limit) + return iLenDif; + // step 1 + n++; + m++; + // d=(int*)malloc(sizeof(int)*m*n); + if (m * n > currentelements) { + const int wanted = m * n * 2; // double the request + // realloc leaves the old block valid on failure: grow through a temporary so the + // matrix is neither leaked nor lost, and only record the new size once it exists. + int *grown = static_cast<int *>(realloc(d, sizeof(int) * wanted)); + if (nullptr == grown) + return (m + n); + d = grown; + currentelements = wanted; + } + // step 2, init matrix + for (k = 0; k < n; k++) + d[k] = k; + for (k = 1; k < m; k++) + d[k * n] = k; + // step 3 + for (i = 1; i < n; i++) { + // first calculate column, d(i,j) + for (j = 1; j < iLenDif + i; j++) { + cost = s[i - 1] == t[j - 1] ?
0 : 1; + d[j * n + i] = minimum(d[(j - 1) * n + i] + 1, d[j * n + i - 1] + 1, d[(j - 1) * n + i - 1] + cost); +#ifdef COVER_TRANSPOSITION + if (i >= 2 && j >= 2 && (d[j * n + i] - d[(j - 2) * n + i - 2] == 2) + && (s[i - 2] == t[j - 1]) && (s[i - 1] == t[j - 2])) + d[j * n + i]--; +#endif + } + // second calculate row, d(k,j) + // now j==iLenDif+i; + for (k = 1; k <= i; k++) { + cost = s[k - 1] == t[j - 1] ? 0 : 1; + d[j * n + k] = minimum(d[(j - 1) * n + k] + 1, d[j * n + k - 1] + 1, d[(j - 1) * n + k - 1] + cost); +#ifdef COVER_TRANSPOSITION + if (k >= 2 && j >= 2 && (d[j * n + k] - d[(j - 2) * n + k - 2] == 2) + && (s[k - 2] == t[j - 1]) && (s[k - 1] == t[j - 2])) + d[j * n + k]--; +#endif + } + // test if d(i,j) limit gets equal or exceed + if (d[j * n + i] >= limit) { + return d[j * n + i]; + } + } + // d(n-1,m-1) + return d[n * m - 1]; +} diff --git a/src/distance.hpp b/src/distance.hpp new file mode 100644 index 0000000..d472d5b --- /dev/null +++ b/src/distance.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include <cstdlib> +#include <glib.h> + +class EditDistance +{ +public: + EditDistance() + { + currentelements = 2500; // It's enough for most conditions :-) + d = static_cast<int *>(malloc(sizeof(int) * currentelements)); + } + ~EditDistance() + { + if (d != nullptr) + free(d); + } + EditDistance(const EditDistance &) = delete; + EditDistance &operator=(const EditDistance &) = delete; + int CalEditDistance(const gunichar *s, const gunichar *t, const int limit); + +private: + int *d; + int currentelements; +}; diff --git a/src/libwrapper.cpp b/src/libwrapper.cpp new file mode 100644 index 0000000..1544b18 --- /dev/null +++ b/src/libwrapper.cpp @@ -0,0 +1,452 @@ +/* + * This file part of sdcv - console version of Stardict program + * http://sdcv.sourceforge.net + * Copyright (C) 2005-2006 Evgeniy <dushistov@mail.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + 
* the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <cstring> +#include <map> +#include <memory> + +#include <glib/gi18n.h> + +#include "utils.hpp" + +#include "libwrapper.hpp" + +static const char ESC_BLUE[] = "\033[0;34m"; +static const char ESC_END[] = "\033[0m"; +static const char ESC_BOLD[] = "\033[1m"; +static const char ESC_ITALIC[] = "\033[3m"; +static const char ESC_LIGHT_GRAY[] = "\033[0;37m"; +static const char ESC_GREEN[] = "\033[0;32m"; + +static const char *SEARCH_TERM_VISFMT = ESC_BOLD; +static const char *NAME_OF_DICT_VISFMT = ESC_BLUE; +static const char *TRANSCRIPTION_VISFMT = ESC_BOLD; +static const char *EXAMPLE_VISFMT = ESC_LIGHT_GRAY; +static const char *KREF_VISFMT = ESC_BOLD; +static const char *ABR_VISFMT = ESC_GREEN; + +/* Converts XDXF markup to plain text: decodes the five predefined XML entities and maps known tags to ANSI escapes when colorize_output is set (or drops them otherwise). */ +static std::string xdxf2text(const char *p, bool colorize_output) +{ + std::string res; + for (; *p; ++p) { + if (*p != '<') { + /* Each skip is the entity length minus one; the loop's ++p consumes the final character. */ + if (g_str_has_prefix(p, "&gt;")) { + res += ">"; + p += 3; + } else if (g_str_has_prefix(p, "&lt;")) { + res += "<"; + p += 3; + } else if (g_str_has_prefix(p, "&amp;")) { + res += "&"; + p += 4; + } else if (g_str_has_prefix(p, "&quot;")) { + res += "\""; + p += 5; + } else if (g_str_has_prefix(p, "&apos;")) { + res += "\'"; + p += 5; + } else + res += *p; + continue; + } + + const char *next = strchr(p, '>'); + if (!next) + continue; + + const std::string name(p + 1, next - p - 1); + + if (name == "abr") + res +=
colorize_output ? ABR_VISFMT : ""; + else if (name == "/abr") + res += colorize_output ? ESC_END : ""; + else if (name == "k") { + const char *begin = next; + if ((next = strstr(begin, "</k>")) != nullptr) + next += sizeof("</k>") - 1 - 1; + else + next = begin; + } else if (name == "kref") { + res += colorize_output ? KREF_VISFMT : ""; + } else if (name == "/kref") { + res += colorize_output ? ESC_END : ""; + } else if (name == "b") + res += colorize_output ? ESC_BOLD : ""; + else if (name == "/b") + res += colorize_output ? ESC_END : ""; + else if (name == "i") + res += colorize_output ? ESC_ITALIC : ""; + else if (name == "/i") + res += colorize_output ? ESC_END : ""; + else if (name == "tr") { + if (colorize_output) + res += TRANSCRIPTION_VISFMT; + res += "["; + } else if (name == "/tr") { + res += "]"; + if (colorize_output) + res += ESC_END; + } else if (name == "ex") + res += colorize_output ? EXAMPLE_VISFMT : ""; + else if (name == "/ex") + res += colorize_output ? ESC_END : ""; + else if (!name.empty() && name[0] == 'c' && name != "co") { + std::string::size_type pos = name.find("code"); + if (pos != std::string::npos) { + pos += sizeof("code=\"") - 1; + std::string::size_type end_pos = name.find("\""); + const std::string color(name, pos, end_pos - pos); + res += ""; + } else { + res += ""; + } + } else if (name == "/c") + res += ""; + + p = next; + } + return res; +} + +static std::string parse_data(const gchar *data, bool colorize_output) +{ + if (!data) + return ""; + + std::string res; + guint32 data_size, sec_size = 0; + gchar *m_str; + const gchar *p = data; + data_size = get_uint32(p); + p += sizeof(guint32); + while (guint32(p - data) < data_size) { + switch (*p++) { + case 'h': // HTML data + case 'w': // WikiMedia markup data + case 'm': // plain text, utf-8 + case 'l': // not utf-8, some other locale encoding, discouraged, need more work... 
+ sec_size = strlen(p); + if (sec_size) { + res += "\n"; + m_str = g_strndup(p, sec_size); + res += m_str; + g_free(m_str); + } + sec_size++; + break; + case 'g': // pango markup data + case 'x': // xdxf + sec_size = strlen(p); + if (sec_size) { + res += "\n"; + m_str = g_strndup(p, sec_size); + res += xdxf2text(m_str, colorize_output); + g_free(m_str); + } + sec_size++; + break; + case 't': // english phonetic string + sec_size = strlen(p); + if (sec_size) { + res += "\n"; + if (colorize_output) + res += TRANSCRIPTION_VISFMT; + res += "[" + std::string(p, sec_size) + "]"; + if (colorize_output) + res += ESC_END; + } + sec_size++; + break; + case 'k': // KingSoft PowerWord data + case 'y': // chinese YinBiao or japanese kana, utf-8 + sec_size = strlen(p); + if (sec_size) + res += std::string(p, sec_size); + sec_size++; + break; + case 'W': // wav file + case 'P': // picture data + sec_size = get_uint32(p); + sec_size += sizeof(guint32); + break; + } + p += sec_size; + } + + return res; +} + +void Library::SimpleLookup(const std::string &str, TSearchResultList &res_list) +{ + glong ind; + res_list.reserve(ndicts()); + for (gint idict = 0; idict < ndicts(); ++idict) + if (SimpleLookupWord(str.c_str(), ind, idict)) + res_list.push_back( + TSearchResult(dict_name(idict), + poGetWord(ind, idict), + parse_data(poGetWordData(ind, idict), colorize_output_))); +} + +void Library::LookupWithFuzzy(const std::string &str, TSearchResultList &res_list) +{ + static const int MAXFUZZY = 10; + + gchar *fuzzy_res[MAXFUZZY]; + if (!Libs::LookupWithFuzzy(str.c_str(), fuzzy_res, MAXFUZZY)) + return; + + for (gchar **p = fuzzy_res, **end = (fuzzy_res + MAXFUZZY); p != end && *p; ++p) { + SimpleLookup(*p, res_list); + g_free(*p); + } +} + +void Library::LookupWithRule(const std::string &str, TSearchResultList &res_list) +{ + std::vector<gchar *> match_res((MAX_MATCH_ITEM_PER_LIB)*ndicts()); + + const gint nfound = Libs::LookupWithRule(str.c_str(), &match_res[0]); + if (nfound == 0) + 
return; + + for (gint i = 0; i < nfound; ++i) { + SimpleLookup(match_res[i], res_list); + g_free(match_res[i]); + } +} + +void Library::LookupData(const std::string &str, TSearchResultList &res_list) +{ + std::vector<std::vector<gchar *>> drl(ndicts()); + if (!Libs::LookupData(str.c_str(), &drl[0])) + return; + for (int idict = 0; idict < ndicts(); ++idict) + for (gchar *res : drl[idict]) { + SimpleLookup(res, res_list); + g_free(res); + } +} + +void Library::print_search_result(FILE *out, const TSearchResult &res, bool &first_result) +{ + std::string loc_bookname, loc_def, loc_exp; + + if (!utf8_output_) { + loc_bookname = utf8_to_locale_ign_err(res.bookname); + loc_def = utf8_to_locale_ign_err(res.def); + loc_exp = utf8_to_locale_ign_err(res.exp); + } + if (json_) { + if (!first_result) { + fputs(",", out); + } else { + first_result = false; + } + fprintf(out, "{\"dict\": \"%s\",\"word\":\"%s\",\"definition\":\"%s\"}", + json_escape_string(res.bookname).c_str(), + json_escape_string(res.def).c_str(), + json_escape_string(res.exp).c_str()); + + } else { + fprintf(out, + "-->%s%s%s\n" + "-->%s%s%s\n" + "%s\n\n", + colorize_output_ ? NAME_OF_DICT_VISFMT : "", + utf8_output_ ? res.bookname.c_str() : loc_bookname.c_str(), + colorize_output_ ? ESC_END : "", + colorize_output_ ? SEARCH_TERM_VISFMT : "", + utf8_output_ ? res.def.c_str() : loc_def.c_str(), + colorize_output_ ? ESC_END : "", + utf8_output_ ? 
res.exp.c_str() : loc_exp.c_str()); + } +} + +namespace +{ +class sdcv_pager final +{ +public: + explicit sdcv_pager(bool ignore_env = false) + { + output = stdout; + if (ignore_env) { + return; + } + const gchar *pager = g_getenv("SDCV_PAGER"); + if (pager && (output = popen(pager, "w")) == nullptr) { + perror(_("popen failed")); + output = stdout; + } + } + sdcv_pager(const sdcv_pager &) = delete; + sdcv_pager &operator=(const sdcv_pager &) = delete; + ~sdcv_pager() + { + if (output != stdout) { + pclose(output); + } + } + FILE *get_stream() { return output; } + +private: + FILE *output; +}; +} + +bool Library::process_phrase(const char *loc_str, IReadLine &io, bool force) +{ + if (nullptr == loc_str) + return true; + + std::string query; + + analyze_query(loc_str, query); + if (!query.empty()) + io.add_to_history(query.c_str()); + + gsize bytes_read; + gsize bytes_written; + glib::Error err; + glib::CharStr str; + if (!utf8_input_) + str.reset(g_locale_to_utf8(loc_str, -1, &bytes_read, &bytes_written, get_addr(err))); + else + str.reset(g_strdup(loc_str)); + + if (nullptr == get_impl(str)) { + fprintf(stderr, _("Can not convert %s to utf8.\n"), loc_str); + fprintf(stderr, "%s\n", err->message); + return false; + } + + if (str[0] == '\0') + return true; + + TSearchResultList res_list; + + switch (analyze_query(get_impl(str), query)) { + case qtFUZZY: + LookupWithFuzzy(query, res_list); + break; + case qtREGEXP: + LookupWithRule(query, res_list); + break; + case qtSIMPLE: + SimpleLookup(get_impl(str), res_list); + if (res_list.empty() && fuzzy_) + LookupWithFuzzy(get_impl(str), res_list); + break; + case qtDATA: + LookupData(query, res_list); + break; + default: + /*nothing*/; + } + + bool first_result = true; + if (json_) { + fputc('[', stdout); + } + if (!res_list.empty()) { + /* try to be more clever, if there are + one or zero results per dictionary show all + */ + bool show_all_results = true; + typedef std::map<std::string, int, std::less<std::string>> 
DictResMap; + if (!force) { + DictResMap res_per_dict; + for (const TSearchResult &search_res : res_list) { + auto r = res_per_dict.equal_range(search_res.bookname); + DictResMap tmp(r.first, r.second); + if (tmp.empty()) //there are no yet such bookname in map + res_per_dict.insert(DictResMap::value_type(search_res.bookname, 1)); + else { + ++((tmp.begin())->second); + if (tmp.begin()->second > 1) { + show_all_results = false; + break; + } + } + } + } //if (!force) + + if (!show_all_results && !force) { + if (!json_) { + printf(_("Found %zu items, similar to %s.\n"), res_list.size(), + utf8_output_ ? get_impl(str) : utf8_to_locale_ign_err(get_impl(str)).c_str()); + } + for (size_t i = 0; i < res_list.size(); ++i) { + const std::string loc_bookname = utf8_to_locale_ign_err(res_list[i].bookname); + const std::string loc_def = utf8_to_locale_ign_err(res_list[i].def); + printf("%zu)%s%s%s-->%s%s%s\n", i, + colorize_output_ ? NAME_OF_DICT_VISFMT : "", + utf8_output_ ? res_list[i].bookname.c_str() : loc_bookname.c_str(), + colorize_output_ ? ESC_END : "", + colorize_output_ ? SEARCH_TERM_VISFMT : "", + utf8_output_ ? res_list[i].def.c_str() : loc_def.c_str(), + colorize_output_ ? ESC_END : ""); + } + int choise; + std::unique_ptr<IReadLine> choice_readline(create_readline_object()); + for (;;) { + std::string str_choise; + choice_readline->read(_("Your choice[-1 to abort]: "), str_choise); + sscanf(str_choise.c_str(), "%d", &choise); + if (choise >= 0 && choise < int(res_list.size())) { + sdcv_pager pager; + io.add_to_history(res_list[choise].def.c_str()); + print_search_result(pager.get_stream(), res_list[choise], first_result); + break; + } else if (choise == -1) { + break; + } else + printf(_("Invalid choice.\nIt must be from 0 to %zu or -1.\n"), + res_list.size() - 1); + } + } else { + sdcv_pager pager(force || json_); + if (!json_) { + fprintf(pager.get_stream(), _("Found %zu items, similar to %s.\n"), + res_list.size(), utf8_output_ ? 
get_impl(str) : utf8_to_locale_ign_err(get_impl(str)).c_str()); + } + for (const TSearchResult &search_res : res_list) { + print_search_result(pager.get_stream(), search_res, first_result); + } + } + + } else { + std::string loc_str; + if (!utf8_output_) + loc_str = utf8_to_locale_ign_err(get_impl(str)); + if (!json_) + printf(_("Nothing similar to %s, sorry :(\n"), utf8_output_ ? get_impl(str) : loc_str.c_str()); + } + + if (json_) { + fputs("]\n", stdout); + } + return true; +} diff --git a/src/libwrapper.hpp b/src/libwrapper.hpp new file mode 100644 index 0000000..31783b6 --- /dev/null +++ b/src/libwrapper.hpp @@ -0,0 +1,54 @@ +#pragma once + +#include <string> +#include <vector> + +#include "readline.hpp" +#include "stardict_lib.hpp" + +//this structure is wrapper and it need for unification +//results of search whith return Dicts class +struct TSearchResult { + std::string bookname; + std::string def; + std::string exp; + + TSearchResult(const std::string &bookname_, const std::string &def_, const std::string &exp_) + : bookname(bookname_) + , def(def_) + , exp(exp_) + { + } +}; + +typedef std::vector<TSearchResult> TSearchResultList; + +//this class is wrapper around Dicts class for easy use +//of it +class Library : public Libs +{ +public: + Library(bool uinput, bool uoutput, bool colorize_output, bool use_json, bool no_fuzzy) + : utf8_input_(uinput) + , utf8_output_(uoutput) + , colorize_output_(colorize_output) + , json_(use_json) + { + setVerbose(!use_json); + setFuzzy(!no_fuzzy); + } + + bool process_phrase(const char *loc_str, IReadLine &io, bool force = false); + +private: + bool utf8_input_; + bool utf8_output_; + bool colorize_output_; + bool json_; + + void SimpleLookup(const std::string &str, TSearchResultList &res_list); + void LookupWithFuzzy(const std::string &str, TSearchResultList &res_list); + void LookupWithRule(const std::string &str, TSearchResultList &res_lsit); + void LookupData(const std::string &str, TSearchResultList &res_list); + 
void print_search_result(FILE *out, const TSearchResult &res, bool &first_result); +}; diff --git a/src/mapfile.hpp b/src/mapfile.hpp new file mode 100644 index 0000000..ca5a681 --- /dev/null +++ b/src/mapfile.hpp @@ -0,0 +1,86 @@ +#pragma once + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_MMAP +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/types.h> +#endif +#ifdef _WIN32 +#include <windows.h> +#endif +#include <glib.h> + +class MapFile +{ +public: + MapFile() {} + ~MapFile(); + MapFile(const MapFile &) = delete; + MapFile &operator=(const MapFile &) = delete; + bool open(const char *file_name, unsigned long file_size); + gchar *begin() { return data; } + +private: + char *data = nullptr; + unsigned long size = 0ul; +#ifdef HAVE_MMAP + int mmap_fd = -1; +#elif defined(_WIN32) + HANDLE hFile = 0; + HANDLE hFileMap = 0; +#endif +}; + +inline bool MapFile::open(const char *file_name, unsigned long file_size) +{ + size = file_size; +#ifdef HAVE_MMAP + if ((mmap_fd = ::open(file_name, O_RDONLY)) < 0) { + //g_print("Open file %s failed!\n",fullfilename); + return false; + } + data = (gchar *)mmap(nullptr, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0); + if ((void *)data == (void *)(-1)) { + //g_print("mmap file %s failed!\n",idxfilename); + data = nullptr; + return false; + } +#elif defined(_WIN32) + hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_ALWAYS, + FILE_ATTRIBUTE_NORMAL, 0); + hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0, + file_size, nullptr); + data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size); +#else + gsize read_len; + if (!g_file_get_contents(file_name, &data, &read_len, nullptr)) + return false; + + if (read_len != file_size) + return false; +#endif + + return true; +} + +inline MapFile::~MapFile() +{ + if (!data) + return; +#ifdef HAVE_MMAP + munmap(data, size); + close(mmap_fd); +#else +#ifdef _WIN32 + UnmapViewOfFile(data); + CloseHandle(hFileMap); + CloseHandle(hFile); 
+#else + g_free(data); +#endif +#endif +} diff --git a/src/readline.cpp b/src/readline.cpp new file mode 100644 index 0000000..ab443ff --- /dev/null +++ b/src/readline.cpp @@ -0,0 +1,115 @@ +/* + * This file part of sdcv - console version of Stardict program + * http://sdcv.sourceforge.net + * Copyright (C) 2005 Evgeniy <dushistov@mail.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <cstdio> +#include <cstdlib> +#ifdef WITH_READLINE +#include <readline/history.h> +#include <readline/readline.h> +#endif +#include <glib.h> + +#include "utils.hpp" + +#include "readline.hpp" + +bool stdio_getline(FILE *in, std::string &str) +{ + assert(in != nullptr); + str.clear(); + int ch; + while ((ch = fgetc(in)) != EOF && ch != '\n') + str += ch; + + return EOF != ch; +} + +#ifndef WITH_READLINE +namespace +{ +class dummy_readline : public IReadLine +{ +public: + bool read(const std::string &banner, std::string &line) override + { + printf("%s", banner.c_str()); + return stdio_getline(stdin, line); + } +}; +} +#else + +namespace +{ +class real_readline : public IReadLine +{ + +public: + real_readline() + { + rl_readline_name = "sdcv"; + using_history(); + const std::string histname = std::string(g_get_home_dir()) + 
G_DIR_SEPARATOR + ".sdcv_history"; + read_history(histname.c_str()); + } + + ~real_readline() + { + const std::string histname = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history"; + write_history(histname.c_str()); + const gchar *hist_size_str = g_getenv("SDCV_HISTSIZE"); + int hist_size; + if (!hist_size_str || sscanf(hist_size_str, "%d", &hist_size) < 1) + hist_size = 2000; + history_truncate_file(histname.c_str(), hist_size); + } + + bool read(const std::string &banner, std::string &line) override + { + char *phrase = nullptr; + phrase = readline(banner.c_str()); + if (phrase) { + line = phrase; + free(phrase); + return true; + } + return false; + } + + void add_to_history(const std::string &phrase) override + { + add_history(phrase.c_str()); + } +}; +} +#endif //WITH_READLINE + +IReadLine *create_readline_object() +{ +#ifdef WITH_READLINE + return new real_readline; +#else + return new dummy_readline; +#endif +} diff --git a/src/readline.hpp b/src/readline.hpp new file mode 100644 index 0000000..e14ae00 --- /dev/null +++ b/src/readline.hpp @@ -0,0 +1,15 @@ +#pragma once + +#include <string> + +class IReadLine +{ +public: + virtual ~IReadLine() {} + virtual bool read(const std::string &banner, std::string &line) = 0; + virtual void add_to_history(const std::string &) {} +}; + +extern std::string sdcv_readline; +extern IReadLine *create_readline_object(); +extern bool stdio_getline(FILE *in, std::string &str); diff --git a/src/sdcv.cpp b/src/sdcv.cpp new file mode 100644 index 0000000..0c75eb1 --- /dev/null +++ b/src/sdcv.cpp @@ -0,0 +1,260 @@ +/* + * This file part of sdcv - console version of Stardict program + * http://sdcv.sourceforge.net + * Copyright (C) 2003-2006 Evgeniy <dushistov@mail.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later 
version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <algorithm> +#include <cerrno> +#include <clocale> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <map> +#include <memory> +#include <string> +#include <vector> + +#include <glib.h> +#include <glib/gi18n.h> +#include <glib/gstdio.h> + +#include "libwrapper.hpp" +#include "readline.hpp" +#include "utils.hpp" + +static const char gVersion[] = VERSION; + +namespace +{ +static void free_str_array(gchar **arr) +{ + gchar **p; + + for (p = arr; *p; ++p) + g_free(*p); + g_free(arr); +} +} +namespace glib +{ +using StrArr = ResourceWrapper<gchar *, gchar *, free_str_array>; +} + +static void list_dicts(const std::list<std::string> &dicts_dir_list, bool use_json); + +int main(int argc, char *argv[]) try { + setlocale(LC_ALL, ""); +#if ENABLE_NLS + bindtextdomain("sdcv", + //"./locale"//< for testing + GETTEXT_TRANSLATIONS_PATH //< should be + ); + textdomain("sdcv"); +#endif + + gboolean show_version = FALSE; + gboolean show_list_dicts = FALSE; + glib::StrArr use_dict_list; + gboolean non_interactive = FALSE; + gboolean json_output = FALSE; + gboolean no_fuzzy = FALSE; + gboolean utf8_output = FALSE; + gboolean utf8_input = FALSE; + glib::CharStr opt_data_dir; + gboolean only_data_dir = FALSE; + gboolean colorize = FALSE; + + const GOptionEntry entries[] = { + { "version", 'v', 0, G_OPTION_ARG_NONE, &show_version, + _("display version information and exit"), nullptr }, + { "list-dicts", 'l', 0, 
G_OPTION_ARG_NONE, &show_list_dicts, + _("display list of available dictionaries and exit"), nullptr }, + { "use-dict", 'u', 0, G_OPTION_ARG_STRING_ARRAY, get_addr(use_dict_list), + _("for search use only dictionary with this bookname"), + _("bookname") }, + { "non-interactive", 'n', 0, G_OPTION_ARG_NONE, &non_interactive, + _("for use in scripts"), nullptr }, + { "json-output", 'j', 0, G_OPTION_ARG_NONE, &json_output, + _("print the result formatted as JSON"), nullptr }, + { "exact-search", 'e', 0, G_OPTION_ARG_NONE, &no_fuzzy, + _("do not fuzzy-search for similar words, only return exact matches"), nullptr }, + { "utf8-output", '0', 0, G_OPTION_ARG_NONE, &utf8_output, + _("output must be in utf8"), nullptr }, + { "utf8-input", '1', 0, G_OPTION_ARG_NONE, &utf8_input, + _("input of sdcv in utf8"), nullptr }, + { "data-dir", '2', 0, G_OPTION_ARG_STRING, get_addr(opt_data_dir), + _("use this directory as path to stardict data directory"), + _("path/to/dir") }, + { "only-data-dir", 'x', 0, G_OPTION_ARG_NONE, &only_data_dir, + _("only use the dictionaries in data-dir, do not search in user and system directories"), nullptr }, + { "color", 'c', 0, G_OPTION_ARG_NONE, &colorize, + _("colorize the output"), nullptr }, + {}, + }; + + glib::Error error; + GOptionContext *context = g_option_context_new(_(" words")); + g_option_context_set_help_enabled(context, TRUE); + g_option_context_add_main_entries(context, entries, nullptr); + const gboolean parse_res = g_option_context_parse(context, &argc, &argv, get_addr(error)); + g_option_context_free(context); + if (!parse_res) { + fprintf(stderr, _("Invalid command line arguments: %s\n"), + error->message); + return EXIT_FAILURE; + } + + if (show_version) { + printf(_("Console version of Stardict, version %s\n"), gVersion); + return EXIT_SUCCESS; + } + + const gchar *stardict_data_dir = g_getenv("STARDICT_DATA_DIR"); + std::string data_dir; + if (!opt_data_dir) { + if (!only_data_dir) { + if (stardict_data_dir) + data_dir = 
stardict_data_dir; + else + data_dir = "/usr/share/stardict/dic"; + } + } else { + data_dir = get_impl(opt_data_dir); + } + + const char *homedir = g_getenv("HOME"); + if (!homedir) + homedir = g_get_home_dir(); + + std::list<std::string> dicts_dir_list; + if (!only_data_dir) + dicts_dir_list.push_back(std::string(homedir) + G_DIR_SEPARATOR + ".stardict" + G_DIR_SEPARATOR + "dic"); + dicts_dir_list.push_back(data_dir); + if (show_list_dicts) { + list_dicts(dicts_dir_list, json_output); + return EXIT_SUCCESS; + } + + std::list<std::string> disable_list; + + std::map<std::string, std::string> bookname_to_ifo; + for_each_file(dicts_dir_list, ".ifo", std::list<std::string>(), std::list<std::string>(), + [&bookname_to_ifo](const std::string &fname, bool) { + DictInfo dict_info; + const bool load_ok = dict_info.load_from_ifo_file(fname, false); + if (!load_ok) + return; + bookname_to_ifo[dict_info.bookname] = dict_info.ifo_file_name; + }); + + std::list<std::string> order_list; + if (use_dict_list != nullptr) { + for (auto &&x : bookname_to_ifo) { + gchar **p = get_impl(use_dict_list); + for (; *p != nullptr; ++p) + if (x.first.compare(*p) == 0) { + break; + } + if (*p == nullptr) { + disable_list.push_back(x.second); + } + } + + // add bookname to list + gchar **p = get_impl(use_dict_list); + while (*p) { + order_list.push_back(bookname_to_ifo.at(*p)); + ++p; + } + } else { + const std::string odering_cfg_file = std::string(homedir) + G_DIR_SEPARATOR_S ".sdcv_ordering"; + FILE *ordering_file = fopen(odering_cfg_file.c_str(), "r"); + if (ordering_file != nullptr) { + std::string line; + while (stdio_getline(ordering_file, line)) { + order_list.push_back(bookname_to_ifo.at(line)); + } + fclose(ordering_file); + } + } + + const std::string conf_dir = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".stardict"; + if (g_mkdir(conf_dir.c_str(), S_IRWXU) == -1 && errno != EEXIST) { + fprintf(stderr, _("g_mkdir failed: %s\n"), strerror(errno)); + } + + Library lib(utf8_input, 
utf8_output, colorize, json_output, no_fuzzy); + lib.load(dicts_dir_list, order_list, disable_list); + + std::unique_ptr<IReadLine> io(create_readline_object()); + if (optind < argc) { + for (int i = optind; i < argc; ++i) + if (!lib.process_phrase(argv[i], *io, non_interactive)) { + return EXIT_FAILURE; + } + } else if (!non_interactive) { + + std::string phrase; + while (io->read(_("Enter word or phrase: "), phrase)) { + if (!lib.process_phrase(phrase.c_str(), *io)) + return EXIT_FAILURE; + phrase.clear(); + } + + putchar('\n'); + } else { + fprintf(stderr, _("There are no words/phrases to translate.\n")); + } + return EXIT_SUCCESS; +} catch (const std::exception &ex) { + fprintf(stderr, "Internal error: %s\n", ex.what()); + exit(EXIT_FAILURE); +} + +static void list_dicts(const std::list<std::string> &dicts_dir_list, bool use_json) +{ + bool first_entry = true; + if (!use_json) + printf(_("Dictionary's name Word count\n")); + else + fputc('[', stdout); + std::list<std::string> order_list, disable_list; + for_each_file(dicts_dir_list, ".ifo", order_list, + disable_list, [use_json, &first_entry](const std::string &filename, bool) -> void { + DictInfo dict_info; + if (dict_info.load_from_ifo_file(filename, false)) { + const std::string bookname = utf8_to_locale_ign_err(dict_info.bookname); + if (use_json) { + if (first_entry) { + first_entry = false; + } else { + fputc(',', stdout); // comma between entries + } + printf("{\"name\": \"%s\", \"wordcount\": \"%d\"}", json_escape_string(bookname).c_str(), dict_info.wordcount); + } else { + printf("%s %d\n", bookname.c_str(), dict_info.wordcount); + } + } + }); + if (use_json) + fputs("]\n", stdout); +} diff --git a/src/stardict_lib.cpp b/src/stardict_lib.cpp new file mode 100644 index 0000000..63e23f5 --- /dev/null +++ b/src/stardict_lib.cpp @@ -0,0 +1,1642 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <algorithm> +#include <cctype> +#include <cstring> +#include <stdexcept> + +#include 
<glib/gstdio.h> +#include <sys/stat.h> +#include <zlib.h> + +#include "distance.hpp" +#include "mapfile.hpp" +#include "utils.hpp" + +#include "stardict_lib.hpp" + +#define TO_STR2(xstr) #xstr +#define TO_STR1(xstr) TO_STR2(xstr) + +#define THROW_IF_ERROR(expr) \ + do { \ + assert((expr)); \ + if (!(expr)) \ + throw std::runtime_error(#expr " not true at " __FILE__ ": " TO_STR1(__LINE__)); \ + } while (false) + +// Notice: read src/tools/DICTFILE_FORMAT for the dictionary +// file's format information! + +namespace +{ +struct Fuzzystruct { + char *pMatchWord; + int iMatchWordDistance; +}; + +static inline bool bIsVowel(gchar inputchar) +{ + gchar ch = g_ascii_toupper(inputchar); + return (ch == 'A' || ch == 'E' || ch == 'I' || ch == 'O' || ch == 'U'); +} + +static bool bIsPureEnglish(const gchar *str) +{ + // i think this should work even when it is UTF8 string :). + for (int i = 0; str[i] != 0; i++) + //if(str[i]<0) + //if(str[i]<32 || str[i]>126) // tab equal 9,so this is not OK. + // Better use isascii() but not str[i]<0 while char is default unsigned in arm + if (!isascii(str[i])) + return false; + return true; +} + +static inline gint stardict_strcmp(const gchar *s1, const gchar *s2) +{ + const gint a = g_ascii_strcasecmp(s1, s2); + if (a == 0) + return strcmp(s1, s2); + else + return a; +} + +static void unicode_strdown(gunichar *str) +{ + while (*str) { + *str = g_unichar_tolower(*str); + ++str; + } +} +} + +bool DictInfo::load_from_ifo_file(const std::string &ifofilename, + bool istreedict) +{ + ifo_file_name = ifofilename; + glib::CharStr buffer; + if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), nullptr, nullptr)) + return false; + + static const char TREEDICT_MAGIC_DATA[] = "StarDict's treedict ifo file"; + static const char DICT_MAGIC_DATA[] = "StarDict's dict ifo file"; + + const gchar *magic_data = istreedict ? 
TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA; + static const unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF, '\0' }; + if (!g_str_has_prefix( + g_str_has_prefix(get_impl(buffer), (const gchar *)(utf8_bom)) ? get_impl(buffer) + 3 : get_impl(buffer), + magic_data)) { + return false; + } + + gchar *p1 = get_impl(buffer) + strlen(magic_data) - 1; + + gchar *p2 = strstr(p1, "\nwordcount="); + if (p2 == nullptr) + return false; + + gchar *p3 = strchr(p2 + sizeof("\nwordcount=") - 1, '\n'); + + wordcount = atol(std::string(p2 + sizeof("\nwordcount=") - 1, p3 - (p2 + sizeof("\nwordcount=") - 1)).c_str()); + + if (istreedict) { + p2 = strstr(p1, "\ntdxfilesize="); + if (p2 == nullptr) + return false; + + p3 = strchr(p2 + sizeof("\ntdxfilesize=") - 1, '\n'); + + index_file_size = atol(std::string(p2 + sizeof("\ntdxfilesize=") - 1, p3 - (p2 + sizeof("\ntdxfilesize=") - 1)).c_str()); + + } else { + + p2 = strstr(p1, "\nidxfilesize="); + if (p2 == nullptr) + return false; + + p3 = strchr(p2 + sizeof("\nidxfilesize=") - 1, '\n'); + index_file_size = atol(std::string(p2 + sizeof("\nidxfilesize=") - 1, p3 - (p2 + sizeof("\nidxfilesize=") - 1)).c_str()); + } + + p2 = strstr(p1, "\nbookname="); + + if (p2 == nullptr) + return false; + + p2 = p2 + sizeof("\nbookname=") - 1; + p3 = strchr(p2, '\n'); + bookname.assign(p2, p3 - p2); + + p2 = strstr(p1, "\nauthor="); + if (p2) { + p2 = p2 + sizeof("\nauthor=") - 1; + p3 = strchr(p2, '\n'); + author.assign(p2, p3 - p2); + } + + p2 = strstr(p1, "\nemail="); + if (p2) { + p2 = p2 + sizeof("\nemail=") - 1; + p3 = strchr(p2, '\n'); + email.assign(p2, p3 - p2); + } + + p2 = strstr(p1, "\nwebsite="); + if (p2) { + p2 = p2 + sizeof("\nwebsite=") - 1; + p3 = strchr(p2, '\n'); + website.assign(p2, p3 - p2); + } + + p2 = strstr(p1, "\ndate="); + if (p2) { + p2 = p2 + sizeof("\ndate=") - 1; + p3 = strchr(p2, '\n'); + date.assign(p2, p3 - p2); + } + + p2 = strstr(p1, "\ndescription="); + if (p2) { + p2 = p2 + sizeof("\ndescription=") - 1; + p3 = strchr(p2, 
'\n'); + description.assign(p2, p3 - p2); + } + + p2 = strstr(p1, "\nsametypesequence="); + if (p2) { + p2 += sizeof("\nsametypesequence=") - 1; + p3 = strchr(p2, '\n'); + sametypesequence.assign(p2, p3 - p2); + } + + p2 = strstr(p1, "\nsynwordcount="); + syn_wordcount = 0; + if (p2) { + p2 += sizeof("\nsynwordcount=") - 1; + p3 = strchr(p2, '\n'); + syn_wordcount = atol(std::string(p2, p3 - p2).c_str()); + } + + return true; +} + +gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size) +{ + for (int i = 0; i < WORDDATA_CACHE_NUM; i++) + if (cache[i].data && cache[i].offset == idxitem_offset) + return cache[i].data; + + if (dictfile) + fseek(dictfile, idxitem_offset, SEEK_SET); + + gchar *data; + if (!sametypesequence.empty()) { + glib::CharStr origin_data((gchar *)g_malloc(idxitem_size)); + + if (dictfile) { + const size_t nitems = fread(get_impl(origin_data), idxitem_size, 1, dictfile); + THROW_IF_ERROR(nitems == 1); + } else + dictdzfile->read(get_impl(origin_data), idxitem_offset, idxitem_size); + + guint32 data_size; + gint sametypesequence_len = sametypesequence.length(); + //there have sametypesequence_len char being omitted. + data_size = idxitem_size + sizeof(guint32) + sametypesequence_len; + //if the last item's size is determined by the end up '\0',then +=sizeof(gchar); + //if the last item's size is determined by the head guint32 type data,then +=sizeof(guint32); + switch (sametypesequence[sametypesequence_len - 1]) { + case 'm': + case 't': + case 'y': + case 'l': + case 'g': + case 'x': + case 'k': + data_size += sizeof(gchar); + break; + case 'W': + case 'P': + data_size += sizeof(guint32); + break; + default: + if (g_ascii_isupper(sametypesequence[sametypesequence_len - 1])) + data_size += sizeof(guint32); + else + data_size += sizeof(gchar); + break; + } + data = (gchar *)g_malloc(data_size); + gchar *p1, *p2; + p1 = data + sizeof(guint32); + p2 = get_impl(origin_data); + guint32 sec_size; + //copy the head items. 
+ for (int i = 0; i < sametypesequence_len - 1; i++) { + *p1 = sametypesequence[i]; + p1 += sizeof(gchar); + switch (sametypesequence[i]) { + case 'm': + case 't': + case 'y': + case 'l': + case 'g': + case 'x': + case 'k': + sec_size = strlen(p2) + 1; + memcpy(p1, p2, sec_size); + p1 += sec_size; + p2 += sec_size; + break; + case 'W': + case 'P': + sec_size = get_uint32(p2); + sec_size += sizeof(guint32); + memcpy(p1, p2, sec_size); + p1 += sec_size; + p2 += sec_size; + break; + default: + if (g_ascii_isupper(sametypesequence[i])) { + sec_size = get_uint32(p2); + sec_size += sizeof(guint32); + } else { + sec_size = strlen(p2) + 1; + } + memcpy(p1, p2, sec_size); + p1 += sec_size; + p2 += sec_size; + break; + } + } + //calculate the last item 's size. + sec_size = idxitem_size - (p2 - get_impl(origin_data)); + *p1 = sametypesequence[sametypesequence_len - 1]; + p1 += sizeof(gchar); + switch (sametypesequence[sametypesequence_len - 1]) { + case 'm': + case 't': + case 'y': + case 'l': + case 'g': + case 'x': + case 'k': + memcpy(p1, p2, sec_size); + p1 += sec_size; + *p1 = '\0'; //add the end up '\0'; + break; + case 'W': + case 'P': + set_uint32(p1, sec_size); + p1 += sizeof(guint32); + memcpy(p1, p2, sec_size); + break; + default: + if (g_ascii_isupper(sametypesequence[sametypesequence_len - 1])) { + set_uint32(p1, sec_size); + p1 += sizeof(guint32); + memcpy(p1, p2, sec_size); + } else { + memcpy(p1, p2, sec_size); + p1 += sec_size; + *p1 = '\0'; + } + break; + } + set_uint32(data, data_size); + } else { + data = (gchar *)g_malloc(idxitem_size + sizeof(guint32)); + if (dictfile) { + const size_t nitems = fread(data + sizeof(guint32), idxitem_size, 1, dictfile); + THROW_IF_ERROR(nitems == 1); + } else + dictdzfile->read(data + sizeof(guint32), idxitem_offset, idxitem_size); + set_uint32(data, idxitem_size + sizeof(guint32)); + } + g_free(cache[cache_cur].data); + + cache[cache_cur].data = data; + cache[cache_cur].offset = idxitem_offset; + cache_cur++; + if 
(cache_cur == WORDDATA_CACHE_NUM) + cache_cur = 0; + return data; +} + +bool DictBase::SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data) +{ + int nWord = SearchWords.size(); + std::vector<bool> WordFind(nWord, false); + int nfound = 0; + + if (dictfile) + fseek(dictfile, idxitem_offset, SEEK_SET); + if (dictfile) { + const size_t nitems = fread(origin_data, idxitem_size, 1, dictfile); + THROW_IF_ERROR(nitems == 1); + } else + dictdzfile->read(origin_data, idxitem_offset, idxitem_size); + gchar *p = origin_data; + guint32 sec_size; + int j; + if (!sametypesequence.empty()) { + gint sametypesequence_len = sametypesequence.length(); + for (int i = 0; i < sametypesequence_len - 1; i++) { + switch (sametypesequence[i]) { + case 'm': + case 't': + case 'y': + case 'l': + case 'g': + case 'x': + case 'k': + for (j = 0; j < nWord; j++) + if (!WordFind[j] && strstr(p, SearchWords[j].c_str())) { + WordFind[j] = true; + ++nfound; + } + + if (nfound == nWord) + return true; + sec_size = strlen(p) + 1; + p += sec_size; + break; + default: + if (g_ascii_isupper(sametypesequence[i])) { + sec_size = get_uint32(p); + sec_size += sizeof(guint32); + } else { + sec_size = strlen(p) + 1; + } + p += sec_size; + } + } + switch (sametypesequence[sametypesequence_len - 1]) { + case 'm': + case 't': + case 'y': + case 'l': + case 'g': + case 'x': + case 'k': + sec_size = idxitem_size - (p - origin_data); + for (j = 0; j < nWord; j++) + if (!WordFind[j] && g_strstr_len(p, sec_size, SearchWords[j].c_str())) { + WordFind[j] = true; + ++nfound; + } + + if (nfound == nWord) + return true; + break; + } + } else { + while (guint32(p - origin_data) < idxitem_size) { + switch (*p) { + case 'm': + case 't': + case 'y': + case 'l': + case 'g': + case 'x': + case 'k': + for (j = 0; j < nWord; j++) + if (!WordFind[j] && strstr(p, SearchWords[j].c_str())) { + WordFind[j] = true; + ++nfound; + } + + if (nfound == nWord) + return true; + 
sec_size = strlen(p) + 1; + p += sec_size; + break; + default: + if (g_ascii_isupper(*p)) { + sec_size = get_uint32(p); + sec_size += sizeof(guint32); + } else { + sec_size = strlen(p) + 1; + } + p += sec_size; + } + } + } + return false; +} + +namespace +{ +class OffsetIndex : public IIndexFile +{ +public: + OffsetIndex() + : idxfile(nullptr) + { + } + ~OffsetIndex() + { + if (idxfile) + fclose(idxfile); + } + bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) override; + const gchar *get_key(glong idx) override; + void get_data(glong idx) override { get_key(idx); } + const gchar *get_key_and_data(glong idx) override + { + return get_key(idx); + } + bool lookup(const char *str, glong &idx) override; + +private: + static const gint ENTR_PER_PAGE = 32; + static const char *CACHE_MAGIC; + + std::vector<guint32> wordoffset; + FILE *idxfile; + gulong wordcount; + + gchar wordentry_buf[256 + sizeof(guint32) * 2]; // The length of "word_str" should be less than 256. See src/tools/DICTFILE_FORMAT. 
+ struct index_entry { + glong idx; + std::string keystr; + void assign(glong i, const std::string &str) + { + idx = i; + keystr.assign(str); + } + }; + index_entry first, last, middle, real_last; + + struct page_entry { + gchar *keystr; + guint32 off, size; + }; + std::vector<gchar> page_data; + struct page_t { + glong idx = -1; + page_entry entries[ENTR_PER_PAGE]; + + page_t() {} + void fill(gchar *data, gint nent, glong idx_); + } page; + gulong load_page(glong page_idx); + const gchar *read_first_on_page_key(glong page_idx); + const gchar *get_first_on_page_key(glong page_idx); + bool load_cache(const std::string &url); + bool save_cache(const std::string &url, bool verbose); + static std::list<std::string> get_cache_variant(const std::string &url); +}; + +const char *OffsetIndex::CACHE_MAGIC = "StarDict's Cache, Version: 0.1"; + +class WordListIndex : public IIndexFile +{ +public: + WordListIndex() + : idxdatabuf(nullptr) + { + } + ~WordListIndex() { g_free(idxdatabuf); } + bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) override; + const gchar *get_key(glong idx) override { return wordlist[idx]; } + void get_data(glong idx) override; + const gchar *get_key_and_data(glong idx) override + { + get_data(idx); + return get_key(idx); + } + bool lookup(const char *str, glong &idx) override; + +private: + gchar *idxdatabuf; + std::vector<gchar *> wordlist; +}; + +void OffsetIndex::page_t::fill(gchar *data, gint nent, glong idx_) +{ + idx = idx_; + gchar *p = data; + glong len; + for (gint i = 0; i < nent; ++i) { + entries[i].keystr = p; + len = strlen(p); + p += len + 1; + entries[i].off = g_ntohl(get_uint32(p)); + p += sizeof(guint32); + entries[i].size = g_ntohl(get_uint32(p)); + p += sizeof(guint32); + } +} + +inline const gchar *OffsetIndex::read_first_on_page_key(glong page_idx) +{ + fseek(idxfile, wordoffset[page_idx], SEEK_SET); + guint32 page_size = wordoffset[page_idx + 1] - wordoffset[page_idx]; + const size_t nitems = 
fread(wordentry_buf, + std::min(sizeof(wordentry_buf), static_cast<size_t>(page_size)), + 1, idxfile); + THROW_IF_ERROR(nitems == 1); + //TODO: check returned values, deal with word entry that strlen>255. + return wordentry_buf; +} + +inline const gchar *OffsetIndex::get_first_on_page_key(glong page_idx) +{ + if (page_idx < middle.idx) { + if (page_idx == first.idx) + return first.keystr.c_str(); + return read_first_on_page_key(page_idx); + } else if (page_idx > middle.idx) { + if (page_idx == last.idx) + return last.keystr.c_str(); + return read_first_on_page_key(page_idx); + } else + return middle.keystr.c_str(); +} + +bool OffsetIndex::load_cache(const std::string &url) +{ + const std::list<std::string> vars = get_cache_variant(url); + + for (const std::string &item : vars) { + struct ::stat idxstat, cachestat; + if (g_stat(url.c_str(), &idxstat) != 0 || g_stat(item.c_str(), &cachestat) != 0) + continue; + if (cachestat.st_mtime < idxstat.st_mtime) + continue; + MapFile mf; + if (!mf.open(item.c_str(), cachestat.st_size)) + continue; + if (strncmp(mf.begin(), CACHE_MAGIC, strlen(CACHE_MAGIC)) != 0) + continue; + memcpy(&wordoffset[0], mf.begin() + strlen(CACHE_MAGIC), wordoffset.size() * sizeof(wordoffset[0])); + return true; + } + + return false; +} + +std::list<std::string> OffsetIndex::get_cache_variant(const std::string &url) +{ + std::list<std::string> res = { url + ".oft" }; + if (!g_file_test(g_get_user_cache_dir(), G_FILE_TEST_EXISTS) && g_mkdir(g_get_user_cache_dir(), 0700) == -1) + return res; + + const std::string cache_dir = std::string(g_get_user_cache_dir()) + G_DIR_SEPARATOR_S + "sdcv"; + + if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_EXISTS)) { + if (g_mkdir(cache_dir.c_str(), 0700) == -1) + return res; + } else if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_IS_DIR)) + return res; + + gchar *base = g_path_get_basename(url.c_str()); + res.push_back(cache_dir + G_DIR_SEPARATOR_S + base + ".oft"); + g_free(base); + return res; +} + +bool 
OffsetIndex::save_cache(const std::string &url, bool verbose) +{ + const std::list<std::string> vars = get_cache_variant(url); + for (const std::string &item : vars) { + FILE *out = fopen(item.c_str(), "wb"); + if (!out) + continue; + if (fwrite(CACHE_MAGIC, 1, strlen(CACHE_MAGIC), out) != strlen(CACHE_MAGIC)) + continue; + if (fwrite(&wordoffset[0], sizeof(wordoffset[0]), wordoffset.size(), out) != wordoffset.size()) + continue; + fclose(out); + if (verbose) { + printf("save to cache %s\n", url.c_str()); + } + return true; + } + return false; +} + +bool OffsetIndex::load(const std::string &url, gulong wc, gulong fsize, bool verbose) +{ + wordcount = wc; + gulong npages = (wc - 1) / ENTR_PER_PAGE + 2; + wordoffset.resize(npages); + if (!load_cache(url)) { //map file will close after finish of block + MapFile map_file; + if (!map_file.open(url.c_str(), fsize)) + return false; + const gchar *idxdatabuffer = map_file.begin(); + + const gchar *p1 = idxdatabuffer; + gulong index_size; + guint32 j = 0; + for (guint32 i = 0; i < wc; i++) { + index_size = strlen(p1) + 1 + 2 * sizeof(guint32); + if (i % ENTR_PER_PAGE == 0) { + wordoffset[j] = p1 - idxdatabuffer; + ++j; + } + p1 += index_size; + } + wordoffset[j] = p1 - idxdatabuffer; + if (!save_cache(url, verbose)) + fprintf(stderr, "cache update failed\n"); + } + + if (!(idxfile = fopen(url.c_str(), "rb"))) { + wordoffset.resize(0); + return false; + } + + first.assign(0, read_first_on_page_key(0)); + last.assign(wordoffset.size() - 2, read_first_on_page_key(wordoffset.size() - 2)); + middle.assign((wordoffset.size() - 2) / 2, read_first_on_page_key((wordoffset.size() - 2) / 2)); + real_last.assign(wc - 1, get_key(wc - 1)); + + return true; +} + +inline gulong OffsetIndex::load_page(glong page_idx) +{ + gulong nentr = ENTR_PER_PAGE; + if (page_idx == glong(wordoffset.size() - 2)) + if ((nentr = (wordcount % ENTR_PER_PAGE)) == 0) + nentr = ENTR_PER_PAGE; + + if (page_idx != page.idx) { + 
page_data.resize(wordoffset[page_idx + 1] - wordoffset[page_idx]); + fseek(idxfile, wordoffset[page_idx], SEEK_SET); + const size_t nitems = fread(&page_data[0], 1, page_data.size(), idxfile); + THROW_IF_ERROR(nitems == page_data.size()); + + page.fill(&page_data[0], nentr, page_idx); + } + + return nentr; +} + +const gchar *OffsetIndex::get_key(glong idx) +{ + load_page(idx / ENTR_PER_PAGE); + glong idx_in_page = idx % ENTR_PER_PAGE; + wordentry_offset = page.entries[idx_in_page].off; + wordentry_size = page.entries[idx_in_page].size; + + return page.entries[idx_in_page].keystr; +} + +bool OffsetIndex::lookup(const char *str, glong &idx) +{ + bool bFound = false; + glong iFrom; + glong iTo = wordoffset.size() - 2; + gint cmpint; + glong iThisIndex; + if (stardict_strcmp(str, first.keystr.c_str()) < 0) { + idx = 0; + return false; + } else if (stardict_strcmp(str, real_last.keystr.c_str()) > 0) { + idx = INVALID_INDEX; + return false; + } else { + iFrom = 0; + iThisIndex = 0; + while (iFrom <= iTo) { + iThisIndex = (iFrom + iTo) / 2; + cmpint = stardict_strcmp(str, get_first_on_page_key(iThisIndex)); + if (cmpint > 0) + iFrom = iThisIndex + 1; + else if (cmpint < 0) + iTo = iThisIndex - 1; + else { + bFound = true; + break; + } + } + if (!bFound) + idx = iTo; //prev + else + idx = iThisIndex; + } + if (!bFound) { + gulong netr = load_page(idx); + iFrom = 1; // Needn't search the first word anymore. 
+ iTo = netr - 1; + iThisIndex = 0; + while (iFrom <= iTo) { + iThisIndex = (iFrom + iTo) / 2; + cmpint = stardict_strcmp(str, page.entries[iThisIndex].keystr); + if (cmpint > 0) + iFrom = iThisIndex + 1; + else if (cmpint < 0) + iTo = iThisIndex - 1; + else { + bFound = true; + break; + } + } + idx *= ENTR_PER_PAGE; + if (!bFound) + idx += iFrom; //next + else + idx += iThisIndex; + } else { + idx *= ENTR_PER_PAGE; + } + return bFound; +} + +bool WordListIndex::load(const std::string &url, gulong wc, gulong fsize, bool verbose) +{ + gzFile in = gzopen(url.c_str(), "rb"); + if (in == nullptr) + return false; + + idxdatabuf = (gchar *)g_malloc(fsize); + + const int len = gzread(in, idxdatabuf, fsize); + gzclose(in); + if (len < 0) + return false; + + if (gulong(len) != fsize) + return false; + + wordlist.resize(wc + 1); + gchar *p1 = idxdatabuf; + guint32 i; + for (i = 0; i < wc; i++) { + wordlist[i] = p1; + p1 += strlen(p1) + 1 + 2 * sizeof(guint32); + } + wordlist[wc] = p1; + + return true; +} + +void WordListIndex::get_data(glong idx) +{ + gchar *p1 = wordlist[idx] + strlen(wordlist[idx]) + sizeof(gchar); + wordentry_offset = g_ntohl(get_uint32(p1)); + p1 += sizeof(guint32); + wordentry_size = g_ntohl(get_uint32(p1)); +} + +bool WordListIndex::lookup(const char *str, glong &idx) +{ + bool bFound = false; + glong iTo = wordlist.size() - 2; + + if (stardict_strcmp(str, get_key(0)) < 0) { + idx = 0; + } else if (stardict_strcmp(str, get_key(iTo)) > 0) { + idx = INVALID_INDEX; + } else { + glong iThisIndex = 0; + glong iFrom = 0; + gint cmpint; + while (iFrom <= iTo) { + iThisIndex = (iFrom + iTo) / 2; + cmpint = stardict_strcmp(str, get_key(iThisIndex)); + if (cmpint > 0) + iFrom = iThisIndex + 1; + else if (cmpint < 0) + iTo = iThisIndex - 1; + else { + bFound = true; + break; + } + } + if (!bFound) + idx = iFrom; //next + else + idx = iThisIndex; + } + return bFound; +} +} + +bool SynFile::load(const std::string &url, gulong wc) +{ + struct stat stat_buf; + if 
(!stat(url.c_str(), &stat_buf)) { + MapFile syn; + if (!syn.open(url.c_str(), stat_buf.st_size)) + return false; + const gchar *current = syn.begin(); + for (unsigned long i = 0; i < wc; i++) { + // each entry in a syn-file is: + // - 0-terminated string + // 4-byte index into .dict file in network byte order + glib::CharStr lower_string{ g_utf8_casefold(current, -1) }; + std::string synonym{ get_impl(lower_string) }; + current += synonym.length() + 1; + const guint32 idx = g_ntohl(get_uint32(current)); + current += sizeof(idx); + synonyms[synonym] = idx; + } + return true; + } else { + return false; + } +} + +bool SynFile::lookup(const char *str, glong &idx) +{ + glib::CharStr lower_string{ g_utf8_casefold(str, -1) }; + auto it = synonyms.find(get_impl(lower_string)); + if (it != synonyms.end()) { + idx = it->second; + return true; + } + return false; +} + +bool Dict::Lookup(const char *str, glong &idx) +{ + return syn_file->lookup(str, idx) || idx_file->lookup(str, idx); +} + +bool Dict::load(const std::string &ifofilename, bool verbose) +{ + gulong idxfilesize; + if (!load_ifofile(ifofilename, idxfilesize)) + return false; + + std::string fullfilename(ifofilename); + fullfilename.replace(fullfilename.length() - sizeof("ifo") + 1, sizeof("ifo") - 1, "dict.dz"); + + if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) { + dictdzfile.reset(new DictData); + if (!dictdzfile->open(fullfilename, 0)) { + //g_print("open file %s failed!\n",fullfilename); + return false; + } + } else { + fullfilename.erase(fullfilename.length() - sizeof(".dz") + 1, sizeof(".dz") - 1); + dictfile = fopen(fullfilename.c_str(), "rb"); + if (!dictfile) { + //g_print("open file %s failed!\n",fullfilename); + return false; + } + } + + fullfilename = ifofilename; + fullfilename.replace(fullfilename.length() - sizeof("ifo") + 1, sizeof("ifo") - 1, "idx.gz"); + + if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) { + idx_file.reset(new WordListIndex); + } else { + 
fullfilename.erase(fullfilename.length() - sizeof(".gz") + 1, sizeof(".gz") - 1); + idx_file.reset(new OffsetIndex); + } + + if (!idx_file->load(fullfilename, wordcount, idxfilesize, verbose)) + return false; + + fullfilename = ifofilename; + fullfilename.replace(fullfilename.length() - sizeof("ifo") + 1, sizeof("ifo") - 1, "syn"); + syn_file.reset(new SynFile); + syn_file->load(fullfilename, syn_wordcount); + + //g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles()); + return true; +} + +bool Dict::load_ifofile(const std::string &ifofilename, gulong &idxfilesize) +{ + DictInfo dict_info; + if (!dict_info.load_from_ifo_file(ifofilename, false)) + return false; + if (dict_info.wordcount == 0) + return false; + + ifo_file_name = dict_info.ifo_file_name; + wordcount = dict_info.wordcount; + syn_wordcount = dict_info.syn_wordcount; + bookname = dict_info.bookname; + + idxfilesize = dict_info.index_file_size; + + sametypesequence = dict_info.sametypesequence; + + return true; +} + +bool Dict::LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen) +{ + int iIndexCount = 0; + + for (guint32 i = 0; i < narticles() && iIndexCount < (iBuffLen - 1); i++) + if (g_pattern_match_string(pspec, get_key(i))) + aIndex[iIndexCount++] = i; + + aIndex[iIndexCount] = -1; // -1 is the end. 
+ + return iIndexCount > 0; +} + +Libs::~Libs() +{ + for (Dict *p : oLib) + delete p; +} + +void Libs::load_dict(const std::string &url) +{ + Dict *lib = new Dict; + if (lib->load(url, verbose_)) + oLib.push_back(lib); + else + delete lib; +} + +void Libs::load(const std::list<std::string> &dicts_dirs, + const std::list<std::string> &order_list, + const std::list<std::string> &disable_list) +{ + for_each_file(dicts_dirs, ".ifo", order_list, disable_list, + [this](const std::string &url, bool disable) -> void { + if (!disable) + load_dict(url); + }); +} + +const gchar *Libs::poGetCurrentWord(glong *iCurrent) +{ + const gchar *poCurrentWord = nullptr; + const gchar *word; + for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) { + if (iCurrent[iLib] == INVALID_INDEX) + continue; + if (iCurrent[iLib] >= narticles(iLib) || iCurrent[iLib] < 0) + continue; + if (poCurrentWord == nullptr) { + poCurrentWord = poGetWord(iCurrent[iLib], iLib); + } else { + word = poGetWord(iCurrent[iLib], iLib); + + if (stardict_strcmp(poCurrentWord, word) > 0) + poCurrentWord = word; + } + } + return poCurrentWord; +} + +const gchar *Libs::poGetNextWord(const gchar *sWord, glong *iCurrent) +{ + // the input can be: + // (word,iCurrent),read word,write iNext to iCurrent,and return next word. used by TopWin::NextCallback(); + // (nullptr,iCurrent),read iCurrent,write iNext to iCurrent,and return next word. 
used by AppCore::ListWords(); + const gchar *poCurrentWord = nullptr; + size_t iCurrentLib = 0; + const gchar *word; + + for (size_t iLib = 0; iLib < oLib.size(); ++iLib) { + if (sWord) + oLib[iLib]->Lookup(sWord, iCurrent[iLib]); + if (iCurrent[iLib] == INVALID_INDEX) + continue; + if (iCurrent[iLib] >= narticles(iLib) || iCurrent[iLib] < 0) + continue; + if (poCurrentWord == nullptr) { + poCurrentWord = poGetWord(iCurrent[iLib], iLib); + iCurrentLib = iLib; + } else { + word = poGetWord(iCurrent[iLib], iLib); + + if (stardict_strcmp(poCurrentWord, word) > 0) { + poCurrentWord = word; + iCurrentLib = iLib; + } + } + } + if (poCurrentWord) { + iCurrent[iCurrentLib]++; + for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) { + if (iLib == iCurrentLib) + continue; + if (iCurrent[iLib] == INVALID_INDEX) + continue; + if (iCurrent[iLib] >= narticles(iLib) || iCurrent[iLib] < 0) + continue; + if (strcmp(poCurrentWord, poGetWord(iCurrent[iLib], iLib)) == 0) + iCurrent[iLib]++; + } + poCurrentWord = poGetCurrentWord(iCurrent); + } + return poCurrentWord; +} + +const gchar * +Libs::poGetPreWord(glong *iCurrent) +{ + // used by TopWin::PreviousCallback(); the iCurrent is cached by AppCore::TopWinWordChange(); + const gchar *poCurrentWord = nullptr; + std::vector<Dict *>::size_type iCurrentLib = 0; + const gchar *word; + + for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) { + if (iCurrent[iLib] == INVALID_INDEX) + iCurrent[iLib] = narticles(iLib); + else { + if (iCurrent[iLib] > narticles(iLib) || iCurrent[iLib] <= 0) + continue; + } + if (poCurrentWord == nullptr) { + poCurrentWord = poGetWord(iCurrent[iLib] - 1, iLib); + iCurrentLib = iLib; + } else { + word = poGetWord(iCurrent[iLib] - 1, iLib); + if (stardict_strcmp(poCurrentWord, word) < 0) { + poCurrentWord = word; + iCurrentLib = iLib; + } + } + } + + if (poCurrentWord) { + iCurrent[iCurrentLib]--; + for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) 
{ + if (iLib == iCurrentLib) + continue; + if (iCurrent[iLib] > narticles(iLib) || iCurrent[iLib] <= 0) + continue; + if (strcmp(poCurrentWord, poGetWord(iCurrent[iLib] - 1, iLib)) == 0) { + iCurrent[iLib]--; + } else { + if (iCurrent[iLib] == narticles(iLib)) + iCurrent[iLib] = INVALID_INDEX; + } + } + } + return poCurrentWord; +} + +bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib) +{ + glong iIndex; + bool bFound = false; + gchar *casestr; + + if (!bFound) { + // to lower case. + casestr = g_utf8_strdown(sWord, -1); + if (strcmp(casestr, sWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + // to upper case. + if (!bFound) { + casestr = g_utf8_strup(sWord, -1); + if (strcmp(casestr, sWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + // Upper the first character and lower others. + if (!bFound) { + gchar *nextchar = g_utf8_next_char(sWord); + gchar *firstchar = g_utf8_strup(sWord, nextchar - sWord); + nextchar = g_utf8_strdown(nextchar, -1); + casestr = g_strdup_printf("%s%s", firstchar, nextchar); + g_free(firstchar); + g_free(nextchar); + if (strcmp(casestr, sWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + + if (bIsPureEnglish(sWord)) { + // If not Found , try other status of sWord. 
+ int iWordLen = strlen(sWord); + bool isupcase; + + gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1); + + //cut one char "s" or "d" + if (!bFound && iWordLen > 1) { + isupcase = sWord[iWordLen - 1] == 'S' || !strncmp(&sWord[iWordLen - 2], "ED", 2); + if (isupcase || sWord[iWordLen - 1] == 's' || !strncmp(&sWord[iWordLen - 2], "ed", 2)) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 1] = '\0'; // cut "s" or "d" + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } + + //cut "ly" + if (!bFound && iWordLen > 2) { + isupcase = !strncmp(&sWord[iWordLen - 2], "LY", 2); + if (isupcase || (!strncmp(&sWord[iWordLen - 2], "ly", 2))) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 2] = '\0'; // cut "ly" + if (iWordLen > 5 && sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4] + && !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { //doubled + + sNewWord[iWordLen - 3] = '\0'; + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else { + if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + if (!bFound) + sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; //restore + } + } + if (!bFound) { + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } + } + + //cut "ing" + if (!bFound && iWordLen > 3) { + isupcase = !strncmp(&sWord[iWordLen - 3], "ING", 3); + if (isupcase || !strncmp(&sWord[iWordLen - 3], "ing", 3)) { + strcpy(sNewWord, 
sWord); + sNewWord[iWordLen - 3] = '\0'; + if (iWordLen > 6 && (sNewWord[iWordLen - 4] == sNewWord[iWordLen - 5]) + && !bIsVowel(sNewWord[iWordLen - 5]) && bIsVowel(sNewWord[iWordLen - 6])) { //doubled + sNewWord[iWordLen - 4] = '\0'; + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else { + if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + if (!bFound) + sNewWord[iWordLen - 4] = sNewWord[iWordLen - 5]; //restore + } + } + if (!bFound) { + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + if (!bFound) { + if (isupcase) + strcat(sNewWord, "E"); // add a char "E" + else + strcat(sNewWord, "e"); // add a char "e" + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } + } + + //cut two char "es" + if (!bFound && iWordLen > 3) { + isupcase = (!strncmp(&sWord[iWordLen - 2], "ES", 2) && (sWord[iWordLen - 3] == 'S' || sWord[iWordLen - 3] == 'X' || sWord[iWordLen - 3] == 'O' || (iWordLen > 4 && sWord[iWordLen - 3] == 'H' && (sWord[iWordLen - 4] == 'C' || sWord[iWordLen - 4] == 'S')))); + if (isupcase || (!strncmp(&sWord[iWordLen - 2], "es", 2) && (sWord[iWordLen - 3] == 's' || sWord[iWordLen - 3] == 'x' || sWord[iWordLen - 3] == 'o' || (iWordLen > 4 && sWord[iWordLen - 3] == 'h' && (sWord[iWordLen - 4] == 'c' || sWord[iWordLen - 4] == 's'))))) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 2] = '\0'; + if (oLib[iLib]->Lookup(sNewWord, iIndex)) 
+ bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } + + //cut "ed" + if (!bFound && iWordLen > 3) { + isupcase = !strncmp(&sWord[iWordLen - 2], "ED", 2); + if (isupcase || !strncmp(&sWord[iWordLen - 2], "ed", 2)) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 2] = '\0'; + if (iWordLen > 5 && (sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4]) + && !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { //doubled + sNewWord[iWordLen - 3] = '\0'; + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else { + if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + if (!bFound) + sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; //restore + } + } + if (!bFound) { + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } + } + + // cut "ied" , add "y". + if (!bFound && iWordLen > 3) { + isupcase = !strncmp(&sWord[iWordLen - 3], "IED", 3); + if (isupcase || (!strncmp(&sWord[iWordLen - 3], "ied", 3))) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 3] = '\0'; + if (isupcase) + strcat(sNewWord, "Y"); // add a char "Y" + else + strcat(sNewWord, "y"); // add a char "y" + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } + + // cut "ies" , add "y". 
+ if (!bFound && iWordLen > 3) { + isupcase = !strncmp(&sWord[iWordLen - 3], "IES", 3); + if (isupcase || (!strncmp(&sWord[iWordLen - 3], "ies", 3))) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 3] = '\0'; + if (isupcase) + strcat(sNewWord, "Y"); // add a char "Y" + else + strcat(sNewWord, "y"); // add a char "y" + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } + + // cut "er". + if (!bFound && iWordLen > 2) { + isupcase = !strncmp(&sWord[iWordLen - 2], "ER", 2); + if (isupcase || (!strncmp(&sWord[iWordLen - 2], "er", 2))) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 2] = '\0'; + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } + + // cut "est". + if (!bFound && iWordLen > 3) { + isupcase = !strncmp(&sWord[iWordLen - 3], "EST", 3); + if (isupcase || (!strncmp(&sWord[iWordLen - 3], "est", 3))) { + strcpy(sNewWord, sWord); + sNewWord[iWordLen - 3] = '\0'; + if (oLib[iLib]->Lookup(sNewWord, iIndex)) + bFound = true; + else if (isupcase || g_ascii_isupper(sWord[0])) { + casestr = g_ascii_strdown(sNewWord, -1); + if (strcmp(casestr, sNewWord)) { + if (oLib[iLib]->Lookup(casestr, iIndex)) + bFound = true; + } + g_free(casestr); + } + } + } + + g_free(sNewWord); + } + + if (bFound) + iWordIndex = iIndex; +#if 0 + else { + //don't change iWordIndex here. + //when LookupSimilarWord all failed too, we want to use the old LookupWord index to list words. 
+ //iWordIndex = INVALID_INDEX; + } +#endif + return bFound; +} + +bool Libs::SimpleLookupWord(const gchar *sWord, glong &iWordIndex, int iLib) +{ + bool bFound = oLib[iLib]->Lookup(sWord, iWordIndex); + if (!bFound && fuzzy_) + bFound = LookupSimilarWord(sWord, iWordIndex, iLib); + return bFound; +} + +bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size) +{ + if (sWord[0] == '\0') + return false; + + Fuzzystruct oFuzzystruct[reslist_size]; + + for (int i = 0; i < reslist_size; i++) { + oFuzzystruct[i].pMatchWord = nullptr; + oFuzzystruct[i].iMatchWordDistance = iMaxFuzzyDistance; + } + int iMaxDistance = iMaxFuzzyDistance; + int iDistance; + bool Found = false; + EditDistance oEditDistance; + + glong iCheckWordLen; + const char *sCheck; + gunichar *ucs4_str1, *ucs4_str2; + glong ucs4_str2_len; + + ucs4_str2 = g_utf8_to_ucs4_fast(sWord, -1, &ucs4_str2_len); + unicode_strdown(ucs4_str2); + + for (size_t iLib = 0; iLib < oLib.size(); ++iLib) { + if (progress_func) + progress_func(); + + //if (stardict_strcmp(sWord, poGetWord(0,iLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iLib)-1,iLib))<=0) { + //there are Chinese dicts and English dicts... + + const int iwords = narticles(iLib); + for (int index = 0; index < iwords; index++) { + sCheck = poGetWord(index, iLib); + // tolower and skip too long or too short words + iCheckWordLen = g_utf8_strlen(sCheck, -1); + if (iCheckWordLen - ucs4_str2_len >= iMaxDistance || ucs4_str2_len - iCheckWordLen >= iMaxDistance) + continue; + ucs4_str1 = g_utf8_to_ucs4_fast(sCheck, -1, nullptr); + if (iCheckWordLen > ucs4_str2_len) + ucs4_str1[ucs4_str2_len] = 0; + unicode_strdown(ucs4_str1); + + iDistance = oEditDistance.CalEditDistance(ucs4_str1, ucs4_str2, iMaxDistance); + g_free(ucs4_str1); + if (iDistance < iMaxDistance && iDistance < ucs4_str2_len) { + // when ucs4_str2_len=1,2 we need less fuzzy. 
+ Found = true; + bool bAlreadyInList = false; + int iMaxDistanceAt = 0; + for (int j = 0; j < reslist_size; j++) { + if (oFuzzystruct[j].pMatchWord && strcmp(oFuzzystruct[j].pMatchWord, sCheck) == 0) { //already in list + bAlreadyInList = true; + break; + } + //find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time. + if (oFuzzystruct[j].iMatchWordDistance == iMaxDistance) { + iMaxDistanceAt = j; + } + } + if (!bAlreadyInList) { + if (oFuzzystruct[iMaxDistanceAt].pMatchWord) + g_free(oFuzzystruct[iMaxDistanceAt].pMatchWord); + oFuzzystruct[iMaxDistanceAt].pMatchWord = g_strdup(sCheck); + oFuzzystruct[iMaxDistanceAt].iMatchWordDistance = iDistance; + // calc new iMaxDistance + iMaxDistance = iDistance; + for (int j = 0; j < reslist_size; j++) { + if (oFuzzystruct[j].iMatchWordDistance > iMaxDistance) + iMaxDistance = oFuzzystruct[j].iMatchWordDistance; + } // calc new iMaxDistance + } // add to list + } // find one + } // each word + + } // each lib + g_free(ucs4_str2); + + if (Found) // sort with distance + std::sort(oFuzzystruct, oFuzzystruct + reslist_size, [](const Fuzzystruct &lh, const Fuzzystruct &rh) -> bool { + if (lh.iMatchWordDistance != rh.iMatchWordDistance) + return lh.iMatchWordDistance < rh.iMatchWordDistance; + + if (lh.pMatchWord && rh.pMatchWord) + return stardict_strcmp(lh.pMatchWord, rh.pMatchWord) < 0; + + return false; + }); + + for (gint i = 0; i < reslist_size; ++i) + reslist[i] = oFuzzystruct[i].pMatchWord; + + return Found; +} + +gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord) +{ + glong aiIndex[MAX_MATCH_ITEM_PER_LIB + 1]; + gint iMatchCount = 0; + GPatternSpec *pspec = g_pattern_spec_new(word); + + for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) { + //if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib)) + // -iMatchCount,so save time,but may got less result and the word may repeat. 
+ + if (oLib[iLib]->LookupWithRule(pspec, aiIndex, MAX_MATCH_ITEM_PER_LIB + 1)) { + if (progress_func) + progress_func(); + for (int i = 0; aiIndex[i] != -1; i++) { + const gchar *sMatchWord = poGetWord(aiIndex[i], iLib); + bool bAlreadyInList = false; + for (int j = 0; j < iMatchCount; j++) { + if (strcmp(ppMatchWord[j], sMatchWord) == 0) { //already in list + bAlreadyInList = true; + break; + } + } + if (!bAlreadyInList) + ppMatchWord[iMatchCount++] = g_strdup(sMatchWord); + } + } + } + g_pattern_spec_free(pspec); + + if (iMatchCount) // sort it. + std::sort(ppMatchWord, ppMatchWord + iMatchCount, [](const char *lh, const char *rh) -> bool { + return stardict_strcmp(lh, rh) < 0; + }); + + return iMatchCount; +} + +// Full-text search over article data. sWord is split on unescaped spaces into search terms; a backslash escapes the next character ("\ " space, "\\" backslash, "\t" tab, "\n" newline, anything else is taken literally). +// For every dictionary i whose containSearchData() is true, each key for which SearchData() reports a hit is g_strdup'ed into reslist[i]. +// Returns true when at least one reslist[i] is non-empty, false otherwise (including when sWord holds no terms). +bool Libs::LookupData(const gchar *sWord, std::vector<gchar *> *reslist) +{ + std::vector<std::string> SearchWords; + std::string SearchWord; + const char *p = sWord; + while (*p) { + if (*p == '\\') { + p++; + if (*p == '\0') // lone trailing backslash: nothing left to escape, and the p++ below must not step past the terminator + break; + switch (*p) { + case ' ': + SearchWord += ' '; + break; + case '\\': + SearchWord += '\\'; + break; + case 't': + SearchWord += '\t'; + break; + case 'n': + SearchWord += '\n'; + break; + default: + SearchWord += *p; + } + } else if (*p == ' ') { + if (!SearchWord.empty()) { + SearchWords.push_back(SearchWord); + SearchWord.clear(); + } + } else { + SearchWord += *p; + } + p++; + } + if (!SearchWord.empty()) { + SearchWords.push_back(SearchWord); + SearchWord.clear(); + } + if (SearchWords.empty()) + return false; + + guint32 max_size = 0; + gchar *origin_data = nullptr; + for (std::vector<Dict *>::size_type i = 0; i < oLib.size(); ++i) { + if (!oLib[i]->containSearchData()) + continue; + if (progress_func) + progress_func(); + const gulong iwords = narticles(i); + const gchar *key; + guint32 offset, size; + for (gulong j = 0; j < iwords; ++j) { + oLib[i]->get_key_and_data(j, &key, &offset, &size); + if (size > max_size) { + origin_data = (gchar *)g_realloc(origin_data, size); + max_size = size; + } + if (oLib[i]->SearchData(SearchWords, offset, 
size, origin_data)) + reslist[i].push_back(g_strdup(key)); + } + } + g_free(origin_data); + + std::vector<Dict *>::size_type i; + for (i = 0; i < oLib.size(); ++i) + if (!reslist[i].empty()) + break; + + return i != oLib.size(); +} + +/**************************************************/ +query_t analyze_query(const char *s, std::string &res) +{ + if (!s || !*s) { + res = ""; + return qtSIMPLE; + } + if (*s == '/') { + res = s + 1; + return qtFUZZY; + } + + if (*s == '|') { + res = s + 1; + return qtDATA; + } + + bool regexp = false; + const char *p = s; + res = ""; + for (; *p; res += *p, ++p) { + if (*p == '\\') { + ++p; + if (!*p) + break; + continue; + } + if (*p == '*' || *p == '?') + regexp = true; + } + if (regexp) + return qtREGEXP; + + return qtSIMPLE; +} diff --git a/src/stardict_lib.hpp b/src/stardict_lib.hpp new file mode 100644 index 0000000..a629cbe --- /dev/null +++ b/src/stardict_lib.hpp @@ -0,0 +1,215 @@ +#pragma once + +#include <cstdio> +#include <cstring> +#include <functional> +#include <list> +#include <map> +#include <memory> +#include <string> +#include <vector> + +#include "dictziplib.hpp" + +const int MAX_MATCH_ITEM_PER_LIB = 100; +const int MAX_FUZZY_DISTANCE = 3; // at most MAX_FUZZY_DISTANCE-1 differences allowed when find similar words + +inline guint32 get_uint32(const gchar *addr) +{ + guint32 result; + memcpy(&result, addr, sizeof(guint32)); + return result; +} + +inline void set_uint32(gchar *addr, guint32 val) +{ + memcpy(addr, &val, sizeof(guint32)); +} + +struct cacheItem { + guint32 offset; + gchar *data; + //write code here to make it inline + cacheItem() { data = nullptr; } + ~cacheItem() { g_free(data); } +}; + +const int WORDDATA_CACHE_NUM = 10; +const int INVALID_INDEX = -100; + +class DictBase +{ +public: + DictBase() {} + ~DictBase() + { + if (dictfile) + fclose(dictfile); + } + DictBase(const DictBase &) = delete; + DictBase &operator=(const DictBase &) = delete; + gchar *GetWordData(guint32 idxitem_offset, guint32 
idxitem_size); + bool containSearchData() const + { + if (sametypesequence.empty()) + return true; + return sametypesequence.find_first_of("mlgxty") != std::string::npos; + } + bool SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data); + +protected: + std::string sametypesequence; + FILE *dictfile = nullptr; + std::unique_ptr<DictData> dictdzfile; + +private: + cacheItem cache[WORDDATA_CACHE_NUM]; + gint cache_cur = 0; +}; + +//this structure contain all information about dictionary +struct DictInfo { + std::string ifo_file_name; + guint32 wordcount; + guint32 syn_wordcount; + std::string bookname; + std::string author; + std::string email; + std::string website; + std::string date; + std::string description; + guint32 index_file_size; + guint32 syn_file_size; + std::string sametypesequence; + + bool load_from_ifo_file(const std::string &ifofilename, bool istreedict); +}; + +class IIndexFile +{ +public: + guint32 wordentry_offset; + guint32 wordentry_size; + + virtual ~IIndexFile() {} + virtual bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) = 0; + virtual const gchar *get_key(glong idx) = 0; + virtual void get_data(glong idx) = 0; + virtual const gchar *get_key_and_data(glong idx) = 0; + virtual bool lookup(const char *str, glong &idx) = 0; +}; + +class SynFile +{ +public: + bool load(const std::string &url, gulong wc); + bool lookup(const char *str, glong &idx); + +private: + std::map<std::string, gulong> synonyms; +}; + +class Dict : public DictBase +{ +public: + Dict() {} + Dict(const Dict &) = delete; + Dict &operator=(const Dict &) = delete; + bool load(const std::string &ifofilename, bool verbose); + + gulong narticles() const { return wordcount; } + const std::string &dict_name() const { return bookname; } + const std::string &ifofilename() const { return ifo_file_name; } + + const gchar *get_key(glong index) { return idx_file->get_key(index); } + gchar *get_data(glong 
index) + { + idx_file->get_data(index); + return DictBase::GetWordData(idx_file->wordentry_offset, idx_file->wordentry_size); + } + void get_key_and_data(glong index, const gchar **key, guint32 *offset, guint32 *size) + { + *key = idx_file->get_key_and_data(index); + *offset = idx_file->wordentry_offset; + *size = idx_file->wordentry_size; + } + bool Lookup(const char *str, glong &idx); + + bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen); + +private: + std::string ifo_file_name; + gulong wordcount; + gulong syn_wordcount; + std::string bookname; + + std::unique_ptr<IIndexFile> idx_file; + std::unique_ptr<SynFile> syn_file; + + bool load_ifofile(const std::string &ifofilename, gulong &idxfilesize); +}; + +class Libs +{ +public: + Libs(std::function<void(void)> f = std::function<void(void)>()) + { + progress_func = f; + iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; //need to read from cfg. + } + void setVerbose(bool verbose) { verbose_ = verbose; } + void setFuzzy(bool fuzzy) { fuzzy_ = fuzzy; } + ~Libs(); + Libs(const Libs &) = delete; + Libs &operator=(const Libs &) = delete; + + void load_dict(const std::string &url); + void load(const std::list<std::string> &dicts_dirs, + const std::list<std::string> &order_list, + const std::list<std::string> &disable_list); + glong narticles(int idict) const { return oLib[idict]->narticles(); } + const std::string &dict_name(int idict) const { return oLib[idict]->dict_name(); } + gint ndicts() const { return oLib.size(); } + + const gchar *poGetWord(glong iIndex, int iLib) + { + return oLib[iLib]->get_key(iIndex); + } + gchar *poGetWordData(glong iIndex, int iLib) + { + if (iIndex == INVALID_INDEX) + return nullptr; + return oLib[iLib]->get_data(iIndex); + } + const gchar *poGetCurrentWord(glong *iCurrent); + const gchar *poGetNextWord(const gchar *word, glong *iCurrent); + const gchar *poGetPreWord(glong *iCurrent); + bool LookupWord(const gchar *sWord, glong &iWordIndex, int iLib) + { + return 
oLib[iLib]->Lookup(sWord, iWordIndex); + } + bool LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib); + bool SimpleLookupWord(const gchar *sWord, glong &iWordIndex, int iLib); + + bool LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size); + gint LookupWithRule(const gchar *sWord, gchar *reslist[]); + bool LookupData(const gchar *sWord, std::vector<gchar *> *reslist); + +protected: + bool fuzzy_; + +private: + std::vector<Dict *> oLib; // word Libs. + int iMaxFuzzyDistance; + std::function<void(void)> progress_func; + bool verbose_; +}; + +enum query_t { + qtSIMPLE, + qtREGEXP, + qtFUZZY, + qtDATA +}; + +extern query_t analyze_query(const char *s, std::string &res); diff --git a/src/utils.cpp b/src/utils.cpp new file mode 100644 index 0000000..33bfeaa --- /dev/null +++ b/src/utils.cpp @@ -0,0 +1,131 @@ +/* + * This file part of sdcv - console version of Stardict program + * http://sdcv.sourceforge.net + * Copyright (C) 2005-2006 Evgeniy <dushistov@mail.ru> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <algorithm> +#include <cstdio> +#include <cstdlib> +#include <glib.h> +#include <glib/gi18n.h> +#include <iomanip> +#include <sstream> + +#include "utils.hpp" + +std::string utf8_to_locale_ign_err(const std::string &utf8_str) +{ + std::string res; + + const char *charset; + if (g_get_charset(&charset)) + res = utf8_str; + else { + gsize bytes_read, bytes_written; + glib::Error err; + glib::CharStr tmp(g_convert_with_fallback(utf8_str.c_str(), -1, charset, "UTF-8", nullptr, + &bytes_read, &bytes_written, get_addr(err))); + if (nullptr == get_impl(tmp)) { + fprintf(stderr, _("Can not convert %s to current locale.\n"), utf8_str.c_str()); + fprintf(stderr, "%s\n", err->message); + exit(EXIT_FAILURE); + } + res = get_impl(tmp); + } + + return res; +} + +static void __for_each_file(const std::string &dirname, const std::string &suff, + const std::list<std::string> &order_list, const std::list<std::string> &disable_list, + const std::function<void(const std::string &, bool)> &f) +{ + GDir *dir = g_dir_open(dirname.c_str(), 0, nullptr); + if (dir) { + const gchar *filename; + + while ((filename = g_dir_read_name(dir)) != nullptr) { + const std::string fullfilename(dirname + G_DIR_SEPARATOR_S + filename); + if (g_file_test(fullfilename.c_str(), G_FILE_TEST_IS_DIR)) + __for_each_file(fullfilename, suff, order_list, disable_list, f); + else if (g_str_has_suffix(filename, suff.c_str()) && std::find(order_list.begin(), order_list.end(), fullfilename) == order_list.end()) { + const bool disable = std::find(disable_list.begin(), + disable_list.end(), + fullfilename) + != disable_list.end(); + f(fullfilename, disable); + } + } + g_dir_close(dir); + } +} + +void for_each_file(const 
std::list<std::string> &dirs_list, const std::string &suff, + const std::list<std::string> &order_list, const std::list<std::string> &disable_list, + const std::function<void(const std::string &, bool)> &f) +{ + for (const std::string &item : order_list) { + const bool disable = std::find(disable_list.begin(), disable_list.end(), item) != disable_list.end(); + f(item, disable); + } + for (const std::string &item : dirs_list) + __for_each_file(item, suff, order_list, disable_list, f); +} + +// based on https://stackoverflow.com/questions/7724448/simple-json-string-escape-for-c/33799784#33799784 +std::string json_escape_string(const std::string &s) +{ + std::ostringstream o; + for (auto c = s.cbegin(); c != s.cend(); c++) { + switch (*c) { + case '"': + o << "\\\""; + break; + case '\\': + o << "\\\\"; + break; + case '\b': + o << "\\b"; + break; + case '\f': + o << "\\f"; + break; + case '\n': + o << "\\n"; + break; + case '\r': + o << "\\r"; + break; + case '\t': + o << "\\t"; + break; + default: + if ('\x00' <= *c && *c <= '\x1f') { + o << "\\u" + << std::hex << std::setw(4) << std::setfill('0') << (int)*c; + } else { + o << *c; + } + } + } + return o.str(); +} diff --git a/src/utils.hpp b/src/utils.hpp new file mode 100644 index 0000000..1081fd3 --- /dev/null +++ b/src/utils.hpp @@ -0,0 +1,78 @@ +#pragma once + +#include <cassert> +#include <cstddef> +#include <functional> +#include <glib.h> +#include <list> +#include <string> + +template <typename T, typename unref_res_t, void (*unref_res)(unref_res_t *)> +class ResourceWrapper +{ +public: + ResourceWrapper(T *p = nullptr) + : p_(p) + { + } + ~ResourceWrapper() { free_resource(); } + ResourceWrapper(const ResourceWrapper &) = delete; + ResourceWrapper &operator=(const ResourceWrapper &) = delete; + T *operator->() const { return p_; } + bool operator!() const { return p_ == nullptr; } + const T &operator[](size_t idx) const + { + assert(p_ != nullptr); + return p_[idx]; + } + + void reset(T *newp) + { + if (p_ 
!= newp) { + free_resource(); + p_ = newp; + } + } + + friend inline bool operator==(const ResourceWrapper &lhs, std::nullptr_t) noexcept + { + return !lhs.p_; + } + + friend inline bool operator!=(const ResourceWrapper &lhs, std::nullptr_t) noexcept + { + return !!lhs.p_; + } + + friend inline T *get_impl(const ResourceWrapper &rw) + { + return rw.p_; + } + + friend inline T **get_addr(ResourceWrapper &rw) + { + return &rw.p_; + } + +private: + T *p_; + + void free_resource() + { + if (p_) + unref_res(p_); + } +}; + +namespace glib +{ +typedef ResourceWrapper<gchar, void, g_free> CharStr; +typedef ResourceWrapper<GError, GError, g_error_free> Error; +} + +extern std::string utf8_to_locale_ign_err(const std::string &utf8_str); + +extern void for_each_file(const std::list<std::string> &dirs_list, const std::string &suff, + const std::list<std::string> &order_list, const std::list<std::string> &disable_list, + const std::function<void(const std::string &, bool)> &f); +extern std::string json_escape_string(const std::string &str); diff --git a/tests/rus-eng-stardict-2.4.2/1.xdxf.dict b/tests/rus-eng-stardict-2.4.2/1.xdxf.dict new file mode 100644 index 0000000..125ecd8 --- /dev/null +++ b/tests/rus-eng-stardict-2.4.2/1.xdxf.dict @@ -0,0 +1,2 @@ +<k>человек</k> +man
\ No newline at end of file diff --git a/tests/rus-eng-stardict-2.4.2/1.xdxf.idx b/tests/rus-eng-stardict-2.4.2/1.xdxf.idx Binary files differnew file mode 100644 index 0000000..93df80e --- /dev/null +++ b/tests/rus-eng-stardict-2.4.2/1.xdxf.idx diff --git a/tests/rus-eng-stardict-2.4.2/1.xdxf.idx.oft b/tests/rus-eng-stardict-2.4.2/1.xdxf.idx.oft Binary files differnew file mode 100644 index 0000000..bd5c857 --- /dev/null +++ b/tests/rus-eng-stardict-2.4.2/1.xdxf.idx.oft diff --git a/tests/rus-eng-stardict-2.4.2/1.xdxf.ifo b/tests/rus-eng-stardict-2.4.2/1.xdxf.ifo new file mode 100644 index 0000000..fa86d41 --- /dev/null +++ b/tests/rus-eng-stardict-2.4.2/1.xdxf.ifo @@ -0,0 +1,8 @@ +StarDict's dict ifo file +version=2.4.2 +wordcount=1 +idxfilesize=23 +bookname=Sample 1 test dictionary +date=2016.06.02 +sametypesequence=x +description=Copyright: GNU Public License.; Version: 0.1 diff --git a/tests/stardict-test_dict-2.4.2/test_dict.dict b/tests/stardict-test_dict-2.4.2/test_dict.dict new file mode 100644 index 0000000..5fb702d --- /dev/null +++ b/tests/stardict-test_dict-2.4.2/test_dict.dict @@ -0,0 +1,2 @@ +<k>test</k> +test passed
\ No newline at end of file diff --git a/tests/stardict-test_dict-2.4.2/test_dict.idx b/tests/stardict-test_dict-2.4.2/test_dict.idx Binary files differnew file mode 100644 index 0000000..241fa00 --- /dev/null +++ b/tests/stardict-test_dict-2.4.2/test_dict.idx diff --git a/tests/stardict-test_dict-2.4.2/test_dict.ifo b/tests/stardict-test_dict-2.4.2/test_dict.ifo new file mode 100644 index 0000000..54c03a6 --- /dev/null +++ b/tests/stardict-test_dict-2.4.2/test_dict.ifo @@ -0,0 +1,7 @@ +StarDict's dict ifo file +version=2.4.2 +wordcount=1 +idxfilesize=13 +bookname=test_dict +date=2006.04.24 +sametypesequence=x diff --git a/tests/stardict-test_synonyms-2.4.2/test.dict.dz b/tests/stardict-test_synonyms-2.4.2/test.dict.dz Binary files differnew file mode 100644 index 0000000..fea9f2e --- /dev/null +++ b/tests/stardict-test_synonyms-2.4.2/test.dict.dz diff --git a/tests/stardict-test_synonyms-2.4.2/test.idx b/tests/stardict-test_synonyms-2.4.2/test.idx Binary files differnew file mode 100644 index 0000000..871c01e --- /dev/null +++ b/tests/stardict-test_synonyms-2.4.2/test.idx diff --git a/tests/stardict-test_synonyms-2.4.2/test.ifo b/tests/stardict-test_synonyms-2.4.2/test.ifo new file mode 100644 index 0000000..70f26e1 --- /dev/null +++ b/tests/stardict-test_synonyms-2.4.2/test.ifo @@ -0,0 +1,7 @@ +StarDict's dict ifo file +version=2.4.2 +bookname=Test synonyms +wordcount=2 +synwordcount=2 +idxfilesize=32 +sametypesequence=m diff --git a/tests/stardict-test_synonyms-2.4.2/test.syn b/tests/stardict-test_synonyms-2.4.2/test.syn Binary files differnew file mode 100644 index 0000000..e4c409d --- /dev/null +++ b/tests/stardict-test_synonyms-2.4.2/test.syn diff --git a/tests/stardict-test_synonyms-2.4.2/test.xml b/tests/stardict-test_synonyms-2.4.2/test.xml new file mode 100644 index 0000000..ddad79a --- /dev/null +++ b/tests/stardict-test_synonyms-2.4.2/test.xml @@ -0,0 +1,23 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<stardict 
xmlns:xi="http://www.w3.org/2003/XInclude"> + <info> + <version>2.4.2</version> + <bookname>Test synonyms</bookname> + <author></author> + <email></email> + <website></website> + <description></description> + <date></date> + <dicttype></dicttype> + </info> + <article><key>test</key><synonym>foo</synonym><synonym>bar</synonym> + <definition type="m"> + <![CDATA[result of test]]> + </definition> + </article> + <article><key>testawordy</key> + <definition type="m"> + <![CDATA[word that ends in y to test with fuzzy search in -ied]]> + </definition> + </article> +</stardict> diff --git a/tests/t_datadir b/tests/t_datadir new file mode 100755 index 0000000..0ebe965 --- /dev/null +++ b/tests/t_datadir @@ -0,0 +1,17 @@ +#!/bin/sh + +PATH_TO_SDCV="$1" + +unset SDCV_PAGER +have=`"$PATH_TO_SDCV" --data-dir /tmp/bugagaga -l | wc -l` +#do not count header +have=$(($have-1)) +ndicts=`find "${HOME}"/.stardict/dic -name "*.ifo" -print | wc -l` +#ndicts=$(($ndicts+1)) +if [ $have -ne $ndicts ]; then + ndicts=$(($ndicts-1)) + echo "test failed: sdcv says: we have: $have, but really we have: $ndicts" >&2 + exit 1 +fi + +exit 0 diff --git a/tests/t_exact b/tests/t_exact new file mode 100755 index 0000000..f4c11d2 --- /dev/null +++ b/tests/t_exact @@ -0,0 +1,24 @@ +#!/bin/sh + +set -e + +SDCV="$1" +TEST_DIR="$2" + +unset SDCV_PAGER + +test_word() { + WORD=$1 + EXPECTED=$2 + TAG=$3 + RES=$($SDCV -e -n --data-dir "$TEST_DIR" -u "Test synonyms" $WORD | grep "$TAG") + if [ "$EXPECTED" != "$RES" ]; then + echo "synonym for $WORD should be '$EXPECTED' but was '$RES'" + exit 1 + fi +} + +test_word testawordies "Nothing similar to testawordies, sorry :(" "Nothing similar" +test_word testawordy "word that ends in y to test with fuzzy search in -ied" "fuzzy" + +exit 0 diff --git a/tests/t_interactive b/tests/t_interactive new file mode 100755 index 0000000..124bdad --- /dev/null +++ b/tests/t_interactive @@ -0,0 +1,20 @@ +#!/bin/sh +# check that in not-interactive mode sdcv not wait any input + 
+PATH_TO_SDCV="$1" + +if test ! -x "$PATH_TO_SDCV"; then + echo "Can not find sdcv binary $1" >&2 + exit 1 +fi + +"$PATH_TO_SDCV" -n >/dev/null 2>&1 & +PID=$! +sleep 1 + +if kill -0 $PID >/dev/null 2>&1 ; then + echo "process wait input: $PID, test failed" >&2 + exit 1 +fi + +exit 0 diff --git a/tests/t_json b/tests/t_json new file mode 100755 index 0000000..65b580b --- /dev/null +++ b/tests/t_json @@ -0,0 +1,25 @@ +#!/bin/sh + +set -e + +SDCV="$1" +TEST_DIR="$2" + +unset SDCV_PAGER +unset STARDICT_DATA_DIR + +test_json() { # usage: test_json EXPECTED_JSON SDCV_ARG... ; compares the jq-sorted sdcv output with the jq-sorted expectation + EXPECTED=$(echo "$1" | jq 'sort') + shift # what is left in "$@" is the sdcv command line, kept as real words so the data dir is not passed with literal quote characters + RESULT=$($SDCV "$@" | jq 'sort') + if [ "$EXPECTED" != "$RESULT" ]; then + echo "expected $EXPECTED but got $RESULT" + exit 1 + fi +} + +test_json "[{\"name\": \"Test synonyms\", \"wordcount\": \"1\"},{\"name\": \"Sample 1 test dictionary\", \"wordcount\": \"1\"},{\"name\": \"test_dict\", \"wordcount\": \"1\"}]" -x -j -l -n --data-dir "$TEST_DIR" # NOTE(review): tests/stardict-test_synonyms-2.4.2/test.ifo declares wordcount=2 - confirm the "1" expected for "Test synonyms" +test_json "[{\"dict\": \"Test synonyms\",\"word\":\"test\",\"definition\":\"\nresult of test\"}]" -x -j -n --data-dir "$TEST_DIR" foo +test_json "[]" -x -j -n --data-dir "$TEST_DIR" foobarbaaz + +exit 0 diff --git a/tests/t_list b/tests/t_list new file mode 100755 index 0000000..3518731 --- /dev/null +++ b/tests/t_list @@ -0,0 +1,15 @@ +#!/bin/sh + +PATH_TO_SDCV="$1" +ndicts=`"$PATH_TO_SDCV" -l | wc -l` +ndicts=$(($ndicts-1)) +ncom=`find /usr/share/stardict/dic -name "*.ifo" | wc -l` +nspe=`find "${HOME}"/.stardict/dic -name "*.ifo" | wc -l` +nmy=$(($ncom+$nspe)) + +if [ $nmy -ne $ndicts ]; then + echo "should be: $nmy, we have: $ndicts" >&2 + exit 1 +fi + +exit 0 diff --git a/tests/t_only_data_dir b/tests/t_only_data_dir new file mode 100755 index 0000000..392f056 --- /dev/null +++ b/tests/t_only_data_dir @@ -0,0 +1,19 @@ +#!/bin/sh + +set -e + +SDCV="$1" +TEST_DIR="$2" + +unset SDCV_PAGER +unset STARDICT_DATA_DIR + +DICTS=$($SDCV -x -n -l --data-dir "$TEST_DIR" | tail -n +2 | wc -l) +# the expected result: +ACTUAL_DICTS=$(find 
"$TEST_DIR" -name "*.ifo" | wc -l) +if [ $DICTS -ne $ACTUAL_DICTS ]; then + echo "number of dictionaries in $TEST_DIR should be $ACTUAL_DICTS but was $DICTS according to sdcv" + exit 1 +fi + +exit 0 diff --git a/tests/t_synonyms b/tests/t_synonyms new file mode 100755 index 0000000..3ad3951 --- /dev/null +++ b/tests/t_synonyms @@ -0,0 +1,22 @@ +#!/bin/sh + +set -e + +SDCV="$1" +TEST_DIR="$2" + +unset SDCV_PAGER +test_word() { + WORD=$1 + RES=$($SDCV -n --data-dir "$TEST_DIR" -u "Test synonyms" $WORD | grep result) + if [ "result of test" != "$RES" ]; then + echo "synonym for $WORD should be 'result of test' but was '$RES'" + exit 1 + fi +} + +test_word foo +test_word bar +test_word test + +exit 0 diff --git a/tests/t_use b/tests/t_use new file mode 100755 index 0000000..d141726 --- /dev/null +++ b/tests/t_use @@ -0,0 +1,20 @@ +#!/bin/sh + +set -e + +PATH_TO_SDCV="$1" +TESTS_DIR="$2" + +mkdir -p "${HOME}"/.stardict/dic +cp -R "${TESTS_DIR}/stardict-test_dict-2.4.2" "${HOME}"/.stardict/dic +unset SDCV_PAGER +RES=`"$PATH_TO_SDCV" -n -u test_dict test | grep "test passed"` + +if [ -z "$RES" ]; then + echo "we didn't find in stardict-test_dict-2.4.2 "test" keyword, something wrong" >&2 + exit 1 +fi + +rm -fr "${HOME}"/.stardict/dic/stardict-test_dict-2.4.2 + +exit 0 diff --git a/tests/t_utf8input b/tests/t_utf8input new file mode 100755 index 0000000..1ad8360 --- /dev/null +++ b/tests/t_utf8input @@ -0,0 +1,28 @@ +#!/bin/sh + +set -e + +PATH_TO_SDCV="$1" +TESTS_DIR="$2" + +mkdir -p "${HOME}"/.stardict/dic +cp -R "${TESTS_DIR}/rus-eng-stardict-2.4.2" "${HOME}"/.stardict/dic/ + +unset SDCV_PAGER +export LANG=ru_RU.KOI8-R +IFS=" +" +j=0 +for i in `"$PATH_TO_SDCV" --utf8-input -n человек 2>&1`; do + j=$(($j+1)) + if [ $j -ne 1 ]; then + break; + fi +done + +if [ $j -eq 1 ]; then + echo "$0: empty results of search: test failed" >&2 + exit 1 +fi + +exit 0 diff --git a/tests/t_utf8output b/tests/t_utf8output new file mode 100755 index 0000000..4d04778 --- /dev/null +++ 
b/tests/t_utf8output @@ -0,0 +1,20 @@ +#!/bin/sh + +set -e + +export LANG=ru_RU.KOI8-R +unset SDCV_PAGER + +PATH_TO_SDCV="$1" + +if test ! -x "$PATH_TO_SDCV"; then + echo "Can not find sdcv binary $1" >&2 + exit 1 +fi + +if ! "$PATH_TO_SDCV" -n --utf8-output man | tail -n -1 | iconv -f utf-8 -t utf-8 >/dev/null; then + echo "utf8 output didn't work" >&2 + exit 1 +fi + +exit 0 |