summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.clang-format56
-rw-r--r--.gitignore1
-rw-r--r--.travis.yml49
-rw-r--r--AUTHORS4
-rw-r--r--CMakeLists.txt156
-rw-r--r--LICENSE341
-rw-r--r--NEWS79
-rw-r--r--README.org40
-rw-r--r--cmake/FindGLIB2.cmake217
-rw-r--r--cmake/FindGettextTools.cmake152
-rw-r--r--cmake/FindGettextTools/config.cmake.in14
-rw-r--r--cmake/FindGettextTools/script.cmake103
-rw-r--r--cmake/FindIconv.cmake64
-rw-r--r--cmake/FindLibintl.cmake56
-rw-r--r--cmake/FindPackageHandleStandardArgs.cmake58
-rw-r--r--cmake/compiler.cmake35
-rw-r--r--config.h.cmake5
-rw-r--r--doc/DICTFILE_FORMAT352
-rw-r--r--doc/sdcv.1104
-rw-r--r--doc/uk/sdcv.184
-rw-r--r--po/cs.po165
-rw-r--r--po/en@boldquot.header25
-rw-r--r--po/en@quot.header22
-rw-r--r--po/fr.po175
-rw-r--r--po/ru.po145
-rw-r--r--po/sdcv.pot139
-rw-r--r--po/sk.po172
-rw-r--r--po/uk.po176
-rw-r--r--po/zh_CN.po166
-rw-r--r--po/zh_TW.po165
-rw-r--r--src/dictziplib.cpp479
-rw-r--r--src/dictziplib.hpp56
-rw-r--r--src/distance.cpp145
-rw-r--r--src/distance.hpp26
-rw-r--r--src/libwrapper.cpp452
-rw-r--r--src/libwrapper.hpp54
-rw-r--r--src/mapfile.hpp86
-rw-r--r--src/readline.cpp115
-rw-r--r--src/readline.hpp15
-rw-r--r--src/sdcv.cpp260
-rw-r--r--src/stardict_lib.cpp1642
-rw-r--r--src/stardict_lib.hpp215
-rw-r--r--src/utils.cpp131
-rw-r--r--src/utils.hpp78
-rw-r--r--tests/rus-eng-stardict-2.4.2/1.xdxf.dict2
-rw-r--r--tests/rus-eng-stardict-2.4.2/1.xdxf.idxbin0 -> 23 bytes
-rw-r--r--tests/rus-eng-stardict-2.4.2/1.xdxf.idx.oftbin0 -> 38 bytes
-rw-r--r--tests/rus-eng-stardict-2.4.2/1.xdxf.ifo8
-rw-r--r--tests/stardict-test_dict-2.4.2/test_dict.dict2
-rw-r--r--tests/stardict-test_dict-2.4.2/test_dict.idxbin0 -> 13 bytes
-rw-r--r--tests/stardict-test_dict-2.4.2/test_dict.ifo7
-rw-r--r--tests/stardict-test_synonyms-2.4.2/test.dict.dzbin0 -> 108 bytes
-rw-r--r--tests/stardict-test_synonyms-2.4.2/test.idxbin0 -> 32 bytes
-rw-r--r--tests/stardict-test_synonyms-2.4.2/test.ifo7
-rw-r--r--tests/stardict-test_synonyms-2.4.2/test.synbin0 -> 16 bytes
-rw-r--r--tests/stardict-test_synonyms-2.4.2/test.xml23
-rwxr-xr-xtests/t_datadir17
-rwxr-xr-xtests/t_exact24
-rwxr-xr-xtests/t_interactive20
-rwxr-xr-xtests/t_json25
-rwxr-xr-xtests/t_list15
-rwxr-xr-xtests/t_only_data_dir19
-rwxr-xr-xtests/t_synonyms22
-rwxr-xr-xtests/t_use20
-rwxr-xr-xtests/t_utf8input28
-rwxr-xr-xtests/t_utf8output20
66 files changed, 7333 insertions, 0 deletions
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..d24b5aa
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,56 @@
+---
+Language: Cpp
+AccessModifierOffset: -4
+ConstructorInitializerIndentWidth: 4
+AlignEscapedNewlinesLeft: false
+AlignTrailingComments: false
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: All
+AlwaysBreakTemplateDeclarations: false
+AlwaysBreakBeforeMultilineStrings: false
+BreakBeforeBinaryOperators: true
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: true
+BinPackParameters: true
+ColumnLimit: 0
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+DerivePointerAlignment: false
+ExperimentalAutoDetectBinPacking: false
+IndentCaseLabels: false
+IndentWrappedFunctionNames: false
+IndentFunctionDeclarationAfterType: false
+MaxEmptyLinesToKeep: 1
+KeepEmptyLinesAtTheStartOfBlocks: true
+NamespaceIndentation: Inner
+ObjCSpaceAfterProperty: true
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakBeforeFirstCallParameter: 19
+PenaltyBreakComment: 300
+PenaltyBreakString: 1000
+PenaltyBreakFirstLessLess: 120
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerAlignment: Right
+SpacesBeforeTrailingComments: 1
+Cpp11BracedListStyle: false
+Standard: Cpp11
+IndentWidth: 4
+TabWidth: 8
+UseTab: Never
+BreakBeforeBraces: Linux
+SpacesInParentheses: false
+SpacesInAngles: false
+SpaceInEmptyParentheses: false
+SpacesInCStyleCastParentheses: false
+SpacesInContainerLiterals: true
+SpaceBeforeAssignmentOperators: true
+ContinuationIndentWidth: 4
+CommentPragmas: '^ IWYU pragma:'
+ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
+SpaceBeforeParens: ControlStatements
+DisableFormat: false
+...
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e4e5f6c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*~
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..237c68f
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,49 @@
+#
+# Available repositories are listed here:
+# https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json
+#
+
+sudo: false
+
+language: cpp
+
+matrix:
+ include:
+ - env: COMPILER_VERSION=4.8
+ os: linux
+ compiler: g++
+ addons:
+ apt:
+ sources:
+ - ubuntu-toolchain-r-test
+ - kalakris-cmake
+ - ubuntu-sdk-team
+ packages:
+ - g++-4.8
+ - cmake
+ - libglib2.0-dev
+ - jq
+ # - env: COMPILER_VERSION=3.5
+ # os: linux
+ # compiler: clang++
+ # addons:
+ # apt:
+ # sources:
+ # - ubuntu-toolchain-r-test
+ # - llvm-toolchain-precise-3.5
+ # packages:
+ # - clang-3.5
+ # - cmake
+ # - libglib2.0-dev
+
+
+before_script:
+ - mkdir build
+ - cd build
+ - CC=$CC-${COMPILER_VERSION} CXX=$CXX-${COMPILER_VERSION} cmake -DBUILD_TESTS=True ..
+ - cd ..
+
+script:
+ - cd build
+ - make -k -j2 VERBOSE=1
+ - ctest --output-on-failure
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..abb01e9
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,4 @@
+author of stardict:
+ Hu Zheng <huzheng_001@163.com> http://forlinux.yeah.net
+author of sdcv:
+ Evgeniy Dushistov <dushistov@mail.ru>
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..a34e357
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,156 @@
+project(sdcv)
+
+# Older versions have a different signature for CMAKE_MINIMUM_REQUIRED,
+# check it manually just to make sure
+if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 2.8)
+ message(FATAL_ERROR "${PROJECT_NAME} requires at least CMake v2.8."
+ " You are running v${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}."
+ " Please upgrade." )
+endif(${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 2.8)
+
+# If we get this far, use the modern signature. This will also cause newer
+# CMake versions to try to be backwards-compatible with the desired version
+cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
+cmake_policy(VERSION 2.8)
+
+include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/compiler.cmake")
+
+set(ZLIB_FIND_REQUIRED True)
+include(FindZLIB)
+
+set(GLIB2_REQ "'glib-2.0 >= 2.6.1'")
+set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
+include("${CMAKE_CURRENT_SOURCE_DIR}/cmake/FindGLIB2.cmake")
+
+if (NOT GLIB2_FOUND)
+ message(FATAL_ERROR "sdcv require ${GLIB2_REQ}, "
+ "make sure that you install it")
+endif()
+
+set(WITH_READLINE True CACHE BOOL "Use readline library")
+
+# Probe for readline. If either the header or the library is missing, switch
+# the WITH_READLINE option off rather than aborting the configure step.
+if (WITH_READLINE)
+ find_path(READLINE_INCLUDE_DIR readline/readline.h)
+ find_library(READLINE_LIBRARY NAMES readline)
+ if (NOT (READLINE_INCLUDE_DIR AND READLINE_LIBRARY))
+ # The cache form of set() requires <type> and <docstring> before FORCE.
+ set(WITH_READLINE False CACHE BOOL "Use readline library" FORCE)
+ endif ()
+endif (WITH_READLINE)
+
+# With readline disabled the find_* results may be unset or *-NOTFOUND; shadow
+# them with empty values so the include_directories() and
+# target_link_libraries() calls further down simply skip them.
+if (NOT WITH_READLINE)
+ set(READLINE_INCLUDE_DIR "")
+ set(READLINE_LIBRARY "")
+endif ()
+
+option(ENABLE_NLS "Enable NLS support" True)
+
+set(sdcv_SRCS
+ src/sdcv.cpp
+ src/readline.cpp
+ src/readline.hpp
+ src/libwrapper.cpp
+ src/libwrapper.hpp
+ src/utils.cpp
+ src/utils.hpp
+
+ src/stardict_lib.cpp
+ src/stardict_lib.hpp
+ src/dictziplib.cpp
+ src/dictziplib.hpp
+ src/distance.cpp
+ src/distance.hpp
+ src/mapfile.hpp
+)
+
+if (ENABLE_NLS)
+ find_package(GettextTools REQUIRED)
+ set(gettext_stockDir "${CMAKE_CURRENT_SOURCE_DIR}/po")
+ set(gettext_langDir "${CMAKE_CURRENT_BINARY_DIR}/lang")
+ set(gettext_outDir "${CMAKE_CURRENT_BINARY_DIR}/locale")
+ set(GETTEXT_TRANSLATIONS_PATH "${CMAKE_INSTALL_PREFIX}/share/locale")
+ gettext_make_target("lang"
+ HIERARCHY "{1}/{2}/{3}/{4}.mo"
+ KEYWORDS "_"
+ DOMAIN "sdcv"
+ STOCK_DIR ${gettext_stockDir}
+ LANG_DIR ${gettext_langDir}
+ OUT_DIR ${gettext_outDir}
+ SOURCE ${sdcv_SRCS})
+
+ list(APPEND makeCleanFiles ${gettext_outDir})
+endif ()
+
+include(CheckFunctionExists)
+check_function_exists(mmap HAVE_MMAP)
+
+include(CheckIncludeFile)
+check_include_file(locale.h HAVE_LOCALE_H)
+
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake
+ ${CMAKE_CURRENT_BINARY_DIR}/config.h)
+
+
+include_directories(
+ ${ZLIB_INCLUDE_DIR}
+ ${GLIB2_INCLUDE_DIRS}
+ ${READLINE_INCLUDE_DIR}
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/lib
+ ${CMAKE_CURRENT_BINARY_DIR}
+)
+
+#
+# Packing stuff
+#
+set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "console version of StarDict program")
+set(CPACK_PACKAGE_VENDOR "Evgeniy Dushistov <dushistov@mail.ru>")
+set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.org")
+set(CPACK_PACKAGE_VERSION_MAJOR "0")
+set(CPACK_PACKAGE_VERSION_MINOR "5")
+set(CPACK_PACKAGE_VERSION_PATCH "2")
+
+set(sdcv_VERSION
+ "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
+
+add_definitions(-DVERSION="${sdcv_VERSION}" -DHAVE_CONFIG_H)
+
+add_executable(sdcv ${sdcv_SRCS})
+
+target_link_libraries(sdcv
+ ${GLIB2_LIBRARIES}
+ ${ZLIB_LIBRARIES}
+ ${READLINE_LIBRARY}
+)
+if (ENABLE_NLS)
+ set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "locale")
+endif ()
+
+include(CPack)
+
+install(TARGETS sdcv DESTINATION "bin")
+install(FILES doc/sdcv.1 DESTINATION "share/man/man1")
+install(FILES doc/uk/sdcv.1 DESTINATION "share/man/uk/man1")
+
+if (ENABLE_NLS)
+ install(DIRECTORY "${gettext_outDir}" DESTINATION "share")
+endif ()
+
+option(BUILD_TESTS "Enable automatic testing" False)
+
+if (BUILD_TESTS)
+ find_program(SHELL_CMD NAMES sh bash
+ DOC "Shell scripts interpretator command")
+ message(STATUS "Build tests")
+ enable_testing() # Turn on CTest for this directory so that "make test" runs the tests added below
+
+ macro(add_sdcv_shell_test test_name)
+ add_test(NAME ${test_name}
+ COMMAND "${SHELL_CMD}" "${CMAKE_CURRENT_SOURCE_DIR}/tests/${test_name}" $<TARGET_FILE:sdcv> "${CMAKE_CURRENT_SOURCE_DIR}/tests")
+ endmacro()
+
+ add_sdcv_shell_test(t_list)
+ add_sdcv_shell_test(t_use)
+ add_sdcv_shell_test(t_only_data_dir)
+ add_sdcv_shell_test(t_synonyms)
+ add_sdcv_shell_test(t_json)
+ add_sdcv_shell_test(t_exact)
+ add_sdcv_shell_test(t_interactive)
+ add_sdcv_shell_test(t_utf8output)
+ add_sdcv_shell_test(t_utf8input)
+ add_sdcv_shell_test(t_datadir)
+
+endif (BUILD_TESTS)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..1e1081d
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,341 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..1e89e87
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,79 @@
+Version 0.5.2
+Synonyms index support (.syn files) by Peter <craven@gmx.net>
+Add support of json output by Peter <craven@gmx.net> (--json-output)
+Add -e for exact searches (no fuzzy matches) by Peter <craven@gmx.net>
+Fix build with clang 3.4.1
+fix FSF address in license by Tomáš Čech <sleep_walker@suse.com>
+
+Version 0.5.1
+ Fix usage of SDCV_PAGER by Anton Yuzhaninov
+ Fix build without readline
+ specify dictionary order by -u switches or ~/.sdcv_ordering by Cong Gu
+
+Version 0.5
+- add option to colorize output
+- Roman Imankulov's patch to better use of readline
+- Fix build with modern compilers
+- Fix operation on CPUs with alignment issues, like ARM and SPARC (thanks to Michal Čihař)
+- Migrate to C++11 and CMake (the source archive shrank from 180K to 54K)
+- Update Russian translation
+
+Version 0.4.3
+- fix build problems with gcc 4.x
+- French translation, thanks to Vincent Petry <PVince81@yahoo.fr>
+- Slovak translation update, thanks to Ivan Masár <helix84@centrum.sk>
+- Add Czech translation, thanks to Michal Čihař <michal@cihar.com>
+
+Version 0.4.2
+* Cache scheme has been rewritten, size of cache has been reduced
+* Merge with stardict, speedup of lookup, and add full-text search
+* Russian translation update
+
+Version 0.4.1
+* Recreate cache if idx file was modified
+* Ability to use a pager (SDCV_PAGER)
+* Add Chinese (traditional) translation
+* Add Ukrainian translation
+
+Version 0.4
+* Fix several bugs
+* libstardict by default linked statically with sdcv
+* Add a Simplified Chinese translation
+* documentation update
+* add ability to use readline library with sdcv
+* add ability to keep history in ~/.sdcv_history
+
+Version 0.3.4
+* Fix bug of handling of fuzzy search
+* More clever showing search results
+
+Version 0.3.3
+* Fix bug in using -u option
+* Update Slovak translation
+
+Version 0.3.2
+* Fix bug in Dictionary::load for correct work with gzip'ed idx files
+* Add Slovak translation
+* Change format of output for -l option
+
+Version 0.3.1
+* Fix bug in Lib::Lookup, which cause SF and other problems in search
+* Change installation procedure to install with libstardict headers files,
+* to help compile any application with libstardict
+* Documentation update
+
+Version 0.3
+* update libstardict, make search in dictionaries and loading of dictionaries
+faster
+* add --data-dir option
+* -u, --use-dict now take bookname as argument, not a name of 'ifo' file
+* documentation update and improve
+
+Version 0.2
+
+* Add autoconf, automake, gettext support.
+* Divide package into two parts: libstardict and sdcv itself.
+* Add options for:
+* search word in special dictionary
+* get list of dictionaries
+* for using in scripts
diff --git a/README.org b/README.org
new file mode 100644
index 0000000..d041fb6
--- /dev/null
+++ b/README.org
@@ -0,0 +1,40 @@
+#+OPTIONS: ^:nil
+[[https://travis-ci.org/Dushistov/sdcv][https://travis-ci.org/Dushistov/sdcv.svg?branch=master]]
+[[https://github.com/Dushistov/sdcv/blob/master/LICENSE][https://img.shields.io/badge/license-GPL%202-brightgreen.svg]]
+* How to compile and install
+#+BEGIN_SRC sh
+mkdir /tmp/build-sdcv
+cd /tmp/build-sdcv
+cmake path/to/source/code/of/sdcv
+make
+#+END_SRC
+if you enable nls then you should also type
+#+BEGIN_SRC sh
+make lang
+#+END_SRC
+** to install type
+#+BEGIN_SRC sh
+make install
+#+END_SRC
+you can use "DESTDIR" variable to change installation path
+
+* Documentation
+See sdcv man page for usage description.
+
+* Bugs
+If you find a bug, report it via email to dushistov at mail dot ru.
+Be sure to include the word "sdcv" somewhere in the "Subject:" field.
+
+* Notes to developer
+** make source code release
+#+BEGIN_SRC sh
+make package_source
+#+END_SRC
+** update translation
+#+BEGIN_SRC sh
+cd po
+xgettext -k_ ../src/*.cpp -o new.pot
+msgmerge -U sdcv.pot new.pot
+rm new.pot
+for i in `ls *.po`; do msgmerge -U $i sdcv.pot; done
+#+END_SRC
diff --git a/cmake/FindGLIB2.cmake b/cmake/FindGLIB2.cmake
new file mode 100644
index 0000000..db8d759
--- /dev/null
+++ b/cmake/FindGLIB2.cmake
@@ -0,0 +1,217 @@
+# - Try to find GLib2
+# Once done this will define
+#
+# GLIB2_FOUND - system has GLib2
+# GLIB2_INCLUDE_DIRS - the GLib2 include directory
+# GLIB2_LIBRARIES - Link these to use GLib2
+#
+# HAVE_GLIB_GREGEX_H glib has gregex.h header and
+# supports g_regex_match_simple
+#
+# Copyright (c) 2006 Andreas Schneider <mail@cynapses.org>
+# Copyright (c) 2006 Philippe Bernery <philippe.bernery@gmail.com>
+# Copyright (c) 2007 Daniel Gollub <dgollub@suse.de>
+# Copyright (c) 2007 Alban Browaeys <prahal@yahoo.com>
+# Copyright (c) 2008 Michael Bell <michael.bell@web.de>
+# Copyright (c) 2008 Bjoern Ricks <bjoern.ricks@googlemail.com>
+#
+# Redistribution and use is allowed according to the terms of the New
+# BSD license.
+# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
+#
+
+
+IF (GLIB2_LIBRARIES AND GLIB2_INCLUDE_DIRS )
+ # in cache already
+ SET(GLIB2_FOUND TRUE)
+ELSE (GLIB2_LIBRARIES AND GLIB2_INCLUDE_DIRS )
+
+ INCLUDE(FindPkgConfig)
+
+ ## Glib
+ IF ( GLIB2_FIND_REQUIRED )
+ SET( _pkgconfig_REQUIRED "REQUIRED" )
+ ELSE ( GLIB2_FIND_REQUIRED )
+ SET( _pkgconfig_REQUIRED "" )
+ ENDIF ( GLIB2_FIND_REQUIRED )
+
+ IF ( GLIB2_MIN_VERSION )
+ PKG_SEARCH_MODULE( GLIB2 ${_pkgconfig_REQUIRED} glib-2.0>=${GLIB2_MIN_VERSION} )
+ ELSE ( GLIB2_MIN_VERSION )
+ PKG_SEARCH_MODULE( GLIB2 ${_pkgconfig_REQUIRED} glib-2.0 )
+ ENDIF ( GLIB2_MIN_VERSION )
+ IF ( PKG_CONFIG_FOUND )
+ IF ( GLIB2_FOUND )
+ SET ( GLIB2_CORE_FOUND TRUE )
+ ELSE ( GLIB2_FOUND )
+ SET ( GLIB2_CORE_FOUND FALSE )
+ ENDIF ( GLIB2_FOUND )
+ ENDIF ( PKG_CONFIG_FOUND )
+
+ # Look for glib2 include dir and libraries w/o pkgconfig
+ IF ( NOT GLIB2_FOUND AND NOT PKG_CONFIG_FOUND )
+ FIND_PATH(
+ _glibconfig_include_DIR
+ NAMES
+ glibconfig.h
+ PATHS
+ /opt/gnome/lib64
+ /opt/gnome/lib
+ /opt/lib/
+ /opt/local/lib
+ /sw/lib/
+ /usr/lib64
+ /usr/lib
+ /usr/local/include
+ ${CMAKE_LIBRARY_PATH}
+ PATH_SUFFIXES
+ glib-2.0/include
+ )
+
+ FIND_PATH(
+ _glib2_include_DIR
+ NAMES
+ glib.h
+ PATHS
+ /opt/gnome/include
+ /opt/local/include
+ /sw/include
+ /usr/include
+ /usr/local/include
+ PATH_SUFFIXES
+ glib-2.0
+ )
+
+ #MESSAGE(STATUS "Glib headers: ${_glib2_include_DIR}")
+
+ FIND_LIBRARY(
+ _glib2_link_DIR
+ NAMES
+ glib-2.0
+ glib
+ PATHS
+ /opt/gnome/lib
+ /opt/local/lib
+ /sw/lib
+ /usr/lib
+ /usr/local/lib
+ )
+ IF ( _glib2_include_DIR AND _glib2_link_DIR )
+ SET ( _glib2_FOUND TRUE )
+ ENDIF ( _glib2_include_DIR AND _glib2_link_DIR )
+
+
+ IF ( _glib2_FOUND )
+ SET ( GLIB2_INCLUDE_DIRS ${_glib2_include_DIR} ${_glibconfig_include_DIR} )
+ SET ( GLIB2_LIBRARIES ${_glib2_link_DIR} )
+ SET ( GLIB2_CORE_FOUND TRUE )
+ ELSE ( _glib2_FOUND )
+ SET ( GLIB2_CORE_FOUND FALSE )
+ ENDIF ( _glib2_FOUND )
+
+ # Handle dependencies
+ # libintl
+ IF ( NOT LIBINTL_FOUND )
+ FIND_PATH(LIBINTL_INCLUDE_DIR
+ NAMES
+ libintl.h
+ PATHS
+ /opt/gnome/include
+ /opt/local/include
+ /sw/include
+ /usr/include
+ /usr/local/include
+ )
+
+ FIND_LIBRARY(LIBINTL_LIBRARY
+ NAMES
+ intl
+ PATHS
+ /opt/gnome/lib
+ /opt/local/lib
+ /sw/lib
+ /usr/local/lib
+ /usr/lib
+ )
+
+ IF (LIBINTL_LIBRARY AND LIBINTL_INCLUDE_DIR)
+ SET (LIBINTL_FOUND TRUE)
+ ENDIF (LIBINTL_LIBRARY AND LIBINTL_INCLUDE_DIR)
+ ENDIF ( NOT LIBINTL_FOUND )
+
+ # libiconv
+ IF ( NOT LIBICONV_FOUND )
+   # Locate iconv.h. Each candidate directory is listed once, and no
+   # glib-2.0 path suffix is used because iconv.h is not a GLib header.
+   FIND_PATH(LIBICONV_INCLUDE_DIR
+     NAMES
+       iconv.h
+     PATHS
+       /opt/gnome/include
+       /opt/local/include
+       /sw/include
+       /usr/local/include
+       /usr/include
+   )
+
+   # Locate the iconv library itself.
+   FIND_LIBRARY(LIBICONV_LIBRARY
+     NAMES
+       iconv
+     PATHS
+       /opt/gnome/lib
+       /opt/local/lib
+       /sw/lib
+       /usr/lib
+       /usr/local/lib
+   )
+
+   # libiconv counts as found only when both header and library were located.
+   IF (LIBICONV_LIBRARY AND LIBICONV_INCLUDE_DIR)
+     SET (LIBICONV_FOUND TRUE)
+   ENDIF (LIBICONV_LIBRARY AND LIBICONV_INCLUDE_DIR)
+ ENDIF ( NOT LIBICONV_FOUND )
+
+ IF (LIBINTL_FOUND)
+ SET (GLIB2_LIBRARIES ${GLIB2_LIBRARIES} ${LIBINTL_LIBRARY})
+ SET (GLIB2_INCLUDE_DIRS ${GLIB2_INCLUDE_DIRS} ${LIBINTL_INCLUDE_DIR})
+ ENDIF (LIBINTL_FOUND)
+
+ IF (LIBICONV_FOUND)
+ SET (GLIB2_LIBRARIES ${GLIB2_LIBRARIES} ${LIBICONV_LIBRARY})
+ SET (GLIB2_INCLUDE_DIRS ${GLIB2_INCLUDE_DIRS} ${LIBICONV_INCLUDE_DIR})
+ ENDIF (LIBICONV_FOUND)
+
+ ENDIF ( NOT GLIB2_FOUND AND NOT PKG_CONFIG_FOUND )
+ ##
+
+ IF (GLIB2_CORE_FOUND AND GLIB2_INCLUDE_DIRS AND GLIB2_LIBRARIES)
+ SET (GLIB2_FOUND TRUE)
+ ENDIF (GLIB2_CORE_FOUND AND GLIB2_INCLUDE_DIRS AND GLIB2_LIBRARIES)
+
+ IF (GLIB2_FOUND)
+ IF (NOT GLIB2_FIND_QUIETLY)
+ MESSAGE (STATUS "Found GLib2: ${GLIB2_LIBRARIES} ${GLIB2_INCLUDE_DIRS}")
+ ENDIF (NOT GLIB2_FIND_QUIETLY)
+ ELSE (GLIB2_FOUND)
+ IF (GLIB2_FIND_REQUIRED)
+ MESSAGE (SEND_ERROR "Could not find GLib2")
+ ENDIF (GLIB2_FIND_REQUIRED)
+ ENDIF (GLIB2_FOUND)
+
+ # show the GLIB2_INCLUDE_DIRS and GLIB2_LIBRARIES variables only in the advanced view
+ MARK_AS_ADVANCED(GLIB2_INCLUDE_DIRS GLIB2_LIBRARIES)
+ MARK_AS_ADVANCED(LIBICONV_INCLUDE_DIR LIBICONV_LIBRARY)
+ MARK_AS_ADVANCED(LIBINTL_INCLUDE_DIR LIBINTL_LIBRARY)
+
+ENDIF (GLIB2_LIBRARIES AND GLIB2_INCLUDE_DIRS)
+
+IF ( GLIB2_FOUND )
+  # Check whether the installed glib ships glib/gregex.h (and therefore
+  # supports g_regex_match_simple); the result is stored in HAVE_GLIB_GREGEX_H.
+  INCLUDE( CheckIncludeFiles )
+  # Remember the caller's CMAKE_REQUIRED_INCLUDES so the probe below does not
+  # discard settings made before this module was loaded.
+  SET( _glib2_saved_required_includes "${CMAKE_REQUIRED_INCLUDES}" )
+  SET( CMAKE_REQUIRED_INCLUDES ${GLIB2_INCLUDE_DIRS} )
+  CHECK_INCLUDE_FILES ( glib/gregex.h HAVE_GLIB_GREGEX_H )
+  # Restore the previous value rather than resetting it to the empty string.
+  SET( CMAKE_REQUIRED_INCLUDES "${_glib2_saved_required_includes}" )
+  SET( _glib2_saved_required_includes )
+ENDIF( GLIB2_FOUND )
diff --git a/cmake/FindGettextTools.cmake b/cmake/FindGettextTools.cmake
new file mode 100644
index 0000000..231813b
--- /dev/null
+++ b/cmake/FindGettextTools.cmake
@@ -0,0 +1,152 @@
+# - Finds GNU gettext and provides tools
+# This module looks for the GNU gettext tools. This module defines the
+# following values:
+# GETTEXT_XGETTEXT_EXECUTABLE: The full path to the xgettext tool.
+# GETTEXT_MSGMERGE_EXECUTABLE: The full path to the msgmerge tool.
+# GETTEXT_MSGFMT_EXECUTABLE: The full path to the msgfmt tool.
+# GETTEXT_FOUND: True if gettext has been found.
+# GETTEXT_VERSION_STRING: The version of gettext found (since CMake 2.8.8)
+#
+# It provides the following macro:
+#
+# GETTEXT_MAKE_TARGET (
+# targetName
+# HIERARCHY <HIERARCHY_FORMAT>
+# KEYWORDS keyword1 ... keywordN
+# DOMAIN <TRANSLATION_DOMAIN>
+# STOCK_DIR <DIR>
+# LANG_DIR <DIR>
+# OUT_DIR <DIR>
+# SOURCE sourceFile1 ... sourceFileN )
+#
+# Creates a target that will take a set of translatable source files,
+# create a Gettext pot file then copy stock translations in to the build
+# directory to allow user editing, then compiles them in to mo files in a
+# directory hierarchy to be used in the application.
+#
+# USAGE:
+# targetName (e.g., "lang")
+# The name of the target that will be created to generate translations.
+#
+# HIERARCHY (e.g., "{1}/{2}/{3}/{4}.mo")
+# This is the format in which compiled message catalogs are placed.
+# {1}: The path prefix. (e.g., "/my-repo/build/locale/")
+# {2}: The language name. (e.g., "en")
+# {3}: The catalog category. (e.g., "LC_MESSAGES")
+# {4}: The domain. (e.g., "my-app")
+#
+# KEYWORDS (e.g., "_")
+# A list of keywords used by xgettext to find translatable strings in the
+# source files.
+#
+# DOMAIN (e.g., "my-app")
+# The Gettext domain. It should be unique to your application.
+#
+# STOCK_DIR (e.g., "/my-repo/stock-lang/")
+# The path to the initial translations to be copied to the LANG_DIR.
+# If you have a set of official translations in your source repository,
+# you'd want to set STOCK_DIR to this.
+#
+# LANG_DIR (e.g., "lang")
+# The name of the directory to be created in the build folder, containing
+# editable translations and updated templates.
+#
+# OUT_DIR (e.g., "locale")
+# The directory that compiled catalogs will be placed in, according to
+# the HIERARCHY format.
+#
+# SOURCE (e.g., "main.c")
+# A list of source files to read translatable strings from. Usually this
+# could be the same list you pass to add_executable.
+#
+# If you use the examples above and have a structure like this:
+# /my-repo/stock-lang/en.po
+#
+# You may end up with this structure:
+# /my-repo/stock-lang/en.po
+# /my-repo/build/lang/my-app.pot
+# /my-repo/build/lang/en.po
+# /my-repo/build/locale/en/LC_MESSAGES/my-app.mo
+
+# This nasty set of tools is divided up in to three files:
+# FindGettextTools.cmake
+# This is the file you're reading right now. It provides a neat macro.
+# FindGettextTools/config.cmake.in
+# This is used as the bridge to transfer arguments from the macro to the
+# actual script used to do Gettext stuff. A copy is created and filled in by
+# FindGettextTools.cmake and read by FindGettextTools/script.cmake.
+# The copy is found in the target's directory in the CMakeFiles directory
+# under the name 'gettext.cmake'.
+# FindGettextTools/script.cmake
+# Does Gettext things based on the bridge config file whenever the target
+# created using FindGettextTools.cmake is run.
+
+FIND_PROGRAM(GETTEXT_XGETTEXT_EXECUTABLE xgettext)
+FIND_PROGRAM(GETTEXT_MSGMERGE_EXECUTABLE msgmerge)
+FIND_PROGRAM(GETTEXT_MSGFMT_EXECUTABLE msgfmt)
+
+SET(_gettextScript "${CMAKE_CURRENT_LIST_DIR}/FindGettextTools/script.cmake")
+SET(_gettextConfig "${CMAKE_CURRENT_LIST_DIR}/FindGettextTools/config.cmake.in")
+
+IF(GETTEXT_XGETTEXT_EXECUTABLE)
+ EXECUTE_PROCESS(COMMAND ${GETTEXT_XGETTEXT_EXECUTABLE} --version
+ OUTPUT_VARIABLE gettext_version
+ ERROR_QUIET
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+ IF(gettext_version MATCHES "^xgettext \\(.*\\) [0-9]")
+ STRING(REGEX REPLACE "^xgettext \\([^\\)]*\\) ([0-9\\.]+[^ \n]*).*" "\\1"
+ GETTEXT_VERSION_STRING "${gettext_version}")
+ ENDIF()
+ SET(gettext_version)
+ENDIF()
+
+INCLUDE(FindPackageHandleStandardArgs)
+FIND_PACKAGE_HANDLE_STANDARD_ARGS(Gettext
+ REQUIRED_VARS
+ GETTEXT_XGETTEXT_EXECUTABLE
+ GETTEXT_MSGMERGE_EXECUTABLE
+ GETTEXT_MSGFMT_EXECUTABLE
+ VERSION_VAR GETTEXT_VERSION_STRING)
+
+INCLUDE(CMakeParseArguments)
+
+FUNCTION(GETTEXT_MAKE_TARGET _targetName)
+  # Split the keyword arguments into _parsedArguments_<KEYWORD> variables.
+  # The "_parsedArguments" prefix is referenced by the config template, so it
+  # must keep this exact name.
+  CMAKE_PARSE_ARGUMENTS(_parsedArguments
+    ""
+    "HIERARCHY;DOMAIN;STOCK_DIR;LANG_DIR;OUT_DIR"
+    "KEYWORDS;SOURCE"
+    "${ARGN}")
+
+  # Every keyword is mandatory; abort configuration if any of them is missing
+  # or evaluates to false.
+  FOREACH(_requiredKey
+      HIERARCHY KEYWORDS DOMAIN STOCK_DIR LANG_DIR OUT_DIR SOURCE)
+    IF(NOT _parsedArguments_${_requiredKey})
+      MESSAGE(FATAL_ERROR "Wrong usage!")
+    ENDIF()
+  ENDFOREACH()
+
+  # Write the per-target bridge file that carries the parsed arguments over to
+  # the script executed by the custom target.
+  SET(_generatedConfig
+    "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${_targetName}.dir/gettext.cmake")
+  CONFIGURE_FILE(${_gettextConfig} ${_generatedConfig})
+
+  # The target runs the gettext script in CMake script mode, passing the
+  # generated config file as its only argument.
+  ADD_CUSTOM_TARGET(${_targetName}
+    ${CMAKE_COMMAND} "-P" ${_gettextScript} ${_generatedConfig})
+ENDFUNCTION()
+
+# Publish the overall result in GETTEXT_FOUND: all three tools must have been
+# located. A missing tool is fatal when the legacy GETTEXT_REQUIRED switch is
+# set or when the module was requested with find_package(GettextTools REQUIRED),
+# which sets GettextTools_FIND_REQUIRED.
+IF(GETTEXT_MSGMERGE_EXECUTABLE AND
+    GETTEXT_MSGFMT_EXECUTABLE AND
+    GETTEXT_XGETTEXT_EXECUTABLE)
+  SET(GETTEXT_FOUND TRUE)
+ELSE()
+  SET(GETTEXT_FOUND FALSE)
+  IF(GETTEXT_REQUIRED OR GettextTools_FIND_REQUIRED)
+    MESSAGE(FATAL_ERROR "Gettext not found")
+  ENDIF()
+ENDIF()
diff --git a/cmake/FindGettextTools/config.cmake.in b/cmake/FindGettextTools/config.cmake.in
new file mode 100644
index 0000000..57e3739
--- /dev/null
+++ b/cmake/FindGettextTools/config.cmake.in
@@ -0,0 +1,14 @@
+# This file is automatically generated by GettextTools.
+# It WILL be overwritten by CMake, so editing it is futile. Sorry!
+
+set(hierarchy "${_parsedArguments_HIERARCHY}")
+set(keywords "${_parsedArguments_KEYWORDS}")
+set(domain "${_parsedArguments_DOMAIN}")
+set(sourcePrefix "${CMAKE_CURRENT_SOURCE_DIR}")
+set(stockDir "${_parsedArguments_STOCK_DIR}")
+set(langDir "${_parsedArguments_LANG_DIR}")
+set(outDir "${_parsedArguments_OUT_DIR}")
+set(XGETTEXT_EXECUTABLE "${GETTEXT_XGETTEXT_EXECUTABLE}")
+set(MSGMERGE_EXECUTABLE "${GETTEXT_MSGMERGE_EXECUTABLE}")
+set(MSGFMT_EXECUTABLE "${GETTEXT_MSGFMT_EXECUTABLE}")
+set(sourceFiles "${_parsedArguments_SOURCE}")
diff --git a/cmake/FindGettextTools/script.cmake b/cmake/FindGettextTools/script.cmake
new file mode 100644
index 0000000..364c5ea
--- /dev/null
+++ b/cmake/FindGettextTools/script.cmake
@@ -0,0 +1,103 @@
+# ----- Set up variables.
+
+# Read variables from the generated config.
+include(${CMAKE_ARGV3})
+
+# Transform keywords in to flags.
+set(keywordArgs "")
+foreach(keyword ${keywords})
+ list(APPEND keywordArgs "--keyword=${keyword}")
+endforeach()
+
+# ----- Make the pot file.
+
+message("Creating translation template...")
+
+file(MAKE_DIRECTORY ${langDir})
+
+set(potFile "${langDir}/${domain}.pot")
+
+# Extract the translatable strings from the source files into the template.
+# Only the exit status is captured, so anything xgettext prints still reaches
+# the console.
+execute_process(COMMAND ${XGETTEXT_EXECUTABLE}
+  "--output=${potFile}"
+  "--omit-header" "--add-comments"
+  ${keywordArgs}
+  ${sourceFiles}
+  WORKING_DIRECTORY ${sourcePrefix}
+  RESULT_VARIABLE ret)
+
+# Report a failed extraction the same way the msgmerge/msgfmt steps of this
+# script do, instead of claiming success unconditionally.
+if(ret)
+  message(SEND_ERROR "xgettext failed (${ret}) while creating '${potFile}'")
+else()
+  message(" '${domain}.pot' done.")
+endif()
+
+# ----- Copy and merge across the po files that come with the source.
+
+message("Copying and updating stock translations...")
+
+file(GLOB poFiles "${stockDir}/*.po")
+
+foreach(file ${poFiles})
+ # Get the language name, like en_US or zh_CN, from the name of the po file, so
+ # 'en_US.po' or 'zh_CN.po' become 'en_US' or 'zh_CN'.
+ get_filename_component(langName ${file} NAME_WE)
+
+ set(newFile "${langDir}/${langName}.po")
+
+ if(NOT EXISTS ${newFile})
+ execute_process(COMMAND ${MSGMERGE_EXECUTABLE}
+ "--output-file" ${newFile} ${file} ${potFile}
+ OUTPUT_QUIET ERROR_VARIABLE error RESULT_VARIABLE ret)
+
+ if(ret) # Have to do this hack as msgmerge prints to stderr.
+ message(SEND_ERROR "${error}")
+ endif()
+
+ message(" '${langName}' copied.")
+ elseif(${file} IS_NEWER_THAN ${newFile})
+ execute_process(COMMAND ${MSGMERGE_EXECUTABLE}
+ "--update" ${newFile} ${file}
+ OUTPUT_QUIET ERROR_VARIABLE error RESULT_VARIABLE ret)
+
+ if(ret) # Have to do this hack as msgmerge prints to stderr.
+ message(SEND_ERROR "${error}")
+ endif()
+
+ message(" '${langName}' merged.")
+ endif()
+endforeach()
+
+# ----- Process the files in to mo files.
+
+message("Compiling translations...")
+
+file(GLOB localPoFiles "${langDir}/*.po")
+
+foreach(file ${localPoFiles})
+ execute_process(COMMAND ${MSGMERGE_EXECUTABLE}
+ "--update" ${file} ${potFile}
+ OUTPUT_QUIET ERROR_VARIABLE error RESULT_VARIABLE ret)
+
+ if(ret) # Have to do this hack as msgmerge prints to stderr.
+ message(SEND_ERROR "${error}")
+ endif()
+
+ get_filename_component(langName ${file} NAME_WE)
+
+ set(binaryFile "${hierarchy}")
+ string(REPLACE "{1}" "${outDir}" binaryFile "${binaryFile}")
+ string(REPLACE "{2}" "${langName}" binaryFile "${binaryFile}")
+ string(REPLACE "{3}" "LC_MESSAGES" binaryFile "${binaryFile}")
+ string(REPLACE "{4}" "${domain}" binaryFile "${binaryFile}")
+
+ if(${file} IS_NEWER_THAN ${binaryFile})
+ get_filename_component(binaryDir ${binaryFile} PATH)
+
+ file(MAKE_DIRECTORY ${binaryDir})
+
+ execute_process(COMMAND ${MSGFMT_EXECUTABLE}
+ ${file} "--output-file" ${binaryFile}
+ OUTPUT_QUIET ERROR_VARIABLE error RESULT_VARIABLE ret)
+
+ if(ret) # Have to do this hack as msgfmt prints to stderr.
+ message(SEND_ERROR "${error}")
+ endif()
+
+ message(" '${langName}' done.")
+ endif()
+endforeach()
diff --git a/cmake/FindIconv.cmake b/cmake/FindIconv.cmake
new file mode 100644
index 0000000..092b357
--- /dev/null
+++ b/cmake/FindIconv.cmake
@@ -0,0 +1,64 @@
+# - Try to find Iconv
+# Once done this will define
+#
+# ICONV_FOUND - system has Iconv
+# ICONV_INCLUDE_DIR - the Iconv include directory
+# ICONV_LIBRARIES - Link these to use Iconv
+# ICONV_SECOND_ARGUMENT_IS_CONST - the second argument for iconv() is const
+#
+include(CheckCCompilerFlag)
+include(CheckCSourceCompiles)
+
+IF (ICONV_INCLUDE_DIR AND ICONV_LIBRARIES)
+ # Already in cache, be silent
+ SET(ICONV_FIND_QUIETLY TRUE)
+ENDIF (ICONV_INCLUDE_DIR AND ICONV_LIBRARIES)
+
+FIND_PATH(ICONV_INCLUDE_DIR iconv.h HINTS /sw/include/ PATHS /opt/local)
+
+FIND_LIBRARY(ICONV_LIBRARIES NAMES iconv libiconv c PATHS /opt/local)
+
+IF(ICONV_INCLUDE_DIR AND ICONV_LIBRARIES)
+ SET(ICONV_FOUND TRUE)
+ENDIF(ICONV_INCLUDE_DIR AND ICONV_LIBRARIES)
+
+# When iconv was found, probe whether a call passing a "const char**" as the
+# second argument of iconv() compiles; the result is stored in
+# ICONV_SECOND_ARGUMENT_IS_CONST.
+IF(ICONV_FOUND)
+  # Save the caller's check state so it can be restored afterwards instead of
+  # being wiped.
+  set(_iconv_saved_required_includes "${CMAKE_REQUIRED_INCLUDES}")
+  set(_iconv_saved_required_libraries "${CMAKE_REQUIRED_LIBRARIES}")
+  set(CMAKE_REQUIRED_INCLUDES ${ICONV_INCLUDE_DIR})
+  set(CMAKE_REQUIRED_LIBRARIES ${ICONV_LIBRARIES})
+  # The probe is compiled with -Werror when the compiler accepts it --
+  # presumably so that a const-mismatch warning fails the test compile.
+  check_c_compiler_flag("-Werror" ICONV_HAVE_WERROR)
+  set (CMAKE_C_FLAGS_BACKUP "${CMAKE_C_FLAGS}")
+  if(ICONV_HAVE_WERROR)
+    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror")
+  endif(ICONV_HAVE_WERROR)
+  check_c_source_compiles("
+ #include <iconv.h>
+ int main(){
+ iconv_t conv = 0;
+ const char* in = 0;
+ size_t ilen = 0;
+ char* out = 0;
+ size_t olen = 0;
+ iconv(conv, &in, &ilen, &out, &olen);
+ return 0;
+ }
+" ICONV_SECOND_ARGUMENT_IS_CONST )
+  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS_BACKUP}")
+  # Put the caller's check state back.
+  set(CMAKE_REQUIRED_INCLUDES "${_iconv_saved_required_includes}")
+  set(CMAKE_REQUIRED_LIBRARIES "${_iconv_saved_required_libraries}")
+  set(_iconv_saved_required_includes)
+  set(_iconv_saved_required_libraries)
+ENDIF(ICONV_FOUND)
+
+# Report the outcome. The success message is suppressed when either the
+# module's own ICONV_FIND_QUIETLY flag or the Iconv_FIND_QUIETLY variable set
+# by find_package(Iconv QUIET) is true. A missing iconv is fatal when the
+# package was requested as REQUIRED (either spelling of the variable).
+IF(ICONV_FOUND)
+  IF(NOT ICONV_FIND_QUIETLY AND NOT Iconv_FIND_QUIETLY)
+    MESSAGE(STATUS "Found Iconv: ${ICONV_LIBRARIES}")
+  ENDIF()
+ELSE()
+  IF(Iconv_FIND_REQUIRED OR ICONV_FIND_REQUIRED)
+    MESSAGE(FATAL_ERROR "Could not find Iconv")
+  ENDIF()
+ENDIF()
+
+MARK_AS_ADVANCED(
+ ICONV_INCLUDE_DIR
+ ICONV_LIBRARIES
+ ICONV_SECOND_ARGUMENT_IS_CONST
+)
diff --git a/cmake/FindLibintl.cmake b/cmake/FindLibintl.cmake
new file mode 100644
index 0000000..7fec58b
--- /dev/null
+++ b/cmake/FindLibintl.cmake
@@ -0,0 +1,56 @@
+# Try to find the libintl library. Explicit searching is currently
+# only required for Win32, though it might be useful for some UNIX
+# variants, too. Therefore code for searching common UNIX include
+# directories is included, too.
+#
+# Once done this will define
+#
+# LIBINTL_FOUND - system has libintl
+# LIBINTL_LIBRARIES - libraries needed for linking
+
+IF (LIBINTL_FOUND)
+ SET(LIBINTL_FIND_QUIETLY TRUE)
+ENDIF ()
+
+# for Windows we rely on the environement variables
+# %INCLUDE% and %LIB%; FIND_LIBRARY checks %LIB%
+# automatically on Windows
+IF(WIN32)
+ FIND_LIBRARY(LIBINTL_LIBRARIES
+ NAMES intl
+ )
+ IF(LIBINTL_LIBRARIES)
+ SET(LIBINTL_FOUND TRUE)
+ ELSE(LIBINTL_LIBRARIES)
+ SET(LIBINTL_FOUND FALSE)
+ ENDIF(LIBINTL_LIBRARIES)
+ELSE()
+ include(CheckFunctionExists)
+ check_function_exists(dgettext LIBINTL_LIBC_HAS_DGETTEXT)
+ if (LIBINTL_LIBC_HAS_DGETTEXT)
+ find_library(LIBINTL_LIBRARIES NAMES c)
+ set(LIBINTL_FOUND TRUE)
+ else (LIBINTL_LIBC_HAS_DGETTEXT)
+ find_library(LIBINTL_LIBRARIES
+ NAMES intl libintl
+ PATHS /usr/lib /usr/local/lib
+ )
+ IF(LIBINTL_LIBRARIES)
+ SET(LIBINTL_FOUND TRUE)
+ ELSE(LIBINTL_LIBRARIES)
+ SET(LIBINTL_FOUND FALSE)
+ ENDIF(LIBINTL_LIBRARIES)
+ ENDIF (LIBINTL_LIBC_HAS_DGETTEXT)
+ENDIF()
+
+# Report the outcome. find_package(Libintl ...) sets Libintl_FIND_REQUIRED and
+# Libintl_FIND_QUIETLY (CMake variable names are case-sensitive), so those are
+# checked in addition to the upper-case spellings this module used before.
+IF (LIBINTL_FOUND)
+  IF (NOT LIBINTL_FIND_QUIETLY AND NOT Libintl_FIND_QUIETLY)
+    MESSAGE(STATUS "Found libintl: ${LIBINTL_LIBRARIES}")
+  ENDIF ()
+ELSE ()
+  IF (LIBINTL_FIND_REQUIRED OR Libintl_FIND_REQUIRED)
+    MESSAGE(FATAL_ERROR "Could NOT find libintl")
+  ENDIF ()
+ENDIF ()
+
+MARK_AS_ADVANCED(LIBINTL_LIBRARIES LIBINTL_LIBC_HAS_DGETTEXT)
diff --git a/cmake/FindPackageHandleStandardArgs.cmake b/cmake/FindPackageHandleStandardArgs.cmake
new file mode 100644
index 0000000..151d812
--- /dev/null
+++ b/cmake/FindPackageHandleStandardArgs.cmake
@@ -0,0 +1,58 @@
+# FIND_PACKAGE_HANDLE_STANDARD_ARGS(NAME (DEFAULT_MSG|"Custom failure message") VAR1 ... )
+# This macro is intended to be used in FindXXX.cmake modules files.
+# It handles the REQUIRED and QUIET argument to FIND_PACKAGE() and
+# it also sets the <UPPERCASED_NAME>_FOUND variable.
+# The package is found if all variables listed are TRUE.
+# Example:
+#
+# FIND_PACKAGE_HANDLE_STANDARD_ARGS(LibXml2 DEFAULT_MSG LIBXML2_LIBRARIES LIBXML2_INCLUDE_DIR)
+#
+# LibXml2 is considered to be found, if both LIBXML2_LIBRARIES and
+# LIBXML2_INCLUDE_DIR are valid. Then also LIBXML2_FOUND is set to TRUE.
+# If it is not found and REQUIRED was used, it fails with FATAL_ERROR,
+# independent whether QUIET was used or not.
+# If it is found, the location is reported using the VAR1 argument, so
+# here a message "Found LibXml2: /usr/lib/libxml2.so" will be printed out.
+# If the second argument is DEFAULT_MSG, the message in the failure case will
+# be "Could NOT find LibXml2", if you don't like this message you can specify
+# your own custom failure message there.
+
+MACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FAIL_MSG _VAR1 )
+
+  # Pick the failure text: a custom message is used verbatim, while
+  # DEFAULT_MSG selects a generic one that depends on whether the package was
+  # requested as REQUIRED.
+  IF(NOT "${_FAIL_MSG}" STREQUAL "DEFAULT_MSG")
+    SET(_FAIL_MESSAGE "${_FAIL_MSG}")
+  ELSEIF(${_NAME}_FIND_REQUIRED)
+    SET(_FAIL_MESSAGE "Could not find REQUIRED package ${_NAME}")
+  ELSE()
+    SET(_FAIL_MESSAGE "Could not find OPTIONAL package ${_NAME}")
+  ENDIF()
+
+  STRING(TOUPPER ${_NAME} _NAME_UPPER)
+
+  # Start from "found" and veto it as soon as any listed variable (the first
+  # one as well as all the extra ones) evaluates to false.
+  SET(${_NAME_UPPER}_FOUND TRUE)
+  FOREACH(_CURRENT_VAR ${_VAR1} ${ARGN})
+    IF(NOT ${_CURRENT_VAR})
+      SET(${_NAME_UPPER}_FOUND FALSE)
+    ENDIF()
+  ENDFOREACH()
+
+  # Report the result: failure is fatal for REQUIRED packages; every other
+  # message is printed only when QUIET was not requested. On success the value
+  # of the first variable is shown as the location.
+  IF(NOT ${_NAME_UPPER}_FOUND)
+    IF(${_NAME}_FIND_REQUIRED)
+      MESSAGE(FATAL_ERROR "${_FAIL_MESSAGE}")
+    ELSEIF(NOT ${_NAME}_FIND_QUIETLY)
+      MESSAGE(STATUS "${_FAIL_MESSAGE}")
+    ENDIF()
+  ELSEIF(NOT ${_NAME}_FIND_QUIETLY)
+    MESSAGE(STATUS "Found ${_NAME}: ${${_VAR1}}")
+  ENDIF()
+ENDMACRO(FIND_PACKAGE_HANDLE_STANDARD_ARGS)
diff --git a/cmake/compiler.cmake b/cmake/compiler.cmake
new file mode 100644
index 0000000..cf94ed3
--- /dev/null
+++ b/cmake/compiler.cmake
@@ -0,0 +1,35 @@
+# append(<value> <variable>...)
+# Appends <value>, preceded by a single space, to every named variable and
+# stores the result in the caller's scope.
+function(append value)
+  foreach(_name ${ARGN})
+    set(_extended "${${_name}} ${value}")
+    set(${_name} "${_extended}" PARENT_SCOPE)
+  endforeach()
+endfunction()
+
+include(CheckCXXCompilerFlag)
+
+if (NOT DEFINED SDCV_COMPILER_IS_GCC_COMPATIBLE)
+ if (CMAKE_COMPILER_IS_GNUCXX)
+ set(SDCV_COMPILER_IS_GCC_COMPATIBLE ON)
+ elseif (MSVC)
+ set(SDCV_COMPILER_IS_GCC_COMPATIBLE OFF)
+ elseif ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
+ set (SDCV_COMPILER_IS_GCC_COMPATIBLE ON)
+ endif()
+endif()
+
+# Require a C++11-capable compiler: MSVC must be at least version 1900, and
+# any other compiler must accept -std=c++11.
+if (MSVC)
+  if (MSVC_VERSION LESS 1900)
+    message(FATAL_ERROR "MSVC version ${MSVC_VERSION} have no full c++11 support")
+  endif ()
+  add_definitions(-DNOMINMAX)
+else ()
+  check_cxx_compiler_flag("-std=c++11" CXX_SUPPORTS_CXX11)
+  if (NOT CXX_SUPPORTS_CXX11)
+    message(FATAL_ERROR "sdcv requires C++11 support but the '-std=c++11' flag isn't supported.")
+  endif ()
+  append("-std=c++11" CMAKE_CXX_FLAGS)
+endif ()
+
+if (SDCV_COMPILER_IS_GCC_COMPATIBLE)
+  # append() takes ONE value followed by the names of the variables to extend,
+  # so each set of warning flags is passed as a single space-separated string.
+  # Passing the flags as separate arguments would add only the first flag and
+  # treat the remaining ones as variable names.
+  append("-Wall -Wextra -Wformat-security -Wcast-align -Werror=format -Wcast-qual" CMAKE_C_FLAGS)
+  append("-Wall -pedantic -Wextra -Wformat-security -Wcast-align -Werror=format -Wcast-qual" CMAKE_CXX_FLAGS)
+endif ()
diff --git a/config.h.cmake b/config.h.cmake
new file mode 100644
index 0000000..e67be0c
--- /dev/null
+++ b/config.h.cmake
@@ -0,0 +1,5 @@
+#cmakedefine HAVE_MMAP 1
+#cmakedefine HAVE_LOCALE_H 1
+#cmakedefine WITH_READLINE 1
+#cmakedefine ENABLE_NLS 1
+#cmakedefine GETTEXT_TRANSLATIONS_PATH "${GETTEXT_TRANSLATIONS_PATH}" \ No newline at end of file
diff --git a/doc/DICTFILE_FORMAT b/doc/DICTFILE_FORMAT
new file mode 100644
index 0000000..d1b1d9d
--- /dev/null
+++ b/doc/DICTFILE_FORMAT
@@ -0,0 +1,352 @@
+Format for StarDict dictionary files
+------------------------------------
+
+StarDict homepage: http://stardict.sourceforge.net
+
+{0}. Number and Byte-order Conventions
+When you record the numbers that identify sizes, offsets, etc., you
+should use 32-bit numbers, such as you might represent with a glong.
+
+In order to make StarDict work on different platforms, these numbers
+must be in network byte order. You can ensure the correct byte order
+by using the g_htonl() function when creating dictionary files.
+Conversely, you should use g_ntohl() when reading dictionary files.
+
+Strings should be encoded in UTF-8.
+
+
+{1}. Files
+Every dictionary consists of three files:
+(1). somedict.ifo
+(2). somedict.idx or somedict.idx.gz
+(3). somedict.dict or somedict.dict.dz
+
+You can use gzip -9 to compress the .idx file. If the .idx file is not
+compressed, loading is fast and uses little memory; if it is compressed, the
+whole .idx file is loaded into memory, which makes querying fast.
+
+You can use dictzip to compress the .dict file.
+"dictzip" uses the same compression algorithm and file format as does gzip,
+but provides a table that can be used to randomly access compressed blocks
+in the file. The use of 50-64kB blocks for compression typically degrades
+compression by less than 10%, while maintaining acceptable random access
+capabilities for all data in the file. As an added benefit, files
+compressed with dictzip can be decompressed with gunzip.
+For more information about dictzip, refer to DICT project, please see:
+http://www.dict.org
+
+Stardict will search for the .ifo file, then open the .idx or
+.idx.gz file and the .dict.dz or .dict file which is in the same directory and
+has the same base name.
+
+
+
+{2}. The ".ifo" file's format.
+The .ifo file has the following format:
+
+StarDict's dict ifo file
+version=2.4.2
+[options]
+
+Note that the current "version" string must be "2.4.2". If it's not,
+then StarDict will refuse to read the file.
+
+[options]
+---------
+In the example above, [options] expands to any of the following lines
+specifying information about the dictionary. Each option is a keyword
+followed by an equal sign, then the value of that option, then a
+newline. The options may appear in any order.
+
+Note that the dictionary must have at least a bookname, a wordcount and a
+idxfilesize, or the load will fail. All other information is optional. All
+strings should be encoded in UTF-8.
+
+Available options:
+
+bookname= // required
+wordcount= // required
+idxfilesize= // required
+author=
+email=
+website=
+description=
+date=
+sametypesequence= // very important.
+
+
+wordcount is the count of word entries in .idx file, it must be right.
+
+idxfilesize is the size (in bytes) of the .idx file. Even if the .idx file is
+compressed to a .idx.gz file, this entry must record the original .idx file's
+size, and it must be right too. The .gz file doesn't contain its original size
+information, but knowing the original size can speed up the extraction to
+memory, as you don't need to call realloc() many times.
+
+
+The "sametypesequence" option is described in further detail below.
+
+***
+sametypesequence
+
+You should first familiarize yourself with the .dict file format
+described in the next section so that you can understand what effect
+this option has on the .dict file.
+
+If the sametypesequence option is set, it tells StarDict that each
+word's data in the .dict file will have the same sequence of datatypes.
+In this case, we expect a .dict file that's been optimized in two
+ways: the type identifiers should be omitted, and the size marker for
+the last data entry of each word should be omitted.
+
+Let's consider some concrete examples of the sametypesequence option.
+
+Suppose that a dictionary records many .wav files, and so sets:
+ sametypesequence=W
+In this case, each word's entry in the .dict file consists solely of a
+wav file. In the .dict file, you would leave out the 'W' character
+before each entry, and you would also omit the 32-bit integer at the
+front of each .wav entry that would normally give the entry's length.
+You can do this since the length is known from the information in the
+idx file.
+
+As another example, suppose a dictionary contains phonetic information
+and a meaning for each word. The sametypesequence option for this
+dictionary would be:
+ sametypesequence=tm
+Once again, you can omit the 't' and 'm' characters before each data
+entry in the .dict file. In addition, you should omit the terminating
+'\0' for the 'm' entry for each word in the .dict file, as the length
+of the meaning string can be inferred from the length of the phonetic
+string (still indicated by a terminating '\0') and the length of the
+entire word entry (listed in the .idx file).
+
+So for cases where the last data entry for each word normally requires
+a terminating '\0' character, you should omit this character in the
+dict file. And for cases where the last data entry for each word
+normally requires an initial 32-bit number giving the length of the
+field (such as WAV and PNG entries), you must omit this number in the
+dictionary.
+
+Every dictionary should try to use the sametypesequence feature to
+save disk space.
+***
+
+
+{3}. The ".idx" file's format.
+The .idx file is just a word list.
+
+The word list is a sorted list of word entries.
+
+Each entry in the word list contains three fields, one after the other:
+ word_str; // a utf-8 string terminated by '\0'.
+ word_data_offset; // word data's offset in .dict file
+ word_data_size; // word data's total size in .dict file
+
+word_str gives the string representing this word. It's the string
+that is "looked up" by the StarDict.
+
+word_data_offset and word_data_size should both be 32-bit numbers in
+network byte order.
+
+No two entries should have the same "word_str". In other words,
+(strcmp(s1, s2) != 0).
+
+The length of "word_str" should be less than 256. In other words,
+(strlen(word) < 256).
+
+The word list must be sorted by calling stardict_strcmp() on the "word_str"
+fields. If the word list order is wrong, StarDict will fail to function
+correctly!
+
+============
+gint stardict_strcmp(const gchar *s1, const gchar *s2)
+{
+ gint a;
+ a = g_ascii_strcasecmp(s1, s2);
+ if (a == 0)
+ return strcmp(s1, s2);
+ else
+ return a;
+}
+============
+g_ascii_strcasecmp() is a glib function:
+Unlike the BSD strcasecmp() function, this only recognizes standard
+ASCII letters and ignores the locale, treating all non-ASCII characters
+as if they are not letters.
+
+stardict_strcmp() works fine with English characters, but the other
+locale characters' sorting is not so good. There should be a _strcmp
+function which handles the utf-8 string sorting better. If you know
+one, email me :)
+
+g_utf8_collate()? This is a locale-dependent function. So if you look
+up Chinese characters while in the Chinese locale, it works fine. But
+if you are in some other locale then the lookup will fail, as the
+order is not the same as in the Chinese locale (which was used when
+creating the dictionary).
+
+g_utf8_to_ucs4() then do comparing? This sounds like a good solution, but..
+
+The complete solution can be found in "Unicode Technical Standard #10: Unicode
+Collation Algorithm", http://www.unicode.org/reports/tr10/
+
+I hope glib will provide a locale-independent g_utf8_collate() soon.
+http://bugzilla.gnome.org/show_bug.cgi?id=112798
+
+
+
+{4}. The ".dict" file's format.
+The .dict file is a pure data sequence, as the offset and size of each
+word is recorded in the corresponding .idx file.
+
+If the "sametypesequence" option is not used in the .ifo file, then
+the .dict file has fields in the following order:
+==============
+word_1_data_1_type; // a single char identifying the data type
+word_1_data_1_data; // the data
+word_1_data_2_type;
+word_1_data_2_data;
+...... // the number of data entries for each word is determined by
+ // word_data_size in .idx file
+word_2_data_1_type;
+word_2_data_1_data;
+......
+==============
+It's important to note that each field in each word indicates its
+own length, as described below. The number of possible fields per
+word is also not fixed, and is determined by simply reading data until
+you've read word_data_size bytes for that word.
+
+
+Suppose the "sametypesequence" option is used in the .ifo file, and
+the option is set like this:
+sametypesequence=tm
+Then the .dict file will look like this:
+==============
+word_1_data_1_data
+word_1_data_2_data
+word_2_data_1_data
+word_2_data_2_data
+......
+==============
+The first data entry for each word will have a terminating '\0', but
+the second entry will not have a terminating '\0'. The omissions of
+the type chars and of the last field's size information are the
+optimizations required by the "sametypesequence" option described
+above.
+
+
+Type identifiers
+----------------
+Here are the single-character type identifiers that may be used with
+the "sametypesequence" option in the .ifo file, or may appear in the
+.dict file itself if the "sametypesequence" option is not used.
+
+Lower-case characters signify that a field's size is determined by a
+terminating '\0', while upper-case characters indicate that the data
+begins with a 32-bit integer that gives the length of the data field.
+
+'m'
+Word's pure text meaning.
+The data should be a utf-8 string ending with '\0'.
+
+'l'
+Word's pure text meaning.
+The data is NOT a utf-8 string, but is instead a string in locale
+encoding, ending with '\0'. Sometimes using this type will save disk
+space, but its use is discouraged.
+
+'g'
+A utf-8 string which is marked up with the Pango text markup language.
+For more information about this markup language, see the "Pango
+Reference Manual."
+You might have it installed locally at:
+file:///usr/share/gtk-doc/html/pango/PangoMarkupFormat.html
+
+'t'
+English phonetic string.
+The data should be a utf-8 string ending with '\0'.
+
+Here are some utf-8 phonetic characters:
+θʃŋʧðʒæıʌʊɒɛəɑɜɔˌˈːˑ
+æɑɒʌәєŋvθðʃʒːɡˏˊˋ
+
+'y'
+Chinese YinBiao.
+The data should be a utf-8 string ending with '\0'.
+
+
+'W'
+wav file.
+The data begins with a network byte-ordered glong to identify the wav
+file's size, immediately followed by the file's content.
+
+'P'
+png file.
+The data begins with a network byte-ordered glong to identify the png
+file's size, immediately followed by the file's content.
+
+'X'
+this type identifier is reserved for experimental extensions.
+
+
+{5}. Tree Dictionary
+The tree dictionary support is used for information viewing, etc.
+
+A tree dictionary contains three files: sometreedict.ifo, sometreedict.tdx.gz
+and sometreedict.dict.dz.
+
+It is better to compress the .tdx file, as it is always loaded into memory.
+
+The .ifo file has the following format:
+
+StarDict's treedict ifo file
+version=2.4.2
+[options]
+
+Available options:
+
+bookname= // required
+tdxfilesize= // required
+wordcount=
+author=
+email=
+website=
+description=
+date=
+sametypesequence=
+
+wordcount is only used for info view in the dict manage dialog, so it is not
+important in tree dictionary.
+
+The .tdx file is just the word list.
+-----------
+The word list is a tree list of word entries.
+
+Each entry in the word list contains four fields, one after the other:
+ word_str; // a utf-8 string terminated by '\0'.
+ word_data_offset; // word data's offset in .dict file
+ word_data_size; // word data's total size in .dict file. it can be 0.
+ word_subentry_count; // how many sub-words this entry has; 0 means none.
+
+Subentries immediately follow their parent entry. This makes the order the
+same as that of a tree list with all its nodes expanded, read from top to
+bottom.
+
+The .dict file's format is the same as the normal dictionary.
+
+
+
+{6}. More information.
+You can read "src/lib.cpp", "src/dictmanagedlg.cpp" and
+"src/tools/*.cpp" for more information.
+
+If you have any questions, email me. :)
+
+Thanks to Will Robinson <wsr23@stanford.edu> for cleaning up this file's
+English.
+
+Hu Zheng <huzheng_001@163.com>
+http://forlinux.yeah.net
+2003.11.11
diff --git a/doc/sdcv.1 b/doc/sdcv.1
new file mode 100644
index 0000000..86351b7
--- /dev/null
+++ b/doc/sdcv.1
@@ -0,0 +1,104 @@
+.TH SDCV 1 "2006-04-24" "sdcv-0.4.2"
+.SH NAME
+sdcv \- console version of StarDict program
+.SH SYNOPSIS
+.B sdcv
+[
+.BI options
+]
+[list of words]
+.SH DESCRIPTION
+.I sdcv
+is a simple, cross-platform text-based utility
+for working with dictionaries in StarDict format.
+Each word from "list of words" may be a string
+with a leading '/' for using a Fuzzy search algorithm,
+with a leading '|' for using full-text search,
+and the string may contain '?' and '*' for regexp search.
+It works in interactive and non-interactive mode.
+To exit from interactive mode press Ctrl+D.
+In interactive mode,
+if sdcv was compiled with readline library support,
+you can use the UP and DOWN keys to cycle through history.
+.SH OPTIONS
+.TP 8
+.B "\-h \-\-help"
+Display help message and exit
+.TP 8
+.B "\-v \-\-version"
+Display version and exit
+.TP 8
+.B "\-l \-\-list\-dicts"
+Display list of available dictionaries and exit
+.TP 8
+.B "\-u \-\-use\-dict bookname"
+For search use only dictionary with this bookname
+.TP 8
+.B "\-n \-\-non\-interactive"
+For use in scripts
+.TP 8
+.B "\-x \-\-only\-data\-dir"
+For use in scripts: only use the dictionaries in data-dir, do not search in user and system directories
+.TP 8
+.B "\-e \-\-exact\-search"
+Do not fuzzy-search for similar words, only return exact matches
+.TP 8
+.B "\-j \-\-json"
+Print the results of list-dicts and searches as json, not as plain text.
+For use in automatically processing the results of a dictionary lookup.
+.TP 8
+.B "\-\-utf8\-output"
+Force sdcv to not convert to locale charset, output in utf8
+.TP 8
+.B "\-\-utf8\-input"
+Force sdcv to not convert from locale charset, assume that
+input is in utf8
+.TP 8
+.B "\-\-data\-dir path/to/directory"
+Use this directory as the path to the stardict data directory. This means that
+sdcv searches for dictionaries in data-dir/dic directory.
+.TP 8
+.B "\-\-color"
+Use ANSI escape codes for colorizing sdcv output (does not work with json output).
+.SH FILES
+.TP
+/usr/share/stardict/dic
+.TP
+$(HOME)/.stardict/dic
+
+Place where sdcv expects to find dictionaries.
+Instead of /usr/share/stardict/dic you can use any directory
+you want, just set the STARDICT_DATA_DIR environment variable.
+For example, if you have dictionaries in /mnt/data/stardict-dicts/dic,
+set STARDICT_DATA_DIR to /mnt/data/stardict-dicts.
+.TP
+$(HOME)/.sdcv_history
+
+This file includes the last $(SDCV_HISTSIZE) words, which you sought with sdcv.
+SDCV uses this file only if it was compiled with readline library support.
+.TP
+$(HOME)/.sdcv_ordering
+
+This is a text file containing one dictionary bookname per line.
+It specifies in which order the results of a search should be shown.
+.SH ENVIRONMENT
+Environment Variables Used By \fIsdcv\fR:
+.TP 20
+.B STARDICT_DATA_DIR
+If set, sdcv uses this variable as the data directory; this means that sdcv
+searches for dictionaries in $\fBSTARDICT_DATA_DIR\fR/dic
+.TP 20
+.B SDCV_HISTSIZE
+If set, sdcv writes in $(HOME)/.sdcv_history the last $(SDCV_HISTSIZE) words,
+which you look up using sdcv. If it is not set, then the last 2000 words are saved in $(HOME)/.sdcv_history.
+.TP 20
+.B SDCV_PAGER
+If SDCV_PAGER is set, its value is used as the name of the program
+to use to display the dictionary article.
+.SH BUGS
+Email bug reports to dushistov at mail dot ru. Be sure to include the word
+"sdcv" somewhere in the "Subject:" field.
+.SH AUTHORS
+Evgeniy A. Dushistov, Hu Zheng
+.SH SEE ALSO
+stardict(1), http://sdcv.sourceforge.net/, http://stardict.sourceforge.net
diff --git a/doc/uk/sdcv.1 b/doc/uk/sdcv.1
new file mode 100644
index 0000000..ff3b270
--- /dev/null
+++ b/doc/uk/sdcv.1
@@ -0,0 +1,84 @@
+.TH SDCV 1 "2004-12-06" "sdcv-0.4"
+.SH NAME
+sdcv \- консольна версія Зоряного словника [Stardict]
+.SH SYNOPSIS
+.B sdcv
+[
+.BI options
+]
+[list of words]
+.SH DESCRIPTION
+.I sdcv
+sdcv проста, міжплатформена текстова утиліта для роботи із
+словниками у форматі Зоряного словника [StarDict].
+Слово зі "списку слів", може бути рядком з початковим слешем '/'
+щоб задіяти нечіткий пошуковий алгоритм, рядок, може
+містити '?' і '*' для використання пошуку з регулярними виразами.
+Утиліта працює в діалоговому та не в інтерактивному режимах.
+Щоб вийти з діалогового режиму натискають Ctrl+D.
+У діалоговому режимі, якщо sdcv був скомпільований з підтримкою
+бібліотеки readline, Ви можете використовувати клавіші ДОГОРИ
+та ВНИЗ для роботи з хронологією.
+.SH OPTIONS
+.TP 8
+.B "\-h \-\-help"
+відображає повідомлення довідки та виходить
+.TP 8
+.B "\-v \-\-version"
+відображає версію та виходить
+.TP 8
+.B "\-l \-\-list\-dicts"
+відображає список доступних словників та виходить
+.TP 8
+.B "\-u \-\-use\-dict filename"
+для пошуку з використанням лише словника з цим іменем(bookname)
+.TP 8
+.B "\-n \-\-non\-interactive"
+для використання в скриптах
+.TP 8
+.B "\-\-utf8\-output"
+Заставити sdcv розмовляти не в системному кодуванні locale, а робити вивід в utf8
+.TP 8
+.B "\-\-utf8\-input"
+Заставити sdcv слухати не в системному кодуванні locale, а припускати що це
+ввід в utf8
+.TP 8
+.B "\-\-data\-dir path/to/directory"
+Використовуйте цю теку як шлях до теки даних зоряного словника [stardict].
+Це означає, що sdcv шукає словники у теці data-dir/dic.
+.SH FILES
+.TP
+/usr/share/stardict/dic
+.TP
+$(HOME)/.stardict/dic
+
+Місце, де sdcv очікує знайти словники.
+Замість шляху /usr/share/stardict/dic Ви можете використовувати все,
+що Ви хочете, лише встановіть змінну оточення STARDICT_DATA_DIR.
+Наприклад, якщо Ви маєте словники у теці /mnt/data/stardict-dicts/dic,
+встановіть STARDICT_DATA_DIR у /mnt/data/stardict-dicts.
+.TP
+$(HOME)/.sdcv_history
+
+Цей файл містить останні $(SDCV_HISTSIZE) слова, які Ви шукали з sdcv.
+SDCV використовує цей файл при умові, якщо sdcv був скомпільований
+з підтримкою бібліотеки readline.
+
+.SH ENVIRONMENT
+Змінні оточення для \fIsdcv\fR:
+.TP 20
+.B STARDICT_DATA_DIR
+Якщо встановлена, sdcv використає цю змінну як теку даних, це означає,
+що sdcv шукатиме словники у $\fBSTARDICT_DATA_DIR\fR/dic
+.TP 20
+.B SDCV_HISTSIZE
+Якщо встановлена, sdcv писатиме у $(HOME)/.sdcv_history лише
+останні $(SDCV_HISTSIZE) слова, які Ви шукали з sdcv. Якщо не встановлена,
+то зберігатиметься останніх 2000 слів у $(HOME)/.sdcv_history.
+.SH BUGS
+Звіти про помилки висилайте на адресу dushistov на mail крапка ru.
+Не забувайте включати слово "sdcv" десь у полі "Тема:".
+.SH AUTHORS
+Эвгений А. Душистов, Hu Zheng
+.SH SEE ALSO
+stardict(1), http://sdcv.sourceforge.net/, http://stardict.sourceforge.net
diff --git a/po/cs.po b/po/cs.po
new file mode 100644
index 0000000..0d8c3c1
--- /dev/null
+++ b/po/cs.po
@@ -0,0 +1,165 @@
+# Czech translations for sdcv package.
+# Copyright (C) 2008 Free Software Foundation, Inc.
+# This file is distributed under the same license as the sdcv package.
+# Michal Čihař <michal@cihar.com>, 2008.
+msgid ""
+msgstr ""
+"Project-Id-Version: sdcv 0.4.2\n"
+"Report-Msgid-Bugs-To: dushistov@mail.ru\n"
+"POT-Creation-Date: 2017-08-16 09:52+0300\n"
+"PO-Revision-Date: 2008-09-24 10:42+0200\n"
+"Last-Translator: Michal Čihař <michal@cihar.com>\n"
+"Language-Team: Czech <cs@li.org>\n"
+"Language: cs\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=3; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2;\n"
+
+#: ../src/libwrapper.cpp:300
+msgid "popen failed"
+msgstr "popen selhalo"
+
+#: ../src/libwrapper.cpp:340
+#, c-format
+msgid "Can not convert %s to utf8.\n"
+msgstr "Nepodařilo se převést %s do utf8.\n"
+
+#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432
+#, fuzzy, c-format
+msgid "Found %zu items, similar to %s.\n"
+msgstr "Nalezeno %d záznamů podobných %s.\n"
+
+#: ../src/libwrapper.cpp:416
+msgid "Your choice[-1 to abort]: "
+msgstr "Vaše volba [-1 pro ukončení]: "
+
+#: ../src/libwrapper.cpp:426
+#, fuzzy, c-format
+msgid ""
+"Invalid choice.\n"
+"It must be from 0 to %zu or -1.\n"
+msgstr ""
+"Chybná volba.\n"
+"Musí být mezi 0 a %d nebo -1.\n"
+
+#: ../src/libwrapper.cpp:445
+#, c-format
+msgid "Nothing similar to %s, sorry :(\n"
+msgstr "Nic podobného %s nenalezeno, promiň :(\n"
+
+#: ../src/sdcv.cpp:88
+#, fuzzy
+msgid "display version information and exit"
+msgstr "-v, --version zobrazí informace o verzi a skončí\n"
+
+#: ../src/sdcv.cpp:90
+#, fuzzy
+msgid "display list of available dictionaries and exit"
+msgstr "-l, --list-dicts zobrazí seznam dostupných slovníků a skončí\n"
+
+#: ../src/sdcv.cpp:92
+#, fuzzy
+msgid "for search use only dictionary with this bookname"
+msgstr "-u, --use-dict jméno vyhledávat jen v zadaném slovníku\n"
+
+#: ../src/sdcv.cpp:93
+msgid "bookname"
+msgstr ""
+
+#: ../src/sdcv.cpp:95
+msgid "for use in scripts"
+msgstr ""
+
+#: ../src/sdcv.cpp:97
+msgid "print the result formatted as JSON"
+msgstr ""
+
+#: ../src/sdcv.cpp:99
+msgid "do not fuzzy-search for similar words, only return exact matches"
+msgstr ""
+
+#: ../src/sdcv.cpp:101
+#, fuzzy
+msgid "output must be in utf8"
+msgstr "--utf8-output výstup musí být v utf8\n"
+
+#: ../src/sdcv.cpp:103
+#, fuzzy
+msgid "input of sdcv in utf8"
+msgstr "--utf8-input vstup musí být v utf8\n"
+
+#: ../src/sdcv.cpp:105
+#, fuzzy
+msgid "use this directory as path to stardict data directory"
+msgstr ""
+"--data-dir cesta/někam použít tento adresář jako cestu ke slovníkům "
+"stardict\n"
+
+#: ../src/sdcv.cpp:106
+msgid "path/to/dir"
+msgstr ""
+
+#: ../src/sdcv.cpp:108
+msgid ""
+"only use the dictionaries in data-dir, do not search in user and system "
+"directories"
+msgstr ""
+
+#: ../src/sdcv.cpp:110
+msgid "colorize the output"
+msgstr ""
+
+#: ../src/sdcv.cpp:115
+msgid " words"
+msgstr ""
+
+#: ../src/sdcv.cpp:121
+#, c-format
+msgid "Invalid command line arguments: %s\n"
+msgstr ""
+
+#: ../src/sdcv.cpp:127
+#, c-format
+msgid "Console version of Stardict, version %s\n"
+msgstr "Konzolová verze Stardictu, verze %s\n"
+
+#: ../src/sdcv.cpp:202
+#, c-format
+msgid "g_mkdir failed: %s\n"
+msgstr "g_mkdir selhalo: %s\n"
+
+#: ../src/sdcv.cpp:217
+msgid "Enter word or phrase: "
+msgstr "Zadejte slovo nebo frázi: "
+
+#: ../src/sdcv.cpp:225
+#, c-format
+msgid "There are no words/phrases to translate.\n"
+msgstr "Nebylo zadáno nic k přeložení.\n"
+
+#: ../src/sdcv.cpp:237
+#, c-format
+msgid "Dictionary's name Word count\n"
+msgstr "Jméno slovníku Počet slov\n"
+
+#: ../src/utils.cpp:48
+#, fuzzy, c-format
+msgid "Can not convert %s to current locale.\n"
+msgstr "Nepodařilo se převést %s do utf8.\n"
+
+#~ msgid ""
+#~ "Unknown option.\n"
+#~ "Try '%s --help' for more information.\n"
+#~ msgstr ""
+#~ "Neznámý parametr.\n"
+#~ "Zkuste '%s --help' pro nápovědu.\n"
+
+#~ msgid "Usage: %s [OPTIONS] words\n"
+#~ msgstr "Použití: %s [PARAMETRY] slova\n"
+
+#~ msgid "-h, --help display this help and exit\n"
+#~ msgstr "-h, --help zobrazí tuto nápovědu a skončí\n"
+
+#~ msgid "-n, --non-interactive for use in scripts\n"
+#~ msgstr "-n, --non-interactive pro použití ve skriptech\n"
diff --git a/po/en@boldquot.header b/po/en@boldquot.header
new file mode 100644
index 0000000..fedb6a0
--- /dev/null
+++ b/po/en@boldquot.header
@@ -0,0 +1,25 @@
+# All this catalog "translates" are quotation characters.
+# The msgids must be ASCII and therefore cannot contain real quotation
+# characters, only substitutes like grave accent (0x60), apostrophe (0x27)
+# and double quote (0x22). These substitutes look strange; see
+# http://www.cl.cam.ac.uk/~mgk25/ucs/quotes.html
+#
+# This catalog translates grave accent (0x60) and apostrophe (0x27) to
+# left single quotation mark (U+2018) and right single quotation mark (U+2019).
+# It also translates pairs of apostrophe (0x27) to
+# left single quotation mark (U+2018) and right single quotation mark (U+2019)
+# and pairs of quotation mark (0x22) to
+# left double quotation mark (U+201C) and right double quotation mark (U+201D).
+#
+# When output to an UTF-8 terminal, the quotation characters appear perfectly.
+# When output to an ISO-8859-1 terminal, the single quotation marks are
+# transliterated to apostrophes (by iconv in glibc 2.2 or newer) or to
+# grave/acute accent (by libiconv), and the double quotation marks are
+# transliterated to 0x22.
+# When output to an ASCII terminal, the single quotation marks are
+# transliterated to apostrophes, and the double quotation marks are
+# transliterated to 0x22.
+#
+# This catalog furthermore displays the text between the quotation marks in
+# bold face, assuming the VT100/XTerm escape sequences.
+#
diff --git a/po/en@quot.header b/po/en@quot.header
new file mode 100644
index 0000000..a9647fc
--- /dev/null
+++ b/po/en@quot.header
@@ -0,0 +1,22 @@
+# All this catalog "translates" are quotation characters.
+# The msgids must be ASCII and therefore cannot contain real quotation
+# characters, only substitutes like grave accent (0x60), apostrophe (0x27)
+# and double quote (0x22). These substitutes look strange; see
+# http://www.cl.cam.ac.uk/~mgk25/ucs/quotes.html
+#
+# This catalog translates grave accent (0x60) and apostrophe (0x27) to
+# left single quotation mark (U+2018) and right single quotation mark (U+2019).
+# It also translates pairs of apostrophe (0x27) to
+# left single quotation mark (U+2018) and right single quotation mark (U+2019)
+# and pairs of quotation mark (0x22) to
+# left double quotation mark (U+201C) and right double quotation mark (U+201D).
+#
+# When output to an UTF-8 terminal, the quotation characters appear perfectly.
+# When output to an ISO-8859-1 terminal, the single quotation marks are
+# transliterated to apostrophes (by iconv in glibc 2.2 or newer) or to
+# grave/acute accent (by libiconv), and the double quotation marks are
+# transliterated to 0x22.
+# When output to an ASCII terminal, the single quotation marks are
+# transliterated to apostrophes, and the double quotation marks are
+# transliterated to 0x22.
+#
diff --git a/po/fr.po b/po/fr.po
new file mode 100644
index 0000000..c07403d
--- /dev/null
+++ b/po/fr.po
@@ -0,0 +1,175 @@
+# French translations for sdcv package
+# Traduction française pour le paquet sdcv.
+# Copyright (C) 2004 THE sdcv'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the sdcv package.
+# <dushistov@mail.ru>, 2004.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sdcv 0.4.2\n"
+"Report-Msgid-Bugs-To: dushistov@mail.ru\n"
+"POT-Creation-Date: 2017-08-16 09:52+0300\n"
+"PO-Revision-Date: 2009-06-15 23:20+0800\n"
+"Language-Team: Vincent Petry <PVince81@yahoo.fr>\n"
+"Language: fr\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; "
+"plural=(n > 1);\n"
+
+#: ../src/libwrapper.cpp:300
+msgid "popen failed"
+msgstr "Échec de popen"
+
+#: ../src/libwrapper.cpp:340
+#, c-format
+msgid "Can not convert %s to utf8.\n"
+msgstr "Ne peut convertir %s au format utf8.\n"
+
+#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432
+#, fuzzy, c-format
+msgid "Found %zu items, similar to %s.\n"
+msgstr "Trouvé %d éléments similaires à %s.\n"
+
+#: ../src/libwrapper.cpp:416
+msgid "Your choice[-1 to abort]: "
+msgstr "Votre choix[-1 pour abandonner] : "
+
+#: ../src/libwrapper.cpp:426
+#, fuzzy, c-format
+msgid ""
+"Invalid choice.\n"
+"It must be from 0 to %zu or -1.\n"
+msgstr ""
+"Selection invalide.\n"
+"Veuillez choisir un nombre entre 0 et %d, ou -1.\n"
+
+#: ../src/libwrapper.cpp:445
+#, c-format
+msgid "Nothing similar to %s, sorry :(\n"
+msgstr "Aucun mot/phrase similaire à %s, désolé :(\n"
+
+#: ../src/sdcv.cpp:88
+#, fuzzy
+msgid "display version information and exit"
+msgstr ""
+"-v, --version afficher les informations de version et sortir\n"
+
+#: ../src/sdcv.cpp:90
+#, fuzzy
+msgid "display list of available dictionaries and exit"
+msgstr ""
+"-l, --list-dicts afficher la liste des dictionnaires disponibles et "
+"sortir\n"
+
+#: ../src/sdcv.cpp:92
+#, fuzzy
+msgid "for search use only dictionary with this bookname"
+msgstr ""
+"-u, --use-dict nom_dict pour chercher seulement en utilisant le "
+"dictionnaire spécifié\n"
+
+#: ../src/sdcv.cpp:93
+msgid "bookname"
+msgstr ""
+
+#: ../src/sdcv.cpp:95
+msgid "for use in scripts"
+msgstr ""
+
+#: ../src/sdcv.cpp:97
+msgid "print the result formatted as JSON"
+msgstr ""
+
+#: ../src/sdcv.cpp:99
+msgid "do not fuzzy-search for similar words, only return exact matches"
+msgstr ""
+
+#: ../src/sdcv.cpp:101
+#, fuzzy
+msgid "output must be in utf8"
+msgstr "--utf8-output force la sortie au format utf8\n"
+
+#: ../src/sdcv.cpp:103
+#, fuzzy
+msgid "input of sdcv in utf8"
+msgstr "--utf8-input force l'entrée de sdcv au format utf8\n"
+
+#: ../src/sdcv.cpp:105
+#, fuzzy
+msgid "use this directory as path to stardict data directory"
+msgstr ""
+"--data-dir chemin utiliser ce chemin pour trouver les données de "
+"stardict\n"
+
+#: ../src/sdcv.cpp:106
+msgid "path/to/dir"
+msgstr ""
+
+#: ../src/sdcv.cpp:108
+msgid ""
+"only use the dictionaries in data-dir, do not search in user and system "
+"directories"
+msgstr ""
+
+#: ../src/sdcv.cpp:110
+msgid "colorize the output"
+msgstr ""
+
+#: ../src/sdcv.cpp:115
+msgid " words"
+msgstr ""
+
+#: ../src/sdcv.cpp:121
+#, c-format
+msgid "Invalid command line arguments: %s\n"
+msgstr ""
+
+#: ../src/sdcv.cpp:127
+#, c-format
+msgid "Console version of Stardict, version %s\n"
+msgstr "Version console de Stardict, version %s\n"
+
+#: ../src/sdcv.cpp:202
+#, c-format
+msgid "g_mkdir failed: %s\n"
+msgstr "Échec de g_mkdir : %s\n"
+
+#: ../src/sdcv.cpp:217
+msgid "Enter word or phrase: "
+msgstr "Entrez un mot ou une phrase: "
+
+#: ../src/sdcv.cpp:225
+#, c-format
+msgid "There are no words/phrases to translate.\n"
+msgstr "Il n'y a pas de mots/phrases à traduire.\n"
+
+#: ../src/sdcv.cpp:237
+#, c-format
+msgid "Dictionary's name Word count\n"
+msgstr "Nom dictionnaire Nombre de mots\n"
+
+#: ../src/utils.cpp:48
+#, c-format
+msgid "Can not convert %s to current locale.\n"
+msgstr "Ne peut pas convertir %s dans la locale courante.\n"
+
+#~ msgid ""
+#~ "Unknown option.\n"
+#~ "Try '%s --help' for more information.\n"
+#~ msgstr ""
+#~ "Option inconnue.\n"
+#~ "Essayez '%s --help' pour plus d'informations.\n"
+
+#~ msgid "Usage: %s [OPTIONS] words\n"
+#~ msgstr "Usage: %s [OPTIONS] mots\n"
+
+#~ msgid "-h, --help display this help and exit\n"
+#~ msgstr "-h, --help afficher cet aide et sortir\n"
+
+#~ msgid "-n, --non-interactive for use in scripts\n"
+#~ msgstr "-n, --non-interactive pour l'utilisation dans des scripts\n"
+
+#~ msgid "There is no dictionary with this bookname: %s.\n"
+#~ msgstr "Pas de dictionnaire : %s.\n"
diff --git a/po/ru.po b/po/ru.po
new file mode 100644
index 0000000..abbb597
--- /dev/null
+++ b/po/ru.po
@@ -0,0 +1,145 @@
+# Russian translations for sdcv package
+# Русские переводы для пакета sdcv.
+# Copyright (C) 2004 THE sdcv'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the sdcv package.
+# <dushistov@mail.ru>, 2004.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sdcv 0.5\n"
+"Report-Msgid-Bugs-To: dushistov@mail.ru\n"
+"POT-Creation-Date: 2017-08-16 09:52+0300\n"
+"PO-Revision-Date: 2017-08-16 10:05+0300\n"
+"Last-Translator: Evgeniy Dushistov <dushistov@mail.ru>\n"
+"Language-Team: Russian <ru@li.org>\n"
+"Language: ru\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n"
+"%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);\n"
+
+#: ../src/libwrapper.cpp:300
+msgid "popen failed"
+msgstr "функция popen завершилась с ошибкой"
+
+#: ../src/libwrapper.cpp:340
+#, c-format
+msgid "Can not convert %s to utf8.\n"
+msgstr "Не могу преобразовать %s в utf8.\n"
+
+#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432
+#, c-format
+msgid "Found %zu items, similar to %s.\n"
+msgstr "Найдено %zu слов, похожих на %s.\n"
+
+#: ../src/libwrapper.cpp:416
+msgid "Your choice[-1 to abort]: "
+msgstr "Ваш выбор[-1 - отмена]: "
+
+#: ../src/libwrapper.cpp:426
+#, c-format
+msgid ""
+"Invalid choice.\n"
+"It must be from 0 to %zu or -1.\n"
+msgstr ""
+"Неправильный выбор.\n"
+"Должно быть от 0 до %zu или -1.\n"
+
+#: ../src/libwrapper.cpp:445
+#, c-format
+msgid "Nothing similar to %s, sorry :(\n"
+msgstr "Ничего похожего на %s, извините :(\n"
+
+#: ../src/sdcv.cpp:88
+msgid "display version information and exit"
+msgstr "показать номер версии и завершить работу"
+
+#: ../src/sdcv.cpp:90
+msgid "display list of available dictionaries and exit"
+msgstr "показать список доступных словарей и завершить работу"
+
+#: ../src/sdcv.cpp:92
+msgid "for search use only dictionary with this bookname"
+msgstr "для поиска использовать только этот словарь с таким именем"
+
+#: ../src/sdcv.cpp:93
+msgid "bookname"
+msgstr "имя_словаря"
+
+#: ../src/sdcv.cpp:95
+msgid "for use in scripts"
+msgstr "для использования в скриптах"
+
+#: ../src/sdcv.cpp:97
+msgid "print the result formatted as JSON"
+msgstr "выдать результат в JSON формате"
+
+#: ../src/sdcv.cpp:99
+msgid "do not fuzzy-search for similar words, only return exact matches"
+msgstr "не использовать нечеткий поиск похожих слов, вернуть только точные совпадения"
+
+#: ../src/sdcv.cpp:101
+msgid "output must be in utf8"
+msgstr "вывод программы должен быть в utf8"
+
+#: ../src/sdcv.cpp:103
+msgid "input of sdcv in utf8"
+msgstr "ввод программы в utf8"
+
+#: ../src/sdcv.cpp:105
+msgid "use this directory as path to stardict data directory"
+msgstr ""
+"использовать эту директорию в качестве пути к \"stardict data\" директории"
+
+#: ../src/sdcv.cpp:106
+msgid "path/to/dir"
+msgstr "путь/до/директории"
+
+#: ../src/sdcv.cpp:108
+msgid ""
+"only use the dictionaries in data-dir, do not search in user and system "
+"directories"
+msgstr "использовать словари только из data-dir, не искать в пользовательских и системных каталогах"
+
+#: ../src/sdcv.cpp:110
+msgid "colorize the output"
+msgstr "раскрашивать вывод в разные цвета"
+
+#: ../src/sdcv.cpp:115
+msgid " words"
+msgstr "слова"
+
+#: ../src/sdcv.cpp:121
+#, c-format
+msgid "Invalid command line arguments: %s\n"
+msgstr "Неправильный аргумент командной строки: %s\n"
+
+#: ../src/sdcv.cpp:127
+#, c-format
+msgid "Console version of Stardict, version %s\n"
+msgstr "Консольная версия StarDict, версия %s\n"
+
+#: ../src/sdcv.cpp:202
+#, c-format
+msgid "g_mkdir failed: %s\n"
+msgstr "g_mkdir завершился с ошибкой: %s\n"
+
+#: ../src/sdcv.cpp:217
+msgid "Enter word or phrase: "
+msgstr "Введите слово или фразу: "
+
+#: ../src/sdcv.cpp:225
+#, c-format
+msgid "There are no words/phrases to translate.\n"
+msgstr "Не задано слова/фразы для перевода.\n"
+
+#: ../src/sdcv.cpp:237
+#, c-format
+msgid "Dictionary's name Word count\n"
+msgstr "Название словаря Количество слов\n"
+
+#: ../src/utils.cpp:48
+#, c-format
+msgid "Can not convert %s to current locale.\n"
+msgstr "Не могу преобразовать %s в локальную кодировку.\n"
diff --git a/po/sdcv.pot b/po/sdcv.pot
new file mode 100644
index 0000000..5348adb
--- /dev/null
+++ b/po/sdcv.pot
@@ -0,0 +1,139 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR Free Software Foundation, Inc.
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sdcv 0.5\n"
+"Report-Msgid-Bugs-To: dushistov@mail.ru\n"
+"POT-Creation-Date: 2017-08-16 09:52+0300\n"
+"PO-Revision-Date: 2017-08-16 10:01+0300\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: ../src/libwrapper.cpp:300
+msgid "popen failed"
+msgstr ""
+
+#: ../src/libwrapper.cpp:340
+#, c-format
+msgid "Can not convert %s to utf8.\n"
+msgstr ""
+
+#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432
+#, c-format
+msgid "Found %zu items, similar to %s.\n"
+msgstr ""
+
+#: ../src/libwrapper.cpp:416
+msgid "Your choice[-1 to abort]: "
+msgstr ""
+
+#: ../src/libwrapper.cpp:426
+#, c-format
+msgid ""
+"Invalid choice.\n"
+"It must be from 0 to %zu or -1.\n"
+msgstr ""
+
+#: ../src/libwrapper.cpp:445
+#, c-format
+msgid "Nothing similar to %s, sorry :(\n"
+msgstr ""
+
+#: ../src/sdcv.cpp:88
+msgid "display version information and exit"
+msgstr ""
+
+#: ../src/sdcv.cpp:90
+msgid "display list of available dictionaries and exit"
+msgstr ""
+
+#: ../src/sdcv.cpp:92
+msgid "for search use only dictionary with this bookname"
+msgstr ""
+
+#: ../src/sdcv.cpp:93
+msgid "bookname"
+msgstr ""
+
+#: ../src/sdcv.cpp:95
+msgid "for use in scripts"
+msgstr ""
+
+#: ../src/sdcv.cpp:97
+msgid "print the result formatted as JSON"
+msgstr ""
+
+#: ../src/sdcv.cpp:99
+msgid "do not fuzzy-search for similar words, only return exact matches"
+msgstr ""
+
+#: ../src/sdcv.cpp:101
+msgid "output must be in utf8"
+msgstr ""
+
+#: ../src/sdcv.cpp:103
+msgid "input of sdcv in utf8"
+msgstr ""
+
+#: ../src/sdcv.cpp:105
+msgid "use this directory as path to stardict data directory"
+msgstr ""
+
+#: ../src/sdcv.cpp:106
+msgid "path/to/dir"
+msgstr ""
+
+#: ../src/sdcv.cpp:108
+msgid ""
+"only use the dictionaries in data-dir, do not search in user and system "
+"directories"
+msgstr ""
+
+#: ../src/sdcv.cpp:110
+msgid "colorize the output"
+msgstr ""
+
+#: ../src/sdcv.cpp:115
+msgid " words"
+msgstr ""
+
+#: ../src/sdcv.cpp:121
+#, c-format
+msgid "Invalid command line arguments: %s\n"
+msgstr ""
+
+#: ../src/sdcv.cpp:127
+#, c-format
+msgid "Console version of Stardict, version %s\n"
+msgstr ""
+
+#: ../src/sdcv.cpp:202
+#, c-format
+msgid "g_mkdir failed: %s\n"
+msgstr ""
+
+#: ../src/sdcv.cpp:217
+msgid "Enter word or phrase: "
+msgstr ""
+
+#: ../src/sdcv.cpp:225
+#, c-format
+msgid "There are no words/phrases to translate.\n"
+msgstr ""
+
+#: ../src/sdcv.cpp:237
+#, c-format
+msgid "Dictionary's name Word count\n"
+msgstr ""
+
+#: ../src/utils.cpp:48
+#, c-format
+msgid "Can not convert %s to current locale.\n"
+msgstr ""
diff --git a/po/sk.po b/po/sk.po
new file mode 100644
index 0000000..c248cec
--- /dev/null
+++ b/po/sk.po
@@ -0,0 +1,172 @@
+# translation of sdcv.po to Slovak
+# Copyright (C)
+# Zdenko Podobný <zdpo@mailbox.sk>, 2004, 2005.
+# Ivan Masár <helix84@centrum.sk>, 2007.
+msgid ""
+msgstr ""
+"Project-Id-Version: sdcv 0.3.2\n"
+"Report-Msgid-Bugs-To: dushistov@mail.ru\n"
+"POT-Creation-Date: 2017-08-16 09:52+0300\n"
+"PO-Revision-Date: 2007-09-11 00:22+0100\n"
+"Last-Translator: Ivan Masár <helix84@centrum.sk>\n"
+"Language-Team: Slovak <sk-i18n@lists.linux.sk>\n"
+"Language: sk\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"X-Generator: KBabel 1.9.1\n"
+"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n"
+"%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);\n"
+
+#: ../src/libwrapper.cpp:300
+msgid "popen failed"
+msgstr "popen zlyhalo"
+
+#: ../src/libwrapper.cpp:340
+#, c-format
+msgid "Can not convert %s to utf8.\n"
+msgstr "nie je možné konvertovať %s na utf8.\n"
+
+#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432
+#, fuzzy, c-format
+msgid "Found %zu items, similar to %s.\n"
+msgstr "Nájdené %d položiek, podobných %s,\n"
+
+#: ../src/libwrapper.cpp:416
+#, fuzzy
+msgid "Your choice[-1 to abort]: "
+msgstr "Vaša voľba[-1 zruší]: "
+
+#: ../src/libwrapper.cpp:426
+#, fuzzy, c-format
+msgid ""
+"Invalid choice.\n"
+"It must be from 0 to %zu or -1.\n"
+msgstr ""
+"Neplatná voľba.\n"
+"Musí byť od 0 do %d alebo -1.\n"
+
+#: ../src/libwrapper.cpp:445
+#, c-format
+msgid "Nothing similar to %s, sorry :(\n"
+msgstr "Ľutujem, nič sa nepodobá na %s :(\n"
+
+#: ../src/sdcv.cpp:88
+#, fuzzy
+msgid "display version information and exit"
+msgstr "-v, --version zobrazí informácie o verzii a skončí\n"
+
+#: ../src/sdcv.cpp:90
+#, fuzzy
+msgid "display list of available dictionaries and exit"
+msgstr ""
+"-l, --list-dicts zobrazí zoznam dostupných slovníkov a skončí\n"
+
+#: ../src/sdcv.cpp:92
+#, fuzzy
+msgid "for search use only dictionary with this bookname"
+msgstr "-u, --use-dict názov použiť pre hľadanie iba zvolený slovník\n"
+
+#: ../src/sdcv.cpp:93
+msgid "bookname"
+msgstr ""
+
+#: ../src/sdcv.cpp:95
+msgid "for use in scripts"
+msgstr ""
+
+#: ../src/sdcv.cpp:97
+msgid "print the result formatted as JSON"
+msgstr ""
+
+#: ../src/sdcv.cpp:99
+msgid "do not fuzzy-search for similar words, only return exact matches"
+msgstr ""
+
+#: ../src/sdcv.cpp:101
+#, fuzzy
+msgid "output must be in utf8"
+msgstr "--utf8-output výstup musí byť v utf8\n"
+
+#: ../src/sdcv.cpp:103
+#, fuzzy
+msgid "input of sdcv in utf8"
+msgstr "--utf8-input vstup pre sdcv je v utf8\n"
+
+#: ../src/sdcv.cpp:105
+#, fuzzy
+msgid "use this directory as path to stardict data directory"
+msgstr ""
+"--data-dir path/to/dir použiť tento priečinok ako cestu pre stardict "
+"dátový priečinok\n"
+
+#: ../src/sdcv.cpp:106
+msgid "path/to/dir"
+msgstr ""
+
+#: ../src/sdcv.cpp:108
+msgid ""
+"only use the dictionaries in data-dir, do not search in user and system "
+"directories"
+msgstr ""
+
+#: ../src/sdcv.cpp:110
+msgid "colorize the output"
+msgstr ""
+
+#: ../src/sdcv.cpp:115
+msgid " words"
+msgstr ""
+
+#: ../src/sdcv.cpp:121
+#, c-format
+msgid "Invalid command line arguments: %s\n"
+msgstr ""
+
+#: ../src/sdcv.cpp:127
+#, c-format
+msgid "Console version of Stardict, version %s\n"
+msgstr "Konzolová verzia StarDict, verzia %s\n"
+
+#: ../src/sdcv.cpp:202
+#, c-format
+msgid "g_mkdir failed: %s\n"
+msgstr "g_mkdir zlyhalo: %s\n"
+
+#: ../src/sdcv.cpp:217
+msgid "Enter word or phrase: "
+msgstr "Vložte slovo alebo frázu: "
+
+#: ../src/sdcv.cpp:225
+#, c-format
+msgid "There are no words/phrases to translate.\n"
+msgstr "Nie je čo preložiť.\n"
+
+#: ../src/sdcv.cpp:237
+#, c-format
+msgid "Dictionary's name Word count\n"
+msgstr "názov slovníka počet slov\n"
+
+#: ../src/utils.cpp:48
+#, c-format
+msgid "Can not convert %s to current locale.\n"
+msgstr "Nie je možné konvertovať %s na aktuálne locale.\n"
+
+#~ msgid ""
+#~ "Unknown option.\n"
+#~ "Try '%s --help' for more information.\n"
+#~ msgstr ""
+#~ "Neznáma voľba.\n"
+#~ "Skúste „%s --help“.\n"
+
+#~ msgid "Usage: %s [OPTIONS] words\n"
+#~ msgstr "Použitie: %s [VOĽBY] slová\n"
+
+#~ msgid "-h, --help display this help and exit\n"
+#~ msgstr "-h, --help zobrazí tento text a skončí\n"
+
+#~ msgid "-n, --non-interactive for use in scripts\n"
+#~ msgstr "-n, --non-interactive pre použitie v skriptoch\n"
+
+#~ msgid "There is no dictionary with this bookname: %s.\n"
+#~ msgstr "Neexistuje slovník s takýmto názvom: %s.\n"
diff --git a/po/uk.po b/po/uk.po
new file mode 100644
index 0000000..577b640
--- /dev/null
+++ b/po/uk.po
@@ -0,0 +1,176 @@
+# Ukrainian translations for sdcv package
+# Українські переклади для пакету sdcv.
+# Copyright (C) 2004 THE sdcv'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the sdcv package.
+# <dushistov@mail.ru>, 2004.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sdcv 0.3\n"
+"Report-Msgid-Bugs-To: dushistov@mail.ru\n"
+"POT-Creation-Date: 2017-08-16 09:52+0300\n"
+"PO-Revision-Date: 2004-12-14 17:54+0300\n"
+"Last-Translator: <dubyk@lsl.lviv.ua>\n"
+"Language-Team: Ukrainian <dubyk@lsl.lviv.ua>\n"
+"Language: uk\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=3; plural=(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n"
+"%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);\n"
+
+#: ../src/libwrapper.cpp:300
+msgid "popen failed"
+msgstr ""
+
+#: ../src/libwrapper.cpp:340
+#, c-format
+msgid "Can not convert %s to utf8.\n"
+msgstr "Не можу перетворити %s у utf8.\n"
+
+#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432
+#, fuzzy, c-format
+msgid "Found %zu items, similar to %s.\n"
+msgstr "Знайдено %d слів, схожих на %s.\n"
+
+#: ../src/libwrapper.cpp:416
+#, fuzzy
+msgid "Your choice[-1 to abort]: "
+msgstr "Ваш вибір: "
+
+#: ../src/libwrapper.cpp:426
+#, fuzzy, c-format
+msgid ""
+"Invalid choice.\n"
+"It must be from 0 to %zu or -1.\n"
+msgstr ""
+"Неправильний вибір.\n"
+"Повинно бути від 0 до %d.\n"
+
+#: ../src/libwrapper.cpp:445
+#, c-format
+msgid "Nothing similar to %s, sorry :(\n"
+msgstr "Нічого схожого на %s, даруйте :(\n"
+
+#: ../src/sdcv.cpp:88
+#, fuzzy
+msgid "display version information and exit"
+msgstr "-v, --version показати номер версії і завершити роботу\n"
+
+#: ../src/sdcv.cpp:90
+#, fuzzy
+msgid "display list of available dictionaries and exit"
+msgstr ""
+"-l, --list-dicts показати список доступних словників і завершити "
+"роботу\n"
+
+#: ../src/sdcv.cpp:92
+#, fuzzy
+msgid "for search use only dictionary with this bookname"
+msgstr ""
+"-u, --use-dict ім`я словника для пошуку використовувати лише цей словник\n"
+
+#: ../src/sdcv.cpp:93
+msgid "bookname"
+msgstr ""
+
+#: ../src/sdcv.cpp:95
+msgid "for use in scripts"
+msgstr ""
+
+#: ../src/sdcv.cpp:97
+msgid "print the result formatted as JSON"
+msgstr ""
+
+#: ../src/sdcv.cpp:99
+msgid "do not fuzzy-search for similar words, only return exact matches"
+msgstr ""
+
+#: ../src/sdcv.cpp:101
+#, fuzzy
+msgid "output must be in utf8"
+msgstr "--utf8-output вивід програми повинен бути в utf8\n"
+
+#: ../src/sdcv.cpp:103
+#, fuzzy
+msgid "input of sdcv in utf8"
+msgstr "--utf8-input ввід програми в utf8\n"
+
+#: ../src/sdcv.cpp:105
+#, fuzzy
+msgid "use this directory as path to stardict data directory"
+msgstr ""
+"--data-dir тека використовувати цю теку як шлях до stardict data "
+"directory\n"
+
+#: ../src/sdcv.cpp:106
+msgid "path/to/dir"
+msgstr ""
+
+#: ../src/sdcv.cpp:108
+msgid ""
+"only use the dictionaries in data-dir, do not search in user and system "
+"directories"
+msgstr ""
+
+#: ../src/sdcv.cpp:110
+msgid "colorize the output"
+msgstr ""
+
+#: ../src/sdcv.cpp:115
+msgid " words"
+msgstr ""
+
+#: ../src/sdcv.cpp:121
+#, c-format
+msgid "Invalid command line arguments: %s\n"
+msgstr ""
+
+#: ../src/sdcv.cpp:127
+#, c-format
+msgid "Console version of Stardict, version %s\n"
+msgstr "Консольна версія Зоряного словника [Stardict], номер версії %s\n"
+
+#: ../src/sdcv.cpp:202
+#, c-format
+msgid "g_mkdir failed: %s\n"
+msgstr ""
+
+#: ../src/sdcv.cpp:217
+msgid "Enter word or phrase: "
+msgstr "Введіть слово або фразу: "
+
+#: ../src/sdcv.cpp:225
+#, c-format
+msgid "There are no words/phrases to translate.\n"
+msgstr "Не задано слова/фрази для перекладу.\n"
+
+#: ../src/sdcv.cpp:237
+#, fuzzy, c-format
+msgid "Dictionary's name Word count\n"
+msgstr "назва словника кількість слів\n"
+
+#: ../src/utils.cpp:48
+#, c-format
+msgid "Can not convert %s to current locale.\n"
+msgstr "Не можу перетворити %s у локальне кодування.\n"
+
+#~ msgid ""
+#~ "Unknown option.\n"
+#~ "Try '%s --help' for more information.\n"
+#~ msgstr ""
+#~ "Невідома опція.\n"
+#~ "Спробуйте '%s --help' для отримання докладнішої інформації.\n"
+
+#~ msgid "Usage: %s [OPTIONS] words\n"
+#~ msgstr "Використання: %s [OPTIONS] слова\n"
+
+#~ msgid "-h, --help display this help and exit\n"
+#~ msgstr ""
+#~ "-h, --help показати це повідомлення і завершити роботу\n"
+
+#~ msgid "-n, --non-interactive for use in scripts\n"
+#~ msgstr "-n, --non-interactive для використання у 'скриптах'\n"
+
+#~ msgid "There is no dictionary with this bookname: %s.\n"
+#~ msgstr "Словника з таким іменем не існує: %s.\n"
diff --git a/po/zh_CN.po b/po/zh_CN.po
new file mode 100644
index 0000000..98db0a9
--- /dev/null
+++ b/po/zh_CN.po
@@ -0,0 +1,166 @@
+# Simplified Chinese translation for sdcv
+# Copyright (C) 2005 Free Software Foundation, Inc.
+# Cai Qian <caiqian@gnome.org>, 2005.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sdcv 0.3\n"
+"Report-Msgid-Bugs-To: dushistov@mail.ru\n"
+"POT-Creation-Date: 2017-08-16 09:52+0300\n"
+"PO-Revision-Date: 2005-1-17 00:58+0800\n"
+"Last-Translator: Cai Qian <caiqian@gnome.org>\n"
+"Language-Team: Simplified Chinese\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: ../src/libwrapper.cpp:300
+msgid "popen failed"
+msgstr ""
+
+#: ../src/libwrapper.cpp:340
+#, c-format
+msgid "Can not convert %s to utf8.\n"
+msgstr "无法将 %s 转换为 UTF-8。\n"
+
+#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432
+#, fuzzy, c-format
+msgid "Found %zu items, similar to %s.\n"
+msgstr "发现 %d 条记录和 %s 相似。\n"
+
+#: ../src/libwrapper.cpp:416
+#, fuzzy
+msgid "Your choice[-1 to abort]: "
+msgstr "您的选择为:"
+
+#: ../src/libwrapper.cpp:426
+#, fuzzy, c-format
+msgid ""
+"Invalid choice.\n"
+"It must be from 0 to %zu or -1.\n"
+msgstr ""
+"无效的选择。\n"
+"必须是 0 到 %d。\n"
+
+#: ../src/libwrapper.cpp:445
+#, c-format
+msgid "Nothing similar to %s, sorry :(\n"
+msgstr "对不起,没有发现和 %s 相似的 :(\n"
+
+#: ../src/sdcv.cpp:88
+#, fuzzy
+msgid "display version information and exit"
+msgstr "-v, --version 显示版本信息并退出\n"
+
+#: ../src/sdcv.cpp:90
+#, fuzzy
+msgid "display list of available dictionaries and exit"
+msgstr "-l, --list-dicts 显示可用的字典列表并退出\n"
+
+#: ../src/sdcv.cpp:92
+#, fuzzy
+msgid "for search use only dictionary with this bookname"
+msgstr "-u, --use-dict 字典名 只使用指定的字典进行单词搜索\n"
+
+#: ../src/sdcv.cpp:93
+msgid "bookname"
+msgstr ""
+
+#: ../src/sdcv.cpp:95
+msgid "for use in scripts"
+msgstr ""
+
+#: ../src/sdcv.cpp:97
+msgid "print the result formatted as JSON"
+msgstr ""
+
+#: ../src/sdcv.cpp:99
+msgid "do not fuzzy-search for similar words, only return exact matches"
+msgstr ""
+
+#: ../src/sdcv.cpp:101
+#, fuzzy
+msgid "output must be in utf8"
+msgstr "--utf8-output 输出必须是 UTF-8\n"
+
+#: ../src/sdcv.cpp:103
+#, fuzzy
+msgid "input of sdcv in utf8"
+msgstr "--utf8-input sdcv 的输入为 UTF-8\n"
+
+#: ../src/sdcv.cpp:105
+#, fuzzy
+msgid "use this directory as path to stardict data directory"
+msgstr "--data-dir 目录路径 指定 Stardict 数据所在目录的路径\n"
+
+#: ../src/sdcv.cpp:106
+msgid "path/to/dir"
+msgstr ""
+
+#: ../src/sdcv.cpp:108
+msgid ""
+"only use the dictionaries in data-dir, do not search in user and system "
+"directories"
+msgstr ""
+
+#: ../src/sdcv.cpp:110
+msgid "colorize the output"
+msgstr ""
+
+#: ../src/sdcv.cpp:115
+msgid " words"
+msgstr ""
+
+#: ../src/sdcv.cpp:121
+#, c-format
+msgid "Invalid command line arguments: %s\n"
+msgstr ""
+
+#: ../src/sdcv.cpp:127
+#, c-format
+msgid "Console version of Stardict, version %s\n"
+msgstr "Stardict 的控制台版本,版本为 %s\n"
+
+#: ../src/sdcv.cpp:202
+#, c-format
+msgid "g_mkdir failed: %s\n"
+msgstr ""
+
+#: ../src/sdcv.cpp:217
+msgid "Enter word or phrase: "
+msgstr "请输入单词或短语:"
+
+#: ../src/sdcv.cpp:225
+#, c-format
+msgid "There are no words/phrases to translate.\n"
+msgstr "没有供翻译的单词或短语。\n"
+
+#: ../src/sdcv.cpp:237
+#, fuzzy, c-format
+msgid "Dictionary's name Word count\n"
+msgstr "字典名 单词量\n"
+
+#: ../src/utils.cpp:48
+#, c-format
+msgid "Can not convert %s to current locale.\n"
+msgstr "无法将 %s 转换为当前 Locale。\n"
+
+#~ msgid ""
+#~ "Unknown option.\n"
+#~ "Try '%s --help' for more information.\n"
+#~ msgstr ""
+#~ "未知选项。\n"
+#~ "更多信息请看 '%s --help'。\n"
+
+#~ msgid "Usage: %s [OPTIONS] words\n"
+#~ msgstr "用法:%s [选项] 单词\n"
+
+#~ msgid "-h, --help display this help and exit\n"
+#~ msgstr "-h, --help 显示本帮助并退出\n"
+
+#~ msgid "-n, --non-interactive for use in scripts\n"
+#~ msgstr "-n, --non-interactive 在脚本中使用\n"
+
+#~ msgid "There is no dictionary with this bookname: %s.\n"
+#~ msgstr "没有与该字典名 ‘%s’ 相同的字典。\n"
diff --git a/po/zh_TW.po b/po/zh_TW.po
new file mode 100644
index 0000000..a722c78
--- /dev/null
+++ b/po/zh_TW.po
@@ -0,0 +1,165 @@
+# Traditional Chinese Messages for sdcv
+# Copyright (C) 2005 Free Software Foundation, Inc.
+# This file is distributed under the same license as the sdcv package.
+# Cai Qian <caiqian@gnome.org>, 2005
+# Wei-Lun Chao <bluebat@member.fsf.org>, 2005, 2013.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: sdcv 0.4.2\n"
+"Report-Msgid-Bugs-To: dushistov@mail.ru\n"
+"POT-Creation-Date: 2017-08-16 09:52+0300\n"
+"PO-Revision-Date: 2013-06-12 14:11+0800\n"
+"Last-Translator: Wei-Lun Chao <bluebat@member.fsf.org>\n"
+"Language-Team: Chinese (traditional) <zh-l10n@linux.org.tw>\n"
+"Language: zh_TW\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=1; plural=0;\n"
+
+#: ../src/libwrapper.cpp:300
+msgid "popen failed"
+msgstr "popen 失敗"
+
+#: ../src/libwrapper.cpp:340
+#, c-format
+msgid "Can not convert %s to utf8.\n"
+msgstr "無法將 %s 轉換為 UTF-8。\n"
+
+#: ../src/libwrapper.cpp:398 ../src/libwrapper.cpp:432
+#, fuzzy, c-format
+msgid "Found %zu items, similar to %s.\n"
+msgstr "找到 %d 項紀錄和 %s 相似。\n"
+
+#: ../src/libwrapper.cpp:416
+msgid "Your choice[-1 to abort]: "
+msgstr "您的選擇是[-1 表示放棄]:"
+
+#: ../src/libwrapper.cpp:426
+#, fuzzy, c-format
+msgid ""
+"Invalid choice.\n"
+"It must be from 0 to %zu or -1.\n"
+msgstr ""
+"無效的選擇。\n"
+"必須是 0 到 %d 之間或 -1。\n"
+
+#: ../src/libwrapper.cpp:445
+#, c-format
+msgid "Nothing similar to %s, sorry :(\n"
+msgstr "抱歉,沒有和 %s 相似者 :(\n"
+
+#: ../src/sdcv.cpp:88
+#, fuzzy
+msgid "display version information and exit"
+msgstr "-v, --version 顯示版本資訊並離開\n"
+
+#: ../src/sdcv.cpp:90
+#, fuzzy
+msgid "display list of available dictionaries and exit"
+msgstr "-l, --list-dicts 顯示可用的字典清單並離開\n"
+
+#: ../src/sdcv.cpp:92
+#, fuzzy
+msgid "for search use only dictionary with this bookname"
+msgstr "-u, --use-dict 字典名 只使用指定的字典進行單字搜尋\n"
+
+#: ../src/sdcv.cpp:93
+msgid "bookname"
+msgstr ""
+
+#: ../src/sdcv.cpp:95
+msgid "for use in scripts"
+msgstr ""
+
+#: ../src/sdcv.cpp:97
+msgid "print the result formatted as JSON"
+msgstr ""
+
+#: ../src/sdcv.cpp:99
+msgid "do not fuzzy-search for similar words, only return exact matches"
+msgstr ""
+
+#: ../src/sdcv.cpp:101
+#, fuzzy
+msgid "output must be in utf8"
+msgstr "--utf8-output 輸出必須是 UTF-8\n"
+
+#: ../src/sdcv.cpp:103
+#, fuzzy
+msgid "input of sdcv in utf8"
+msgstr "--utf8-input sdcv 的輸入為 UTF-8\n"
+
+#: ../src/sdcv.cpp:105
+#, fuzzy
+msgid "use this directory as path to stardict data directory"
+msgstr "--data-dir 目錄路徑 指定 Stardict 資料所在目錄的路徑\n"
+
+#: ../src/sdcv.cpp:106
+msgid "path/to/dir"
+msgstr ""
+
+#: ../src/sdcv.cpp:108
+msgid ""
+"only use the dictionaries in data-dir, do not search in user and system "
+"directories"
+msgstr ""
+
+#: ../src/sdcv.cpp:110
+msgid "colorize the output"
+msgstr ""
+
+#: ../src/sdcv.cpp:115
+msgid " words"
+msgstr ""
+
+#: ../src/sdcv.cpp:121
+#, c-format
+msgid "Invalid command line arguments: %s\n"
+msgstr ""
+
+#: ../src/sdcv.cpp:127
+#, c-format
+msgid "Console version of Stardict, version %s\n"
+msgstr "Stardict 的主控臺版本,版本為 %s\n"
+
+#: ../src/sdcv.cpp:202
+#, c-format
+msgid "g_mkdir failed: %s\n"
+msgstr "g_mkdir 失敗:%s\n"
+
+#: ../src/sdcv.cpp:217
+msgid "Enter word or phrase: "
+msgstr "請輸入單字或片語:"
+
+#: ../src/sdcv.cpp:225
+#, c-format
+msgid "There are no words/phrases to translate.\n"
+msgstr "沒有可供翻譯的單字或片語。\n"
+
+#: ../src/sdcv.cpp:237
+#, c-format
+msgid "Dictionary's name Word count\n"
+msgstr "字典名稱 單字數量\n"
+
+#: ../src/utils.cpp:48
+#, fuzzy, c-format
+msgid "Can not convert %s to current locale.\n"
+msgstr "無法將 %s 轉換為 UTF-8。\n"
+
+#~ msgid ""
+#~ "Unknown option.\n"
+#~ "Try '%s --help' for more information.\n"
+#~ msgstr ""
+#~ "不明選項。\n"
+#~ "更多資訊請看 '%s --help'。\n"
+
+#~ msgid "Usage: %s [OPTIONS] words\n"
+#~ msgstr "用法:%s [選項] 單字…\n"
+
+#~ msgid "-h, --help display this help and exit\n"
+#~ msgstr "-h, --help 顯示本輔助並離開\n"
+
+#~ msgid "-n, --non-interactive for use in scripts\n"
+#~ msgstr "-n, --non-interactive 在指令稿中使用\n"
diff --git a/src/dictziplib.cpp b/src/dictziplib.cpp
new file mode 100644
index 0000000..e8716bb
--- /dev/null
+++ b/src/dictziplib.cpp
@@ -0,0 +1,479 @@
+/* dictziplib.c --
+ * http://stardict.sourceforge.net
+ * Copyright (C) 2003-2003 Hu Zheng <huzheng_001@163.com>
+ * This file is a modified version of dictd-1.9.7's data.c
+ *
+ * data.c --
+ * Created: Tue Jul 16 12:45:41 1996 by faith@dict.org
+ * Revised: Sat Mar 30 10:46:06 2002 by faith@dict.org
+ * Copyright 1996, 1997, 1998, 2000, 2002 Rickard E. Faith (faith@dict.org)
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+//#define HAVE_MMAP // It will be defined in config.h; configure.in can do this with AC_FUNC_MMAP.
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fcntl.h>
+#include <limits.h>
+#include <unistd.h>
+
+#include <sys/stat.h>
+
+#include "dictziplib.hpp"
+
+#define USE_CACHE 1
+
+#define BUFFERSIZE 10240
+
+/*
+ * Output buffer must be greater than or
+ * equal to 110% of input buffer size, plus
+ * 12 bytes.
+*/
+#define OUT_BUFFER_SIZE 0xffffL
+
+#define IN_BUFFER_SIZE ((unsigned long)((double)(OUT_BUFFER_SIZE - 12) * 0.89))
+
+/* For gzip-compatible header, as defined in RFC 1952 */
+
+/* Magic for GZIP (rfc1952) */
+#define GZ_MAGIC1 0x1f /* First magic byte */
+#define GZ_MAGIC2 0x8b /* Second magic byte */
+
+/* FLaGs (bitmapped), from rfc1952 */
+#define GZ_FTEXT 0x01 /* Set for ASCII text */
+#define GZ_FHCRC 0x02 /* Header CRC16 */
+#define GZ_FEXTRA 0x04 /* Optional field (random access index) */
+#define GZ_FNAME 0x08 /* Original name */
+#define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */
+#define GZ_MAX 2 /* Maximum compression */
+#define GZ_FAST 4 /* Fastest compression */
+
+/* These are from rfc1952 */
+#define GZ_OS_FAT 0 /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */
+#define GZ_OS_AMIGA 1 /* Amiga */
+#define GZ_OS_VMS 2 /* VMS (or OpenVMS) */
+#define GZ_OS_UNIX 3 /* Unix */
+#define GZ_OS_VMCMS 4 /* VM/CMS */
+#define GZ_OS_ATARI 5 /* Atari TOS */
+#define GZ_OS_HPFS 6 /* HPFS filesystem (OS/2, NT) */
+#define GZ_OS_MAC 7 /* Macintosh */
+#define GZ_OS_Z 8 /* Z-System */
+#define GZ_OS_CPM 9 /* CP/M */
+#define GZ_OS_TOPS20 10 /* TOPS-20 */
+#define GZ_OS_NTFS 11 /* NTFS filesystem (NT) */
+#define GZ_OS_QDOS 12 /* QDOS */
+#define GZ_OS_ACORN 13 /* Acorn RISCOS */
+#define GZ_OS_UNKNOWN 255 /* unknown */
+
+#define GZ_RND_S1 'R' /* First magic for random access format */
+#define GZ_RND_S2 'A' /* Second magic for random access format */
+
+#define GZ_ID1 0 /* GZ_MAGIC1 */
+#define GZ_ID2 1 /* GZ_MAGIC2 */
+#define GZ_CM 2 /* Compression Method (Z_DEFLATED) */
+#define GZ_FLG 3 /* FLaGs (see above) */
+#define GZ_MTIME 4 /* Modification TIME */
+#define GZ_XFL 8 /* eXtra FLags (GZ_MAX or GZ_FAST) */
+#define GZ_OS 9 /* Operating System */
+#define GZ_XLEN 10 /* eXtra LENgth (16bit) */
+#define GZ_FEXTRA_START 12 /* Start of extra fields */
+#define GZ_SI1 12 /* Subfield ID1 */
+#define GZ_SI2 13 /* Subfield ID2 */
+#define GZ_SUBLEN 14 /* Subfield length (16bit) */
+#define GZ_VERSION 16 /* Version for subfield format */
+#define GZ_CHUNKLEN 18 /* Chunk length (16bit) */
+#define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */
+#define GZ_RNDDATA 22 /* Random access data (16bit) */
+
+#define DICT_UNKNOWN 0
+#define DICT_TEXT 1
+#define DICT_GZIP 2
+#define DICT_DZIP 3
+
+/* Read a little-endian 16-bit field; returns -1 when the stream ends early. */
+static long dz_read_le16(FILE *str)
+{
+    const int lo = getc(str);
+    const int hi = getc(str);
+    if (lo == EOF || hi == EOF)
+        return -1;
+    return (long)lo | ((long)hi << 8);
+}
+
+/* Read a little-endian 32-bit field into `value`; false when the stream ends early. */
+static bool dz_read_le32(FILE *str, unsigned long &value)
+{
+    unsigned long acc = 0;
+    for (int shift = 0; shift < 32; shift += 8) {
+        const int byte = getc(str);
+        if (byte == EOF)
+            return false;
+        /* Widen before shifting so a top byte >= 0x80 is not sign-extended. */
+        acc |= (unsigned long)byte << shift;
+    }
+    value = acc;
+    return true;
+}
+
+/* Read a zero-terminated header string into `out`; false if the terminator is missing. */
+static bool dz_read_cstring(FILE *str, std::string &out)
+{
+    int c;
+    out.clear();
+    while ((c = getc(str)) != EOF) {
+        if (c == '\0')
+            return true;
+        out += (char)c;
+    }
+    return false;
+}
+
+/*
+ * Parse the header of `fname` and fill in the header-derived members
+ * (type, headerLength, flags, mtime, chunk table, offsets, crc, length, ...).
+ *
+ * A file that does not start with the gzip magic is treated as plain text
+ * (DICT_TEXT); its CRC is only computed when `computeCRC` is non-zero.
+ * A gzip file carrying the 'R','A' extra subfield is a dictzip file
+ * (DICT_DZIP); any other gzip file is DICT_GZIP and gets an empty chunk table.
+ *
+ * Returns 0 on success, -1 if the file cannot be opened or stat'ed, 5 if the
+ * dictzip chunk length or chunk count is not positive, and 1 if the header
+ * or trailer is truncated or memory cannot be allocated.
+ */
+int DictData::read_header(const std::string &fname, int computeCRC)
+{
+    char buffer[BUFFERSIZE];
+    struct stat sb;
+    unsigned long crc = crc32(0L, Z_NULL, 0);
+    unsigned long word32;
+
+    /* Start from an empty chunk table so that text and plain-gzip files never
+     * leave these members indeterminate (close() frees chunks and offsets). */
+    this->chunkLength = 0;
+    this->chunkCount = 0;
+    this->chunks = nullptr;
+    this->offsets = nullptr;
+
+    FILE *str = fopen(fname.c_str(), "rb");
+    if (!str)
+        return -1;
+
+    /* Common error exit: drop partially built tables and close the stream. */
+    auto fail = [this, str](int code) -> int {
+        free(this->chunks);
+        free(this->offsets);
+        this->chunks = nullptr;
+        this->offsets = nullptr;
+        this->chunkCount = 0;
+        fclose(str);
+        return code;
+    };
+
+    /* headerLength holds the index of the last header byte, so the data
+     * (or the next header field) always starts at headerLength + 1. */
+    this->headerLength = GZ_XLEN - 1;
+    this->type = DICT_UNKNOWN;
+
+    const int id1 = getc(str);
+    const int id2 = getc(str);
+
+    if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2) {
+        this->type = DICT_TEXT;
+        if (fstat(fileno(str), &sb))
+            return fail(-1);
+        this->compressedLength = this->length = sb.st_size;
+        this->origFilename = fname;
+        this->mtime = sb.st_mtime;
+        if (computeCRC) {
+            size_t count;
+            rewind(str);
+            /* Loop on fread's result: feof() stays false after a read error,
+             * which would otherwise spin forever. */
+            while ((count = fread(buffer, 1, BUFFERSIZE, str)) > 0)
+                crc = crc32(crc, (Bytef *)buffer, (uInt)count);
+        }
+        this->crc = crc;
+        fclose(str);
+        return 0;
+    }
+    this->type = DICT_GZIP;
+
+    this->method = getc(str);
+    this->flags = getc(str);
+    if (!dz_read_le32(str, word32))
+        return fail(1);
+    this->mtime = word32;
+    this->extraFlags = getc(str);
+    this->os = getc(str);
+    if (this->os == EOF) /* EOF is sticky, so this covers the bytes above too */
+        return fail(1);
+
+    if (this->flags & GZ_FEXTRA) {
+        const long extraLength = dz_read_le16(str);
+        if (extraLength < 0)
+            return fail(1);
+        this->headerLength += extraLength + 2;
+        const int si1 = getc(str);
+        const int si2 = getc(str);
+
+        /* Both subfield ID bytes must match the random-access signature. */
+        if (si1 == GZ_RND_S1 && si2 == GZ_RND_S2) {
+            const long subLength = dz_read_le16(str); /* consumed, not used */
+            const long version = dz_read_le16(str);
+            const long chunkLen = dz_read_le16(str);
+            const long chunkCnt = dz_read_le16(str);
+            if (subLength < 0 || version < 0 || chunkLen < 0 || chunkCnt < 0)
+                return fail(1);
+
+            this->version = version;
+            this->chunkLength = chunkLen;
+            this->chunkCount = chunkCnt;
+
+            /* read() divides by chunkLength and indexes chunkCount entries. */
+            if (this->chunkLength <= 0 || this->chunkCount <= 0)
+                return fail(5);
+
+            this->chunks = (int *)malloc(sizeof(this->chunks[0])
+                                         * this->chunkCount);
+            if (!this->chunks)
+                return fail(1);
+            for (int i = 0; i < this->chunkCount; i++) {
+                const long chunkSize = dz_read_le16(str);
+                if (chunkSize < 0)
+                    return fail(1);
+                this->chunks[i] = chunkSize;
+            }
+            this->type = DICT_DZIP;
+        } else {
+            /* Skip the foreign extra field: the next header field starts
+             * one byte past headerLength. */
+            fseek(str, this->headerLength + 1, SEEK_SET);
+        }
+    }
+
+    if (this->flags & GZ_FNAME) {
+        /* Read straight into the std::string: no fixed-size buffer to overrun. */
+        if (!dz_read_cstring(str, this->origFilename))
+            return fail(1);
+        this->headerLength += this->origFilename.length() + 1;
+    } else {
+        this->origFilename = "";
+    }
+
+    if (this->flags & GZ_COMMENT) {
+        if (!dz_read_cstring(str, this->comment))
+            return fail(1);
+        this->headerLength += this->comment.length() + 1;
+    } else {
+        this->comment = "";
+    }
+
+    if (this->flags & GZ_FHCRC) {
+        getc(str);
+        getc(str);
+        this->headerLength += 2;
+    }
+
+    /* The gzip trailer is the last 8 bytes: CRC32, then uncompressed length. */
+    if (fseek(str, -8, SEEK_END)
+        || !dz_read_le32(str, this->crc)
+        || !dz_read_le32(str, this->length))
+        return fail(1);
+    this->compressedLength = ftell(str);
+
+    /* Compute the file offset of every compressed chunk (dictzip only). */
+    if (this->chunkCount > 0) {
+        this->offsets = (unsigned long *)malloc(sizeof(this->offsets[0])
+                                                * this->chunkCount);
+        if (!this->offsets)
+            return fail(1);
+        unsigned long offset = this->headerLength + 1;
+        for (int i = 0; i < this->chunkCount; i++) {
+            this->offsets[i] = offset;
+            offset += this->chunks[i];
+        }
+    }
+
+    fclose(str);
+    return 0;
+}
+
+/*
+ * Open the dictionary data file `fname`: parse its header, map it into
+ * memory and reset the chunk cache.
+ *
+ * `computeCRC` is forwarded to read_header().
+ * Returns true on success; false if `fname` is not a regular file, its
+ * header cannot be parsed, or it cannot be opened, stat'ed or mapped.
+ *
+ * NOTE(review): calling open() on an object that is already open overwrites
+ * the owned pointers without freeing them (as before) -- confirm callers
+ * never reopen.
+ */
+bool DictData::open(const std::string &fname, int computeCRC)
+{
+    struct stat sb;
+    int fd;
+
+    this->initialized = 0;
+
+    /* Put every owned pointer into a known state before the first early
+     * return: the destructor calls close(), which frees them. */
+    this->chunks = nullptr;
+    this->offsets = nullptr;
+    for (size_t j = 0; j < DICT_CACHE_SIZE; j++) {
+        cache[j].chunk = -1;
+        cache[j].stamp = -1;
+        cache[j].inBuffer = nullptr;
+        cache[j].count = 0;
+    }
+
+    if (stat(fname.c_str(), &sb) || !S_ISREG(sb.st_mode)) {
+        /* not a regular file -- ignoring */
+        return false;
+    }
+
+    if (read_header(fname, computeCRC)) {
+        /* not in text or dzip format */
+        return false;
+    }
+
+    if ((fd = ::open(fname.c_str(), O_RDONLY)) < 0) {
+        /* cannot open data file */
+        return false;
+    }
+    if (fstat(fd, &sb)) {
+        /* cannot stat data file: release the descriptor before bailing out */
+        ::close(fd);
+        return false;
+    }
+
+    this->size = sb.st_size;
+    ::close(fd);
+    if (!mapfile.open(fname.c_str(), size))
+        return false;
+
+    this->start = mapfile.begin();
+    this->end = this->start + this->size;
+
+    return true;
+}
+
+/*
+ * Release the chunk table, the offset table, the zlib inflate state and the
+ * cached chunk buffers.
+ *
+ * Every released member is reset afterwards, so close() may be called more
+ * than once: the destructor calls it again after an explicit close().
+ */
+void DictData::close()
+{
+    if (this->chunks)
+        free(this->chunks);
+    this->chunks = nullptr;
+    if (this->offsets)
+        free(this->offsets);
+    this->offsets = nullptr;
+
+    if (this->initialized) {
+        /* A failure to shut down the inflate engine is ignored, as before. */
+        inflateEnd(&this->zStream);
+        this->initialized = 0;
+    }
+
+    for (size_t i = 0; i < DICT_CACHE_SIZE; ++i) {
+        if (this->cache[i].inBuffer)
+            free(this->cache[i].inBuffer);
+        /* Mark the slot empty so it can never be matched without a buffer. */
+        this->cache[i].inBuffer = nullptr;
+        this->cache[i].chunk = -1;
+        this->cache[i].count = 0;
+    }
+}
+
+/*
+ * Copy `size` bytes of uncompressed dictionary data, starting at uncompressed
+ * offset `start`, into `buffer` (which must hold at least `size` bytes).
+ *
+ * DICT_TEXT files are copied straight from the mapped file. DICT_DZIP files
+ * are inflated chunk by chunk through a small cache of DICT_CACHE_SIZE
+ * chunks; the slot with the smallest stamp is reused on a miss.
+ * DICT_GZIP and DICT_UNKNOWN files cannot be read and leave `buffer` untouched.
+ *
+ * The function has no way to report errors: on an inconsistent chunk table,
+ * an out-of-range request or an allocation failure it returns early and the
+ * remainder of `buffer` is left unmodified.
+ */
+void DictData::read(char *buffer, unsigned long start, unsigned long size)
+{
+    char *pt;
+    unsigned long end;
+    int count;
+    char *inBuffer;
+    char outBuffer[OUT_BUFFER_SIZE]; /* holds the *compressed* chunk */
+    int firstChunk, lastChunk;
+    int firstOffset, lastOffset;
+    int i;
+    int found, target, lastStamp;
+    static int stamp = 0;
+
+    end = start + size;
+
+    switch (this->type) {
+    case DICT_GZIP:
+        /* Cannot seek on pure gzip format files. */
+        break;
+    case DICT_TEXT:
+        memcpy(buffer, this->start + start, size);
+        break;
+    case DICT_DZIP:
+        if (size == 0)
+            break;
+        /* chunkLength is a divisor below, and every inflated chunk must fit
+         * into a cache buffer of IN_BUFFER_SIZE bytes. */
+        if (this->chunkLength <= 0
+            || (unsigned long)this->chunkLength > IN_BUFFER_SIZE)
+            break;
+        if (!this->initialized) {
+            ++this->initialized;
+            this->zStream.zalloc = nullptr;
+            this->zStream.zfree = nullptr;
+            this->zStream.opaque = nullptr;
+            this->zStream.next_in = 0;
+            this->zStream.avail_in = 0;
+            this->zStream.next_out = nullptr;
+            this->zStream.avail_out = 0;
+            /* -15: raw deflate stream, the gzip header is skipped by hand. */
+            if (inflateInit2(&this->zStream, -15) != Z_OK) {
+                /* cannot initialize inflation engine; ignored as before */
+            }
+        }
+        firstChunk = start / this->chunkLength;
+        firstOffset = start - firstChunk * this->chunkLength;
+        lastChunk = end / this->chunkLength;
+        lastOffset = end - lastChunk * this->chunkLength;
+        /* When the request ends exactly on a chunk boundary, nothing is
+         * needed from the following chunk -- which does not even exist at
+         * the end of the data. Finish in the previous chunk instead. */
+        if (lastOffset == 0 && lastChunk > firstChunk) {
+            --lastChunk;
+            lastOffset = this->chunkLength;
+        }
+        if (lastChunk >= this->chunkCount)
+            break; /* request reaches past the last chunk */
+
+        for (pt = buffer, i = firstChunk; i <= lastChunk; i++) {
+
+            /* Access cache */
+            found = 0;
+            target = 0;
+            lastStamp = INT_MAX;
+            for (size_t j = 0; j < DICT_CACHE_SIZE; j++) {
+#if USE_CACHE
+                if (this->cache[j].chunk == i) {
+                    found = 1;
+                    target = j;
+                    break;
+                }
+#endif
+                if (this->cache[j].stamp < lastStamp) {
+                    lastStamp = this->cache[j].stamp;
+                    target = j;
+                }
+            }
+
+            this->cache[target].stamp = ++stamp;
+            if (found) {
+                count = this->cache[target].count;
+                inBuffer = this->cache[target].inBuffer;
+            } else {
+                /* Validate the compressed chunk before copying it: it must
+                 * fit into outBuffer and lie inside the mapped file. */
+                if (this->chunks[i] < 0
+                    || this->chunks[i] > OUT_BUFFER_SIZE
+                    || this->offsets[i] + this->chunks[i] > this->size)
+                    return;
+
+                if (!this->cache[target].inBuffer) {
+                    this->cache[target].inBuffer = (char *)malloc(IN_BUFFER_SIZE);
+                    if (!this->cache[target].inBuffer)
+                        return;
+                }
+                /* Claim the slot only once a buffer is guaranteed. */
+                this->cache[target].chunk = i;
+                inBuffer = this->cache[target].inBuffer;
+
+                memcpy(outBuffer, this->start + this->offsets[i], this->chunks[i]);
+
+                this->zStream.next_in = (Bytef *)outBuffer;
+                this->zStream.avail_in = this->chunks[i];
+                this->zStream.next_out = (Bytef *)inBuffer;
+                this->zStream.avail_out = IN_BUFFER_SIZE;
+                if (inflate(&this->zStream, Z_PARTIAL_FLUSH) != Z_OK) {
+                    /* inflate error; ignored as before */
+                }
+
+                count = IN_BUFFER_SIZE - this->zStream.avail_out;
+
+                this->cache[target].count = count;
+            }
+
+            if (i == firstChunk) {
+                if (i == lastChunk) {
+                    /* Request lies entirely inside one chunk. */
+                    memcpy(pt, inBuffer + firstOffset, lastOffset - firstOffset);
+                    pt += lastOffset - firstOffset;
+                } else {
+                    /* First of several chunks: copy from firstOffset to its end. */
+                    memcpy(pt, inBuffer + firstOffset,
+                           this->chunkLength - firstOffset);
+                    pt += this->chunkLength - firstOffset;
+                }
+            } else if (i == lastChunk) {
+                /* Final chunk: copy its leading lastOffset bytes. */
+                memcpy(pt, inBuffer, lastOffset);
+                pt += lastOffset;
+            } else {
+                /* Middle chunk: always a full chunk. */
+                assert(count == this->chunkLength);
+                memcpy(pt, inBuffer, this->chunkLength);
+                pt += this->chunkLength;
+            }
+        }
+        break;
+    case DICT_UNKNOWN:
+        /* Cannot read unknown file type. */
+        break;
+    }
+}
diff --git a/src/dictziplib.hpp b/src/dictziplib.hpp
new file mode 100644
index 0000000..859c295
--- /dev/null
+++ b/src/dictziplib.hpp
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <ctime>
+#include <string>
+#include <zlib.h>
+
+#include "mapfile.hpp"
+
+/*
+ * One slot of DictData's cache of inflated dictzip chunks.
+ *
+ * The defaults describe an empty slot, so a DictData that was never opened
+ * can be destroyed safely (DictData::close() frees inBuffer).
+ * NOTE(review): default member initializers make this a non-aggregate in
+ * C++11 -- confirm nothing brace-initializes a DictCache.
+ */
+struct DictCache {
+    int chunk = -1;           /* index of the cached chunk, -1 when empty */
+    char *inBuffer = nullptr; /* malloc'ed buffer holding the inflated chunk */
+    int stamp = -1;           /* access counter at last use; smallest is reused first */
+    int count = 0;            /* number of inflated bytes in inBuffer */
+};
+
+/*
+ * Random-access reader for a StarDict .dict file, either plain text or
+ * dictzip-compressed (.dict.dz).
+ *
+ * Usage: open() a file, then read() byte ranges of the uncompressed data.
+ * The destructor calls close(), so every owned pointer gets a default
+ * initializer below: destroying an object that was never opened, or whose
+ * open() failed early, must not free indeterminate pointers.
+ *
+ * NOTE(review): the class owns raw malloc'ed buffers and defines no copy
+ * operations; copying an opened object would double-free -- confirm it is
+ * never copied.
+ */
+class DictData
+{
+public:
+    static const size_t DICT_CACHE_SIZE = 5;
+
+    DictData() {}
+    ~DictData() { close(); }
+    bool open(const std::string &filename, int computeCRC);
+    void close();
+    void read(char *buffer, unsigned long start, unsigned long size);
+
+private:
+    const char *start = nullptr; /* start of mmap'd area */
+    const char *end = nullptr;   /* end of mmap'd area */
+    unsigned long size = 0;      /* size of mmap */
+
+    int type = 0; /* 0 is DICT_UNKNOWN in dictziplib.cpp */
+    z_stream zStream = {};
+    int initialized = 0; /* non-zero once zStream has been set up by read() */
+
+    int headerLength = 0;
+    int method = 0;
+    int flags = 0;
+    time_t mtime = 0;
+    int extraFlags = 0;
+    int os = 0;
+    int version = 0;
+    int chunkLength = 0;
+    int chunkCount = 0;
+    int *chunks = nullptr;            /* compressed size of each chunk */
+    unsigned long *offsets = nullptr; /* Sum-scan of chunks. */
+    std::string origFilename;
+    std::string comment;
+    unsigned long crc = 0;
+    unsigned long length = 0;
+    unsigned long compressedLength = 0;
+    DictCache cache[DICT_CACHE_SIZE] = {};
+    MapFile mapfile;
+
+    int read_header(const std::string &filename, int computeCRC);
+};
diff --git a/src/distance.cpp b/src/distance.cpp
new file mode 100644
index 0000000..8e5b553
--- /dev/null
+++ b/src/distance.cpp
@@ -0,0 +1,145 @@
+/*
+ writer : Opera Wang
+ E-Mail : wangvisual AT sohu DOT com
+ License: GPL
+*/
+
+/* filename: distance.cpp */
+/*
+http://www.merriampark.com/ld.htm
+What is Levenshtein Distance?
+
+Levenshtein distance (LD) is a measure of the similarity between two strings,
+which we will refer to as the source string (s) and the target string (t).
+The distance is the number of deletions, insertions, or substitutions required
+ to transform s into t. For example,
+
+ * If s is "test" and t is "test", then LD(s,t) = 0, because no transformations are needed.
+ The strings are already identical.
+ * If s is "test" and t is "tent", then LD(s,t) = 1, because one substitution
+ (change "s" to "n") is sufficient to transform s into t.
+
+The greater the Levenshtein distance, the more different the strings are.
+
+Levenshtein distance is named after the Russian scientist Vladimir Levenshtein,
+ who devised the algorithm in 1965. If you can't spell or pronounce Levenshtein,
+ the metric is also sometimes called edit distance.
+
+The Levenshtein distance algorithm has been used in:
+
+ * Spell checking
+ * Speech recognition
+ * DNA analysis
+ * Plagiarism detection
+*/
+
+#include <cstdlib>
+#include <cstring>
+
+#include "distance.hpp"
+
+/*
+Cover transposition, in addition to deletion,
+insertion and substitution. This step is taken from:
+Berghel, Hal ; Roach, David : "An Extension of Ukkonen's
+Enhanced Dynamic Programming ASM Algorithm"
+(http://www.acm.org/~hlb/publications/asm/asm.html)
+*/
+#define COVER_TRANSPOSITION
+
+/****************************************/
+/*Implementation of Levenshtein distance*/
+/****************************************/
+
+/* Returns the smallest of the three arguments. */
+static inline int minimum(const int a, const int b, const int c)
+{
+    const int smaller = (b < a) ? b : a;
+    return (c < smaller) ? c : smaller;
+}
+
+/*
+ * Computes the Levenshtein distance between the zero-terminated strings
+ * s and t (adjacent transpositions count as one edit when
+ * COVER_TRANSPOSITION is defined), using the "quick" algorithm that gives
+ * up as soon as the distance is known to reach `limit`.
+ *
+ * Returns:
+ *  - the sum of the remaining lengths when, after stripping the common
+ *    prefix and suffix, either string is empty (or no work matrix exists);
+ *  - the length difference when it already reaches `limit`;
+ *  - the first intermediate value that reaches `limit`, otherwise the
+ *    exact distance.
+ *
+ * The work matrix `d` is kept between calls and grown on demand.
+ */
+int EditDistance::CalEditDistance(const gunichar *s, const gunichar *t, const int limit)
+{
+    int n = 0, m = 0, iLenDif, k, i, j, cost;
+    // Remove leftmost matching portion of strings
+    while (*s && (*s == *t)) {
+        s++;
+        t++;
+    }
+
+    while (s[n]) {
+        n++;
+    }
+    while (t[m]) {
+        m++;
+    }
+
+    // Remove rightmost matching portion of strings by decrement n and m.
+    while (n && m && (*(s + n - 1) == *(t + m - 1))) {
+        n--;
+        m--;
+    }
+    if (m == 0 || n == 0 || d == nullptr)
+        return (m + n);
+    // Make s the shorter string, so that n <= m below.
+    if (m < n) {
+        const gunichar *temp = s;
+        int itemp = n;
+        s = t;
+        t = temp;
+        n = m;
+        m = itemp;
+    }
+    iLenDif = m - n;
+    if (iLenDif >= limit)
+        return iLenDif;
+    // step 1
+    n++;
+    m++;
+    if (m * n > currentelements) {
+        const int wanted = m * n * 2; // double the request
+        // Grow through a temporary: on failure realloc() leaves the old
+        // block allocated, so `d` and `currentelements` must stay untouched
+        // (assigning the result to `d` directly would leak the old block
+        // and leave the recorded capacity wrong).
+        int *grown = static_cast<int *>(realloc(d, sizeof(int) * wanted));
+        if (nullptr == grown)
+            return (m + n);
+        d = grown;
+        currentelements = wanted;
+    }
+    // step 2, init matrix
+    for (k = 0; k < n; k++)
+        d[k] = k;
+    for (k = 1; k < m; k++)
+        d[k * n] = k;
+    // step 3
+    for (i = 1; i < n; i++) {
+        // first calculate column, d(i,j)
+        for (j = 1; j < iLenDif + i; j++) {
+            cost = s[i - 1] == t[j - 1] ? 0 : 1;
+            d[j * n + i] = minimum(d[(j - 1) * n + i] + 1, d[j * n + i - 1] + 1, d[(j - 1) * n + i - 1] + cost);
+#ifdef COVER_TRANSPOSITION
+            if (i >= 2 && j >= 2 && (d[j * n + i] - d[(j - 2) * n + i - 2] == 2)
+                && (s[i - 2] == t[j - 1]) && (s[i - 1] == t[j - 2]))
+                d[j * n + i]--;
+#endif
+        }
+        // second calculate row, d(k,j)
+        // now j==iLenDif+i;
+        for (k = 1; k <= i; k++) {
+            cost = s[k - 1] == t[j - 1] ? 0 : 1;
+            d[j * n + k] = minimum(d[(j - 1) * n + k] + 1, d[j * n + k - 1] + 1, d[(j - 1) * n + k - 1] + cost);
+#ifdef COVER_TRANSPOSITION
+            if (k >= 2 && j >= 2 && (d[j * n + k] - d[(j - 2) * n + k - 2] == 2)
+                && (s[k - 2] == t[j - 1]) && (s[k - 1] == t[j - 2]))
+                d[j * n + k]--;
+#endif
+        }
+        // test if d(i,j) limit gets equal or exceed
+        if (d[j * n + i] >= limit) {
+            return d[j * n + i];
+        }
+    }
+    // d(n-1,m-1)
+    return d[n * m - 1];
+}
diff --git a/src/distance.hpp b/src/distance.hpp
new file mode 100644
index 0000000..d472d5b
--- /dev/null
+++ b/src/distance.hpp
@@ -0,0 +1,26 @@
+#pragma once
+
+#include <cstdlib>
+#include <glib.h>
+
+// Edit-distance calculator that owns a reusable work matrix.
+// The matrix is allocated once here and grown on demand by
+// CalEditDistance(); the object is not copyable because it owns the buffer.
+class EditDistance
+{
+public:
+    EditDistance()
+        : d(static_cast<int *>(malloc(sizeof(int) * INITIAL_ELEMENTS)))
+        , currentelements(INITIAL_ELEMENTS)
+    {
+    }
+    // free() accepts a null pointer, so no check is needed.
+    ~EditDistance() { free(d); }
+    EditDistance(const EditDistance &) = delete;
+    EditDistance &operator=(const EditDistance &) = delete;
+    int CalEditDistance(const gunichar *s, const gunichar *t, const int limit);
+
+private:
+    // It's enough for most conditions :-)
+    enum { INITIAL_ELEMENTS = 2500 };
+
+    int *d;              // work matrix; null if the allocation failed
+    int currentelements; // capacity of d, counted in ints
+};
diff --git a/src/libwrapper.cpp b/src/libwrapper.cpp
new file mode 100644
index 0000000..1544b18
--- /dev/null
+++ b/src/libwrapper.cpp
@@ -0,0 +1,452 @@
+/*
+ * This file part of sdcv - console version of Stardict program
+ * http://sdcv.sourceforge.net
+ * Copyright (C) 2005-2006 Evgeniy <dushistov@mail.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <cstring>
+#include <map>
+#include <memory>
+
+#include <glib/gi18n.h>
+
+#include "utils.hpp"
+
+#include "libwrapper.hpp"
+
+// ANSI "Select Graphic Rendition" escape sequences written to the terminal
+// when colorized output is requested.
+static const char ESC_BLUE[] = "\033[0;34m";
+static const char ESC_END[] = "\033[0m"; // resets all attributes
+static const char ESC_BOLD[] = "\033[1m";
+static const char ESC_ITALIC[] = "\033[3m";
+static const char ESC_LIGHT_GRAY[] = "\033[0;37m";
+static const char ESC_GREEN[] = "\033[0;32m";
+
+// Which sequence is used for each kind of text; change the mapping here
+// rather than at the points of use.
+static const char *SEARCH_TERM_VISFMT = ESC_BOLD;
+static const char *NAME_OF_DICT_VISFMT = ESC_BLUE;
+static const char *TRANSCRIPTION_VISFMT = ESC_BOLD;
+static const char *EXAMPLE_VISFMT = ESC_LIGHT_GRAY;
+static const char *KREF_VISFMT = ESC_BOLD;
+static const char *ABR_VISFMT = ESC_GREEN;
+
+// Converts XDXF-style markup in the zero-terminated string `p` to plain
+// text, optionally decorated with terminal escape sequences.
+//
+//  - The entities &gt; &lt; &amp; &quot; &apos; are decoded.
+//  - <abr>, <kref>, <b>, <i>, <tr>, <ex> and their closing tags become
+//    escape sequences when `colorize_output` is true; <tr>...</tr> is
+//    additionally wrapped in square brackets in both modes.
+//  - <k>...</k> is skipped together with its content.
+//  - Every other tag (including the colour tags <c ...> and </c>) is
+//    dropped and only its content is kept.
+//  - A '<' with no matching '>' in the rest of the string is dropped.
+static std::string xdxf2text(const char *p, bool colorize_output)
+{
+    std::string res;
+    for (; *p; ++p) {
+        if (*p != '<') {
+            // The loop's ++p consumes the last character of the entity,
+            // hence the skips of (length - 1).
+            if (g_str_has_prefix(p, "&gt;")) {
+                res += ">";
+                p += 3;
+            } else if (g_str_has_prefix(p, "&lt;")) {
+                res += "<";
+                p += 3;
+            } else if (g_str_has_prefix(p, "&amp;")) {
+                res += "&";
+                p += 4;
+            } else if (g_str_has_prefix(p, "&quot;")) {
+                res += "\"";
+                p += 5;
+            } else if (g_str_has_prefix(p, "&apos;")) {
+                res += "\'";
+                p += 5;
+            } else
+                res += *p;
+            continue;
+        }
+
+        const char *next = strchr(p, '>');
+        if (!next)
+            continue;
+
+        // Tag text between '<' and '>', e.g. "b", "/kref", "c c=...".
+        const std::string name(p + 1, next - p - 1);
+
+        if (name == "abr")
+            res += colorize_output ? ABR_VISFMT : "";
+        else if (name == "/abr")
+            res += colorize_output ? ESC_END : "";
+        else if (name == "k") {
+            // Skip the key itself: continue after the '>' of "</k>" if
+            // there is one, otherwise just after the opening tag.
+            const char *begin = next;
+            if ((next = strstr(begin, "</k>")) != nullptr)
+                next += sizeof("</k>") - 1 - 1;
+            else
+                next = begin;
+        } else if (name == "kref") {
+            res += colorize_output ? KREF_VISFMT : "";
+        } else if (name == "/kref") {
+            res += colorize_output ? ESC_END : "";
+        } else if (name == "b")
+            res += colorize_output ? ESC_BOLD : "";
+        else if (name == "/b")
+            res += colorize_output ? ESC_END : "";
+        else if (name == "i")
+            res += colorize_output ? ESC_ITALIC : "";
+        else if (name == "/i")
+            res += colorize_output ? ESC_END : "";
+        else if (name == "tr") {
+            if (colorize_output)
+                res += TRANSCRIPTION_VISFMT;
+            res += "[";
+        } else if (name == "/tr") {
+            res += "]";
+            if (colorize_output)
+                res += ESC_END;
+        } else if (name == "ex")
+            res += colorize_output ? EXAMPLE_VISFMT : "";
+        else if (name == "/ex")
+            res += colorize_output ? ESC_END : "";
+        // All remaining tags produce no output. Colour tags used to be
+        // parsed here for a value that was never used; that parsing threw
+        // std::out_of_range for tags such as <code>, so it was removed.
+
+        p = next;
+    }
+    return res;
+}
+
+// Renders one dictionary article as text.
+//
+// `data` starts with a 32-bit size (read with get_uint32) followed by a
+// sequence of fields; each field is a one-character type code followed by
+// its payload. Lower-case types handled here carry a NUL-terminated
+// string, 'W' and 'P' carry a 32-bit length followed by that many bytes.
+// Returns the concatenated text of the printable fields, or "" when
+// `data` is null.
+static std::string parse_data(const gchar *data, bool colorize_output)
+{
+    if (!data)
+        return "";
+
+    std::string res;
+    const gchar *p = data;
+    const guint32 data_size = get_uint32(p);
+    p += sizeof(guint32);
+    while (guint32(p - data) < data_size) {
+        guint32 sec_size = 0; // payload size of the current field, terminator included
+        const gchar type = *p++;
+        switch (type) {
+        case 'h': // HTML data
+        case 'w': // WikiMedia markup data
+        case 'm': // plain text, utf-8
+        case 'l': // not utf-8, some other locale encoding, discouraged, need more work...
+            sec_size = strlen(p);
+            if (sec_size) {
+                res += "\n";
+                res.append(p, sec_size);
+            }
+            sec_size++;
+            break;
+        case 'g': // pango markup data
+        case 'x': // xdxf
+            sec_size = strlen(p);
+            if (sec_size) {
+                res += "\n";
+                // p is NUL-terminated at sec_size, no copy is needed
+                res += xdxf2text(p, colorize_output);
+            }
+            sec_size++;
+            break;
+        case 't': // english phonetic string
+            sec_size = strlen(p);
+            if (sec_size) {
+                res += "\n";
+                if (colorize_output)
+                    res += TRANSCRIPTION_VISFMT;
+                res += "[" + std::string(p, sec_size) + "]";
+                if (colorize_output)
+                    res += ESC_END;
+            }
+            sec_size++;
+            break;
+        case 'k': // KingSoft PowerWord data
+        case 'y': // chinese YinBiao or japanese kana, utf-8
+            sec_size = strlen(p);
+            if (sec_size)
+                res += std::string(p, sec_size);
+            sec_size++;
+            break;
+        case 'W': // wav file
+        case 'P': // picture data
+            sec_size = get_uint32(p);
+            sec_size += sizeof(guint32);
+            break;
+        default:
+            // Unknown type: skip its payload instead of re-using the size
+            // of the previous field. Follow the convention of the types
+            // above - upper-case types are length-prefixed, all others are
+            // NUL-terminated strings.
+            if (type >= 'A' && type <= 'Z') {
+                sec_size = get_uint32(p);
+                sec_size += sizeof(guint32);
+            } else {
+                sec_size = strlen(p);
+                sec_size++;
+            }
+            break;
+        }
+        p += sec_size;
+    }
+
+    return res;
+}
+
+// Looks `str` up in every loaded dictionary and appends one result
+// (dictionary name, matched word, rendered article) per dictionary that
+// contains it.
+void Library::SimpleLookup(const std::string &str, TSearchResultList &res_list)
+{
+    glong word_index;
+    res_list.reserve(ndicts());
+    for (gint idict = 0; idict < ndicts(); ++idict) {
+        if (!SimpleLookupWord(str.c_str(), word_index, idict))
+            continue;
+        res_list.push_back(
+            TSearchResult(dict_name(idict),
+                          poGetWord(word_index, idict),
+                          parse_data(poGetWordData(word_index, idict), colorize_output_)));
+    }
+}
+
+// Asks the library for up to MAXFUZZY words similar to `str` and appends
+// the exact-lookup results of each candidate to `res_list`. The candidate
+// strings are released here; the list of candidates ends at the first
+// null entry.
+void Library::LookupWithFuzzy(const std::string &str, TSearchResultList &res_list)
+{
+    static const int MAXFUZZY = 10;
+
+    gchar *fuzzy_res[MAXFUZZY];
+    if (!Libs::LookupWithFuzzy(str.c_str(), fuzzy_res, MAXFUZZY))
+        return;
+
+    for (int i = 0; i < MAXFUZZY; ++i) {
+        gchar *candidate = fuzzy_res[i];
+        if (candidate == nullptr)
+            break;
+        SimpleLookup(candidate, res_list);
+        g_free(candidate);
+    }
+}
+
+// Looks up all words matching the pattern `str` and appends the
+// exact-lookup results of every match to `res_list`. The matched strings
+// are released here.
+void Library::LookupWithRule(const std::string &str, TSearchResultList &res_list)
+{
+    // Room for the maximum number of matches of every dictionary.
+    std::vector<gchar *> match_res((MAX_MATCH_ITEM_PER_LIB)*ndicts());
+
+    // data() instead of &match_res[0]: the vector is empty when no
+    // dictionary is loaded, and indexing an empty vector is undefined.
+    const gint nfound = Libs::LookupWithRule(str.c_str(), match_res.data());
+    if (nfound == 0)
+        return;
+
+    for (gint i = 0; i < nfound; ++i) {
+        SimpleLookup(match_res[i], res_list);
+        g_free(match_res[i]);
+    }
+}
+
+// Searches the article texts for `str` and appends the exact-lookup
+// results of every word found to `res_list`. One list of found words is
+// kept per dictionary; the strings are released here.
+void Library::LookupData(const std::string &str, TSearchResultList &res_list)
+{
+    std::vector<std::vector<gchar *>> drl(ndicts());
+    // data() instead of &drl[0]: the vector is empty when no dictionary is
+    // loaded, and indexing an empty vector is undefined.
+    if (!Libs::LookupData(str.c_str(), drl.data()))
+        return;
+    for (int idict = 0; idict < ndicts(); ++idict)
+        for (gchar *res : drl[idict]) {
+            SimpleLookup(res, res_list);
+            g_free(res);
+        }
+}
+
+// Writes one search result to `out`.
+// In JSON mode the result is one object, preceded by a comma unless it is
+// the first one (`first_result` is cleared by the first call). Otherwise
+// the dictionary name, the word and the article are printed as text,
+// converted to the locale encoding unless UTF-8 output was requested.
+void Library::print_search_result(FILE *out, const TSearchResult &res, bool &first_result)
+{
+    std::string loc_bookname, loc_def, loc_exp;
+
+    if (!utf8_output_) {
+        loc_bookname = utf8_to_locale_ign_err(res.bookname);
+        loc_def = utf8_to_locale_ign_err(res.def);
+        loc_exp = utf8_to_locale_ign_err(res.exp);
+    }
+
+    if (json_) {
+        if (first_result)
+            first_result = false;
+        else
+            fputs(",", out);
+        fprintf(out, "{\"dict\": \"%s\",\"word\":\"%s\",\"definition\":\"%s\"}",
+                json_escape_string(res.bookname).c_str(),
+                json_escape_string(res.def).c_str(),
+                json_escape_string(res.exp).c_str());
+        return;
+    }
+
+    const char *bookname = utf8_output_ ? res.bookname.c_str() : loc_bookname.c_str();
+    const char *word = utf8_output_ ? res.def.c_str() : loc_def.c_str();
+    const char *article = utf8_output_ ? res.exp.c_str() : loc_exp.c_str();
+    const char *reset = colorize_output_ ? ESC_END : "";
+
+    fprintf(out,
+            "-->%s%s%s\n"
+            "-->%s%s%s\n"
+            "%s\n\n",
+            colorize_output_ ? NAME_OF_DICT_VISFMT : "", bookname, reset,
+            colorize_output_ ? SEARCH_TERM_VISFMT : "", word, reset,
+            article);
+}
+
+namespace
+{
+// Output stream for search results: a pipe to the program named by the
+// SDCV_PAGER environment variable when it is set and can be started,
+// otherwise stdout. The pipe is closed on destruction.
+class sdcv_pager final
+{
+public:
+    // ignore_env: write to stdout without consulting SDCV_PAGER.
+    explicit sdcv_pager(bool ignore_env = false)
+        : output(stdout)
+    {
+        if (ignore_env)
+            return;
+
+        const gchar *pager = g_getenv("SDCV_PAGER");
+        if (pager == nullptr)
+            return;
+
+        FILE *pipe = popen(pager, "w");
+        if (pipe == nullptr) {
+            perror(_("popen failed"));
+            return; // keep writing to stdout
+        }
+        output = pipe;
+    }
+    sdcv_pager(const sdcv_pager &) = delete;
+    sdcv_pager &operator=(const sdcv_pager &) = delete;
+    ~sdcv_pager()
+    {
+        if (output == stdout)
+            return;
+        pclose(output);
+    }
+    FILE *get_stream() { return output; }
+
+private:
+    FILE *output;
+};
+}
+
+// Looks up one phrase typed by the user and prints the results.
+//
+//  loc_str  phrase in the locale encoding (UTF-8 when UTF-8 input was
+//           requested); a null pointer is accepted and ignored.
+//  io       line reader whose history receives the query and the word
+//           finally chosen.
+//  force    print every result without asking the user to choose and
+//           without a pager.
+//
+// Returns false only when the phrase cannot be converted to UTF-8.
+// In JSON mode the output is always a complete JSON array.
+bool Library::process_phrase(const char *loc_str, IReadLine &io, bool force)
+{
+    if (nullptr == loc_str)
+        return true;
+
+    std::string query;
+
+    analyze_query(loc_str, query);
+    if (!query.empty())
+        io.add_to_history(query.c_str());
+
+    gsize bytes_read;
+    gsize bytes_written;
+    glib::Error err;
+    glib::CharStr str;
+    if (!utf8_input_)
+        str.reset(g_locale_to_utf8(loc_str, -1, &bytes_read, &bytes_written, get_addr(err)));
+    else
+        str.reset(g_strdup(loc_str));
+
+    if (nullptr == get_impl(str)) {
+        fprintf(stderr, _("Can not convert %s to utf8.\n"), loc_str);
+        fprintf(stderr, "%s\n", err->message);
+        return false;
+    }
+
+    if (str[0] == '\0')
+        return true;
+
+    TSearchResultList res_list;
+
+    switch (analyze_query(get_impl(str), query)) {
+    case qtFUZZY:
+        LookupWithFuzzy(query, res_list);
+        break;
+    case qtREGEXP:
+        LookupWithRule(query, res_list);
+        break;
+    case qtSIMPLE:
+        SimpleLookup(get_impl(str), res_list);
+        // fall back to similar words when nothing matched exactly
+        if (res_list.empty() && fuzzy_)
+            LookupWithFuzzy(get_impl(str), res_list);
+        break;
+    case qtDATA:
+        LookupData(query, res_list);
+        break;
+    default:
+        /*nothing*/;
+    }
+
+    bool first_result = true;
+    if (json_) {
+        fputc('[', stdout);
+    }
+    if (!res_list.empty()) {
+        /* try to be more clever, if there are
+           one or zero results per dictionary show all
+        */
+        bool show_all_results = true;
+        typedef std::map<std::string, int, std::less<std::string>> DictResMap;
+        if (!force) {
+            DictResMap res_per_dict;
+            for (const TSearchResult &search_res : res_list) {
+                auto r = res_per_dict.equal_range(search_res.bookname);
+                DictResMap tmp(r.first, r.second);
+                if (tmp.empty()) //there are no yet such bookname in map
+                    res_per_dict.insert(DictResMap::value_type(search_res.bookname, 1));
+                else {
+                    // second result of the same dictionary: let the user choose
+                    ++((tmp.begin())->second);
+                    if (tmp.begin()->second > 1) {
+                        show_all_results = false;
+                        break;
+                    }
+                }
+            }
+        } //if (!force)
+
+        if (!show_all_results && !force) {
+            if (!json_) {
+                printf(_("Found %zu items, similar to %s.\n"), res_list.size(),
+                       utf8_output_ ? get_impl(str) : utf8_to_locale_ign_err(get_impl(str)).c_str());
+            }
+            for (size_t i = 0; i < res_list.size(); ++i) {
+                const std::string loc_bookname = utf8_to_locale_ign_err(res_list[i].bookname);
+                const std::string loc_def = utf8_to_locale_ign_err(res_list[i].def);
+                printf("%zu)%s%s%s-->%s%s%s\n", i,
+                       colorize_output_ ? NAME_OF_DICT_VISFMT : "",
+                       utf8_output_ ? res_list[i].bookname.c_str() : loc_bookname.c_str(),
+                       colorize_output_ ? ESC_END : "",
+                       colorize_output_ ? SEARCH_TERM_VISFMT : "",
+                       utf8_output_ ? res_list[i].def.c_str() : loc_def.c_str(),
+                       colorize_output_ ? ESC_END : "");
+            }
+            std::unique_ptr<IReadLine> choice_readline(create_readline_object());
+            for (;;) {
+                std::string str_choise;
+                const bool got_line = choice_readline->read(_("Your choice[-1 to abort]: "), str_choise);
+                // End of input and nothing typed: no choice can ever be
+                // made, so stop asking instead of looping forever.
+                if (!got_line && str_choise.empty())
+                    break;
+                int choise;
+                // Input that is not a number is reported as an invalid
+                // choice below rather than leaving `choise` unset.
+                if (sscanf(str_choise.c_str(), "%d", &choise) != 1)
+                    choise = -2;
+                if (choise >= 0 && choise < int(res_list.size())) {
+                    sdcv_pager pager;
+                    io.add_to_history(res_list[choise].def.c_str());
+                    print_search_result(pager.get_stream(), res_list[choise], first_result);
+                    break;
+                } else if (choise == -1) {
+                    break;
+                } else
+                    printf(_("Invalid choice.\nIt must be from 0 to %zu or -1.\n"),
+                           res_list.size() - 1);
+            }
+        } else {
+            sdcv_pager pager(force || json_);
+            if (!json_) {
+                fprintf(pager.get_stream(), _("Found %zu items, similar to %s.\n"),
+                        res_list.size(), utf8_output_ ? get_impl(str) : utf8_to_locale_ign_err(get_impl(str)).c_str());
+            }
+            for (const TSearchResult &search_res : res_list) {
+                print_search_result(pager.get_stream(), search_res, first_result);
+            }
+        }
+
+    } else {
+        std::string loc_str;
+        if (!utf8_output_)
+            loc_str = utf8_to_locale_ign_err(get_impl(str));
+        if (!json_)
+            printf(_("Nothing similar to %s, sorry :(\n"), utf8_output_ ? get_impl(str) : loc_str.c_str());
+    }
+
+    if (json_) {
+        fputs("]\n", stdout);
+    }
+    return true;
+}
diff --git a/src/libwrapper.hpp b/src/libwrapper.hpp
new file mode 100644
index 0000000..31783b6
--- /dev/null
+++ b/src/libwrapper.hpp
@@ -0,0 +1,54 @@
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "readline.hpp"
+#include "stardict_lib.hpp"
+
+// This structure is a wrapper needed to unify the search
+// results returned by the Libs class.
+// One search hit, independent of the kind of search that produced it.
+struct TSearchResult {
+    std::string bookname; // name of the dictionary the hit comes from
+    std::string def;      // the word found (printed as "word" in JSON output)
+    std::string exp;      // the article text (printed as "definition" in JSON output)
+
+    TSearchResult(const std::string &bookname_, const std::string &def_, const std::string &exp_)
+        : bookname(bookname_), def(def_), exp(exp_) {}
+};
+
+typedef std::vector<TSearchResult> TSearchResultList;
+
+// This class is a wrapper around the Libs class that makes
+// it easier to use.
+// Front end used by the command-line program: runs the different kinds of
+// lookup offered by Libs and prints the results as text or JSON.
+class Library : public Libs
+{
+public:
+ // uinput          input phrases are already UTF-8 (no locale conversion)
+ // uoutput         print UTF-8 instead of converting to the locale encoding
+ // colorize_output decorate the output with terminal escape sequences
+ // use_json        print results as JSON; also turns off verbose mode
+ // no_fuzzy        turn off the fuzzy search setting
+ Library(bool uinput, bool uoutput, bool colorize_output, bool use_json, bool no_fuzzy)
+ : utf8_input_(uinput)
+ , utf8_output_(uoutput)
+ , colorize_output_(colorize_output)
+ , json_(use_json)
+ {
+ setVerbose(!use_json);
+ setFuzzy(!no_fuzzy);
+ }
+
+ // Looks up loc_str and prints what was found; returns false when the
+ // phrase cannot be converted to UTF-8. With force set, all results are
+ // printed without asking the user to choose among them.
+ bool process_phrase(const char *loc_str, IReadLine &io, bool force = false);
+
+private:
+ bool utf8_input_;
+ bool utf8_output_;
+ bool colorize_output_;
+ bool json_;
+
+ // Each lookup appends its hits to res_list.
+ void SimpleLookup(const std::string &str, TSearchResultList &res_list);
+ void LookupWithFuzzy(const std::string &str, TSearchResultList &res_list);
+ void LookupWithRule(const std::string &str, TSearchResultList &res_lsit);
+ void LookupData(const std::string &str, TSearchResultList &res_list);
+ // Prints one hit to out; first_result tracks whether a JSON separator
+ // is needed and is cleared by the first call.
+ void print_search_result(FILE *out, const TSearchResult &res, bool &first_result);
+};
diff --git a/src/mapfile.hpp b/src/mapfile.hpp
new file mode 100644
index 0000000..ca5a681
--- /dev/null
+++ b/src/mapfile.hpp
@@ -0,0 +1,86 @@
+#pragma once
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_MMAP
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#endif
+#ifdef _WIN32
+#include <windows.h>
+#endif
+#include <glib.h>
+
+// Read-only access to the whole contents of a file: memory-mapped where
+// HAVE_MMAP or _WIN32 is defined, otherwise read into memory with
+// g_file_get_contents(). The contents are released by the destructor;
+// the object is not copyable.
+class MapFile
+{
+public:
+ MapFile() {}
+ ~MapFile();
+ MapFile(const MapFile &) = delete;
+ MapFile &operator=(const MapFile &) = delete;
+ // Makes the contents of file_name available; file_size is the length of
+ // the file in bytes as known by the caller. Returns false on failure.
+ bool open(const char *file_name, unsigned long file_size);
+ // First byte of the contents; nullptr while nothing is loaded.
+ gchar *begin() { return data; }
+
+private:
+ char *data = nullptr; // mapped or loaded contents
+ unsigned long size = 0ul; // the file_size given to open()
+#ifdef HAVE_MMAP
+ int mmap_fd = -1; // descriptor behind the mmap() mapping
+#elif defined(_WIN32)
+ HANDLE hFile = 0; // handle returned by CreateFile()
+ HANDLE hFileMap = 0; // handle returned by CreateFileMapping()
+#endif
+};
+
+// Makes the contents of `file_name` available through begin().
+// `file_size` must be the size of the file in bytes. Returns true on
+// success. On failure every resource acquired so far is released here,
+// because the destructor does nothing while `data` is null.
+inline bool MapFile::open(const char *file_name, unsigned long file_size)
+{
+    size = file_size;
+#ifdef HAVE_MMAP
+    if ((mmap_fd = ::open(file_name, O_RDONLY)) < 0) {
+        //g_print("Open file %s failed!\n",fullfilename);
+        return false;
+    }
+    data = (gchar *)mmap(nullptr, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0);
+    if ((void *)data == (void *)(-1)) {
+        //g_print("mmap file %s failed!\n",idxfilename);
+        data = nullptr;
+        // do not leak the descriptor of a file that could not be mapped
+        ::close(mmap_fd);
+        mmap_fd = -1;
+        return false;
+    }
+#elif defined(_WIN32)
+    // OPEN_EXISTING: a missing file is an error, it must not be created.
+    hFile = CreateFile(file_name, GENERIC_READ, 0, nullptr, OPEN_EXISTING,
+                       FILE_ATTRIBUTE_NORMAL, 0);
+    if (hFile == INVALID_HANDLE_VALUE) {
+        hFile = 0;
+        return false;
+    }
+    hFileMap = CreateFileMapping(hFile, nullptr, PAGE_READONLY, 0,
+                                 file_size, nullptr);
+    if (hFileMap == nullptr) {
+        CloseHandle(hFile);
+        hFile = 0;
+        return false;
+    }
+    data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size);
+    if (data == nullptr) {
+        CloseHandle(hFileMap);
+        CloseHandle(hFile);
+        hFileMap = 0;
+        hFile = 0;
+        return false;
+    }
+#else
+    gsize read_len;
+    if (!g_file_get_contents(file_name, &data, &read_len, nullptr))
+        return false;
+
+    // the buffer stays owned by this object and is freed by the destructor
+    if (read_len != file_size)
+        return false;
+#endif
+
+    return true;
+}
+
+// Releases the contents obtained by open(), in the way that matches how
+// they were obtained. Nothing is done when no contents are held.
+inline MapFile::~MapFile()
+{
+    if (data == nullptr)
+        return;
+#if defined(HAVE_MMAP)
+    munmap(data, size);
+    close(mmap_fd);
+#elif defined(_WIN32)
+    UnmapViewOfFile(data);
+    CloseHandle(hFileMap);
+    CloseHandle(hFile);
+#else
+    g_free(data);
+#endif
+}
diff --git a/src/readline.cpp b/src/readline.cpp
new file mode 100644
index 0000000..ab443ff
--- /dev/null
+++ b/src/readline.cpp
@@ -0,0 +1,115 @@
+/*
+ * This file part of sdcv - console version of Stardict program
+ * http://sdcv.sourceforge.net
+ * Copyright (C) 2005 Evgeniy <dushistov@mail.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <cstdio>
+#include <cstdlib>
+#ifdef WITH_READLINE
+#include <readline/history.h>
+#include <readline/readline.h>
+#endif
+#include <glib.h>
+
+#include "utils.hpp"
+
+#include "readline.hpp"
+
+// Reads one line from `in` into `str`, without the terminating '\n'.
+// Returns true when a line was read - including a last line that is not
+// terminated by a newline - and false when the end of the input was
+// reached before any character could be read (`str` is then empty).
+bool stdio_getline(FILE *in, std::string &str)
+{
+    assert(in != nullptr);
+    str.clear();
+    int ch;
+    while ((ch = fgetc(in)) != EOF && ch != '\n')
+        str += static_cast<char>(ch);
+
+    // An unterminated final line is still a line: report it, so callers
+    // looping on the result do not silently drop it.
+    return ch != EOF || !str.empty();
+}
+
+#ifndef WITH_READLINE
+namespace
+{
+// Line reader used when the program is built without GNU readline:
+// prints the prompt and reads a plain line from stdin. No history.
+class dummy_readline : public IReadLine
+{
+public:
+    bool read(const std::string &banner, std::string &line) override
+    {
+        printf("%s", banner.c_str());
+        return stdio_getline(stdin, line);
+    }
+};
+}
+#else
+
+namespace
+{
+// Path of the persistent history file: <home directory>/.sdcv_history
+static std::string history_file_name()
+{
+    return std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".sdcv_history";
+}
+
+// Line reader backed by GNU readline. The history is loaded on
+// construction; on destruction it is written back and the file is
+// truncated to SDCV_HISTSIZE entries (2000 when the variable is unset or
+// not a number).
+class real_readline : public IReadLine
+{
+
+public:
+    real_readline()
+    {
+        rl_readline_name = "sdcv";
+        using_history();
+        read_history(history_file_name().c_str());
+    }
+
+    ~real_readline()
+    {
+        const std::string histname = history_file_name();
+        write_history(histname.c_str());
+
+        int hist_size = 2000;
+        const gchar *hist_size_str = g_getenv("SDCV_HISTSIZE");
+        if (hist_size_str != nullptr) {
+            int parsed;
+            if (sscanf(hist_size_str, "%d", &parsed) >= 1)
+                hist_size = parsed;
+        }
+        history_truncate_file(histname.c_str(), hist_size);
+    }
+
+    // Returns false at end of input; `line` is left untouched then.
+    bool read(const std::string &banner, std::string &line) override
+    {
+        char *phrase = readline(banner.c_str());
+        if (phrase == nullptr)
+            return false;
+        line = phrase;
+        free(phrase); // readline() hands over a malloc'ed buffer
+        return true;
+    }
+
+    void add_to_history(const std::string &phrase) override
+    {
+        add_history(phrase.c_str());
+    }
+};
+}
+#endif //WITH_READLINE
+
+// Creates the line reader selected at build time (GNU readline when
+// WITH_READLINE is defined, plain stdio otherwise). The caller owns the
+// returned object and must delete it.
+IReadLine *create_readline_object()
+{
+#ifdef WITH_READLINE
+    typedef real_readline readline_impl;
+#else
+    typedef dummy_readline readline_impl;
+#endif
+    return new readline_impl;
+}
diff --git a/src/readline.hpp b/src/readline.hpp
new file mode 100644
index 0000000..e14ae00
--- /dev/null
+++ b/src/readline.hpp
@@ -0,0 +1,15 @@
+#pragma once
+
+#include <string>
+
+// Interface of an interactive line reader.
+class IReadLine
+{
+public:
+ virtual ~IReadLine() {}
+ // Shows banner as the prompt and reads one line into line.
+ // Returns false when no line could be read (end of input).
+ virtual bool read(const std::string &banner, std::string &line) = 0;
+ // Records a phrase in the input history; readers without a history
+ // keep this default, which does nothing.
+ virtual void add_to_history(const std::string &) {}
+};
+
+extern std::string sdcv_readline;
+extern IReadLine *create_readline_object();
+extern bool stdio_getline(FILE *in, std::string &str);
diff --git a/src/sdcv.cpp b/src/sdcv.cpp
new file mode 100644
index 0000000..0c75eb1
--- /dev/null
+++ b/src/sdcv.cpp
@@ -0,0 +1,260 @@
+/*
+ * This file part of sdcv - console version of Stardict program
+ * http://sdcv.sourceforge.net
+ * Copyright (C) 2003-2006 Evgeniy <dushistov@mail.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <algorithm>
+#include <cerrno>
+#include <clocale>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <glib.h>
+#include <glib/gi18n.h>
+#include <glib/gstdio.h>
+
+#include "libwrapper.hpp"
+#include "readline.hpp"
+#include "utils.hpp"
+
+static const char gVersion[] = VERSION;
+
+namespace
+{
+// Frees a NULL-terminated array of strings together with the strings it
+// contains. A null `arr` is accepted and ignored (g_strfreev does both).
+static void free_str_array(gchar **arr)
+{
+    g_strfreev(arr);
+}
+}
+namespace glib
+{
+// Owning handle for a NULL-terminated string array, released with
+// free_str_array().
+// NOTE(review): ResourceWrapper is declared elsewhere (not in this part of
+// the file); when it invokes the deleter is not visible here.
+using StrArr = ResourceWrapper<gchar *, gchar *, free_str_array>;
+}
+
+static void list_dicts(const std::list<std::string> &dicts_dir_list, bool use_json);
+
+// Program entry point: parses the command line, works out which
+// dictionaries to load and in which order, then translates the words given
+// as arguments or, without arguments, read interactively.
+// Returns EXIT_FAILURE for invalid options, for an unknown dictionary name
+// given with --use-dict, when a phrase cannot be processed, or on an
+// internal error; EXIT_SUCCESS otherwise.
+int main(int argc, char *argv[]) try {
+    setlocale(LC_ALL, "");
+#if ENABLE_NLS
+    bindtextdomain("sdcv",
+                   //"./locale"//< for testing
+                   GETTEXT_TRANSLATIONS_PATH //< should be
+                   );
+    textdomain("sdcv");
+#endif
+
+    gboolean show_version = FALSE;
+    gboolean show_list_dicts = FALSE;
+    glib::StrArr use_dict_list;
+    gboolean non_interactive = FALSE;
+    gboolean json_output = FALSE;
+    gboolean no_fuzzy = FALSE;
+    gboolean utf8_output = FALSE;
+    gboolean utf8_input = FALSE;
+    glib::CharStr opt_data_dir;
+    gboolean only_data_dir = FALSE;
+    gboolean colorize = FALSE;
+
+    const GOptionEntry entries[] = {
+        { "version", 'v', 0, G_OPTION_ARG_NONE, &show_version,
+          _("display version information and exit"), nullptr },
+        { "list-dicts", 'l', 0, G_OPTION_ARG_NONE, &show_list_dicts,
+          _("display list of available dictionaries and exit"), nullptr },
+        { "use-dict", 'u', 0, G_OPTION_ARG_STRING_ARRAY, get_addr(use_dict_list),
+          _("for search use only dictionary with this bookname"),
+          _("bookname") },
+        { "non-interactive", 'n', 0, G_OPTION_ARG_NONE, &non_interactive,
+          _("for use in scripts"), nullptr },
+        { "json-output", 'j', 0, G_OPTION_ARG_NONE, &json_output,
+          _("print the result formatted as JSON"), nullptr },
+        { "exact-search", 'e', 0, G_OPTION_ARG_NONE, &no_fuzzy,
+          _("do not fuzzy-search for similar words, only return exact matches"), nullptr },
+        { "utf8-output", '0', 0, G_OPTION_ARG_NONE, &utf8_output,
+          _("output must be in utf8"), nullptr },
+        { "utf8-input", '1', 0, G_OPTION_ARG_NONE, &utf8_input,
+          _("input of sdcv in utf8"), nullptr },
+        { "data-dir", '2', 0, G_OPTION_ARG_STRING, get_addr(opt_data_dir),
+          _("use this directory as path to stardict data directory"),
+          _("path/to/dir") },
+        { "only-data-dir", 'x', 0, G_OPTION_ARG_NONE, &only_data_dir,
+          _("only use the dictionaries in data-dir, do not search in user and system directories"), nullptr },
+        { "color", 'c', 0, G_OPTION_ARG_NONE, &colorize,
+          _("colorize the output"), nullptr },
+        {},
+    };
+
+    glib::Error error;
+    GOptionContext *context = g_option_context_new(_(" words"));
+    g_option_context_set_help_enabled(context, TRUE);
+    g_option_context_add_main_entries(context, entries, nullptr);
+    const gboolean parse_res = g_option_context_parse(context, &argc, &argv, get_addr(error));
+    g_option_context_free(context);
+    if (!parse_res) {
+        fprintf(stderr, _("Invalid command line arguments: %s\n"),
+                error->message);
+        return EXIT_FAILURE;
+    }
+
+    if (show_version) {
+        printf(_("Console version of Stardict, version %s\n"), gVersion);
+        return EXIT_SUCCESS;
+    }
+
+    // Data directory: --data-dir, else $STARDICT_DATA_DIR, else the
+    // system default (the last two only without --only-data-dir).
+    const gchar *stardict_data_dir = g_getenv("STARDICT_DATA_DIR");
+    std::string data_dir;
+    if (!opt_data_dir) {
+        if (!only_data_dir) {
+            if (stardict_data_dir)
+                data_dir = stardict_data_dir;
+            else
+                data_dir = "/usr/share/stardict/dic";
+        }
+    } else {
+        data_dir = get_impl(opt_data_dir);
+    }
+
+    const char *homedir = g_getenv("HOME");
+    if (!homedir)
+        homedir = g_get_home_dir();
+
+    std::list<std::string> dicts_dir_list;
+    if (!only_data_dir)
+        dicts_dir_list.push_back(std::string(homedir) + G_DIR_SEPARATOR + ".stardict" + G_DIR_SEPARATOR + "dic");
+    dicts_dir_list.push_back(data_dir);
+    if (show_list_dicts) {
+        list_dicts(dicts_dir_list, json_output);
+        return EXIT_SUCCESS;
+    }
+
+    std::list<std::string> disable_list;
+
+    // Map every available dictionary name to its .ifo file.
+    std::map<std::string, std::string> bookname_to_ifo;
+    for_each_file(dicts_dir_list, ".ifo", std::list<std::string>(), std::list<std::string>(),
+                  [&bookname_to_ifo](const std::string &fname, bool) {
+                      DictInfo dict_info;
+                      const bool load_ok = dict_info.load_from_ifo_file(fname, false);
+                      if (!load_ok)
+                          return;
+                      bookname_to_ifo[dict_info.bookname] = dict_info.ifo_file_name;
+                  });
+
+    std::list<std::string> order_list;
+    if (use_dict_list != nullptr) {
+        // Disable every dictionary that was not named with --use-dict.
+        for (auto &&x : bookname_to_ifo) {
+            gchar **p = get_impl(use_dict_list);
+            for (; *p != nullptr; ++p)
+                if (x.first.compare(*p) == 0) {
+                    break;
+                }
+            if (*p == nullptr) {
+                disable_list.push_back(x.second);
+            }
+        }
+
+        // add bookname to list
+        for (gchar **p = get_impl(use_dict_list); *p != nullptr; ++p) {
+            const auto it = bookname_to_ifo.find(*p);
+            if (it == bookname_to_ifo.end()) {
+                // A name that matches no dictionary is a user error; say
+                // so instead of failing with an internal map exception.
+                fprintf(stderr, _("Unknown dictionary: %s\n"), *p);
+                return EXIT_FAILURE;
+            }
+            order_list.push_back(it->second);
+        }
+    } else {
+        const std::string odering_cfg_file = std::string(homedir) + G_DIR_SEPARATOR_S ".sdcv_ordering";
+        FILE *ordering_file = fopen(odering_cfg_file.c_str(), "r");
+        if (ordering_file != nullptr) {
+            std::string line;
+            while (stdio_getline(ordering_file, line)) {
+                const auto it = bookname_to_ifo.find(line);
+                if (it != bookname_to_ifo.end()) {
+                    order_list.push_back(it->second);
+                } else {
+                    // A stale entry (e.g. a removed dictionary) must not
+                    // make the program unusable: warn and go on.
+                    fprintf(stderr, _("Unknown dictionary: %s\n"), line.c_str());
+                }
+            }
+            fclose(ordering_file);
+        }
+    }
+
+    const std::string conf_dir = std::string(g_get_home_dir()) + G_DIR_SEPARATOR + ".stardict";
+    if (g_mkdir(conf_dir.c_str(), S_IRWXU) == -1 && errno != EEXIST) {
+        fprintf(stderr, _("g_mkdir failed: %s\n"), strerror(errno));
+    }
+
+    Library lib(utf8_input, utf8_output, colorize, json_output, no_fuzzy);
+    lib.load(dicts_dir_list, order_list, disable_list);
+
+    std::unique_ptr<IReadLine> io(create_readline_object());
+    // g_option_context_parse() removed the options it recognised from
+    // argv, so what is left from optind on are the words to translate.
+    if (optind < argc) {
+        for (int i = optind; i < argc; ++i)
+            if (!lib.process_phrase(argv[i], *io, non_interactive)) {
+                return EXIT_FAILURE;
+            }
+    } else if (!non_interactive) {
+
+        std::string phrase;
+        while (io->read(_("Enter word or phrase: "), phrase)) {
+            if (!lib.process_phrase(phrase.c_str(), *io))
+                return EXIT_FAILURE;
+            phrase.clear();
+        }
+
+        putchar('\n');
+    } else {
+        fprintf(stderr, _("There are no words/phrases to translate.\n"));
+    }
+    return EXIT_SUCCESS;
+} catch (const std::exception &ex) {
+    fprintf(stderr, "Internal error: %s\n", ex.what());
+    exit(EXIT_FAILURE);
+}
+
+// Prints the name and word count of every dictionary found in
+// `dicts_dir_list`: as a table with a header line, or as a JSON array of
+// objects when `use_json` is set. Dictionaries whose .ifo file cannot be
+// loaded are left out.
+static void list_dicts(const std::list<std::string> &dicts_dir_list, bool use_json)
+{
+    bool first_entry = true;
+    if (use_json)
+        fputc('[', stdout);
+    else
+        printf(_("Dictionary's name Word count\n"));
+
+    auto print_entry = [use_json, &first_entry](const std::string &filename, bool) -> void {
+        DictInfo dict_info;
+        if (!dict_info.load_from_ifo_file(filename, false))
+            return;
+
+        const std::string bookname = utf8_to_locale_ign_err(dict_info.bookname);
+        if (!use_json) {
+            printf("%s %d\n", bookname.c_str(), dict_info.wordcount);
+            return;
+        }
+
+        if (!first_entry)
+            fputc(',', stdout); // comma between entries
+        first_entry = false;
+        printf("{\"name\": \"%s\", \"wordcount\": \"%d\"}", json_escape_string(bookname).c_str(), dict_info.wordcount);
+    };
+
+    // no ordering and nothing disabled: list everything that is found
+    std::list<std::string> order_list, disable_list;
+    for_each_file(dicts_dir_list, ".ifo", order_list, disable_list, print_entry);
+
+    if (use_json)
+        fputs("]\n", stdout);
+}
diff --git a/src/stardict_lib.cpp b/src/stardict_lib.cpp
new file mode 100644
index 0000000..63e23f5
--- /dev/null
+++ b/src/stardict_lib.cpp
@@ -0,0 +1,1642 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <algorithm>
+#include <cctype>
+#include <cstring>
+#include <stdexcept>
+
+#include <glib/gstdio.h>
+#include <sys/stat.h>
+#include <zlib.h>
+
+#include "distance.hpp"
+#include "mapfile.hpp"
+#include "utils.hpp"
+
+#include "stardict_lib.hpp"
+
+#define TO_STR2(xstr) #xstr
+#define TO_STR1(xstr) TO_STR2(xstr)
+// TO_STR1 expands its argument (e.g. __LINE__) before TO_STR2 stringifies it. THROW_IF_ERROR(expr) asserts expr and, if expr is false, throws std::runtime_error naming the expression, file and line. Note that expr appears twice in the expansion, so it should be free of side effects.
+#define THROW_IF_ERROR(expr) \
+ do { \
+ assert((expr)); \
+ if (!(expr)) \
+ throw std::runtime_error(#expr " not true at " __FILE__ ": " TO_STR1(__LINE__)); \
+ } while (false)
+
+// Note: see doc/DICTFILE_FORMAT for a description of the dictionary
+// file format.
+
+namespace
+{
+// A matched word together with its distance value.
+struct Fuzzystruct {
+    char *pMatchWord;
+    int iMatchWordDistance;
+};
+
+// True when inputchar is one of the ASCII vowels a, e, i, o, u (either case).
+static inline bool bIsVowel(gchar inputchar)
+{
+    switch (g_ascii_toupper(inputchar)) {
+    case 'A':
+    case 'E':
+    case 'I':
+    case 'O':
+    case 'U':
+        return true;
+    default:
+        return false;
+    }
+}
+
+// True when every byte of str is an ASCII character.
+// isascii() is used instead of a "< 0" test because plain char is
+// unsigned on some platforms (e.g. ARM).
+static bool bIsPureEnglish(const gchar *str)
+{
+    for (const gchar *p = str; *p != 0; ++p) {
+        if (!isascii(*p))
+            return false;
+    }
+    return true;
+}
+
+// Ordering used for dictionary keys: ASCII case-insensitive comparison first,
+// with a plain strcmp() as tie-breaker when that reports equality.
+static inline gint stardict_strcmp(const gchar *s1, const gchar *s2)
+{
+    const gint caseless = g_ascii_strcasecmp(s1, s2);
+    return caseless != 0 ? caseless : strcmp(s1, s2);
+}
+
+// Lower-case a zero-terminated gunichar string in place.
+static void unicode_strdown(gunichar *str)
+{
+    for (gunichar *p = str; *p; ++p)
+        *p = g_unichar_tolower(*p);
+}
+}
+
+/**
+ * Parse a StarDict ".ifo" file and fill the members of this DictInfo.
+ *
+ * The file must start (optionally after a UTF-8 BOM) with the magic line for
+ * the requested kind; "wordcount", "idxfilesize" (or "tdxfilesize" for tree
+ * dictionaries) and "bookname" are mandatory, all other keys are optional.
+ *
+ * @param ifofilename path of the .ifo file
+ * @param istreedict  true to expect a tree dictionary, false for a normal one
+ * @return true on success; false if the file cannot be read, the magic line
+ *         does not match or a mandatory key is missing
+ */
+bool DictInfo::load_from_ifo_file(const std::string &ifofilename,
+                                  bool istreedict)
+{
+    ifo_file_name = ifofilename;
+    glib::CharStr buffer;
+    if (!g_file_get_contents(ifofilename.c_str(), get_addr(buffer), nullptr, nullptr))
+        return false;
+
+    static const char TREEDICT_MAGIC_DATA[] = "StarDict's treedict ifo file";
+    static const char DICT_MAGIC_DATA[] = "StarDict's dict ifo file";
+
+    const gchar *magic_data = istreedict ? TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA;
+    static const unsigned char utf8_bom[] = { 0xEF, 0xBB, 0xBF, '\0' };
+
+    // Skip an optional UTF-8 byte order mark before checking the magic line.
+    const gchar *start = get_impl(buffer);
+    if (g_str_has_prefix(start, (const gchar *)(utf8_bom)))
+        start += 3;
+    if (!g_str_has_prefix(start, magic_data))
+        return false;
+
+    // Start one character before the end of the magic text so that the
+    // newline terminating the magic line is part of the searched range.
+    const gchar *const p1 = start + strlen(magic_data) - 1;
+
+    // Look for key (given as "\n<name>=") and copy the rest of that line into
+    // value. A last line without trailing newline extends to the end of the
+    // buffer instead of dereferencing a null strchr() result.
+    const auto get_value = [p1](const char *key, std::string &value) -> bool {
+        const gchar *begin = strstr(p1, key);
+        if (begin == nullptr)
+            return false;
+        begin += strlen(key);
+        const gchar *end = strchr(begin, '\n');
+        if (end == nullptr)
+            end = begin + strlen(begin);
+        value.assign(begin, end - begin);
+        return true;
+    };
+
+    std::string number;
+    if (!get_value("\nwordcount=", number))
+        return false;
+    wordcount = atol(number.c_str());
+
+    if (!get_value(istreedict ? "\ntdxfilesize=" : "\nidxfilesize=", number))
+        return false;
+    index_file_size = atol(number.c_str());
+
+    if (!get_value("\nbookname=", bookname))
+        return false;
+
+    // Optional keys: the members keep their previous value when absent.
+    get_value("\nauthor=", author);
+    get_value("\nemail=", email);
+    get_value("\nwebsite=", website);
+    get_value("\ndate=", date);
+    get_value("\ndescription=", description);
+    get_value("\nsametypesequence=", sametypesequence);
+
+    syn_wordcount = 0;
+    if (get_value("\nsynwordcount=", number))
+        syn_wordcount = atol(number.c_str());
+
+    return true;
+}
+
+gchar *DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size) // returns the article stored at idxitem_offset as [guint32 total size][fields, each prefixed by its type char]
+{
+ for (int i = 0; i < WORDDATA_CACHE_NUM; i++) // serve from the cache when this offset was read before
+ if (cache[i].data && cache[i].offset == idxitem_offset)
+ return cache[i].data;
+ // not cached: position the plain .dict file (the dictdzfile path takes the offset as an argument instead)
+ if (dictfile)
+ fseek(dictfile, idxitem_offset, SEEK_SET);
+ // the returned buffer is stored in the cache below and freed when its slot is reused
+ gchar *data;
+ if (!sametypesequence.empty()) {
+ glib::CharStr origin_data((gchar *)g_malloc(idxitem_size));
+ // read the raw article, which carries no type characters in this mode
+ if (dictfile) {
+ const size_t nitems = fread(get_impl(origin_data), idxitem_size, 1, dictfile);
+ THROW_IF_ERROR(nitems == 1);
+ } else
+ dictdzfile->read(get_impl(origin_data), idxitem_offset, idxitem_size);
+ // compute the size of the expanded buffer
+ guint32 data_size;
+ gint sametypesequence_len = sametypesequence.length();
+ // the raw data omits the sametypesequence_len type characters; add room for them and for the leading guint32 size field.
+ data_size = idxitem_size + sizeof(guint32) + sametypesequence_len;
+ // the last field is stored without its terminator/size: add sizeof(gchar) if it is delimited by a trailing '\0',
+ // or sizeof(guint32) if it is delimited by a leading guint32 size.
+ switch (sametypesequence[sametypesequence_len - 1]) {
+ case 'm':
+ case 't':
+ case 'y':
+ case 'l':
+ case 'g':
+ case 'x':
+ case 'k':
+ data_size += sizeof(gchar);
+ break;
+ case 'W':
+ case 'P':
+ data_size += sizeof(guint32);
+ break;
+ default:
+ if (g_ascii_isupper(sametypesequence[sametypesequence_len - 1])) // unknown upper-case types are treated as size-prefixed, others as '\0'-terminated
+ data_size += sizeof(guint32);
+ else
+ data_size += sizeof(gchar);
+ break;
+ }
+ data = (gchar *)g_malloc(data_size);
+ gchar *p1, *p2; // p1: write cursor in data, p2: read cursor in origin_data
+ p1 = data + sizeof(guint32);
+ p2 = get_impl(origin_data);
+ guint32 sec_size;
+ // copy every field except the last, inserting its type character in front.
+ for (int i = 0; i < sametypesequence_len - 1; i++) {
+ *p1 = sametypesequence[i];
+ p1 += sizeof(gchar);
+ switch (sametypesequence[i]) {
+ case 'm':
+ case 't':
+ case 'y':
+ case 'l':
+ case 'g':
+ case 'x':
+ case 'k':
+ sec_size = strlen(p2) + 1; // string field including its '\0'
+ memcpy(p1, p2, sec_size);
+ p1 += sec_size;
+ p2 += sec_size;
+ break;
+ case 'W':
+ case 'P':
+ sec_size = get_uint32(p2); // NOTE(review): size is used as returned by get_uint32, without byte-order conversion - confirm against the format
+ sec_size += sizeof(guint32);
+ memcpy(p1, p2, sec_size);
+ p1 += sec_size;
+ p2 += sec_size;
+ break;
+ default:
+ if (g_ascii_isupper(sametypesequence[i])) {
+ sec_size = get_uint32(p2);
+ sec_size += sizeof(guint32);
+ } else {
+ sec_size = strlen(p2) + 1;
+ }
+ memcpy(p1, p2, sec_size);
+ p1 += sec_size;
+ p2 += sec_size;
+ break;
+ }
+ }
+ // the last field occupies whatever remains of the raw article.
+ sec_size = idxitem_size - (p2 - get_impl(origin_data));
+ *p1 = sametypesequence[sametypesequence_len - 1];
+ p1 += sizeof(gchar);
+ switch (sametypesequence[sametypesequence_len - 1]) {
+ case 'm':
+ case 't':
+ case 'y':
+ case 'l':
+ case 'g':
+ case 'x':
+ case 'k':
+ memcpy(p1, p2, sec_size);
+ p1 += sec_size;
+ *p1 = '\0'; // append the terminating '\0' that the file omits
+ break;
+ case 'W':
+ case 'P':
+ set_uint32(p1, sec_size); // prepend the size that the file omits
+ p1 += sizeof(guint32);
+ memcpy(p1, p2, sec_size);
+ break;
+ default:
+ if (g_ascii_isupper(sametypesequence[sametypesequence_len - 1])) {
+ set_uint32(p1, sec_size);
+ p1 += sizeof(guint32);
+ memcpy(p1, p2, sec_size);
+ } else {
+ memcpy(p1, p2, sec_size);
+ p1 += sec_size;
+ *p1 = '\0';
+ }
+ break;
+ }
+ set_uint32(data, data_size);
+ } else {
+ data = (gchar *)g_malloc(idxitem_size + sizeof(guint32)); // the article is copied as is behind the size field
+ if (dictfile) {
+ const size_t nitems = fread(data + sizeof(guint32), idxitem_size, 1, dictfile);
+ THROW_IF_ERROR(nitems == 1);
+ } else
+ dictdzfile->read(data + sizeof(guint32), idxitem_offset, idxitem_size);
+ set_uint32(data, idxitem_size + sizeof(guint32));
+ }
+ g_free(cache[cache_cur].data); // drop the entry that occupied this cache slot
+ // store the new buffer and advance the slot index, wrapping around at WORDDATA_CACHE_NUM
+ cache[cache_cur].data = data;
+ cache[cache_cur].offset = idxitem_offset;
+ cache_cur++;
+ if (cache_cur == WORDDATA_CACHE_NUM)
+ cache_cur = 0;
+ return data;
+}
+
+bool DictBase::SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data) // reads the article into origin_data and returns true if every search word occurs in one of its text fields
+{
+ int nWord = SearchWords.size();
+ std::vector<bool> WordFind(nWord, false); // WordFind[j] is set once SearchWords[j] was seen
+ int nfound = 0;
+ // load the raw article from the plain file or from the dictzip file
+ if (dictfile)
+ fseek(dictfile, idxitem_offset, SEEK_SET);
+ if (dictfile) {
+ const size_t nitems = fread(origin_data, idxitem_size, 1, dictfile);
+ THROW_IF_ERROR(nitems == 1);
+ } else
+ dictdzfile->read(origin_data, idxitem_offset, idxitem_size);
+ gchar *p = origin_data;
+ guint32 sec_size;
+ int j;
+ if (!sametypesequence.empty()) { // field types come from sametypesequence, the data has no type characters
+ gint sametypesequence_len = sametypesequence.length();
+ for (int i = 0; i < sametypesequence_len - 1; i++) { // all fields but the last
+ switch (sametypesequence[i]) {
+ case 'm':
+ case 't':
+ case 'y':
+ case 'l':
+ case 'g':
+ case 'x':
+ case 'k':
+ for (j = 0; j < nWord; j++)
+ if (!WordFind[j] && strstr(p, SearchWords[j].c_str())) {
+ WordFind[j] = true;
+ ++nfound;
+ }
+ // stop as soon as every word was found
+ if (nfound == nWord)
+ return true;
+ sec_size = strlen(p) + 1;
+ p += sec_size;
+ break;
+ default: // fields of other types are skipped without searching
+ if (g_ascii_isupper(sametypesequence[i])) {
+ sec_size = get_uint32(p);
+ sec_size += sizeof(guint32);
+ } else {
+ sec_size = strlen(p) + 1;
+ }
+ p += sec_size;
+ }
+ }
+ switch (sametypesequence[sametypesequence_len - 1]) { // the last field has no terminator, so the search is bounded by the remaining size
+ case 'm':
+ case 't':
+ case 'y':
+ case 'l':
+ case 'g':
+ case 'x':
+ case 'k':
+ sec_size = idxitem_size - (p - origin_data);
+ for (j = 0; j < nWord; j++)
+ if (!WordFind[j] && g_strstr_len(p, sec_size, SearchWords[j].c_str())) {
+ WordFind[j] = true;
+ ++nfound;
+ }
+ // all words found in the last field
+ if (nfound == nWord)
+ return true;
+ break;
+ }
+ } else {
+ while (guint32(p - origin_data) < idxitem_size) { // each field starts with its own type character at *p
+ switch (*p) {
+ case 'm':
+ case 't':
+ case 'y':
+ case 'l':
+ case 'g':
+ case 'x':
+ case 'k':
+ for (j = 0; j < nWord; j++)
+ if (!WordFind[j] && strstr(p, SearchWords[j].c_str())) {
+ WordFind[j] = true;
+ ++nfound;
+ }
+ // stop as soon as every word was found
+ if (nfound == nWord)
+ return true;
+ sec_size = strlen(p) + 1; // counts the type character as part of the string
+ p += sec_size;
+ break;
+ default:
+ if (g_ascii_isupper(*p)) {
+ sec_size = get_uint32(p); // NOTE(review): reads the size at the type character's address rather than after it - looks off by one byte, confirm against the format
+ sec_size += sizeof(guint32);
+ } else {
+ sec_size = strlen(p) + 1;
+ }
+ p += sec_size;
+ }
+ }
+ }
+ return false;
+}
+
+namespace
+{
+class OffsetIndex : public IIndexFile // index reader that keeps per-page file offsets in memory and reads pages of the index file on demand
+{
+public:
+ OffsetIndex()
+ : idxfile(nullptr)
+ {
+ }
+ ~OffsetIndex()
+ {
+ if (idxfile)
+ fclose(idxfile);
+ }
+ bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) override;
+ const gchar *get_key(glong idx) override;
+ void get_data(glong idx) override { get_key(idx); } // get_key() also records the entry's offset and size
+ const gchar *get_key_and_data(glong idx) override
+ {
+ return get_key(idx);
+ }
+ bool lookup(const char *str, glong &idx) override;
+
+private:
+ static const gint ENTR_PER_PAGE = 32; // number of index entries grouped into one page
+ static const char *CACHE_MAGIC; // header text written at the start of a cache file
+
+ std::vector<guint32> wordoffset; // file offset of the first entry of each page, followed by one end offset
+ FILE *idxfile; // index file opened by load(), closed by the destructor
+ gulong wordcount;
+
+ gchar wordentry_buf[256 + sizeof(guint32) * 2]; // The length of "word_str" should be less than 256. See doc/DICTFILE_FORMAT.
+ struct index_entry { // a remembered key together with the index it belongs to
+ glong idx;
+ std::string keystr;
+ void assign(glong i, const std::string &str)
+ {
+ idx = i;
+ keystr.assign(str);
+ }
+ };
+ index_entry first, last, middle, real_last; // first key of the first/last/middle page, and the very last key of the index
+
+ struct page_entry { // one decoded entry of the currently loaded page
+ gchar *keystr;
+ guint32 off, size;
+ };
+ std::vector<gchar> page_data; // raw bytes of the currently loaded page; page_entry::keystr points into it
+ struct page_t {
+ glong idx = -1; // index of the loaded page, -1 while none is loaded
+ page_entry entries[ENTR_PER_PAGE];
+
+ page_t() {}
+ void fill(gchar *data, gint nent, glong idx_);
+ } page;
+ gulong load_page(glong page_idx);
+ const gchar *read_first_on_page_key(glong page_idx);
+ const gchar *get_first_on_page_key(glong page_idx);
+ bool load_cache(const std::string &url);
+ bool save_cache(const std::string &url, bool verbose);
+ static std::list<std::string> get_cache_variant(const std::string &url);
+};
+
+const char *OffsetIndex::CACHE_MAGIC = "StarDict's Cache, Version: 0.1";
+
+class WordListIndex : public IIndexFile // index reader that holds the whole (gzip-readable) index file in memory
+{
+public:
+ WordListIndex()
+ : idxdatabuf(nullptr)
+ {
+ }
+ ~WordListIndex() { g_free(idxdatabuf); }
+ bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) override;
+ const gchar *get_key(glong idx) override { return wordlist[idx]; }
+ void get_data(glong idx) override;
+ const gchar *get_key_and_data(glong idx) override
+ {
+ get_data(idx);
+ return get_key(idx);
+ }
+ bool lookup(const char *str, glong &idx) override;
+
+private:
+ gchar *idxdatabuf; // buffer allocated by load(), released with g_free() in the destructor
+ std::vector<gchar *> wordlist; // wordlist[i] points at the key of entry i inside idxdatabuf; one extra slot marks the end
+};
+
+// Decode nent consecutive index entries from data into entries[] and remember
+// idx_ as the number of the loaded page. Each entry is a zero-terminated key
+// followed by two 32-bit big-endian values (offset, size).
+void OffsetIndex::page_t::fill(gchar *data, gint nent, glong idx_)
+{
+    idx = idx_;
+    gchar *cursor = data;
+    for (gint n = 0; n < nent; ++n) {
+        page_entry &entry = entries[n];
+        entry.keystr = cursor;
+        cursor += strlen(cursor) + 1; // skip the key and its terminator
+        entry.off = g_ntohl(get_uint32(cursor));
+        cursor += sizeof(guint32);
+        entry.size = g_ntohl(get_uint32(cursor));
+        cursor += sizeof(guint32);
+    }
+}
+
+// Read the beginning of page page_idx from the index file into wordentry_buf
+// and return it; the buffer then starts with the first key of that page.
+// At most sizeof(wordentry_buf) bytes are read.
+inline const gchar *OffsetIndex::read_first_on_page_key(glong page_idx)
+{
+    const guint32 page_begin = wordoffset[page_idx];
+    fseek(idxfile, page_begin, SEEK_SET);
+    guint32 page_size = wordoffset[page_idx + 1] - page_begin;
+    const size_t to_read = std::min(sizeof(wordentry_buf), static_cast<size_t>(page_size));
+    const size_t nitems = fread(wordentry_buf, to_read, 1, idxfile);
+    THROW_IF_ERROR(nitems == 1);
+    //TODO: check returned values, deal with word entry that strlen>255.
+    return wordentry_buf;
+}
+
+// Return the first key of page page_idx, using the remembered keys of the
+// first, middle and last page when possible and reading the file otherwise.
+inline const gchar *OffsetIndex::get_first_on_page_key(glong page_idx)
+{
+    if (page_idx == middle.idx)
+        return middle.keystr.c_str();
+
+    // Below the middle only `first` can match, above it only `last`.
+    const index_entry &edge = page_idx < middle.idx ? first : last;
+    if (page_idx == edge.idx)
+        return edge.keystr.c_str();
+
+    return read_first_on_page_key(page_idx);
+}
+
+/**
+ * Try to fill `wordoffset` (already resized by the caller) from a cache file.
+ *
+ * Every candidate returned by get_cache_variant() is tried in order. A
+ * candidate is used only if it exists, is not older than the index file
+ * `url`, is large enough to hold the magic header plus all offsets and starts
+ * with CACHE_MAGIC.
+ *
+ * @param url path of the index file the cache belongs to
+ * @return true if the offsets were loaded from a cache file
+ */
+bool OffsetIndex::load_cache(const std::string &url)
+{
+    const std::list<std::string> vars = get_cache_variant(url);
+    const size_t magic_len = strlen(CACHE_MAGIC);
+    const size_t offsets_bytes = wordoffset.size() * sizeof(wordoffset[0]);
+
+    for (const std::string &item : vars) {
+        struct ::stat idxstat, cachestat;
+        if (g_stat(url.c_str(), &idxstat) != 0 || g_stat(item.c_str(), &cachestat) != 0)
+            continue;
+        if (cachestat.st_mtime < idxstat.st_mtime)
+            continue;
+        // A truncated or foreign cache file must not be read past its end.
+        if (cachestat.st_size < 0
+            || static_cast<size_t>(cachestat.st_size) < magic_len + offsets_bytes)
+            continue;
+        MapFile mf;
+        if (!mf.open(item.c_str(), cachestat.st_size))
+            continue;
+        if (strncmp(mf.begin(), CACHE_MAGIC, magic_len) != 0)
+            continue;
+        memcpy(wordoffset.data(), mf.begin() + magic_len, offsets_bytes);
+        return true;
+    }
+
+    return false;
+}
+
+// List the places where the offset cache of index file url may live:
+// always "<url>.oft", and additionally "<user cache dir>/sdcv/<basename>.oft"
+// when that directory exists or can be created.
+std::list<std::string> OffsetIndex::get_cache_variant(const std::string &url)
+{
+    std::list<std::string> res = { url + ".oft" };
+
+    // Make sure the user cache directory itself is there.
+    if (!g_file_test(g_get_user_cache_dir(), G_FILE_TEST_EXISTS)) {
+        if (g_mkdir(g_get_user_cache_dir(), 0700) == -1)
+            return res;
+    }
+
+    const std::string cache_dir = std::string(g_get_user_cache_dir()) + G_DIR_SEPARATOR_S + "sdcv";
+
+    // Create our sub-directory, or verify that the existing entry is a directory.
+    if (g_file_test(cache_dir.c_str(), G_FILE_TEST_EXISTS)) {
+        if (!g_file_test(cache_dir.c_str(), G_FILE_TEST_IS_DIR))
+            return res;
+    } else if (g_mkdir(cache_dir.c_str(), 0700) == -1) {
+        return res;
+    }
+
+    gchar *file_name = g_path_get_basename(url.c_str());
+    res.push_back(cache_dir + G_DIR_SEPARATOR_S + file_name + ".oft");
+    g_free(file_name);
+    return res;
+}
+
+/**
+ * Write `wordoffset` to the first cache location that accepts it.
+ *
+ * The cache file consists of CACHE_MAGIC followed by the raw offset array.
+ * A candidate that cannot be written completely is closed and removed, so
+ * that no file handle leaks and no truncated cache is left behind, and the
+ * next candidate is tried.
+ *
+ * @param url     path of the index file the cache belongs to
+ * @param verbose print a message on success
+ * @return true if a cache file was written
+ */
+bool OffsetIndex::save_cache(const std::string &url, bool verbose)
+{
+    const std::list<std::string> vars = get_cache_variant(url);
+    const size_t magic_len = strlen(CACHE_MAGIC);
+    for (const std::string &item : vars) {
+        FILE *out = fopen(item.c_str(), "wb");
+        if (!out)
+            continue;
+        const bool written = fwrite(CACHE_MAGIC, 1, magic_len, out) == magic_len
+            && fwrite(wordoffset.data(), sizeof(wordoffset[0]), wordoffset.size(), out) == wordoffset.size();
+        // fclose() flushes buffered data, so its result is part of the write status.
+        const bool closed = fclose(out) == 0;
+        if (!written || !closed) {
+            g_remove(item.c_str());
+            continue;
+        }
+        if (verbose) {
+            printf("save to cache %s\n", url.c_str());
+        }
+        return true;
+    }
+    return false;
+}
+
+bool OffsetIndex::load(const std::string &url, gulong wc, gulong fsize, bool verbose) // builds or loads the page offset table for index file url (wc entries, fsize bytes) and opens the file for reading
+{
+ wordcount = wc;
+ gulong npages = (wc - 1) / ENTR_PER_PAGE + 2; // number of pages plus one slot for the end offset; NOTE(review): wc == 0 would wrap around here - Dict::load_ifofile rejects a zero word count
+ wordoffset.resize(npages);
+ if (!load_cache(url)) { //map file will close after finish of block
+ MapFile map_file;
+ if (!map_file.open(url.c_str(), fsize))
+ return false;
+ const gchar *idxdatabuffer = map_file.begin();
+ // walk over all entries and record the offset of every ENTR_PER_PAGE-th one
+ const gchar *p1 = idxdatabuffer;
+ gulong index_size;
+ guint32 j = 0;
+ for (guint32 i = 0; i < wc; i++) {
+ index_size = strlen(p1) + 1 + 2 * sizeof(guint32); // key, terminator, and two 32-bit values
+ if (i % ENTR_PER_PAGE == 0) {
+ wordoffset[j] = p1 - idxdatabuffer;
+ ++j;
+ }
+ p1 += index_size;
+ }
+ wordoffset[j] = p1 - idxdatabuffer; // end offset behind the last entry
+ if (!save_cache(url, verbose))
+ fprintf(stderr, "cache update failed\n");
+ }
+ // from here on pages are read from the file on demand
+ if (!(idxfile = fopen(url.c_str(), "rb"))) {
+ wordoffset.resize(0);
+ return false;
+ }
+ // remember the first key of the first, last and middle page and the last key overall for lookup()
+ first.assign(0, read_first_on_page_key(0));
+ last.assign(wordoffset.size() - 2, read_first_on_page_key(wordoffset.size() - 2));
+ middle.assign((wordoffset.size() - 2) / 2, read_first_on_page_key((wordoffset.size() - 2) / 2));
+ real_last.assign(wc - 1, get_key(wc - 1));
+
+ return true;
+}
+
+// Make page page_idx the currently loaded page (reading it from the index
+// file unless it is loaded already) and return the number of entries on it.
+inline gulong OffsetIndex::load_page(glong page_idx)
+{
+    // Every page is full except possibly the last one.
+    gulong nentr = ENTR_PER_PAGE;
+    const bool is_last_page = page_idx == glong(wordoffset.size() - 2);
+    if (is_last_page) {
+        const gulong remainder = wordcount % ENTR_PER_PAGE;
+        if (remainder != 0)
+            nentr = remainder;
+    }
+
+    if (page_idx == page.idx)
+        return nentr;
+
+    page_data.resize(wordoffset[page_idx + 1] - wordoffset[page_idx]);
+    fseek(idxfile, wordoffset[page_idx], SEEK_SET);
+    const size_t nitems = fread(&page_data[0], 1, page_data.size(), idxfile);
+    THROW_IF_ERROR(nitems == page_data.size());
+
+    page.fill(&page_data[0], nentr, page_idx);
+    return nentr;
+}
+
+// Return the key of entry idx and store that entry's offset and size in
+// wordentry_offset / wordentry_size. Loads the containing page if needed.
+const gchar *OffsetIndex::get_key(glong idx)
+{
+    const glong page_no = idx / ENTR_PER_PAGE;
+    load_page(page_no);
+
+    const page_entry &entry = page.entries[idx % ENTR_PER_PAGE];
+    wordentry_offset = entry.off;
+    wordentry_size = entry.size;
+    return entry.keystr;
+}
+
+bool OffsetIndex::lookup(const char *str, glong &idx) // binary search for str: first over the pages' first keys, then inside one page; returns true on an exact match
+{
+ bool bFound = false;
+ glong iFrom;
+ glong iTo = wordoffset.size() - 2; // index of the last page
+ gint cmpint;
+ glong iThisIndex;
+ if (stardict_strcmp(str, first.keystr.c_str()) < 0) { // sorts before the first key
+ idx = 0;
+ return false;
+ } else if (stardict_strcmp(str, real_last.keystr.c_str()) > 0) { // sorts after the last key
+ idx = INVALID_INDEX;
+ return false;
+ } else {
+ iFrom = 0;
+ iThisIndex = 0;
+ while (iFrom <= iTo) { // stage 1: find the page whose first key is <= str
+ iThisIndex = (iFrom + iTo) / 2;
+ cmpint = stardict_strcmp(str, get_first_on_page_key(iThisIndex));
+ if (cmpint > 0)
+ iFrom = iThisIndex + 1;
+ else if (cmpint < 0)
+ iTo = iThisIndex - 1;
+ else {
+ bFound = true;
+ break;
+ }
+ }
+ if (!bFound)
+ idx = iTo; //prev
+ else
+ idx = iThisIndex;
+ }
+ if (!bFound) { // stage 2: search inside page idx
+ gulong netr = load_page(idx);
+ iFrom = 1; // Needn't search the first word anymore.
+ iTo = netr - 1;
+ iThisIndex = 0;
+ while (iFrom <= iTo) {
+ iThisIndex = (iFrom + iTo) / 2;
+ cmpint = stardict_strcmp(str, page.entries[iThisIndex].keystr);
+ if (cmpint > 0)
+ iFrom = iThisIndex + 1;
+ else if (cmpint < 0)
+ iTo = iThisIndex - 1;
+ else {
+ bFound = true;
+ break;
+ }
+ }
+ idx *= ENTR_PER_PAGE; // convert the page number into the index of the page's first entry
+ if (!bFound)
+ idx += iFrom; //next
+ else
+ idx += iThisIndex;
+ } else {
+ idx *= ENTR_PER_PAGE; // str equals the first key of page idx
+ }
+ return bFound;
+}
+
+// Read the whole index file url (opened through zlib) into memory and build
+// the table of key pointers. fsize is the expected number of bytes after
+// decompression, wc the number of entries. Returns false if the file cannot
+// be opened or does not yield exactly fsize bytes.
+bool WordListIndex::load(const std::string &url, gulong wc, gulong fsize, bool verbose)
+{
+    gzFile in = gzopen(url.c_str(), "rb");
+    if (in == nullptr)
+        return false;
+
+    idxdatabuf = (gchar *)g_malloc(fsize);
+
+    const int len = gzread(in, idxdatabuf, fsize);
+    gzclose(in);
+    if (len < 0 || gulong(len) != fsize)
+        return false;
+
+    // Each entry is: key, '\0', and two 32-bit values.
+    const size_t entry_tail = 1 + 2 * sizeof(guint32);
+    wordlist.resize(wc + 1);
+    gchar *cursor = idxdatabuf;
+    guint32 n = 0;
+    while (n < wc) {
+        wordlist[n] = cursor;
+        cursor += strlen(cursor) + entry_tail;
+        ++n;
+    }
+    wordlist[wc] = cursor; // end marker behind the last entry
+
+    return true;
+}
+
+// Decode the two big-endian 32-bit values stored behind the key of entry idx
+// into wordentry_offset and wordentry_size.
+void WordListIndex::get_data(glong idx)
+{
+    gchar *key = wordlist[idx];
+    gchar *fields = key + strlen(key) + sizeof(gchar);
+    wordentry_offset = g_ntohl(get_uint32(fields));
+    wordentry_size = g_ntohl(get_uint32(fields + sizeof(guint32)));
+}
+
+bool WordListIndex::lookup(const char *str, glong &idx) // binary search for str over all keys; returns true on an exact match
+{
+ bool bFound = false;
+ glong iTo = wordlist.size() - 2; // index of the last real entry (the final slot is the end marker)
+ // idx receives 0 if str sorts before the first key, INVALID_INDEX if it sorts after the last one
+ if (stardict_strcmp(str, get_key(0)) < 0) {
+ idx = 0;
+ } else if (stardict_strcmp(str, get_key(iTo)) > 0) {
+ idx = INVALID_INDEX;
+ } else {
+ glong iThisIndex = 0;
+ glong iFrom = 0;
+ gint cmpint;
+ while (iFrom <= iTo) {
+ iThisIndex = (iFrom + iTo) / 2;
+ cmpint = stardict_strcmp(str, get_key(iThisIndex));
+ if (cmpint > 0)
+ iFrom = iThisIndex + 1;
+ else if (cmpint < 0)
+ iTo = iThisIndex - 1;
+ else {
+ bFound = true;
+ break;
+ }
+ }
+ if (!bFound)
+ idx = iFrom; //next
+ else
+ idx = iThisIndex;
+ }
+ return bFound;
+}
+}
+
+/**
+ * Load the synonym file url into the `synonyms` map.
+ *
+ * Each of the wc entries is a zero-terminated string followed by a 4-byte
+ * index in network byte order. Keys are stored case-folded. The read cursor
+ * advances by the length of the original string (case folding may change the
+ * byte length) and never leaves the mapped file.
+ *
+ * @param url path of the synonym file
+ * @param wc  number of entries to read
+ * @return true if all wc entries were read; false if the file is missing,
+ *         cannot be mapped or ends before wc entries were read
+ */
+bool SynFile::load(const std::string &url, gulong wc)
+{
+    struct stat stat_buf;
+    if (stat(url.c_str(), &stat_buf) != 0)
+        return false;
+
+    MapFile syn;
+    if (!syn.open(url.c_str(), stat_buf.st_size))
+        return false;
+
+    const gchar *current = syn.begin();
+    const gchar *const end = current + stat_buf.st_size;
+    for (unsigned long i = 0; i < wc; i++) {
+        // each entry in a syn-file is:
+        // - 0-terminated string
+        // - 4-byte index in network byte order
+        const void *terminator = memchr(current, '\0', end - current);
+        if (terminator == nullptr)
+            return false; // truncated file: string without terminator
+        const gchar *index_pos = static_cast<const gchar *>(terminator) + 1;
+        if (static_cast<size_t>(end - index_pos) < sizeof(guint32))
+            return false; // truncated file: index value missing
+
+        glib::CharStr lower_string{ g_utf8_casefold(current, -1) };
+        const guint32 idx = g_ntohl(get_uint32(index_pos));
+        synonyms[std::string(get_impl(lower_string))] = idx;
+        current = index_pos + sizeof(idx);
+    }
+    return true;
+}
+
+// Look up the case-folded form of str among the loaded synonyms; on a hit
+// store the associated index in idx and return true.
+bool SynFile::lookup(const char *str, glong &idx)
+{
+    glib::CharStr folded{ g_utf8_casefold(str, -1) };
+    const auto hit = synonyms.find(get_impl(folded));
+    if (hit == synonyms.end())
+        return false;
+    idx = hit->second;
+    return true;
+}
+
+// Look str up in the synonym file first and fall back to the main index.
+bool Dict::Lookup(const char *str, glong &idx)
+{
+    if (syn_file->lookup(str, idx))
+        return true;
+    return idx_file->lookup(str, idx);
+}
+
+bool Dict::load(const std::string &ifofilename, bool verbose) // opens the data, index and synonym files that belong to ifofilename; returns false if the info, data or index file cannot be loaded
+{
+ gulong idxfilesize;
+ if (!load_ifofile(ifofilename, idxfilesize))
+ return false;
+ // data file: replace the trailing "ifo" by "dict.dz" and prefer that file if it exists
+ std::string fullfilename(ifofilename);
+ fullfilename.replace(fullfilename.length() - sizeof("ifo") + 1, sizeof("ifo") - 1, "dict.dz");
+
+ if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
+ dictdzfile.reset(new DictData);
+ if (!dictdzfile->open(fullfilename, 0)) {
+ //g_print("open file %s failed!\n",fullfilename);
+ return false;
+ }
+ } else {
+ fullfilename.erase(fullfilename.length() - sizeof(".dz") + 1, sizeof(".dz") - 1); // drop ".dz" to get the plain ".dict" name
+ dictfile = fopen(fullfilename.c_str(), "rb");
+ if (!dictfile) {
+ //g_print("open file %s failed!\n",fullfilename);
+ return false;
+ }
+ }
+ // index file: "idx.gz" is read with WordListIndex, otherwise the plain "idx" with OffsetIndex
+ fullfilename = ifofilename;
+ fullfilename.replace(fullfilename.length() - sizeof("ifo") + 1, sizeof("ifo") - 1, "idx.gz");
+
+ if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) {
+ idx_file.reset(new WordListIndex);
+ } else {
+ fullfilename.erase(fullfilename.length() - sizeof(".gz") + 1, sizeof(".gz") - 1);
+ idx_file.reset(new OffsetIndex);
+ }
+
+ if (!idx_file->load(fullfilename, wordcount, idxfilesize, verbose))
+ return false;
+ // synonym file: the result of loading it is ignored, so a missing file does not fail the load
+ fullfilename = ifofilename;
+ fullfilename.replace(fullfilename.length() - sizeof("ifo") + 1, sizeof("ifo") - 1, "syn");
+ syn_file.reset(new SynFile);
+ syn_file->load(fullfilename, syn_wordcount);
+
+ //g_print("bookname: %s , wordcount %lu\n", bookname.c_str(), narticles());
+ return true;
+}
+
+// Parse ifofilename as a normal (non-tree) dictionary description and copy
+// the values this Dict needs. The index file size is returned through
+// idxfilesize. Fails when the file cannot be parsed or declares zero words.
+bool Dict::load_ifofile(const std::string &ifofilename, gulong &idxfilesize)
+{
+    DictInfo info;
+    if (!info.load_from_ifo_file(ifofilename, false) || info.wordcount == 0)
+        return false;
+
+    ifo_file_name = info.ifo_file_name;
+    bookname = info.bookname;
+    sametypesequence = info.sametypesequence;
+    wordcount = info.wordcount;
+    syn_wordcount = info.syn_wordcount;
+    idxfilesize = info.index_file_size;
+
+    return true;
+}
+
+// Collect the indices of all keys matching pspec into aIndex (at most
+// iBuffLen - 1 of them) and terminate the list with -1.
+// Returns true if at least one key matched.
+bool Dict::LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen)
+{
+    const int limit = iBuffLen - 1; // one slot is reserved for the terminator
+    int found = 0;
+
+    guint32 i = 0;
+    while (i < narticles() && found < limit) {
+        if (g_pattern_match_string(pspec, get_key(i)))
+            aIndex[found++] = i;
+        i++;
+    }
+
+    aIndex[found] = -1; // -1 is the end.
+
+    return found > 0;
+}
+
+// Destroy every dictionary that load_dict() added to oLib.
+Libs::~Libs()
+{
+    for (auto it = oLib.begin(); it != oLib.end(); ++it)
+        delete *it;
+}
+
+// Load the dictionary described by the .ifo file url and append it to oLib;
+// a dictionary that fails to load is discarded.
+void Libs::load_dict(const std::string &url)
+{
+    Dict *lib = new Dict;
+    if (!lib->load(url, verbose_)) {
+        delete lib;
+        return;
+    }
+    oLib.push_back(lib);
+}
+
+// Load every ".ifo" dictionary that for_each_file() reports for dicts_dirs,
+// skipping those it flags as disabled.
+void Libs::load(const std::list<std::string> &dicts_dirs,
+                const std::list<std::string> &order_list,
+                const std::list<std::string> &disable_list)
+{
+    const auto load_enabled = [this](const std::string &url, bool disable) -> void {
+        if (disable)
+            return;
+        load_dict(url);
+    };
+    for_each_file(dicts_dirs, ".ifo", order_list, disable_list, load_enabled);
+}
+
+// Among the words the dictionaries currently point at (iCurrent[i] for
+// dictionary i), return the one that sorts first by stardict_strcmp().
+// Dictionaries whose position is invalid or out of range are ignored;
+// nullptr is returned when no dictionary has a valid position.
+const gchar *Libs::poGetCurrentWord(glong *iCurrent)
+{
+    const gchar *smallest = nullptr;
+    for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) {
+        const glong pos = iCurrent[iLib];
+        if (pos == INVALID_INDEX)
+            continue;
+        if (pos >= narticles(iLib) || pos < 0)
+            continue;
+
+        const gchar *candidate = poGetWord(pos, iLib);
+        if (smallest == nullptr || stardict_strcmp(smallest, candidate) > 0)
+            smallest = candidate;
+    }
+    return smallest;
+}
+
+const gchar *Libs::poGetNextWord(const gchar *sWord, glong *iCurrent)
+{
+ // the input can be:
+ // (word,iCurrent): look word up to initialise iCurrent, advance iCurrent, and return the next word. used by TopWin::NextCallback();
+ // (nullptr,iCurrent): start from the given iCurrent, advance iCurrent, and return the next word. used by AppCore::ListWords();
+ const gchar *poCurrentWord = nullptr;
+ size_t iCurrentLib = 0; // dictionary that holds the smallest current word
+ const gchar *word;
+ // step 1: find the smallest word among the dictionaries' current positions
+ for (size_t iLib = 0; iLib < oLib.size(); ++iLib) {
+ if (sWord)
+ oLib[iLib]->Lookup(sWord, iCurrent[iLib]);
+ if (iCurrent[iLib] == INVALID_INDEX)
+ continue;
+ if (iCurrent[iLib] >= narticles(iLib) || iCurrent[iLib] < 0)
+ continue;
+ if (poCurrentWord == nullptr) {
+ poCurrentWord = poGetWord(iCurrent[iLib], iLib);
+ iCurrentLib = iLib;
+ } else {
+ word = poGetWord(iCurrent[iLib], iLib);
+ // keep the smaller of the two words
+ if (stardict_strcmp(poCurrentWord, word) > 0) {
+ poCurrentWord = word;
+ iCurrentLib = iLib;
+ }
+ }
+ }
+ if (poCurrentWord) { // step 2: move past that word in every dictionary that points at it
+ iCurrent[iCurrentLib]++;
+ for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) {
+ if (iLib == iCurrentLib)
+ continue;
+ if (iCurrent[iLib] == INVALID_INDEX)
+ continue;
+ if (iCurrent[iLib] >= narticles(iLib) || iCurrent[iLib] < 0)
+ continue;
+ if (strcmp(poCurrentWord, poGetWord(iCurrent[iLib], iLib)) == 0)
+ iCurrent[iLib]++;
+ }
+ poCurrentWord = poGetCurrentWord(iCurrent); // step 3: the smallest word at the new positions is the next word
+ }
+ return poCurrentWord;
+}
+
+const gchar *
+Libs::poGetPreWord(glong *iCurrent)
+{
+ // used by TopWin::PreviousCallback(); the iCurrent is cached by AppCore::TopWinWordChange();
+ const gchar *poCurrentWord = nullptr;
+ std::vector<Dict *>::size_type iCurrentLib = 0; // dictionary that holds the largest preceding word
+ const gchar *word;
+ // step 1: find the largest word among the entries just before each dictionary's current position
+ for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) {
+ if (iCurrent[iLib] == INVALID_INDEX)
+ iCurrent[iLib] = narticles(iLib); // an invalid position is treated as one past the last entry
+ else {
+ if (iCurrent[iLib] > narticles(iLib) || iCurrent[iLib] <= 0)
+ continue;
+ }
+ if (poCurrentWord == nullptr) {
+ poCurrentWord = poGetWord(iCurrent[iLib] - 1, iLib);
+ iCurrentLib = iLib;
+ } else {
+ word = poGetWord(iCurrent[iLib] - 1, iLib);
+ if (stardict_strcmp(poCurrentWord, word) < 0) {
+ poCurrentWord = word;
+ iCurrentLib = iLib;
+ }
+ }
+ }
+ // step 2: step back in every dictionary whose preceding entry equals that word
+ if (poCurrentWord) {
+ iCurrent[iCurrentLib]--;
+ for (std::vector<Dict *>::size_type iLib = 0; iLib < oLib.size(); iLib++) {
+ if (iLib == iCurrentLib)
+ continue;
+ if (iCurrent[iLib] > narticles(iLib) || iCurrent[iLib] <= 0)
+ continue;
+ if (strcmp(poCurrentWord, poGetWord(iCurrent[iLib] - 1, iLib)) == 0) {
+ iCurrent[iLib]--;
+ } else {
+ if (iCurrent[iLib] == narticles(iLib)) // still one past the end: restore the invalid marker
+ iCurrent[iLib] = INVALID_INDEX;
+ }
+ }
+ }
+ return poCurrentWord;
+}
+
+bool Libs::LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib)
+{
+ glong iIndex;
+ bool bFound = false;
+ gchar *casestr;
+
+ if (!bFound) {
+ // to lower case.
+ casestr = g_utf8_strdown(sWord, -1);
+ if (strcmp(casestr, sWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ // to upper case.
+ if (!bFound) {
+ casestr = g_utf8_strup(sWord, -1);
+ if (strcmp(casestr, sWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ // Upper the first character and lower others.
+ if (!bFound) {
+ gchar *nextchar = g_utf8_next_char(sWord);
+ gchar *firstchar = g_utf8_strup(sWord, nextchar - sWord);
+ nextchar = g_utf8_strdown(nextchar, -1);
+ casestr = g_strdup_printf("%s%s", firstchar, nextchar);
+ g_free(firstchar);
+ g_free(nextchar);
+ if (strcmp(casestr, sWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ }
+
+ if (bIsPureEnglish(sWord)) {
+ // If not Found , try other status of sWord.
+ int iWordLen = strlen(sWord);
+ bool isupcase;
+
+ gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1);
+
+ //cut one char "s" or "d"
+ if (!bFound && iWordLen > 1) {
+ isupcase = sWord[iWordLen - 1] == 'S' || !strncmp(&sWord[iWordLen - 2], "ED", 2);
+ if (isupcase || sWord[iWordLen - 1] == 's' || !strncmp(&sWord[iWordLen - 2], "ed", 2)) {
+ strcpy(sNewWord, sWord);
+ sNewWord[iWordLen - 1] = '\0'; // cut "s" or "d"
+ if (oLib[iLib]->Lookup(sNewWord, iIndex))
+ bFound = true;
+ else if (isupcase || g_ascii_isupper(sWord[0])) {
+ casestr = g_ascii_strdown(sNewWord, -1);
+ if (strcmp(casestr, sNewWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ }
+ }
+
+ //cut "ly"
+ if (!bFound && iWordLen > 2) {
+ isupcase = !strncmp(&sWord[iWordLen - 2], "LY", 2);
+ if (isupcase || (!strncmp(&sWord[iWordLen - 2], "ly", 2))) {
+ strcpy(sNewWord, sWord);
+ sNewWord[iWordLen - 2] = '\0'; // cut "ly"
+ if (iWordLen > 5 && sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4]
+ && !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { //doubled
+
+ sNewWord[iWordLen - 3] = '\0';
+ if (oLib[iLib]->Lookup(sNewWord, iIndex))
+ bFound = true;
+ else {
+ if (isupcase || g_ascii_isupper(sWord[0])) {
+ casestr = g_ascii_strdown(sNewWord, -1);
+ if (strcmp(casestr, sNewWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ if (!bFound)
+ sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; //restore
+ }
+ }
+ if (!bFound) {
+ if (oLib[iLib]->Lookup(sNewWord, iIndex))
+ bFound = true;
+ else if (isupcase || g_ascii_isupper(sWord[0])) {
+ casestr = g_ascii_strdown(sNewWord, -1);
+ if (strcmp(casestr, sNewWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ }
+ }
+ }
+
+ //cut "ing"
+ if (!bFound && iWordLen > 3) {
+ isupcase = !strncmp(&sWord[iWordLen - 3], "ING", 3);
+ if (isupcase || !strncmp(&sWord[iWordLen - 3], "ing", 3)) {
+ strcpy(sNewWord, sWord);
+ sNewWord[iWordLen - 3] = '\0';
+ if (iWordLen > 6 && (sNewWord[iWordLen - 4] == sNewWord[iWordLen - 5])
+ && !bIsVowel(sNewWord[iWordLen - 5]) && bIsVowel(sNewWord[iWordLen - 6])) { //doubled
+ sNewWord[iWordLen - 4] = '\0';
+ if (oLib[iLib]->Lookup(sNewWord, iIndex))
+ bFound = true;
+ else {
+ if (isupcase || g_ascii_isupper(sWord[0])) {
+ casestr = g_ascii_strdown(sNewWord, -1);
+ if (strcmp(casestr, sNewWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ if (!bFound)
+ sNewWord[iWordLen - 4] = sNewWord[iWordLen - 5]; //restore
+ }
+ }
+ if (!bFound) {
+ if (oLib[iLib]->Lookup(sNewWord, iIndex))
+ bFound = true;
+ else if (isupcase || g_ascii_isupper(sWord[0])) {
+ casestr = g_ascii_strdown(sNewWord, -1);
+ if (strcmp(casestr, sNewWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ }
+ if (!bFound) {
+ if (isupcase)
+ strcat(sNewWord, "E"); // add a char "E"
+ else
+ strcat(sNewWord, "e"); // add a char "e"
+ if (oLib[iLib]->Lookup(sNewWord, iIndex))
+ bFound = true;
+ else if (isupcase || g_ascii_isupper(sWord[0])) {
+ casestr = g_ascii_strdown(sNewWord, -1);
+ if (strcmp(casestr, sNewWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ }
+ }
+ }
+
+ //cut two char "es"
+ if (!bFound && iWordLen > 3) {
+ isupcase = (!strncmp(&sWord[iWordLen - 2], "ES", 2) && (sWord[iWordLen - 3] == 'S' || sWord[iWordLen - 3] == 'X' || sWord[iWordLen - 3] == 'O' || (iWordLen > 4 && sWord[iWordLen - 3] == 'H' && (sWord[iWordLen - 4] == 'C' || sWord[iWordLen - 4] == 'S'))));
+ if (isupcase || (!strncmp(&sWord[iWordLen - 2], "es", 2) && (sWord[iWordLen - 3] == 's' || sWord[iWordLen - 3] == 'x' || sWord[iWordLen - 3] == 'o' || (iWordLen > 4 && sWord[iWordLen - 3] == 'h' && (sWord[iWordLen - 4] == 'c' || sWord[iWordLen - 4] == 's'))))) {
+ strcpy(sNewWord, sWord);
+ sNewWord[iWordLen - 2] = '\0';
+ if (oLib[iLib]->Lookup(sNewWord, iIndex))
+ bFound = true;
+ else if (isupcase || g_ascii_isupper(sWord[0])) {
+ casestr = g_ascii_strdown(sNewWord, -1);
+ if (strcmp(casestr, sNewWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ }
+ }
+
+ //cut "ed"
+ if (!bFound && iWordLen > 3) {
+ isupcase = !strncmp(&sWord[iWordLen - 2], "ED", 2);
+ if (isupcase || !strncmp(&sWord[iWordLen - 2], "ed", 2)) {
+ strcpy(sNewWord, sWord);
+ sNewWord[iWordLen - 2] = '\0';
+ if (iWordLen > 5 && (sNewWord[iWordLen - 3] == sNewWord[iWordLen - 4])
+ && !bIsVowel(sNewWord[iWordLen - 4]) && bIsVowel(sNewWord[iWordLen - 5])) { //doubled
+ sNewWord[iWordLen - 3] = '\0';
+ if (oLib[iLib]->Lookup(sNewWord, iIndex))
+ bFound = true;
+ else {
+ if (isupcase || g_ascii_isupper(sWord[0])) {
+ casestr = g_ascii_strdown(sNewWord, -1);
+ if (strcmp(casestr, sNewWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ if (!bFound)
+ sNewWord[iWordLen - 3] = sNewWord[iWordLen - 4]; //restore
+ }
+ }
+ if (!bFound) {
+ if (oLib[iLib]->Lookup(sNewWord, iIndex))
+ bFound = true;
+ else if (isupcase || g_ascii_isupper(sWord[0])) {
+ casestr = g_ascii_strdown(sNewWord, -1);
+ if (strcmp(casestr, sNewWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ }
+ }
+ }
+
+ // cut "ied" , add "y".
+ if (!bFound && iWordLen > 3) {
+ isupcase = !strncmp(&sWord[iWordLen - 3], "IED", 3);
+ if (isupcase || (!strncmp(&sWord[iWordLen - 3], "ied", 3))) {
+ strcpy(sNewWord, sWord);
+ sNewWord[iWordLen - 3] = '\0';
+ if (isupcase)
+ strcat(sNewWord, "Y"); // add a char "Y"
+ else
+ strcat(sNewWord, "y"); // add a char "y"
+ if (oLib[iLib]->Lookup(sNewWord, iIndex))
+ bFound = true;
+ else if (isupcase || g_ascii_isupper(sWord[0])) {
+ casestr = g_ascii_strdown(sNewWord, -1);
+ if (strcmp(casestr, sNewWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ }
+ }
+
+ // cut "ies" , add "y".
+ if (!bFound && iWordLen > 3) {
+ isupcase = !strncmp(&sWord[iWordLen - 3], "IES", 3);
+ if (isupcase || (!strncmp(&sWord[iWordLen - 3], "ies", 3))) {
+ strcpy(sNewWord, sWord);
+ sNewWord[iWordLen - 3] = '\0';
+ if (isupcase)
+ strcat(sNewWord, "Y"); // add a char "Y"
+ else
+ strcat(sNewWord, "y"); // add a char "y"
+ if (oLib[iLib]->Lookup(sNewWord, iIndex))
+ bFound = true;
+ else if (isupcase || g_ascii_isupper(sWord[0])) {
+ casestr = g_ascii_strdown(sNewWord, -1);
+ if (strcmp(casestr, sNewWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ }
+ }
+
+ // cut "er".
+ if (!bFound && iWordLen > 2) {
+ isupcase = !strncmp(&sWord[iWordLen - 2], "ER", 2);
+ if (isupcase || (!strncmp(&sWord[iWordLen - 2], "er", 2))) {
+ strcpy(sNewWord, sWord);
+ sNewWord[iWordLen - 2] = '\0';
+ if (oLib[iLib]->Lookup(sNewWord, iIndex))
+ bFound = true;
+ else if (isupcase || g_ascii_isupper(sWord[0])) {
+ casestr = g_ascii_strdown(sNewWord, -1);
+ if (strcmp(casestr, sNewWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ }
+ }
+
+ // cut "est".
+ if (!bFound && iWordLen > 3) {
+ isupcase = !strncmp(&sWord[iWordLen - 3], "EST", 3);
+ if (isupcase || (!strncmp(&sWord[iWordLen - 3], "est", 3))) {
+ strcpy(sNewWord, sWord);
+ sNewWord[iWordLen - 3] = '\0';
+ if (oLib[iLib]->Lookup(sNewWord, iIndex))
+ bFound = true;
+ else if (isupcase || g_ascii_isupper(sWord[0])) {
+ casestr = g_ascii_strdown(sNewWord, -1);
+ if (strcmp(casestr, sNewWord)) {
+ if (oLib[iLib]->Lookup(casestr, iIndex))
+ bFound = true;
+ }
+ g_free(casestr);
+ }
+ }
+ }
+
+ g_free(sNewWord);
+ }
+
+ if (bFound)
+ iWordIndex = iIndex;
+#if 0
+ else {
+ //don't change iWordIndex here.
+ //when LookupSimilarWord all failed too, we want to use the old LookupWord index to list words.
+ //iWordIndex = INVALID_INDEX;
+ }
+#endif
+ return bFound;
+}
+
+// Look up sWord in dictionary iLib: exact match first and, only when fuzzy
+// matching is enabled (fuzzy_), the LookupSimilarWord fallback afterwards.
+// Returns true when either lookup succeeded; the index is reported through
+// iWordIndex.
+bool Libs::SimpleLookupWord(const gchar *sWord, glong &iWordIndex, int iLib)
+{
+    if (oLib[iLib]->Lookup(sWord, iWordIndex))
+        return true;
+    return fuzzy_ && LookupSimilarWord(sWord, iWordIndex, iLib);
+}
+
+// Fuzzy search: collect up to reslist_size headwords, across all loaded
+// dictionaries, whose case-folded edit distance to sWord is below
+// iMaxFuzzyDistance (and below the length of sWord itself).
+//
+// reslist must have room for reslist_size pointers. Every slot is written:
+// matches come first, ordered by distance and then by stardict_strcmp;
+// unused slots are set to nullptr. Each non-null entry is a g_strdup() copy.
+// Returns true when at least one match was found; returns false without
+// touching reslist when sWord is empty or reslist_size is not positive.
+bool Libs::LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size)
+{
+    if (sWord[0] == '\0')
+        return false;
+    // Nothing can be stored in an empty list, and the slot bookkeeping below
+    // always writes slot 0 at least once, so refuse non-positive sizes.
+    if (reslist_size <= 0)
+        return false;
+
+    // std::vector rather than a variable-length array: VLAs are a compiler
+    // extension in C++ and would put a caller-sized buffer on the stack.
+    std::vector<Fuzzystruct> oFuzzystruct(reslist_size);
+
+    for (int i = 0; i < reslist_size; i++) {
+        oFuzzystruct[i].pMatchWord = nullptr;
+        oFuzzystruct[i].iMatchWordDistance = iMaxFuzzyDistance;
+    }
+    // iMaxDistance is the worst distance currently held in the list; a
+    // candidate must beat it to be stored.
+    int iMaxDistance = iMaxFuzzyDistance;
+    int iDistance;
+    bool Found = false;
+    EditDistance oEditDistance;
+
+    glong iCheckWordLen;
+    const char *sCheck;
+    gunichar *ucs4_str1, *ucs4_str2;
+    glong ucs4_str2_len;
+
+    ucs4_str2 = g_utf8_to_ucs4_fast(sWord, -1, &ucs4_str2_len);
+    unicode_strdown(ucs4_str2);
+
+    for (size_t iLib = 0; iLib < oLib.size(); ++iLib) {
+        if (progress_func)
+            progress_func();
+
+        //if (stardict_strcmp(sWord, poGetWord(0,iLib))>=0 && stardict_strcmp(sWord, poGetWord(narticles(iLib)-1,iLib))<=0) {
+        //there are Chinese dicts and English dicts...
+
+        const int iwords = narticles(iLib);
+        for (int index = 0; index < iwords; index++) {
+            sCheck = poGetWord(index, iLib);
+            // tolower and skip too long or too short words
+            iCheckWordLen = g_utf8_strlen(sCheck, -1);
+            if (iCheckWordLen - ucs4_str2_len >= iMaxDistance || ucs4_str2_len - iCheckWordLen >= iMaxDistance)
+                continue;
+            ucs4_str1 = g_utf8_to_ucs4_fast(sCheck, -1, nullptr);
+            // Longer candidates are cut to the query length before comparing.
+            if (iCheckWordLen > ucs4_str2_len)
+                ucs4_str1[ucs4_str2_len] = 0;
+            unicode_strdown(ucs4_str1);
+
+            iDistance = oEditDistance.CalEditDistance(ucs4_str1, ucs4_str2, iMaxDistance);
+            g_free(ucs4_str1);
+            if (iDistance < iMaxDistance && iDistance < ucs4_str2_len) {
+                // when ucs4_str2_len=1,2 we need less fuzzy.
+                Found = true;
+                bool bAlreadyInList = false;
+                int iMaxDistanceAt = 0;
+                for (int j = 0; j < reslist_size; j++) {
+                    if (oFuzzystruct[j].pMatchWord && strcmp(oFuzzystruct[j].pMatchWord, sCheck) == 0) { //already in list
+                        bAlreadyInList = true;
+                        break;
+                    }
+                    //find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time.
+                    if (oFuzzystruct[j].iMatchWordDistance == iMaxDistance) {
+                        iMaxDistanceAt = j;
+                    }
+                }
+                if (!bAlreadyInList) {
+                    // Replace the current worst entry with the new match.
+                    if (oFuzzystruct[iMaxDistanceAt].pMatchWord)
+                        g_free(oFuzzystruct[iMaxDistanceAt].pMatchWord);
+                    oFuzzystruct[iMaxDistanceAt].pMatchWord = g_strdup(sCheck);
+                    oFuzzystruct[iMaxDistanceAt].iMatchWordDistance = iDistance;
+                    // calc new iMaxDistance
+                    iMaxDistance = iDistance;
+                    for (int j = 0; j < reslist_size; j++) {
+                        if (oFuzzystruct[j].iMatchWordDistance > iMaxDistance)
+                            iMaxDistance = oFuzzystruct[j].iMatchWordDistance;
+                    } // calc new iMaxDistance
+                } // add to list
+            } // find one
+        } // each word
+
+    } // each lib
+    g_free(ucs4_str2);
+
+    if (Found) // sort with distance
+        std::sort(oFuzzystruct.begin(), oFuzzystruct.end(), [](const Fuzzystruct &lh, const Fuzzystruct &rh) -> bool {
+            if (lh.iMatchWordDistance != rh.iMatchWordDistance)
+                return lh.iMatchWordDistance < rh.iMatchWordDistance;
+
+            if (lh.pMatchWord && rh.pMatchWord)
+                return stardict_strcmp(lh.pMatchWord, rh.pMatchWord) < 0;
+
+            return false;
+        });
+
+    for (gint i = 0; i < reslist_size; ++i)
+        reslist[i] = oFuzzystruct[i].pMatchWord;
+
+    return Found;
+}
+
+// Pattern search: collect every headword, across all dictionaries, that
+// matches the GPatternSpec pattern `word`.
+//
+// Each distinct match is stored in ppMatchWord as a g_strdup() copy and the
+// array is finally sorted with stardict_strcmp. The capacity of ppMatchWord
+// is not checked here, so the caller has to size it for the worst case.
+// Returns the number of entries written.
+gint Libs::LookupWithRule(const gchar *word, gchar **ppMatchWord)
+{
+    glong hits[MAX_MATCH_ITEM_PER_LIB + 1];
+    gint total = 0;
+    GPatternSpec *pattern = g_pattern_spec_new(word);
+
+    for (std::vector<Dict *>::size_type lib = 0; lib < oLib.size(); ++lib) {
+        // Every dictionary gets the full per-lib budget; shrinking it by the
+        // matches already collected would save time but could lose results,
+        // because words repeated across dictionaries are stored only once.
+        if (!oLib[lib]->LookupWithRule(pattern, hits, MAX_MATCH_ITEM_PER_LIB + 1))
+            continue;
+
+        if (progress_func)
+            progress_func();
+
+        // hits[] is terminated by -1.
+        for (int k = 0; hits[k] != -1; ++k) {
+            const gchar *candidate = poGetWord(hits[k], lib);
+            const bool known = std::any_of(ppMatchWord, ppMatchWord + total,
+                                           [candidate](const gchar *seen) {
+                                               return strcmp(seen, candidate) == 0;
+                                           });
+            if (!known)
+                ppMatchWord[total++] = g_strdup(candidate);
+        }
+    }
+    g_pattern_spec_free(pattern);
+
+    if (total != 0) {
+        std::sort(ppMatchWord, ppMatchWord + total, [](const char *lh, const char *rh) -> bool {
+            return stardict_strcmp(lh, rh) < 0;
+        });
+    }
+
+    return total;
+}
+
+// Full-text search: find the articles whose data contains the search words
+// given in sWord.
+//
+// sWord is split on unescaped spaces. A backslash escapes the next
+// character: "\t" and "\n" become tab and newline, any other escaped
+// character (including space and backslash) stands for itself. A backslash
+// at the very end of the query has nothing to escape and is ignored.
+//
+// reslist must point to an array with one vector per loaded dictionary;
+// reslist[i] receives g_strdup() copies of the matching keys of dictionary
+// i. Dictionaries without searchable data are skipped.
+// Returns true when at least one dictionary produced a match, false when
+// nothing matched or the query contained no search words.
+bool Libs::LookupData(const gchar *sWord, std::vector<gchar *> *reslist)
+{
+    std::vector<std::string> SearchWords;
+    std::string SearchWord;
+    for (const char *p = sWord; *p; ++p) {
+        if (*p == '\\') {
+            ++p;
+            // Dangling backslash: stop here, otherwise the loop increment
+            // would step past the terminating NUL and read out of bounds.
+            if (*p == '\0')
+                break;
+            switch (*p) {
+            case 't':
+                SearchWord += '\t';
+                break;
+            case 'n':
+                SearchWord += '\n';
+                break;
+            default:
+                // "\ ", "\\" and every other escaped character are literal.
+                SearchWord += *p;
+            }
+        } else if (*p == ' ') {
+            if (!SearchWord.empty()) {
+                SearchWords.push_back(SearchWord);
+                SearchWord.clear();
+            }
+        } else {
+            SearchWord += *p;
+        }
+    }
+    if (!SearchWord.empty()) {
+        SearchWords.push_back(SearchWord);
+        SearchWord.clear();
+    }
+    if (SearchWords.empty())
+        return false;
+
+    // Scratch buffer handed to SearchData; grown to the largest article seen.
+    guint32 max_size = 0;
+    gchar *origin_data = nullptr;
+    for (std::vector<Dict *>::size_type i = 0; i < oLib.size(); ++i) {
+        if (!oLib[i]->containSearchData())
+            continue;
+        if (progress_func)
+            progress_func();
+        const gulong iwords = narticles(i);
+        const gchar *key;
+        guint32 offset, size;
+        for (gulong j = 0; j < iwords; ++j) {
+            oLib[i]->get_key_and_data(j, &key, &offset, &size);
+            if (size > max_size) {
+                origin_data = (gchar *)g_realloc(origin_data, size);
+                max_size = size;
+            }
+            if (oLib[i]->SearchData(SearchWords, offset, size, origin_data))
+                reslist[i].push_back(g_strdup(key));
+        }
+    }
+    g_free(origin_data);
+
+    // Report success as soon as any dictionary has a hit.
+    std::vector<Dict *>::size_type i;
+    for (i = 0; i < oLib.size(); ++i)
+        if (!reslist[i].empty())
+            break;
+
+    return i != oLib.size();
+}
+
+/**************************************************/
+// Classify the raw query string s and store the text to search for in res.
+//
+//   null or empty        -> qtSIMPLE, res is empty
+//   leading '/'          -> qtFUZZY,  res is everything after the '/'
+//   leading '|'          -> qtDATA,   res is everything after the '|'
+//   unescaped '*' or '?' -> qtREGEXP
+//   anything else        -> qtSIMPLE
+//
+// In the last two cases backslashes are removed from res: the character
+// following a backslash is copied verbatim and never counts as a wildcard,
+// and a backslash at the very end of s is dropped.
+query_t analyze_query(const char *s, std::string &res)
+{
+    if (!s || !*s) {
+        res = "";
+        return qtSIMPLE;
+    }
+
+    switch (*s) {
+    case '/':
+        res = s + 1;
+        return qtFUZZY;
+    case '|':
+        res = s + 1;
+        return qtDATA;
+    default:
+        break;
+    }
+
+    bool has_wildcard = false;
+    res = "";
+    for (const char *p = s; *p; ++p) {
+        if (*p == '\\') {
+            // Skip the backslash itself; the escaped character is copied below.
+            ++p;
+            if (!*p)
+                break;
+        } else if (*p == '*' || *p == '?') {
+            has_wildcard = true;
+        }
+        res += *p;
+    }
+
+    return has_wildcard ? qtREGEXP : qtSIMPLE;
+}
diff --git a/src/stardict_lib.hpp b/src/stardict_lib.hpp
new file mode 100644
index 0000000..a629cbe
--- /dev/null
+++ b/src/stardict_lib.hpp
@@ -0,0 +1,215 @@
+#pragma once
+
+#include <cstdio>
+#include <cstring>
+#include <functional>
+#include <list>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "dictziplib.hpp"
+
+const int MAX_MATCH_ITEM_PER_LIB = 100;
+const int MAX_FUZZY_DISTANCE = 3; // at most MAX_FUZZY_DISTANCE-1 differences allowed when find similar words
+
+// Read a guint32 from addr. The bytes are copied with memcpy, so addr needs
+// no particular alignment; no byte-order conversion is done here.
+inline guint32 get_uint32(const gchar *addr)
+{
+    guint32 value;
+    memcpy(&value, addr, sizeof(value));
+    return value;
+}
+
+// Store val at addr. The bytes are copied with memcpy, so addr needs no
+// particular alignment; no byte-order conversion is done here.
+inline void set_uint32(gchar *addr, guint32 val)
+{
+    memcpy(addr, &val, sizeof(val));
+}
+
+// One slot of the word-data cache: the data block loaded for the index entry
+// at `offset`. The slot owns `data` and releases it with g_free().
+struct cacheItem {
+    guint32 offset = 0; // initialised so an empty slot never holds an indeterminate value
+    gchar *data = nullptr; // nullptr while the slot is empty
+    //write code here to make it inline
+    cacheItem() = default;
+    ~cacheItem() { g_free(data); }
+    // The slot owns `data`; a member-wise copy would free it twice.
+    cacheItem(const cacheItem &) = delete;
+    cacheItem &operator=(const cacheItem &) = delete;
+};
+
+const int WORDDATA_CACHE_NUM = 10;
+const int INVALID_INDEX = -100;
+
+// Common base for dictionaries: owns the handle to the article data (a plain
+// FILE or a DictData object) and a small cache of recently read articles.
+class DictBase
+{
+public:
+    DictBase() {}
+    DictBase(const DictBase &) = delete;
+    DictBase &operator=(const DictBase &) = delete;
+    // Closes the plain data file if one is open.
+    ~DictBase()
+    {
+        if (dictfile != nullptr)
+            fclose(dictfile);
+    }
+
+    gchar *GetWordData(guint32 idxitem_offset, guint32 idxitem_size);
+
+    // True when sametypesequence is empty or contains at least one of the
+    // type characters "mlgxty".
+    bool containSearchData() const
+    {
+        return sametypesequence.empty()
+            || sametypesequence.find_first_of("mlgxty") != std::string::npos;
+    }
+    bool SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data);
+
+protected:
+    std::string sametypesequence;
+    FILE *dictfile = nullptr;
+    std::unique_ptr<DictData> dictdzfile;
+
+private:
+    cacheItem cache[WORDDATA_CACHE_NUM];
+    gint cache_cur = 0;
+};
+
+// This structure contains all information about a dictionary.
+// NOTE(review): the fields presumably mirror the keys of the dictionary's
+// .ifo file (wordcount, synwordcount, idxfilesize, bookname, ...) - confirm
+// against load_from_ifo_file(), which is not defined in this header.
+// The guint32 members have no initialisers, so they are indeterminate until
+// something assigns them.
+struct DictInfo {
+ std::string ifo_file_name;
+ guint32 wordcount;
+ guint32 syn_wordcount;
+ std::string bookname;
+ std::string author;
+ std::string email;
+ std::string website;
+ std::string date;
+ std::string description;
+ guint32 index_file_size;
+ guint32 syn_file_size;
+ std::string sametypesequence;
+
+ // Fill this structure from the .ifo file named ifofilename.
+ // NOTE(review): istreedict presumably selects the tree-dictionary flavour
+ // of the file format - confirm in the implementation.
+ bool load_from_ifo_file(const std::string &ifofilename, bool istreedict);
+};
+
+// Abstract interface to a dictionary index: maps an entry number to its key
+// and to the location of its article.
+class IIndexFile
+{
+public:
+ // Offset and size of the most recently requested entry's article; callers
+ // read them right after get_data() / get_key_and_data().
+ guint32 wordentry_offset;
+ guint32 wordentry_size;
+
+ virtual ~IIndexFile() {}
+ // Load the index found at url.
+ // NOTE(review): wc and fsize look like the expected word count and index
+ // file size - confirm against the implementations.
+ virtual bool load(const std::string &url, gulong wc, gulong fsize, bool verbose) = 0;
+ // Return the key of entry idx.
+ virtual const gchar *get_key(glong idx) = 0;
+ // Make wordentry_offset / wordentry_size describe entry idx.
+ virtual void get_data(glong idx) = 0;
+ // Return the key of entry idx and make wordentry_offset / wordentry_size
+ // describe it.
+ virtual const gchar *get_key_and_data(glong idx) = 0;
+ // Search for str; the result index is reported through idx.
+ virtual bool lookup(const char *str, glong &idx) = 0;
+};
+
+// Synonym table of a dictionary, kept as a map from a string to a gulong.
+// NOTE(review): presumably synonym word -> index of the main entry, loaded
+// from the dictionary's .syn file - confirm in the implementation.
+class SynFile
+{
+public:
+ // Load the synonym data found at url.
+ // NOTE(review): wc is presumably the expected number of synonyms - confirm.
+ bool load(const std::string &url, gulong wc);
+ // Search for str; the result index is reported through idx.
+ bool lookup(const char *str, glong &idx);
+
+private:
+ std::map<std::string, gulong> synonyms;
+};
+
+// One loaded dictionary: metadata, index file and optional synonym file on
+// top of the article access provided by DictBase.
+class Dict : public DictBase
+{
+public:
+    Dict() {}
+    Dict(const Dict &) = delete;
+    Dict &operator=(const Dict &) = delete;
+    // Load the dictionary described by the given .ifo file.
+    bool load(const std::string &ifofilename, bool verbose);
+
+    // Number of entries in the dictionary (0 until a dictionary is loaded).
+    gulong narticles() const { return wordcount; }
+    const std::string &dict_name() const { return bookname; }
+    const std::string &ifofilename() const { return ifo_file_name; }
+
+    // Key (headword) of entry `index`.
+    const gchar *get_key(glong index) { return idx_file->get_key(index); }
+    // Article data of entry `index`, fetched through DictBase::GetWordData.
+    gchar *get_data(glong index)
+    {
+        idx_file->get_data(index);
+        return DictBase::GetWordData(idx_file->wordentry_offset, idx_file->wordentry_size);
+    }
+    // Key of entry `index` plus the offset and size of its article.
+    void get_key_and_data(glong index, const gchar **key, guint32 *offset, guint32 *size)
+    {
+        *key = idx_file->get_key_and_data(index);
+        *offset = idx_file->wordentry_offset;
+        *size = idx_file->wordentry_size;
+    }
+    // Search for str; the result index is reported through idx.
+    bool Lookup(const char *str, glong &idx);
+
+    // Collect the indexes of entries matching pspec into aIndex, a buffer of
+    // iBuffLen elements.
+    bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen);
+
+private:
+    std::string ifo_file_name;
+    // Zero-initialised so narticles() is well defined before load() ran.
+    gulong wordcount = 0;
+    gulong syn_wordcount = 0;
+    std::string bookname;
+
+    std::unique_ptr<IIndexFile> idx_file;
+    std::unique_ptr<SynFile> syn_file;
+
+    bool load_ifofile(const std::string &ifofilename, gulong &idxfilesize);
+};
+
+// The set of loaded dictionaries together with the lookup operations that
+// work on one dictionary (by index) or across all of them.
+class Libs
+{
+public:
+    // f, when set, is called by the long-running searches to report progress.
+    Libs(std::function<void(void)> f = std::function<void(void)>())
+    {
+        progress_func = f;
+        iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; //need to read from cfg.
+    }
+    void setVerbose(bool verbose) { verbose_ = verbose; }
+    void setFuzzy(bool fuzzy) { fuzzy_ = fuzzy; }
+    ~Libs();
+    Libs(const Libs &) = delete;
+    Libs &operator=(const Libs &) = delete;
+
+    void load_dict(const std::string &url);
+    void load(const std::list<std::string> &dicts_dirs,
+              const std::list<std::string> &order_list,
+              const std::list<std::string> &disable_list);
+    glong narticles(int idict) const { return oLib[idict]->narticles(); }
+    const std::string &dict_name(int idict) const { return oLib[idict]->dict_name(); }
+    gint ndicts() const { return oLib.size(); }
+
+    // Key (headword) of entry iIndex in dictionary iLib.
+    const gchar *poGetWord(glong iIndex, int iLib)
+    {
+        return oLib[iLib]->get_key(iIndex);
+    }
+    // Article data of entry iIndex in dictionary iLib, or nullptr when
+    // iIndex is INVALID_INDEX.
+    gchar *poGetWordData(glong iIndex, int iLib)
+    {
+        if (iIndex == INVALID_INDEX)
+            return nullptr;
+        return oLib[iLib]->get_data(iIndex);
+    }
+    const gchar *poGetCurrentWord(glong *iCurrent);
+    const gchar *poGetNextWord(const gchar *word, glong *iCurrent);
+    const gchar *poGetPreWord(glong *iCurrent);
+    // Exact lookup of sWord in dictionary iLib.
+    bool LookupWord(const gchar *sWord, glong &iWordIndex, int iLib)
+    {
+        return oLib[iLib]->Lookup(sWord, iWordIndex);
+    }
+    bool LookupSimilarWord(const gchar *sWord, glong &iWordIndex, int iLib);
+    // Exact lookup, followed by LookupSimilarWord when fuzzy_ is set.
+    bool SimpleLookupWord(const gchar *sWord, glong &iWordIndex, int iLib);
+
+    bool LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size);
+    gint LookupWithRule(const gchar *sWord, gchar *reslist[]);
+    bool LookupData(const gchar *sWord, std::vector<gchar *> *reslist);
+
+protected:
+    // Read by SimpleLookupWord, so it must never be indeterminate. The
+    // similar-word fallback stays on unless setFuzzy(false) is called.
+    bool fuzzy_ = true;
+
+private:
+    std::vector<Dict *> oLib; // word Libs.
+    int iMaxFuzzyDistance;
+    std::function<void(void)> progress_func;
+    bool verbose_ = false; // quiet unless setVerbose(true) is called
+};
+
+// Kind of query, as classified by analyze_query().
+enum query_t {
+ // plain word: none of the cases below applies
+ qtSIMPLE,
+ // the query contains an unescaped '*' or '?'
+ qtREGEXP,
+ // the query starts with '/'
+ qtFUZZY,
+ // the query starts with '|'
+ qtDATA
+};
+
+extern query_t analyze_query(const char *s, std::string &res);
diff --git a/src/utils.cpp b/src/utils.cpp
new file mode 100644
index 0000000..33bfeaa
--- /dev/null
+++ b/src/utils.cpp
@@ -0,0 +1,131 @@
+/*
+ * This file is part of sdcv - console version of Stardict program
+ * http://sdcv.sourceforge.net
+ * Copyright (C) 2005-2006 Evgeniy <dushistov@mail.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <algorithm>
+#include <cstdio>
+#include <cstdlib>
+#include <glib.h>
+#include <glib/gi18n.h>
+#include <iomanip>
+#include <sstream>
+
+#include "utils.hpp"
+
+// Convert utf8_str from UTF-8 to the character set reported by
+// g_get_charset(). When g_get_charset() returns true the input is returned
+// unchanged. Otherwise the text goes through g_convert_with_fallback(); if
+// that fails, a diagnostic is printed to stderr and the process exits with
+// EXIT_FAILURE.
+std::string utf8_to_locale_ign_err(const std::string &utf8_str)
+{
+    const char *charset;
+    if (g_get_charset(&charset))
+        return utf8_str;
+
+    gsize bytes_read, bytes_written;
+    glib::Error err;
+    glib::CharStr converted(g_convert_with_fallback(utf8_str.c_str(), -1, charset, "UTF-8", nullptr,
+                                                    &bytes_read, &bytes_written, get_addr(err)));
+    if (get_impl(converted) == nullptr) {
+        fprintf(stderr, _("Can not convert %s to current locale.\n"), utf8_str.c_str());
+        fprintf(stderr, "%s\n", err->message);
+        exit(EXIT_FAILURE);
+    }
+
+    return std::string(get_impl(converted));
+}
+
+// Walk dirname recursively and call f(path, disabled) for every regular
+// entry whose name ends in suff, except paths already listed in order_list.
+// `disabled` tells whether the path appears in disable_list. A directory
+// that cannot be opened is silently skipped.
+static void __for_each_file(const std::string &dirname, const std::string &suff,
+                            const std::list<std::string> &order_list, const std::list<std::string> &disable_list,
+                            const std::function<void(const std::string &, bool)> &f)
+{
+    GDir *dir = g_dir_open(dirname.c_str(), 0, nullptr);
+    if (dir == nullptr)
+        return;
+
+    for (const gchar *entry = g_dir_read_name(dir); entry != nullptr; entry = g_dir_read_name(dir)) {
+        const std::string path(dirname + G_DIR_SEPARATOR_S + entry);
+
+        if (g_file_test(path.c_str(), G_FILE_TEST_IS_DIR)) {
+            __for_each_file(path, suff, order_list, disable_list, f);
+            continue;
+        }
+        if (!g_str_has_suffix(entry, suff.c_str()))
+            continue;
+        // Entries of order_list are reported by the caller, not here.
+        if (std::find(order_list.begin(), order_list.end(), path) != order_list.end())
+            continue;
+
+        const bool disable = std::find(disable_list.begin(), disable_list.end(), path) != disable_list.end();
+        f(path, disable);
+    }
+    g_dir_close(dir);
+}
+
+// Call f(path, disabled) first for every entry of order_list, in list order
+// and without checking suffix or existence, then for every file ending in
+// suff found recursively under the directories of dirs_list that is not in
+// order_list. `disabled` tells whether the path appears in disable_list.
+void for_each_file(const std::list<std::string> &dirs_list, const std::string &suff,
+                   const std::list<std::string> &order_list, const std::list<std::string> &disable_list,
+                   const std::function<void(const std::string &, bool)> &f)
+{
+    for (auto it = order_list.begin(); it != order_list.end(); ++it) {
+        const bool disable = std::find(disable_list.begin(), disable_list.end(), *it) != disable_list.end();
+        f(*it, disable);
+    }
+
+    for (auto it = dirs_list.begin(); it != dirs_list.end(); ++it)
+        __for_each_file(*it, suff, order_list, disable_list, f);
+}
+
+// based on https://stackoverflow.com/questions/7724448/simple-json-string-escape-for-c/33799784#33799784
+// Return s with the characters that may not appear raw inside a JSON string
+// escaped: '"' and '\\' get a backslash, \b \f \n \r \t use their short
+// escapes, and the remaining characters in the range 0x00..0x1f are written
+// as \u00XX. Everything else is copied unchanged.
+std::string json_escape_string(const std::string &s)
+{
+    std::ostringstream out;
+    for (const char ch : s) {
+        switch (ch) {
+        case '"':
+            out << "\\\"";
+            break;
+        case '\\':
+            out << "\\\\";
+            break;
+        case '\b':
+            out << "\\b";
+            break;
+        case '\f':
+            out << "\\f";
+            break;
+        case '\n':
+            out << "\\n";
+            break;
+        case '\r':
+            out << "\\r";
+            break;
+        case '\t':
+            out << "\\t";
+            break;
+        default: {
+            const bool is_control = '\x00' <= ch && ch <= '\x1f';
+            if (is_control)
+                out << "\\u" << std::hex << std::setw(4) << std::setfill('0') << static_cast<int>(ch);
+            else
+                out << ch;
+        }
+        }
+    }
+    return out.str();
+}
diff --git a/src/utils.hpp b/src/utils.hpp
new file mode 100644
index 0000000..1081fd3
--- /dev/null
+++ b/src/utils.hpp
@@ -0,0 +1,78 @@
+#pragma once
+
+#include <cassert>
+#include <cstddef>
+#include <functional>
+#include <glib.h>
+#include <list>
+#include <string>
+
+// Non-copyable owner of a raw T*. The pointer is released with unref_res()
+// when the wrapper is destroyed or reset() to a different pointer.
+// get_impl() exposes the raw pointer; get_addr() exposes the address of the
+// stored pointer, for APIs that return a resource through an out-parameter.
+template <typename T, typename unref_res_t, void (*unref_res)(unref_res_t *)>
+class ResourceWrapper
+{
+public:
+    ResourceWrapper(T *p = nullptr)
+        : p_(p)
+    {
+    }
+    ResourceWrapper(const ResourceWrapper &) = delete;
+    ResourceWrapper &operator=(const ResourceWrapper &) = delete;
+    ~ResourceWrapper() { free_resource(); }
+
+    T *operator->() const { return p_; }
+    bool operator!() const { return !p_; }
+    const T &operator[](size_t idx) const
+    {
+        assert(p_ != nullptr);
+        return *(p_ + idx);
+    }
+
+    // Take ownership of newp, releasing the current resource first.
+    // Resetting to the pointer already held does nothing.
+    void reset(T *newp)
+    {
+        if (newp == p_)
+            return;
+        free_resource();
+        p_ = newp;
+    }
+
+    friend inline bool operator==(const ResourceWrapper &lhs, std::nullptr_t) noexcept
+    {
+        return lhs.p_ == nullptr;
+    }
+
+    friend inline bool operator!=(const ResourceWrapper &lhs, std::nullptr_t) noexcept
+    {
+        return lhs.p_ != nullptr;
+    }
+
+    friend inline T *get_impl(const ResourceWrapper &rw)
+    {
+        return rw.p_;
+    }
+
+    friend inline T **get_addr(ResourceWrapper &rw)
+    {
+        return &rw.p_;
+    }
+
+private:
+    T *p_;
+
+    // Release the held resource, if any; p_ itself is left unchanged.
+    void free_resource()
+    {
+        if (p_ != nullptr)
+            unref_res(p_);
+    }
+};
+
+// Ready-made wrappers for GLib resources.
+namespace glib
+{
+// gchar* released with g_free()
+using CharStr = ResourceWrapper<gchar, void, g_free>;
+// GError* released with g_error_free()
+using Error = ResourceWrapper<GError, GError, g_error_free>;
+}
+
+extern std::string utf8_to_locale_ign_err(const std::string &utf8_str);
+
+extern void for_each_file(const std::list<std::string> &dirs_list, const std::string &suff,
+ const std::list<std::string> &order_list, const std::list<std::string> &disable_list,
+ const std::function<void(const std::string &, bool)> &f);
+extern std::string json_escape_string(const std::string &str);
diff --git a/tests/rus-eng-stardict-2.4.2/1.xdxf.dict b/tests/rus-eng-stardict-2.4.2/1.xdxf.dict
new file mode 100644
index 0000000..125ecd8
--- /dev/null
+++ b/tests/rus-eng-stardict-2.4.2/1.xdxf.dict
@@ -0,0 +1,2 @@
+<k>человек</k>
+man \ No newline at end of file
diff --git a/tests/rus-eng-stardict-2.4.2/1.xdxf.idx b/tests/rus-eng-stardict-2.4.2/1.xdxf.idx
new file mode 100644
index 0000000..93df80e
--- /dev/null
+++ b/tests/rus-eng-stardict-2.4.2/1.xdxf.idx
Binary files differ
diff --git a/tests/rus-eng-stardict-2.4.2/1.xdxf.idx.oft b/tests/rus-eng-stardict-2.4.2/1.xdxf.idx.oft
new file mode 100644
index 0000000..bd5c857
--- /dev/null
+++ b/tests/rus-eng-stardict-2.4.2/1.xdxf.idx.oft
Binary files differ
diff --git a/tests/rus-eng-stardict-2.4.2/1.xdxf.ifo b/tests/rus-eng-stardict-2.4.2/1.xdxf.ifo
new file mode 100644
index 0000000..fa86d41
--- /dev/null
+++ b/tests/rus-eng-stardict-2.4.2/1.xdxf.ifo
@@ -0,0 +1,8 @@
+StarDict's dict ifo file
+version=2.4.2
+wordcount=1
+idxfilesize=23
+bookname=Sample 1 test dictionary
+date=2016.06.02
+sametypesequence=x
+description=Copyright: GNU Public License.; Version: 0.1
diff --git a/tests/stardict-test_dict-2.4.2/test_dict.dict b/tests/stardict-test_dict-2.4.2/test_dict.dict
new file mode 100644
index 0000000..5fb702d
--- /dev/null
+++ b/tests/stardict-test_dict-2.4.2/test_dict.dict
@@ -0,0 +1,2 @@
+<k>test</k>
+test passed \ No newline at end of file
diff --git a/tests/stardict-test_dict-2.4.2/test_dict.idx b/tests/stardict-test_dict-2.4.2/test_dict.idx
new file mode 100644
index 0000000..241fa00
--- /dev/null
+++ b/tests/stardict-test_dict-2.4.2/test_dict.idx
Binary files differ
diff --git a/tests/stardict-test_dict-2.4.2/test_dict.ifo b/tests/stardict-test_dict-2.4.2/test_dict.ifo
new file mode 100644
index 0000000..54c03a6
--- /dev/null
+++ b/tests/stardict-test_dict-2.4.2/test_dict.ifo
@@ -0,0 +1,7 @@
+StarDict's dict ifo file
+version=2.4.2
+wordcount=1
+idxfilesize=13
+bookname=test_dict
+date=2006.04.24
+sametypesequence=x
diff --git a/tests/stardict-test_synonyms-2.4.2/test.dict.dz b/tests/stardict-test_synonyms-2.4.2/test.dict.dz
new file mode 100644
index 0000000..fea9f2e
--- /dev/null
+++ b/tests/stardict-test_synonyms-2.4.2/test.dict.dz
Binary files differ
diff --git a/tests/stardict-test_synonyms-2.4.2/test.idx b/tests/stardict-test_synonyms-2.4.2/test.idx
new file mode 100644
index 0000000..871c01e
--- /dev/null
+++ b/tests/stardict-test_synonyms-2.4.2/test.idx
Binary files differ
diff --git a/tests/stardict-test_synonyms-2.4.2/test.ifo b/tests/stardict-test_synonyms-2.4.2/test.ifo
new file mode 100644
index 0000000..70f26e1
--- /dev/null
+++ b/tests/stardict-test_synonyms-2.4.2/test.ifo
@@ -0,0 +1,7 @@
+StarDict's dict ifo file
+version=2.4.2
+bookname=Test synonyms
+wordcount=2
+synwordcount=2
+idxfilesize=32
+sametypesequence=m
diff --git a/tests/stardict-test_synonyms-2.4.2/test.syn b/tests/stardict-test_synonyms-2.4.2/test.syn
new file mode 100644
index 0000000..e4c409d
--- /dev/null
+++ b/tests/stardict-test_synonyms-2.4.2/test.syn
Binary files differ
diff --git a/tests/stardict-test_synonyms-2.4.2/test.xml b/tests/stardict-test_synonyms-2.4.2/test.xml
new file mode 100644
index 0000000..ddad79a
--- /dev/null
+++ b/tests/stardict-test_synonyms-2.4.2/test.xml
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<stardict xmlns:xi="http://www.w3.org/2003/XInclude">
+ <info>
+ <version>2.4.2</version>
+ <bookname>Test synonyms</bookname>
+ <author></author>
+ <email></email>
+ <website></website>
+ <description></description>
+ <date></date>
+ <dicttype></dicttype>
+ </info>
+ <article><key>test</key><synonym>foo</synonym><synonym>bar</synonym>
+ <definition type="m">
+ <![CDATA[result of test]]>
+ </definition>
+ </article>
+ <article><key>testawordy</key>
+ <definition type="m">
+ <![CDATA[word that ends in y to test with fuzzy search in -ied]]>
+ </definition>
+ </article>
+</stardict>
diff --git a/tests/t_datadir b/tests/t_datadir
new file mode 100755
index 0000000..0ebe965
--- /dev/null
+++ b/tests/t_datadir
@@ -0,0 +1,17 @@
+#!/bin/sh
+# Check that `sdcv --data-dir /tmp/bugagaga -l` lists exactly as many
+# dictionaries as there are *.ifo files under ~/.stardict/dic.
+# Usage: t_datadir PATH_TO_SDCV
+
+PATH_TO_SDCV="$1"
+
+unset SDCV_PAGER
+have=$("$PATH_TO_SDCV" --data-dir /tmp/bugagaga -l | wc -l)
+#do not count header
+have=$(($have-1))
+# A missing ~/.stardict/dic simply means zero dictionaries; keep find quiet.
+ndicts=$(find "${HOME}"/.stardict/dic -name "*.ifo" -print 2>/dev/null | wc -l)
+if [ $have -ne $ndicts ]; then
+    # Report the count that was actually compared.
+    echo "test failed: sdcv says: we have: $have, but really we have: $ndicts" >&2
+    exit 1
+fi
+
+exit 0
diff --git a/tests/t_exact b/tests/t_exact
new file mode 100755
index 0000000..f4c11d2
--- /dev/null
+++ b/tests/t_exact
@@ -0,0 +1,24 @@
+#!/bin/sh
+# Check the output of `sdcv -e` against the "Test synonyms" dictionary.
+# Usage: t_exact PATH_TO_SDCV TEST_DIR
+
+set -e
+
+SDCV="$1"
+TEST_DIR="$2"
+
+unset SDCV_PAGER
+
+# test_word WORD EXPECTED TAG
+# Look WORD up and require that the output line containing TAG is exactly
+# EXPECTED.
+test_word() {
+    WORD=$1
+    EXPECTED=$2
+    TAG=$3
+    # `|| true`: under `set -e` a grep without a match would otherwise abort
+    # the script right here, before the diagnostic below is printed.
+    RES=$("$SDCV" -e -n --data-dir "$TEST_DIR" -u "Test synonyms" "$WORD" | grep "$TAG" || true)
+    if [ "$EXPECTED" != "$RES" ]; then
+        echo "lookup of $WORD with -e should print '$EXPECTED' but printed '$RES'"
+        exit 1
+    fi
+}
+
+test_word testawordies "Nothing similar to testawordies, sorry :(" "Nothing similar"
+test_word testawordy "word that ends in y to test with fuzzy search in -ied" "fuzzy"
+
+exit 0
diff --git a/tests/t_interactive b/tests/t_interactive
new file mode 100755
index 0000000..124bdad
--- /dev/null
+++ b/tests/t_interactive
@@ -0,0 +1,20 @@
+#!/bin/sh
+# check that in not-interactive mode sdcv not wait any input
+
+PATH_TO_SDCV="$1"
+
+if test ! -x "$PATH_TO_SDCV"; then
+ echo "Can not find sdcv binary $1" >&2
+ exit 1
+fi
+
+"$PATH_TO_SDCV" -n >/dev/null 2>&1 &
+PID=$!
+sleep 1
+
+if kill -0 $PID >/dev/null 2>&1 ; then
+ echo "process wait input: $PID, test failed" >&2
+ exit 1
+fi
+
+exit 0
diff --git a/tests/t_json b/tests/t_json
new file mode 100755
index 0000000..65b580b
--- /dev/null
+++ b/tests/t_json
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+set -e
+
+SDCV="$1"
+TEST_DIR="$2"
+
+unset SDCV_PAGER
+unset STARDICT_DATA_DIR
+
+test_json() {
+ PARAMS="$1"
+ EXPECTED=$(echo "$2" | jq 'sort')
+ RESULT=$($SDCV $PARAMS | jq 'sort')
+ if [ "$EXPECTED" != "$RESULT"]; then
+ echo "expected $EXPECTED but got $RESULT"
+ exit 1
+ fi
+}
+
+test_json "-x -j -l -n --data-dir \"$TEST_DIR\"" "[{\"name\": \"Test synonyms\", \"wordcount\": \"1\"},{\"name\": \"Sample 1 test dictionary\", \"wordcount\": \"1\"},{\"name\": \"test_dict\", \"wordcount\": \"1\"}]"
+test_json "-x -j -n --data-dir \"$TEST_DIR\" foo" "[{\"dict\": \"Test synonyms\",\"word\":\"test\",\"definition\":\"\nresult of test\"}]"
+test_json "-x -j -n --data-dir \"$TEST_DIR\" foobarbaaz" "[]"
+
+exit 0
diff --git a/tests/t_list b/tests/t_list
new file mode 100755
index 0000000..3518731
--- /dev/null
+++ b/tests/t_list
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Check that `sdcv -l` lists as many dictionaries as there are *.ifo files
+# under /usr/share/stardict/dic and ~/.stardict/dic together.
+# Usage: t_list PATH_TO_SDCV
+
+PATH_TO_SDCV="$1"
+# The first line of the listing is a header, not a dictionary.
+listed=$("$PATH_TO_SDCV" -l | wc -l)
+listed=$(($listed-1))
+system_wide=$(find /usr/share/stardict/dic -name "*.ifo" | wc -l)
+per_user=$(find "${HOME}"/.stardict/dic -name "*.ifo" | wc -l)
+expected=$(($system_wide+$per_user))
+
+if [ $expected -ne $listed ]; then
+    echo "should be: $expected, we have: $listed" >&2
+    exit 1
+fi
+
+exit 0
diff --git a/tests/t_only_data_dir b/tests/t_only_data_dir
new file mode 100755
index 0000000..392f056
--- /dev/null
+++ b/tests/t_only_data_dir
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+set -e
+
+SDCV="$1"
+TEST_DIR="$2"
+
+unset SDCV_PAGER
+unset STARDICT_DATA_DIR
+
+DICTS=$($SDCV -x -n -l --data-dir "$TEST_DIR" | tail -n +2 | wc -l)
+# the expected result:
+ACTUAL_DICTS=$(find "$TEST_DIR" -name "*.ifo" | wc -l)
+if [ $DICTS -ne $ACTUAL_DICTS ]; then
+ echo "number of dictionaries in $TEST_DIR should be $ACTUAL_DICTS but was $DICTS according to sdcv"
+ exit 1
+fi
+
+exit 0
diff --git a/tests/t_synonyms b/tests/t_synonyms
new file mode 100755
index 0000000..3ad3951
--- /dev/null
+++ b/tests/t_synonyms
@@ -0,0 +1,22 @@
+#!/bin/sh
+# Check that a key and both of its synonyms resolve to the same article of
+# the "Test synonyms" dictionary.
+# Usage: t_synonyms PATH_TO_SDCV TEST_DIR
+
+set -e
+
+SDCV="$1"
+TEST_DIR="$2"
+
+unset SDCV_PAGER
+
+# test_word WORD
+# Look WORD up and require the output line containing "result" to be
+# exactly "result of test".
+test_word() {
+    QUERY=$1
+    LINE=$($SDCV -n --data-dir "$TEST_DIR" -u "Test synonyms" $QUERY | grep result)
+    if [ "result of test" != "$LINE" ]; then
+        echo "synonym for $QUERY should be 'result of test' but was '$LINE'"
+        exit 1
+    fi
+}
+
+for w in foo bar test; do
+    test_word $w
+done
+
+exit 0
diff --git a/tests/t_use b/tests/t_use
new file mode 100755
index 0000000..d141726
--- /dev/null
+++ b/tests/t_use
@@ -0,0 +1,20 @@
+#!/bin/sh
+# Check that `sdcv -u test_dict` finds the "test" keyword in the sample
+# dictionary after it has been copied into ~/.stardict/dic.
+# Usage: t_use PATH_TO_SDCV TESTS_DIR
+
+set -e
+
+PATH_TO_SDCV="$1"
+TESTS_DIR="$2"
+
+mkdir -p "${HOME}"/.stardict/dic
+# Remove the copied dictionary however the script ends, including the
+# failure paths below and any abort caused by `set -e`.
+trap 'rm -fr "${HOME}"/.stardict/dic/stardict-test_dict-2.4.2' EXIT
+cp -R "${TESTS_DIR}/stardict-test_dict-2.4.2" "${HOME}"/.stardict/dic
+unset SDCV_PAGER
+# `|| true`: under `set -e` a grep without a match would otherwise abort the
+# script here, before the diagnostic below is printed.
+RES=$("$PATH_TO_SDCV" -n -u test_dict test | grep "test passed" || true)
+
+if [ -z "$RES" ]; then
+    echo "we didn't find in stardict-test_dict-2.4.2 \"test\" keyword, something wrong" >&2
+    exit 1
+fi
+
+exit 0
diff --git a/tests/t_utf8input b/tests/t_utf8input
new file mode 100755
index 0000000..1ad8360
--- /dev/null
+++ b/tests/t_utf8input
@@ -0,0 +1,28 @@
+#!/bin/sh
+# Check that a UTF-8 query passed with --utf8-input produces more than one
+# line of output while LANG is set to ru_RU.KOI8-R.
+# Usage: t_utf8input PATH_TO_SDCV TESTS_DIR
+
+set -e
+
+PATH_TO_SDCV="$1"
+TESTS_DIR="$2"
+
+# Copy the sample dictionary into the per-user dictionary directory.
+# NOTE(review): the copy is never removed again by this script.
+mkdir -p "${HOME}"/.stardict/dic
+cp -R "${TESTS_DIR}/rus-eng-stardict-2.4.2" "${HOME}"/.stardict/dic/
+
+unset SDCV_PAGER
+export LANG=ru_RU.KOI8-R
+# Split the command output below on newlines only, so the loop iterates over
+# lines instead of words.
+IFS="
+"
+# j counts output lines (stdout and stderr together); the loop stops as soon
+# as a second line is seen.
+j=0
+for i in `"$PATH_TO_SDCV" --utf8-input -n человек 2>&1`; do
+ j=$(($j+1))
+ if [ $j -ne 1 ]; then
+ break;
+ fi
+done
+
+# Exactly one line of output is treated as "nothing found".
+# NOTE(review): no output at all leaves j at 0 and is not reported as a
+# failure here - confirm that this is intended.
+if [ $j -eq 1 ]; then
+ echo "$0: empty results of search: test failed" >&2
+ exit 1
+fi
+
+exit 0
diff --git a/tests/t_utf8output b/tests/t_utf8output
new file mode 100755
index 0000000..4d04778
--- /dev/null
+++ b/tests/t_utf8output
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+set -e
+
+export LANG=ru_RU.KOI8-R
+unset SDCV_PAGER
+
+PATH_TO_SDCV="$1"
+
+if test ! -x "$PATH_TO_SDCV"; then
+ echo "Can not find sdcv binary $1" >&2
+ exit 1
+fi
+
+if ! "$PATH_TO_SDCV" -n --utf8-output man | tail -n -1 | iconv -f utf-8 -t utf-8 >/dev/null; then
+ echo "utf8 output didn't work" >&2
+ exit 1
+fi
+
+exit 0